07 Summary Figures
Jupyter notebook from the Pan-Bacterial Metal Fitness Atlas project.
NB 07: Summary Figures
Generate publication-quality summary figures combining results from NB01-NB06.
Runs locally.
Outputs: figures/summary_*.png
In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
# Project layout: the notebook resolves the project root as its parent
# directory and reads/writes everything relative to it.
PROJECT_DIR = Path('..').resolve()
DATA_DIR = PROJECT_DIR / 'data'
FIGURES_DIR = PROJECT_DIR / 'figures'
# savefig() does not create missing directories, so make sure the output
# folder exists before any figure cell runs (no-op when it is already there).
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Load all results (required inputs: a missing file raises immediately)
metal_exps = pd.read_csv(DATA_DIR / 'metal_experiments.csv')
metal_fitness = pd.read_csv(DATA_DIR / 'metal_fitness_scores.csv')
metal_cons = pd.read_csv(DATA_DIR / 'metal_conservation_stats.csv')
org_cons = pd.read_csv(DATA_DIR / 'organism_conservation_stats.csv')
conserved_fam = pd.read_csv(DATA_DIR / 'conserved_metal_families.csv')

# Optional files: fall back to an empty DataFrame when the file is absent so
# downstream cells can still call len() on the result.
novel_path = DATA_DIR / 'novel_metal_candidates.csv'
pred_path = DATA_DIR / 'metal_tolerance_predictions_fb.csv'
novel, predictions = (
    pd.read_csv(optional_path) if optional_path.exists() else pd.DataFrame()
    for optional_path in (novel_path, pred_path)
)

print('Data loaded.')
Data loaded.
Figure 1: Atlas Overview — Scale and Scope
In [2]:
# Figure 1 - three-panel atlas overview:
#   A. number of experiments per metal,
#   B. percent of genes flagged `is_metal_important` per metal,
#   C. per-metal core-fraction delta with significance markers.
# The figure is written to FIGURES_DIR / 'summary_atlas_overview.png'.
from matplotlib.patches import Patch

TOXIC_COLOR = '#e74c3c'
ESSENTIAL_COLOR = '#3498db'

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Panel A: Experiments per metal
ax = axes[0]
exclude = ['Platinum', 'Metal_limitation']  # entries left out of the per-metal counts
exps_clean = metal_exps[~metal_exps['metal_element'].isin(exclude)]
metal_counts = exps_clean.groupby('metal_element').size().sort_values(ascending=True)
# Metals drawn with the "Essential" colour in this panel; all others are drawn as "Toxic".
essential_metals = ('Iron', 'Molybdenum', 'Tungsten', 'Selenium', 'Manganese')
colors = [ESSENTIAL_COLOR if m in essential_metals else TOXIC_COLOR
          for m in metal_counts.index]
metal_counts.plot(kind='barh', ax=ax, color=colors, edgecolor='black', linewidth=0.5)
ax.set_xlabel('Number of Experiments')
ax.set_title('A. Metal Experiments in Fitness Browser')
ax.legend(handles=[
    Patch(color=TOXIC_COLOR, label='Toxic'),
    Patch(color=ESSENTIAL_COLOR, label='Essential'),
], loc='lower right', fontsize=9)

# Panel B: Metal-important genes per metal (% of genome)
# NOTE(review): unlike Panel A, no `exclude` filter is applied here - confirm
# that is intended.
ax = axes[1]
# Select the column before aggregating instead of groupby().apply(lambda ...):
# same values, but avoids the pandas deprecation warning about apply()
# operating on the grouping column. rename(None) keeps the Series unnamed so
# the bars do not add an extra entry to the legend created below.
imp_pct = (
    metal_fitness.groupby('metal_element')['is_metal_important']
    .mean()
    .mul(100)
    .rename(None)
    .sort_values(ascending=True)
)
imp_pct.plot(kind='barh', ax=ax, color='#e67e22', edgecolor='black', linewidth=0.5)
ax.set_xlabel('% Genes with Metal Fitness Defect')
ax.set_title('B. Metal Impact on Gene Fitness')
ax.axvline(metal_fitness['is_metal_important'].mean() * 100, color='gray',
           linestyle='--', alpha=0.7, label='Overall mean')
ax.legend(fontsize=9)

# Panel C: Conservation delta (core fraction: important - baseline) per metal
ax = axes[2]
cons_sorted = metal_cons.sort_values('delta')
colors_c = [TOXIC_COLOR if c == 'toxic' else ESSENTIAL_COLOR
            for c in cons_sorted['category']]
bar_positions = range(len(cons_sorted))
ax.barh(bar_positions, cons_sorted['delta'], color=colors_c,
        edgecolor='black', linewidth=0.5)
ax.set_yticks(bar_positions)
ax.set_yticklabels(cons_sorted['metal'])
ax.axvline(0, color='black', linewidth=0.5)
ax.set_xlabel('Δ Core Fraction (important - baseline)')
ax.set_title('C. Metal Genes: Core Enrichment')

# Add significance markers: a '*' just past the end of every bar whose
# p-value is below the threshold (pushed outward in the direction of the bar).
significance_alpha = 0.05
marker_offset = 0.002
for y_pos, (delta, p_value) in enumerate(zip(cons_sorted['delta'], cons_sorted['p_value'])):
    if p_value < significance_alpha:
        ax.text(delta + marker_offset * np.sign(delta), y_pos, '*',
                ha='center', va='center', fontsize=12, fontweight='bold')

fig.suptitle('Pan-Bacterial Metal Fitness Atlas: Overview', fontsize=16, y=1.05)
plt.tight_layout()
fig.savefig(FIGURES_DIR / 'summary_atlas_overview.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved: figures/summary_atlas_overview.png')
Saved: figures/summary_atlas_overview.png
Figure 2: Cross-Species Conservation of Metal Gene Families
In [3]:
# Figure 2 - conserved metal gene families:
#   A. histogram of how many organisms each family appears in,
#   B. bar chart of annotated vs. novel-candidate family counts.
# The figure is written to FIGURES_DIR / 'summary_metal_families.png'.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Panel A: Distribution of family breadth
ax = axes[0]
breadth = conserved_fam['n_organisms_any']
# One integer-wide bin per organism count. int() keeps range() working when
# the column was parsed as float, and the fallback covers an empty/all-NaN
# column (where max() would be NaN).
max_breadth = int(breadth.max()) if breadth.notna().any() else 1
ax.hist(breadth, bins=range(1, max_breadth + 2), color='#2ecc71',
        alpha=0.8, edgecolor='black', linewidth=0.5, align='left')
ax.set_xlabel('# Organisms with Metal Phenotype')
ax.set_ylabel('# Gene Families')
ax.set_title('A. Metal Gene Family Breadth')
n_conserved = (breadth >= 3).sum()
ax.text(0.95, 0.95, f'{n_conserved} families in ≥3 organisms',
        transform=ax.transAxes, ha='right', va='top', fontsize=10,
        bbox=dict(boxstyle='round', facecolor='lightyellow'))

# Panel B: Novel vs annotated
ax = axes[1]
n_novel_val = len(novel)
# NOTE(review): treats every row of `novel` as one of the rows of
# `conserved_fam` - confirm against the notebook that writes the file.
n_annotated = len(conserved_fam) - n_novel_val
ax.bar(['Annotated', 'Hypothetical\n(novel candidates)'],
       [n_annotated, n_novel_val],
       color=['#3498db', '#e74c3c'], edgecolor='black', linewidth=0.5)
ax.set_ylabel('# Conserved Metal Gene Families')
ax.set_title('B. Annotated vs Novel Metal Gene Families')
total = n_annotated + n_novel_val
# Count + percentage label above each bar; report 0% rather than dividing by
# zero when there are no families at all.
for x_pos, count in enumerate((n_annotated, n_novel_val)):
    share = 100 * count / total if total else 0
    ax.text(x_pos, count + 5, f'{count}\n({share:.0f}%)',
            ha='center', fontsize=11)

fig.suptitle('Conserved Metal Fitness Gene Families', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig(FIGURES_DIR / 'summary_metal_families.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved: figures/summary_metal_families.png')
Saved: figures/summary_metal_families.png
In [4]:
# Closing report: list every PNG in the figures directory, then every CSV in
# the data directory together with its row count.
banner = '=' * 80

print(banner)
print('NB07 SUMMARY: All Figures Generated')
print(banner)

print(f'\nAll figures in {FIGURES_DIR}:')
for png_path in sorted(FIGURES_DIR.glob('*.png')):
    print(f' {png_path.name}')

print(f'\nAll data files in {DATA_DIR}:')
for csv_path in sorted(DATA_DIR.glob('*.csv')):
    # Each file is read in full just to count its rows.
    row_count = pd.read_csv(csv_path).shape[0]
    print(f' {csv_path.name:45s} {row_count:>8,} rows')

print(banner)
================================================================================ NB07 SUMMARY: All Figures Generated ================================================================================ All figures in /home/psdehal/pangenome_science/BERIL-research-observatory/projects/metal_fitness_atlas/figures: bioleaching_species_scores.png core_fraction_by_metal.png metal_conservation_by_organism.png metal_family_conservation_heatmap.png metal_fitness_distributions.png metal_important_genes_by_organism.png metal_module_activity_heatmap.png organism_metal_matrix.png species_metal_score_distribution.png summary_atlas_overview.png summary_metal_families.png All data files in /home/psdehal/pangenome_science/BERIL-research-observatory/projects/metal_fitness_atlas/data: conserved_metal_families.csv 1,182 rows metal_conservation_stats.csv 14 rows metal_experiments.csv 559 rows metal_experiments_analysis.csv 379 rows
metal_fitness_scores.csv 383,349 rows metal_functional_signature.csv 1,287 rows metal_important_genes.csv 12,838 rows metal_module_conservation.csv 183 rows metal_modules.csv 19,453 rows novel_metal_candidates.csv 149 rows organism_conservation_stats.csv 22 rows sensitivity_analysis.csv 2 rows species_metal_scores.csv 27,702 rows ================================================================================