import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests
# Ensure plots render inline when running inside Jupyter/IPython.
# "%matplotlib inline" is IPython-only syntax and is a SyntaxError in plain
# Python; the get_ipython() call below is what IPython expands the magic to,
# so notebook behavior is unchanged while the file stays parseable.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # get_ipython is only defined inside an IPython session. When this file
    # runs as a regular script, matplotlib's default backend is used.
    pass
# Define the provided datasets (from the PubMed source) inline
# 1. At1g71720 mutant phenotype data (mesophyll-specific suppression).
# Only one mutant allele is described, so the record is written once with
# scalar values and each value is then wrapped in a one-element list to get
# the column -> list-of-values mapping handed to pandas.
_chloroplast_record = {
    "Gene_Locus": "At1g71720",
    "Mutant_Allele": "SAIL_162_G11",
    "Phenotype": "Seedling lethal (Mesophyll-specific suppression)",
    "Rescue_Under_High_CO2": "No",
    "Plastid_Defect": "Severe grana stacking disruption (Mesophyll only)",
    "Viability_Days": 14,
}
chloroplast_data = {
    column: [value] for column, value in _chloroplast_record.items()
}
# Single-row frame; columns keep the insertion order of the record above.
chloroplast_df = pd.DataFrame(chloroplast_data)
# 2. Protein abundance in chloroplast-defective mutants.
# Written one protein per row for readability, then transposed into the
# column -> list-of-values mapping handed to pandas.
# NOTE(review): "At1g..." is a nuclear chromosome-1 locus code, while rbcL is
# normally plastid-encoded -- confirm the locus/name pairing of the first row
# against the source.
# NOTE(review): the scale of the fold-change values (linear vs. log) is not
# stated here -- confirm before interpreting the sign.
_protein_columns = (
    "AGI_Locus",
    "Gene_Name",
    "Fold_Change_WT_vs_Mutant",
    "Function",
)
_protein_rows = [
    (
        "At1g67090",
        "rbcL",
        -1.64,
        "Rubisco large subunit (Mesophyll-suppressed only)",
    ),
    ("At2g04030", "Hsp90", 2.41, "Chaperone"),
    ("At4g09650", "ATP_synthase", 0.72, "ATP synthase δ-subunit"),
]
protein_data = {
    column: list(values)
    for column, values in zip(_protein_columns, zip(*_protein_rows))
}
protein_df = pd.DataFrame(protein_data)
# 3. C3 plant carbon-fixation parameters (wild-type baseline).
# Each tuple is (parameter, value, description); the rows are unzipped into
# three parallel lists that become the frame's columns.
# NOTE(review): units for the values are not recorded here -- confirm
# against the source before using them in calculations.
_c3_rows = (
    ("Vcmax", 100, "Max carboxylation rate"),
    ("Jmax", 180, "Max electron transport rate"),
    ("Rd", 1.5, "Day respiration"),
    ("CO2_compensation_point", 40, "Γ* (CO2 comp. point)"),
)
_c3_names, _c3_values, _c3_notes = (list(field) for field in zip(*_c3_rows))
c3_baseline = {
    "Parameter": _c3_names,
    "Value": _c3_values,
    "Description": _c3_notes,
}
# The values mix ints with 1.5, so pandas stores "Value" as a float column.
c3_df = pd.DataFrame(c3_baseline)
# 4. Regulatory elements for evolutionary modeling.
# One record per gene/species pair; columns are pulled out of the records by
# position to form the column -> list-of-values mapping handed to pandas.
_regulatory_columns = ("Gene", "Species", "Cell_Specificity", "ACR_Count")
_regulatory_records = (
    ("PEP carboxylase", "Zea mays (C4)", "Mesophyll", 3),
    ("rbcL", "Oryza sativa (C3)", "Bundle Sheath", 1),
)
regulatory_data = {
    column: [record[position] for record in _regulatory_records]
    for position, column in enumerate(_regulatory_columns)
}
regulatory_df = pd.DataFrame(regulatory_data)
# Display sample data (first rows of each frame).
# `display` is injected by IPython/Jupyter and does not exist in a plain
# Python interpreter; fall back to print() there so the script still runs.
# Inside a notebook the rich display is used exactly as before.
try:
    _show = display
except NameError:
    _show = print
for _frame in (chloroplast_df, protein_df, c3_df, regulatory_df):
    _show(_frame.head())