import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cptac
# Instantiate the CPTAC endometrial (UCEC) dataset object.
en = cptac.Ucec()

# Pull the harmonized 'ancestry_prediction' table from the derived
# molecular data of that dataset.
der_molecular = en.get_derived_molecular(
    type='ancestry_prediction',
    source='harmonized',
)

# Display the columns available in the ancestry table.
der_molecular.columns

Index(['cancer_type', 'self_reported_race', 'self_reported_ethnicity',
       'self_reported_ethnicity_race_ancestry_identified',
       'self_reported_participant_country', 'washU_pred_ancestry',
       'mssm_pred_anc', 'consensus_pred_ancestry'],
      dtype='object', name='Name')

# List the distinct values found in the 'washU_pred_ancestry' column.
der_molecular['washU_pred_ancestry'].unique()

array(['EUR', 'AMR', 'SAS', 'AFR'], dtype=object)

# Join the 'washU_pred_ancestry' column of the harmonized
# 'ancestry_prediction' metadata onto the umich proteomics table.
joined_data = en.join_metadata_to_omics(
    omics_name='proteomics',
    omics_source='umich',
    metadata_name='ancestry_prediction',
    metadata_source='harmonized',
    metadata_cols='washU_pred_ancestry',
)

# Box plot of JAK1 proteomics values grouped by predicted ancestry, with the
# individual observations drawn on top as a strip plot. Outlier markers are
# disabled on the box plot because the strip plot already shows every point.
# NOTE(review): the variable is named "macrophages_*" although the plotted
# column is JAK1 -- consider renaming.
macrophages_boxplot = sns.boxplot(
    data=joined_data,
    x='washU_pred_ancestry',
    y='JAK1_umich_proteomics',
    showfliers=False,
)
macrophages_boxplot = sns.stripplot(
    data=joined_data,
    x='washU_pred_ancestry',
    y='JAK1_umich_proteomics',
    color='.3',
)
plt.show()

# Overlay one kernel density estimate (KDE) of JAK1 proteomics values per
# predicted ancestry group on a single wide facet.
# seaborn warns and skips the density estimate for any group whose values
# have zero variance.
# NOTE(review): the variable is named "macrophages_histogram" although the
# plot is a KDE of JAK1 -- consider renaming.
macrophages_histogram = sns.FacetGrid(
    joined_data[['washU_pred_ancestry', 'JAK1_umich_proteomics']],
    hue="washU_pred_ancestry",
    legend_out=False,
    aspect=3,
)
macrophages_histogram = macrophages_histogram.map(
    sns.kdeplot, "JAK1_umich_proteomics"
).add_legend(title="washU_pred_ancestry")
# The y-axis of a KDE is a probability density, not a proportion.
macrophages_histogram.set(ylabel='Density')
plt.show()

cptac warning: Your version of cptac (1.5.1) is out-of-date. Latest is 1.5.0. Please run 'pip install --upgrade cptac' to update it. (C:\Users\sabme\anaconda3\lib\threading.py, line 910)
C:\Users\sabme\anaconda3\lib\site-packages\seaborn\axisgrid.py:848: UserWarning: Dataset has 0 variance; skipping density estimate. Pass `warn_singular=False` to disable this warning.
  func(*plot_args, **plot_kwargs)

Use Case 6: Comparing Derived Molecular Data with Proteomics

Step 1: Importing packages

Step 2: Retrieving data and selecting attributes

Step 3: Joining dataframes

Step 4: Plotting the data