import cptac
# Create the cptac Ucec dataset object; `en` is reused by the cells that follow.
en = cptac.Ucec()

# List each available data type together with the sources that provide it.
en.list_data_sources()

# Request the three tables this tutorial works with, naming the source for each.
# The return values are not kept here; later cells call the getters again.
# Some data may take several minutes to load.
en.get_proteomics('umich')
en.get_phosphoproteomics('umich')
en.get_CNV('washu')

cptac warning: Your version of cptac (1.5.1) is out-of-date. Latest is 1.5.0. Please run 'pip install --upgrade cptac' to update it. (C:\Users\sabme\anaconda3\lib\threading.py, line 910)

# Display first 5 rows of phosphoproteomics data
en.get_phosphoproteomics('umich').head()

# Join the washu CNV table (df1) with the umich phosphoproteomics table (df2) and preview it.
# In the recorded output the joined column names carry a "<source>_<data type>" suffix
# (e.g. A1BG_washu_CNV) and the CNV columns have empty "Site"/"Peptide" levels.
phospho_and_CNV = en.join_omics_to_omics(df1_name="CNV", df2_name="phosphoproteomics", df1_source='washu', df2_source = 'umich')
phospho_and_CNV.head()

# Index the proteomics table with a single gene name. In the recorded output this
# returns the ARF5 column(s), still labeled by the remaining "Database_ID" level.
prot = en.get_proteomics('umich')
all_levels_selection = prot["ARF5"]

# Display the first 10 rows of the desired data
all_levels_selection.head(10)

# Boolean mask over the columns: True wherever the "Name" level (the gene name) starts with "A".
gene1_filter = prot.columns.get_level_values("Name").str.startswith("A")
gene1_data = prot.loc[:, gene1_filter]  # keep only the columns selected by the mask
gene1_data.head()

# Filter on a different level of the column MultiIndex: "Database_ID".
# Build a boolean mask that is True for every column whose Database_ID contains "ENSP".
# str.contains is case-sensitive by default, so the pattern has to be spelled the way
# the IDs are (e.g. ENSP00000000233.5); the previous "ENSp" matched no column at all.
# The y_site_* names are kept unchanged so any other reference to them keeps working.
y_site_filter = prot.columns.get_level_values("Database_ID").str.contains("ENSP")

y_sites = prot.loc[:, y_site_filter]  # Select the columns picked out by the mask
y_sites.head()

import cptac.utils as ut

# Drop the "Database_ID" level so the columns are labeled by "Name" alone.
# Names are not unique without the IDs, which is what the duplicated-column-headers
# warning emitted by this call reports.
ut.reduce_multiindex(df=prot, levels_to_drop="Database_ID").head()

cptac warning: Due to dropping the specified levels, dataframe now has 1299 duplicated column headers. (C:\Users\sabme\AppData\Local\Temp\ipykernel_21892\2675409348.py, line 1)

# Load a second dataset (cptac Coad) and rebind `prot` to its umich proteomics table.
# From here on `prot` refers to this table, not the Ucec one loaded earlier.
colon = cptac.Coad()
prot = colon.get_proteomics('umich')
prot.head()

# Drop level 'Name', leaving the remaining level as the column labels
ut.reduce_multiindex(df=prot, levels_to_drop='Name').head()
# You can also pass a list in order to drop multiple levels at once

# flatten=True combines the column levels into a single level instead of dropping any.
ut.reduce_multiindex(df=prot, flatten=True).head()

# Rebuild the CNV + phosphoproteomics join from the Ucec dataset (`en`) for the flattening demo.
phospho_and_CNV = en.join_omics_to_omics(df1_name="CNV", df2_name="phosphoproteomics", df1_source = 'washu', df2_source = 'umich')
phospho_and_CNV.head()

# Note that the CNV columns all have empty strings in the "Site" level of the columns,
# since the CNV data doesn't have any values for that.

# Flatten the joined table's column levels into one level.
ut.reduce_multiindex(df=phospho_and_CNV, flatten=True).head()
# Notice that the empty strings have been dropped

# tuples=True: get a single-level column index whose labels are tuples of the original levels.
ut.reduce_multiindex(df=prot, tuples=True).head()

# Default call, for comparison with the quiet=True call below.
ut.reduce_multiindex(df=prot, levels_to_drop="Name").head()

# No warning will be issued
ut.reduce_multiindex(df=prot, levels_to_drop="Name", quiet=True).head()

	Data type	Available sources
0	CNV	[bcm, washu]
1	circular_RNA	[bcm]
2	miRNA	[bcm, washu]
3	proteomics	[bcm, umich]
4	transcriptomics	[bcm, broad, washu]
5	ancestry_prediction	[harmonized]
6	somatic_mutation	[harmonized, washu]
7	clinical	[mssm]
8	follow-up	[mssm]
9	medical_history	[mssm]
10	acetylproteomics	[umich]
11	phosphoproteomics	[umich]
12	cibersort	[washu]
13	hla_typing	[washu]
14	tumor_purity	[washu]
15	xcell	[washu]

Name	A1BG	A1CF	A2M	A2ML1	A3GALT2	A4GALT	A4GNT	AAAS	AACS	AADAC	...	ZW10	ZWILCH	ZWINT	ZXDC	ZYG11A	ZYG11B	ZYX	ZZEF1	ZZZ3	pk
Database_ID	ENSG00000121410.10	ENSG00000148584.13	ENSG00000175899.13	ENSG00000166535.18	ENSG00000184389.9	ENSG00000128274.14	ENSG00000118017.3	ENSG00000094914.11	ENSG00000081760.15	ENSG00000114771.12	...	ENSG00000086827.7	ENSG00000174442.10	ENSG00000122952.15	ENSG00000070476.13	ENSG00000203995.8	ENSG00000162378.11	ENSG00000159840.14	ENSG00000074755.13	ENSG00000036549.11	ENSG00000091436.15
Patient_ID
C3L-00006	-0.00659	-0.01982	-0.01402	-0.01402	-0.01418	-0.00839	-0.01305	-0.01402	-0.01402	-0.01305	...	-0.01641	-0.00963	-0.01982	-0.01305	-0.01418	-0.01418	-0.01897	-0.00529	-0.01418	-0.01480
C3L-00008	0.02578	0.00726	0.01350	0.01350	0.00732	0.01642	0.01005	0.01225	0.01225	0.01005	...	0.01583	0.01844	0.00726	0.01005	0.00732	0.00732	0.01200	0.01969	0.00732	0.01121
C3L-00032	0.01262	0.00425	-0.00275	-0.00275	0.00166	0.00549	-0.00038	-0.00275	-0.00275	-0.00038	...	-0.00305	0.00214	0.00425	-0.00038	0.00166	0.00166	0.01408	0.00683	0.00166	0.00208
C3L-00090	0.00100	0.41191	-0.02299	-0.02299	-0.02436	-0.01198	-0.03307	-0.02299	-0.02299	-0.03307	...	-0.01982	-0.02071	0.41191	-0.61621	-0.02436	-0.02436	-0.02182	-0.00336	-0.02436	-0.02548
C3L-00098	1.01075	0.27221	-0.39802	-0.39802	0.00226	0.31684	0.31108	-0.38507	-0.41089	0.31108	...	-0.39843	-0.38591	0.27221	0.31108	0.01711	0.01711	-0.01434	-0.34344	-0.01427	0.53267
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
C3N-01520	-0.05661	-0.06508	-0.06174	-0.06174	-0.05318	-0.05054	-0.06413	-0.06174	-0.06174	-0.06413	...	-0.05110	-0.05228	-0.06508	-0.06413	-0.05318	-0.05318	0.37759	-0.04914	-0.05318	-0.05533
C3N-01521	-0.36477	-0.00244	-0.06953	-0.06953	0.38241	-0.34150	0.31502	-0.06953	-0.04038	0.31502	...	-0.05879	-0.19572	-0.00244	0.79082	0.38241	0.38241	0.58054	-0.34785	0.38241	0.00842
C3N-01537	0.09203	0.00535	0.08807	0.08807	-0.19341	-0.29275	0.08943	0.08807	0.07883	0.08943	...	-0.11330	0.07121	0.00535	0.08943	-0.10289	-0.10289	-0.01151	-0.30164	-0.10289	0.02389
C3N-01802	-0.06298	-0.04134	0.05070	0.05070	-0.00420	-0.18427	0.17981	0.08711	-0.12670	0.17981	...	0.14454	0.05509	-0.04134	0.17981	-0.12128	-0.12128	-0.06015	0.14747	-0.13738	-0.01938
C3N-01825	0.12974	0.03784	0.11400	0.11400	0.04662	0.01662	0.13939	0.11400	-0.00039	0.13939	...	0.02229	-0.00842	0.03784	0.13939	0.04662	0.04662	0.03222	-0.02923	0.04662	0.02880

Name	ARF5	M6PR	ESRRA								...	SCRIB		TSGA10						SVIL
Site	S137	S267	S19	S22	S19S22	T31	S19S22	S19S22S27	S19S22T31	S27	...	S1575T1588S1594	S1594	S11	S173	S213	S391	S779	S101	S296	S459
Peptide	QDMPNAMPVsELTDK	GVGDDQLGEEsEERDDHLLPM	AEPAsPDSPK	AEPASPDsPK	AEPAsPDsPK	AEPASPDSPKGSSETEtEPPVALAPGPAPTR	AEPAsPDsPKGSSETETEPPVALAPGPAPTR	AEPAsPDsPKGSsETETEPPVALAPGPAPTR	AEPAsPDsPKGSSETEtEPPVALAPGPAPTR	GSsETETEPPVALAPGPAPTR	...	LAEAPSPAPTPsPTPVEDLGPQTStSPGRLsPDFAEELR	LsPDFAEELR	sPGRDPELQVEAAEVTTK	sPSRLDSFVK	RPsPTAR	AMDTEsELGR	GLDRsLEENLCYR;GLDRsLEENLCYRDF	EVVSSQVDDLTsHNEHLCK	DSEGDTPsLINWPSSK	LPsPTVAR
Database_ID	ENSP00000000233.5	ENSP00000000412.3	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	ENSP00000000442.6	...	ENSP00000501177.1	ENSP00000501177.1	ENSP00000501312.1	ENSP00000501312.1	ENSP00000501312.1	ENSP00000501312.1	ENSP00000501312.1	ENSP00000501312.1	ENSP00000501521.1	ENSP00000501521.1
Patient_ID
C3L-00006	NaN	0.573633	NaN	NaN	0.304721	NaN	NaN	NaN	NaN	NaN	...	NaN	0.667426	NaN	0.905606	NaN	-0.069911	-0.584774	NaN	-0.561657	-0.652457
C3L-00008	0.003632	-0.393734	NaN	NaN	0.789193	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	-0.488427	NaN	NaN	NaN	NaN	-0.431599	-1.079638
C3L-00032	NaN	-0.211020	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.131605	...	NaN	0.104862	NaN	NaN	NaN	NaN	NaN	NaN	NaN	-1.439041
C3L-00084	NaN	0.220473	NaN	NaN	-0.290506	NaN	NaN	NaN	NaN	NaN	...	NaN	0.399718	-0.875016	-0.579824	NaN	NaN	-0.505807	NaN	NaN	-1.521725
C3L-00090	NaN	0.161496	NaN	NaN	0.708453	NaN	0.405402	1.253045	NaN	0.265813	...	NaN	1.069439	NaN	0.510268	-1.889144	NaN	-0.592203	NaN	NaN	-1.126482

Name	A1BG_washu_CNV	A1CF_washu_CNV	A2M_washu_CNV	A2ML1_washu_CNV	A3GALT2_washu_CNV	A4GALT_washu_CNV	A4GNT_washu_CNV	AAAS_washu_CNV	AACS_washu_CNV	AADAC_washu_CNV	...	SCRIB_umich_phosphoproteomics		TSGA10_umich_phosphoproteomics						SVIL_umich_phosphoproteomics
Site											...	S1575T1588S1594	S1594	S11	S173	S213	S391	S779	S101	S296	S459
Peptide											...	LAEAPSPAPTPsPTPVEDLGPQTStSPGRLsPDFAEELR	LsPDFAEELR	sPGRDPELQVEAAEVTTK	sPSRLDSFVK	RPsPTAR	AMDTEsELGR	GLDRsLEENLCYR;GLDRsLEENLCYRDF	EVVSSQVDDLTsHNEHLCK	DSEGDTPsLINWPSSK	LPsPTVAR
Patient_ID
C3L-00006	-0.00659	-0.01982	-0.01402	-0.01402	-0.01418	-0.00839	-0.01305	-0.01402	-0.01402	-0.01305	...	NaN	0.667426	NaN	0.905606	NaN	-0.069911	-0.584774	NaN	-0.561657	-0.652457
C3L-00008	0.02578	0.00726	0.01350	0.01350	0.00732	0.01642	0.01005	0.01225	0.01225	0.01005	...	NaN	NaN	NaN	-0.488427	NaN	NaN	NaN	NaN	-0.431599	-1.079638
C3L-00032	0.01262	0.00425	-0.00275	-0.00275	0.00166	0.00549	-0.00038	-0.00275	-0.00275	-0.00038	...	NaN	0.104862	NaN	NaN	NaN	NaN	NaN	NaN	NaN	-1.439041
C3L-00084	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	0.399718	-0.875016	-0.579824	NaN	NaN	-0.505807	NaN	NaN	-1.521725
C3L-00090	0.00100	0.41191	-0.02299	-0.02299	-0.02436	-0.01198	-0.03307	-0.02299	-0.02299	-0.03307	...	NaN	1.069439	NaN	0.510268	-1.889144	NaN	-0.592203	NaN	NaN	-1.126482

Database_ID	ENSP00000000233.5
Patient_ID
C3L-00006	-0.056513
C3L-00008	0.549959
C3L-00032	0.088681
C3L-00084	-0.846555
C3L-00090	0.539019
C3L-00098	-0.017370
C3L-00136	0.230347
C3L-00137	0.191915
C3L-00139	-0.410142
C3L-00143	-0.170514

Tutorial 4: Multi-level column indices (`MultiIndex`)¶

Join functions with multiindices¶

How to select from multiindex¶

Selecting based on all levels¶

Selecting based on one level¶

Selecting based on a different level of the multiindex¶

How to use `cptac.utils.reduce_multiindex()`¶

Dropping Levels¶

Dropping by index or name¶

Dropping single or multiple levels at once¶

Combining levels (Flattening)¶

Getting a single level index of tuples¶

Turning off warnings¶

Name	ARF5	AKAP11	ARHGEF5	APPBP2	AQR	ACAP1	ANO8	AP3M2	ASNS	ALDH3A2	...	ABCC2	ANKS1A	AL034430.2	AP5Z1	ATP8B1	AKR1B15	AC004706.3	ATAD3B	ANK2
Database_ID	ENSP00000000233.5	ENSP00000025301.2	ENSP00000056217.5	ENSP00000083182.3	ENSP00000156471.5	ENSP00000158762.3	ENSP00000159087.4	ENSP00000174653.3	ENSP00000175506.4	ENSP00000176643.6	...	ENSP00000497274.1	ENSP00000497393.1	ENSP00000497510.1	ENSP00000497815.1	ENSP00000497896.1	ENSP00000498877.1	ENSP00000499350.1	ENSP00000500094.1	ENSP00000500102.1	ENSP00000500937.1
Patient_ID
C3L-00006	-0.056513	-0.385278	0.188877	-0.059319	0.276154	-0.252270	1.280740	0.086567	0.334008	1.048464	...	NaN	0.454359	1.346643	-0.186762	-0.361594	NaN	NaN	NaN	NaN	NaN
C3L-00008	0.549959	-0.491451	0.277281	0.225857	0.400321	-0.485365	NaN	-0.544367	1.634042	-0.848812	...	NaN	0.438931	0.250021	0.005658	1.065706	-0.310341	0.060549	NaN	-0.62873	NaN
C3L-00032	0.088681	0.203899	0.261918	0.192734	-0.244333	0.169655	NaN	0.223638	0.358561	-0.314030	...	NaN	NaN	0.411541	0.043151	0.461451	NaN	NaN	0.300528	NaN	NaN
C3L-00084	-0.846555	-0.286751	-0.468015	0.249142	0.013797	-0.606966	-0.303256	-0.398076	1.017079	-0.385280	...	NaN	1.423702	-0.524652	0.111429	0.172027	NaN	NaN	0.102475	NaN	NaN
C3L-00090	0.539019	-0.098589	0.605331	0.571185	0.178541	-0.567123	NaN	0.053186	0.390269	1.059128	...	-0.737756	NaN	0.580644	-0.108808	0.429643	-0.218494	NaN	0.314156	NaN	NaN

Database_ID	ENSP00000000233.5	ENSP00000000412.3	ENSP00000000442.6	ENSP00000001008.4	ENSP00000002125.4	ENSP00000002165.5	ENSP00000003084.6	ENSP00000003100.8	ENSP00000003302.4	ENSP00000004103.3	...	ENSP00000499339.1	ENSP00000499757.1	ENSP00000499778.1	ENSP00000499869.1	ENSP00000499937.1	ENSP00000500094.1	ENSP00000500633.1	ENSP00000500710.1	ENSP00000501300.1	ENSP00000501491.1
Patient_ID
01CO005	-0.203037	-0.223341	-0.283633	-0.612614	0.514855	-0.824026	NaN	0.045383	NaN	-0.248511	...	NaN	NaN	-0.042548	NaN	NaN	0.925011	-0.173468	-0.180521	0.139707	-0.882283
01CO006	0.188931	0.544620	NaN	-0.571640	-0.209734	0.799090	NaN	-0.338493	-0.042567	NaN	...	-0.411664	-0.454109	-0.725892	NaN	NaN	-0.707588	-0.846624	0.329813	-0.311147	-0.446358
01CO008	0.404810	-0.246523	-0.053940	0.252995	0.190861	0.101419	-0.502876	0.627060	0.089815	-0.106411	...	0.192279	-0.558236	-0.093708	-1.874293	-0.248307	-0.899186	-0.526260	0.668713	0.109366	-1.125296
01CO013	-0.276982	-0.017659	NaN	-0.455055	0.500686	-0.350366	NaN	0.263168	0.683830	NaN	...	0.220231	NaN	0.241860	-3.939263	NaN	0.514931	-0.078267	0.122032	0.130764	-1.146911
01CO014	-0.160155	0.100022	0.259696	0.341345	-0.310265	0.095461	-0.745855	1.006614	NaN	NaN	...	-0.198671	0.226146	0.036229	NaN	NaN	1.189468	0.117736	0.586529	-0.006767	-1.106068

Name	ARF5	M6PR	ESRRA	FKBP4	NDUFAF7	FUCA2	DBNDD1	SEMA3F	CFTR	CYP51A1	...	SCRIB	WIZ	BPIFB4	LDB1	WIZ	TSGA10	RFX7	SWSAP1	MSANTD2	SVIL
Patient_ID
C3L-00006	-0.056513	0.016557	0.002569	0.389819	0.603610	-0.332543	-0.790426	NaN	0.822732	0.039134	...	0.161720	-0.884807	NaN	0.268247	0.125392	-0.880833	0.108554	0.107413	-0.085833	NaN
C3L-00008	0.549959	-0.206129	0.905784	-0.303631	0.018767	0.503513	0.950955	0.080142	NaN	-0.063213	...	NaN	0.054284	NaN	-0.106450	0.380557	-0.756099	0.264611	0.044423	-0.248319	-1.206596
C3L-00032	0.088681	-0.154447	-0.190515	0.170753	0.196356	0.544194	-0.179078	NaN	NaN	0.377405	...	-1.086905	0.055991	NaN	-0.021986	-0.229645	1.923986	NaN	-0.176694	-0.332384	-1.330653
C3L-00084	-0.846555	0.027740	NaN	0.178700	0.264054	-0.183548	0.077215	-0.247164	0.152277	-0.279549	...	-0.125796	0.944212	NaN	0.917409	0.026862	-0.885976	-0.006510	-0.014162	0.365158	NaN
C3L-00090	0.539019	0.956619	-0.039516	0.323656	0.064605	0.173433	-0.524325	-0.038590	-0.311486	0.309905	...	0.853362	-0.716947	NaN	-0.286277	-0.046076	0.089645	-0.444506	-0.072531	-0.463495	NaN

Tutorial 4: Multi-level column indices (MultiIndex)¶

Join functions with multiindices¶

How to select from multiindex¶

Selecting based on all levels¶

Selecting based on one level¶

Selecting based on a different level of the multiindex¶

How to use cptac.utils.reduce_multiindex()¶

Dropping Levels¶

Dropping by index or name¶

Dropping single or multiple levels at once¶

Combining levels (Flattening)¶

Getting a single level index of tuples¶

Turning off warnings¶

Tutorial 4: Multi-level column indices (`MultiIndex`)¶

How to use `cptac.utils.reduce_multiindex()`¶