import pandas as pd
import numpy as np
import matplotlib
import plotly
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# --- Load inputs -----------------------------------------------------------
# Three CSV tables are read: the per-sample annotation table and two copies of
# the expression matrix, one keyed by `gene_id` and one keyed by `symbol`.
cell_speeds_df = pd.read_csv('/home/alex/cell_speeds_df.csv')
pson_expr_tpm_symbol_df = pd.read_csv('/home/alex/pson_expr_tpm_symbol_df.csv')
pson_expr_tpm_df1 = pd.read_csv('/home/alex/pson_expr_tpm_df.csv')

# --- Quick look at the first five rows of each table ------------------------
# Expression matrix with a leading `gene_id` column followed by sample columns.
pson_expr_tpm_df1.head(n=5)
## gene_id mRNA_R17 mRNA_R21 ... mRNA_R60 mRNA_R58 mRNA_R57
## 0 ENSG00000000003 33.56 45.10 ... 9.38 14.81 9.84
## 1 ENSG00000000005 0.00 0.00 ... 0.00 0.00 0.00
## 2 ENSG00000000419 169.46 129.88 ... 85.66 100.57 70.69
## 3 ENSG00000000457 1.85 1.85 ... 3.49 4.09 4.40
## 4 ENSG00000000460 5.73 11.85 ... 13.37 19.29 12.16
##
## [5 rows x 64 columns]
# Same layout, but the leading column holds gene symbols.
pson_expr_tpm_symbol_df.head(n=5)
## symbol mRNA_R17 mRNA_R21 ... mRNA_R60 mRNA_R58 mRNA_R57
## 0 TSPAN6 33.56 45.10 ... 9.38 14.81 9.84
## 1 TNMD 0.00 0.00 ... 0.00 0.00 0.00
## 2 DPM1 169.46 129.88 ... 85.66 100.57 70.69
## 3 SCYL3 1.85 1.85 ... 3.49 4.09 4.40
## 4 C1orf112 5.73 11.85 ... 13.37 19.29 12.16
##
## [5 rows x 64 columns]
# Annotation table: one row per sample, with its diagnosis and cell line.
cell_speeds_df.head(n=5)
## sample summary_metric ... diagnosis cellLine
## 0 mRNA_R17 speed_um_hr ... Colon Cancer SW620
## 1 mRNA_R21 speed_um_hr ... Colon Cancer SW620
## 2 mRNA_R20 speed_um_hr ... Colon Cancer SW620
## 3 mRNA_R19 speed_um_hr ... Colon Cancer SW620
## 4 mRNA_R18 speed_um_hr ... Colon Cancer SW620
##
## [5 rows x 7 columns]
# Sorted distinct cell lines in the annotation table.
np.unique(cell_speeds_df["cellLine"])
## array(['22Rv1', 'A375', 'MDA-MB-231', 'RWPE-1', 'SW480', 'SW620', 'T-47D',
## 'T98G', 'U-87'], dtype=object)
# Sorted distinct diagnoses in the annotation table.
np.unique(cell_speeds_df["diagnosis"])
## array(['Brain Cancer', 'Breast Cancer', 'Colon Cancer', 'Not Applicable',
## 'Prostate Cancer', 'Skin Cancer'], dtype=object)
# Distinct (cellLine, diagnosis) pairings.
# np.unique() on a two-column frame flattens both columns into one sorted
# array, which only repeats the two lists above and never shows which cell
# line goes with which diagnosis. drop_duplicates() keeps the rows intact and
# returns one row per distinct pair.
dt = cell_speeds_df[["cellLine", "diagnosis"]]
dt.drop_duplicates()
## (one row per distinct cellLine/diagnosis pair; the output recorded here
## previously was the flattened np.unique(dt) array -- re-run to capture the
## table)
# Numeric-only copy of the gene_id-keyed matrix: the identifier column is
# removed so that only the sample columns remain (default integer row index).
pson_expr_tpm_df = pson_expr_tpm_df1.drop("gene_id", axis=1)
# For the symbol-keyed matrix the identifier becomes the row index instead,
# which likewise leaves only sample columns in the frame body.
pson_expr_tpm_symbol_df = pson_expr_tpm_symbol_df.set_index(keys="symbol")
pson_expr_tpm_symbol_df.head(n=5)
## mRNA_R17 mRNA_R21 mRNA_R20 ... mRNA_R60 mRNA_R58 mRNA_R57
## symbol ...
## TSPAN6 33.56 45.10 39.42 ... 9.38 14.81 9.84
## TNMD 0.00 0.00 0.00 ... 0.00 0.00 0.00
## DPM1 169.46 129.88 132.06 ... 85.66 100.57 70.69
## SCYL3 1.85 1.85 1.77 ... 3.49 4.09 4.40
## C1orf112 5.73 11.85 10.16 ... 13.37 19.29 12.16
##
## [5 rows x 63 columns]
# Per-sample column totals (sum over all genes for each sample).
pson_expr_tpm_symbol_df.sum(axis=0)
## mRNA_R17 925249.15
## mRNA_R21 939584.77
## mRNA_R20 936327.99
## mRNA_R19 939342.91
## mRNA_R18 937213.35
## ...
## mRNA_R62 950446.64
## mRNA_R61 951255.04
## mRNA_R60 946777.25
## mRNA_R58 951215.83
## mRNA_R57 949333.95
## Length: 63, dtype: float64
def _log2_tpm_plus_one(tpm):
    """Return log2(x + 1) of *tpm* after rounding its values to one decimal.

    The +1 offset keeps zero expression values finite (log2(1) == 0).
    """
    return np.log2(tpm.round(1) + 1)


# Apply the same transform to both versions of the expression matrix.
pson_logtpm = _log2_tpm_plus_one(pson_expr_tpm_df)
pson_logtpm_symbol = _log2_tpm_plus_one(pson_expr_tpm_symbol_df)
pson_logtpm_symbol.head(n=5)
## mRNA_R17 mRNA_R21 mRNA_R20 ... mRNA_R60 mRNA_R58 mRNA_R57
## symbol ...
## TSPAN6 5.112700 5.526695 5.336283 ... 3.378512 3.981853 3.432959
## TNMD 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000
## DPM1 7.413628 7.032321 7.056367 ... 6.437960 6.666757 6.163901
## SCYL3 1.485427 1.485427 1.485427 ... 2.169925 2.350497 2.432959
## C1orf112 2.744161 3.678072 3.485427 ... 3.847997 4.343408 3.722466
##
## [5 rows x 63 columns]
# Sorted distinct experimental conditions recorded in the annotation table.
np.unique(cell_speeds_df["experimentalCondition"])
## array(['30 kPa polyacrylamide Collagen',
## '30 kPa polyacrylamide Fibronectin',
## '500 Pa polyacrylamide Collagen',
## '500 Pa polyacrylamide Fibronectin', 'Glass',
## 'HyaluronicAcid Collagen', 'HyaluronicAcid Fibronectin'],
## dtype=object)
# Keep only the samples measured under the "HyaluronicAcid Collagen" condition.
is_hyal_coll = cell_speeds_df["experimentalCondition"].eq("HyaluronicAcid Collagen")
hyal_coll_df = cell_speeds_df.loc[is_hyal_coll]
hyal_coll_df
## sample summary_metric ... diagnosis cellLine
## 1 mRNA_R21 speed_um_hr ... Colon Cancer SW620
## 7 mRNA_R42 speed_um_hr ... Colon Cancer SW480
## 14 mRNA_R28 speed_um_hr ... Not Applicable RWPE-1
## 21 mRNA_R49 speed_um_hr ... Skin Cancer A375
## 28 mRNA_R14 speed_um_hr ... Brain Cancer T98G
## 35 mRNA_R7 speed_um_hr ... Prostate Cancer 22Rv1
## 42 mRNA_R56 speed_um_hr ... Breast Cancer T-47D
## 49 mRNA_R35 speed_um_hr ... Brain Cancer U-87
## 56 mRNA_R63 speed_um_hr ... Breast Cancer MDA-MB-231
##
## [9 rows x 7 columns]
# Narrow that subset further to the breast-cancer samples.
is_brca = hyal_coll_df["diagnosis"].eq("Breast Cancer")
hyal_brca_df = hyal_coll_df.loc[is_brca]
hyal_brca_df.head(n=5)
## sample summary_metric ... diagnosis cellLine
## 42 mRNA_R56 speed_um_hr ... Breast Cancer T-47D
## 56 mRNA_R63 speed_um_hr ... Breast Cancer MDA-MB-231
##
## [2 rows x 7 columns]
# Extract the expression columns of the two breast-cancer samples.
#
# Columns are selected by sample name taken from hyal_brca_df rather than by
# hard-coded positions. The previous positions [43, 57] appear to be off by
# one: the two samples sit at rows 42 and 56 of the annotation table, and the
# expression columns follow that same row order once the id column has been
# removed, so 43/57 pick the neighbouring samples. Name-based selection is
# correct regardless of column order.
brca_samples = hyal_brca_df["sample"].tolist()

# NOTE(review): the 18682-row cap is kept from the original; it presumably
# equals the total number of genes -- confirm, then drop the slice.
hyal_brca_logtpm = pson_logtpm.iloc[0:18682][brca_samples].copy()
hyal_brca_logtpm_symbol = pson_logtpm_symbol.iloc[0:18682][brca_samples].copy()

# Labels follow the row order of hyal_brca_df (first row -> "slow", second
# row -> "fast"), exactly as the original positional order did. Assigning two
# names also fails loudly if the filter did not yield exactly two samples.
# NOTE(review): confirm the slow/fast assignment against the speed values.
hyal_brca_logtpm.columns = ["slow", "fast"]
hyal_brca_logtpm_symbol.columns = ["slow", "fast"]
hyal_brca_logtpm_symbol.head()
## (the preview recorded here previously was produced with positional columns
## 43 and 57; re-run to refresh the output)
# Per-gene difference between the "fast" and "slow" columns.
# pd.DataFrame() on the unnamed Series yields a single column labelled 0.
dge = pd.DataFrame(hyal_brca_logtpm["fast"] - hyal_brca_logtpm["slow"])

# Put the difference next to the two input columns and name it.
DGE = pd.concat([hyal_brca_logtpm, dge], axis=1)
DGE.columns = ["slow", "fast", "dge"]
#DGE_symbol = pd.concat([hyal_brca_logtpm_symbol, dge], axis = 1)
#DGE_symbol
# NOTE(review): the symbol variant above is left disabled as in the original;
# presumably `dge` (integer index) does not align with the symbol-indexed
# frame -- confirm before re-enabling.

# Rank genes from largest to smallest difference. Sorting the frame directly
# replaces the earlier `DGE.iloc[order_dge, ]`, which passed index *labels* to
# a positional indexer and was therefore only correct for a default
# 0..n-1 integer index.
DGE = DGE.sort_values("dge", ascending=False)
order_dge = DGE.index
#DGE_symbol.head()
# Histogram of the per-gene differences.
# hist() and show() are separate calls: the previous form handed the tuple
# returned by plt.hist() to plt.show() as a positional argument, where it is
# either taken for the `block` flag or rejected, depending on the matplotlib
# version.
plt.hist(DGE["dge"])
# With the non-interactive Agg backend selected at the top of the file, show()
# does not open a window; use plt.savefig(...) if the figure must be kept.
plt.show()
