import pandas as pd
import numpy as np
import matplotlib
import plotly
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Load the three input tables, in this order: the expression table that
# carries a "gene_id" column, the expression table that carries a "symbol"
# column, and the per-sample cell speed table.
pson_expr_tpm_df1, pson_expr_tpm_symbol_df, cell_speeds_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/home/alex/pson_expr_tpm_df.csv',
        '/home/alex/pson_expr_tpm_symbol_df.csv',
        '/home/alex/cell_speeds_df.csv',
    )
)
# Preview the first rows of the symbol table (only displayed interactively).
pson_expr_tpm_symbol_df.head()
## symbol mRNA_R17 mRNA_R21 ... mRNA_R60 mRNA_R58 mRNA_R57
## 0 TSPAN6 33.56 45.10 ... 9.38 14.81 9.84
## 1 TNMD 0.00 0.00 ... 0.00 0.00 0.00
## 2 DPM1 169.46 129.88 ... 85.66 100.57 70.69
## 3 SCYL3 1.85 1.85 ... 3.49 4.09 4.40
## 4 C1orf112 5.73 11.85 ... 13.37 19.29 12.16
##
## [5 rows x 64 columns]
# Remove the identifier column so only the per-sample columns remain.
pson_expr_tpm_df = pson_expr_tpm_df1.drop("gene_id", axis=1)
# Use the gene symbol as the row index of the symbol table.
pson_expr_tpm_symbol_df = pson_expr_tpm_symbol_df.set_index(keys="symbol")
pson_expr_tpm_symbol_df.head()
## mRNA_R17 mRNA_R21 mRNA_R20 ... mRNA_R60 mRNA_R58 mRNA_R57
## symbol ...
## TSPAN6 33.56 45.10 39.42 ... 9.38 14.81 9.84
## TNMD 0.00 0.00 0.00 ... 0.00 0.00 0.00
## DPM1 169.46 129.88 132.06 ... 85.66 100.57 70.69
## SCYL3 1.85 1.85 1.77 ... 3.49 4.09 4.40
## C1orf112 5.73 11.85 10.16 ... 13.37 19.29 12.16
##
## [5 rows x 63 columns]
# Transform both expression tables to log2(value + 1).
# Values are rounded to one decimal place first; adding 1 before the log
# keeps zero expression at 0 instead of -inf.

# Table without gene symbols.
df3 = pd.DataFrame(pson_expr_tpm_df)
df = df3.round(1)
dfadd = df.add(1)
pson_logtpm = np.log2(dfadd)

# Table indexed by gene symbol: same three steps.
df4 = pd.DataFrame(pson_expr_tpm_symbol_df)
df1 = df4.round(1)
dfadd1 = df1.add(1)
pson_logtpm_symbol = np.log2(dfadd1)
pson_logtpm_symbol.head()
## mRNA_R17 mRNA_R21 mRNA_R20 ... mRNA_R60 mRNA_R58 mRNA_R57
## symbol ...
## TSPAN6 5.112700 5.526695 5.336283 ... 3.378512 3.981853 3.432959
## TNMD 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000
## DPM1 7.413628 7.032321 7.056367 ... 6.437960 6.666757 6.163901
## SCYL3 1.485427 1.485427 1.485427 ... 2.169925 2.350497 2.432959
## C1orf112 2.744161 3.678072 3.485427 ... 3.847997 4.343408 3.722466
##
## [5 rows x 63 columns]
# Narrow the speed table in two steps: first to rows measured under the
# "HyaluronicAcid Collagen" condition, then to the breast cancer rows.
is_hyal_coll = cell_speeds_df["experimentalCondition"].eq("HyaluronicAcid Collagen")
hyal_coll_df = cell_speeds_df.loc[is_hyal_coll]
is_brca = hyal_coll_df["diagnosis"].eq("Breast Cancer")
hyal_brca_df = hyal_coll_df.loc[is_brca]
hyal_brca_df
## sample summary_metric ... diagnosis cellLine
## 42 mRNA_R56 speed_um_hr ... Breast Cancer T-47D
## 56 mRNA_R63 speed_um_hr ... Breast Cancer MDA-MB-231
##
## [2 rows x 7 columns]
# Pull out the expression columns of the samples listed in hyal_brca_df.
# Columns are selected by sample name rather than by hard-coded position, so
# the selection cannot drift from the filtered speed table if the column
# order of the expression tables changes.
# Assumes the "sample" values (e.g. mRNA_R56) are column names in both log
# tables -- a missing name raises KeyError.
hyal_brca_samples = hyal_brca_df["sample"].tolist()

# NOTE(review): 18682 looks like the total number of gene rows -- confirm,
# and drop the row slice if it is.
hyal_brca_logtpm = pson_logtpm.iloc[0:18682][hyal_brca_samples]
hyal_brca_logtpm_symbol = pson_logtpm_symbol.iloc[0:18682][hyal_brca_samples]

# The relabelling expects exactly two samples (pandas raises ValueError
# otherwise) and follows the row order of hyal_brca_df.
# NOTE(review): confirm the first row is the slower cell line.
hyal_brca_logtpm.columns = ["slow", "fast"]
hyal_brca_logtpm_symbol.columns = ["slow", "fast"]
hyal_brca_logtpm_symbol
## slow fast
## symbol
## TSPAN6 4.224966 4.061776
## TNMD 0.000000 0.000000
## DPM1 6.960002 6.738768
## SCYL3 3.292782 2.632268
## C1orf112 3.498251 4.095924
## ... ... ...
## MUC8 0.137504 0.000000
## ZIM2 0.000000 0.000000
## GOLGA7B 0.137504 0.000000
## AC012313.1 0.678072 0.000000
## EGLN2 5.892391 5.419539
##
## [18682 rows x 2 columns]
# Scatter the per-gene log expression of the two cell lines against each
# other, with the identity line y = x as a visual reference.
x = np.linspace(0, 16, 100)
y = x
plt.scatter(hyal_brca_logtpm_symbol["slow"], hyal_brca_logtpm_symbol["fast"], c="black")
plt.plot(x, y, color="red")
# The "slow" column is plotted on the x axis and "fast" on the y axis, so the
# labels must follow that order (they were previously swapped).
plt.xlabel("Log expression in slow cell line")
plt.ylabel("Log expression in fast cell line")

# Differential expression per gene: log expression in the "fast" column minus
# the "slow" column. Kept as a one-column DataFrame (column label 0).
dge = pd.DataFrame(hyal_brca_logtpm["fast"] - hyal_brca_logtpm["slow"])

# Table with both expression columns plus their difference.
DGE = pd.concat([hyal_brca_logtpm, dge], axis=1)
DGE.columns = ["slow", "fast", "dge"]
#DGE_symbol = pd.concat([hyal_brca_logtpm_symbol, dge], axis = 1)
#DGE_symbol

# Row labels ordered from the largest to the smallest difference.
order_dge = dge.sort_values(0, ascending=False).index
# Reorder by label (.loc). The labels come from the index, so positional
# lookup (.iloc) is only correct when the index happens to be 0..n-1.
DGE = DGE.loc[order_dge]
#DGE_symbol.head()

# Open a fresh figure so the histogram is not drawn on top of whatever is
# already on the current axes.
plt.figure()
plt.hist(DGE["dge"])
## (array([2.0000e+00, 7.0000e+00, 4.6000e+01, 2.4200e+02, 1.4450e+03,
## 1.3654e+04, 2.7520e+03, 4.3400e+02, 8.2000e+01, 1.8000e+01]), array([-12.33171007, -10.128922 , -7.92613393, -5.72334586,
## -3.52055779, -1.31776972, 0.88501836, 3.08780643,
## 5.2905945 , 7.49338257, 9.69617064]), <BarContainer object of 10 artists>)
plt.title("Histogram of dge values")
plt.xlabel("Differential gene expression, dge")
plt.show()

# Split genes by the size of their expression difference: strictly above
# +cutoff goes to genesfast, strictly below -cutoff goes to genesslow.
cutoff = 4
genelist = DGE["dge"]
#genelist_symbol = DGE_symbol["dge"]
above_cutoff = genelist.gt(cutoff)
below_cutoff = genelist.lt(-cutoff)
genesfast = genelist.loc[above_cutoff]
genesslow = genelist.loc[below_cutoff]
len(genesfast)
## 289
len(genesslow)
## 207
#genelist_symbol.head()