paquetes
library(knitr)
package 㤼㸱knitr㤼㸲 was built under R version 3.6.3
importo la data ya normalizada y emprolijada
clindata = readRDS('clin_data_lista_2020')
norm_counts = readRDS("norm_counts_lista_2020")
vamos a estudiar los genes x1, x2, x3, Estan todos esos genes en el dataset?
genes.quiero = c('KPNA1','KPNA2','KPNA3','KPNA4','KPNA5','KPNA6','KPNA7', 'KPNB1','RAC1', 'RHOA','CDC42','PGP','ABCB1', 'ABCC1','ABCC2','ABCC3','ABCC4','ABCC5','ABCC6','ABCC10','ABCC11', 'DKC1', 'TP53')
if( all(genes.quiero %in% row.names(norm_counts))){
print('todos los genes estan en el dataset')} else {
faltantes = genes.quiero[which(!genes.quiero %in% row.names(norm_counts))]
print(paste('el o los genes que faltan:', faltantes))
}
[1] "todos los genes estan en el dataset"
graficar
#DEG
genes_int = genes.quiero
genes=c()
pval1=c()
pval2=c()
pval3=c()
pval4=c()
pval5=c()
pval6=c()
exp.norm=c()
exp.l.norm=c()
igual.var = c()
igual.var.l=c()
for (i in 1:length(genes_int)){
gen = genes_int[i]
exp = as.vector(norm_counts[paste(gen),]+1)
exp.l = as.vector(log2(t(norm_counts[paste(gen),]+1)))
positivity = clindata$positivity
datitos = data.frame(positivity, exp, exp.l )
genstat = wilcox.test(exp ~ positivity, paired = FALSE)
ploteo = ggplot(data = datitos, aes(x=positivity, y=exp.l )) +
geom_jitter(aes(shape=positivity, color=positivity), size=3)+
xlab(NULL) +
ylab(paste(gen, "expression \n log2 (norm counts +1)")) +
theme(legend.position = "bottom") +
theme_bw() +
theme(axis.text = element_text(size = 15),
axis.title = element_text(size = 15),
plot.title =element_text(size = 25),
legend.position = 'none') +
stat_summary(fun=mean,
geom="point",
shape= '_',
size=14,
colour= c('#b53d35', '#066e70'))
print(ploteo + ggplot2::annotate("text", x = 1.5, y = max(exp.l), label = paste(round(genstat$p.value, digits = 4)), size = 6))
assign(paste(gen,'ploteo_DGE', sep = '_'), ploteo )
genes[i]=gen
tt = t.test(exp ~ positivity)
pval1[i]=tt$p.value
tt =t.test(exp.l ~ positivity)
pval2[i]=tt$p.value
tt =t.test(exp ~ positivity, var.equal = T)
pval3[i]=tt$p.value
tt =t.test(exp.l ~ positivity, var.equal = T)
pval4[i]=tt$p.value
tt =wilcox.test(exp ~ positivity)
pval5[i]=tt$p.value
tt =wilcox.test(exp.l ~ positivity)
pval6[i]=tt$p.value
nor = shapiro.test(exp) #El p me tiene que dar mayor a 0.05
nor.l = shapiro.test(exp.l)
# 3. LAS DOS POBLACIONES TIENEN LA MISMA VARIANZA?
res.ftest <- var.test(exp ~ positivity) # El p me tiene que dar mayor a 0.05
res.ftest.l <- var.test(exp.l ~ positivity)
exp.norm[i]=nor$p.value>0.05
exp.l.norm[i]=nor.l$p.value>0.05
igual.var[i] = res.ftest$p.value > 0.05
igual.var.l[i]= res.ftest.l$p.value > 0.05
}
# # tabla para correlaciones y edad
#
# exp.gen = as.data.frame(t(norm_counts[genes.quiero,]))
#
#
# tabla = merge(clindata, exp.gen, by = 0)
# #emprolijo
# rownames(tabla)=tabla[,1]
# tabla = tabla[,-1]
# tabla$ct = as.numeric(tabla$ct)
# tabla$age_cat = factor(tabla$age_cat, levels = c("< 30", "30s", '40s', '50s', '60s', '70+'), ordered = TRUE)
#
# # correlac ct
#
# tabla.cor = tabla[tabla$positivity == 'COVID19' & !is.na(tabla$ct),]
# for (i in 1:length(genes.quiero)){
# gen = genes.quiero[i]
# exp = log2(tabla.cor[,gen]+1)
# carga = -tabla.cor$ct
# stat= cor.test(exp, carga, method = "spearman", use = "complete.obs", exact = FALSE)
# datitos = data.frame(exp, carga)
#
# ploteo= ggplot(datitos, aes(x = carga, y =exp)) +
# geom_point(size = 2, na.rm = TRUE, color = '#00BFC4', shape = 17) +
# ylab(paste(gen, "expression \n log2 (norm counts +1)")) +
# xlab( " Viral load (-ct N1)") +
# #ylim(min(exp)-1,max(exp)+4) +
# coord_cartesian(ylim = c(min(exp)-1,max(exp)+4))+
# theme_bw() +
# theme(axis.text = element_text(size = 15),
# axis.title = element_text(size = 15),
# plot.title =element_text(size = 25),
# legend.position = 'none') +
# geom_smooth(method="lm", col="black")
# ploteo
# r = round(as.numeric(stat$estimate), digits = 4)
# p = round(as.numeric(stat$p.value), digits = 4)
# print(ploteo + ggplot2::annotate("text", x = -20, y = max(exp)+3, label = paste('Correlacion Spearman:', r, '\np valor:', p), size = 5))
# assign(paste(gen,'ploteo_cor_ct', sep = '_'), ploteo )
#
# }
#
#
# # graficos por edad
#
# tabla$grupo = paste(tabla$age_cat, '\n', tabla$positivity)
# #levels(as.factor(tabla$grupo))
# tabla$grupo <- factor(tabla$grupo, levels = c("< 30 \n HEALTHY", "30s \n HEALTHY", "40s \n HEALTHY", "50s \n HEALTHY", "60s \n HEALTHY", "70+ \n HEALTHY", "< 30 \n COVID19", "30s \n COVID19", "40s \n COVID19", "50s \n COVID19", "60s \n COVID19", "70+ \n COVID19"), ordered = TRUE)
# #levels(tabla$grupo)
#
#
# tabla.grupo = tabla[!is.na(tabla$age_cat), ]
#
#
# for (i in 1:length(genes.quiero)){
# gen = genes.quiero[i]
# exp = log2(tabla.grupo[,gen]+1)
# grupito= tabla.grupo$grupo
# positividad = tabla.grupo$positivity
# stat.c.i= jonckheere.test(x= exp[positividad == 'COVID19'],
# g= grupito[positividad == 'COVID19'],
# alternative = 'increasing',
# nperm = 500)
# stat.c.d= jonckheere.test(x= exp[positividad == 'COVID19'],
# g= grupito[positividad == 'COVID19'],
# alternative = 'decreasing',
# nperm = 500)
# stat.c.t= jonckheere.test(x= exp[positividad == 'COVID19'],
# g= grupito[positividad == 'COVID19'],
# alternative = 'two.sided',
# nperm = 500)
# stat.h.i= jonckheere.test(x= exp[positividad == 'HEALTHY'],
# g= grupito[positividad == 'HEALTHY'],
# alternative = 'increasing',
# nperm = 500)
# stat.h.d= jonckheere.test(x= exp[positividad == 'HEALTHY'],
# g= grupito[positividad == 'HEALTHY'],
# alternative = 'decreasing',
# nperm = 500)
# stat.h.t= jonckheere.test(x= exp[positividad == 'HEALTHY'],
# g= grupito[positividad == 'HEALTHY'],
# alternative = 'two.sided',
# nperm = 500)
#
#
# datitos = data.frame(exp, grupito, positividad)
#
# ploteo = ggplot(datitos, aes(x = grupito, y = exp)) +
# geom_jitter( width = 0.2 , aes(shape= positividad, color = positividad), size = 2)+
# xlab(NULL) +
# ylab(paste(gen,'expression RNA-seq \n log2 (norm counts +1)')) +
# theme(legend.position = "bottom") +
# #ylim(-0.01, max(log2(tabla.grupo[,'PGP']+1)+5)) +
# coord_cartesian(ylim =c(-0.01, max(log2(tabla.grupo[,gen]+1)+5))) +
# theme_bw() +
# theme(axis.text = element_text(size = 7),
# axis.title = element_text(size = 15),
# plot.title =element_text(size = 25),
# legend.position = 'none') +
# stat_summary(fun=mean,
# geom="point",
# shape= '_',
# size=8 ) #,
# #colour= c('#b53d35', '#066e70'))
#
# stat.izq = paste("p.trend increasing: ",stat.h.i$p.value,
# '\np.trend decreasing: ',stat.h.d$p.value,
# '\np.trend two.sided: ',stat.h.t$p.value, sep = '')
# stat.der = paste("p.trend increasing: ",stat.c.i$p.value,
# '\np.trend decreasing: ',stat.c.d$p.value,
# '\np.trend two.sided: ',stat.c.t$p.value, sep = '')
#
# stat.izq.simp = paste("p.trend increasing: ",stat.h.i$p.value,
# '\np.trend decreasing: ',stat.h.d$p.value, sep = '')
# stat.der.simp = paste("p.trend increasing: ",stat.c.i$p.value,
# '\np.trend decreasing: ',stat.c.d$p.value, sep = '')
#
# print(ploteo + ggplot2::annotate("text", x = c(3,9), y = max(exp)+2, label = c(stat.izq.simp, stat.der.simp), size = 5))
# assign(paste(gen,'ploteo_grupo', sep = '_'), ploteo )
# }
#
resultados = data.frame(genes, pval1, pval2, pval3, pval4, pval5, pval6)
resultados[,-1] = round(resultados[,-1], 4)
presup = data.frame(exp.norm, exp.l.norm, igual.var, igual.var.l)
conflict = (resultados$pval2 < 0.05 & resultados$pval6 > 0.05) | (resultados$pval2 > 0.05 & resultados$pval6 < 0.05)
resultados$conflicto = conflict
final = cbind(resultados, presup)
colnames(final)[2:7] = c('Welch', 'Welch.log', 'student', 'student.log', 'wilcox', 'wilcox.log')
kable(final)
genes | Welch | Welch.log | student | student.log | wilcox | wilcox.log | conflicto | exp.norm | exp.l.norm | igual.var | igual.var.l |
---|---|---|---|---|---|---|---|---|---|---|---|
KPNA1 | 0.9687 | 0.8031 | 0.9727 | 0.7977 | 0.5951 | 0.5951 | FALSE | FALSE | FALSE | TRUE | TRUE |
KPNA2 | 0.5762 | 0.9005 | 0.6768 | 0.8966 | 0.7504 | 0.7504 | FALSE | FALSE | FALSE | FALSE | TRUE |
KPNA3 | 0.0630 | 0.1146 | 0.0104 | 0.0855 | 0.0526 | 0.0526 | FALSE | FALSE | FALSE | FALSE | TRUE |
KPNA4 | 0.5923 | 0.8815 | 0.6220 | 0.8585 | 0.3881 | 0.3881 | FALSE | FALSE | FALSE | TRUE | FALSE |
KPNA5 | 0.1234 | 0.0031 | 0.5844 | 0.0012 | 0.0025 | 0.0025 | FALSE | FALSE | FALSE | FALSE | TRUE |
KPNA6 | 0.8109 | 0.6546 | 0.8383 | 0.6415 | 0.6547 | 0.6547 | FALSE | FALSE | FALSE | TRUE | TRUE |
KPNA7 | 0.0210 | 0.0043 | 0.0136 | 0.0003 | 0.0017 | 0.0017 | FALSE | FALSE | FALSE | TRUE | FALSE |
KPNB1 | 0.0598 | 0.0790 | 0.1214 | 0.0987 | 0.0540 | 0.0540 | FALSE | FALSE | FALSE | FALSE | TRUE |
RAC1 | 0.7876 | 0.9544 | 0.7969 | 0.9546 | 0.8830 | 0.8830 | FALSE | FALSE | FALSE | TRUE | TRUE |
RHOA | 0.2164 | 0.0355 | 0.1670 | 0.0512 | 0.0587 | 0.0587 | TRUE | FALSE | TRUE | TRUE | TRUE |
CDC42 | 0.0188 | 0.0632 | 0.0004 | 0.0267 | 0.0044 | 0.0044 | TRUE | FALSE | FALSE | FALSE | FALSE |
PGP | 0.0044 | 0.0000 | 0.0438 | 0.0000 | 0.0000 | 0.0000 | FALSE | FALSE | FALSE | FALSE | FALSE |
ABCB1 | 0.8017 | 0.3732 | 0.8795 | 0.3641 | 0.1925 | 0.1925 | FALSE | FALSE | FALSE | FALSE | TRUE |
ABCC1 | 0.0220 | 0.0002 | 0.0370 | 0.0002 | 0.0001 | 0.0001 | FALSE | FALSE | FALSE | TRUE | TRUE |
ABCC2 | 0.0124 | 0.1881 | 0.3694 | 0.3559 | 0.9680 | 0.9680 | FALSE | FALSE | FALSE | FALSE | FALSE |
ABCC3 | 0.4228 | 0.0048 | 0.6183 | 0.0005 | 0.0025 | 0.0025 | FALSE | FALSE | FALSE | FALSE | FALSE |
ABCC4 | 0.2273 | 0.2628 | 0.1585 | 0.2104 | 0.3221 | 0.3221 | FALSE | FALSE | FALSE | TRUE | TRUE |
ABCC5 | 0.0055 | 0.0000 | 0.0001 | 0.0000 | 0.0000 | 0.0000 | FALSE | FALSE | FALSE | FALSE | TRUE |
ABCC6 | 0.0576 | 0.9899 | 0.4092 | 0.9906 | 0.7363 | 0.7363 | FALSE | FALSE | FALSE | FALSE | TRUE |
ABCC10 | 0.6876 | 0.0033 | 0.8254 | 0.0029 | 0.0007 | 0.0007 | FALSE | FALSE | FALSE | FALSE | TRUE |
ABCC11 | 0.1107 | 0.3499 | 0.5133 | 0.4471 | 0.0055 | 0.0055 | TRUE | FALSE | FALSE | FALSE | FALSE |
DKC1 | 0.1701 | 0.2015 | 0.0993 | 0.1826 | 0.1117 | 0.1117 | FALSE | FALSE | FALSE | FALSE | TRUE |
TP53 | 0.7505 | 0.5495 | 0.7687 | 0.5598 | 0.3834 | 0.3834 | FALSE | FALSE | FALSE | TRUE | TRUE |
NA
imprimir una tabla con formato condicional
#library(DT)
datatable(final) %>% formatStyle(
c('Welch', 'Welch.log', 'student', 'student.log', 'wilcox', 'wilcox.log'),
backgroundColor = styleInterval(0.05001, c('yellow', 'white'))) %>%
formatStyle(columns = c(8:12), backgroundColor = styleEqual(c(TRUE, FALSE), c('green', 'white'))
)
NA
NA