library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
predicted <- read.table('/Users/korshe/Documents/scEP Sonja/donor_aggregatedmonocyte_expression_parsed_predicted - donor_aggregatedmonocyte_expression_parsed_predicted.tsv',sep='\t', header=T)
#
other_data_mono <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/monocyte_expression.tsv',sep='\t', header=T)
other_data_bulk <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/bulk_expression.tsv',sep='\t', header=T)
expr_stat <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/v3_1m.mjb.tsv',sep='\t', header=T)
expr_stat <- expr_stat %>% distinct(feature_id, .keep_all= TRUE)
gene_features_gc_length <-read.table('/Users/korshe/Documents/Data_Groningen/ipsc_220909/gene_features.tsv',sep='\t',header=T)
# looking into correlation values as they are:
merged_mono <- merge(other_data_mono, gene_features_gc_length, by.y='gene',by.x='X')
dim(merged_mono) #21153
## [1] 21153 46
merged_mono <- merge(merged_mono, predicted, by.y='gene', by.x='hgnc_symbol')
y - is a predicted value
cor.test(merged_mono$X1_LLDeep_0117.x, merged_mono$X1_LLDeep_0117.y)
##
## Pearson's product-moment correlation
##
## data: merged_mono$X1_LLDeep_0117.x and merged_mono$X1_LLDeep_0117.y
## t = -1.9441, df = 14731, p-value = 0.05191
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0321548640 0.0001321277
## sample estimates:
## cor
## -0.01601554
ggplot(merged_mono, aes(x=X1_LLDeep_0117.x,y=X1_LLDeep_0117.y)) + geom_point()
cor.test(merged_mono$X1_LLDeep_1300.x, merged_mono$X1_LLDeep_1300.y)
##
## Pearson's product-moment correlation
##
## data: merged_mono$X1_LLDeep_1300.x and merged_mono$X1_LLDeep_1300.y
## t = -1.9716, df = 14731, p-value = 0.04867
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -3.238153e-02 -9.477828e-05
## sample estimates:
## cor
## -0.01624239
ggplot(merged_mono, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()
merged_mono_with_stats <- merge(merged_mono, expr_stat, by.y='feature_id', by.x='X')
hist(merged_mono_with_stats$mean.donor,breaks = 100)
merged_mono_high_expressnion <- merged_mono_with_stats[merged_mono_with_stats$mean.donor > 1,]
dim(merged_mono_high_expressnion)
## [1] 711 102
cor.test(merged_mono_high_expressnion$X1_LLDeep_1300.x, merged_mono_high_expressnion$X1_LLDeep_1300.y)
##
## Pearson's product-moment correlation
##
## data: merged_mono_high_expressnion$X1_LLDeep_1300.x and merged_mono_high_expressnion$X1_LLDeep_1300.y
## t = -0.55147, df = 709, p-value = 0.5815
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09409014 0.05290120
## sample estimates:
## cor
## -0.02070637
ggplot(merged_mono_high_expressnion, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()
merged_mono_high_expressnion <- merged_mono_with_stats[merged_mono_with_stats$mean.donor > 3,]
dim(merged_mono_high_expressnion)
## [1] 232 102
cor.test(merged_mono_high_expressnion$X1_LLDeep_1300.x, merged_mono_high_expressnion$X1_LLDeep_1300.y)
##
## Pearson's product-moment correlation
##
## data: merged_mono_high_expressnion$X1_LLDeep_1300.x and merged_mono_high_expressnion$X1_LLDeep_1300.y
## t = -0.1819, df = 230, p-value = 0.8558
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1405745 0.1169864
## sample estimates:
## cor
## -0.01199296
ggplot(merged_mono_high_expressnion, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()