R Markdown

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
predicted <- read.table('/Users/korshe/Documents/scEP Sonja/donor_aggregatedmonocyte_expression_parsed_predicted - donor_aggregatedmonocyte_expression_parsed_predicted.tsv',sep='\t', header=T)


# 
other_data_mono <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/monocyte_expression.tsv',sep='\t', header=T)

other_data_bulk <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/bulk_expression.tsv',sep='\t', header=T)


expr_stat <- read.table('/Users/korshe/Documents/Data_Groningen/WMA_sep2022/pseudobulk/v3_1m.mjb.tsv',sep='\t', header=T)
expr_stat <- expr_stat %>% distinct(feature_id, .keep_all= TRUE)

gene_features_gc_length <-read.table('/Users/korshe/Documents/Data_Groningen/ipsc_220909/gene_features.tsv',sep='\t',header=T)

# looking into correlation values as they are:

merged_mono <- merge(other_data_mono, gene_features_gc_length, by.y='gene',by.x='X')
dim(merged_mono) #21153
## [1] 21153    46
merged_mono <- merge(merged_mono, predicted, by.y='gene', by.x='hgnc_symbol')

Getting correlations

y - is a predicted value

cor.test(merged_mono$X1_LLDeep_0117.x, merged_mono$X1_LLDeep_0117.y)
## 
##  Pearson's product-moment correlation
## 
## data:  merged_mono$X1_LLDeep_0117.x and merged_mono$X1_LLDeep_0117.y
## t = -1.9441, df = 14731, p-value = 0.05191
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0321548640  0.0001321277
## sample estimates:
##         cor 
## -0.01601554
ggplot(merged_mono, aes(x=X1_LLDeep_0117.x,y=X1_LLDeep_0117.y)) + geom_point()

cor.test(merged_mono$X1_LLDeep_1300.x, merged_mono$X1_LLDeep_1300.y)
## 
##  Pearson's product-moment correlation
## 
## data:  merged_mono$X1_LLDeep_1300.x and merged_mono$X1_LLDeep_1300.y
## t = -1.9716, df = 14731, p-value = 0.04867
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -3.238153e-02 -9.477828e-05
## sample estimates:
##         cor 
## -0.01624239
ggplot(merged_mono, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()

lookinf if genes with high expression correlate well

merged_mono_with_stats <- merge(merged_mono, expr_stat, by.y='feature_id', by.x='X')
hist(merged_mono_with_stats$mean.donor,breaks  = 100)

merged_mono_high_expressnion <- merged_mono_with_stats[merged_mono_with_stats$mean.donor > 1,]
dim(merged_mono_high_expressnion)
## [1] 711 102
cor.test(merged_mono_high_expressnion$X1_LLDeep_1300.x, merged_mono_high_expressnion$X1_LLDeep_1300.y)
## 
##  Pearson's product-moment correlation
## 
## data:  merged_mono_high_expressnion$X1_LLDeep_1300.x and merged_mono_high_expressnion$X1_LLDeep_1300.y
## t = -0.55147, df = 709, p-value = 0.5815
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09409014  0.05290120
## sample estimates:
##         cor 
## -0.02070637
ggplot(merged_mono_high_expressnion, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()

merged_mono_high_expressnion <- merged_mono_with_stats[merged_mono_with_stats$mean.donor > 3,]
dim(merged_mono_high_expressnion)
## [1] 232 102
cor.test(merged_mono_high_expressnion$X1_LLDeep_1300.x, merged_mono_high_expressnion$X1_LLDeep_1300.y)
## 
##  Pearson's product-moment correlation
## 
## data:  merged_mono_high_expressnion$X1_LLDeep_1300.x and merged_mono_high_expressnion$X1_LLDeep_1300.y
## t = -0.1819, df = 230, p-value = 0.8558
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1405745  0.1169864
## sample estimates:
##         cor 
## -0.01199296
ggplot(merged_mono_high_expressnion, aes(x=X1_LLDeep_1300.x,y=X1_LLDeep_1300.y)) + geom_point()