# Input data ----
library(readxl)
library(DT)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the source table. The path is machine-specific; adjust it when running
# on another computer.
RAS <- read.csv("/Users/korshe/Documents/Authism_disoers-table.csv")

# Interactive preview of the full table.
DT::datatable(RAS)

# One row per patient: keep the first row encountered for each Patient value
# and retain all other columns, so per-donor summaries count each patient once.
RAS_top_donor <- RAS %>%
  distinct(Patient, .keep_all = TRUE)
# Blank out sex values that match the literal below before tabulating.
# NOTE(review): the literal appears to be the Cyrillic capital letter that
# looks like Latin "M" -- confirm. The previous form
# (`df[rows, ]$Sex <- NULL`) tried to drop the whole column from a row subset
# and fails as soon as any row matches; marking the value as missing is the
# well-defined equivalent. If these entries are really mistyped "M", recode
# them to "M" instead of NA.
RAS_top_donor$Sex[RAS_top_donor$Sex %in% "М"] <- NA

# Donor counts per sex, computed after the clean-up so the table reflects it.
RAS_sex <- table(RAS_top_donor$Sex)
RAS_sex
##
## F M
## 162 256

# Age in years relative to 2022, the fixed reference year of this analysis.
RAS_top_donor$age <- 2022 - RAS_top_donor$Year_of_birth

# ggplot(RAS_top_donor, aes(x = age, y = Sex)) + geom_violin()
# Horizontal violins of age per sex with a narrow box plot overlaid.
ggplot(RAS_top_donor, aes(x = age, y = Sex, fill = Sex)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.07) +
  scale_fill_brewer() +
  theme_light() +
  ggtitle("Age-sex distribution in cohort")

# Number of rows in the full table per gene (columns Var1 = gene, Freq = count).
RAS_test <- as.data.frame(table(RAS$Gene))
# Sort the per-gene counts from most to least frequent.
RAS_test <- RAS_test[order(RAS_test$Freq, decreasing = TRUE), ]
# barplot(table(RAS$Gene))
dim(RAS_test)
## [1] 1244 2
head(RAS_test)
## Var1 Freq
## 437 GALC 402
## 17 ACADS 193
## 193 CHIT1 91
## 1194 UGT1A1 78
## 429 GAA 64
## 442 GALT 41
library(ggcharts)

# Bar chart of the 20 most frequent genes. The title and axis labels are added
# BEFORE printing: `%>%` binds tighter than `+`, so the earlier
# `... %>% print() + ggtitle(...)` drew the chart without them.
# bar_chart() draws horizontal bars by flipping the coordinates, so the x
# aesthetic (the gene) ends up on the vertical axis; labels follow the
# aesthetic, hence the gene label goes to xlab() and the count to ylab().
# head() is used instead of `[1:20, ]` so a shorter table does not gain NA rows.
chart <- head(RAS_test, 20) %>%
  bar_chart(Var1, Freq) +
  ggtitle("Frequency of the genetic tests performed") +
  xlab("Genetic target") +
  ylab("Number of tests")
print(chart)

# library(stringr)
# variant_summary<- read.table('/Users/korshe/Documents/variant_summary.txt',sep='\t', header=T, fill=T)
#
# variant_summary_snp <-str_split_fixed(variant_summary$NM_014855.3.AP5Z1..c.80_83delinsTGCTGTAAACTGTAACTGTAAA..p.Arg27_Ile28delinsLeuLeuTer., pattern = ':', n=2)
# variant_summary_snp2 <- as.data.frame(str_split_fixed(variant_summary_snp[,2], pattern = ' ', n=2))
#
#
# variant_summary_snp$SNP <- variant_summary_snp2[,1]
# #RAS$rs_code
#
# RAS_and_SNP_id <- merge(variant_summary_snp2,RAS, by.x='V1', by.y='rs_code')
# # dim(RAS_and_SNP_id)
# # [1] 109 12

# Count the rows for each value of Clinvar_class_major and list the values
# from most to least frequent. (This assignment used to be glued to the end of
# the comment line above and therefore never ran.)
freq_meaning <- as.data.frame(table(RAS$Clinvar_class_major))
freq_meaning <- freq_meaning[order(freq_meaning$Freq, decreasing = TRUE), ]
DT::datatable(freq_meaning)

# Top 20 annotations ----
length(unique(RAS$Gene))
## [1] 1244
# [1] 1244

# Genes in the first 20 rows of RAS_test (sorted by descending count earlier
# in the script). head() avoids NA entries when fewer than 20 genes exist.
RAS_susbet_to_top_50 <- head(RAS_test$Var1, 20)
RAS_top_50 <- RAS[RAS$Gene %in% RAS_susbet_to_top_50, ]

# Cross-tabulate gene x sex x Clinvar_class_major and reshape to long format:
# one row per combination with its count in Freq.
RAS_test_gene_test <- as.data.frame(
  table(RAS_top_50$Gene, RAS_top_50$Sex, RAS_top_50$Clinvar_class_major)
)
RAS_test_gene_test <- RAS_test_gene_test[
  order(RAS_test_gene_test$Freq, decreasing = TRUE),
]
colnames(RAS_test_gene_test) <- c("Gene", "Sex", "Value", "Freq")

# Stacked horizontal bars per gene, coloured by Clinvar_class_major.
# With coord_flip() the axis labels still follow the aesthetics: x is the gene
# (drawn on the vertical axis) and y is the count, so the gene label belongs
# to xlab() and the count label to ylab().
ggplot(RAS_test_gene_test, aes(x = reorder(Gene, Freq), y = Freq)) +
  geom_bar(stat = "identity", aes(fill = Value)) +
  coord_flip() +
  theme_light() +
  xlab("Gene target") +
  ylab("Number of donors")
# + geom_bar(stat="identity", fill="black", aes(alpha=Sex))+ theme(legend.key=element_rect(color="#EEEEEE"))

# From 20 to 50 annotations ----
length(unique(RAS$Gene))
## [1] 1244
# [1] 1244

# Genes in rows 21 to 70 of RAS_test (sorted by descending count earlier in
# the script), i.e. the 50 genes that follow the top 20.
# NOTE(review): the report heading for this section reads "From 20 to 50";
# confirm whether rows 21-70 or 21-50 are intended.
# Positions are matched against the existing rows so that a table with fewer
# than 70 genes does not produce NA entries.
RAS_susbet_to_top_50 <- RAS_test$Var1[seq_len(nrow(RAS_test)) %in% 21:70]
RAS_top_50 <- RAS[RAS$Gene %in% RAS_susbet_to_top_50, ]

# Cross-tabulate gene x sex x Clinvar_class_major and reshape to long format:
# one row per combination with its count in Freq.
RAS_test_gene_test <- as.data.frame(
  table(RAS_top_50$Gene, RAS_top_50$Sex, RAS_top_50$Clinvar_class_major)
)
RAS_test_gene_test <- RAS_test_gene_test[
  order(RAS_test_gene_test$Freq, decreasing = TRUE),
]
colnames(RAS_test_gene_test) <- c("Gene", "Sex", "Value", "Freq")

# Stacked horizontal bars per gene, coloured by Clinvar_class_major.
# With coord_flip() the axis labels still follow the aesthetics: x is the gene
# (drawn on the vertical axis) and y is the count, so the gene label belongs
# to xlab() and the count label to ylab().
ggplot(RAS_test_gene_test, aes(x = reorder(Gene, Freq), y = Freq)) +
  geom_bar(stat = "identity", aes(fill = Value)) +
  coord_flip() +
  theme_light() +
  xlab("Gene target") +
  ylab("Number of donors")
# + geom_bar(stat="identity", fill="black", aes(alpha=Sex))+ theme(legend.key=element_rect(color="#EEEEEE"))