Project title: Observational study of a cohort of children with autism spectrum disorders in Ukraine

Input data

library(readxl)
library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Read the cohort table from CSV into the data frame `RAS`, which every later
# section of this report uses as its input.
cohort_csv <- '/Users/korshe/Documents/Authism_disoers-table.csv'
RAS <- read.csv(file = cohort_csv)

# Render the raw table as an interactive widget.
DT::datatable(data = RAS)

Age & Gender distribution

# Keep one row per patient (the first occurrence) so each patient is counted
# once in the sex and age summaries below.
RAS_top_donor <- RAS %>% distinct(Patient, .keep_all = TRUE)

# Normalise the look-alike Cyrillic capital "M" (U+041C) to Latin "M" BEFORE
# tabulating, so both spellings fall into one category.
# The previous statement assigned NULL to the Sex column of a row subset; that
# cannot drop values for only some rows, and it ran after the table was built,
# so it never influenced the counts.
# NOTE(review): assumes the intent was to merge the Cyrillic spelling into "M"
# rather than to exclude those patients - confirm with the data owner.
RAS_top_donor$Sex[RAS_top_donor$Sex %in% "\u041c"] <- "M"

RAS_sex <- table(RAS_top_donor$Sex)
RAS_sex
##
##   F   M
## 162 256

# Age is approximated from the birth year only, relative to the study year.
study_year <- 2022
RAS_top_donor$age <- study_year - RAS_top_donor$Year_of_birth

#ggplot(RAS_top_donor , aes(x = age, y = Sex)) +  geom_violin()

# Horizontal violins (age on x, one violin per sex) with a narrow box plot
# overlaid on each.
ggplot(RAS_top_donor, aes(x = age, y = Sex, fill = Sex)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.07) +
  scale_fill_brewer() +
  theme_light() +
  ggtitle('Age-sex distribution in cohort')

# Count how many rows of RAS mention each gene, then rank genes from the most
# to the least frequent. Columns are Var1 (gene) and Freq (row count).
RAS_test <- as.data.frame(table(RAS$Gene))
RAS_test <- RAS_test[order(RAS_test$Freq, decreasing = TRUE), ]
#barplot(table(RAS$Gene))
dim(RAS_test)
## [1] 1244    2
head(RAS_test)
##        Var1 Freq
## 437    GALC  402
## 17    ACADS  193
## 193   CHIT1   91
## 1194 UGT1A1   78
## 429     GAA   64
## 442    GALT   41
library(ggcharts)

# Build the complete chart first and print it last. `%>%` binds tighter than
# `+`, so piping into print() displayed the bare chart BEFORE the title and
# axis labels were added, and the finished chart was never shown.
# head() (instead of [1:20, ]) avoids NA rows when fewer than 20 genes exist.
# bar_chart() draws horizontal bars by default, so the x aesthetic (Var1, the
# gene) is the axis labelled by xlab() and Freq is labelled by ylab().
chart <- bar_chart(head(RAS_test, 20), Var1, Freq) +
  ggtitle('Frequency of the genetic tests performed') +
  xlab('Genetic target') +
  ylab('Number of tests')
print(chart)

#chart
# library(stringr)
# variant_summary<- read.table('/Users/korshe/Documents/variant_summary.txt',sep='\t', header=T, fill=T)
# 
# variant_summary_snp <-str_split_fixed(variant_summary$NM_014855.3.AP5Z1..c.80_83delinsTGCTGTAAACTGTAACTGTAAA..p.Arg27_Ile28delinsLeuLeuTer., pattern = ':', n=2)
# variant_summary_snp2 <- as.data.frame(str_split_fixed(variant_summary_snp[,2], pattern = ' ', n=2))
# 
# 
# variant_summary_snp$SNP <- variant_summary_snp2[,1]
# #RAS$rs_code
# 
# RAS_and_SNP_id <- merge(variant_summary_snp2,RAS, by.x='V1', by.y='rs_code')
# # dim(RAS_and_SNP_id)
# # [1] 109  12

Frequency of finding interpretation

# Tabulate how often each value of Clinvar_class_major occurs in RAS and list
# the values from most to least frequent.
freq_meaning <- as.data.frame(table(RAS$Clinvar_class_major))
by_count_desc <- order(freq_meaning$Freq, decreasing = TRUE)
freq_meaning <- freq_meaning[by_count_desc, ]

# Show the ranked counts as an interactive table.
DT::datatable(data = freq_meaning)

Top 20 annotations

length(unique(RAS$Gene))
## [1] 1244
#[1] 1244
# Genes in the first 20 positions of the frequency ranking RAS_test.
# The "50" in the variable names is historical; this section uses 20 genes.
# head() (instead of [1:20, ]) avoids NA entries when fewer than 20 genes exist.
RAS_susbet_to_top_50 <- head(RAS_test, 20)$Var1

# Keep only the rows of RAS for those genes, then count rows for every
# Gene x Sex x Clinvar_class_major combination.
RAS_top_50 <- RAS[RAS$Gene %in% RAS_susbet_to_top_50, ]
RAS_test_gene_test <- as.data.frame(
  table(RAS_top_50$Gene, RAS_top_50$Sex, RAS_top_50$Clinvar_class_major)
)
RAS_test_gene_test <- RAS_test_gene_test[
  order(RAS_test_gene_test$Freq, decreasing = TRUE),
]
colnames(RAS_test_gene_test) <- c('Gene', 'Sex', 'Value', 'Freq')

# Stacked horizontal bars: one bar per gene, filled by ClinVar class.
# With coord_flip() the axis labels still follow the aesthetics, so xlab()
# names the gene axis and ylab() names the count axis (they were swapped).
ggplot(RAS_test_gene_test, aes(x = reorder(Gene, Freq), y = Freq)) +
  geom_bar(stat = "identity", aes(fill = Value)) +
  coord_flip() +
  theme_light() +
  xlab('Gene target') +
  ylab('Number of donors')
# +  geom_bar(stat="identity", fill="black", aes(alpha=Sex))+ theme(legend.key=element_rect(color="#EEEEEE"))

From 20 to 50 annotations

length(unique(RAS$Gene))
## [1] 1244
#[1] 1244
# Genes at positions 21 to 70 of the frequency ranking RAS_test (50 genes).
# head() plus a negative index (instead of [21:70, ]) avoids NA entries when
# the ranking has fewer than 70 genes.
# NOTE(review): the selected range is ranks 21-70; confirm that this is the
# range the section heading is meant to describe.
RAS_susbet_to_top_50 <- head(RAS_test, 70)[-seq_len(20), ]$Var1

# Keep only the rows of RAS for those genes, then count rows for every
# Gene x Sex x Clinvar_class_major combination.
RAS_top_50 <- RAS[RAS$Gene %in% RAS_susbet_to_top_50, ]
RAS_test_gene_test <- as.data.frame(
  table(RAS_top_50$Gene, RAS_top_50$Sex, RAS_top_50$Clinvar_class_major)
)
RAS_test_gene_test <- RAS_test_gene_test[
  order(RAS_test_gene_test$Freq, decreasing = TRUE),
]
colnames(RAS_test_gene_test) <- c('Gene', 'Sex', 'Value', 'Freq')

# Stacked horizontal bars: one bar per gene, filled by ClinVar class.
# With coord_flip() the axis labels still follow the aesthetics, so xlab()
# names the gene axis and ylab() names the count axis (they were swapped).
ggplot(RAS_test_gene_test, aes(x = reorder(Gene, Freq), y = Freq)) +
  geom_bar(stat = "identity", aes(fill = Value)) +
  coord_flip() +
  theme_light() +
  xlab('Gene target') +
  ylab('Number of donors')
# +  geom_bar(stat="identity", fill="black", aes(alpha=Sex))+ theme(legend.key=element_rect(color="#EEEEEE"))