This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot. SCZvsBD <- read.delim(“C:/Users/ag2646/Desktop/Vitamin D project/Vitamin_D_sumstats/SCZvsBD.txt”, header=T) SCZvsBD$N <- 94785 SCZvsBD_1 <-SCZvsBD[-c(1,3,6,7,8,10,12,13)]
write.table(heritability,“C:/Users/ag2646/Desktop/Vitamin D ,project/Vitamin_D_sumstats/heritability1.txt”,quote=F, row.names=F) # heritability(Convert z score to p value ) heritability\(p<-convert.z.score(heritability\)z,one.sided=NULL)
MarkerName Allele1 Allele2 Freq.Allele1.HapMapCEU p N (Header sumstats file) rs10 a c 0.0333 0.708 80566 rs1000000 g a 0.6333 0.506 123865 rs10000010 c t 0.425 0.736 123827 rs10000012 c g 0.8083 0.042 123809 rs10000013 c a 0.1667 0.0689 123863 rs10000017 t c 0.2333 0.457 123262 rs1000002 c t 0.475 0.0322 123783 rs10000023 t g 0.5917 0.939 123756 rs10000029 t c 0.975 0.24 103623f head(df)c
data1 <-data2[,c(1,2,3,4,5,6)]
Number of columns you need to remove please select the coulmns.
data5 <-merge(data, data4, by=“variant”) Merge by name of the two columns using the command by=“The common column name”
colnames(data6) <-c(“N”,“BETA”, “P”, “A2”, “A1”, “SNP”) # Arranging columns In R data6 <- subset(data6, select = c(“SNP”, “A1,” “A2”,“BETA”, “P”, “N”)) Base R approach
data6 %>% select(SNP, A1, A2, BETA, P, N) Dplyr approach
srun –pty -p interactive bash
/SAY/standard7/PolimantiLab-CC1096-MEDPSY # Common space in new cluster for lab work
/gpfs/gibbs/pi/polimanti
awk ‘{print $2,$1,$5,$4,$3}’ zz.txt > zz1.txt Press S to save terminal output to file
awk ‘!/:/’ Revezetal2020_25OHD_BMIcov.txt >xx.txt
awk ’{split($2,a,/_/);$2=a[1]}1’ Revezetal2020_25OHD_BMIcond.txt >Revezetal2020_25OHD_BMIcond_out1.txt
awk ’{sub(/.*:/,“rs”,$2);}1’ yy.txt >Revezetal2020_25OHD_BMIcond_out2.txt
module load miniconda module avail R source activate base module load R/3.6.1-foss-2018b # how to copy files from one directory to another directory cp /SAY/standard7/PolimantiLab-CC1096-MEDPSY/Aranyak/c.txt /SAY/standard7/PolimantiLab-CC1096-MEDPSY/Aranyak/Diet
wc -l = word count and number of lines in a file.
cd Path/to/folder
awk ‘FNR==62 {print FILENAME, $0}’ *.log > MERGED.results.file.txt
# convert zscore to p values # importing the file after saving as “.txt” for ease of manipulation
A <- read.delim(“~/Downloads/A.txt”)
convert.z.score<-function(z, one.sided=NULL) { if(is.null(one.sided)) { pval = pnorm(-abs(z)); pval = 2 * pval } else if(one.sided==“-”) { pval = pnorm(z); } else { pval = pnorm(-z);
} return(pval); } # using lapply to use function on every cell in “Z.score”
A\(P.value <- lapply(A\)Z.score, function(x) sapply(x, convert.z.score)) # To write a df of the p values obtained. A <- df
df = as.matrix(df)
FILENAME p1 p2 rg se z p h2_obs h2_obs_se h2_int h2_int_se gcov_int gcov_int_se
cp -R “/gpfs/ysm/home/ag2646/ldsc/” "/SAY/standard7/PolimantiLab-CC1096-MEDPSY/Aranyak/A/
cd Path/to/folder
You will obtain an output file MERGED.results.file.txt with the following columns.
awk ‘FNR==62 {print FILENAME, $0}’ *.log > MERGED.results.file.txt
25OHDVitamin.sumstats.gz
https://ood-farnam.hpc.yale.edu/pun/sys/dashboard
correlation, rg1 vs. rg2
#you calculate first the z score
Z= (rg1-rg2)/√ (SErg1^2+ SErg2^2)
#then you can calculate the p value in excel using this formula
P=2*NORMSDIST(-ABS(Z))
#if you put if you put all files sumstats.gz in the same folder (PATH/to/FILES), the script below will run LDSC across all pair-wise combinations and save the log files in a specific folder (e.g., /PATH/to/OUTPUT/)
cd PATH/to/FILES
for a in *.gz; do
for b in *.gz; do
./ldsc.py \
--rg "$a","$b" \
--ref-ld-chr eur_w_ld_chr/ \
--w-ld-chr eur_w_ld_chr/ \
--out /PATH/to/OUTPUT/"${a%.txt}$b"
done
done
type clear in screen # Heatmap with VItaminD data as example
import DATA from local machine like read.table("C:\Users\Lenovo\Desktop\Correlation matrix VITD _Disorder_R\Z1.txt“header=TRUE, row.names=1, sep=”) heatmap(as.matrix(Z[, -1])) [Without clustering method] # heatmap with clustering method library(gplots) library(heatmap.2) input<- as.matrix(Z) heatmap.2(input,trace =“none”, density = “none”, col = redblue(100), cexRow =1, cexCol =0.2,margins = c(1,7), scale=“row”,hclust=function(x) hclust(x,method = “complete”)) # To start a new device and avoid graphics error dev.off() # To change margin par(mar=c(1,1,1,1)) # To show default margin par(“mar”)
x = read.csv(“Z.csv”)
data = x[,-1] rownames(data) = x[,1] data = as.matrix(data)
heatmap.2(data, trace =“none”, density = “none”, col = redblue(100), cexRow =1, cexCol =1, margins = c(5,5), labCol = colnames(data), labRow = x$Gene, scale=“row”, hclust=function(x) hclust(x,method = “complete”))
cp -r Aranyak /gpfs/gibbs/pi/polimanti/ # Present project directory /gpfs/loomis/project/polimanti/ag2646 # labelling PCA Plots z <- plotPCA( DESeqTransform( rld ) ) z + geom_label(aes(label = name)) nudge <- position_nudge(y = 1) z + geom_label(aes(label = name), position = nudge) z + geom_text(aes(label = name), position = nudge) css:r.css