Download PLINK from its website to a local folder “plink_mac_20231211” then go to ‘Terminal’ and run commands as follow:
cd ~/Downloads/plink_mac_20231211
– verify plink exist in the folder
ls
– move plink to /bin
sudo mv plink /usr/local/bin/
cd /bin
– verify plink exist in the folder “/usr/local/bin”
ls
– make the file executable
sudo chmod +x /usr/local/bin/plink
– check if plink works
plink –version
– Verify that /usr/local/bin is in your PATH:
echo $PATH
– if not:
export PATH=/usr/local/bin:$PATH”
– Make this change permanent, add the above line to your shell configuration file (e.g., ~/.zshrc or ~/.bash_profile):
echo ‘export PATH=/usr/local/bin:$PATH’ >> ~/.zshrc source ~/.zshrc
plink –version
# clear workspace
rm(list = ls())
# set working directory
setwd("/Users/nnthieu/Downloads/ADAPTmap_genotypeTOP_20160222_full")
# Define the PLINK command
plink_command <- "plink --bfile ADAPTmap_genotypeTOP_20160222_full --cow --nonfounders --allow-no-sex --recode --out ADAPTmap_TOP"
# Run the command
system(plink_command)
install.packages(“genesis”) install.packages(“snpStats”)
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("data.table")
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
library("genio")
library("bedr")
##
##
## ######################
## #### bedr v1.0.7 ####
## ######################
##
## checking binary availability...
## * Checking path for bedtools... FAIL
## * Checking path for bedops... FAIL
## * Checking path for tabix... FAIL
## tests and examples will be skipped on R CMD check if binaries are missing
fam <- fread("ADAPTmap_genotypeTOP_20160222_full.fam", header = FALSE)
bim <- fread("ADAPTmap_genotypeTOP_20160222_full.bim", header = FALSE)
head(fam)
## V1 V2 V3 V4 V5 V6
## <char> <char> <int> <int> <int> <int>
## 1: ABR ET_ABR0001 0 0 0 -9
## 2: ABR ET_ABR0002 0 0 0 -9
## 3: ABR ET_ABR0003 0 0 0 -9
## 4: ABR ET_ABR0004 0 0 0 -9
## 5: ABR ET_ABR0005 0 0 0 -9
## 6: ABR ET_ABR0006 0 0 0 -9
head(bim)
## V1 V2 V3 V4 V5 V6
## <int> <char> <int> <int> <char> <char>
## 1: 0 snp10134-scaffold1361-15149 0 0 A G
## 2: 0 snp10135-scaffold1361-44576 0 0 A G
## 3: 0 snp10136-scaffold1361-91495 0 0 G A
## 4: 0 snp10412-scaffold1372-579082 0 0 0 G
## 5: 0 snp10413-scaffold1372-610565 0 0 G A
## 6: 0 snp10415-scaffold1372-688806 0 0 G A
# Adding column names
colnames(fam) <- c("FID", "IID", "PID", "MID", "Sex", "Phenotype")
# Display the first few rows with column names
head(fam)
## FID IID PID MID Sex Phenotype
## <char> <char> <int> <int> <int> <int>
## 1: ABR ET_ABR0001 0 0 0 -9
## 2: ABR ET_ABR0002 0 0 0 -9
## 3: ABR ET_ABR0003 0 0 0 -9
## 4: ABR ET_ABR0004 0 0 0 -9
## 5: ABR ET_ABR0005 0 0 0 -9
## 6: ABR ET_ABR0006 0 0 0 -9
summary(fam)
## FID IID PID MID
## Length:4653 Length:4653 Min. :0 Min. :0
## Class :character Class :character 1st Qu.:0 1st Qu.:0
## Mode :character Mode :character Median :0 Median :0
## Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0
## Sex Phenotype
## Min. :0.00000 Min. :-9
## 1st Qu.:0.00000 1st Qu.:-9
## Median :0.00000 Median :-9
## Mean :0.07479 Mean :-9
## 3rd Qu.:0.00000 3rd Qu.:-9
## Max. :2.00000 Max. :-9
# Principal Component Analysis - PCA #
## Genetic distances between individuals
system("plink --cow --allow-no-sex --nonfounders --file ADAPTmap_TOP --distance-matrix --out dataForPCA")
## Load data
dist_populations<-read.table("dataForPCA.mdist",header=F)
### Extract breed names
fam <- data.frame(famids=read.table("dataForPCA.mdist.id")[,1])
### Extract individual names
famInd <- data.frame(IID=read.table("dataForPCA.mdist.id")[,2])
## Perform PCA using the cmdscale function
# Time intensive step - takes a few minutes with the 4.5K animals
mds_populations <- cmdscale(dist_populations,eig=T,5)
## Extract the eigen vectors
eigenvec_populations <- cbind(fam,famInd,mds_populations$points)
## Proportion of variation captured by each eigen vector
eigen_percent <- round(((mds_populations$eig)/sum(mds_populations$eig))*100,2)
Options in effect:
–allow-no-sex
–cow
–distance-matrix
–file ADAPTmap_TOP
–nonfounders
–out dataForPCA
16384 MB RAM detected; reserving 8192 MB for main workspace.
.ped scan complete (for binary autoconversion).
Performing single-pass .bed write (53347 variants, 4653 cattle).
–file: dataForPCA-temporary.bed + dataForPCA-temporary.bim + dataForPCA-temporary.fam written.
53347 variants loaded from .bim file.
4653 cattle (32 males, 158 females, 4463 ambiguous) loaded from .fam.
Ambiguous sex IDs written to dataForPCA.nosex .
Using up to 11 threads (change this with –threads).
Before main variant filters, 4653 founders and 0 nonfounders present.
Calculating allele frequencies… done.
Warning: 191 het. haploid genotypes present (see dataForPCA.hh ); many commands treat these as missing.
Total genotyping rate is 0.969745.
53347 variants and 4653 cattle pass filters and QC.
Note: No phenotypes present.
Excluding 1987 variants on non-autosomes from distance matrix calc.
Distance matrix calculation complete.
Distances (proportions) written to dataForPCA.mdist , and IDs to dataForPCA.mdist.id .
# PCA plot
library("ggplot2")
ggplot(data = eigenvec_populations) +
geom_point(mapping = aes(x = `1`, y = `2`,color = famids), show.legend = FALSE ) +
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(title = "PCA of wordwide goat populations",
x = paste0("Principal component 1 (",eigen_percent[1]," %)"),
y = paste0("Principal component 2 (",eigen_percent[2]," %)")) +
theme_minimal()
.