We will explore GWAS data for Diabetes Mellitus and produce Manhattan and QQ plots.
# =====================================================
# Load Required Libraries
# =====================================================
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(qqman)
##
## For example usage please run: vignette('qqman')
##
## Citation appreciated but not required:
## Turner, (2018). qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. Journal of Open Source Software, 3(25), 731, https://doi.org/10.21105/joss.00731.
library(dplyr)
# =====================================================
# Check Working Directory and Files
# =====================================================
cat("📁 Current working directory:\n")
## 📁 Current working directory:
print(getwd())
## [1] "/Users/jason/Desktop/gwas 9.1"
cat("\n📂 Files in this directory:\n")
##
## 📂 Files in this directory:
print(list.files())
## [1] "Gwas Diabetes 2 data.csv"
## [2] "gwas diabetes 2.0.R"
## [3] "Lab_9.1_GWAS_for_Diabetes_mellitus_FIXED.html"
## [4] "Lab_9.1_GWAS_for_Diabetes_mellitus_FIXED.Rmd"
# =====================================================
# Load and Clean GWAS Dataset
# =====================================================
data_file <- "Gwas Diabetes 2 data.csv"
cat("\n🔍 Looking for file in:\n", getwd(), "\n")
##
## 🔍 Looking for file in:
## /Users/jason/Desktop/gwas 9.1
# Check file existence
if (!file.exists(data_file)) {
stop("❌ File not found. Make sure 'Gwas Diabetes 2 data.csv' is in the SAME folder as this RMD file.")
}
# Read CSV safely
diabetes <- tryCatch({
readr::read_csv(data_file)
}, error = function(e) {
cat("⚠️ Error reading CSV:", conditionMessage(e), "\n")
return(NULL)
})
## Rows: 10996 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): SNP, CHR, Gene
## dbl (1): P
## num (1): BP
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm dataset created
if (is.null(diabetes)) stop("❌ Dataset 'diabetes' was not created. Check CSV format or encoding.")
cat("\n✅ File loaded successfully!\n")
##
## ✅ File loaded successfully!
print(dim(diabetes))
## [1] 10996 5
print(head(diabetes, 3))
## # A tibble: 3 × 5
## SNP CHR BP P Gene
## <chr> <chr> <dbl> <dbl> <chr>
## 1 rs9273368-G 6 32658698 8e-40 HLA-DQB1,HLA-DQA1
## 2 rs9273368-? 6 32658698 3e-78 HLA-DQB1,HLA-DQA1
## 3 rs689-? 11 2160994 1e-18 INS-IGF2,INS
print(colnames(diabetes))
## [1] "SNP" "CHR" "BP" "P" "Gene"
# Clean and prepare data for qqman
diabetes <- diabetes %>%
rename_with(toupper) %>% # Standardize column names
mutate(
CHR = as.character(CHR),
CHR = case_when(
CHR %in% c("X", "x") ~ "23",
CHR %in% c("Y", "y") ~ "24",
CHR %in% c("MT", "Mt", "M", "m") ~ "25",
TRUE ~ CHR
),
CHR = suppressWarnings(as.numeric(CHR)),
P = suppressWarnings(as.numeric(P))
) %>%
filter(!is.na(CHR), !is.na(BP), !is.na(P), P > 0, P <= 1) %>%
arrange(CHR, BP)
cat("\n✅ Cleaned data summary:\n")
##
## ✅ Cleaned data summary:
print(summary(diabetes$P))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 0.000e+00 8.000e-11 4.291e-07 2.000e-08 1.000e-05
# =====================================================
# Manhattan Plot
# =====================================================
if (exists("diabetes") && nrow(diabetes) > 0) {
manhattan(
diabetes,
chr = "CHR",
bp = "BP",
snp = "SNP",
p = "P",
main = "Manhattan Plot - GWAS Diabetes Mellitus"
)
} else {
cat("⚠️ Skipping Manhattan plot — no data loaded.\n")
}
# =====================================================
# QQ Plot
# =====================================================
if (exists("diabetes") && nrow(diabetes) > 0) {
qq(diabetes$P, main = "QQ Plot - GWAS Diabetes Mellitus")
} else {
cat("⚠️ Skipping QQ plot — no data loaded.\n")
}