We will explore GWAS data for Diabetes Mellitus and produce Manhattan and QQ plots.

# =====================================================
#        Load Required Libraries
# =====================================================
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(qqman)
## 
## For example usage please run: vignette('qqman')
## 
## Citation appreciated but not required:
## Turner, (2018). qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. Journal of Open Source Software, 3(25), 731, https://doi.org/10.21105/joss.00731.
library(dplyr)

# =====================================================
#        Check Working Directory and Files
# =====================================================
cat("📁 Current working directory:\n")
## 📁 Current working directory:
print(getwd())
## [1] "/Users/jason/Desktop/gwas 9.1"
cat("\n📂 Files in this directory:\n")
## 
## 📂 Files in this directory:
print(list.files())
## [1] "Gwas Diabetes 2 data.csv"                     
## [2] "gwas diabetes 2.0.R"                          
## [3] "Lab_9.1_GWAS_for_Diabetes_mellitus_FIXED.html"
## [4] "Lab_9.1_GWAS_for_Diabetes_mellitus_FIXED.Rmd"
# =====================================================
#        Load and Clean GWAS Dataset
# =====================================================
data_file <- "Gwas Diabetes 2 data.csv"
cat("\n🔍 Looking for file in:\n", getwd(), "\n")
## 
## 🔍 Looking for file in:
##  /Users/jason/Desktop/gwas 9.1
# Check file existence
if (!file.exists(data_file)) {
  stop("❌ File not found. Make sure 'Gwas Diabetes 2 data.csv' is in the SAME folder as this RMD file.")
}

# Read CSV safely
diabetes <- tryCatch({
  readr::read_csv(data_file)
}, error = function(e) {
  cat("⚠️ Error reading CSV:", conditionMessage(e), "\n")
  return(NULL)
})
## Rows: 10996 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): SNP, CHR, Gene
## dbl (1): P
## num (1): BP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm dataset created
if (is.null(diabetes)) stop("❌ Dataset 'diabetes' was not created. Check CSV format or encoding.")

cat("\n✅ File loaded successfully!\n")
## 
## ✅ File loaded successfully!
print(dim(diabetes))
## [1] 10996     5
print(head(diabetes, 3))
## # A tibble: 3 × 5
##   SNP         CHR         BP     P Gene             
##   <chr>       <chr>    <dbl> <dbl> <chr>            
## 1 rs9273368-G 6     32658698 8e-40 HLA-DQB1,HLA-DQA1
## 2 rs9273368-? 6     32658698 3e-78 HLA-DQB1,HLA-DQA1
## 3 rs689-?     11     2160994 1e-18 INS-IGF2,INS
print(colnames(diabetes))
## [1] "SNP"  "CHR"  "BP"   "P"    "Gene"
# Clean and prepare data for qqman
diabetes <- diabetes %>%
  rename_with(toupper) %>%  # Standardize column names
  mutate(
    CHR = as.character(CHR),
    CHR = case_when(
      CHR %in% c("X", "x") ~ "23",
      CHR %in% c("Y", "y") ~ "24",
      CHR %in% c("MT", "Mt", "M", "m") ~ "25",
      TRUE ~ CHR
    ),
    CHR = suppressWarnings(as.numeric(CHR)),
    P = suppressWarnings(as.numeric(P))
  ) %>%
  filter(!is.na(CHR), !is.na(BP), !is.na(P), P > 0, P <= 1) %>%
  arrange(CHR, BP)

cat("\n✅ Cleaned data summary:\n")
## 
## ✅ Cleaned data summary:
print(summary(diabetes$P))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 0.000e+00 8.000e-11 4.291e-07 2.000e-08 1.000e-05
# =====================================================
#        Manhattan Plot
# =====================================================
if (exists("diabetes") && nrow(diabetes) > 0) {
  manhattan(
    diabetes,
    chr = "CHR",
    bp = "BP",
    snp = "SNP",
    p = "P",
    main = "Manhattan Plot - GWAS Diabetes Mellitus"
  )
} else {
  cat("⚠️ Skipping Manhattan plot — no data loaded.\n")
}

# =====================================================
#        QQ Plot
# =====================================================
if (exists("diabetes") && nrow(diabetes) > 0) {
  qq(diabetes$P, main = "QQ Plot - GWAS Diabetes Mellitus")
} else {
  cat("⚠️ Skipping QQ plot — no data loaded.\n")
}