# Directory that holds the course data. The CSV is read through an explicit
# path rather than via setwd(), so running this script does not change the
# working directory of the R session.
data_dir <- "~/Downloads/Intro to R/Module 3"

# Load required packages
# (attaching dplyr masks stats::filter/lag and base::intersect, setdiff,
# setequal, union)
library(dplyr)
library(ggplot2)

# Load the squid dataset
squid <- read.csv(file.path(data_dir, "squid.csv"))

# Fail early, with a readable message, if a column needed for the derived
# variables below is absent from the file.
required_cols <- c("Sex", "GSI")
missing_cols <- required_cols[!required_cols %in% names(squid)]
if (length(missing_cols) > 0) {
  stop(
    "squid.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Create factor label for Sex: code 1 -> "Male", code 2 -> "Female".
# Any other code (or NA) becomes NA in sex_label.
squid$sex_label <- factor(
  squid$Sex,
  levels = c(1, 2),
  labels = c("Male", "Female")
)

# Log-transform GSI. The small offset is added before taking the log, which
# keeps the result finite when GSI is exactly 0 (log(0) would be -Inf).
gsi_offset <- 0.01
squid$logGSI <- log(squid$GSI + gsi_offset)

# GSI histograms: one for the whole sample, then one per sex code
# (Sex == 1 is titled as male, Sex == 2 as female). All three share the same
# x-axis label and white bar borders; only the data subset, the title and the
# fill colour vary, so a small helper draws each panel.
# local() keeps the helper out of the global environment.
invisible(local({
  draw_gsi_hist <- function(values, title, fill) {
    hist(values, main = title, xlab = "GSI", col = fill, border = "white")
  }

  is_male <- squid$Sex == 1
  is_female <- squid$Sex == 2

  draw_gsi_hist(squid$GSI, "Histogram of GSI (All Squid)", "skyblue")
  draw_gsi_hist(squid$GSI[is_male], "Histogram of GSI (Male Squid)",
                "lightgreen")
  draw_gsi_hist(squid$GSI[is_female], "Histogram of GSI (Female Squid)",
                "lightpink")
}))

# Boxplots of log(GSI): first grouped by the labelled sex factor, then by
# Location. Both panels share the y-axis label; the grouping formula, title,
# x-axis label and fill colour(s) are passed in per panel.
# local() keeps the helper out of the global environment.
invisible(local({
  draw_loggsi_box <- function(grouping, title, axis_label, fill) {
    boxplot(grouping,
            data = squid,
            main = title,
            xlab = axis_label,
            ylab = "log(GSI)",
            col = fill)
  }

  # One colour per sex level
  draw_loggsi_box(logGSI ~ sex_label, "Boxplot of log(GSI) by Sex", "Sex",
                  c("lightblue", "lightcoral"))

  # A single colour for every location
  draw_loggsi_box(logGSI ~ Location, "Boxplot of log(GSI) by Location",
                  "Location", "lightgray")
}))