Iris and Squid Data Analysis

Author

ZACH GRUBE

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

# Minimal example chunk: R evaluates the expression and prints the result.
1 + 1
[1] 2

You can add options to executable code like this:

[1] 4

The echo: false option disables the printing of code (only output is displayed).

Part A

# Read the iris measurements from the working directory. With
# stringsAsFactors = TRUE the text column Species is imported as a factor.
# Note: this object masks the built-in datasets::iris for the rest of the file.
iris <- read.csv(file = "iris.csv", stringsAsFactors = TRUE)

# Later plots pick symbols and colours through a numeric species code. When the
# file does not supply a Code column, derive it from the factor's integer
# level codes.
has.code <- "Code" %in% names(iris)
if (!has.code) {
  iris[["Code"]] <- as.integer(iris[["Species"]])
}

# Print column names and types to confirm the import.
str(iris)
'data.frame':   150 obs. of  6 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Code        : int  1 1 1 1 1 1 1 1 1 1 ...

1. Petal Length vs Petal Width

# Scatterplot of every flower: petal length on the x axis, petal width on the
# y axis. pch = 16 draws solid filled circles.
with(iris, plot(
  x = Petal.Length,
  y = Petal.Width,
  pch = 16,
  main = "Petal Length vs Petal Width",
  xlab = "Petal Length",
  ylab = "Petal Width"
))

2. Petal Length vs Petal Width for Setosa Only

# Keep only the setosa rows. which() discards rows whose comparison is NA, so
# rows with a missing Species are not carried along.
setosa <- iris[which(iris$Species == "setosa"), ]

# Same axes as the all-species plot, restricted to the one species.
plot(
  x = setosa[["Petal.Length"]],
  y = setosa[["Petal.Width"]],
  pch = 16,
  main = "Setosa: Petal Length vs Petal Width",
  xlab = "Petal Length",
  ylab = "Petal Width"
)

3. Different Symbols for Different Species

# Scatterplot of all flowers, drawing each point with the plotting symbol whose
# number is stored in that row's Code column.
plot(iris$Petal.Length, iris$Petal.Width,
     xlab = "Petal Length",
     ylab = "Petal Width",
     main = "Petal Length vs Petal Width by Species",
     pch = iris$Code)

# Look up each species' symbol from the data (first row of each species)
# instead of assuming the codes are 1, 2, 3 in factor-level order, so the
# legend always shows the symbols that were actually plotted.
# Assumes every row of a species carries the same Code.
legend.rows <- match(levels(iris$Species), iris$Species)
legend("topleft",
       legend = levels(iris$Species),
       pch = iris$Code[legend.rows],
       title = "Species")

4. Different Colors for Different Species

# Palette indexed by species code; Color stores the resulting colour per row
# so later plots can reuse it.
species.colors <- c("darkorange", "purple", "deepskyblue")
iris$Color <- species.colors[iris$Code]

# Scatterplot of all flowers with solid circles coloured by species code.
plot(iris$Petal.Length, iris$Petal.Width,
     xlab = "Petal Length",
     ylab = "Petal Width",
     main = "Petal Length vs Petal Width by Species",
     pch = 16,
     col = iris$Color)

# Take each species' legend colour from the Color column (first row of each
# species) rather than pairing the palette with the factor levels by position.
# The two only agree when Code follows factor-level order; this lookup keeps
# the legend matched to the plotted points either way.
# Assumes every row of a species carries the same Code.
legend.rows <- match(levels(iris$Species), iris$Species)
legend("topleft",
       legend = levels(iris$Species),
       col = iris$Color[legend.rows],
       pch = 16,
       title = "Species")

5. Symbols Scaled by Sepal Width and Colored by Species

# Scatterplot of all flowers: colour comes from the per-row Color column and
# the symbol size (cex) is half of each flower's sepal width, so wider sepals
# give larger circles.
plot(iris$Petal.Length, iris$Petal.Width,
     xlab = "Petal Length",
     ylab = "Petal Width",
     main = "Petal Length vs Petal Width with Sepal Width Scaling",
     pch = 16,
     col = iris$Color,
     cex = iris$Sepal.Width / 2)

# Take each species' legend colour from the Color column (first row of each
# species) so the legend matches the plotted points even if the species codes
# are not in factor-level order. The legend describes colour only, not size.
# Assumes every row of a species carries the same colour.
legend.rows <- match(levels(iris$Species), iris$Species)
legend("topleft",
       legend = levels(iris$Species),
       col = iris$Color[legend.rows],
       pch = 16,
       title = "Species")

6. Petal Length vs Petal Width with Smoothing Line

# Scatterplot of petal width against petal length with a loess smoothing curve
# added by scatter.smooth(). pch and col style the points.
with(iris, scatter.smooth(
  x = Petal.Length,
  y = Petal.Width,
  col = "darkorange",
  pch = 16,
  main = "Petal Length vs Petal Width with Smoothing Line",
  xlab = "Petal Length",
  ylab = "Petal Width"
))

Part B

# Read the squid records from the working directory.
squid <- read.csv(file = "squid.csv")

# Sex arrives as the codes 1 and 2; relabel them so plots show names instead
# of numbers. Any other value becomes NA.
sex.codes <- c(1, 2)
sex.labels <- c("Male", "Female")
squid[["Sex"]] <- factor(squid[["Sex"]],
                         levels = sex.codes,
                         labels = sex.labels)

# Treat catch location as a grouping variable, one level per distinct value.
squid[["Location"]] <- factor(squid[["Location"]])

1. Histogram of All GSI Values

# Distribution of GSI over every squid in the data set.
with(squid, hist(
  GSI,
  col = "lightblue",
  border = "white",
  main = "Histogram of GSI Values",
  xlab = "GSI"
))

2. Separate Histograms of GSI Values for Male and Female Squid

# Draw the male and female histograms side by side (1 row, 2 columns).
# par() returns the settings it replaces, so keep them and restore exactly
# that layout afterwards instead of forcing a 1 x 1 layout.
old.par <- par(mfrow = c(1, 2))
hist(squid$GSI[squid$Sex == "Male"],
     main = "GSI Values for Male Squid",
     xlab = "GSI",
     col = "lightgreen",
     border = "white")
hist(squid$GSI[squid$Sex == "Female"],
     main = "GSI Values for Female Squid",
     xlab = "GSI",
     col = "lightpink",
     border = "white")

# Put the graphics layout back the way it was before this section.
par(old.par)

3. Boxplot Showing Relationship of GSI to Sex

# Compare the GSI distribution between the sexes: one box per level of Sex,
# filled in the same colours as the per-sex histograms.
sex.fill <- c("lightgreen", "lightpink")
boxplot(
  GSI ~ Sex,
  data = squid,
  col = sex.fill,
  main = "GSI by Sex",
  xlab = "Sex",
  ylab = "GSI"
)

4. Boxplot Showing Relationship of GSI to Catch Location

# Compare the GSI distribution between catch locations: one box per level of
# Location, all filled with the same colour.
boxplot(
  GSI ~ Location,
  data = squid,
  col = "lightblue",
  main = "GSI by Catch Location",
  xlab = "Location",
  ylab = "GSI"
)