# Load the iris dataset
data(iris)

# View the data set
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

Step 1:

Using the standard Anderson Iris dataset, draw the following scatterplot with the main title and axes labels:

Title: Anderson Iris data

y-axis: Petal width (cm)

x-axis: Petal length (cm)

# Load the iris dataset
data(iris)

# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23)  # 22 = square, 21 = circle, 23 = diamond

# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
     main = "Anderson Iris data",
     xlab = "Petal length (cm)",
     ylab = "Petal width (cm)",
     pch = species_pch[iris$Species],  # Different shapes for different species
     col = species_colors[iris$Species],  # Different colors for different species
     bg = species_colors[iris$Species]  # Background color for filled symbols
)

# Add a legend to differentiate species
legend("topleft", legend = levels(iris$Species),
       col = species_colors,
       pch = species_pch,
       pt.bg = species_colors)  # Background color for filled symbols in legend

## Step 2: ### With the same dataset, add two horizontal and two vertical lines at the mean and median (considerable as hypothetical support vectors) and the centroids red and blue points as shown in the diagram below. Use the abline and points methods. Learn more about abline and points functions at:

abline: Add Straight Lines to a Plot

points: Add Points to a Plot

# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23)  # 22 = square, 21 = circle, 23 = diamond

# Calculate means and medians
mean_petal_length <- mean(iris$Petal.Length)
median_petal_length <- median(iris$Petal.Length)
mean_petal_width <- mean(iris$Petal.Width)
median_petal_width <- median(iris$Petal.Width)

# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
     main = "Anderson Iris data",
     xlab = "Petal length (cm)",
     ylab = "Petal width (cm)",
     pch = species_pch[iris$Species],  # Different shapes for different species
     col = species_colors[iris$Species],  # Different colors for different species
     bg = species_colors[iris$Species]  # Background color for filled symbols
)

# Add horizontal and vertical lines at the mean and median
abline(h = mean_petal_width, col = "red", lty = 2)  # Horizontal line at mean petal width
abline(h = median_petal_width, col = "blue", lty = 2)  # Horizontal line at median petal width
abline(v = mean_petal_length, col = "red", lty = 2)  # Vertical line at mean petal length
abline(v = median_petal_length, col = "blue", lty = 2)  # Vertical line at median petal length

# Plotting mean points
points(mean_petal_length, mean_petal_width, col = "black", bg = "red", pch = 23, cex = 1.7) # Red diamond with black outline for mean

# Plotting median points - completing your provided code snippet
points(median_petal_length, median_petal_width, col = "black", bg = "blue", pch = 23, cex = 1.7) # Blue diamond with black outline for median

## Step 3: ### With the same dataset, add two more lines for the least-squares regression and the robust regression with the legend as listed in Figure 4. Use the lm and the lqs methods (From MASS package).

Legend

Setosa - purple circle

Versicolor - red square

Virginica - green diamond

# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23)  # 22 = square, 21 = circle, 23 = diamond

# Calculate means and medians
mean_petal_length <- mean(iris$Petal.Length)
median_petal_length <- median(iris$Petal.Length)
mean_petal_width <- mean(iris$Petal.Width)
median_petal_width <- median(iris$Petal.Width)

# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
     main = "Anderson Iris data",
     xlab = "Petal length (cm)",
     ylab = "Petal width (cm)",
     pch = species_pch[iris$Species],  # Different shapes for different species
     col = species_colors[iris$Species],  # Different colors for different species
     bg = species_colors[iris$Species]  # Background color for filled symbols
)

# Add horizontal and vertical lines at the mean and median
abline(h = mean_petal_width, col = "red", lty = 2)  # Horizontal line at mean petal width
abline(h = median_petal_width, col = "blue", lty = 2)  # Horizontal line at median petal width
abline(v = mean_petal_length, col = "red", lty = 2)  # Vertical line at mean petal length
abline(v = median_petal_length, col = "blue", lty = 2)  # Vertical line at median petal length

# Plotting mean points
points(mean_petal_length, mean_petal_width, col = "black", bg = "red", pch = 23, cex = 1.7) # Red diamond with black outline for mean

# Plotting median points - completing your provided code snippet
points(median_petal_length, median_petal_width, col = "black", bg = "blue", pch = 23, cex = 1.7) # Blue diamond with black outline for median

# Load the MASS package for robust regression
library(MASS)

# Fit the least squares linear model
lm_model <- lm(Petal.Width ~ Petal.Length, data = iris)

# Fit the robust linear model using "lms" method
robust_model <- lqs(Petal.Width ~ Petal.Length, data = iris, method = "lms")

# Add the least squares regression line to the plot as a dotted red line
abline(lm_model, col = "red", lty = 3, lwd = 2)  # Change lty to 3 for dotted, col to "red"

# Add the robust regression line to the plot as a dotted blue line
# Use coefficients from the lqs model to add the line
abline(a = robust_model$coefficients[1], b = robust_model$coefficients[2], col = "blue", lty = 3, lwd = 2)  # Change col to "blue", lty to 3 for dotted

# Legend adjustment including new line styles
legend("topleft", 
       legend = c("Setosa", "Versicolor", "Virginica", "Least Squares", "Robust Regression"),
       col = c("blue", "red", "green", "red", "blue"), 
       pch = c(22, 21, 23, NA, NA), lty = c(NA, NA, NA, 3, 3),  # Update lty for lines
       pt.bg = c("blue", "red", "green", NA, NA), pt.cex = 1.5,
       merge = TRUE, cex = 0.8)