# Load the iris dataset
data(iris)
# View the data set
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Load the iris dataset
data(iris)
# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23) # 22 = square, 21 = circle, 23 = diamond
# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
main = "Anderson Iris data",
xlab = "Petal length (cm)",
ylab = "Petal width (cm)",
pch = species_pch[iris$Species], # Different shapes for different species
col = species_colors[iris$Species], # Different colors for different species
bg = species_colors[iris$Species] # Background color for filled symbols
)
# Add a legend to differentiate species
legend("topleft", legend = levels(iris$Species),
col = species_colors,
pch = species_pch,
pt.bg = species_colors) # Background color for filled symbols in legend
## Step 2: ### With the same dataset, add two horizontal and two
vertical lines at the mean and median (considerable as hypothetical
support vectors) and the centroids red and blue points as shown in the
diagram below. Use the abline and points methods. Learn more about
abline and points functions at:
# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23) # 22 = square, 21 = circle, 23 = diamond
# Calculate means and medians
mean_petal_length <- mean(iris$Petal.Length)
median_petal_length <- median(iris$Petal.Length)
mean_petal_width <- mean(iris$Petal.Width)
median_petal_width <- median(iris$Petal.Width)
# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
main = "Anderson Iris data",
xlab = "Petal length (cm)",
ylab = "Petal width (cm)",
pch = species_pch[iris$Species], # Different shapes for different species
col = species_colors[iris$Species], # Different colors for different species
bg = species_colors[iris$Species] # Background color for filled symbols
)
# Add horizontal and vertical lines at the mean and median
abline(h = mean_petal_width, col = "red", lty = 2) # Horizontal line at mean petal width
abline(h = median_petal_width, col = "blue", lty = 2) # Horizontal line at median petal width
abline(v = mean_petal_length, col = "red", lty = 2) # Vertical line at mean petal length
abline(v = median_petal_length, col = "blue", lty = 2) # Vertical line at median petal length
# Plotting mean points
points(mean_petal_length, mean_petal_width, col = "black", bg = "red", pch = 23, cex = 1.7) # Red diamond with black outline for mean
# Plotting median points - completing your provided code snippet
points(median_petal_length, median_petal_width, col = "black", bg = "blue", pch = 23, cex = 1.7) # Blue diamond with black outline for median
## Step 3: ### With the same dataset, add two more lines for the
least-squares regression and the robust regression with the legend as
listed in Figure 4. Use the lm and the lqs methods (From MASS
package).
# Create species colors and point characters
species_colors <- c("setosa" = "blue", "versicolor" = "red", "virginica" = "green")
species_pch <- c("setosa" = 22, "versicolor" = 21, "virginica" = 23) # 22 = square, 21 = circle, 23 = diamond
# Calculate means and medians
mean_petal_length <- mean(iris$Petal.Length)
median_petal_length <- median(iris$Petal.Length)
mean_petal_width <- mean(iris$Petal.Width)
median_petal_width <- median(iris$Petal.Width)
# Create the scatterplot
plot(iris$Petal.Length, iris$Petal.Width,
main = "Anderson Iris data",
xlab = "Petal length (cm)",
ylab = "Petal width (cm)",
pch = species_pch[iris$Species], # Different shapes for different species
col = species_colors[iris$Species], # Different colors for different species
bg = species_colors[iris$Species] # Background color for filled symbols
)
# Add horizontal and vertical lines at the mean and median
abline(h = mean_petal_width, col = "red", lty = 2) # Horizontal line at mean petal width
abline(h = median_petal_width, col = "blue", lty = 2) # Horizontal line at median petal width
abline(v = mean_petal_length, col = "red", lty = 2) # Vertical line at mean petal length
abline(v = median_petal_length, col = "blue", lty = 2) # Vertical line at median petal length
# Plotting mean points
points(mean_petal_length, mean_petal_width, col = "black", bg = "red", pch = 23, cex = 1.7) # Red diamond with black outline for mean
# Plotting median points - completing your provided code snippet
points(median_petal_length, median_petal_width, col = "black", bg = "blue", pch = 23, cex = 1.7) # Blue diamond with black outline for median
# Load the MASS package for robust regression
library(MASS)
# Fit the least squares linear model
lm_model <- lm(Petal.Width ~ Petal.Length, data = iris)
# Fit the robust linear model using "lms" method
robust_model <- lqs(Petal.Width ~ Petal.Length, data = iris, method = "lms")
# Add the least squares regression line to the plot as a dotted red line
abline(lm_model, col = "red", lty = 3, lwd = 2) # Change lty to 3 for dotted, col to "red"
# Add the robust regression line to the plot as a dotted blue line
# Use coefficients from the lqs model to add the line
abline(a = robust_model$coefficients[1], b = robust_model$coefficients[2], col = "blue", lty = 3, lwd = 2) # Change col to "blue", lty to 3 for dotted
# Legend adjustment including new line styles
legend("topleft",
legend = c("Setosa", "Versicolor", "Virginica", "Least Squares", "Robust Regression"),
col = c("blue", "red", "green", "red", "blue"),
pch = c(22, 21, 23, NA, NA), lty = c(NA, NA, NA, 3, 3), # Update lty for lines
pt.bg = c("blue", "red", "green", NA, NA), pt.cex = 1.5,
merge = TRUE, cex = 0.8)