# R Markdown
# Load the dataset
# The CSV is picked through the interactive file-chooser dialog; its first row
# supplies the column names.
df <- read.csv(file = file.choose(), header = TRUE)
# Dimensions of the loaded data frame (rows, then columns)
dim(x = df)
## [1] 103 4
# Summary statistics
# Column-by-column overview of the data frame
summary(object = df)
## Instructor Final Anxiety Gender
## Length:103 Min. : 10.00 Min. : 0.056 Length:103
## Class :character 1st Qu.: 40.00 1st Qu.:69.775 Class :character
## Mode :character Median : 65.00 Median :79.044 Mode :character
## Mean : 59.76 Mean :74.344
## 3rd Qu.: 80.00 3rd Qu.:84.686
## Max. :100.00 Max. :97.582
# Visualizing numerical features
# Histograms of the two numeric columns, with matching axis labels.
hist(
  df[["Final"]],
  main = "Final Exam Scores",
  xlab = "Scores from 0 - 100",
  ylab = "Frequency",
  col = "skyblue"
)

hist(
  df[["Anxiety"]],
  main = "Student Anxiety Scores",
  xlab = "Scores from 0 - 100",
  ylab = "Frequency",
  col = "lightgreen"
)

# Box plots of the same columns, using the same colour per column as above.
boxplot(
  df[["Final"]],
  main = "Final Scores Distribution",
  col = "skyblue"
)

boxplot(
  df[["Anxiety"]],
  main = "Anxiety Scores Distribution",
  col = "lightgreen"
)

# Density curves
# Kernel density estimate of each numeric column, drawn as a thick line.
plot(
  density(df[["Final"]]),
  main = "Final Exam Scores Density",
  col = "skyblue",
  lwd = 2
)

plot(
  density(df[["Anxiety"]]),
  main = "Anxiety Scores Density",
  col = "lightgreen",
  lwd = 2
)

# Visualizing categorical data
# One bar per distinct value; bar height is the number of rows with that value.
barplot(
  table(df[["Instructor"]]),
  main = "Count of Students Per Instructor"
)

barplot(
  table(df[["Gender"]]),
  main = "Count of Students by Gender"
)

# Compute correlation matrix
# Pairwise Pearson correlation (the default method of cor()) between the two
# numeric columns, selected by name.
correlation_matrix <- cor(
  x = df[c("Final", "Anxiety")],
  method = "pearson"
)
print("Correlation Matrix:")
## [1] "Correlation Matrix:"
print(correlation_matrix)
## Final Anxiety
## Final 1.000000 -0.413029
## Anxiety -0.413029 1.000000
# Scatter plot of 'Anxiety' vs 'Final' scores with 'Gender' as hue
#
# Colours are keyed by gender label, so the plotted points and the legend are
# driven by the same lookup and cannot disagree, whatever order the genders
# appear in the data.
colors <- c(Male = "blue", Female = "red")

# Rows that belong to one of the plotted genders. %in% never returns NA, so
# rows with a missing Gender are simply left out instead of producing NA
# indices.
in_legend <- df$Gender %in% names(colors)

# Set up an empty frame from ALL plotted rows first. Sizing the axes from the
# first gender alone would clip points of the other gender that fall outside
# that range.
plot(
  df$Anxiety[in_legend], df$Final[in_legend],
  type = "n",
  main = "Final Exam Scores vs Anxiety Scores (Gender)",
  xlab = "Anxiety Scores",
  ylab = "Final Exam Scores"
)

# Add each gender's points in its own colour (Male first, then Female on top).
for (gender in names(colors)) {
  rows <- df$Gender %in% gender
  points(df$Anxiety[rows], df$Final[rows], col = colors[[gender]])
}

# Legend labels and colours both come from `colors`, in the same order.
legend("topright", legend = names(colors), fill = unname(colors))
