R Markdown

# Read the data ----
# The CSV path is chosen interactively via file.choose(); read.csv() treats
# the first row as column names by default.
df <- read.csv(file = file.choose())
# Number of rows and columns in the loaded data frame
dim(df)
## [1] 103   4
# Per-column summary of the data frame
summary(df)
##   Instructor            Final           Anxiety          Gender         
##  Length:103         Min.   : 10.00   Min.   : 0.056   Length:103        
##  Class :character   1st Qu.: 40.00   1st Qu.:69.775   Class :character  
##  Mode  :character   Median : 65.00   Median :79.044   Mode  :character  
##                     Mean   : 59.76   Mean   :74.344                     
##                     3rd Qu.: 80.00   3rd Qu.:84.686                     
##                     Max.   :100.00   Max.   :97.582
# Numeric columns: histograms ----
hist(
  df[["Final"]],
  main = "Final Exam Scores",
  xlab = "Scores from 0 - 100",
  ylab = "Frequency",
  col = "skyblue"
)

hist(
  df[["Anxiety"]],
  main = "Student Anxiety Scores",
  xlab = "Scores from 0 - 100",
  ylab = "Frequency",
  col = "lightgreen"
)

# Numeric columns: box plots ----
boxplot(
  df[["Final"]],
  main = "Final Scores Distribution",
  col = "skyblue"
)

boxplot(
  df[["Anxiety"]],
  main = "Anxiety Scores Distribution",
  col = "lightgreen"
)

# Numeric columns: kernel density estimates ----
plot(
  density(df[["Final"]]),
  main = "Final Exam Scores Density",
  col = "skyblue",
  lwd = 2
)

plot(
  density(df[["Anxiety"]]),
  main = "Anxiety Scores Density",
  col = "lightgreen",
  lwd = 2
)

# Categorical columns: one bar per distinct value, height = row count ----
barplot(
  table(df[["Instructor"]]),
  main = "Count of Students Per Instructor"
)

barplot(
  table(df[["Gender"]]),
  main = "Count of Students by Gender"
)

# Pairwise Pearson correlation between the two numeric columns
correlation_matrix <- cor(x = df[c("Final", "Anxiety")], method = "pearson")
print("Correlation Matrix:")
## [1] "Correlation Matrix:"
print(correlation_matrix)
##             Final   Anxiety
## Final    1.000000 -0.413029
## Anxiety -0.413029  1.000000
# Scatter plot of 'Anxiety' vs 'Final' scores with 'Gender' as hue
# The colour vector is named so that every point and every legend entry looks
# up its colour by gender label rather than by position in the data.
colors <- c(Male = "blue", Female = "red")
# Keep only rows whose gender has a colour; %in% (unlike ==) never yields NA,
# so rows with a missing gender are dropped instead of becoming NA points.
shown <- df[df$Gender %in% names(colors), ]
# Plot both groups in one call so the axis limits cover all plotted points
# (sizing the axes from the Male rows alone could clip Female points).
plot(
  shown$Anxiety, shown$Final,
  col = colors[as.character(shown$Gender)],
  main = "Final Exam Scores vs Anxiety Scores (Gender)",
  xlab = "Anxiety Scores",
  ylab = "Final Exam Scores"
)
# Legend labels come from the colour vector itself, so they always match the
# plotted colours; pch = 1 is the open circle used for the points.
legend("topright", legend = names(colors), col = colors, pch = 1)