R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#Business Scenario: Predicting Student Performance #Data Generation:

# Set a fixed random seed for reproducibility
set.seed(10923)

# Number of students
num_students <- 500  # Set the number of students to 500

# Simulate study hours (ranging from 1 to 20 hours)
study_hours <- sample(1:20, num_students, replace = TRUE)

# Simulate quiz scores (ranging from 0 to 100)
quiz_scores <- sample(0:100, num_students, replace = TRUE)

# Simulate forum participation (ranging from 0 to 50 posts)
forum_posts <- sample(0:50, num_students, replace = TRUE)

# Simulate previous grades (ranging from 0 to 100)
previous_grades <- sample(0:100, num_students, replace = TRUE)

# Simulate final grades (ranging from 0 to 100)
final_grades <- 0.3 * study_hours + 0.4 * quiz_scores + 0.2 * forum_posts + 0.1 * previous_grades + rnorm(num_students, mean = 0, sd = 5) + 25

# Create a data frame
student_data <- data.frame(StudyHours = study_hours, QuizScores = quiz_scores, ForumPosts = forum_posts, PreviousGrades = previous_grades, FinalGrades = final_grades)

# View the first few rows of the generated data
head(student_data)
##   StudyHours QuizScores ForumPosts PreviousGrades FinalGrades
## 1         20         91         22             78    80.80895
## 2         12         26         27              1    46.45853
## 3         13          5          8             60    40.22946
## 4          4         96         13             78    70.64216
## 5          5         74         45             31    62.35254
## 6         18          1         47             50    48.42835

#Explore the data

# Todo:

#Modeling

Use 80% of the data for training and 20% for testing to predict final grades. Compute the Mean Squared Error and model accuracy based on prediction interval.

# Splitting the data into training and testing sets (80% training, 20% testing)
set.seed(10923) # Set seed for reproducibility
sample_index <- sample(1:nrow(student_data), 0.8 * nrow(student_data))
train_data <- student_data[sample_index, ]
test_data <- student_data[-sample_index, ]

# Building a Linear Regression model using the train data and assign it to an object # called model.
# Todo: Target variable is FinalGrades and the Features are StudyHours, QuizScores, # ForumPosts, and PreviousGrades
# Enter code below:
model <- lm(FinalGrades ~ StudyHours + QuizScores + ForumPosts + PreviousGrades, data = train_data)

# Making predictions on the test set. use the model object to make predictions.
# Enter code below:
predictions <- predict(model, newdata = test_data)

# Evaluation metrics
# Compute the mean squared error and R-squared
# Enter code below:
mse <- mean((test_data$FinalGrades - predictions)^2)
rsquared <- 1 - mse / var(test_data$FinalGrades)

# Print evaluation metrics
#Enter code below
cat("Mean Squared Error (MSE):", mse, "\n")
## Mean Squared Error (MSE): 22.34656
cat("R-squared (R^2):", rsquared, "\n")
## R-squared (R^2): 0.8865354

#Model Accuracy based on Prediction Interval

# Get the predictions and prediction intervals
pred_int <- predict(model, newdata = test_data, interval = "prediction")

# Extract lower and upper bounds of the prediction interval
lower_bound <- pred_int[, "lwr"]
upper_bound <- pred_int[, "upr"]

# Actual values from the test data
actual_values <- test_data$FinalGrades

# Check if the actual values fall within the prediction interval
correct_predictions <- actual_values >= lower_bound & actual_values <= upper_bound

# Compute accuracy
accuracy <- sum(correct_predictions) / length(correct_predictions)

# Print accuracy
cat("Model Accuracy using Prediction Interval:", accuracy, "\n")
## Model Accuracy using Prediction Interval: 0.96