library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
# ========================== Data loading and train/test split
# Location of the input CSV. Set the FOOD_WASTAGE_CSV environment variable to
# run the script against a file stored elsewhere; when it is unset or empty
# the original hard-coded path is used, so existing runs behave as before.
data_path <- Sys.getenv("FOOD_WASTAGE_CSV", unset = "")
if (!nzchar(data_path)) {
  data_path <- "/Users/timyang/Downloads/food_wastage_data.csv"
}
# Fail early with a readable message instead of a low-level connection error.
if (!file.exists(data_path)) {
  stop("Input file not found: ", data_path, call. = FALSE)
}
data <- read.csv(data_path)

# Data encoding: convert every character column to a factor so the modelling
# functions below treat it as a categorical predictor.
data <- data %>%
  mutate(across(where(is.character), as.factor))

# Split training and testing sets (80/20). The seed is fixed so the partition
# (and therefore every metric reported below) is reproducible.
set.seed(123)
trainIndex <- createDataPartition(
  data$Wastage.Food.Amount,
  p = 0.8,
  list = FALSE
)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]
# ========================== Linear Regression Model
# Baseline: least-squares fit of the wastage amount on every other column.
linear_model <- lm(Wastage.Food.Amount ~ ., data = train_data)
cat("============ Linear Regression Results ============ \n")
## ============ Linear Regression Results ============

# Training performance: error measured on the rows the model was fitted to.
train_predictions_lm <- predict(linear_model, newdata = train_data)
train_mse_lm <- with(
  train_data,
  mean((train_predictions_lm - Wastage.Food.Amount)^2)
)
cat("Linear Regression Training MSE:", train_mse_lm, "\n")
## Linear Regression Training MSE: 25.71025
# MAPE is reported as a fraction (not multiplied by 100).
train_mape_lm <- with(
  train_data,
  mean(abs(train_predictions_lm - Wastage.Food.Amount) / Wastage.Food.Amount)
)
cat("Linear Regression Training MAPE:", train_mape_lm, "\n")
## Linear Regression Training MAPE: 0.1470058

# Testing performance: same two metrics on the held-out rows.
test_predictions_lm <- predict(linear_model, newdata = test_data)
test_mse_lm <- with(
  test_data,
  mean((test_predictions_lm - Wastage.Food.Amount)^2)
)
cat("Linear Regression Testing MSE:", test_mse_lm, "\n")
## Linear Regression Testing MSE: 23.80189
test_mape_lm <- with(
  test_data,
  mean(abs(test_predictions_lm - Wastage.Food.Amount) / Wastage.Food.Amount)
)
cat("Linear Regression Testing MAPE:", test_mape_lm, "\n")
## Linear Regression Testing MAPE: 0.1437108
# ========================== Nonlinear Regression Model
# Add nonlinear features: squared copies of Number.of.Guests and
# Quantity.of.Food are appended as explicit columns to both splits, so the
# model stays linear in its coefficients but can capture curvature.
train_data_nl <- train_data %>%
  mutate(
    Number.of.Guests2 = Number.of.Guests^2,
    Quantity.of.Food2 = Quantity.of.Food^2
  )
test_data_nl <- test_data %>%
  mutate(
    Number.of.Guests2 = Number.of.Guests^2,
    Quantity.of.Food2 = Quantity.of.Food^2
  )

# Nonlinear regression model.
# The squared columns created above are already picked up by `.`. The formula
# previously also added I(Number.of.Guests^2) and I(Quantity.of.Food^2), which
# duplicated those columns exactly and made the fit rank-deficient (aliased
# terms with NA coefficients). Each squared term now enters the model once.
nonlinear_model <- lm(Wastage.Food.Amount ~ ., data = train_data_nl)
cat("============ Nonlinear Regression Results ============ \n")
## ============ Nonlinear Regression Results ============
# NOTE: the recorded values below come from the original run; they are
# expected to be unchanged because the removed terms were exact duplicates.

# Training performance
train_predictions_nl <- predict(nonlinear_model, newdata = train_data_nl)
train_mse_nl <- mean(
  (train_predictions_nl - train_data_nl$Wastage.Food.Amount)^2
)
cat("Nonlinear Regression Training MSE:", train_mse_nl, "\n")
## Nonlinear Regression Training MSE: 25.60621
# MAPE is reported as a fraction (not multiplied by 100).
train_mape_nl <- mean(
  abs(train_predictions_nl - train_data_nl$Wastage.Food.Amount) /
    train_data_nl$Wastage.Food.Amount
)
cat("Nonlinear Regression Training MAPE:", train_mape_nl, "\n")
## Nonlinear Regression Training MAPE: 0.147233

# Testing performance
test_predictions_nl <- predict(nonlinear_model, newdata = test_data_nl)
test_mse_nl <- mean(
  (test_predictions_nl - test_data_nl$Wastage.Food.Amount)^2
)
cat("Nonlinear Regression Testing MSE:", test_mse_nl, "\n")
## Nonlinear Regression Testing MSE: 23.75306
test_mape_nl <- mean(
  abs(test_predictions_nl - test_data_nl$Wastage.Food.Amount) /
    test_data_nl$Wastage.Food.Amount
)
cat("Nonlinear Regression Testing MAPE:", test_mape_nl, "\n")
## Nonlinear Regression Testing MAPE: 0.1449359
# ========================== Random Forest Model
# Random forest with 100 trees on the same predictors as the linear model.
# The seed is reset immediately before fitting so the forest is reproducible.
set.seed(123)
rf_model <- randomForest(
  Wastage.Food.Amount ~ .,
  data = train_data,
  ntree = 100
)
cat("============ Random Forest Results ============ \n")
## ============ Random Forest Results ============

# Training performance: predictions for the same rows the forest was grown on.
train_predictions_rf <- predict(rf_model, newdata = train_data)
train_mse_rf <- with(
  train_data,
  mean((train_predictions_rf - Wastage.Food.Amount)^2)
)
cat("Random Forest Training MSE:", train_mse_rf, "\n")
## Random Forest Training MSE: 4.466325
# MAPE is reported as a fraction (not multiplied by 100).
train_mape_rf <- with(
  train_data,
  mean(abs(train_predictions_rf - Wastage.Food.Amount) / Wastage.Food.Amount)
)
cat("Random Forest Training MAPE:", train_mape_rf, "\n")
## Random Forest Training MAPE: 0.05527655

# Testing performance: same two metrics on the held-out rows.
test_predictions_rf <- predict(rf_model, newdata = test_data)
test_mse_rf <- with(
  test_data,
  mean((test_predictions_rf - Wastage.Food.Amount)^2)
)
cat("Random Forest Testing MSE:", test_mse_rf, "\n")
## Random Forest Testing MSE: 9.412642
test_mape_rf <- with(
  test_data,
  mean(abs(test_predictions_rf - Wastage.Food.Amount) / Wastage.Food.Amount)
)
cat("Random Forest Testing MAPE:", test_mape_rf, "\n")
## Random Forest Testing MAPE: 0.0849227
# ========================== Model Comparison
# Prints the test-set MSE and MAPE of the three models side by side.
# The label -> value tables live inside local() so that this reporting step
# leaves no extra objects in the global workspace.
local({
  mse_report <- c(
    "Linear Regression Testing MSE:" = test_mse_lm,
    "Nonlinear Regression Testing MSE:" = test_mse_nl,
    "Random Forest Testing MSE:" = test_mse_rf
  )
  mape_report <- c(
    "Linear Regression Testing MAPE:" = test_mape_lm,
    "Nonlinear Regression Testing MAPE:" = test_mape_nl,
    "Random Forest Testing MAPE:" = test_mape_rf
  )

  cat("============ Model Comparison ============ \n")
  cat("---------- MSE ---------- \n")
  for (label in names(mse_report)) {
    cat(label, mse_report[[label]], "\n")
  }
  cat("---------- MAPE ---------- \n")
  for (label in names(mape_report)) {
    cat(label, mape_report[[label]], "\n")
  }
})
## ============ Model Comparison ============
## ---------- MSE ----------
## Linear Regression Testing MSE: 23.80189
## Nonlinear Regression Testing MSE: 23.75306
## Random Forest Testing MSE: 9.412642
## ---------- MAPE ----------
## Linear Regression Testing MAPE: 0.1437108
## Nonlinear Regression Testing MAPE: 0.1449359
## Random Forest Testing MAPE: 0.0849227