Load Libraries
library(tidyverse)
library(tidymodels)
library(data.table)
library(DT)
library(MASS)
df <- read.csv("C:/Users/PC/Documents/R_4DS/NUMERIC_/AgricultureData/AgrcultureDataset.csv")
glimpse(df)
Data Inspection and Cleaning
null_vars <- (sapply(df, function(x) sum(is.na(x))))
t(data.frame(null_vars)) # No missing data
## Remove Trailing White space
df$Season <- (sapply(df$Season, function(x) trimws(x)))
## scales Area
df$Scaled_Area <- scale(df$Area)
Exploratory Data Analysis
df_num <- df %>%
select_if(is.numeric) %>%
subset()
par(mfrow= c(3,3))
invisible(lapply(names(df_num), function(col_name)
truehist(df_num[,col_name], main = paste("Histogram of ", col_name), xlab = NA)))
##
library(superml)
lbl = LabelEncoder$new()
ml_df$Location = lbl$fit_transform(ml_df$Location)
##
# library(mltools)
# library(data.table)
#
# ml_df <- ml_df %>%
# mutate(Crop = as.factor(Crop))
#
# ml_df <- data.table(ml_df)
#
# newdata <- one_hot(ml_df)
## Train-Test
n_split <- round(0.8 * nrow(ml_df))
train_indices <- sample(1:nrow(ml_df), n_split)
train_set <- df[train_indices, ]
test_set <- df[-train_indices, ]
## Checking for Colinearity with new features
library(corrgram)
# corrgram(newdata, lower.panel=panel.shade, upper.panel=panel.cor)
Model Fitting
## Simple Linear Regression (+ it handles the Categorical Variables by default, -takes time to resolve)
mdl <- lm(Production ~ ., data = train_set)
summary(mdl)
Model Evaluation
## Checkingn for Residuals
modelResiduals <- as.data.frame(residuals(mdl)) %>%
ggplot(aes(residuals(mdl))) +
geom_histogram(fill='deepskyblue', color='black')
preds <- predict(model, test_set)
## Compare
modelEval <- cbind(test_set$Production, preds)
colnames(modelEval) <- c('Actual', 'Predicted')
modelEval <- as.data.frame(modelEval)
mse <- mean((modelEval$Actual - modelEval$Predicted))
rmse <- sqrt(mse)
LS0tDQp0aXRsZTogIlByZWRpY3RpdmUgTW9kZWxsaW5nIChsaW5lYXIgUmVncmVzc2lvbik6IEFncmljdWx0dXJhbCBQcm9kdWN0aW9uIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgTG9hZCBMaWJyYXJpZXMNCg0KYGBge3IgbWVzc2FnZSA9IEZ9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeW1vZGVscykNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoRFQpDQpsaWJyYXJ5KE1BU1MpDQoNCmRmIDwtIHJlYWQuY3N2KCJDOi9Vc2Vycy9QQy9Eb2N1bWVudHMvUl80RFMvTlVNRVJJQ18vQWdyaWN1bHR1cmVEYXRhL0FncmN1bHR1cmVEYXRhc2V0LmNzdiIpDQoNCmdsaW1wc2UoZGYpDQpgYGANCg0KDQojIyBEYXRhIEluc3BlY3Rpb24gYW5kIENsZWFuaW5nDQoNCmBgYHtyfQ0KbnVsbF92YXJzIDwtIChzYXBwbHkoZGYsIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpKQ0KdChkYXRhLmZyYW1lKG51bGxfdmFycykpICMgTm8gbWlzc2luZyBkYXRhDQoNCiMjIFJlbW92ZSBUcmFpbGluZyBXaGl0ZSBzcGFjZSANCmRmJFNlYXNvbiA8LSAoc2FwcGx5KGRmJFNlYXNvbiwgZnVuY3Rpb24oeCkgdHJpbXdzKHgpKSkNCg0KIyMgc2NhbGVzIEFyZWENCmRmJFNjYWxlZF9BcmVhIDwtIHNjYWxlKGRmJEFyZWEpDQpgYGANCg0KDQojIyBFeHBsb3JhdG9yeSBEYXRhIEFuYWx5c2lzDQoNCmBgYHtyfQ0KZGZfbnVtIDwtIGRmICU+JSANCiAgc2VsZWN0X2lmKGlzLm51bWVyaWMpICU+JSANCiAgc3Vic2V0KCkNCg0KcGFyKG1mcm93PSBjKDMsMykpDQoNCmludmlzaWJsZShsYXBwbHkobmFtZXMoZGZfbnVtKSwgZnVuY3Rpb24oY29sX25hbWUpIA0KICB0cnVlaGlzdChkZl9udW1bLGNvbF9uYW1lXSwgbWFpbiA9IHBhc3RlKCJIaXN0b2dyYW0gb2YgIiwgY29sX25hbWUpLCB4bGFiID0gTkEpKSkNCmBgYA0KYGBge3IgZWNobyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0V9DQojIyBGZWF0dXJlIEVuZw0KbGlicmFyeShtbHRvb2xzKQ0KbGlicmFyeShjYXJldCkNCg0KIyMjIExvY2F0aW9uIHRvIGNvY2F0IGJvdGhlIFN0YXRlcyBhbmQgRGlzdHJpY3RzDQpkZiRMb2NhdGlvbiA8LSBwYXN0ZShkZiRTdGF0ZV9OYW1lLCBkZiREaXN0cmljdF9OYW1lLCBzZXAgPSAiXyIpDQoNCiMjIExhYmVsIEVuY29kZQ0KbGlicmFyeShzdXBlcm1sKQ0KDQpsYmwgPSBMYWJlbEVuY29kZXIkbmV3KCkNCm1sX2RmJExvY2F0aW9uID0gbGJsJGZpdF90cmFuc2Zvcm0obWxfZGYkTG9jYXRpb24pDQoNCiMjIE90aGVycw0KbWxfZGYgPC0gZGYgJT4lIA0KICBtdXRhdGUoUHJvZHVjdGlvbiA9IGFzLmludGVnZXIoUHJvZHVjdGlvbikpICU+JSANCiAgbXV0YXRlKFNlYXNvbiA9IGNhc2Vfd2hlbigNCiAgICBTZWFzb24gPT0gIkF1dHVtbiIgfiAxLA0KICAgIFNlYXNvbiA9PSAiS2hhcmlmIiB+IDIsDQogICAgU2Vhc29uID09ICJSYWJpIiB+IDMsDQogICAgU2Vhc29uID09ICJTdW1tZXIiIH4gNCwNCiAgICBTZWFzb24gPT0gIldob2xlIFllYXIiIH4gNSwNCiAgICBUUlVFIH4gNg0KICApKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWMoIlN0YXRlX05hbWUiLCAiRGlzdHJpY3RfTmFtZSIsICJBcmVhIikpDQoNCmBgYA0KDQpgYGB7ciBMYWJlbCBFbmNvZGV9DQojIyANCmxpYnJhcnkoc3VwZXJtbCkNCg0KbGJsID0gTGFiZWxFbmNvZGVyJG5ldygpDQptbF9kZiRMb2NhdGlvbiA9IGxibCRmaXRfdHJhbnNmb3JtKG1sX2RmJExvY2F0aW9uKQ0KYGBgDQoNCmBgYHtyIE9uZS1Ib3QgRW5jb2RlfQ0KIyMgDQojIGxpYnJhcnkobWx0b29scykNCiMgbGlicmFyeShkYXRhLnRhYmxlKQ0KIyANCiMgbWxfZGYgPC0gbWxfZGYgJT4lIA0KIyAgIG11dGF0ZShDcm9wID0gYXMuZmFjdG9yKENyb3ApKQ0KIyANCiMgbWxfZGYgPC0gZGF0YS50YWJsZShtbF9kZikNCiMgDQojIG5ld2RhdGEgPC0gb25lX2hvdChtbF9kZikNCg0KYGBgDQoNCmBgYHtyfQ0KIyMgVHJhaW4tVGVzdA0Kbl9zcGxpdCA8LSByb3VuZCgwLjggKiBucm93KG1sX2RmKSkNCg0KdHJhaW5faW5kaWNlcyA8LSBzYW1wbGUoMTpucm93KG1sX2RmKSwgbl9zcGxpdCkNCnRyYWluX3NldCA8LSBkZlt0cmFpbl9pbmRpY2VzLCBdDQp0ZXN0X3NldCA8LSBkZlstdHJhaW5faW5kaWNlcywgXQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMjIENoZWNraW5nIGZvciBDb2xpbmVhcml0eSB3aXRoIG5ldyBmZWF0dXJlcw0KbGlicmFyeShjb3JyZ3JhbSkNCiMgY29ycmdyYW0obmV3ZGF0YSwgbG93ZXIucGFuZWw9cGFuZWwuc2hhZGUsIHVwcGVyLnBhbmVsPXBhbmVsLmNvcikNCmBgYA0KDQojIyBNb2RlbCBGaXR0aW5nDQoNCmBgYHtyfQ0KIyMgU2ltcGxlIExpbmVhciBSZWdyZXNzaW9uICgrIGl0IGhhbmRsZXMgdGhlIENhdGVnb3JpY2FsIFZhcmlhYmxlcyBieSBkZWZhdWx0LCAtdGFrZXMgdGltZSB0byByZXNvbHZlKQ0KbWRsIDwtIGxtKFByb2R1Y3Rpb24gfiAuLCBkYXRhID0gdHJhaW5fc2V0KQ0KDQpzdW1tYXJ5KG1kbCkNCmBgYA0KDQoNCiMjIE1vZGVsIEV2YWx1YXRpb24NCg0KYGBge3J9DQojIyBDaGVja2luZ24gZm9yIFJlc2lkdWFscw0KbW9kZWxSZXNpZHVhbHMgPC0gYXMuZGF0YS5mcmFtZShyZXNpZHVhbHMobWRsKSkgJT4lIA0KICBnZ3Bsb3QoYWVzKHJlc2lkdWFscyhtZGwpKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShmaWxsPSdkZWVwc2t5Ymx1ZScsIGNvbG9yPSdibGFjaycpDQpgYGANCg0KDQoNCmBgYHtyIFByZWRpY3Rpb259DQpwcmVkcyA8LSBwcmVkaWN0KG1vZGVsLCB0ZXN0X3NldCkNCg0KIyMgQ29tcGFyZQ0KbW9kZWxFdmFsIDwtIGNiaW5kKHRlc3Rfc2V0JFByb2R1Y3Rpb24sIHByZWRzKQ0KY29sbmFtZXMobW9kZWxFdmFsKSA8LSBjKCdBY3R1YWwnLCAnUHJlZGljdGVkJykNCm1vZGVsRXZhbCA8LSBhcy5kYXRhLmZyYW1lKG1vZGVsRXZhbCkNCmBgYA0KDQpgYGB7ciBSb290IE1lYW4gU3F1YXJlfQ0KbXNlIDwtIG1lYW4oKG1vZGVsRXZhbCRBY3R1YWwgLSBtb2RlbEV2YWwkUHJlZGljdGVkKSkNCnJtc2UgPC0gc3FydChtc2UpDQpgYGANCg0K