This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
install.packages("moments")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'moments' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
library(readr)
seattle_weather <- read_csv("seattle-weather.csv")
## Rows: 1461 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): date, weather
## dbl (4): precipitation, temp_max, temp_min, wind
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
missing_data <- is.na(seattle_weather)
# Count the missing values for each variable
missing_counts <- colSums(missing_data)
# Print the number of missing values for each variable
print("Missing values in each variable:")
## [1] "Missing values in each variable:"
print(missing_counts)
## date precipitation temp_max temp_min wind
## 0 0 0 0 0
## weather
## 0
# Summarize the total number of missing values in the dataset
total_missing_values <- sum(missing_counts)
print(paste("Total missing values in the dataset:", total_missing_values))
## [1] "Total missing values in the dataset: 0"
seattle_weather$date <- as.Date(seattle_weather$date, format = "%d/%m/%Y")
# Where:
# - 'seattle_weather' is the name of your data frame.
# - 'date' is the name of the column you want to convert.
# - 'format' specifies the current date format ("%d/%m/%Y" for "01/01/2012").
# Print the first few rows to verify the conversion
head(seattle_weather)
## # A tibble: 6 × 6
## date precipitation temp_max temp_min wind weather
## <date> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2012-01-01 0 12.8 5 4.7 drizzle
## 2 2012-01-02 10.9 10.6 2.8 4.5 rain
## 3 2012-01-03 0.8 11.7 7.2 2.3 rain
## 4 2012-01-04 20.3 12.2 5.6 4.7 rain
## 5 2012-01-05 1.3 8.9 2.8 6.1 rain
## 6 2012-01-06 2.5 4.4 2.2 2.2 rain
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Create a histogram for temp_min
ggplot(seattle_weather, aes(x = temp_min)) +
geom_histogram(binwidth = 2, fill = "blue", color = "black") +
labs(title = "Distribution of Minimum Temperature", x = "Temperature (°C)", y = "Frequency")
summary(seattle_weather)
## date precipitation temp_max temp_min
## Min. :2012-01-01 Min. : 0.000 Min. :-1.60 Min. :-7.100
## 1st Qu.:2012-12-31 1st Qu.: 0.000 1st Qu.:10.60 1st Qu.: 4.400
## Median :2013-12-31 Median : 0.000 Median :15.60 Median : 8.300
## Mean :2013-12-31 Mean : 3.029 Mean :16.44 Mean : 8.235
## 3rd Qu.:2014-12-31 3rd Qu.: 2.800 3rd Qu.:22.20 3rd Qu.:12.200
## Max. :2015-12-31 Max. :55.900 Max. :35.60 Max. :18.300
## wind weather
## Min. :0.400 Length:1461
## 1st Qu.:2.200 Class :character
## Median :3.000 Mode :character
## Mean :3.241
## 3rd Qu.:4.000
## Max. :9.500
The dataset covers the period from January 1, 2012 to December 31, 2015. Precipitation ranges from 0 to 55.90 with a mean of 3.029; the mean sits far above the median (0) and far below the maximum, which indicates a strong right skew (visualised in the sketch below). Maximum temperature ranges from -1.60 °C to 35.60 °C with a mean of 16.44 °C, showing only a slight right skew. Minimum temperature ranges from -7.10 °C to 18.30 °C with a mean of 8.235 °C, just below its median of 8.30 °C, suggesting a mild left skew. Wind speed varies from 0.4 m/s to 9.5 m/s, with a mean of 3.241 m/s.
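Before computing formal skewness statistics, the right skew in precipitation can be checked visually. A minimal sketch, mirroring the temp_min histogram above:

# Sketch: histogram of daily precipitation to show the long right tail
ggplot(seattle_weather, aes(x = precipitation)) +
geom_histogram(binwidth = 2, fill = "blue", color = "black") +
labs(title = "Distribution of Precipitation", x = "Precipitation", y = "Frequency")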
# Install and load the 'moments' package if you haven't already
install.packages("moments")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'moments' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
library(moments)
# Assuming you have a data frame named 'seattle_weather'
# Calculate skewness for each numeric variable separately
skewness_temp_max <- skewness(seattle_weather$temp_max, na.rm = TRUE)
skewness_temp_min <- skewness(seattle_weather$temp_min, na.rm = TRUE)
skewness_wind <- skewness(seattle_weather$wind, na.rm = TRUE)
skewness_precipitation <- skewness(seattle_weather$precipitation, na.rm = TRUE)
# Print the skewness values
cat("Skewness Value for 'temp_max':", skewness_temp_max, "\n")
## Skewness Value for 'temp_max': 0.2806415
cat("Skewness Value for 'temp_min':", skewness_temp_min, "\n")
## Skewness Value for 'temp_min': -0.2492024
cat("Skewness Value for 'wind':", skewness_wind, "\n")
## Skewness Value for 'wind': 0.8907518
cat("Skewness Value for 'precipitation':", skewness_precipitation, "\n")
## Skewness Value for 'precipitation': 3.502043
# Add a small constant before the log transformation so that days with zero precipitation do not map to -Inf
small_constant <- 0.001
seattle_weather$log_precipitation <- log(seattle_weather$precipitation + small_constant)
install.packages("moments")
## Warning: package 'moments' is in use and will not be installed
library(moments)
# Calculate skewness for the log-transformed precipitation data
skewness_precipitation <- skewness(seattle_weather$log_precipitation, na.rm = TRUE)
# Print the skewness value for log-transformed precipitation
cat("Skewness Value for Log-Transformed Precipitation Data:", skewness_precipitation, "\n")
## Skewness Value for Log-Transformed Precipitation Data: 0.4271911
seattle_weather
## # A tibble: 1,461 × 7
## date precipitation temp_max temp_min wind weather log_precipitation
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2012-01-01 0 12.8 5 4.7 drizzle -6.91
## 2 2012-01-02 10.9 10.6 2.8 4.5 rain 2.39
## 3 2012-01-03 0.8 11.7 7.2 2.3 rain -0.222
## 4 2012-01-04 20.3 12.2 5.6 4.7 rain 3.01
## 5 2012-01-05 1.3 8.9 2.8 6.1 rain 0.263
## 6 2012-01-06 2.5 4.4 2.2 2.2 rain 0.917
## 7 2012-01-07 0 7.2 2.8 2.3 rain -6.91
## 8 2012-01-08 0 10 2.8 2 sun -6.91
## 9 2012-01-09 4.3 9.4 5 3.4 rain 1.46
## 10 2012-01-10 1 6.1 0.6 3.4 rain 0.00100
## # ℹ 1,451 more rows
# Standardization (Z-Score Normalization)
seattle_weather_standardized <- seattle_weather
# Define the numeric columns you want to standardize
numeric_cols <- c("temp_max", "temp_min", "wind", "log_precipitation")
# Apply Z-Score normalization
seattle_weather_standardized[, numeric_cols] <- scale(seattle_weather_standardized[, numeric_cols])
# View the first few rows of the standardized data
head(seattle_weather_standardized)
## # A tibble: 6 × 7
## date precipitation temp_max temp_min wind weather log_precipitation
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2012-01-01 0 -0.495 -0.644 1.01 drizzle -0.842
## 2 2012-01-02 10.9 -0.794 -1.08 0.876 rain 1.42
## 3 2012-01-03 0.8 -0.645 -0.206 -0.655 rain 0.783
## 4 2012-01-04 20.3 -0.577 -0.525 1.01 rain 1.57
## 5 2012-01-05 1.3 -1.03 -1.08 1.99 rain 0.901
## 6 2012-01-06 2.5 -1.64 -1.20 -0.724 rain 1.06
library(rsample)
## Warning: package 'rsample' was built under R version 4.3.1
set.seed(20230712)
train_test_split <- initial_split(seattle_weather_standardized, prop = .80)
data_train <- training(train_test_split)
data_test <- testing(train_test_split)
data_test
## # A tibble: 293 × 7
## date precipitation temp_max temp_min wind weather log_precipitation
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2012-01-02 10.9 -0.794 -1.08 0.876 rain 1.42
## 2 2012-01-14 4.1 -1.64 -1.52 1.43 snow 1.18
## 3 2012-01-17 8.1 -1.79 -1.64 1.64 snow 1.35
## 4 2012-01-18 19.8 -2.24 -2.20 1.22 snow 1.56
## 5 2012-01-19 15.2 -2.39 -2.20 -1.14 snow 1.50
## 6 2012-01-26 4.8 -1.03 -1.42 1.08 rain 1.22
## 7 2012-01-29 27.7 -0.958 -0.863 0.876 rain 1.64
## 8 2012-02-08 2.8 -0.876 -0.644 -0.376 rain 1.09
## 9 2012-02-15 0 -1.26 -1.52 -1.00 drizzle -0.842
## 10 2012-02-20 3 -1.18 -1.30 -0.237 rain 1.10
## # ℹ 283 more rows
data_train
## # A tibble: 1,168 × 7
## date precipitation temp_max temp_min wind weather log_precipitation
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2012-06-04 1.3 -0.495 0.132 -0.0982 rain 0.901
## 2 2013-07-03 0 1.31 1.69 -0.0286 sun -0.842
## 3 2013-05-20 0 0.403 0.232 -1.00 sun -0.842
## 4 2014-12-10 13 0.335 0.351 2.41 rain 1.46
## 5 2015-12-02 2.5 -0.794 -0.763 1.22 rain 1.06
## 6 2014-01-25 0 -0.577 -1.42 -1.70 sun -0.842
## 7 2015-05-24 0 0.185 0.570 -0.376 sun -0.842
## 8 2013-12-21 5.6 -1.03 -0.525 -0.655 rain 1.26
## 9 2014-03-13 0.5 -0.345 -0.644 -0.515 rain 0.669
## 10 2014-03-17 0.3 -0.876 -1.08 -0.0286 rain 0.545
## # ℹ 1,158 more rows
install.packages("magrittr")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'magrittr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'magrittr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\sivak\AppData\Local\R\win-library\4.3\00LOCK\magrittr\libs\x64\magrittr.dll
## to
## C:\Users\sivak\AppData\Local\R\win-library\4.3\magrittr\libs\x64\magrittr.dll:
## Permission denied
## Warning: restored 'magrittr'
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
options(warn.conflicts = FALSE)
library(recipes)
## Warning: package 'recipes' was built under R version 4.3.1
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(magrittr)
library(workflows)
## Warning: package 'workflows' was built under R version 4.3.1
library(parsnip)
## Warning: package 'parsnip' was built under R version 4.3.1
my_rec <- recipe(weather ~ temp_max+ temp_min + wind + log_precipitation , data = data_train)
my_rec
##
## ── Recipe ──────────────────────────────────────────────────────────────────────
##
## ── Inputs
## Number of variables by role
## outcome: 1
## predictor: 4
# Convert the outcome in the full dataset to a factor and check its levels ('drizzle' comes first, so it acts as the reference category in the multinomial model)
seattle_weather$weather <- as.factor(seattle_weather$weather)
levels(seattle_weather$weather)
## [1] "drizzle" "fog" "rain" "snow" "sun"
install.packages("nnet") # Install the 'nnet' package
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'nnet' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'nnet'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\sivak\AppData\Local\R\win-library\4.3\00LOCK\nnet\libs\x64\nnet.dll to
## C:\Users\sivak\AppData\Local\R\win-library\4.3\nnet\libs\x64\nnet.dll:
## Permission denied
## Warning: restored 'nnet'
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
library(nnet) # Load the 'nnet' package
## Warning: package 'nnet' was built under R version 4.3.1
# Install and load the 'nnet' package
install.packages("nnet")
## Warning: package 'nnet' is in use and will not be installed
library(nnet)
# Fit the multinomial logistic regression model
multinom_model <- multinom(weather ~ temp_max + temp_min + wind + log_precipitation, data = data_train)
## # weights: 30 (20 variable)
## initial value 1879.823482
## iter 10 value 695.743897
## iter 20 value 529.159979
## iter 30 value 527.556250
## iter 40 value 527.365101
## iter 50 value 527.313081
## iter 60 value 527.302171
## iter 70 value 527.292290
## final value 527.292018
## converged
# Summary of the model
summary(multinom_model)
## Call:
## multinom(formula = weather ~ temp_max + temp_min + wind + log_precipitation,
## data = data_train)
##
## Coefficients:
## (Intercept) temp_max temp_min wind log_precipitation
## fog 3.035856 0.01800714 0.2543537 0.1913660 2.7186955
## rain 13.163772 -0.58936404 0.5849886 0.9962782 15.5386525
## snow -2.715052 -3.86803531 -3.3271336 1.5239656 19.1788878
## sun 3.045500 1.07381872 -0.5795547 0.7517946 0.4814626
##
## Std. Errors:
## (Intercept) temp_max temp_min wind log_precipitation
## fog 0.1489662 0.4527075 0.4196636 0.2786007 0.1254845
## rain 0.5015885 0.5712987 0.5181402 0.2932140 0.5801325
## snow 2.9020567 1.6271192 1.1637404 0.3893581 1.5583548
## sun 0.1278680 0.3892005 0.3579674 0.2375716 0.1077126
##
## Residual Deviance: 1054.584
## AIC: 1094.584
The coefficients are on the log-odds scale and are interpreted relative to the baseline category, drizzle (the level omitted from the coefficient table).
Fog:
Intercept: When all predictors are zero, the log-odds of fog versus drizzle are about 3.036. temp_max: Holding the other predictors constant, a one-unit increase in temp_max is associated with a very small increase of 0.018 in the log-odds of fog. temp_min: A one-unit increase in temp_min is associated with a small increase of 0.254 in the log-odds of fog. wind: A one-unit increase in wind is associated with a small increase of 0.191 in the log-odds of fog. log_precipitation: A one-unit increase in log_precipitation raises the log-odds of fog by 2.719, i.e., it multiplies the odds of fog versus drizzle by exp(2.719).
Rain:
Intercept: When all predictors are zero, the log-odds of rain versus drizzle are about 13.164. temp_max: Holding the other predictors constant, a one-unit increase in temp_max is associated with a 0.589 decrease in the log-odds of rain. temp_min: A one-unit increase in temp_min is associated with a 0.585 increase in the log-odds of rain. wind: A one-unit increase in wind is associated with a 0.996 increase in the log-odds of rain. log_precipitation: A one-unit increase in log_precipitation raises the log-odds of rain by 15.539, a very strong effect.
Snow:
Intercept: When all predictors are zero, the log-odds of snow versus drizzle are about -2.715. temp_max: Holding the other predictors constant, a one-unit increase in temp_max is associated with a large 3.868 decrease in the log-odds of snow. temp_min: A one-unit increase in temp_min is associated with a large 3.327 decrease in the log-odds of snow. wind: A one-unit increase in wind is associated with a 1.524 increase in the log-odds of snow. log_precipitation: A one-unit increase in log_precipitation raises the log-odds of snow by 19.179, again a very strong effect.
Sun:
Intercept: When all predictors are zero, the log-odds of sun versus drizzle are about 3.046. temp_max: Holding the other predictors constant, a one-unit increase in temp_max is associated with a substantial 1.074 increase in the log-odds of sun. temp_min: A one-unit increase in temp_min is associated with a 0.580 decrease in the log-odds of sun. wind: A one-unit increase in wind is associated with a modest 0.752 increase in the log-odds of sun. log_precipitation: A one-unit increase in log_precipitation is associated with a small 0.481 increase in the log-odds of sun.
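Because these coefficients are log-odds relative to drizzle, exponentiating them gives odds ratios, which are often easier to read. A minimal sketch using the multinom_model fitted above:

# Sketch: convert log-odds coefficients to odds ratios (relative to "drizzle");
# values above 1 raise the odds of that outcome for a one-unit increase in the
# predictor, values below 1 lower them.
round(exp(coef(multinom_model)), 3)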
# Assuming 'data_train' is your data frame
data_train$weather <- as.factor(data_train$weather)
data_test$weather <- as.factor(data_test$weather)
# Now fit the model
my_mod <- multinom_reg() %>%
set_engine("nnet", trace = FALSE)
fit <- fit(my_mod, data = data_train, weather ~ temp_max + temp_min + wind + log_precipitation)
# Evaluate the model on the training data (classification)
train_results <- predict(fit, data_train, type = "class")
# Evaluate the model on the test data
test_results <- predict(fit, data_test, type = "class")
# Print the training results
train_results
## # A tibble: 1,168 × 1
## .pred_class
## <fct>
## 1 rain
## 2 sun
## 3 sun
## 4 rain
## 5 rain
## 6 sun
## 7 sun
## 8 rain
## 9 rain
## 10 rain
## # ℹ 1,158 more rows
test_results
## # A tibble: 293 × 1
## .pred_class
## <fct>
## 1 rain
## 2 snow
## 3 snow
## 4 snow
## 5 snow
## 6 rain
## 7 rain
## 8 rain
## 9 sun
## 10 rain
## # ℹ 283 more rows
my_wf <- workflow() %>%
add_model(my_mod) %>%
add_recipe(my_rec)
# Fit the workflow to the training data
fit <- fit(my_wf, data_train)
# Print the fit object
fit
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: multinom_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Call:
## nnet::multinom(formula = ..y ~ ., data = data, trace = ~FALSE)
##
## Coefficients:
## (Intercept) temp_max temp_min wind log_precipitation
## fog 3.035856 0.01800714 0.2543537 0.1913660 2.7186955
## rain 13.163772 -0.58936404 0.5849886 0.9962782 15.5386525
## snow -2.715052 -3.86803531 -3.3271336 1.5239656 19.1788878
## sun 3.045500 1.07381872 -0.5795547 0.7517946 0.4814626
##
## Residual Deviance: 1054.584
## AIC: 1094.584
readr::spec
## function (x)
## {
## stopifnot(inherits(x, "tbl_df"))
## attr(x, "spec")
## }
## <bytecode: 0x0000025f7c1d9060>
## <environment: namespace:readr>
install.packages("parsnip")
## Warning: package 'parsnip' is in use and will not be installed
install.packages("workflows")
## Warning: package 'workflows' is in use and will not be installed
install.packages("yardstick")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'yardstick' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'yardstick'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\sivak\AppData\Local\R\win-library\4.3\00LOCK\yardstick\libs\x64\yardstick.dll
## to
## C:\Users\sivak\AppData\Local\R\win-library\4.3\yardstick\libs\x64\yardstick.dll:
## Permission denied
## Warning: restored 'yardstick'
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
install.packages("tidymodels")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'tidymodels' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
# Load the installed packages
library(parsnip)
library(workflows)
library(yardstick)
## Warning: package 'yardstick' was built under R version 4.3.1
##
## Attaching package: 'yardstick'
## The following object is masked from 'package:readr':
##
## spec
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.3.1
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom 1.0.5 ✔ tibble 3.2.1
## ✔ dials 1.2.0 ✔ tidyr 1.3.0
## ✔ infer 1.0.5 ✔ tune 1.1.2
## ✔ modeldata 1.2.0 ✔ workflowsets 1.0.1
## ✔ purrr 1.0.2
## Warning: package 'dials' was built under R version 4.3.1
## Warning: package 'modeldata' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'tune' was built under R version 4.3.1
## Warning: package 'workflowsets' was built under R version 4.3.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
# Define my_mod, my_rec, and train_test_split
# Perform the last_fit operation
final_fit <- last_fit(my_mod, my_rec, split = train_test_split)
final_fit
## # Resampling results
## # Manual resampling
## # A tibble: 1 × 6
## splits id .metrics .notes .predictions .workflow
## <list> <chr> <list> <list> <list> <list>
## 1 <split [1168/293]> train/test split <tibble> <tibble> <tibble> <workflow>
options(scipen = 999)
final_fit %>%
collect_predictions()
## # A tibble: 293 × 10
## id .pred_drizzle .pred_fog .pred_rain .pred_snow .pred_sun .row
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 train/test spl… 2.47e-16 2.15e-13 0.968 0.0317 1.59e-14 2
## 2 train/test spl… 1.76e-15 7.87e-13 0.380 0.620 7.97e-14 14
## 3 train/test spl… 2.92e-17 2.06e-14 0.103 0.897 1.53e-15 17
## 4 train/test spl… 2.62e-20 2.65e-17 0.00169 0.998 9.49e-19 18
## 5 train/test spl… 1.85e-18 9.95e-16 0.00453 0.995 9.32e-18 19
## 6 train/test spl… 4.28e-15 2.06e-12 0.876 0.124 2.77e-13 26
## 7 train/test spl… 5.71e-18 9.71e-15 0.949 0.0515 3.02e-16 29
## 8 train/test spl… 1.11e-13 3.46e-11 0.999 0.00120 1.68e-12 39
## 9 train/test spl… 1.51e- 1 1.74e- 1 0.0517 0.00000425 6.23e- 1 46
## 10 train/test spl… 8.77e-14 2.47e-11 0.954 0.0456 1.58e-12 51
## # ℹ 283 more rows
## # ℹ 3 more variables: .pred_class <fct>, weather <fct>, .config <chr>
For example, in the first row the predicted probability of drizzle is essentially zero, while the model assigns about 97% probability to rain, and the observed condition was indeed rain; the other rows can be read the same way. A confusion matrix summarising all of the test predictions is sketched below.
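A minimal sketch using yardstick's conf_mat() to cross-tabulate predicted and observed classes on the held-out rows:

# Sketch: confusion matrix of predicted vs. observed weather on the test split
final_fit %>%
collect_predictions() %>%
conf_mat(truth = weather, estimate = .pred_class)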
# Install the required packages if you haven't already
install.packages("parsnip")
## Warning: package 'parsnip' is in use and will not be installed
install.packages("workflows")
## Warning: package 'workflows' is in use and will not be installed
install.packages("yardstick")
## Warning: package 'yardstick' is in use and will not be installed
install.packages("tidymodels")
## Warning: package 'tidymodels' is in use and will not be installed
# Load the installed packages
library(parsnip)
library(workflows)
library(yardstick)
library(tidymodels)
# Assuming you have already defined your `my_wf` workflow and `data_train` and `data_test` datasets
# Fit the model using the training data
trained_model <- last_fit(fit, split = train_test_split)
# Evaluate the model on the testing data
testing_results <- collect_metrics(trained_model, data = train_test_split)
# Print the evaluation metrics (e.g., accuracy, ROC AUC)
testing_results
## # A tibble: 2 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy multiclass 0.850 Preprocessor1_Model1
## 2 roc_auc hand_till 0.858 Preprocessor1_Model1
# Extract predictions from the trained model
predictions <- collect_predictions(trained_model, data = data_test)
# Calculate accuracy
accuracy_result <- accuracy(predictions, truth = weather, estimate = .pred_class)
# Print the accuracy result
accuracy_result
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy multiclass 0.850
# Extract predictions from the trained model
predictions <- collect_predictions(trained_model, data = data_test)
# Calculate precision, recall, and F1 score
precision_result <- precision(predictions, truth = weather, estimate = .pred_class)
## Warning: While computing multiclass `precision()`, some levels had no predicted events (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and those levels will be removed from the averaged result.
## Note that the following number of true events actually occured for each problematic event level:
## 'drizzle': 9
## 'fog': 21
recall_result <- recall(predictions, truth = weather, estimate = .pred_class)
f1_result <- f_meas(predictions, truth = weather, estimate = .pred_class)
## Warning: While computing multiclass `precision()`, some levels had no predicted events (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and those levels will be removed from the averaged result.
## Note that the following number of true events actually occured for each problematic event level:
## 'drizzle': 9
## 'fog': 21
# Print the precision, recall, and F1 score results
precision_result
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 precision macro 0.905
recall_result
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 recall macro 0.509
f1_result
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 f_meas macro 0.856
Next, I want to try a naive Bayes model and check its accuracy, precision, recall, and F1 score.
install.packages("naivebayes")
## Installing package into 'C:/Users/sivak/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'naivebayes' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\sivak\AppData\Local\Temp\Rtmp44Faus\downloaded_packages
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 4.3.1
## naivebayes 0.9.7 loaded
# Load the required data and libraries
library(naivebayes)
# Assuming 'data_train' is your training data and 'data_test' is your test data
# 'weather' is the target variable, and 'temp_max', 'temp_min', 'wind', and 'log_precipitation' are your predictor variables
# Create and fit the Naive Bayes model
nb_model <- naive_bayes(weather ~ temp_max + temp_min + wind + log_precipitation, data = data_train)
# Predict on the test data
nb_predictions <- predict(nb_model, newdata = data_test)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Print the predicted classes
nb_predictions
## [1] rain snow snow snow snow rain rain rain sun
## [10] rain sun rain rain rain rain snow rain sun
## [19] rain rain rain rain rain rain rain rain sun
## [28] sun sun sun rain rain rain sun rain sun
## [37] sun sun sun sun rain rain sun sun sun
## [46] sun sun sun sun sun sun sun sun sun
## [55] sun sun sun sun sun rain rain sun rain
## [64] rain sun rain rain rain rain sun sun rain
## [73] rain rain rain rain snow snow rain rain rain
## [82] sun rain rain sun sun rain rain rain sun
## [91] rain rain sun sun rain rain sun sun sun
## [100] sun sun rain sun sun rain rain rain sun
## [109] sun rain sun sun sun sun sun sun rain
## [118] sun sun sun sun sun sun sun sun rain
## [127] rain sun sun rain sun rain rain rain rain
## [136] rain rain sun rain sun sun sun sun rain
## [145] rain snow rain sun rain sun sun sun rain
## [154] sun rain rain rain rain rain rain sun rain
## [163] sun sun sun sun sun rain rain sun rain
## [172] sun sun sun sun sun sun sun sun rain
## [181] rain rain sun sun sun rain sun sun sun
## [190] sun rain sun sun sun sun sun sun sun
## [199] rain rain sun sun rain rain rain sun sun
## [208] sun rain rain rain rain sun sun rain sun
## [217] rain rain rain rain sun rain rain sun rain
## [226] sun sun sun sun rain rain rain sun rain
## [235] sun sun sun rain sun sun sun sun sun
## [244] sun sun sun sun sun sun sun sun sun
## [253] sun sun sun sun sun sun rain sun sun
## [262] rain rain rain rain rain sun sun sun sun
## [271] sun rain sun sun sun rain rain rain rain
## [280] rain rain sun sun sun drizzle rain rain rain
## [289] rain rain snow rain sun
## Levels: drizzle fog rain snow sun
# Convert predicted classes to character type
nb_predictions <- as.character(nb_predictions)
# Convert true class labels in test data to character type
true_labels <- as.character(data_test$weather)
# Calculate accuracy
accuracy <- sum(nb_predictions == true_labels) / length(nb_predictions)
# Create a confusion matrix
confusion_matrix <- table(true_labels, nb_predictions)
# Calculate precision, recall, and F1 score
precision <- diag(confusion_matrix) / rowSums(confusion_matrix)
## Warning in diag(confusion_matrix)/rowSums(confusion_matrix): longer object
## length is not a multiple of shorter object length
recall <- diag(confusion_matrix) / colSums(confusion_matrix)
f1_score <- 2 * (precision * recall) / (precision + recall)
## Warning in precision * recall: longer object length is not a multiple of
## shorter object length
## Warning in precision + recall: longer object length is not a multiple of
## shorter object length
# Print the metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.8395904
cat("Precision:", precision, "\n")
## Precision: 0 0 0.02158273 0 0
cat("Recall:", recall, "\n")
## Recall: 0 0 0.3333333 0
cat("F1 Score:", f1_score, "\n")
## F1 Score: NaN NaN 0.04054054 NaN NaN
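The warnings above occur because the confusion matrix built from character vectors is not square (some classes, such as fog, are never predicted), so diag(), rowSums(), and colSums() have mismatched lengths; in addition, with rows as truth and columns as predictions, the precision and recall formulas above are swapped. A minimal corrected sketch, reusing nb_predictions and data_test from above:

# Sketch: recompute per-class metrics with a square confusion matrix.
# data_test$weather was converted to a factor earlier; using its levels for the
# predictions guarantees a square table with rows = truth, columns = prediction.
truth_fct <- data_test$weather
pred_fct <- factor(nb_predictions, levels = levels(truth_fct))
cm <- table(truth = truth_fct, prediction = pred_fct)
precision_by_class <- diag(cm) / colSums(cm) # TP / predicted positives (NaN if a class is never predicted)
recall_by_class <- diag(cm) / rowSums(cm)    # TP / actual positives
f1_by_class <- 2 * precision_by_class * recall_by_class /
(precision_by_class + recall_by_class)
cat("Macro precision:", mean(precision_by_class, na.rm = TRUE), "\n")
cat("Macro recall:", mean(recall_by_class, na.rm = TRUE), "\n")
cat("Macro F1:", mean(f1_by_class, na.rm = TRUE), "\n")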
Comparing the naive Bayes model with the multinomial logistic regression model, multinomial logistic regression performs better: it reaches 0.850 accuracy on the held-out test set versus about 0.840 for naive Bayes, so it is the preferred model here.