library(readr)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: lattice
library(rsample)
##
## Adjuntando el paquete: 'rsample'
## The following object is masked from 'package:caret':
##
## calibration
# Load the sleep-efficiency data set (path is relative to the working
# directory). readr guesses the column types and reports them in the message
# recorded below.
Sleep <- read_csv(file = "Sleep_Efficiency.csv")
## Rows: 452 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gender, Smoking status
## dbl (11): ID, Age, Sleep duration, Sleep efficiency, REM sleep percentage, ...
## dttm (2): Bedtime, Wakeup time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the tibble to a base data.frame, turning the character columns into
# factors. The conversion also rewrites the column names into syntactic form
# (spaces become dots), as the names() output below shows.
Sleep <- as.data.frame(unclass(Sleep), stringsAsFactors = TRUE)
# View() opens a data-viewer window, so only call it from an interactive
# session; this keeps the script runnable non-interactively (Rscript, knitr).
if (interactive()) {
  View(Sleep)
}
names(Sleep)
## [1] "ID" "Age" "Gender"
## [4] "Bedtime" "Wakeup.time" "Sleep.duration"
## [7] "Sleep.efficiency" "REM.sleep.percentage" "Deep.sleep.percentage"
## [10] "Light.sleep.percentage" "Awakenings" "Caffeine.consumption"
## [13] "Alcohol.consumption" "Smoking.status" "Exercise.frequency"
##V-Fold Cross-Validation
dim(Sleep)
## [1] 452 15
# rsample assigns rows to folds at random, so fix the seed to make the
# partition reproducible (123 is an arbitrary choice).
set.seed(123)
# Build the folds once and print that same object. Calling vfold_cv() a second
# time would draw a different random partition than the one stored in vfold_x.
vfold_x <- vfold_cv(Sleep, v = 10)
vfold_x
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [406/46]> Fold01
## 2 <split [406/46]> Fold02
## 3 <split [407/45]> Fold03
## 4 <split [407/45]> Fold04
## 5 <split [407/45]> Fold05
## 6 <split [407/45]> Fold06
## 7 <split [407/45]> Fold07
## 8 <split [407/45]> Fold08
## 9 <split [407/45]> Fold09
## 10 <split [407/45]> Fold10
###
# Hold out 20% of the rows as a test set, stratified on the response
# (Sleep.duration).
# initial_split() samples at random: the seed (arbitrary value) makes the
# partition, and every number derived from it, reproducible. The `##` output
# recorded in this file came from an unseeded run, so a re-run will not
# reproduce those digits exactly.
set.seed(123)
split <- initial_split(Sleep, prop = 0.8, strata = "Sleep.duration")
Sleep_train <- training(split)
Sleep_test <- testing(split)

# Simple linear regression of sleep duration on sleep efficiency, fitted on
# the training rows only.
model1 <- lm(Sleep.duration ~ Sleep.efficiency, data = Sleep_train)
summary(model1)
##
## Call:
## lm(formula = Sleep.duration ~ Sleep.efficiency, data = Sleep_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.46086 -0.45304 0.04491 0.54886 2.55068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.47666 0.26865 27.83 <2e-16 ***
## Sleep.efficiency -0.03038 0.33678 -0.09 0.928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8655 on 358 degrees of freedom
## Multiple R-squared: 2.272e-05, Adjusted R-squared: -0.002771
## F-statistic: 0.008136 on 1 and 358 DF, p-value: 0.9282
# sigma() is the residual standard error, sqrt(RSS / residual df); it is the
# same number as the "Residual standard error" line of summary(model1).
# It is a df-adjusted estimate of the error SD, not the plain training RMSE
# (which would be sqrt(mean(residuals(model1)^2)) and is slightly smaller).
sigma(model1)
## [1] 0.8655271
# Its square is the residual variance estimate, RSS / residual df
# (the df-adjusted counterpart of the training MSE).
sigma(model1)^2
## [1] 0.7491372
# 95% confidence intervals for the intercept and the slope.
confint(model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 6.9483210 8.0049944
## Sleep.efficiency -0.6926822 0.6319298
##caret
#model1
# 10-fold cross-validation of the one-predictor model through caret.
# The folds are drawn at random; seeding right before train() makes the
# resampled metrics reproducible, and using the same seed before any other
# train() call on the same data gives the models identical folds, so their
# CV errors are directly comparable. (The `##` output recorded in this file
# predates the seed and will differ numerically.)
set.seed(123)
cv_model1 <- train(
  form = Sleep.duration ~ Sleep.efficiency,
  data = Sleep_train,
  method = "lm",
  trControl = trainControl(method = "cv", number = 10)
)
# Resampled performance averaged over the folds, with its spread.
cv_model1$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.8580938 0.01318794 0.6533849 0.1133707 0.01929105 0.0703815
# Cross-validated RMSE on its own.
cv_model1[["results"]][["RMSE"]]
## [1] 0.8580938
# The lm fit that caret stores as the final model.
cv_model1[["finalModel"]]
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Coefficients:
## (Intercept) Sleep.efficiency
## 7.47666 -0.03038
# Metrics for each individual fold.
cv_model1[["resample"]]
## RMSE Rsquared MAE Resample
## 1 0.6608008 4.093091e-03 0.5341207 Fold01
## 2 0.8812110 6.636744e-02 0.6443924 Fold02
## 3 0.8328914 5.876394e-03 0.6328331 Fold03
## 4 0.7072253 1.163005e-02 0.5611163 Fold04
## 5 0.9230607 1.172550e-02 0.7121637 Fold05
## 6 0.9400367 3.661831e-03 0.6991950 Fold06
## 7 1.0123186 1.513574e-02 0.7427958 Fold07
## 8 0.8234389 1.034693e-05 0.6563831 Fold08
## 9 0.8181524 1.042135e-02 0.6170595 Fold09
## 10 0.9818026 2.957698e-03 0.7337900 Fold10
# Full summary of the resampling run.
print(cv_model1)
## Linear Regression
##
## 360 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 324, 323, 324, 324, 323, 324, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.8580938 0.01318794 0.6533849
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
#model2
# 10-fold cross-validation of the two-predictor model (adds
# REM.sleep.percentage) through caret.
# Seeding right before train() makes the folds reproducible; using the same
# seed before any other train() call on the same data yields identical folds,
# so the models' CV errors are compared on equal terms. (The `##` output
# recorded in this file predates the seed and will differ numerically.)
set.seed(123)
cv_model2 <- train(
  form = Sleep.duration ~ Sleep.efficiency + REM.sleep.percentage,
  data = Sleep_train,
  method = "lm",
  trControl = trainControl(method = "cv", number = 10)
)
# Cross-validated RMSE.
cv_model2$results$RMSE
## [1] 0.8658866
# The lm fit that caret stores as the final model.
cv_model2[["finalModel"]]
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Coefficients:
## (Intercept) Sleep.efficiency REM.sleep.percentage
## 7.631772 -0.013089 -0.007468
# Metrics for each individual fold.
cv_model2[["resample"]]
## RMSE Rsquared MAE Resample
## 1 0.7851389 1.281089e-01 0.6323213 Fold01
## 2 0.7364212 1.397875e-03 0.5916214 Fold02
## 3 0.9252839 7.179172e-04 0.6902221 Fold03
## 4 0.7904127 1.034620e-03 0.6121849 Fold04
## 5 0.8775245 5.746887e-03 0.6401153 Fold05
## 6 0.8511409 3.906858e-02 0.6414378 Fold06
## 7 0.9036466 1.110634e-01 0.7014613 Fold07
## 8 1.0310877 3.728090e-04 0.7580479 Fold08
## 9 0.8391072 1.004516e-01 0.6351740 Fold09
## 10 0.9191021 1.086389e-05 0.7006501 Fold10
# Full summary of the resampling run.
print(cv_model2)
## Linear Regression
##
## 360 samples
## 2 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 324, 324, 324, 325, 323, 323, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.8658866 0.03879734 0.6603236
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Predicted sleep duration for the first five held-out rows, one call per
# model.
predict(cv_model1, newdata = slice(Sleep_test, 1:5))
## 1 2 3 4 5
## 7.453572 7.449015 7.459951 7.451749 7.451445
predict(cv_model2, newdata = slice(Sleep_test, 1:5))
## 1 2 3 4 5
## 7.420176 7.470491 7.452798 7.426859 7.456602