library(readr)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: lattice
library(rsample)
## 
## Adjuntando el paquete: 'rsample'
## The following object is masked from 'package:caret':
## 
##     calibration
# Read the sleep-efficiency data set from the working directory.
Sleep <- read_csv(file = "Sleep_Efficiency.csv")
## Rows: 452 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): Gender, Smoking status
## dbl  (11): ID, Age, Sleep duration, Sleep efficiency, REM sleep percentage, ...
## dttm  (2): Bedtime, Wakeup time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the tibble to a base data frame. unclass() strips the tibble class
# so that as.data.frame() rebuilds the columns from a plain list: character
# columns become factors and the column names are made syntactic
# ("Sleep duration" -> "Sleep.duration"), which the model formulas rely on.
Sleep <- as.data.frame(unclass(Sleep), stringsAsFactors = TRUE)

# Open the spreadsheet viewer only in an interactive session; batch runs
# (Rscript, knitting) may have no viewer or display available.
if (interactive()) {
  View(Sleep)
}
names(Sleep)
##  [1] "ID"                     "Age"                    "Gender"                
##  [4] "Bedtime"                "Wakeup.time"            "Sleep.duration"        
##  [7] "Sleep.efficiency"       "REM.sleep.percentage"   "Deep.sleep.percentage" 
## [10] "Light.sleep.percentage" "Awakenings"             "Caffeine.consumption"  
## [13] "Alcohol.consumption"    "Smoking.status"         "Exercise.frequency"
## V-Fold Cross-Validation
dim(Sleep)
## [1] 452  15
# Build the 10-fold partition once, with a fixed seed, so that the folds
# printed here are exactly the ones stored in vfold_x and are identical on
# every run.
set.seed(123)
vfold_x <- vfold_cv(Sleep, v = 10)
vfold_x
## #  10-fold cross-validation 
## # A tibble: 10 × 2
##    splits           id    
##    <list>           <chr> 
##  1 <split [406/46]> Fold01
##  2 <split [406/46]> Fold02
##  3 <split [407/45]> Fold03
##  4 <split [407/45]> Fold04
##  5 <split [407/45]> Fold05
##  6 <split [407/45]> Fold06
##  7 <split [407/45]> Fold07
##  8 <split [407/45]> Fold08
##  9 <split [407/45]> Fold09
## 10 <split [407/45]> Fold10


### Train/test split
# 80/20 split, stratified on the response (Sleep.duration). The seed makes
# the partition, and every result derived from it, reproducible between runs.
# NOTE: any echoed results in this file that came from an unseeded run must be
# regenerated after this change.
set.seed(123)
split <- initial_split(Sleep, prop = 0.8, strata = "Sleep.duration")
Sleep_train <- training(split)
Sleep_test <- testing(split)

# Baseline: simple linear regression of sleep duration on sleep efficiency,
# fitted on the training partition only.
model1 <- lm(formula = Sleep.duration ~ Sleep.efficiency, data = Sleep_train)
summary(model1)
## 
## Call:
## lm(formula = Sleep.duration ~ Sleep.efficiency, data = Sleep_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.46086 -0.45304  0.04491  0.54886  2.55068 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.47666    0.26865   27.83   <2e-16 ***
## Sleep.efficiency -0.03038    0.33678   -0.09    0.928    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8655 on 358 degrees of freedom
## Multiple R-squared:  2.272e-05,  Adjusted R-squared:  -0.002771 
## F-statistic: 0.008136 on 1 and 358 DF,  p-value: 0.9282
# Residual standard error, sqrt(RSS / residual df) -- the same value that
# summary(model1) reports. It is close to the training RMSE but not identical:
# it divides by the residual degrees of freedom rather than by n.
sigma(model1)
## [1] 0.8655271
# Squared residual standard error, i.e. the estimated error variance
# (df-adjusted), not the plain mean squared error.
sigma(model1)^2
## [1] 0.7491372
# 95% confidence intervals for the intercept and the slope.
confint(model1, level=0.95)
##                       2.5 %    97.5 %
## (Intercept)       6.9483210 8.0049944
## Sleep.efficiency -0.6926822 0.6319298
## caret
# model1: 10-fold cross-validated linear regression with a single predictor.
# The seed is set immediately before train() so the fold assignment is
# reproducible. Seeding every train() call on the same data with the same
# value gives each model identical folds, which makes their cross-validated
# metrics directly comparable. Echoed metrics from earlier unseeded runs need
# to be regenerated.
set.seed(123)
cv_model1 <- train(
  form = Sleep.duration ~ Sleep.efficiency,
  data = Sleep_train,
  method = "lm",
  trControl = trainControl(
    method = "cv", # k-fold cross-validation
    number = 10    # k = 10 folds
  )
)
# Cross-validated metrics averaged over the folds, with their fold-to-fold SDs.
cv_model1[["results"]]
##   intercept      RMSE   Rsquared       MAE    RMSESD RsquaredSD     MAESD
## 1      TRUE 0.8580938 0.01318794 0.6533849 0.1133707 0.01929105 0.0703815
# Mean cross-validated RMSE on its own.
cv_model1[["results"]][["RMSE"]]
## [1] 0.8580938
# Underlying lm fit kept by caret (same coefficients as model1).
cv_model1[["finalModel"]]
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Coefficients:
##      (Intercept)  Sleep.efficiency  
##          7.47666          -0.03038
# Per-fold metrics behind the averages above.
cv_model1[["resample"]]
##         RMSE     Rsquared       MAE Resample
## 1  0.6608008 4.093091e-03 0.5341207   Fold01
## 2  0.8812110 6.636744e-02 0.6443924   Fold02
## 3  0.8328914 5.876394e-03 0.6328331   Fold03
## 4  0.7072253 1.163005e-02 0.5611163   Fold04
## 5  0.9230607 1.172550e-02 0.7121637   Fold05
## 6  0.9400367 3.661831e-03 0.6991950   Fold06
## 7  1.0123186 1.513574e-02 0.7427958   Fold07
## 8  0.8234389 1.034693e-05 0.6563831   Fold08
## 9  0.8181524 1.042135e-02 0.6170595   Fold09
## 10 0.9818026 2.957698e-03 0.7337900   Fold10
# Full summary of the resampling run.
cv_model1
## Linear Regression 
## 
## 360 samples
##   1 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 324, 323, 324, 324, 323, 324, ... 
## Resampling results:
## 
##   RMSE       Rsquared    MAE      
##   0.8580938  0.01318794  0.6533849
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# model2: 10-fold cross-validated linear regression with two predictors
# (sleep efficiency and REM sleep percentage).
# The seed is set immediately before train() so the fold assignment is
# reproducible. Using the same seed value for every train() call on the same
# data gives each model identical folds, so their cross-validated metrics can
# be compared fold for fold. Echoed metrics from earlier unseeded runs need to
# be regenerated.
set.seed(123)
cv_model2 <- train(
  form = Sleep.duration ~ Sleep.efficiency + REM.sleep.percentage,
  data = Sleep_train,
  method = "lm",
  trControl = trainControl(
    method = "cv", # k-fold cross-validation
    number = 10    # k = 10 folds
  )
)
# Mean cross-validated RMSE of the two-predictor model.
cv_model2[["results"]][["RMSE"]]
## [1] 0.8658866
# Underlying lm fit kept by caret.
cv_model2[["finalModel"]]
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Coefficients:
##          (Intercept)      Sleep.efficiency  REM.sleep.percentage  
##             7.631772             -0.013089             -0.007468
# Per-fold metrics.
cv_model2[["resample"]]
##         RMSE     Rsquared       MAE Resample
## 1  0.7851389 1.281089e-01 0.6323213   Fold01
## 2  0.7364212 1.397875e-03 0.5916214   Fold02
## 3  0.9252839 7.179172e-04 0.6902221   Fold03
## 4  0.7904127 1.034620e-03 0.6121849   Fold04
## 5  0.8775245 5.746887e-03 0.6401153   Fold05
## 6  0.8511409 3.906858e-02 0.6414378   Fold06
## 7  0.9036466 1.110634e-01 0.7014613   Fold07
## 8  1.0310877 3.728090e-04 0.7580479   Fold08
## 9  0.8391072 1.004516e-01 0.6351740   Fold09
## 10 0.9191021 1.086389e-05 0.7006501   Fold10
# Full summary of the resampling run.
cv_model2
## Linear Regression 
## 
## 360 samples
##   2 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 324, 324, 324, 325, 323, 323, ... 
## Resampling results:
## 
##   RMSE       Rsquared    MAE      
##   0.8658866  0.03879734  0.6603236
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Predicted sleep duration for the first five held-out rows, single-predictor
# model.
predict(cv_model1, newdata = slice(Sleep_test, 1:5))
##        1        2        3        4        5 
## 7.453572 7.449015 7.459951 7.451749 7.451445
# Same five rows, two-predictor model.
predict(cv_model2, newdata = slice(Sleep_test, 1:5))
##        1        2        3        4        5 
## 7.420176 7.470491 7.452798 7.426859 7.456602