library(dplyr)
library(ggplot2)
faithful %>% ggplot( aes(x=eruptions, y=waiting) ) +
geom_point()
fit<-lm(waiting~eruptions,data=faithful)
fit
Call:
lm(formula = waiting ~ eruptions, data = faithful)
Coefficients:
(Intercept) eruptions
33.47 10.73
fit$coefficients
(Intercept) eruptions
33.47440 10.72964
data.frame(observacion=faithful$waiting,
estimacion=fit$fitted.values,
residuo_manual=abs(fit$fitted.values-faithful$waiting),
residuo=fit$residuals)
sum(fit$residuals)
[1] -3.108624e-15
\[RMSE=\sqrt{ \sum_{i=1}^{n} (Y_i-\hat Y_i)^2 }\]
RMSE
[1] 5.892227
Valor RMSE del modelo 6.0878763
summary(faithful)
eruptions waiting
Min. :1.600 Min. :43.0
1st Qu.:2.163 1st Qu.:58.0
Median :4.000 Median :76.0
Mean :3.488 Mean :70.9
3rd Qu.:4.454 3rd Qu.:82.0
Max. :5.100 Max. :96.0
summary(fit)
Call:
lm(formula = waiting ~ eruptions, data = faithful)
Residuals:
Min 1Q Median 3Q Max
-12.0796 -4.4831 0.2122 3.9246 15.9719
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.4744 1.1549 28.98 <2e-16 ***
eruptions 10.7296 0.3148 34.09 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.914 on 270 degrees of freedom
Multiple R-squared: 0.8115, Adjusted R-squared: 0.8108
F-statistic: 1162 on 1 and 270 DF, p-value: < 2.2e-16
nrow(faithful)
[1] 272
Shuffling
set.seed(161)
index<-1:nrow(faithful)
shuffle_index<-sample(index)
shuffle_faithful<-faithful[shuffle_index,]
train<-shuffle_faithful[1:(0.7*nrow(faithful)) , ]
test<-shuffle_faithful[(0.7*nrow(faithful)):nrow(faithful),]
nrow(train)
[1] 190
nrow(test)
[1] 82
fit<-lm(data=train, waiting ~ eruptions)
pred_test<-fit$coefficients[1]+fit$coefficients[2]*test$eruptions
length(pred_test)
[1] 82
RMSE<-sqrt( sum( (test$waiting-as.vector(pred_test))^2 )/nrow(test) )
RMSE
[1] 6.087876
delta<-nrow(faithful)/10
test1<-shuffle_faithful[1:delta,]
train1<-shuffle_faithful[-(1:delta),]
test2<-shuffle_faithful[delta:(2*delta),]
train2<-shuffle_faithful[-(delta:(2*delta)),]
test3<-shuffle_faithful[(2*delta):(3*delta),]
train3<-shuffle_faithful[-((2*delta):(3*delta)),]
test4<-shuffle_faithful[(3*delta):(4*delta),]
train4<-shuffle_faithful[-((3*delta):(4*delta)),]
test5<-shuffle_faithful[(4*delta):(5*delta),]
train5<-shuffle_faithful[-((4*delta):(5*delta)),]
test6<-shuffle_faithful[(5*delta):(6*delta),]
train6<-shuffle_faithful[-((5*delta):(6*delta)),]
test7<-shuffle_faithful[(6*delta):(7*delta),]
train7<-shuffle_faithful[-((6*delta):(7*delta)),]
test8<-shuffle_faithful[(7*delta):(8*delta),]
train8<-shuffle_faithful[-((7*delta):(8*delta)),]
test9<-shuffle_faithful[(8*delta):(9*delta),]
train9<-shuffle_faithful[-((8*delta):(9*delta)),]
test10<-shuffle_faithful[(9*delta):(10*delta),]
train10<-shuffle_faithful[-((9*delta):(10*delta)),]
fit1<-lm(data = train1, formula = waiting~eruptions)
fit2<-lm(data = train2, formula = waiting~eruptions)
fit3<-lm(data = train3, formula = waiting~eruptions)
fit4<-lm(data = train4, formula = waiting~eruptions)
fit5<-lm(data = train5, formula = waiting~eruptions)
fit6<-lm(data = train6, formula = waiting~eruptions)
fit7<-lm(data = train7, formula = waiting~eruptions)
fit8<-lm(data = train8, formula = waiting~eruptions)
fit9<-lm(data = train9, formula = waiting~eruptions)
fit10<-lm(data = train10, formula = waiting~eruptions)
ptest1<-fit1$coefficients[1]+fit1$coefficients[2]*test1$eruptions
ptest2<-fit2$coefficients[1]+fit2$coefficients[2]*test2$eruptions
ptest3<-fit3$coefficients[1]+fit3$coefficients[2]*test3$eruptions
ptest4<-fit4$coefficients[1]+fit4$coefficients[2]*test4$eruptions
ptest5<-fit5$coefficients[1]+fit5$coefficients[2]*test5$eruptions
ptest6<-fit6$coefficients[1]+fit6$coefficients[2]*test6$eruptions
ptest7<-fit7$coefficients[1]+fit7$coefficients[2]*test7$eruptions
ptest8<-fit8$coefficients[1]+fit8$coefficients[2]*test8$eruptions
ptest9<-fit9$coefficients[1]+fit9$coefficients[2]*test9$eruptions
ptest10<-fit10$coefficients[1]+fit10$coefficients[2]*test10$eruptions
RMSE1<-sqrt( sum( (test1$waiting-as.vector(ptest1))^2 )/nrow(test1) )
RMSE2<-sqrt( sum( (test2$waiting-as.vector(ptest2))^2 )/nrow(test2) )
RMSE3<-sqrt( sum( (test3$waiting-as.vector(ptest3))^2 )/nrow(test3) )
RMSE4<-sqrt( sum( (test4$waiting-as.vector(ptest4))^2 )/nrow(test4) )
RMSE5<-sqrt( sum( (test5$waiting-as.vector(ptest5))^2 )/nrow(test5) )
RMSE6<-sqrt( sum( (test6$waiting-as.vector(ptest6))^2 )/nrow(test6) )
RMSE7<-sqrt( sum( (test7$waiting-as.vector(ptest7))^2 )/nrow(test7) )
RMSE8<-sqrt( sum( (test8$waiting-as.vector(ptest8))^2 )/nrow(test8) )
RMSE9<-sqrt( sum( (test9$waiting-as.vector(ptest9))^2 )/nrow(test9) )
RMSE10<-sqrt( sum( (test10$waiting-as.vector(ptest10))^2 )/nrow(test10) )
c(RMSE1,RMSE2,RMSE3,RMSE4,RMSE5,RMSE6,RMSE7,RMSE8,RMSE9,RMSE10)%>%mean()
[1] 5.93506
fit<-lm(data = faithful, waiting~eruptions)
summary(fit)
Call:
lm(formula = waiting ~ eruptions, data = faithful)
Residuals:
Min 1Q Median 3Q Max
-12.0796 -4.4831 0.2122 3.9246 15.9719
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.4744 1.1549 28.98 <2e-16 ***
eruptions 10.7296 0.3148 34.09 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.914 on 270 degrees of freedom
Multiple R-squared: 0.8115, Adjusted R-squared: 0.8108
F-statistic: 1162 on 1 and 270 DF, p-value: < 2.2e-16