Derek Corcoran
"19/10, 2017"
library(dplyr)
TempHum <- readRDS("TempHum.rds")
Pun <- TempHum %>% filter(Ciudad_localidad == "Punta Arenas")
Lineal <- lm(TempMedia ~ mes, data = Pun)
Cuad <- lm(TempMedia ~ mes + I(mes^2), data = Pun)
stargazer::stargazer(Lineal, Cuad, type = "html", single.row = TRUE, model.names = TRUE, model.numbers = FALSE)
Dependent variable: | ||
TempMedia | ||
(1) | (2) | |
mes | -0.199*** (0.048) | -3.987*** (0.084) |
I(mes2) | 0.291*** (0.006) | |
Constant | 7.445*** (0.350) | 16.283*** (0.237) |
Observations | 420 | 420 |
R2 | 0.040 | 0.845 |
Adjusted R2 | 0.038 | 0.844 |
Residual Std. Error | 3.364 (df = 418) | 1.355 (df = 417) |
F Statistic | 17.564*** (df = 1; 418) | 1,133.228*** (df = 2; 417) |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
library(broom)
data("mtcars")
Eficiencia <- lm(mpg ~ wt , data = mtcars)
tidy(Eficiencia)
term estimate std.error statistic p.value
1 (Intercept) 37.285126 1.877627 19.857575 8.241799e-19
2 wt -5.344472 0.559101 -9.559044 1.293959e-10
glance(Eficiencia)
r.squared adj.r.squared sigma statistic p.value df logLik
1 0.7528328 0.7445939 3.045882 91.37533 1.293959e-10 2 -80.01471
AIC BIC deviance df.residual
1 166.0294 170.4266 278.3219 30
tidy(Eficiencia)
term estimate std.error statistic p.value
1 (Intercept) 37.285126 1.877627 19.857575 8.241799e-19
2 wt -5.344472 0.559101 -9.559044 1.293959e-10
library(ggplot2)
ggplot(mtcars, aes(x = wt, y = mpg)) + geom_smooth(method = "lm")
MasDatos <- augment(Eficiencia)
knitr::kable(head(MasDatos, 10))
.rownames | mpg | wt | .fitted | .se.fit | .resid | .hat | .sigma | .cooksd | .std.resid |
---|---|---|---|---|---|---|---|---|---|
Mazda RX4 | 21.0 | 2.620 | 23.28261 | 0.6335798 | -2.2826106 | 0.0432690 | 3.067494 | 0.0132741 | -0.7661677 |
Mazda RX4 Wag | 21.0 | 2.875 | 21.91977 | 0.5714319 | -0.9197704 | 0.0351968 | 3.093068 | 0.0017240 | -0.3074305 |
Datsun 710 | 22.8 | 2.320 | 24.88595 | 0.7359177 | -2.0859521 | 0.0583757 | 3.072127 | 0.0154394 | -0.7057525 |
Hornet 4 Drive | 21.4 | 3.215 | 20.10265 | 0.5384424 | 1.2973499 | 0.0312502 | 3.088268 | 0.0030206 | 0.4327511 |
Hornet Sportabout | 18.7 | 3.440 | 18.90014 | 0.5526562 | -0.2001440 | 0.0329218 | 3.097722 | 0.0000760 | -0.0668188 |
Valiant | 18.1 | 3.460 | 18.79325 | 0.5552829 | -0.6932545 | 0.0332355 | 3.095184 | 0.0009211 | -0.2314831 |
Duster 360 | 14.3 | 3.570 | 18.20536 | 0.5734244 | -3.9053627 | 0.0354426 | 3.008664 | 0.0313139 | -1.3055222 |
Merc 240D | 24.4 | 3.190 | 20.23626 | 0.5386565 | 4.1637381 | 0.0312750 | 2.996697 | 0.0311392 | 1.3888971 |
Merc 230 | 22.8 | 3.150 | 20.45004 | 0.5397522 | 2.3499593 | 0.0314024 | 3.066058 | 0.0099619 | 0.7839269 |
Merc 280 | 19.2 | 3.440 | 18.90014 | 0.5526562 | 0.2998560 | 0.0329218 | 3.097435 | 0.0001706 | 0.1001080 |
library(ggplot2)
ggplot(MasDatos, aes(x = wt, y = mpg)) + geom_point(aes(color = "blue")) + geom_point(aes(color = "red",y = .fitted)) + scale_color_manual(name = "Residuales", values = c("blue", "red"), labels = c("Observado", "Predicho")) + geom_segment(aes(xend = wt, yend = .fitted))
hist(MasDatos$.resid)
shapiro.test(MasDatos$.resid)
Shapiro-Wilk normality test
data: MasDatos$.resid
W = 0.94508, p-value = 0.1044
data("Titanic")
library(epitools)
Titanic2 <- expand.table(Titanic)
Titanic2$Survived <- ifelse(Titanic2$Survived == "Yes", 1, 0)
knitr::kable(head(Titanic2))
Class | Sex | Age | Survived |
---|---|---|---|
1st | Male | Child | 1 |
1st | Male | Child | 1 |
1st | Male | Child | 1 |
1st | Male | Child | 1 |
1st | Male | Child | 1 |
1st | Male | Adult | 0 |
library(ggplot2)
ggplot(Titanic2, aes(x = Class, y = Survived)) + geom_violin(aes(fill = Sex))
ModeloTitanic <- glm(Survived ~.,family=binomial() ,data = Titanic2)
stargazer::stargazer(ModeloTitanic, type = "html", single.row = TRUE)
Dependent variable: | |
Survived | |
Class2nd | -1.018*** (0.196) |
Class3rd | -1.778*** (0.172) |
ClassCrew | -0.858*** (0.157) |
SexFemale | 2.420*** (0.140) |
AgeAdult | -1.062*** (0.244) |
Constant | 0.685** (0.273) |
Observations | 2,201 |
Log Likelihood | -1,105.031 |
Akaike Inf. Crit. | 2,222.061 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
library(MuMIn)
library(kableExtra)
library(knitr)
options(na.action = "na.fail")
Select <- dredge(Eficiencia)
Select <-subset(Select, delta <= 2)
Mejor <- get.models(Select, subset = 1)[[1]]
Select <-as.data.frame(Select)
Select <- Select[,colSums(is.na(Select))<nrow(Select)]
kable(Select, digits = 2)
(Intercept) | am | carb | cyl | hp | qsec | wt | df | logLik | AICc | delta | weight | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
642 | 9.62 | 2.94 | NA | NA | NA | 1.23 | -3.92 | 5 | -72.06 | 156.43 | 0.00 | 0.23 |
517 | 39.69 | NA | NA | -1.51 | NA | NA | -3.19 | 4 | -74.01 | 157.49 | 1.06 | 0.13 |
706 | 17.44 | 2.93 | NA | NA | -0.02 | 0.81 | -3.24 | 6 | -71.16 | 157.69 | 1.26 | 0.12 |
581 | 38.75 | NA | NA | -0.94 | -0.02 | NA | -3.17 | 5 | -72.74 | 157.78 | 1.36 | 0.12 |
644 | 12.90 | 3.51 | -0.49 | NA | NA | 1.02 | -3.43 | 6 | -71.28 | 157.92 | 1.50 | 0.11 |
519 | 39.60 | NA | -0.49 | -1.29 | NA | NA | -3.16 | 5 | -72.81 | 157.93 | 1.50 | 0.11 |
577 | 37.23 | NA | NA | NA | -0.03 | NA | -3.88 | 4 | -74.33 | 158.13 | 1.71 | 0.10 |
641 | 19.75 | NA | NA | NA | NA | 0.93 | -5.05 | 4 | -74.36 | 158.20 | 1.77 | 0.09 |
library(MuMIn)
options(na.action = "na.fail")
select <- dredge(ModeloTitanic)
select <-subset(select, delta <= 20)
knitr::kable(select)
(Intercept) | Age | Class | Sex | df | logLik | AICc | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
8 | 0.6853195 | + | + | + | 6 | -1105.031 | 2222.099 | 0.00000 | 0.9997797 |
7 | -0.3531200 | NA | + | + | 5 | -1114.456 | 2238.940 | 16.84076 | 0.0002203 |
library(MuMIn)
options(na.action = "na.fail")
select <- dredge(glm(TempMedia ~ mes + I(mes^2) + I(mes^3) + I(mes^4) + I(mes^5), data = Pun))
select <-subset(select, delta <= 2)
knitr::kable(select)
(Intercept) | mes | I(mes2) | I(mes3) | I(mes4) | I(mes5) | df | logLik | AICc | delta | weight | |
---|---|---|---|---|---|---|---|---|---|---|---|
16 | 7.867330 | 4.954460 | -2.353265 | 0.2915816 | -0.0107301 | NA | 6 | -540.4758 | 1093.155 | 0.000000 | 0.7289686 |
32 | 8.022727 | 4.723586 | -2.247572 | 0.2713073 | -0.0090162 | -5.27e-05 | 7 | -540.4310 | 1095.134 | 1.978792 | 0.2710314 |
library(broom)
mejor <-glm(TempMedia ~ mes + I(mes^2) + I(mes^3) + I(mes^4), data = Pun)
MasDatos <-augment(mejor)
masomenos <- glm(TempMedia ~ mes + I(mes^2), data = Pun)
MasoMenosDatos <-augment(masomenos)
library(ggplot2)
MejorGraph <- ggplot(MasDatos, aes(x = mes, y = TempMedia)) + geom_point(color = "blue") + geom_line(aes(y = .fitted), color = "red")
MasMenosGraph <- ggplot(MasoMenosDatos, aes(x = mes, y = TempMedia)) + geom_point(color = "blue") + geom_line(aes(y = .fitted), color = "red")
library(gridExtra)
grid.arrange(MejorGraph, MasMenosGraph, ncol = 1)
library(caret)
Eficiencia <- train(mpg ~ wt, method = "lm", data = mtcars)
glance(Eficiencia$finalModel)
r.squared adj.r.squared sigma statistic p.value df logLik
1 0.7528328 0.7445939 3.045882 91.37533 1.293959e-10 2 -80.01471
AIC BIC deviance df.residual
1 166.0294 170.4266 278.3219 30
library(caret)
Eficiencia2 <- train(mpg ~ wt, method = "glm", data = mtcars)
glance(Eficiencia2$finalModel)
null.deviance df.null logLik AIC BIC deviance df.residual
1 1126.047 31 -80.01471 166.0294 170.4266 278.3219 30
library(caret)
Eficiencia3 <- train(mpg ~ wt, method = "bagEarth", data = mtcars)
Eficiencia3$results
degree nprune RMSE Rsquared RMSESD RsquaredSD
1 1 2 3.089513 0.7582406 0.8052680 0.1126395
2 1 3 3.039855 0.7588059 0.7055485 0.1055907
3 1 5 3.011795 0.7628268 0.7078696 0.1002260
library(caret)
Especies <- train(Species ~. , method = "glm", data = iris)
library(caret)
Especies <- train(Species ~. , method = "rpart", data = iris)
Especies$results
cp Accuracy Kappa AccuracySD KappaSD
1 0.00 0.9339355 0.9004116 0.0287348 0.04298657
2 0.44 0.8333194 0.7551549 0.1529923 0.21978473
3 0.50 0.4662235 0.2253695 0.1592222 0.22421450
library(rattle)
library(rpart.plot)
rpart.plot(Especies$finalModel)