20/10/2016
y = a*x + b (onde a e b são constantes)
# Data set mtcars:
# mpg = milhas por galão (consumo)
# hp = horsepower
plot(mtcars$hp, mtcars$mpg)
# Correlation between horsepower (hp) and fuel consumption (mpg), together
# with its significance test. The output below reports Pearson's correlation.
cor.test(mtcars$hp, mtcars$mpg) ## significance test
## ## Pearson's product-moment correlation ## ## data: mtcars$hp and mtcars$mpg ## t = -6.7424, df = 30, p-value = 1.788e-07 ## alternative hypothesis: true correlation is not equal to 0 ## 95 percent confidence interval: ## -0.8852686 -0.5860994 ## sample estimates: ## cor ## -0.7761684
# A correlation can be high and
# still not be statistically significant!
# Same pair of variables, now using Spearman's rank correlation.
cor.test(mtcars$hp, mtcars$mpg, method="spearman")
## Warning in cor.test.default(mtcars$hp, mtcars$mpg, method = "spearman"): ## Cannot compute exact p-value with ties
## ## Spearman's rank correlation rho ## ## data: mtcars$hp and mtcars$mpg ## S = 10337, p-value = 5.086e-12 ## alternative hypothesis: true rho is not equal to 0 ## sample estimates: ## rho ## -0.8946646
# Fit a simple linear regression of fuel consumption (mpg) on horsepower (hp).
modelo <- lm(mpg ~ hp, data = mtcars)

# Scatter plot of the data with the fitted regression line overlaid in red.
plot(mtcars$hp, mtcars$mpg)
abline(modelo, col = "red")
b é chamado de Intercept.
print(modelo)
## ## Call: ## lm(formula = mpg ~ hp, data = mtcars) ## ## Coefficients: ## (Intercept) hp ## 30.09886 -0.06823
Detalhes do modelo de regressão podem ser obtidos com summary(modelo)
## ## Call: ## lm(formula = mpg ~ hp, data = mtcars) ## ## Residuals: ## Min 1Q Median 3Q Max ## -5.7121 -2.1122 -0.8854 1.5819 8.2360 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 30.09886 1.63392 18.421 < 2e-16 *** ## hp -0.06823 0.01012 -6.742 1.79e-07 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 3.863 on 30 degrees of freedom ## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892 ## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
Na regressão múltipla, consideram-se 2 ou mais variáveis independentes.
# Multiple regression: mpg modelled on hp, wt and cyl together.
modelo <- lm(formula = mpg ~ hp + wt + cyl, data = mtcars)

# Show the call and the estimated coefficients.
print(modelo)
## ## Call: ## lm(formula = mpg ~ hp + wt + cyl, data = mtcars) ## ## Coefficients: ## (Intercept) hp wt cyl ## 38.75179 -0.01804 -3.16697 -0.94162
## ## Call: ## lm(formula = mpg ~ hp + wt + cyl, data = mtcars) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.9290 -1.5598 -0.5311 1.1850 5.8986 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 38.75179 1.78686 21.687 < 2e-16 *** ## hp -0.01804 0.01188 -1.519 0.140015 ## wt -3.16697 0.74058 -4.276 0.000199 *** ## cyl -0.94162 0.55092 -1.709 0.098480 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 2.512 on 28 degrees of freedom ## Multiple R-squared: 0.8431, Adjusted R-squared: 0.8263 ## F-statistic: 50.17 on 3 and 28 DF, p-value: 2.184e-11
| | hp | vs |
|---|---|---|
| Mazda RX4 | 110 | 0 |
| Mazda RX4 Wag | 110 | 0 |
| Datsun 710 | 93 | 1 |
| Hornet 4 Drive | 110 | 1 |
| Hornet Sportabout | 175 | 0 |
| Valiant | 105 | 1 |
| Duster 360 | 245 | 0 |
| Merc 240D | 62 | 1 |
| Merc 230 | 95 | 1 |
| Merc 280 | 123 | 1 |
| Merc 280C | 123 | 1 |
| Merc 450SE | 180 | 0 |
| Merc 450SL | 180 | 0 |
| Merc 450SLC | 180 | 0 |
| Cadillac Fleetwood | 205 | 0 |
| Lincoln Continental | 215 | 0 |
| Chrysler Imperial | 230 | 0 |
| Fiat 128 | 66 | 1 |
| Honda Civic | 52 | 1 |
| Toyota Corolla | 65 | 1 |
| Toyota Corona | 97 | 1 |
| Dodge Challenger | 150 | 0 |
| AMC Javelin | 150 | 0 |
| Camaro Z28 | 245 | 0 |
| Pontiac Firebird | 175 | 0 |
| Fiat X1-9 | 66 | 1 |
| Porsche 914-2 | 91 | 0 |
| Lotus Europa | 113 | 1 |
| Ford Pantera L | 264 | 0 |
| Ferrari Dino | 175 | 0 |
| Maserati Bora | 335 | 0 |
| Volvo 142E | 109 | 1 |
library(MLmetrics, warn.conflicts = FALSE)
# dplyr provides %>%, select() and mutate(), which are used below.
library(dplyr, warn.conflicts = FALSE)

# vs: engine type (0 = V-shaped engine, 1 = straight engine)
# hp: engine power, in horsepower

# Fit a logistic regression model of engine type (vs) on horsepower (hp).
logreg <- glm(
  formula = vs ~ hp,
  family = binomial(link = "logit"),
  data = mtcars
)

# Use the model to predict the engine type: a fitted value below 0.5 is
# labelled 0, anything else is labelled 1. The result keeps only hp, the
# observed vs, and the predicted class in the new column vsPrevisto.
dados <- mtcars %>%
  select(hp, vs) %>%
  mutate(vsPrevisto = ifelse(fitted(logreg) < 0.5, 0, 1))
| hp | vs | vsPrevisto |
|---|---|---|
| 110 | 0 | 1 |
| 110 | 0 | 1 |
| 93 | 1 | 1 |
| 110 | 1 | 1 |
| 175 | 0 | 0 |
| 105 | 1 | 1 |
| 245 | 0 | 0 |
| 62 | 1 | 1 |
| 95 | 1 | 1 |
| 123 | 1 | 0 |
| 123 | 1 | 0 |
| 180 | 0 | 0 |
| 180 | 0 | 0 |
| 180 | 0 | 0 |
| 205 | 0 | 0 |
| 215 | 0 | 0 |
| 230 | 0 | 0 |
| 66 | 1 | 1 |
| 52 | 1 | 1 |
| 65 | 1 | 1 |
| 97 | 1 | 1 |
| 150 | 0 | 0 |
| 150 | 0 | 0 |
| 245 | 0 | 0 |
| 175 | 0 | 0 |
| 66 | 1 | 1 |
| 91 | 0 | 1 |
| 113 | 1 | 1 |
| 264 | 0 | 0 |
| 175 | 0 | 0 |
| 335 | 0 | 0 |
| 109 | 1 | 1 |
# Confusion matrix: rows are the predicted class (vsPrevisto), columns are the
# observed class (vs).
xtabs(~ vsPrevisto + vs, data=dados)
## vs ## vsPrevisto 0 1 ## 0 15 2 ## 1 3 12
# Accuracy: share of rows whose predicted class equals the observed class,
# (15 + 12) / 32 in the table above.
Accuracy(y_pred = dados$vsPrevisto, y_true = dados$vs)
## [1] 0.84375
# NOTE(review): the value below equals 15 / 17 from the table above, i.e.
# class 0 is being treated as the positive class — confirm this is intended;
# MLmetrics appears to accept a `positive` argument to choose the other class.
Precision(y_pred = dados$vsPrevisto, y_true = dados$vs)
## [1] 0.8823529
# Likewise 15 / 18 in the table above: recall with class 0 as positive.
Recall(y_pred = dados$vsPrevisto, y_true = dados$vs)
## [1] 0.8333333
# The reported value matches the harmonic mean of the precision and recall
# printed above.
F1_Score(y_pred = dados$vsPrevisto, y_true = dados$vs)
## [1] 0.8571429