require(DT)
require(tidyverse)
#install.packages("readxl")
require("readxl")
Dados dos 6 automóveis
Referência
C. W. Ahn, K. C. Chae, and G. M. Clark, “Estimating parameters of the power law process with two measures of failure time,” Journal of Quality Technology, 1998, vol. 30, iss. 2, pp. 127-132.
# Read the car dataset from the Excel workbook, then display it with
# DT's datatable().
dados <- read_excel(path = "dados_automoveis.xls")
datatable(data = dados)
Análise gráfica
Temos uma linha média, e os carros se distribuem em torno dela: alguns “mais acelerados” e outros “menos acelerados”.
# Scatter plot of Mil against Age, coloured by `car`, with a
# linear-model smoother drawn on top of the points.
ggplot(data = dados, mapping = aes(x = Age, y = Mil, color = car)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Pooled simple linear regression of Mil on Age over all rows of `dados`
# (the car identifier is not part of the model), followed by its summary.
model_medio <- lm(formula = Mil ~ Age, data = dados)
summary(object = model_medio)
##
## Call:
## lm(formula = Mil ~ Age, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26058 -8311 1197 7506 21753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -296.571 2678.812 -0.111 0.912
## Age 35.762 2.904 12.315 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11670 on 98 degrees of freedom
## Multiple R-squared: 0.6075, Adjusted R-squared: 0.6035
## F-statistic: 151.7 on 1 and 98 DF, p-value: < 2.2e-16
# Diagnostic plots for the pooled fit (plot method for lm objects).
plot(model_medio)
# Shapiro-Wilk normality test on the residuals of the pooled fit.
shapiro.test(residuals(model_medio))
##
## Shapiro-Wilk normality test
##
## data: residuals(model_medio)
## W = 0.9767, p-value = 0.07331
É interessante separar os carros: cada um tem sua própria linha média de regressão (Milhas × Idade).
# Same scatter plot, but `car` is converted to a factor for the colour
# aesthetic, so points and linear-model smoothers are drawn per car.
ggplot(
  data = dados,
  mapping = aes(x = Age, y = Mil, color = as.factor(car))
) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
Estimando o coeficiente angular da linha de regressão de cada carro:
# Fit a separate simple regression (Mil ~ Age) for each car, run a
# Shapiro-Wilk normality test on each fit's residuals, and draw the
# diagnostic plots for each fit.
#
# The number of cars is taken from the data instead of being hard-coded.
# The loop index is used directly as the car identifier, so the
# identifiers must be exactly 1..n; that assumption is checked up front
# so differently coded data fails loudly instead of fitting the wrong
# subsets.
car_ids <- sort(unique(dados$car))
stopifnot(identical(as.integer(car_ids), seq_along(car_ids)))

# Preallocate one slot per car; element i holds the results for car i.
model <- vector("list", length(car_ids))
shapiro <- vector("list", length(car_ids))

for (i in seq_along(car_ids)) {
  model[[i]] <- lm(Mil ~ Age, data = dados[dados$car == i, ])
  shapiro[[i]] <- shapiro.test(residuals(model[[i]]))
  plot(model[[i]])
}

# Print the call and coefficients of every per-car model.
model
## [[1]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## 6581.60 17.02
##
##
## [[2]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## -5155.18 30.35
##
##
## [[3]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## -8987.2 55.2
##
##
## [[4]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## -8579.02 47.01
##
##
## [[5]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## 2311.81 14.69
##
##
## [[6]]
##
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
##
## Coefficients:
## (Intercept) Age
## 3251.69 45.48
# Print the Shapiro-Wilk results collected for each car's regression.
shapiro
## [[1]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.9636, p-value = 0.7273
##
##
## [[2]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.93418, p-value = 0.3861
##
##
## [[3]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.97634, p-value = 0.8648
##
##
## [[4]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.89508, p-value = 0.0472
##
##
## [[5]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.93582, p-value = 0.3327
##
##
## [[6]]
##
## Shapiro-Wilk normality test
##
## data: residuals(model[[i]])
## W = 0.94319, p-value = 0.3579
Outro modo: Modelo Linear com carro sendo uma variável dummy
# Alternative approach: a single linear model in which `car` is a factor
# interacting with Age (Mil ~ Age * car).
#
# The model is refitted once per car, each time with a different level
# of the `car` factor as the reference level, and each summary is
# printed. Only the parameterisation changes between refits; the fitted
# values and residuals are the same in every one.
#
# The number of refits comes from the factor levels found in the data
# instead of a hard-coded 6.
n_cars <- nlevels(as.factor(dados$car))
model_2 <- vector("list", n_cars)

for (i in seq_len(n_cars)) {
  # A numeric `ref` makes the i-th level of the factor the reference.
  temp <- dados %>%
    mutate(car = relevel(as.factor(car), ref = i))
  model_2[[i]] <- lm(Mil ~ Age * car, data = temp)
  print(summary(model_2[[i]]))
}
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6581.602 1183.245 5.562 2.82e-07 ***
## Age 17.017 1.586 10.727 < 2e-16 ***
## car2 -11736.783 2101.661 -5.585 2.57e-07 ***
## car3 -15568.801 2162.675 -7.199 1.94e-10 ***
## car4 -15160.623 1935.449 -7.833 1.01e-11 ***
## car5 -4269.793 1838.659 -2.322 0.0225 *
## car6 -3329.911 1736.954 -1.917 0.0585 .
## Age:car2 13.338 2.397 5.565 2.79e-07 ***
## Age:car3 38.179 2.366 16.139 < 2e-16 ***
## Age:car4 29.989 2.244 13.365 < 2e-16 ***
## Age:car5 -2.326 2.178 -1.068 0.2885
## Age:car6 28.468 2.210 12.879 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5155.181 1736.925 -2.968 0.003861 **
## Age 30.354 1.797 16.896 < 2e-16 ***
## car1 11736.783 2101.661 5.585 2.57e-07 ***
## car3 -3832.018 2508.785 -1.527 0.130238
## car4 -3423.840 2315.773 -1.478 0.142848
## car5 7466.990 2235.510 3.340 0.001230 **
## car6 8406.872 2152.637 3.905 0.000184 ***
## Age:car1 -13.338 2.397 -5.565 2.79e-07 ***
## Age:car3 24.841 2.511 9.891 6.02e-16 ***
## Age:car4 16.651 2.397 6.946 6.19e-10 ***
## Age:car5 -15.663 2.336 -6.706 1.85e-09 ***
## Age:car6 15.130 2.366 6.395 7.49e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8987.200 1810.274 -4.965 3.34e-06 ***
## Age 55.195 1.755 31.451 < 2e-16 ***
## car1 15568.801 2162.675 7.199 1.94e-10 ***
## car2 3832.018 2508.785 1.527 0.130238
## car4 408.178 2371.284 0.172 0.863728
## car5 11299.009 2292.965 4.928 3.88e-06 ***
## car6 12238.891 2212.246 5.532 3.21e-07 ***
## Age:car1 -38.179 2.366 -16.139 < 2e-16 ***
## Age:car2 -24.841 2.511 -9.891 6.02e-16 ***
## Age:car4 -8.190 2.366 -3.461 0.000832 ***
## Age:car5 -40.504 2.304 -17.581 < 2e-16 ***
## Age:car6 -9.711 2.334 -4.160 7.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8579.021 1531.631 -5.601 2.39e-07 ***
## Age 47.006 1.587 29.620 < 2e-16 ***
## car1 15160.623 1935.449 7.833 1.01e-11 ***
## car2 3423.840 2315.773 1.478 0.142848
## car3 -408.178 2371.284 -0.172 0.863728
## car5 10890.830 2080.022 5.236 1.11e-06 ***
## car6 11830.712 1990.687 5.943 5.50e-08 ***
## Age:car1 -29.989 2.244 -13.365 < 2e-16 ***
## Age:car2 -16.651 2.397 -6.946 6.19e-10 ***
## Age:car3 8.190 2.366 3.461 0.000832 ***
## Age:car5 -32.315 2.179 -14.833 < 2e-16 ***
## Age:car6 -1.521 2.211 -0.688 0.493156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2311.809 1407.337 1.643 0.10402
## Age 14.691 1.493 9.843 7.55e-16 ***
## car1 4269.793 1838.659 2.322 0.02253 *
## car2 -7466.990 2235.510 -3.340 0.00123 **
## car3 -11299.009 2292.965 -4.928 3.88e-06 ***
## car4 -10890.830 2080.022 -5.236 1.11e-06 ***
## car6 939.882 1896.717 0.496 0.62146
## Age:car1 2.326 2.178 1.068 0.28854
## Age:car2 15.663 2.336 6.706 1.85e-09 ***
## Age:car3 40.504 2.304 17.581 < 2e-16 ***
## Age:car4 32.315 2.179 14.833 < 2e-16 ***
## Age:car6 30.793 2.144 14.362 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7062.2 -1250.2 -51.3 1448.6 6480.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3251.691 1271.589 2.557 0.012265 *
## Age 45.484 1.539 29.548 < 2e-16 ***
## car1 3329.911 1736.954 1.917 0.058470 .
## car2 -8406.872 2152.637 -3.905 0.000184 ***
## car3 -12238.891 2212.246 -5.532 3.21e-07 ***
## car4 -11830.712 1990.687 -5.943 5.50e-08 ***
## car5 -939.882 1896.717 -0.496 0.621461
## Age:car1 -28.468 2.210 -12.879 < 2e-16 ***
## Age:car2 -15.130 2.366 -6.395 7.49e-09 ***
## Age:car3 9.711 2.334 4.160 7.38e-05 ***
## Age:car4 1.521 2.211 0.688 0.493156
## Age:car5 -30.793 2.144 -14.362 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.9814
## F-statistic: 474.6 on 11 and 88 DF, p-value: < 2.2e-16
# Shapiro-Wilk normality test on the residuals of the interaction model.
# Only the first refit is tested.
shapiro.test(residuals(model_2[[1]]))
##
## Shapiro-Wilk normality test
##
## data: residuals(model_2[[1]])
## W = 0.98932, p-value = 0.61
# Diagnostic plots for the first refit of the interaction model.
plot(model_2[[1]])