# Attach the packages used below. library() is used instead of require()
# so that a missing package stops the script immediately with a clear error
# rather than returning FALSE and failing later on the first function call.
library(DT)
library(tidyverse)
#install.packages("readxl")
library(readxl)

Dados dos 6 automóveis

Referência

C. W. Ahn, K. C. Chae, and G. M. Clark, “Estimating parameters of the power law process with two measures of failure time,” Journal of Quality Technology, 1998, vol. 30, iss. 2, pp. 127-132.

# Read the automobile data from the Excel workbook and show it as a table.
dados <- read_excel(path = "dados_automoveis.xls")
datatable(data = dados)

Análise gráfica

Temos uma linha média, e os carros estão em torno dela: alguns “mais acelerados” e outros “menos acelerados”.

# Scatter plot of Mil against Age with points coloured by `car`, overlaid
# with a linear-model smooth.
ggplot(data = dados, mapping = aes(x = Age, y = Mil, color = car)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Pooled regression of Mil on Age using all rows of `dados` (no distinction
# between cars), followed by its coefficient and fit summary.
model_medio <- lm(formula = Mil ~ Age, data = dados)
summary(model_medio)
## 
## Call:
## lm(formula = Mil ~ Age, data = dados)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26058  -8311   1197   7506  21753 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -296.571   2678.812  -0.111    0.912    
## Age           35.762      2.904  12.315   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11670 on 98 degrees of freedom
## Multiple R-squared:  0.6075, Adjusted R-squared:  0.6035 
## F-statistic: 151.7 on 1 and 98 DF,  p-value: < 2.2e-16
# Diagnostic plots for the pooled fit `model_medio`.
plot(model_medio)

# Shapiro-Wilk normality test applied to the residuals of the pooled fit.
shapiro.test(residuals(model_medio))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_medio)
## W = 0.9767, p-value = 0.07331

É interessante separar os carros: cada um tem a sua própria linha média de regressão (Milhas x Idade).

# Same scatter plot, but with `car` converted to a factor for the colour
# aesthetic, so the points and the linear-model smooth are drawn per car.
ggplot(
  data = dados,
  mapping = aes(x = Age, y = Mil, color = as.factor(car))
) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

Estimando o coeficiente angular da linha de regressão de cada carro:

# Fit a separate regression of Mil on Age for each car and run a
# Shapiro-Wilk test on the residuals of each fit.
#   model[[i]]   - lm fit restricted to the rows where dados$car == i
#   shapiro[[i]] - Shapiro-Wilk test of that fit's residuals
# Cars are selected by the numeric codes 1..n_cars in `dados$car`.
n_cars <- 6

# Preallocate the result lists instead of growing them inside the loop.
model <- vector("list", n_cars)
shapiro <- vector("list", n_cars)

for (i in seq_len(n_cars)) {
  model[[i]] <- lm(Mil ~ Age, data = dados[dados$car == i, ])
  shapiro[[i]] <- shapiro.test(residuals(model[[i]]))
  # Diagnostic plots for this car's fit.
  plot(model[[i]])
}

# Print the list of per-car fits (call and coefficients of each lm object).
model
## [[1]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##     6581.60        17.02  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##    -5155.18        30.35  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##     -8987.2         55.2  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##    -8579.02        47.01  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##     2311.81        14.69  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = Mil ~ Age, data = dados[dados$car == i, ])
## 
## Coefficients:
## (Intercept)          Age  
##     3251.69        45.48
# Print the Shapiro-Wilk test result stored for each per-car fit.
shapiro
## [[1]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.9636, p-value = 0.7273
## 
## 
## [[2]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.93418, p-value = 0.3861
## 
## 
## [[3]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.97634, p-value = 0.8648
## 
## 
## [[4]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.89508, p-value = 0.0472
## 
## 
## [[5]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.93582, p-value = 0.3327
## 
## 
## [[6]]
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model[[i]])
## W = 0.94319, p-value = 0.3579

Outro modo: Modelo Linear com carro sendo uma variável dummy

# Single linear model with `car` as a factor (dummy variables) interacting
# with Age. The model is refitted once per car, each time with the i-th
# factor level moved to the reference position by relevel(), so that the
# (Intercept) and Age rows of summary i refer to that reference car.
#   model_2[[i]] - lm fit of Mil ~ Age * car with level i as reference
n_cars <- 6

# Preallocate the result list instead of growing it inside the loop.
model_2 <- vector("list", n_cars)

for (i in seq_len(n_cars)) {
  # relevel() with a numeric `ref` selects the level by position.
  temp <- dados %>%
    mutate(car = relevel(as.factor(car), i))
  model_2[[i]] <- lm(Mil ~ Age * car, temp)
  print(summary(model_2[[i]]))
}
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6581.602   1183.245   5.562 2.82e-07 ***
## Age             17.017      1.586  10.727  < 2e-16 ***
## car2        -11736.783   2101.661  -5.585 2.57e-07 ***
## car3        -15568.801   2162.675  -7.199 1.94e-10 ***
## car4        -15160.623   1935.449  -7.833 1.01e-11 ***
## car5         -4269.793   1838.659  -2.322   0.0225 *  
## car6         -3329.911   1736.954  -1.917   0.0585 .  
## Age:car2        13.338      2.397   5.565 2.79e-07 ***
## Age:car3        38.179      2.366  16.139  < 2e-16 ***
## Age:car4        29.989      2.244  13.365  < 2e-16 ***
## Age:car5        -2.326      2.178  -1.068   0.2885    
## Age:car6        28.468      2.210  12.879  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5155.181   1736.925  -2.968 0.003861 ** 
## Age            30.354      1.797  16.896  < 2e-16 ***
## car1        11736.783   2101.661   5.585 2.57e-07 ***
## car3        -3832.018   2508.785  -1.527 0.130238    
## car4        -3423.840   2315.773  -1.478 0.142848    
## car5         7466.990   2235.510   3.340 0.001230 ** 
## car6         8406.872   2152.637   3.905 0.000184 ***
## Age:car1      -13.338      2.397  -5.565 2.79e-07 ***
## Age:car3       24.841      2.511   9.891 6.02e-16 ***
## Age:car4       16.651      2.397   6.946 6.19e-10 ***
## Age:car5      -15.663      2.336  -6.706 1.85e-09 ***
## Age:car6       15.130      2.366   6.395 7.49e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8987.200   1810.274  -4.965 3.34e-06 ***
## Age            55.195      1.755  31.451  < 2e-16 ***
## car1        15568.801   2162.675   7.199 1.94e-10 ***
## car2         3832.018   2508.785   1.527 0.130238    
## car4          408.178   2371.284   0.172 0.863728    
## car5        11299.009   2292.965   4.928 3.88e-06 ***
## car6        12238.891   2212.246   5.532 3.21e-07 ***
## Age:car1      -38.179      2.366 -16.139  < 2e-16 ***
## Age:car2      -24.841      2.511  -9.891 6.02e-16 ***
## Age:car4       -8.190      2.366  -3.461 0.000832 ***
## Age:car5      -40.504      2.304 -17.581  < 2e-16 ***
## Age:car6       -9.711      2.334  -4.160 7.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8579.021   1531.631  -5.601 2.39e-07 ***
## Age            47.006      1.587  29.620  < 2e-16 ***
## car1        15160.623   1935.449   7.833 1.01e-11 ***
## car2         3423.840   2315.773   1.478 0.142848    
## car3         -408.178   2371.284  -0.172 0.863728    
## car5        10890.830   2080.022   5.236 1.11e-06 ***
## car6        11830.712   1990.687   5.943 5.50e-08 ***
## Age:car1      -29.989      2.244 -13.365  < 2e-16 ***
## Age:car2      -16.651      2.397  -6.946 6.19e-10 ***
## Age:car3        8.190      2.366   3.461 0.000832 ***
## Age:car5      -32.315      2.179 -14.833  < 2e-16 ***
## Age:car6       -1.521      2.211  -0.688 0.493156    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2311.809   1407.337   1.643  0.10402    
## Age             14.691      1.493   9.843 7.55e-16 ***
## car1          4269.793   1838.659   2.322  0.02253 *  
## car2         -7466.990   2235.510  -3.340  0.00123 ** 
## car3        -11299.009   2292.965  -4.928 3.88e-06 ***
## car4        -10890.830   2080.022  -5.236 1.11e-06 ***
## car6           939.882   1896.717   0.496  0.62146    
## Age:car1         2.326      2.178   1.068  0.28854    
## Age:car2        15.663      2.336   6.706 1.85e-09 ***
## Age:car3        40.504      2.304  17.581  < 2e-16 ***
## Age:car4        32.315      2.179  14.833  < 2e-16 ***
## Age:car6        30.793      2.144  14.362  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Mil ~ Age * car, data = temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7062.2 -1250.2   -51.3  1448.6  6480.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3251.691   1271.589   2.557 0.012265 *  
## Age             45.484      1.539  29.548  < 2e-16 ***
## car1          3329.911   1736.954   1.917 0.058470 .  
## car2         -8406.872   2152.637  -3.905 0.000184 ***
## car3        -12238.891   2212.246  -5.532 3.21e-07 ***
## car4        -11830.712   1990.687  -5.943 5.50e-08 ***
## car5          -939.882   1896.717  -0.496 0.621461    
## Age:car1       -28.468      2.210 -12.879  < 2e-16 ***
## Age:car2       -15.130      2.366  -6.395 7.49e-09 ***
## Age:car3         9.711      2.334   4.160 7.38e-05 ***
## Age:car4         1.521      2.211   0.688 0.493156    
## Age:car5       -30.793      2.144 -14.362  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2531 on 88 degrees of freedom
## Multiple R-squared:  0.9834, Adjusted R-squared:  0.9814 
## F-statistic: 474.6 on 11 and 88 DF,  p-value: < 2.2e-16
# Shapiro-Wilk normality test on the residuals of the first dummy-variable
# fit (the one refitted with the first car level as reference).
shapiro.test(residuals(model_2[[1]]))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_2[[1]])
## W = 0.98932, p-value = 0.61
# Diagnostic plots for the first dummy-variable fit.
plot(model_2[[1]])