lab4

Author

Vladyslava Bondarenko

# Load the vehicle listings.
# NOTE(review): the path is absolute and machine-specific; the script only runs
# where this exact file exists -- consider a project-relative path.
cars <- read.csv('C:/Users/User/Desktop/study/cars.csv')

# Keep only the price and brand columns for the three brands being compared.
# `%in%` replaces the chained `==`/`|` tests and never yields NA.
brands <- subset(
  cars,
  brand %in% c('ford', 'dodge', 'chevrolet'),
  select = c(price, brand)
)

# Retained from the original script so `cars$brand` is still a factor afterwards.
cars$brand <- factor(cars$brand)

# The subset was taken before the conversion above, so its grouping column must
# be converted explicitly. Factoring after subsetting also guarantees that only
# the three selected brands are levels. Without this, `brands$brand` stays
# character and functions such as car::leveneTest() warn about coercing it.
brands$brand <- factor(brands$brand)

# Side-by-side box plots of price for each brand.
boxplot(
  price ~ brand,
  data = brands,
  xlab = 'brand of the vehicle',
  ylab = 'price of the vehicle',
  main = 'vehicle price',
  col = c('firebrick1', 'darkolivegreen2', 'cyan3')
)

# Strip chart of the individual prices, one strip per brand. With the default
# orientation the measured variable (price) runs along the x axis, which is why
# the axis labels are swapped relative to the box plot.
stripchart(
  price ~ brand,
  data = brands,
  main = 'vehicle price',
  xlab = 'price of the vehicle',
  ylab = 'brand of the vehicle',
  col = c('firebrick1', 'darkolivegreen2', 'cyan3')
)

# Mean price within each brand; the result has one row per brand with the
# grouping value in `Group.1` and the mean in `x`.
with(brands, aggregate(price, by = list(brand), FUN = mean))
    Group.1        x
1 chevrolet 18669.95
2     dodge 17781.99
3      ford 21666.89
# One-way ANOVA of price on brand; `mod1` is reused further down for the
# residual diagnostics.
mod1 <- aov(price ~ brand, data = brands)
# Print the ANOVA table (degrees of freedom, sums of squares, F value, p-value).
summary(mod1)
              Df    Sum Sq   Mean Sq F value   Pr(>F)    
brand          2 5.829e+09 2.914e+09   19.77 3.15e-09 ***
Residuals   1961 2.890e+11 1.474e+08                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the same price ~ brand model as a linear regression and print its
# coefficient table, residual standard error, R-squared and overall F test.
brand_fit <- lm(price ~ brand, data = brands)
summary(brand_fit)

Call:
lm(formula = price ~ brand, data = brands)

Residuals:
   Min     1Q Median     3Q    Max 
-21667  -8470   -667   6333  52333 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  18670.0      704.5   26.50  < 2e-16 ***
branddodge    -888.0      915.1   -0.97 0.332012    
brandford     2996.9      784.6    3.82 0.000138 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 12140 on 1961 degrees of freedom
Multiple R-squared:  0.01977,   Adjusted R-squared:  0.01877 
F-statistic: 19.77 on 2 and 1961 DF,  p-value: 3.15e-09
# Histogram of the ANOVA residuals. `freq = FALSE` plots densities instead of
# counts; the literal FALSE is used because `F` is an ordinary variable that
# can be reassigned.
hist(mod1$residuals, main = 'гістограма залишків моделі', freq = FALSE, col = 'lavender')

# Default diagnostic plots for the fitted model object.
plot(mod1)

# Shapiro-Wilk test of the null hypothesis that the residuals are normal.
shapiro.test(mod1$residuals)

    Shapiro-Wilk normality test

data:  mod1$residuals
W = 0.96972, p-value < 2.2e-16
# Kruskal-Wallis rank sum test: a rank-based comparison of price across the
# brands that does not rely on normally distributed residuals.
kruskal.test(price ~ brand, data = brands)

    Kruskal-Wallis rank sum test

data:  price by brand
Kruskal-Wallis chi-squared = 33.735, df = 2, p-value = 4.726e-08
library(car)
Loading required package: carData
# Levene's test for homogeneity of variance of price across brands (from the
# `car` package). It warns "group coerced to factor" when `brands$brand` is not
# already a factor.
leveneTest(price ~ brand, data = brands)
Warning in leveneTest.default(y = y, group = group, ...): group coerced to
factor.
Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    2  30.862 6.362e-14 ***
      1961                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

contrasts

# Linear model of price on brand; this fitted object is what the least-squares
# means and pairwise contrasts are computed from.
lmod <- lm(price ~ brand, data = brands)
library(lsmeans)
Warning: package 'lsmeans' was built under R version 4.2.1
Loading required package: emmeans
The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
# Least-squares mean price per brand plus all pairwise brand differences, with
# Tukey-adjusted p-values for the family of comparisons.
# NOTE(review): `lsmeans` is now a front end for `emmeans`; consider migrating.
lsmeans(lmod, pairwise ~ brand, adjust = 'tukey')
$lsmeans
 brand     lsmean  SE   df lower.CL upper.CL
 chevrolet  18670 704 1961    17288    20052
 dodge      17782 584 1961    16636    18928
 ford       21667 345 1961    20989    22344

Confidence level used: 0.95 

$contrasts
 contrast          estimate  SE   df t.ratio p.value
 chevrolet - dodge      888 915 1961   0.970  0.5958
 chevrolet - ford     -2997 785 1961  -3.820  0.0004
 dodge - ford         -3885 679 1961  -5.725  <.0001

P value adjustment: tukey method for comparing a family of 3 estimates 
# Plot the per-brand least-squares means together with their intervals.
brand_lsmeans <- lsmeans(lmod, ~ brand)
plot(brand_lsmeans)