# Load the vehicle listings.
# NOTE(review): absolute, machine-specific path; consider a project-relative
# path so the script runs on other machines.
cars <- read.csv("C:/Users/User/Desktop/study/cars.csv")

# Keep only the three brands being compared and the two columns used below.
brands <- subset(
  cars,
  brand %in% c("ford", "dodge", "chevrolet"),
  select = c(price, brand)
)

cars$brand <- factor(cars$brand)
# `brands` was subset before the conversion above, so its `brand` column is
# still character. Convert it here as well: this is the data frame every
# model and test below uses, and car::leveneTest() otherwise warns
# "group coerced to factor". Converting after the subset also means only the
# three selected brands become factor levels.
brands$brand <- factor(brands$brand)

# One fill colour per brand, shared by both plots.
brand_colours <- c("firebrick1", "darkolivegreen2", "cyan3")

# Distribution of price per brand: box plot, then strip chart of raw points.
boxplot(
  price ~ brand,
  data = brands,
  xlab = "brand of the vehicle",
  ylab = "price of the vehicle",
  main = "vehicle price",
  col = brand_colours
)
stripchart(
  price ~ brand,
  data = brands,
  xlab = "price of the vehicle",
  ylab = "brand of the vehicle",
  main = "vehicle price",
  col = brand_colours
)

# Mean price per brand (result columns are named Group.1 and x).
aggregate(x = brands$price, by = list(brands$brand), FUN = mean)
Group.1 x
1 chevrolet 18669.95
2 dodge 17781.99
3 ford 21666.89
# One-way ANOVA of price on brand. The fit is kept in `mod1` because the
# residual diagnostics later in the script read from it.
mod1 <- aov(price ~ brand, data = brands)
# Print the ANOVA table (Df, sums of squares, F value, p-value).
summary(mod1)
Df Sum Sq Mean Sq F value Pr(>F)
brand 2 5.829e+09 2.914e+09 19.77 3.15e-09 ***
Residuals 1961 2.890e+11 1.474e+08
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the same price ~ brand model with lm() and print its summary, which
# reports per-brand coefficients relative to the intercept level.
summary(
  lm(formula = price ~ brand, data = brands)
)
Call:
lm(formula = price ~ brand, data = brands)
Residuals:
Min 1Q Median 3Q Max
-21667 -8470 -667 6333 52333
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 18670.0 704.5 26.50 < 2e-16 ***
branddodge -888.0 915.1 -0.97 0.332012
brandford 2996.9 784.6 3.82 0.000138 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 12140 on 1961 degrees of freedom
Multiple R-squared: 0.01977, Adjusted R-squared: 0.01877
F-statistic: 19.77 on 2 and 1961 DF, p-value: 3.15e-09
# Density-scaled histogram of the ANOVA residuals.
# The title is Ukrainian for "histogram of the model residuals".
hist(
  mod1$residuals,
  main = "гістограма залишків моделі",
  freq = FALSE,  # spelled out: `F` is an ordinary variable and can be reassigned
  col = "lavender"
)

# Standard diagnostic plots for the fitted model.
plot(mod1)

# Shapiro-Wilk normality test on the residuals.
shapiro.test(mod1$residuals)
Shapiro-Wilk normality test
data: mod1$residuals
W = 0.96972, p-value < 2.2e-16
# Kruskal-Wallis rank sum test of price across brands, a rank-based
# counterpart to the ANOVA above.
stats::kruskal.test(
  price ~ brand,
  data = brands
)
Kruskal-Wallis rank sum test
data: price by brand
Kruskal-Wallis chi-squared = 33.735, df = 2, p-value = 4.726e-08
library(car)
Loading required package: carData
# Levene's test for homogeneity of variance of price across brands
# (car package; centres on the median by default).
car::leveneTest(
  price ~ brand,
  data = brands
)
Warning in leveneTest.default(y = y, group = group, ...): group coerced to
factor.
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 30.862 6.362e-14 ***
1961
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
contrasts
# Linear model of price on brand, used for the pairwise brand comparisons.
lmod <- lm(price ~ brand, data = brands)
# NOTE(review): on load, lsmeans reports that it is now a front end for
# emmeans and encourages switching; kept here so the lsmeans() call and its
# `$lsmeans` output labels stay unchanged.
library(lsmeans)
Warning: package 'lsmeans' was built under R version 4.2.1
Loading required package: emmeans
The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
# Least-squares mean price per brand plus all pairwise brand contrasts,
# with Tukey-adjusted p-values.
lsmeans(
  lmod,
  pairwise ~ brand,
  adjust = "tukey"
)
$lsmeans
brand lsmean SE df lower.CL upper.CL
chevrolet 18670 704 1961 17288 20052
dodge 17782 584 1961 16636 18928
ford 21667 345 1961 20989 22344
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
chevrolet - dodge 888 915 1961 0.970 0.5958
chevrolet - ford -2997 785 1961 -3.820 0.0004
dodge - ford -3885 679 1961 -5.725 <.0001
P value adjustment: tukey method for comparing a family of 3 estimates