# Read the US vs. Japanese cars data set from the remote CSV file and
# print the resulting data frame.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)
dat
## USCars JapaneseCars
## 1 18 24
## 2 15 27
## 3 18 27
## 4 16 25
## 5 17 31
## 6 15 35
## 7 14 24
## 8 14 19
## 9 14 28
## 10 15 23
## 11 15 27
## 12 14 20
## 13 15 22
## 14 14 18
## 15 22 20
## 16 18 31
## 17 21 32
## 18 21 31
## 19 10 32
## 20 10 24
## 21 11 26
## 22 9 29
## 23 28 24
## 24 25 24
## 25 19 33
## 26 16 33
## 27 17 32
## 28 19 28
## 29 18 NA
## 30 14 NA
## 31 14 NA
## 32 14 NA
## 33 14 NA
## 34 12 NA
## 35 13 NA
# Pull the US car column out of the data frame and display it.
US_Cars <- dat[["USCars"]]
US_Cars
## [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11 9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
# Take the Japanese car column, discard its missing (NA) entries, and
# display what remains.
Japanese_Cars <- dat[["JapaneseCars"]]
Japanese_Cars <- Japanese_Cars[!is.na(Japanese_Cars)]
Japanese_Cars
## [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28
# Normal Q-Q plot with a reference line for each raw sample.
qqnorm(US_Cars, main = "normal probability plot of USCars")
qqline(US_Cars, col = "pink")

qqnorm(Japanese_Cars, main = "normal probability plot of JapaneseCars")
qqline(Japanese_Cars, col = "blue")
Comments: The mpg of the Japanese cars appears to be approximately normal, while the mpg of the US cars bends away from the straight line, most noticeably in the upper tail, which indicates skewness. Overall, the mpg of neither group of cars appears to be exactly normal.
# Side-by-side box plots of the two raw samples, used to compare their spread.
# The first label is spelled "USCars" so that it matches the column name in
# the data and the titles of the other plots.
boxplot(
  US_Cars, Japanese_Cars,
  names = c("USCars", "JapaneseCars"),
  col = c("pink", "blue")
)
Comments: The variances do not appear to be equal. The box-and-whisker plot for the Japanese cars is wider than that for the US cars, which means that the mpg of the Japanese cars has more spread.
# Natural-log transform of both samples.
L_UCars <- log(US_Cars)
L_JCars <- log(Japanese_Cars)

# Normal Q-Q plot with a reference line for each log-transformed sample.
qqnorm(L_UCars, main = "normal probability plot of Logarithm value of USCars")
qqline(L_UCars, col = "red")

qqnorm(
  L_JCars,
  main = "normal probability plot of Logarithm value of the JapaneseCars"
)
qqline(L_JCars, col = "green")
Comments: After the log transformation, the log-mpg values of both the US cars and the Japanese cars appear to be approximately normal.
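Let \(\mu_1\) and \(\mu_2\) denote the mean log-mpg of the US cars and the Japanese cars, respectively.
Null hypothesis:
\(H_0:\ \mu_1 = \mu_2\)
Alternative hypothesis (one-sided, matching the test performed below):
\(H_a:\ \mu_1 < \mu_2\)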
# Pooled-variance (var.equal = TRUE) two-sample t-test on the log-transformed
# samples. alternative = "less" makes it one-sided: it tests whether the mean
# of L_UCars is below the mean of L_JCars.
t.test(
  x = L_UCars,
  y = L_JCars,
  var.equal = TRUE,
  alternative = "less"
)
##
## Two Sample t-test
##
## data: L_UCars and L_JCars
## t = -9.4828, df = 61, p-value = 6.528e-14
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.4366143
## sample estimates:
## mean of x mean of y
## 2.741001 3.270957
The averages of the log-mpg of the US and Japanese cars are 2.741001 and 3.270957, respectively.
The p-value obtained (6.528e-14) is far less than the level of significance. We therefore reject the null hypothesis and conclude that the mean log-mpg of the US cars is less than the mean log-mpg of the Japanese cars.
# Complete analysis script: compares the mpg of US and Japanese cars.

# Data ----
# Read the data set from the remote CSV file and print it.
dat <- read.csv(
  "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)
dat

# Extract each column as a vector. The Japanese column has missing (NA)
# entries, which are dropped before any further use.
US_Cars <- dat$USCars
US_Cars
Japanese_Cars <- dat$JapaneseCars[!is.na(dat$JapaneseCars)]
Japanese_Cars

# Normality check on the raw data ----
qqnorm(US_Cars, main = "normal probability plot of USCars")
qqline(US_Cars, col = "pink")
qqnorm(Japanese_Cars, main = "normal probability plot of JapaneseCars")
qqline(Japanese_Cars, col = "blue")

# Spread comparison on the raw data ----
# The first label is spelled "USCars" so that it matches the column name.
boxplot(
  US_Cars, Japanese_Cars,
  names = c("USCars", "JapaneseCars"),
  col = c("pink", "blue")
)

# Log transformation and normality check ----
L_UCars <- log(US_Cars)
L_JCars <- log(Japanese_Cars)
qqnorm(L_UCars, main = "normal probability plot of Logarithm value of USCars")
qqline(L_UCars, col = "red")
qqnorm(
  L_JCars,
  main = "normal probability plot of Logarithm value of the JapaneseCars"
)
qqline(L_JCars, col = "green")

# Two-sample t-test on the log data ----
# Pooled-variance test (var.equal = TRUE). alternative = "less" makes it
# one-sided: it tests whether the mean of L_UCars is below the mean of L_JCars.
t.test(
  L_UCars, L_JCars,
  alternative = "less",
  var.equal = TRUE
)