# Load the US vs. Japanese car data set directly from its GitHub-hosted CSV.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)
# Echo the US column so the raw observations appear in the report.
dat[["USCars"]]
## [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11 9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
# Echo the Japanese column; the trailing NA entries are rows where only a US
# value is present.
dat[["JapaneseCars"]]
## [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28 NA NA NA NA NA NA NA
# Normal Q-Q plot of the raw US values, with the reference line overlaid.
qqnorm(
  y = dat[["USCars"]],
  main = "Normal Q-Q plot (US cars)"
)
qqline(y = dat[["USCars"]])

# Same diagnostic for the raw Japanese values.
qqnorm(
  y = dat[["JapaneseCars"]],
  main = "Normal Q-Q plot (Japanese cars)"
)
qqline(y = dat[["JapaneseCars"]])
The normal probability plots for both US and Japanese cars show the data points lying approximately along the Q-Q line, which suggests that both samples are approximately normally distributed. The Japanese car data appear to be closer to normal than the US car data.
# Side-by-side box plots of the raw values to compare the spread of the groups.
boxplot(
  dat[["USCars"]],
  dat[["JapaneseCars"]],
  names = c("US", "Japan"),
  main = "Box Plots (US Cars vs Japanese Cars)",
  xlab = "Cars",
  ylab = "mpg"
)
The variance does not appear to be equal across the two groups: the Japanese cars show more variance than the US cars.
# Log-transform both columns (stored alongside the originals in dat2); the NA
# entries in the Japanese column stay NA.
dat2 <- transform(
  dat,
  log_USCars = log(USCars),
  log_JapaneseCars = log(JapaneseCars)
)

# Re-check normality on the log scale.
qqnorm(dat2$log_USCars, main = "Normal Q-Q plot (US cars data Transformed)")
qqline(dat2$log_USCars)
qqnorm(
  dat2$log_JapaneseCars,
  main = "Normal Q-Q plot (Japanese cars Transformed data)"
)
qqline(dat2$log_JapaneseCars)

# Re-check the spread on the log scale. The y-axis is labelled log(mpg) because
# the plotted values are logged, not raw mpg.
boxplot(
  dat2$log_USCars,
  dat2$log_JapaneseCars,
  names = c("US", "Japan"),
  main = "Box Plots (US Cars vs Japanese Cars Transformed Data)",
  xlab = "Cars",
  ylab = "log(mpg)"
)
After the log transformation, the US car data appear slightly closer to normal on the normal probability plot than before the transformation, while the Japanese car plot does not show much difference. The box plots for the log-transformed data now show approximately equal variance for US cars and Japanese cars.
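Null hypothesis, H0: μ1 = μ2, and alternative hypothesis, Ha: μ1 ≠ μ2, where μ1 and μ2 are the means of the log-transformed US and Japanese car data, respectively.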
# Pooled-variance (var.equal = TRUE) two-sample t-test on the log-scale values,
# two-sided at the default 95% confidence level.
t.test(
  x = dat2$log_USCars,
  y = dat2$log_JapaneseCars,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: dat2$log_USCars and dat2$log_JapaneseCars
## t = -9.4828, df = 61, p-value = 1.306e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6417062 -0.4182053
## sample estimates:
## mean of x mean of y
## 2.741001 3.270957
The p-value (1.306e-13) is far below 0.05. Therefore, the null hypothesis can be rejected. The alternative hypothesis that the mean fuel consumption of US cars and Japanese cars differs can be accepted, with Japanese cars being more fuel efficient (mean of the log-transformed data 3.271 vs. 2.741 for US cars).
# Data ----
# Load the US vs. Japanese car data set from its GitHub-hosted CSV and echo
# both columns.
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
dat$USCars
dat$JapaneseCars

# Diagnostics on the raw data ----
# Normal Q-Q plots (with reference lines) to assess normality of each group.
qqnorm(dat$USCars, main = "Normal Q-Q plot (US cars)")
qqline(dat$USCars)
qqnorm(dat$JapaneseCars, main = "Normal Q-Q plot (Japanese cars)")
qqline(dat$JapaneseCars)

# Side-by-side box plots to compare the spread of the two groups.
boxplot(
  dat$USCars,
  dat$JapaneseCars,
  names = c("US", "Japan"),
  main = "Box Plots (US Cars vs Japanese Cars)",
  xlab = "Cars",
  ylab = "mpg"
)

# Diagnostics on the log-transformed data ----
# Add log-transformed copies of both columns; the originals are kept in dat2.
dat2 <- transform(
  dat,
  log_USCars = log(USCars),
  log_JapaneseCars = log(JapaneseCars)
)
qqnorm(dat2$log_USCars, main = "Normal Q-Q plot (US cars data Transformed)")
qqline(dat2$log_USCars)
qqnorm(
  dat2$log_JapaneseCars,
  main = "Normal Q-Q plot (Japanese cars Transformed data)"
)
qqline(dat2$log_JapaneseCars)

# The y-axis is labelled log(mpg) because the plotted values are logged, not
# raw mpg.
boxplot(
  dat2$log_USCars,
  dat2$log_JapaneseCars,
  names = c("US", "Japan"),
  main = "Box Plots (US Cars vs Japanese Cars Transformed Data)",
  xlab = "Cars",
  ylab = "log(mpg)"
)

# Hypothesis test ----
# Pooled-variance (var.equal = TRUE) two-sample t-test on the log-scale values.
t.test(dat2$log_USCars, dat2$log_JapaneseCars, var.equal = TRUE)