getwd()
## [1] "/Users/abir/Desktop/FA"
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
print(dat)
## USCars JapaneseCars
## 1 18 24
## 2 15 27
## 3 18 27
## 4 16 25
## 5 17 31
## 6 15 35
## 7 14 24
## 8 14 19
## 9 14 28
## 10 15 23
## 11 15 27
## 12 14 20
## 13 15 22
## 14 14 18
## 15 22 20
## 16 18 31
## 17 21 32
## 18 21 31
## 19 10 32
## 20 10 24
## 21 11 26
## 22 9 29
## 23 28 24
## 24 25 24
## 25 19 33
## 26 16 33
## 27 17 32
## 28 19 28
## 29 18 NA
## 30 14 NA
## 31 14 NA
## 32 14 NA
## 33 14 NA
## 34 12 NA
## 35 13 NA
length(dat$USCars)
## [1] 35
# Keep only the 28 non-missing JapaneseCars values (rows 29-35 are NA).
Japanese_cars <- dat[c(1:28),2]
length(Japanese_cars)
## [1] 28
# The sample sizes are n1 = 35 for the US cars and n2 = 28 for the Japanese cars.
# Both are below 40, so by the usual rule of thumb the samples are not large
# enough to rely on the central limit theorem, and normality must be checked directly.
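# As a supplementary check (not part of the original output), the per-column
# sample sizes can be confirmed by counting the non-missing values in each
# column; this should report 35 for USCars and 28 for JapaneseCars.
colSums(!is.na(dat))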
# Check US cars Normal Probability plot
qqnorm(dat$USCars,main="US cars Normal Probability Plot",col="blue",ylab="MPG")
qqline(dat$USCars)
# The points fall roughly along a straight line, so the MPG of the US cars
# appears to be approximately normally distributed.
# Check Japanese cars Normal Probability plot
qqnorm(dat$JapaneseCars,main="Japanese cars Normal Probability Plot",col="red",ylab="MPG")
qqline(dat$JapaneseCars)
# The points fall roughly along a straight line, so the MPG of the Japanese cars
# appears to be approximately normally distributed.
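# As an optional, more formal supplement to the visual Q-Q plot check (not part
# of the original analysis), a Shapiro-Wilk test can be run on each sample; a
# p-value above 0.05 is consistent with approximate normality.
shapiro.test(dat$USCars)
shapiro.test(na.omit(dat$JapaneseCars))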
boxplot(dat$USCars,dat$JapaneseCars,main ="Variance Equality Check",names = c("US Cars", "Japanese Cars"),ylab="MPG")
# The box plot shows clearly different box heights (IQRs) for the two groups,
# so the variances appear to differ substantially; the equal-variance
# assumption does not look reasonable on the raw MPG scale.
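# To quantify the visual impression from the box plot (an optional addition,
# not in the original output), the sample variances can be computed directly,
# and var.test() gives a formal F test for equality of the two variances.
var(dat$USCars)
var(Japanese_cars)
var.test(dat$USCars, Japanese_cars)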
dat2 <- log(dat)
print(dat2)
## USCars JapaneseCars
## 1 2.890372 3.178054
## 2 2.708050 3.295837
## 3 2.890372 3.295837
## 4 2.772589 3.218876
## 5 2.833213 3.433987
## 6 2.708050 3.555348
## 7 2.639057 3.178054
## 8 2.639057 2.944439
## 9 2.639057 3.332205
## 10 2.708050 3.135494
## 11 2.708050 3.295837
## 12 2.639057 2.995732
## 13 2.708050 3.091042
## 14 2.639057 2.890372
## 15 3.091042 2.995732
## 16 2.890372 3.433987
## 17 3.044522 3.465736
## 18 3.044522 3.433987
## 19 2.302585 3.465736
## 20 2.302585 3.178054
## 21 2.397895 3.258097
## 22 2.197225 3.367296
## 23 3.332205 3.178054
## 24 3.218876 3.178054
## 25 2.944439 3.496508
## 26 2.772589 3.496508
## 27 2.833213 3.465736
## 28 2.944439 3.332205
## 29 2.890372 NA
## 30 2.639057 NA
## 31 2.639057 NA
## 32 2.639057 NA
## 33 2.639057 NA
## 34 2.484907 NA
## 35 2.564949 NA
# Check US cars Normal Probability plot after log transformation
qqnorm(dat2$USCars,main="US cars NPP after log transformation",col="blue",ylab="MPG")
qqline(dat2$USCars)
# The points fall roughly along a straight line, so the log MPG of the US cars
# appears to be approximately normally distributed. After the log
# transformation, the points also lie closer to the reference line than they
# did for the untransformed US data.
# Check Japanese cars Normal Probability plot after log transformation
qqnorm(dat2$JapaneseCars,main="Japanese cars NPP after log transformation",col="red",ylab="MPG")
qqline(dat2$JapaneseCars)
# The points fall roughly along a straight line, so the log MPG of the Japanese
# cars appears to be approximately normally distributed. After the log
# transformation, the points also lie closer to the reference line than they
# did for the untransformed Japanese data.
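# The same optional Shapiro-Wilk check (again, not part of the original output)
# can be repeated on the log scale to back up the visual improvement seen in
# the Q-Q plots.
shapiro.test(dat2$USCars)
shapiro.test(na.omit(dat2$JapaneseCars))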
boxplot(dat2$USCars,dat2$JapaneseCars,main ="Variance Equality Check",names = c("US Cars", "Japanese Cars"),ylab="MPG")
# Compared with the previous box plot, the box heights (IQRs) are now similar,
# so there is no longer a large difference between the two variances; the
# equal-variance assumption appears reasonable after the log transformation.
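# Repeating the optional variance comparison on the log scale (not part of the
# original output); similar sample variances here support using the pooled
# (equal-variance) t-test below.
var(dat2$USCars)
var(dat2$JapaneseCars, na.rm = TRUE)
var.test(dat2$USCars, na.omit(dat2$JapaneseCars))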
# Null and alternative hypotheses for the two-sided test, where mu1 and mu2 are
# the mean log(MPG) of the US and Japanese cars, respectively:
# H0: mu1 = mu2, i.e. mu1 - mu2 = 0
# Ha: mu1 != mu2, i.e. mu1 - mu2 != 0
t.test(dat2$USCars,dat2$JapaneseCars,var.equal=TRUE)
##
## Two Sample t-test
##
## data: dat2$USCars and dat2$JapaneseCars
## t = -9.4828, df = 61, p-value = 1.306e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6417062 -0.4182053
## sample estimates:
## mean of x mean of y
## 2.741001 3.270957
# The sample mean of log(MPG) is 2.741001 for the US cars and 3.270957 for the
# Japanese cars. Since the p-value (1.306e-13) is less than 0.05, we reject the
# null hypothesis H0 and conclude that the mean log(MPG) of US cars is
# significantly different from (and less than) the mean log(MPG) of Japanese
# cars at the 0.05 level of significance.
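# As an illustrative sketch (not part of the original analysis), the pooled
# two-sample t statistic reported above can be reproduced by hand from the
# standard formula used by t.test() with var.equal = TRUE.
x  <- dat2$USCars
y  <- na.omit(dat2$JapaneseCars)
n1 <- length(x); n2 <- length(y)
sp2    <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / (n1 + n2 - 2)  # pooled variance
t_stat <- (mean(x) - mean(y)) / sqrt(sp2 * (1/n1 + 1/n2))          # should match t = -9.4828
df_pooled <- n1 + n2 - 2                                           # 35 + 28 - 2 = 61
p_val  <- 2 * pt(-abs(t_stat), df = df_pooled)                     # two-sided p-value
c(t = t_stat, df = df_pooled, p = p_val)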
Source Code
getwd()
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
print(dat)
length(dat$USCars)
Japanese_cars <- dat[c(1:28),2]
length(Japanese_cars)
qqnorm(dat$USCars,main="US cars Normal Probability Plot",col="blue",ylab="MPG")
qqline(dat$USCars)
qqnorm(dat$JapaneseCars,main="Japanese cars Normal Probability Plot",col="red",ylab="MPG")
qqline(dat$JapaneseCars)
boxplot(dat$USCars,dat$JapaneseCars,main ="Variance Equality Check",names = c("US Cars", "Japanese Cars"),ylab="MPG")
dat2 <- log(dat)
print(dat2)
qqnorm(dat2$USCars,main="US cars NPP after log transformation",col="blue",ylab="MPG")
qqline(dat2$USCars)
qqnorm(dat2$JapaneseCars,main="Japanese cars NPP after log transformation",col="red",ylab="MPG")
qqline(dat2$JapaneseCars)
boxplot(dat2$USCars,dat2$JapaneseCars,main ="Variance Equality Check",names = c("US Cars", "Japanese Cars"),ylab="MPG")
t.test(dat2$USCars,dat2$JapaneseCars,var.equal=TRUE)