# Show the current working directory. This is informational only: the data
# below are read from a URL, not from a local path.
getwd()
## [1] "/Users/abir/Desktop/FA"
# Read the car MPG data (columns USCars and JapaneseCars) and echo it.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)
print(dat)
##    USCars JapaneseCars
## 1      18           24
## 2      15           27
## 3      18           27
## 4      16           25
## 5      17           31
## 6      15           35
## 7      14           24
## 8      14           19
## 9      14           28
## 10     15           23
## 11     15           27
## 12     14           20
## 13     15           22
## 14     14           18
## 15     22           20
## 16     18           31
## 17     21           32
## 18     21           31
## 19     10           32
## 20     10           24
## 21     11           26
## 22      9           29
## 23     28           24
## 24     25           24
## 25     19           33
## 26     16           33
## 27     17           32
## 28     19           28
## 29     18           NA
## 30     14           NA
## 31     14           NA
## 32     14           NA
## 33     14           NA
## 34     12           NA
## 35     13           NA
# Sample size of the US cars (this column has no missing values in the
# printed data).
length(dat$USCars)
## [1] 35
# The JapaneseCars column is padded with NA so that both columns have the same
# length. Keep only the observed values, selected by name and by !is.na()
# rather than by a hard-coded row range and column position, so the count
# stays correct if the data file changes.
Japanese_cars <- dat$JapaneseCars[!is.na(dat$JapaneseCars)]
length(Japanese_cars)
## [1] 28
# The sample sizes are n1 = 35 (US cars) and n2 = 28 (Japanese cars). Both are
# below 40, so the samples are not large enough to rely on the central limit
# theorem; the normality of each sample has to be checked directly.

# Check US cars Normal Probability plot
# Normal Q-Q plot of the raw US car MPG with its reference line.
qqnorm(
  dat$USCars,
  main = "US cars Normal Probability Plot",
  col = "blue",
  ylab = "MPG"
)
qqline(dat$USCars)

# Since the data points fall almost on a straight line, the MPG of the
# US cars appears to be normally distributed.

# Check Japanese cars Normal Probability plot
# Normal Q-Q plot of the raw Japanese car MPG with its reference line.
qqnorm(
  dat$JapaneseCars,
  main = "Japanese cars Normal Probability Plot",
  col = "red",
  ylab = "MPG"
)
qqline(dat$JapaneseCars)

# Since the data points fall almost on a straight line, the MPG of the
# Japanese cars appears to be normally distributed.

# Side-by-side box plots of the raw MPG, used to compare the spread of the
# two groups before deciding whether a pooled-variance test is appropriate.
boxplot(
  dat$USCars,
  dat$JapaneseCars,
  main = "Variance Equality Check",
  names = c("US Cars", "Japanese Cars"),
  ylab = "MPG"
)

# Looking at the box plot, the IQRs (the heights of the boxes) are not the
# same, so there is a large difference between the two variances and the
# variance does not appear to be constant across the two groups.

# Apply the natural log to every column of the data frame and echo the result.
# The NA padding in JapaneseCars remains NA after the transformation.
dat2 <- log(dat)
print(dat2)
##      USCars JapaneseCars
## 1  2.890372     3.178054
## 2  2.708050     3.295837
## 3  2.890372     3.295837
## 4  2.772589     3.218876
## 5  2.833213     3.433987
## 6  2.708050     3.555348
## 7  2.639057     3.178054
## 8  2.639057     2.944439
## 9  2.639057     3.332205
## 10 2.708050     3.135494
## 11 2.708050     3.295837
## 12 2.639057     2.995732
## 13 2.708050     3.091042
## 14 2.639057     2.890372
## 15 3.091042     2.995732
## 16 2.890372     3.433987
## 17 3.044522     3.465736
## 18 3.044522     3.433987
## 19 2.302585     3.465736
## 20 2.302585     3.178054
## 21 2.397895     3.258097
## 22 2.197225     3.367296
## 23 3.332205     3.178054
## 24 3.218876     3.178054
## 25 2.944439     3.496508
## 26 2.772589     3.496508
## 27 2.833213     3.465736
## 28 2.944439     3.332205
## 29 2.890372           NA
## 30 2.639057           NA
## 31 2.639057           NA
## 32 2.639057           NA
## 33 2.639057           NA
## 34 2.484907           NA
## 35 2.564949           NA
# Check US cars Normal Probability plot After log transformation
# Normal Q-Q plot of the log-transformed US car values with its reference
# line. The y-axis is labelled log(MPG) because dat2 holds log(dat), not the
# raw MPG.
qqnorm(
  dat2$USCars,
  main = "US cars NPP after log transformation",
  col = "blue",
  ylab = "log(MPG)"
)
qqline(dat2$USCars)

# Since the data points fall almost on a straight line, the log of the MPG
# of the US cars appears to be normally distributed.
# After the log transformation, the points on the NPP also lie closer to
# the straight line than they did before the transformation of the US cars.

# Check Japanese cars Normal Probability plot After log transformation
# Normal Q-Q plot of the log-transformed Japanese car values with its
# reference line. The y-axis is labelled log(MPG) because dat2 holds
# log(dat), not the raw MPG.
qqnorm(
  dat2$JapaneseCars,
  main = "Japanese cars NPP after log transformation",
  col = "red",
  ylab = "log(MPG)"
)
qqline(dat2$JapaneseCars)

# Since the data points fall almost on a straight line, the log of the MPG
# of the Japanese cars appears to be normally distributed.
# After the log transformation, the points on the NPP also lie closer to
# the straight line than they did before the transformation of the Japanese cars.

# Side-by-side box plots of the log-transformed values, used to re-check the
# equal-variance assumption. The y-axis is labelled log(MPG) because dat2
# holds log(dat), not the raw MPG.
boxplot(
  dat2$USCars,
  dat2$JapaneseCars,
  main = "Variance Equality Check",
  names = c("US Cars", "Japanese Cars"),
  ylab = "log(MPG)"
)

# Looking at the box plot, the IQRs (the heights of the boxes) are now about
# the same, unlike in the previous box plot of the untransformed data.
# So there is no longer a large difference between the variances, and the
# variance now appears to be constant after the log transformation.

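# The null and the alternative hypothesis statements, where u1 and u2 are the
# mean log(MPG) of the US cars and of the Japanese cars respectively:
# H0: u1 = u2, i.e. u1 - u2 = 0
# Ha: u1 != u2, i.e. u1 - u2 != 0 (two-sided, as tested by t.test below)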
# Two-sided, pooled-variance two-sample t-test on the log-transformed values.
t.test(
  x = dat2$USCars,
  y = dat2$JapaneseCars,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  dat2$USCars and dat2$JapaneseCars
## t = -9.4828, df = 61, p-value = 1.306e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6417062 -0.4182053
## sample estimates:
## mean of x mean of y 
##  2.741001  3.270957
# The sample average of the log of the MPG is 2.741001 for the US cars and
# 3.270957 for the Japanese cars.
# Since the p-value (1.306e-13) < 0.05, we reject the null hypothesis H0.
# Hence the conclusion is that the mean log(MPG) of the US cars is
# significantly different from (lower than) the mean log(MPG) of the
# Japanese cars at the 0.05 level of significance.

# Source Code

# Load data ----
# getwd() is informational only: the data are read from a URL.
getwd()
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
print(dat)

# Sample sizes ----
length(dat$USCars)
# JapaneseCars is padded with NA to match the length of USCars. Keep the
# observed values by name and by !is.na() instead of hard-coding rows 1:28
# and column 2.
Japanese_cars <- dat$JapaneseCars[!is.na(dat$JapaneseCars)]
length(Japanese_cars)

# Normality and equal-variance checks on the raw MPG ----
qqnorm(
  dat$USCars,
  main = "US cars Normal Probability Plot",
  col = "blue",
  ylab = "MPG"
)
qqline(dat$USCars)
qqnorm(
  dat$JapaneseCars,
  main = "Japanese cars Normal Probability Plot",
  col = "red",
  ylab = "MPG"
)
qqline(dat$JapaneseCars)
boxplot(
  dat$USCars,
  dat$JapaneseCars,
  main = "Variance Equality Check",
  names = c("US Cars", "Japanese Cars"),
  ylab = "MPG"
)

# Log transformation ----
# Natural log of every column; the NA padding stays NA.
dat2 <- log(dat)
print(dat2)

# Same checks on the log scale ----
# The y-axes are labelled log(MPG) because dat2 holds log(dat), not raw MPG.
qqnorm(
  dat2$USCars,
  main = "US cars NPP after log transformation",
  col = "blue",
  ylab = "log(MPG)"
)
qqline(dat2$USCars)
qqnorm(
  dat2$JapaneseCars,
  main = "Japanese cars NPP after log transformation",
  col = "red",
  ylab = "log(MPG)"
)
qqline(dat2$JapaneseCars)
boxplot(
  dat2$USCars,
  dat2$JapaneseCars,
  main = "Variance Equality Check",
  names = c("US Cars", "Japanese Cars"),
  ylab = "log(MPG)"
)

# Two-sided, pooled-variance two-sample t-test on the log scale ----
t.test(dat2$USCars, dat2$JapaneseCars, var.equal = TRUE)