# Load the US vs. Japanese cars data set from the course repository.
df <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
# The US sample is the whole first column.
UScars <- df[, 1]
# The Japanese sample keeps only rows 1-28 of the second column
# (presumably the remaining rows are NA padding -- TODO confirm against the CSV).
japanesecars <- df[1:28, 2]
UScars
##  [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11  9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
japanesecars
##  [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28
length(UScars)
## [1] 35
length(japanesecars)
## [1] 28
# Normal Q-Q plot of the raw US sample, with its reference line.
qqnorm(UScars, main = "Normal Q-Q plot of US cars")
qqline(UScars)

# Normal Q-Q plot of the raw Japanese sample, with its reference line.
qqnorm(japanesecars, main = "Normal Q-Q plot of Japanese cars")
qqline(japanesecars)

Answer 1: The normal Q-Q plot of UScars shows a deviation from normality, i.e., the data points do not follow a straight line, whereas the normal Q-Q plot of the Japanese cars is consistent with normality, with the points following a straight line.

# Side-by-side box plots of the two raw samples; the list names supply the
# group labels shown on the axis.
boxplot(
  list(Uscars = UScars, japanesecars = japanesecars),
  main = "UScars vs. Japanesecars Box Plots"
)

Answer 2: There is a large difference in the magnitude of the variance between UScars and japanesecars, as the boxes differ noticeably in length.

# Natural-log transform of both samples.
lUScars <- log(UScars)
ljapanesecars <- log(japanesecars)
lUScars
##  [1] 2.890372 2.708050 2.890372 2.772589 2.833213 2.708050 2.639057 2.639057
##  [9] 2.639057 2.708050 2.708050 2.639057 2.708050 2.639057 3.091042 2.890372
## [17] 3.044522 3.044522 2.302585 2.302585 2.397895 2.197225 3.332205 3.218876
## [25] 2.944439 2.772589 2.833213 2.944439 2.890372 2.639057 2.639057 2.639057
## [33] 2.639057 2.484907 2.564949
ljapanesecars
##  [1] 3.178054 3.295837 3.295837 3.218876 3.433987 3.555348 3.178054 2.944439
##  [9] 3.332205 3.135494 3.295837 2.995732 3.091042 2.890372 2.995732 3.433987
## [17] 3.465736 3.433987 3.465736 3.178054 3.258097 3.367296 3.178054 3.178054
## [25] 3.496508 3.496508 3.465736 3.332205
# Normal Q-Q plot of the log-transformed US sample, with its reference line.
qqnorm(lUScars, main = "Normal Q-Q plot of US cars")
qqline(lUScars)

# Normal Q-Q plot of the log-transformed Japanese sample, with its reference line.
qqnorm(ljapanesecars, main = "Normal Q-Q plot of japanesecars")
qqline(ljapanesecars)

# Side-by-side box plots of the log-transformed samples.
boxplot(lUScars, ljapanesecars,
  main = "US Cars vs. Japanesecars Box Plots",
  names = c("USCars", "JapaneseCars")
)

Answer 3: The interquartile range of both the log-transformed US cars and the log-transformed Japanese cars is approximately 0.2. The log transformation has substantially reduced the difference in variance between the two samples.

# Pooled-variance two-sample t-test on the log-transformed samples, using the
# one-sided alternative that the first mean is less than the second.
r <- t.test(
  x = lUScars,
  y = ljapanesecars,
  alternative = "less",
  var.equal = TRUE
)
r
## 
##  Two Sample t-test
## 
## data:  lUScars and ljapanesecars
## t = -9.4828, df = 61, p-value = 6.528e-14
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.4366143
## sample estimates:
## mean of x mean of y 
##  2.741001  3.270957

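Data: the null and alternative hypotheses, tested at the 0.05 level of significance (with \(\mu_{1}\) and \(\mu_{2}\) the means of the log-transformed US and Japanese samples; the alternative is one-sided, matching the test that was run): \[H_{0}:\mu_{1}=\mu_{2}\] \[H_{1}:\mu_{1}<\mu_{2}\] Answer 4a: sample average of the log-transformed US cars: 2.741001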

sample average of the log-transformed Japanese cars: 3.270957

4b: The t-test gave a p-value of 6.528e-14, which is far less than 0.05, so we reject the null hypothesis.

Unevaluated R code:

# Load data ----
df <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
# The US sample is the whole first column.
UScars <- df[, 1]
# The Japanese sample keeps only rows 1-28 of the second column
# (presumably the remaining rows are NA padding -- TODO confirm against the CSV).
japanesecars <- df[1:28, 2]
UScars
japanesecars
length(UScars)
length(japanesecars)

# Raw data: normality and spread ----
qqnorm(UScars, main = "Normal Q-Q plot of US cars")
qqline(UScars)
qqnorm(japanesecars, main = "Normal Q-Q plot of Japanese cars")
qqline(japanesecars)
boxplot(UScars, japanesecars,
  main = "UScars vs. Japanesecars Box Plots",
  names = c("Uscars", "japanesecars")
)

# Log-transformed data: normality and spread ----
lUScars <- log(UScars)
ljapanesecars <- log(japanesecars)
lUScars
ljapanesecars
qqnorm(lUScars, main = "Normal Q-Q plot of US cars")
qqline(lUScars)
qqnorm(ljapanesecars, main = "Normal Q-Q plot of japanesecars")
qqline(ljapanesecars)
boxplot(lUScars, ljapanesecars,
  main = "US Cars vs. Japanesecars Box Plots",
  names = c("USCars", "JapaneseCars")
)

# Pooled-variance two-sample t-test on the log scale ----
# One-sided alternative: the first mean is less than the second.
r <- t.test(lUScars, ljapanesecars, var.equal = TRUE, alternative = "less")
r