# Load Data
# Read the US/Japanese cars dataset from GitHub into a data frame.
data <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")
rmarkdown::paged_table(data)
# Data Wrangling
# Column 1 is the US cars sample; keep only the observed (non-missing) values.
uscars <- data[[1]]
uscars <- uscars[!is.na(uscars)]
uscars
## [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11 9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
# Column 2 is the Japanese cars sample. It is shorter than column 1 (28 vs. 35
# values in the printed output), so the remaining rows are presumably NA
# padding from read.csv() -- TODO confirm against the CSV. Filtering on NA
# avoids hard-coding the row range 1:28.
jpcars <- data[[2]]
jpcars <- jpcars[!is.na(jpcars)]
jpcars
## [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28
# Normal QQ plot of the US cars sample. qqline() adds the reference line
# through the first and third quartiles, so departures from a straight line
# (i.e. from normality) can be judged against it.
qqnorm(uscars, main = "QQ plot for US Cars", col = "blue")
qqline(uscars, col = "blue")
The points lie approximately along a straight line, so the US cars sample is consistent with a normal distribution.
# Normal QQ plot of the Japanese cars sample. qqline() adds the reference line
# through the first and third quartiles, so departures from a straight line
# (i.e. from normality) can be judged against it.
qqnorm(jpcars, main = "QQ plot for Japanese Cars", col = "red")
qqline(jpcars, col = "red")
The points lie approximately along a straight line, so the Japanese cars sample is consistent with a normal distribution.
# Side-by-side boxplots of the two raw samples to compare their spread;
# the list names supply the group labels on the axis.
boxplot(list(uscars = uscars, jpcars = jpcars))
The boxplots show that the spread (variance) is not the same for US cars and Japanese cars.
Apply log Transformation
# Natural-log transform of both samples, applied to bring their spreads closer
# before comparing the means.
log_uscars <- log(uscars)
log_jpcars <- log(jpcars)
Check the Normality
# Normal QQ plot of the log-transformed US cars sample, with the qqline()
# reference line through the first and third quartiles.
qqnorm(log_uscars, main = "QQ plot for US Cars (log transform)")
qqline(log_uscars)
The points still lie approximately along a straight line, so the log-transformed US cars sample is consistent with a normal distribution.
# Normal QQ plot of the log-transformed Japanese cars sample, with the qqline()
# reference line through the first and third quartiles.
qqnorm(log_jpcars, main = "QQ plot for Japanese Cars (log transform)")
qqline(log_jpcars)
The points still lie approximately along a straight line, so the log-transformed Japanese cars sample is consistent with a normal distribution.
# Side-by-side boxplots of the log-transformed samples to compare their spread;
# the list names supply the group labels on the axis.
boxplot(list(uscars = log_uscars, jpcars = log_jpcars))
After the log transformation, the variances of the two samples are much
closer to each other.
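We test the following hypotheses on the log-transformed data, where $\mu_{US}$ and $\mu_{JP}$ are the population means of the log-transformed values for US and Japanese cars: $H_0: \mu_{US} = \mu_{JP}$ versus $H_a: \mu_{US} < \mu_{JP}$.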
# One-sided two-sample t-test on the log-transformed samples. var.equal = TRUE
# requests the pooled-variance test; alternative = "less" tests whether the
# mean of log_uscars is below the mean of log_jpcars.
t.test(
  x = log_uscars,
  y = log_jpcars,
  alternative = "less",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: log_uscars and log_jpcars
## t = -9.4828, df = 61, p-value = 6.528e-14
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.4366143
## sample estimates:
## mean of x mean of y
## 2.741001 3.270957
Because the p-value (6.528e-14, approximately 0) is far below any usual significance level, we reject the null hypothesis. On the log scale, the sample mean for US cars is 2.74 and the sample mean for Japanese cars is 3.27.
In conclusion, US cars have lower MPG than Japanese cars. Both samples are approximately normally distributed. Before applying the log transformation, the variances of the two samples are different. We also observe some outliers in the US cars sample.
# Complete analysis script: compares the US and Japanese car samples with a
# one-sided pooled-variance t-test on the log-transformed values.

# Load data
data <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv")

# Wrangling Data
# Column 1 is the US sample and column 2 the Japanese sample. Keep only the
# observed values of each column: the shorter column is presumably padded with
# NA by read.csv() -- TODO confirm against the CSV. Filtering on NA avoids
# hard-coding the row range 1:28.
uscars <- data[[1]]
uscars <- uscars[!is.na(uscars)]
uscars
jpcars <- data[[2]]
jpcars <- jpcars[!is.na(jpcars)]
jpcars

# Check Normal Distribution
# qqline() adds the reference line through the first and third quartiles so
# departures from a straight line can be judged against it.
qqnorm(uscars, main = "QQ plot for US Cars", col = "blue")
qqline(uscars, col = "blue")
qqnorm(jpcars, main = "QQ plot for Japanese Cars", col = "red")
qqline(jpcars, col = "red")

# Check Variance using boxplot
boxplot(uscars, jpcars, names = c("uscars", "jpcars"))

# Apply Log transformation
log_uscars <- log(uscars)
log_jpcars <- log(jpcars)

# Check Normal Distribution
qqnorm(log_uscars, main = "QQ plot for US Cars (log transform)")
qqline(log_uscars)
qqnorm(log_jpcars, main = "QQ plot for Japanese Cars (log transform)")
qqline(log_jpcars)

# Check Variance using boxplot
boxplot(log_uscars, log_jpcars, names = c("uscars", "jpcars"))

# Apply the two sample t-test
# alternative = "less" tests whether the mean of log_uscars is below the mean
# of log_jpcars; var.equal = TRUE requests the pooled-variance test.
t.test(
  x = log_uscars,
  y = log_jpcars,
  alternative = "less",
  var.equal = TRUE
)