# MPG observations, one column per country of origin. The Japan column has
# fewer observations than the US column, so its last seven rows are NA.
# NOTE(review): the name `cars` masks the built-in datasets::cars data set
# for the rest of the session - confirm that is intended.
cars <- read.csv(
  header = TRUE,
  text = "US,Japan
18,24
15,27
18,27
16,25
17,31
15,35
14,24
14,19
14,28
15,23
15,27
14,20
15,22
14,18
22,20
18,31
21,32
21,31
10,32
10,24
11,26
9,29
28,24
25,24
19,33
16,33
17,32
19,28
18,NA
14,NA
14,NA
14,NA
14,NA
12,NA
13,NA"
)

# Per-country samples with the missing observations removed.
us <- na.omit(cars[["US"]])
jp <- na.omit(cars[["Japan"]])

# 1. Normality Check ----

# Normal Q-Q plots of the raw MPG samples, drawn side by side.
# par() returns the settings it replaces; they are restored afterwards so the
# 1 x 2 layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
qqnorm(us, main = "US Cars MPG")
qqline(us)
qqnorm(jp, main = "Japanese Cars MPG")
qqline(jp)
par(old_par)

# 2. Variance Check (Boxplots) ----

# Side-by-side boxplots of raw MPG, one box per country; the list names
# supply the group labels.
boxplot(list(US = us, Japan = jp), main = "MPG by Country")

# 3. Log Transformation ----

# Natural-log transform of both samples.
log_us <- log(us)
log_jp <- log(jp)

# Normal Q-Q plots of the transformed samples, drawn side by side.
# The settings returned by par() are restored right after the pair of plots
# so the 1 x 2 layout set here is not carried over to the boxplot below.
old_par <- par(mfrow = c(1, 2))
qqnorm(log_us, main = "Log US MPG")
qqline(log_us)
qqnorm(log_jp, main = "Log Japan MPG")
qqline(log_jp)
par(old_par)

# Side-by-side boxplots of log-MPG, one box per country.
boxplot(log_us, log_jp, names = c("US", "Japan"), main = "Log-MPG by Country")

# 4. Hypothesis Test ----

# One-sided two-sample t-test on log-MPG.
#   Null:        H0: mean(log_US) = mean(log_Japan)
#   Alternative: Ha: mean(log_US) < mean(log_Japan)
# var.equal is left at its default (FALSE), so t.test() uses the Welch
# approximation and does not assume equal variances.
alpha <- 0.05  # significance level used for the conclusion below

t_result <- t.test(log_us, log_jp, alternative = "less")

# Sample means on the log scale
mean_us <- mean(log_us)
mean_jp <- mean(log_jp)

# Lines starting with "##" appear to be output recorded from an earlier run.
cat("Sample mean log-MPG (US):", mean_us, "\n")
## Sample mean log-MPG (US): 2.741001
cat("Sample mean log-MPG (Japan):", mean_jp, "\n")
## Sample mean log-MPG (Japan): 3.270957
cat("t-statistic:", t_result$statistic, "\n")
## t-statistic: -9.804013
cat("p-value:", t_result$p.value, "\n")
## p-value: 2.007635e-14

# Plain conclusion. The test is one-sided, so failing to reject H0 only means
# there is no evidence that US MPG is lower; it does not show the two groups
# are the same, and the fallback message is worded accordingly.
if (t_result$p.value < alpha) {
  cat("Conclusion: US cars have significantly lower MPG than Japanese cars.\n")
} else {
  cat("Conclusion: No significant evidence that US cars have lower MPG than Japanese cars.\n")
}
## Conclusion: US cars have significantly lower MPG than Japanese cars.