# Load the car data set: one column per origin (USCars, JapaneseCars).
df <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)

# Print the raw values of each column. The recorded output shows that the
# JapaneseCars column is shorter and padded with NA at the end.
df[["USCars"]]
##  [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11  9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
df[["JapaneseCars"]]
##  [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28 NA NA NA NA NA NA NA
# Normal Q-Q plots of the raw values, one per sample.
# Each plot gets its own title so the two can be told apart, and qqline()
# adds the reference line (through the first and third quartiles) against
# which the linearity of the points is judged. Both functions ignore NA.
qqnorm(df$USCars, main = "Normal Q-Q Plot: US cars")
qqline(df$USCars)

qqnorm(df$JapaneseCars, main = "Normal Q-Q Plot: Japanese cars")
qqline(df$JapaneseCars)

# The normal probability plots seem approximately linear, i.e. both samples look roughly normally distributed.

# Side-by-side boxplots of the raw values, one box per car origin.
boxplot(
  df[["USCars"]],
  df[["JapaneseCars"]],
  names = c("US", "Japan"),
  main = "Boxplot of car Origins",
  xlab = "car origin",
  ylab = "mpg"
)

# The spreads of the two groups differ in the boxplot (the US sample also has high outliers), so the variances are not assumed to be equal.

# Build df2: a copy of df with the natural log of each column appended.
df2 <- df
df2$log_USCars <- log(df2$USCars)
df2$log_JapaneseCars <- log(df2$JapaneseCars)

# Print the full table (original and log-transformed columns).
df2
##    USCars JapaneseCars log_USCars log_JapaneseCars
## 1      18           24   2.890372         3.178054
## 2      15           27   2.708050         3.295837
## 3      18           27   2.890372         3.295837
## 4      16           25   2.772589         3.218876
## 5      17           31   2.833213         3.433987
## 6      15           35   2.708050         3.555348
## 7      14           24   2.639057         3.178054
## 8      14           19   2.639057         2.944439
## 9      14           28   2.639057         3.332205
## 10     15           23   2.708050         3.135494
## 11     15           27   2.708050         3.295837
## 12     14           20   2.639057         2.995732
## 13     15           22   2.708050         3.091042
## 14     14           18   2.639057         2.890372
## 15     22           20   3.091042         2.995732
## 16     18           31   2.890372         3.433987
## 17     21           32   3.044522         3.465736
## 18     21           31   3.044522         3.433987
## 19     10           32   2.302585         3.465736
## 20     10           24   2.302585         3.178054
## 21     11           26   2.397895         3.258097
## 22      9           29   2.197225         3.367296
## 23     28           24   3.332205         3.178054
## 24     25           24   3.218876         3.178054
## 25     19           33   2.944439         3.496508
## 26     16           33   2.772589         3.496508
## 27     17           32   2.833213         3.465736
## 28     19           28   2.944439         3.332205
## 29     18           NA   2.890372               NA
## 30     14           NA   2.639057               NA
## 31     14           NA   2.639057               NA
## 32     14           NA   2.639057               NA
## 33     14           NA   2.639057               NA
## 34     12           NA   2.484907               NA
## 35     13           NA   2.564949               NA
# Normal Q-Q plots of the log-transformed values, one per sample.
# Each plot gets its own title so the two can be told apart, and qqline()
# adds the reference line (through the first and third quartiles) against
# which the linearity of the points is judged. Both functions ignore NA.
qqnorm(df2$log_USCars, main = "Normal Q-Q Plot: log(US cars)")
qqline(df2$log_USCars)

qqnorm(df2$log_JapaneseCars, main = "Normal Q-Q Plot: log(Japanese cars)")
qqline(df2$log_JapaneseCars)

#The log-transformed data is now approximately normally distributed for both US and Japanese cars.

# Side-by-side boxplots of the log-transformed values, one box per car origin.
# The x axis shows the origin category itself (only the y axis is on the log
# scale), so it is labelled "Car Origin" rather than "log of Car Origin".
boxplot(
  df2$log_USCars,
  df2$log_JapaneseCars,
  names = c("US", "Japan"),
  main = "Boxplot of Transformed value",
  xlab = "Car Origin",
  ylab = "log of mpg"
)

# The data look approximately normal after the log transformation.

# Read Data
# The file holds two independent samples side by side, one column each.
# The columns have different lengths, so the shorter one is padded with NA.
cars <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv",
                 na.strings = c("", "NA"))

# Convert columns to numeric safely
cars$USCars <- as.numeric(cars$USCars)
cars$JapaneseCars <- as.numeric(cars$JapaneseCars)

# Remove only rows in which BOTH columns are missing.
# A row-wise na.omit() would also drop every valid US observation that sits
# next to an NA padding cell of the shorter JapaneseCars column, silently
# shrinking the US sample. Missing values are handled per column below.
mpg_cols <- c("USCars", "JapaneseCars")
cars_clean <- cars[rowSums(!is.na(cars[mpg_cols])) > 0, ]

# Take log transform (NA stays NA)
cars_clean$log_US <- log(cars_clean$USCars)
cars_clean$log_Japan <- log(cars_clean$JapaneseCars)

# Sample averages, each computed over the non-missing values of its own column
mean_log_us <- mean(cars_clean$log_US, na.rm = TRUE)
mean_log_japan <- mean(cars_clean$log_Japan, na.rm = TRUE)

cat("Mean log(mpg) for US cars:", mean_log_us, "\n")
cat("Mean log(mpg) for Japanese cars:", mean_log_japan, "\n")

# Welch two-sample t-test (unequal variances).
# For an unpaired test t.test() removes missing values from each sample
# separately, so every available observation of both groups is used.
t_test_result <- t.test(cars_clean$log_US, cars_clean$log_Japan, var.equal = FALSE)
print(t_test_result)

# Printed output is not recorded here; run the script to see the sample means
# and the Welch test result.

# Conclusion: At the 5% significance level, we reject the null hypothesis that the mean log(mpg) is equal for US and Japanese cars. Japanese cars have the higher mean log(mpg), i.e. they are significantly more fuel efficient.