# Read the US/Japanese cars CSV from GitHub into `df`, then echo the table.
df <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/US_Japanese_Cars.csv"
)
df
## USCars JapaneseCars
## 1 18 24
## 2 15 27
## 3 18 27
## 4 16 25
## 5 17 31
## 6 15 35
## 7 14 24
## 8 14 19
## 9 14 28
## 10 15 23
## 11 15 27
## 12 14 20
## 13 15 22
## 14 14 18
## 15 22 20
## 16 18 31
## 17 21 32
## 18 21 31
## 19 10 32
## 20 10 24
## 21 11 26
## 22 9 29
## 23 28 24
## 24 25 24
## 25 19 33
## 26 16 33
## 27 17 32
## 28 19 28
## 29 18 NA
## 30 14 NA
## 31 14 NA
## 32 14 NA
## 33 14 NA
## 34 12 NA
## 35 13 NA
# Show each sample as a plain vector (the JapaneseCars column is NA-padded).
df[["USCars"]]
## [1] 18 15 18 16 17 15 14 14 14 15 15 14 15 14 22 18 21 21 10 10 11 9 28 25 19
## [26] 16 17 19 18 14 14 14 14 12 13
df[["JapaneseCars"]]
## [1] 24 27 27 25 31 35 24 19 28 23 27 20 22 18 20 31 32 31 32 24 26 29 24 24 33
## [26] 33 32 28 NA NA NA NA NA NA NA
# Normal Q-Q plot for each sample. Each plot gets its own title so the two
# can be told apart, and a reference line (qqline) to make departures from
# normality easier to judge. Missing values are dropped by qqnorm/qqline.
qqnorm(df$USCars, main = "Normal Q-Q Plot: US Cars")
qqline(df$USCars)

qqnorm(df$JapaneseCars, main = "Normal Q-Q Plot: Japanese Cars")
qqline(df$JapaneseCars)

# Side-by-side box plots to compare the spread of the two samples; `names`
# labels the groups instead of the default "1" and "2".
boxplot(
  df$USCars, df$JapaneseCars,
  names = c("US Cars", "Japanese Cars")
)

From the box plot, the variance does not appear to be constant across the two samples (US cars vs. Japanese cars).
# Build df2: a copy of df with the natural log of each column appended,
# then echo it. NA entries stay NA after log().
df2 <- df
df2$log_USCars <- log(df2$USCars)
df2$log_JapaneseCars <- log(df2$JapaneseCars)
df2
## USCars JapaneseCars log_USCars log_JapaneseCars
## 1 18 24 2.890372 3.178054
## 2 15 27 2.708050 3.295837
## 3 18 27 2.890372 3.295837
## 4 16 25 2.772589 3.218876
## 5 17 31 2.833213 3.433987
## 6 15 35 2.708050 3.555348
## 7 14 24 2.639057 3.178054
## 8 14 19 2.639057 2.944439
## 9 14 28 2.639057 3.332205
## 10 15 23 2.708050 3.135494
## 11 15 27 2.708050 3.295837
## 12 14 20 2.639057 2.995732
## 13 15 22 2.708050 3.091042
## 14 14 18 2.639057 2.890372
## 15 22 20 3.091042 2.995732
## 16 18 31 2.890372 3.433987
## 17 21 32 3.044522 3.465736
## 18 21 31 3.044522 3.433987
## 19 10 32 2.302585 3.465736
## 20 10 24 2.302585 3.178054
## 21 11 26 2.397895 3.258097
## 22 9 29 2.197225 3.367296
## 23 28 24 3.332205 3.178054
## 24 25 24 3.218876 3.178054
## 25 19 33 2.944439 3.496508
## 26 16 33 2.772589 3.496508
## 27 17 32 2.833213 3.465736
## 28 19 28 2.944439 3.332205
## 29 18 NA 2.890372 NA
## 30 14 NA 2.639057 NA
## 31 14 NA 2.639057 NA
## 32 14 NA 2.639057 NA
## 33 14 NA 2.639057 NA
## 34 12 NA 2.484907 NA
## 35 13 NA 2.564949 NA
# Normal Q-Q plots of the log-transformed samples, each with its own title
# and a reference line so departures from normality are easier to judge.
qqnorm(df2$log_USCars, main = "Normal Q-Q Plot: log(US Cars)")
qqline(df2$log_USCars)

qqnorm(df2$log_JapaneseCars, main = "Normal Q-Q Plot: log(Japanese Cars)")
qqline(df2$log_JapaneseCars)

# Box plots of the log-transformed samples, so their spread can be compared
# with the raw-data box plot drawn earlier.
boxplot(
  df2$log_USCars, df2$log_JapaneseCars,
  names = c("log(US Cars)", "log(Japanese Cars)")
)

After the log transformation, there is very little improvement in the box plot.