Assignment 5

Author

Brady Heath

# Load the cars data set into a data frame and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where that exact file exists; consider a project-relative path.
cars <- read.csv("C:/Users/bheat/Downloads/cars_outliers.txt")
head(cars)
   mpg cylinders cubicinches  hp weightlbs time.to.60 year    brand
1 14.0         8         350 165      4209         12 1972      US.
2 31.9         4          89  71      1925         14 1980  Europe.
3 17.0         8         302 140      3449         11 1971      US.
4 15.0         8         400 150      3761         10 1971      US.
5 30.5         4          98  63      2051         17 1978      US.
6 23.0         8         350 125      3900         17 1980      US.
# Inspect column types; note that brand is read as character and its values
# carry a leading space and a trailing dot that need cleaning.
str(cars)
'data.frame':   261 obs. of  8 variables:
 $ mpg        : num  14 31.9 17 15 30.5 23 13 14 25.4 37.7 ...
 $ cylinders  : int  8 4 8 8 4 8 8 8 5 4 ...
 $ cubicinches: int  350 89 302 400 98 350 351 440 183 89 ...
 $ hp         : int  165 71 140 150 63 125 158 215 77 62 ...
 $ weightlbs  : num  4209 1925 3449 3761 2051 ...
 $ time.to.60 : int  12 14 11 10 17 17 13 9 20 17 ...
 $ year       : int  1972 1980 1971 1971 1978 1980 1974 1971 1980 1982 ...
 $ brand      : chr  " US." " Europe." " US." " US." ...
# Per-column summaries of the raw data, used to spot implausible extremes
# before the outlier checks below.
summary(cars)
      mpg           cylinders     cubicinches          hp       
 Min.   : 10.00   Min.   :3.00   Min.   : 68.0   Min.   : 46.0  
 1st Qu.: 16.90   1st Qu.:4.00   1st Qu.:101.0   1st Qu.: 75.0  
 Median : 22.00   Median :6.00   Median :156.0   Median : 95.0  
 Mean   : 24.99   Mean   :5.59   Mean   :201.1   Mean   :106.4  
 3rd Qu.: 29.00   3rd Qu.:8.00   3rd Qu.:302.0   3rd Qu.:138.0  
 Max.   :500.00   Max.   :8.00   Max.   :455.0   Max.   :230.0  
   weightlbs        time.to.60         year         brand          
 Min.   : 192.5   Min.   : 8.00   Min.   :1971   Length:261        
 1st Qu.:2246.0   1st Qu.:14.00   1st Qu.:1974   Class :character  
 Median :2835.0   Median :16.00   Median :1977   Mode  :character  
 Mean   :2991.4   Mean   :15.55   Mean   :1977                     
 3rd Qu.:3664.0   3rd Qu.:17.00   3rd Qu.:1980                     
 Max.   :4997.0   Max.   :25.00   Max.   :1983                     
# Normalise the brand labels: strip surrounding whitespace, remove dots, and
# fold the alternative spellings "USA" and "France" into "US" and "Europe".
cars$brand <- gsub("\\.", "", trimws(cars$brand))
cars$brand[cars$brand %in% "USA"] <- "US"
cars$brand[cars$brand %in% "France"] <- "Europe"
# Confirm which distinct labels remain after cleaning.
unique(cars$brand)
[1] "US"     "Europe" "Japan" 
#' Flag outliers in a numeric vector using IQR fences
#'
#' Values strictly below `Q1 - k * IQR` or strictly above `Q3 + k * IQR` are
#' reported. Quartiles come from `quantile()` with its default method and
#' `na.rm = TRUE`, so missing values are ignored when computing the fences
#' and are never reported as outliers.
#'
#' @param x A numeric vector.
#' @param k Fence multiplier applied to the IQR. The default of 1.5 keeps the
#'   original behaviour; larger values flag only more extreme points.
#' @return A list with `index` (integer positions of the outliers in `x`),
#'   `lower` and `upper` (the fence values, which keep the "25%" / "75%"
#'   names that `quantile()` attaches).
find_outliers <- function(x, k = 1.5) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  iqr_val <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - k * iqr_val
  upper <- quartiles[2] + k * iqr_val
  # which() drops NA comparisons, so missing values never appear in `index`.
  list(index = which(x < lower | x > upper), lower = lower, upper = upper)
}
# Visual check of mpg: a horizontal boxplot of the column.
boxplot(
  cars$mpg,
  main = "Boxplot of MPG",
  col = "lightblue",
  horizontal = TRUE
)

# List the rows outside the IQR fences, with brand shown for context.
mpg_out <- find_outliers(cars$mpg)
cars[mpg_out$index, c("mpg", "brand")]
   mpg brand
43 500    US
# Keep only rows whose mpg is below 100; this removes the mpg = 500 record
# listed above.
cars <- cars[cars$mpg < 100, ]

# Same visual and IQR-fence check for vehicle weight.
boxplot(
  cars$weightlbs,
  main = "Boxplot of Weight (lbs)",
  col = "lightgreen",
  horizontal = TRUE
)

# NOTE(review): summary() reports a minimum weight of 192.5 lbs, yet this
# check returns no rows, so that value sits inside the IQR fences. Confirm
# whether it is a data-entry error that needs separate handling.
wt_out <- find_outliers(cars$weightlbs)
cars[wt_out$index, c("weightlbs", "brand")]
[1] weightlbs brand    
<0 rows> (or 0-length row.names)
# Remaining numeric columns to screen; mpg and weightlbs are checked
# individually elsewhere in this script.
numeric_cols <- c("cylinders", "cubicinches", "hp", "time.to.60", "year")

# For each column: draw a boxplot, then print either the flagged rows or a
# message saying that the IQR rule found nothing.
for (col in numeric_cols) {
  cat("\nChecking", col, "for outliers:\n")
  boxplot(
    cars[[col]],
    main = paste("Boxplot of", col),
    col = "lightpink",
    horizontal = TRUE
  )
  out <- find_outliers(cars[[col]])
  if (length(out$index) > 0) {
    print(cars[out$index, c(col, "brand")])
  } else {
    cat("No outliers found (IQR method)\n")
  }
}

Checking cylinders for outliers:

No outliers found (IQR method)

Checking cubicinches for outliers:

No outliers found (IQR method)

Checking hp for outliers:

No outliers found (IQR method)

Checking time.to.60 for outliers:

    time.to.60  brand
8            9     US
52          25 Europe
53           8     US
107         24 Europe
165         24 Europe
198         22 Europe
203         22     US
206          9     US
236         25 Europe
245          9     US
248         22 Europe

Checking year for outliers:

No outliers found (IQR method)
# Re-check the column summaries after the mpg filter and brand cleaning.
summary(cars)
      mpg          cylinders      cubicinches          hp       
 Min.   :10.00   Min.   :3.000   Min.   : 68.0   Min.   : 46.0  
 1st Qu.:16.80   1st Qu.:4.000   1st Qu.:100.2   1st Qu.: 75.0  
 Median :22.00   Median :5.500   Median :156.0   Median : 95.0  
 Mean   :23.16   Mean   :5.588   Mean   :200.9   Mean   :106.3  
 3rd Qu.:28.85   3rd Qu.:8.000   3rd Qu.:302.5   3rd Qu.:138.2  
 Max.   :46.60   Max.   :8.000   Max.   :455.0   Max.   :230.0  
   weightlbs        time.to.60         year         brand          
 Min.   : 192.5   Min.   : 8.00   Min.   :1971   Length:260        
 1st Qu.:2245.8   1st Qu.:14.00   1st Qu.:1974   Class :character  
 Median :2832.5   Median :16.00   Median :1977   Mode  :character  
 Mean   :2988.9   Mean   :15.55   Mean   :1977                     
 3rd Qu.:3666.0   3rd Qu.:17.00   3rd Qu.:1980                     
 Max.   :4997.0   Max.   :25.00   Max.   :1983                     
# Confirm the final structure: row count after filtering and cleaned brand labels.
str(cars)
'data.frame':   260 obs. of  8 variables:
 $ mpg        : num  14 31.9 17 15 30.5 23 13 14 25.4 37.7 ...
 $ cylinders  : int  8 4 8 8 4 8 8 8 5 4 ...
 $ cubicinches: int  350 89 302 400 98 350 351 440 183 89 ...
 $ hp         : int  165 71 140 150 63 125 158 215 77 62 ...
 $ weightlbs  : num  4209 1925 3449 3761 2051 ...
 $ time.to.60 : int  12 14 11 10 17 17 13 9 20 17 ...
 $ year       : int  1972 1980 1971 1971 1978 1980 1974 1971 1980 1982 ...
 $ brand      : chr  "US" "Europe" "US" "US" ...