# 1.

# Read the Cars2020 data set from disk and show its structure.
cars_csv <- "~/Documents/R code for Stats/Cars2020.csv"
Cars2020 <- read.csv(file = cars_csv)
str(object = Cars2020)
## 'data.frame':    110 obs. of  21 variables:
##  $ Make     : chr  "Acura" "Acura" "Audi" "Audi" ...
##  $ Model    : chr  "MDX" "RLX" "A3" "A4" ...
##  $ Type     : chr  "SUV" "Sedan" "Sedan" "Sporty" ...
##  $ LowPrice : num  44.4 54.9 33.3 37.4 54.9 ...
##  $ HighPrice: num  60.1 61 43 45.7 73.9 ...
##  $ CityMPG  : int  14 15 18 18 17 20 15 18 19 16 ...
##  $ HwyMPG   : int  31 36 40 40 39 27 33 35 44 40 ...
##  $ Seating  : int  7 5 5 5 5 5 5 4 5 5 ...
##  $ Drive    : chr  "AWD" "AWD" "AWD" "AWD" ...
##  $ Acc030   : num  2.8 2.7 3.2 2.7 2.8 2.4 3.2 2.5 2.6 2.9 ...
##  $ Acc060   : num  6.8 6.5 8.3 6.3 6.8 6.1 7.8 6.7 6.4 7.2 ...
##  $ QtrMile  : num  15.3 15 16.4 14.9 15.3 14.5 16.1 14.8 14.8 15.5 ...
##  $ Braking  : int  135 128 124 135 129 133 126 113 129 130 ...
##  $ FuelCap  : num  19.5 18.5 13.2 15.3 19.3 21.7 15.9 14.5 15.6 17.9 ...
##  $ Length   : int  196 198 175 186 195 209 177 165 186 195 ...
##  $ Width    : int  77 74 70 73 74 77 73 72 72 74 ...
##  $ Height   : int  67 58 56 56 57 59 63 53 57 58 ...
##  $ Wheelbase: int  111 112 104 111 115 123 106 99 112 117 ...
##  $ UTurn    : int  40 40 37 40 38 43 40 36 41 42 ...
##  $ Weight   : int  4200 3930 3135 3630 4015 4810 3880 3140 3640 3950 ...
##  $ Size     : chr  "Midsized" "Midsized" "Small" "Small" ...

# a.

# Histogram of the CityMPG column.
hist(
  x = Cars2020[["CityMPG"]],
  main = "Frequency of Miles Per Gallon per Car",
  xlab = "Miles Per Gallon",
  ylab = "Frequency",
  col = "magenta"
)

# Bar chart of the number of cars per drive type.
Drive <- table(Cars2020[["Drive"]])
barplot(
  height = Drive,
  main = "Frequency of Drive Type",
  xlab = "Drive Type",
  ylab = "Frequency",
  col = "green"
)

# Histogram of the LowPrice column (y-axis label left at hist()'s default).
hist(
  x = Cars2020[["LowPrice"]],
  main = "Low Price of Cars in 2020",
  xlab = "Low Price",
  col = "blue"
)

# Bar chart of the number of cars per body type. `Type` is kept as a
# top-level object because the frequency tables further down reuse it.
Type <- table(Cars2020[["Type"]])
barplot(
  height = Type,
  main = "Frequency of Types of Cars",
  xlab = "Car Type",
  ylab = "Frequency",
  col = "pink"
)

# b.

# Summary statistics for CityMPG; the column is pulled out once and reused.
city_mpg <- Cars2020[["CityMPG"]]

# Centre: mean and median.
mean(city_mpg)
## [1] 16.19091
median(city_mpg)
## [1] 15.5

# Spread: standard deviation, interquartile range, and min/max.
sd(city_mpg)
## [1] 3.74042
IQR(city_mpg)
## [1] 5.75
range(city_mpg)
## [1] 10 28

# Tails: 5th and 95th percentiles.
quantile(city_mpg, probs = 0.05)
## 5% 
## 11
quantile(city_mpg, probs = 0.95)
##   95% 
## 22.55

# c.

# (i) Frequency table: number of cars of each type.
print(Type)
## 
## Hatchback   Minivan     Sedan    Sporty       SUV     Wagon 
##         4         4        38        11        50         3
# (ii) Relative frequency table: each type's share of all cars, in percent.
100 * prop.table(Type)
## 
## Hatchback   Minivan     Sedan    Sporty       SUV     Wagon 
##  3.636364  3.636364 34.545455 10.000000 45.454545  2.727273

# 2.

# Load the yearly hurricane counts. The path is anchored at the home
# directory ("~"), matching the Cars2020 import, so the script no longer
# depends on the current working directory being the home directory.
Hurricanes2018 <- read.csv(
  "~/Documents/R code for Stats/Hurricanes2018.csv"
)
str(Hurricanes2018)
## 'data.frame':    105 obs. of  2 variables:
##  $ Year      : int  1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 ...
##  $ Hurricanes: int  0 5 10 2 4 2 4 5 3 4 ...

# Hurricanes per year. The columns are named explicitly so the plot does not
# rely on the column order of the data frame; type = "o" draws both the
# points and the connecting line.
plot(
  x = Hurricanes2018$Year,
  y = Hurricanes2018$Hurricanes,
  type = "o",
  ylab = "Number of Hurricanes",
  xlab = "Year",
  col = "purple",
  main = "Number of Hurricanes over the Years"
)

# 3.

# Load the movie data. The path is anchored at the home directory ("~"),
# matching the Cars2020 import, so the script does not depend on the current
# working directory.
HollywoodMoviesReal <- read.csv(
  "~/Documents/R code for Stats/HollywoodMoviesReal.csv"
)
# View() opens a data viewer, so only call it in an interactive session;
# this keeps Rscript / knitting runs from failing on it.
if (interactive()) {
  View(HollywoodMoviesReal)
}

# List the genre labels present in the data (note the empty-string genre).
unique(HollywoodMoviesReal$Genre)
##  [1] "Action"      "Animation"   "Adventure"   "Thriller"    "Comedy"     
##  [6] "Musical"     "Drama"       "Biography"   "Horror"      "Romance"    
## [11] "Fantasy"     "Documentary" "Crime"       ""            "Mystery"

# Keep only the three genres being compared, then drop rows with a missing
# DomesticGross value.
Three_Genres <- subset(
  HollywoodMoviesReal,
  Genre %in% c("Action", "Drama", "Romance")
)
noNA <- Three_Genres[!is.na(Three_Genres$DomesticGross), ]

# Side-by-side boxplots of DomesticGross, one box per genre.
boxplot(
  DomesticGross ~ Genre,
  data = noNA,
  col = c("pink", "blue", "purple"),
  main = "Domestic Gross by Genre",
  ylab = "Domestic Gross ($million)",
  xlab = "Genre"
)

# 4.

# Load the 2011 MLB data. The path is anchored at the home directory ("~"),
# matching the Cars2020 import, so the script does not depend on the current
# working directory.
mlb2011 <- read.csv("~/Documents/R code for Stats/mlb2011.csv")
# View() opens a data viewer, so only call it in an interactive session;
# this keeps Rscript / knitting runs from failing on it.
if (interactive()) {
  View(mlb2011)
}

# Drop every row that has an NA in any column, then build the subsets used
# in the plot: rows with at least 10 games started (GS), split by league (LG).
Clean_Mlb2011 <- na.omit(mlb2011)
started_10 <- Clean_Mlb2011$GS >= 10
AL_Mlb2011 <- Clean_Mlb2011[(Clean_Mlb2011$LG == "AL") & started_10, ]
NL_mlb2011 <- Clean_Mlb2011[(Clean_Mlb2011$LG == "NL") & started_10, ]
atleast_10GS <- Clean_Mlb2011[started_10, ]

# Fit wins (W) on strikeouts (SOA) separately for each league. The lines
# were previously drawn from hand-typed, rounded coefficients
# (NL: 1.966 + 0.059 * SOA; AL: 0.945 + 0.073 * SOA); fitting here keeps the
# lines in sync with the data actually plotted.
# NOTE(review): assumes those constants came from these same two fits --
# confirm.
nl_fit <- lm(W ~ SOA, data = NL_mlb2011)
al_fit <- lm(W ~ SOA, data = AL_Mlb2011)

# Empty canvas (type = "n") with axes wide enough for both leagues.
plot(
  atleast_10GS$W ~ atleast_10GS$SOA,
  type = "n",
  ylim = range(c(NL_mlb2011$W, AL_Mlb2011$W)),
  xlim = range(c(NL_mlb2011$SOA, AL_Mlb2011$SOA)),
  ylab = "Wins",
  xlab = "Strikeouts",
  main = "Wins and Strikeouts for the NL and AL who Started at Least 10 Times"
)

# One point symbol and colour per league.
points(NL_mlb2011$W ~ NL_mlb2011$SOA, col = "#c91589", pch = 25)
points(AL_Mlb2011$W ~ AL_Mlb2011$SOA, col = "#a1efd5", pch = 17)

# Regression line for each league, in the colour of that league's points.
abline(reg = nl_fit, col = "#c91589", lwd = 1)
abline(reg = al_fit, col = "#a1efd5", lwd = 1)

# Legend: NA in pch / lty / lwd suppresses the symbol or the line so each
# entry shows only the element it describes.
legend(
  x = 10, y = 24,
  legend = c("NL", "NL Regression", "AL", "AL Regression"),
  col = c("#c91589", "#c91589", "#a1efd5", "#a1efd5"),
  pch = c(25, NA, 17, NA),
  lty = c(NA, 1, NA, 1),
  lwd = c(NA, 1, NA, 1),
  bty = "n",
  title = "Legend"
)