1.
# Import the 2020 cars data set and print a compact overview of its columns.
Cars2020 <- read.csv(file = "~/Documents/R code for Stats/Cars2020.csv")
str(object = Cars2020)
## 'data.frame': 110 obs. of 21 variables:
## $ Make : chr "Acura" "Acura" "Audi" "Audi" ...
## $ Model : chr "MDX" "RLX" "A3" "A4" ...
## $ Type : chr "SUV" "Sedan" "Sedan" "Sporty" ...
## $ LowPrice : num 44.4 54.9 33.3 37.4 54.9 ...
## $ HighPrice: num 60.1 61 43 45.7 73.9 ...
## $ CityMPG : int 14 15 18 18 17 20 15 18 19 16 ...
## $ HwyMPG : int 31 36 40 40 39 27 33 35 44 40 ...
## $ Seating : int 7 5 5 5 5 5 5 4 5 5 ...
## $ Drive : chr "AWD" "AWD" "AWD" "AWD" ...
## $ Acc030 : num 2.8 2.7 3.2 2.7 2.8 2.4 3.2 2.5 2.6 2.9 ...
## $ Acc060 : num 6.8 6.5 8.3 6.3 6.8 6.1 7.8 6.7 6.4 7.2 ...
## $ QtrMile : num 15.3 15 16.4 14.9 15.3 14.5 16.1 14.8 14.8 15.5 ...
## $ Braking : int 135 128 124 135 129 133 126 113 129 130 ...
## $ FuelCap : num 19.5 18.5 13.2 15.3 19.3 21.7 15.9 14.5 15.6 17.9 ...
## $ Length : int 196 198 175 186 195 209 177 165 186 195 ...
## $ Width : int 77 74 70 73 74 77 73 72 72 74 ...
## $ Height : int 67 58 56 56 57 59 63 53 57 58 ...
## $ Wheelbase: int 111 112 104 111 115 123 106 99 112 117 ...
## $ UTurn : int 40 40 37 40 38 43 40 36 41 42 ...
## $ Weight : int 4200 3930 3135 3630 4015 4810 3880 3140 3640 3950 ...
## $ Size : chr "Midsized" "Midsized" "Small" "Small" ...
a.
# Part (a): four univariate plots of the Cars2020 data.

# Tabulate the two categorical columns up front. `Type` is printed again in
# part (c), so both table names are kept exactly as they were.
Drive <- table(Cars2020[["Drive"]])
Type <- table(Cars2020[["Type"]])

# Histogram of city fuel economy.
hist(
  Cars2020[["CityMPG"]],
  main = "Frequency of Miles Per Gallon per Car",
  xlab = "Miles Per Gallon",
  ylab = "Frequency",
  col = "magenta"
)

# Bar plot of the number of cars per drive type.
barplot(
  Drive,
  main = "Frequency of Drive Type",
  xlab = "Drive Type",
  ylab = "Frequency",
  col = "green"
)

# Histogram of the low-end price; the y-axis label is left at hist()'s default.
hist(
  Cars2020[["LowPrice"]],
  main = "Low Price of Cars in 2020",
  xlab = "Low Price",
  col = "blue"
)

# Bar plot of the number of cars per body type.
barplot(
  Type,
  main = "Frequency of Types of Cars",
  xlab = "Car Type",
  ylab = "Frequency",
  col = "pink"
)
b.
# Part (b): numerical summaries of city fuel economy.
# Pull the column out once so every statistic reads from the same vector.
city_mpg <- Cars2020[["CityMPG"]]

# Centre.
mean(city_mpg)
## [1] 16.19091
median(city_mpg)
## [1] 15.5

# Spread.
sd(city_mpg)
## [1] 3.74042
IQR(city_mpg)
## [1] 5.75
range(city_mpg)
## [1] 10 28

# 5th and 95th percentiles.
quantile(city_mpg, probs = 0.05)
## 5%
## 11
quantile(city_mpg, probs = 0.95)
## 95%
## 22.55
c.
# (i) Number of cars of each body type (the `Type` table built for the
# bar plot in part (a)).
print(Type)
##
## Hatchback Minivan Sedan Sporty SUV Wagon
## 4 4 38 11 50 3
# (ii) The same counts expressed as percentages of all cars.
100 * prop.table(Type)
##
## Hatchback Minivan Sedan Sporty SUV Wagon
## 3.636364 3.636364 34.545455 10.000000 45.454545 2.727273
2.
# Load the yearly hurricane counts. The path is anchored at "~", like the
# Cars2020 import, so the read no longer depends on the working directory
# happening to be the home directory.
Hurricanes2018 <- read.csv("~/Documents/R code for Stats/Hurricanes2018.csv")
str(Hurricanes2018)
## 'data.frame': 105 obs. of 2 variables:
## $ Year : int 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 ...
## $ Hurricanes: int 0 5 10 2 4 2 4 5 3 4 ...

# Time series of hurricanes per year. The formula names both columns
# explicitly instead of relying on the column order of the data frame.
plot(
  Hurricanes ~ Year,
  data = Hurricanes2018,
  type = "o", # points joined by lines
  ylab = "Number of Hurricanes",
  xlab = "Year",
  col = "purple",
  main = "Number of Hurricanes over the Years"
)
3.
# Load the Hollywood movies data. The path is anchored at "~", like the
# Cars2020 import, so the read does not depend on the working directory.
HollywoodMoviesReal <- read.csv(
  "~/Documents/R code for Stats/HollywoodMoviesReal.csv"
)

# The spreadsheet viewer is only useful in an interactive session; skipping
# it otherwise keeps the script runnable with Rscript or when knitting.
if (interactive()) {
  View(HollywoodMoviesReal)
}

# List the genre labels present in the data (note the empty-string entry).
unique(HollywoodMoviesReal$Genre)
## [1] "Action" "Animation" "Adventure" "Thriller" "Comedy"
## [6] "Musical" "Drama" "Biography" "Horror" "Romance"
## [11] "Fantasy" "Documentary" "Crime" "" "Mystery"

# Keep only the three genres being compared, then drop movies whose domestic
# gross is missing.
Three_Genres <- subset(
  HollywoodMoviesReal,
  Genre %in% c("Action", "Drama", "Romance")
)
noNA <- Three_Genres[!is.na(Three_Genres$DomesticGross), ]

# Side-by-side box plots of domestic gross, one box per genre.
boxplot(
  DomesticGross ~ Genre,
  data = noNA,
  col = c("pink", "blue", "purple"),
  main = "Domestic Gross by Genre",
  ylab = "Domestic Gross ($million)",
  xlab = "Genre"
)
4.
# Load the 2011 MLB data. The path is anchored at "~", like the Cars2020
# import, so the read does not depend on the working directory.
mlb2011 <- read.csv("~/Documents/R code for Stats/mlb2011.csv")

# The spreadsheet viewer is only useful in an interactive session; skipping
# it otherwise keeps the script runnable with Rscript or when knitting.
if (interactive()) {
  View(mlb2011)
}

# Drop every row that has a missing value in any column.
Clean_Mlb2011 <- na.omit(mlb2011)

# Keep rows with GS >= 10, then split that set by league. Applying the GS
# filter once guarantees the league subsets and the combined set cannot
# drift apart.
atleast_10GS <- Clean_Mlb2011[Clean_Mlb2011$GS >= 10, ]
AL_Mlb2011 <- atleast_10GS[atleast_10GS$LG == "AL", ]
NL_mlb2011 <- atleast_10GS[atleast_10GS$LG == "NL", ]
# Scatter plot of W against SOA for the GS >= 10 rows, one colour and symbol
# per league, with a least-squares line for each league.
nl_col <- "#c91589"
al_col <- "#a1efd5"

# Fit the per-league regression lines from the data being plotted so they
# stay in sync with the filters applied earlier.
# NOTE(review): the lines were previously drawn from hard-coded coefficients
# (NL: 1.966 + 0.059 * SOA, AL: 0.945 + 0.073 * SOA), presumably rounded from
# these same fits -- confirm coef(nl_fit) and coef(al_fit) agree with them.
nl_fit <- lm(W ~ SOA, data = NL_mlb2011)
al_fit <- lm(W ~ SOA, data = AL_Mlb2011)

# Set up empty axes (type = "n") spanning both leagues; the points are added
# per league below so each can get its own colour and symbol.
plot(
  W ~ SOA,
  data = atleast_10GS,
  type = "n",
  ylim = range(c(NL_mlb2011$W, AL_Mlb2011$W)),
  xlim = range(c(NL_mlb2011$SOA, AL_Mlb2011$SOA)),
  ylab = "Wins",
  xlab = "Strikeouts",
  main = "Wins and Strikeouts for the NL and AL who Started at Least 10 Times"
)
points(W ~ SOA, data = NL_mlb2011, col = nl_col, pch = 25)
points(W ~ SOA, data = AL_Mlb2011, col = al_col, pch = 17)

# abline() reads the intercept and slope from the fitted models.
abline(nl_fit, col = nl_col, lwd = 1)
abline(al_fit, col = al_col, lwd = 1)

# Legend entries alternate point symbol / line per league; NA suppresses the
# part that does not apply to an entry.
legend(
  x = 10, y = 24,
  legend = c("NL", "NL Regression", "AL", "AL Regression"),
  col = c(nl_col, nl_col, al_col, al_col),
  pch = c(25, NA, 17, NA),
  lty = c(NA, 1, NA, 1),
  lwd = c(NA, 1, NA, 1),
  bty = "n",
  title = "Legend"
)