Statistical Computing 3

# Import the 2020 car data set and show each column's type and first values.
Cars2020 <- read.csv(file = "~/Documents/R code for Stats/Cars2020.csv")
str(object = Cars2020)

## 'data.frame':    110 obs. of  21 variables:
##  $ Make     : chr  "Acura" "Acura" "Audi" "Audi" ...
##  $ Model    : chr  "MDX" "RLX" "A3" "A4" ...
##  $ Type     : chr  "SUV" "Sedan" "Sedan" "Sporty" ...
##  $ LowPrice : num  44.4 54.9 33.3 37.4 54.9 ...
##  $ HighPrice: num  60.1 61 43 45.7 73.9 ...
##  $ CityMPG  : int  14 15 18 18 17 20 15 18 19 16 ...
##  $ HwyMPG   : int  31 36 40 40 39 27 33 35 44 40 ...
##  $ Seating  : int  7 5 5 5 5 5 5 4 5 5 ...
##  $ Drive    : chr  "AWD" "AWD" "AWD" "AWD" ...
##  $ Acc030   : num  2.8 2.7 3.2 2.7 2.8 2.4 3.2 2.5 2.6 2.9 ...
##  $ Acc060   : num  6.8 6.5 8.3 6.3 6.8 6.1 7.8 6.7 6.4 7.2 ...
##  $ QtrMile  : num  15.3 15 16.4 14.9 15.3 14.5 16.1 14.8 14.8 15.5 ...
##  $ Braking  : int  135 128 124 135 129 133 126 113 129 130 ...
##  $ FuelCap  : num  19.5 18.5 13.2 15.3 19.3 21.7 15.9 14.5 15.6 17.9 ...
##  $ Length   : int  196 198 175 186 195 209 177 165 186 195 ...
##  $ Width    : int  77 74 70 73 74 77 73 72 72 74 ...
##  $ Height   : int  67 58 56 56 57 59 63 53 57 58 ...
##  $ Wheelbase: int  111 112 104 111 115 123 106 99 112 117 ...
##  $ UTurn    : int  40 40 37 40 38 43 40 36 41 42 ...
##  $ Weight   : int  4200 3930 3135 3630 4015 4810 3880 3140 3640 3950 ...
##  $ Size     : chr  "Midsized" "Midsized" "Small" "Small" ...

# Histogram of the CityMPG column across all cars in the data set.
hist(
  x = Cars2020[["CityMPG"]],
  main = "Frequency of Miles Per Gallon per Car",
  xlab = "Miles Per Gallon",
  ylab = "Frequency",
  col = "magenta"
)

# Count the cars in each Drive category, then draw the counts as a bar chart.
Drive <- table(Cars2020[["Drive"]])
barplot(
  height = Drive,
  main = "Frequency of Drive Type",
  xlab = "Drive Type",
  ylab = "Frequency",
  col = "green"
)

# Histogram of the LowPrice column; the y-axis label is left at hist()'s
# default.
hist(
  x = Cars2020[["LowPrice"]],
  main = "Low Price of Cars in 2020",
  xlab = "Low Price",
  col = "blue"
)

# Count the cars in each Type category, then draw the counts as a bar chart.
# The Type table is reused further down to report counts and percentages.
Type <- table(Cars2020[["Type"]])
barplot(
  height = Type,
  main = "Frequency of Types of Cars",
  xlab = "Car Type",
  ylab = "Frequency",
  col = "pink"
)

The average for CityMPG is 16.19091.
The median for CityMPG is 15.50.
The standard deviation for CityMPG is 3.74042.
The IQR for CityMPG is 5.75.
The range for CityMPG is 10 to 28.
The 5th percentile is 11 for CityMPG.
The 95th percentile is 22.55 for CityMPG.

# Summary statistics for the CityMPG column. Each "##" line below a call is
# the value that call printed.

# Centre: mean and median.
mean(Cars2020[["CityMPG"]])

## [1] 16.19091

median(Cars2020[["CityMPG"]])

## [1] 15.5

# Spread: standard deviation, interquartile range, and minimum/maximum.
sd(Cars2020[["CityMPG"]])

## [1] 3.74042

IQR(Cars2020[["CityMPG"]])

## [1] 5.75

range(Cars2020[["CityMPG"]])

## [1] 10 28

# Tails: 5th and 95th percentiles.
quantile(Cars2020[["CityMPG"]], probs = 0.05)

## 5% 
## 11

quantile(Cars2020[["CityMPG"]], probs = 0.95)

##   95% 
## 22.55

There are 4 Hatchbacks, 4 Minivans, 38 Sedans, 11 Sporty cars, 50 SUVs, and 3 Wagons in this data set.
That means that:
- 3.636364% are Hatchbacks
- 3.636364% are Minivans
- 34.545455% are Sedans
- 10.0% are Sporty cars
- 45.454545% are SUVs
- 2.727273% are Wagons

# (i) Number of cars of each type.
Type

## 
## Hatchback   Minivan     Sedan    Sporty       SUV     Wagon 
##         4         4        38        11        50         3

# (ii) Share of each car type, expressed as a percentage of all cars.
prop.table(Type) * 100

## 
## Hatchback   Minivan     Sedan    Sporty       SUV     Wagon 
##  3.636364  3.636364 34.545455 10.000000 45.454545  2.727273

# Import the yearly hurricane counts and show each column's type and first
# values. The path is anchored at the home directory ("~") so the file is
# found regardless of the current working directory.
Hurricanes2018 <- read.csv("~/Documents/R code for Stats/Hurricanes2018.csv")
str(Hurricanes2018)

## 'data.frame':    105 obs. of  2 variables:
##  $ Year      : int  1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 ...
##  $ Hurricanes: int  0 5 10 2 4 2 4 5 3 4 ...

# Time series of hurricane counts: points joined by lines (type = "o"),
# with Year on the x-axis and the Hurricanes count on the y-axis.
plot(
  x = Hurricanes2018$Year,
  y = Hurricanes2018$Hurricanes,
  type = "o",
  col = "purple",
  main = "Number of Hurricanes over the Years",
  xlab = "Year",
  ylab = "Number of Hurricanes"
)

# Import the Hollywood movie data. The path is anchored at the home directory
# ("~") so the file is found regardless of the current working directory.
HollywoodMoviesReal <- read.csv(
  "~/Documents/R code for Stats/HollywoodMoviesReal.csv"
)

# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is rendered or run in batch.
if (interactive()) {
  View(HollywoodMoviesReal)
}

# List the distinct genre labels. The output below includes an empty string,
# i.e. some rows carry no genre label.
unique(HollywoodMoviesReal$Genre)

##  [1] "Action"      "Animation"   "Adventure"   "Thriller"    "Comedy"     
##  [6] "Musical"     "Drama"       "Biography"   "Horror"      "Romance"    
## [11] "Fantasy"     "Documentary" "Crime"       ""            "Mystery"

# Restrict the movies to the Action, Drama and Romance genres, drop rows whose
# DomesticGross is missing, and compare DomesticGross across the three genres
# with side-by-side boxplots (one colour per genre).
Three_Genres <- HollywoodMoviesReal[
  HollywoodMoviesReal$Genre %in% c("Action", "Drama", "Romance"),
]
noNA <- subset(Three_Genres, !is.na(DomesticGross))
boxplot(
  DomesticGross ~ Genre,
  data = noNA,
  main = "Domestic Gross by Genre",
  xlab = "Genre",
  ylab = "Domestic Gross ($million)",
  col = c("pink", "blue", "purple")
)

# Import the 2011 MLB data. The path is anchored at the home directory ("~")
# so the file is found regardless of the current working directory.
mlb2011 <- read.csv("~/Documents/R code for Stats/mlb2011.csv")

# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is rendered or run in batch.
if (interactive()) {
  View(mlb2011)
}

# Scatter plot of wins (W) against strikeouts (SOA) for rows with GS >= 10,
# drawn separately for the AL and NL leagues with one regression line each.

# Drop every row that has a missing value in any column, then keep rows with
# GS >= 10 and split them by league (LG).
Clean_Mlb2011 <- na.omit(mlb2011)
atleast_10GS <- Clean_Mlb2011[Clean_Mlb2011$GS >= 10, ]
AL_Mlb2011 <- atleast_10GS[atleast_10GS$LG == "AL", ]
NL_mlb2011 <- atleast_10GS[atleast_10GS$LG == "NL", ]

# Set up empty axes (type = "n") sized to cover both leagues; the points are
# added per league below so each league gets its own colour and symbol.
plot(atleast_10GS$W ~ atleast_10GS$SOA,
     type = "n",
     ylim = range(c(NL_mlb2011$W, AL_Mlb2011$W)),
     xlim = range(c(NL_mlb2011$SOA, AL_Mlb2011$SOA)),
     ylab = "Wins",
     xlab = "Strikeouts",
     main = "Wins and Strikeouts for the NL and AL who Started at Least 10 Times")
points(NL_mlb2011$W ~ NL_mlb2011$SOA,
       col = "#c91589",
       pch = 25)
points(AL_Mlb2011$W ~ AL_Mlb2011$SOA,
       col = "#a1efd5",
       pch = 17)

# Fit each league's least-squares line from the plotted points instead of
# typing in rounded coefficients (previously intercept 1.966 / slope 0.059
# for the NL and 0.945 / 0.073 for the AL), so the lines always agree with
# the data being shown.
abline(lm(W ~ SOA, data = NL_mlb2011), col = "#c91589", lwd = 1)
abline(lm(W ~ SOA, data = AL_Mlb2011), col = "#a1efd5", lwd = 1)

# Legend entries alternate point symbol / line: NA in pch suppresses the
# symbol for line entries, NA in lty/lwd suppresses the line for point entries.
legend(x = 10, y = 24,
       legend = c("NL", "NL Regression", "AL", "AL Regression"),
       col = c("#c91589", "#c91589", "#a1efd5", "#a1efd5"),
       pch = c(25, NA, 17, NA),
       lty = c(NA, 1, NA, 1),
       lwd = c(NA, 1, NA, 1),
       bty = "n",
       title = "Legend"
       )

Statistical Computing 3

Kyra Ligas

2025-03-19