5.5 through section 6.1

Section 5.5

1

# Start from the integers 1 to 100.
V <- seq_len(100)

# Overwrite (and, past position 100, create) elements with a string.
V[c(4, 196, 501:556)] <- "brussel sprouts"

# Show elements 2 through 4 to see how the untouched values are now stored.
print(V[2:4])
## [1] "2"               "3"               "brussel sprouts"

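Assigning a string coerced the whole vector from numeric to character: the elements I didn’t change to "brussel sprouts" are now stored as strings, e.g. 2 became “2”. Because indices 196 and 501:556 lie beyond the original length of 100, the vector was also extended to length 556, with the unassigned positions filled with NA.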

2

# Miles-per-gallon values for rows 15 through 20 of the built-in mtcars data.
mpg <- mtcars[15:20, "mpg"]

# Average of those six values.
mean(mpg)
## [1] 22.03333

3

# Load the athletes table from the working directory.
olympic_athletes <- read.csv(file = "olympic_athletes.csv")

# Working copy used by the rest of the exercises.
olympicdata <- olympic_athletes

# Logical mask: TRUE for every row whose Sport is "Wrestling".
Wrestling <- olympicdata$Sport == "Wrestling"

# Summing the mask counts the TRUE entries, i.e. the wrestling rows.
sum(Wrestling)
## [1] 123
# Names on the rows selected by the Wrestling mask.
names <- olympicdata[Wrestling, "Name"]

# Preview the first six names.
head(names, n = 6L)
## [1] "Joaquim Vital"                 "Vasile Iorga"                 
## [3] "Davit Gochayevich Chakvetadze" "Nestoras Batzelas"            
## [5] "Sergey Yevgenyevich Artyukhin" "Guanbunima"

4

# Keep every column, but only the rows whose Season is "Winter".
athletes_winter <- olympicdata[
  olympicdata$Season == "Winter",
]

5

# Replace missing (NA) Medal entries with the label "No Medal", in place.
olympicdata$Medal[is.na(olympicdata$Medal)] <- "No Medal"

# Keep the recoded data frame under the name that gets saved. The previous
# chained assignment (`no.medal <- x[i] <- "No Medal"`) left `no.medal` holding
# only the single string "No Medal", so the saved file contained no data.
no.medal <- olympicdata

# Write the recoded data frame to disk (file name kept as before).
save(no.medal, file = "STAT 158")

5.6

1

# Rows for basketball only.
basketball <- olympicdata[olympicdata$Sport == "Basketball", ]

# Distribution of basketball players' heights, using about 30 bins.
hist(
  x = basketball$Height,
  breaks = 30,
  main = "Olympic Basketball Player Heights",
  xlab = "Height (in)"
)

### 2

# Rows for cycling only.
athletes_cycling <- olympicdata[olympicdata$Sport == "Cycling", ]

# Height distribution for each Medal value among cyclists.
boxplot(
  Height ~ Medal,
  data = athletes_cycling,
  main = "Cycling Height vs Medal",
  xlab = "Medal",
  ylab = "Height"
)

# Count cyclists with no recorded height. Counting the NAs directly avoids
# writing a placeholder string into Height, which would silently convert the
# whole numeric column to character.
sum(is.na(athletes_cycling$Height))
## [1] 55
# Cyclists whose height IS recorded, counted directly from the data instead of
# subtracting a number copied by hand from earlier output.
sum(olympicdata$Sport == "Cycling" & !is.na(olympicdata$Height))
## [1] 150

Out of the 205 cyclists, 55 do not have their height listed. These missing values could bias the data so that it no longer represents the cyclists' actual heights. Because box plots summarize medians and quartiles, the medians and spreads shown in our box plots may differ from those of the full population of cyclists.

3

# Rows for weightlifting only.
weightlifting <- olympicdata[olympicdata$Sport == "Weightlifting", ]

# One colour per Sex value: factor codes are used as colour indices.
colors <- as.factor(weightlifting$Sex)

# Height on the x-axis against Olympic year on the y-axis, coloured by sex.
plot(
  x = weightlifting$Height,
  y = weightlifting$Year,
  col = colors,
  pch = 20,
  main = "Weightlifters Height vs Year",
  xlab = "Height",
  ylab = "Year"
)

Women began competing in Olympic weightlifting in 2000.

4

# Number of rows per Season value.
season_tab <- table(olympicdata$Season)

# Bar chart of those counts; barplot() returns the bar midpoints, kept here
# under the same name as before.
olympic_season <- barplot(
  height = season_tab,
  col = rgb(red = 0.5, green = 0.9, blue = 0.8),
  main = "Olympic Seasons",
  xlab = "Seasons"
)

Summer has more rows in the dataframe.

5

# Save the season bar chart to a PNG file. The device has to be opened BEFORE
# the plot is drawn and closed afterwards: opening it alone writes no chart and
# leaves every later plot in the script redirected into this file.
png("olympic_season.png")
barplot(season_tab, col = rgb(.5, .9, .8), main = "Olympic Seasons", xlab = "Seasons")
invisible(dev.off())

6.1

1

# Load the provisional death-count table, reading the file as UTF-8.
covid <- read.csv(
  file = "Provisional_COVID-19_Death_Counts_by_Sex__Age__and_State.csv",
  fileEncoding = "UTF-8"
)

# Drop every row whose State value contains the text "Total".
covid <- covid[!grepl(pattern = "Total", x = covid$State), ]

# remove all ages category
covid <- covid[covid$Age.group != "All ages", ]

2

# Print the sum of Total.Deaths for each age group, in order of first
# appearance in the data. Missing counts are ignored.
# NOTE(review): rows for the "United States" aggregate and for Sex == "All"
# appear to still be present in `covid`, so these sums may count the same
# deaths more than once - confirm that this is what the exercise intends.
for (age_group in unique(covid$Age.group)) {
  in_group <- covid$Age.group == age_group
  print(sum(covid$Total.Deaths[in_group], na.rm = TRUE))
}
## [1] 24212
## [1] 4425
## [1] 6952
## [1] 44511
## [1] 92850
## [1] 131666
## [1] 246179
## [1] 569222
## [1] 866520
## [1] 1067190
## [1] 1347181
## [1] 1466723

3

# Number of rows recorded for each State value.
state <- table(covid$State)

# Bar chart of rows per state.
barplot(state)

# Same counts again, sorted ascending so the largest count prints last.
max_state <- state

sort(max_state)
## 
##              Alabama               Alaska              Arizona 
##                   22                   22                   22 
##             Arkansas           California             Colorado 
##                   22                   22                   22 
##          Connecticut             Delaware District of Columbia 
##                   22                   22                   22 
##              Florida              Georgia               Hawaii 
##                   22                   22                   22 
##                Idaho             Illinois              Indiana 
##                   22                   22                   22 
##                 Iowa               Kansas             Kentucky 
##                   22                   22                   22 
##            Louisiana                Maine             Maryland 
##                   22                   22                   22 
##        Massachusetts             Michigan            Minnesota 
##                   22                   22                   22 
##          Mississippi             Missouri              Montana 
##                   22                   22                   22 
##             Nebraska               Nevada        New Hampshire 
##                   22                   22                   22 
##           New Jersey           New Mexico             New York 
##                   22                   22                   22 
##        New York City       North Carolina         North Dakota 
##                   22                   22                   22 
##                 Ohio             Oklahoma               Oregon 
##                   22                   22                   22 
##         Pennsylvania          Puerto Rico         Rhode Island 
##                   22                   22                   22 
##       South Carolina         South Dakota            Tennessee 
##                   22                   22                   22 
##                Texas                 Utah              Vermont 
##                   22                   22                   22 
##             Virginia           Washington        West Virginia 
##                   22                   22                   22 
##            Wisconsin              Wyoming        United States 
##                   22                   22                   34
# Rows labelled "United States", to inspect why it has more rows than the rest.
unitedstates <- covid[covid$State == "United States", ]

# First six of those rows.
head(unitedstates, n = 6L)
##   Data.as.of Start.week   End.Week         State Sex    Age.group
## 1 07/22/2020 02/01/2020 07/11/2020 United States All Under 1 year
## 2 07/22/2020 02/01/2020 07/11/2020 United States All    1-4 years
## 3 07/22/2020 02/01/2020 07/11/2020 United States All   5-14 years
## 4 07/22/2020 02/01/2020 07/11/2020 United States All  15-24 years
## 5 07/22/2020 02/01/2020 07/11/2020 United States All  25-34 years
## 6 07/22/2020 02/01/2020 07/11/2020 United States All  35-44 years
##   COVID.19.Deaths Total.Deaths Pneumonia.Deaths Pneumonia.and.COVID.19.Deaths
## 1              11         8072               75                             2
## 2               9         1547               52                             2
## 3              16         2379               78                             5
## 4             190        14810              300                            62
## 5             935        30885             1113                           416
## 6            2411        43783             2433                          1009
##   Influenza.Deaths Pneumonia..Influenza..or.COVID.19.Deaths Footnote
## 1               14                                       98         
## 2               41                                      100         
## 3               49                                      138         
## 4               51                                      475         
## 5              149                                     1768         
## 6              242                                     4048

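The "state" with the most observations is “United States”, with 34 rows versus 22 for every other state. This is not concerning: “United States” is a national aggregate rather than a state, and its extra rows appear to be totals across both sexes (Sex = "All", as the first rows above show) over an extended period of time. These are totals, not averages, so the aggregate rows should be excluded before summing across states; otherwise the same deaths are counted more than once.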

4

# For every State value, compare total female and male deaths in the
# "45-54 years" age group and report whether the female total is larger.
# Ties are reported as "not more", as before.
for (state_name in unique(covid$State)) {
  # Rows for this state in the 45-54 age group.
  in_group <- covid$State == state_name & covid$Age.group == "45-54 years"

  # na.rm = TRUE matches the age-group totals computed earlier in this file:
  # a single missing count would otherwise turn the sum into NA and make the
  # comparison below fail with an error.
  female_deaths <- sum(
    covid$Total.Deaths[in_group & covid$Sex == "Female"],
    na.rm = TRUE
  )
  male_deaths <- sum(
    covid$Total.Deaths[in_group & covid$Sex == "Male"],
    na.rm = TRUE
  )

  if (female_deaths > male_deaths) {
    cat("There are more female deaths in", state_name, "\n")
  } else {
    cat("There are not more female deaths in", state_name, "\n")
  }
}
## There are not more female deaths in United States 
## There are not more female deaths in Alabama 
## There are not more female deaths in Alaska 
## There are not more female deaths in Arizona 
## There are not more female deaths in Arkansas 
## There are not more female deaths in California 
## There are not more female deaths in Colorado 
## There are not more female deaths in Connecticut 
## There are not more female deaths in Delaware 
## There are not more female deaths in District of Columbia 
## There are not more female deaths in Florida 
## There are not more female deaths in Georgia 
## There are not more female deaths in Hawaii 
## There are not more female deaths in Idaho 
## There are not more female deaths in Illinois 
## There are not more female deaths in Indiana 
## There are not more female deaths in Iowa 
## There are not more female deaths in Kansas 
## There are not more female deaths in Kentucky 
## There are not more female deaths in Louisiana 
## There are not more female deaths in Maine 
## There are not more female deaths in Maryland 
## There are not more female deaths in Massachusetts 
## There are not more female deaths in Michigan 
## There are not more female deaths in Minnesota 
## There are not more female deaths in Mississippi 
## There are not more female deaths in Missouri 
## There are not more female deaths in Montana 
## There are not more female deaths in Nebraska 
## There are not more female deaths in Nevada 
## There are not more female deaths in New Hampshire 
## There are not more female deaths in New Jersey 
## There are not more female deaths in New Mexico 
## There are not more female deaths in New York 
## There are not more female deaths in New York City 
## There are not more female deaths in North Carolina 
## There are not more female deaths in North Dakota 
## There are not more female deaths in Ohio 
## There are not more female deaths in Oklahoma 
## There are not more female deaths in Oregon 
## There are not more female deaths in Pennsylvania 
## There are not more female deaths in Rhode Island 
## There are not more female deaths in South Carolina 
## There are not more female deaths in South Dakota 
## There are not more female deaths in Tennessee 
## There are not more female deaths in Texas 
## There are not more female deaths in Utah 
## There are not more female deaths in Vermont 
## There are not more female deaths in Virginia 
## There are not more female deaths in Washington 
## There are not more female deaths in West Virginia 
## There are not more female deaths in Wisconsin 
## There are not more female deaths in Wyoming 
## There are not more female deaths in Puerto Rico