5.5 through section 6.1
# Build the integers 1..100, overwrite the chosen positions with a string, and
# show elements 2-4 to illustrate how the whole vector is coerced.
V <- replace(seq_len(100), c(4, 196, 501:556), "brussel sprouts")
print(V[2:4])
## [1] "2" "3" "brussel sprouts"
The vector was coerced from a numeric vector to a character vector. The elements I didn’t change to “brussel sprouts” were converted to strings as well; for example, 2 became “2”.
# Miles-per-gallon values for rows 15 through 20 of the built-in mtcars data,
# followed by their average.
mpg <- mtcars[15:20, "mpg"]
mean(x = mpg)
## [1] 22.03333
# Load the athlete records and keep a working copy under a shorter name.
olympic_athletes <- read.csv(file = "olympic_athletes.csv")
olympicdata <- olympic_athletes
# Logical mask marking the rows whose Sport is "Wrestling"; summing it counts
# those rows.
Wrestling <- olympicdata[["Sport"]] == "Wrestling"
sum(Wrestling)
## [1] 123
# Names of the rows flagged by the Wrestling mask; preview the first six.
names <- olympicdata[Wrestling, "Name"]
head(names, n = 6L)
## [1] "Joaquim Vital" "Vasile Iorga"
## [3] "Davit Gochayevich Chakvetadze" "Nestoras Batzelas"
## [5] "Sergey Yevgenyevich Artyukhin" "Guanbunima"
# Rows recorded for the Winter season.
athletes_winter <- olympicdata[olympicdata$Season == "Winter", ]

# Recode missing medals as the explicit label "No Medal".
olympicdata$Medal[is.na(olympicdata$Medal)] <- "No Medal"

# The original chained assignment (`no.medal <- x[...] <- "No Medal"`) left
# `no.medal` holding just the string "No Medal", so save() wrote a one-element
# character vector. Save the recoded data frame instead.
# NOTE(review): assumes the recoded data frame is what was meant to be saved.
no.medal <- olympicdata
save(no.medal, file = "STAT 158")
# Keep only the basketball rows and draw a 30-break histogram of their heights.
is_basketball <- olympicdata$Sport == "Basketball"
basketball <- olympicdata[is_basketball, ]
hist(
  basketball$Height,
  main = "Olympic Basketball Player Heights",
  xlab = "Height (in)",
  breaks = 30
)
### 2
# Cyclists only: compare the height distribution across medal categories.
athletes_cycling <- olympicdata[olympicdata$Sport == "Cycling", ]
boxplot(
  Height ~ Medal,
  data = athletes_cycling,
  main = "Cycling Height vs Medal",
  xlab = "Medal",
  ylab = "Height"
)

# Count the cyclists with no recorded height. Use is.na() directly rather than
# overwriting NA with a "No Height" string, which would silently coerce the
# numeric Height column to character.
sum(is.na(athletes_cycling$Height))
## [1] 55
# Number of cyclists that DO have a recorded height, computed from the data
# instead of subtracting a hand-copied count of missing values.
sum(olympicdata$Sport == "Cycling" & !is.na(olympicdata$Height))
## [1] 150
Out of the 205 cyclists, 55 do not have their height listed. These missing values could bias the results so that they do not represent the actual heights of the cyclists. The medians and quartiles shown in our box plots may differ from those of the actual population because of these missing values.
# Weightlifting rows only; points are coloured by the athlete's Sex.
weightlifting <- olympicdata[olympicdata$Sport == "Weightlifting", ]
colors <- as.factor(weightlifting$Sex)
plot(
  x = weightlifting$Height,
  y = weightlifting$Year,
  main = "Weightlifters Height vs Year",
  xlab = "Height",
  ylab = "Year",
  col = colors,
  pch = 20
)
Women began competing in Olympic weightlifting in the year 2000.
# Count rows per Season and draw the counts as a bar chart; barplot() returns
# the bar midpoints, which are kept in olympic_season.
season_tab <- table(olympicdata$Season)
olympic_season <- barplot(
  season_tab,
  main = "Olympic Seasons",
  xlab = "Seasons",
  col = rgb(red = .5, green = .9, blue = .8)
)
Summer has more rows in the dataframe.
# Write the season bar chart to a PNG file. The device must be opened BEFORE
# plotting and closed afterwards; opening it after the plot was drawn (and never
# closing it) leaves an empty file and a dangling graphics device.
png("olympic_season.png")
barplot(season_tab, col = rgb(.5, .9, .8), main = "Olympic Seasons", xlab = "Seasons")
invisible(dev.off())
# Load the provisional COVID-19 death counts (file is read as UTF-8).
covid <- read.csv(
  "Provisional_COVID-19_Death_Counts_by_Sex__Age__and_State.csv",
  fileEncoding = "UTF-8"
)
# Keep only the rows whose State label does not contain "Total".
covid <- covid[grep("Total", covid$State, invert = TRUE), ]
# Drop the "All ages" category so only individual age groups remain.
covid <- covid[covid[["Age.group"]] != "All ages", ]
# Print the sum of Total.Deaths for each age group, in order of first
# appearance, ignoring missing counts.
# NOTE(review): the "United States" rows and the Sex = "All" rows appear to be
# included in these sums alongside the per-state, per-sex rows -- confirm
# whether that double counting is intended.
for (age_group in unique(covid$Age.group)) {
  group_rows <- covid[covid$Age.group == age_group, ]
  print(sum(group_rows$Total.Deaths, na.rm = TRUE))
}
## [1] 24212
## [1] 4425
## [1] 6952
## [1] 44511
## [1] 92850
## [1] 131666
## [1] 246179
## [1] 569222
## [1] 866520
## [1] 1067190
## [1] 1347181
## [1] 1466723
# Tabulate the number of rows per State label and draw the counts.
state <- table(covid[["State"]])
barplot(height = state)
# The same tabulation, printed in ascending order so that the label with the
# most rows appears last.
max_state <- state
sort(max_state, decreasing = FALSE)
##
## Alabama Alaska Arizona
## 22 22 22
## Arkansas California Colorado
## 22 22 22
## Connecticut Delaware District of Columbia
## 22 22 22
## Florida Georgia Hawaii
## 22 22 22
## Idaho Illinois Indiana
## 22 22 22
## Iowa Kansas Kentucky
## 22 22 22
## Louisiana Maine Maryland
## 22 22 22
## Massachusetts Michigan Minnesota
## 22 22 22
## Mississippi Missouri Montana
## 22 22 22
## Nebraska Nevada New Hampshire
## 22 22 22
## New Jersey New Mexico New York
## 22 22 22
## New York City North Carolina North Dakota
## 22 22 22
## Ohio Oklahoma Oregon
## 22 22 22
## Pennsylvania Puerto Rico Rhode Island
## 22 22 22
## South Carolina South Dakota Tennessee
## 22 22 22
## Texas Utah Vermont
## 22 22 22
## Virginia Washington West Virginia
## 22 22 22
## Wisconsin Wyoming United States
## 22 22 34
# Pull out the rows labelled "United States" and preview the first six.
unitedstates <- covid[covid[["State"]] == "United States", ]
head(unitedstates, n = 6L)
## Data.as.of Start.week End.Week State Sex Age.group
## 1 07/22/2020 02/01/2020 07/11/2020 United States All Under 1 year
## 2 07/22/2020 02/01/2020 07/11/2020 United States All 1-4 years
## 3 07/22/2020 02/01/2020 07/11/2020 United States All 5-14 years
## 4 07/22/2020 02/01/2020 07/11/2020 United States All 15-24 years
## 5 07/22/2020 02/01/2020 07/11/2020 United States All 25-34 years
## 6 07/22/2020 02/01/2020 07/11/2020 United States All 35-44 years
## COVID.19.Deaths Total.Deaths Pneumonia.Deaths Pneumonia.and.COVID.19.Deaths
## 1 11 8072 75 2
## 2 9 1547 52 2
## 3 16 2379 78 5
## 4 190 14810 300 62
## 5 935 30885 1113 416
## 6 2411 43783 2433 1009
## Influenza.Deaths Pneumonia..Influenza..or.COVID.19.Deaths Footnote
## 1 14 98
## 2 41 100
## 3 49 138
## 4 51 475
## 5 149 1768
## 6 242 4048
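The “state” with the most observations is “United States” (34 rows versus 22 for every other entry). This is not concerning in itself, because these rows are not an actual state: they are national aggregates, including totals across all sexes (Sex = “All”) over an extended period of time. They should, however, be excluded when summing across states to avoid double counting.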
# For every State label, compare the Total.Deaths of women and men aged
# 45-54 years and report whether the female total is strictly larger.
# NOTE(review): this compares the Total.Deaths column; confirm that it, rather
# than COVID.19.Deaths, is the intended measure.
for (state_name in unique(covid$State)) {
  in_group <- covid$State == state_name & covid$Age.group == "45-54 years"
  group_rows <- covid[in_group, ]

  # na.rm = TRUE matches the age-group totals computed earlier and keeps a
  # single missing count from turning the comparison into NA, which would make
  # `if` stop with an error.
  female_deaths <- sum(
    group_rows$Total.Deaths[group_rows$Sex == "Female"],
    na.rm = TRUE
  )
  male_deaths <- sum(
    group_rows$Total.Deaths[group_rows$Sex == "Male"],
    na.rm = TRUE
  )

  if (female_deaths > male_deaths) {
    cat("There are more female deaths in", state_name, "\n")
  } else {
    cat("There are not more female deaths in", state_name, "\n")
  }
}
## There are not more female deaths in United States
## There are not more female deaths in Alabama
## There are not more female deaths in Alaska
## There are not more female deaths in Arizona
## There are not more female deaths in Arkansas
## There are not more female deaths in California
## There are not more female deaths in Colorado
## There are not more female deaths in Connecticut
## There are not more female deaths in Delaware
## There are not more female deaths in District of Columbia
## There are not more female deaths in Florida
## There are not more female deaths in Georgia
## There are not more female deaths in Hawaii
## There are not more female deaths in Idaho
## There are not more female deaths in Illinois
## There are not more female deaths in Indiana
## There are not more female deaths in Iowa
## There are not more female deaths in Kansas
## There are not more female deaths in Kentucky
## There are not more female deaths in Louisiana
## There are not more female deaths in Maine
## There are not more female deaths in Maryland
## There are not more female deaths in Massachusetts
## There are not more female deaths in Michigan
## There are not more female deaths in Minnesota
## There are not more female deaths in Mississippi
## There are not more female deaths in Missouri
## There are not more female deaths in Montana
## There are not more female deaths in Nebraska
## There are not more female deaths in Nevada
## There are not more female deaths in New Hampshire
## There are not more female deaths in New Jersey
## There are not more female deaths in New Mexico
## There are not more female deaths in New York
## There are not more female deaths in New York City
## There are not more female deaths in North Carolina
## There are not more female deaths in North Dakota
## There are not more female deaths in Ohio
## There are not more female deaths in Oklahoma
## There are not more female deaths in Oregon
## There are not more female deaths in Pennsylvania
## There are not more female deaths in Rhode Island
## There are not more female deaths in South Carolina
## There are not more female deaths in South Dakota
## There are not more female deaths in Tennessee
## There are not more female deaths in Texas
## There are not more female deaths in Utah
## There are not more female deaths in Vermont
## There are not more female deaths in Virginia
## There are not more female deaths in Washington
## There are not more female deaths in West Virginia
## There are not more female deaths in Wisconsin
## There are not more female deaths in Wyoming
## There are not more female deaths in Puerto Rico