# Load the required packages (they must already be installed)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the homework data from the .xlsx workbook.
library(readxl)
# The default location is an absolute path inside one user's OneDrive folder,
# so it only resolves on a machine that has that exact directory. To run the
# script elsewhere, set the WEEK_2_HW1_XLSX environment variable to the path of
# a local copy of the workbook; when it is unset or empty the original path is
# used, so existing behaviour is unchanged.
week_2_hw1_path <- Sys.getenv("WEEK_2_HW1_XLSX", unset = "")
if (!nzchar(week_2_hw1_path)) {
  week_2_hw1_path <- "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 2 HW1.xlsx"
}
Week_2_HW1 <- read_excel(week_2_hw1_path)
# View(Week_2_HW1)
# Drop the seven descriptor columns listed below; none of them is used in the
# rate calculation, which needs only Cases and Population.
Week_2_HW1_cleaned <- select(
  Week_2_HW1,
  -c(
    "Indicator", "Year", "FIPS", "Age Group",
    "Race/Ethnicity", "Sex", "Transmission Category"
  )
)
# View(Week_2_HW1_cleaned)
# Add a Rate column: Cases divided by Population, scaled to "per 100,000".
# NOTE(review): the printed rates are roughly Percent x 1000 (e.g. about 84,289
# for Alabama), so Population does not look like a total resident population --
# confirm that this is the intended denominator.
Week_2_HW1_rates_Guzman <- mutate(
  Week_2_HW1_cleaned,
  Rate = Cases / Population * 1e5
)
# Preview the first six rows to sanity-check the new column
head(Week_2_HW1_rates_Guzman)
## # A tibble: 6 × 5
## Geography Cases Percent Population Rate
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama 13992 84.4 16600 84289.
## 2 Alaska 721 84.9 850 84824.
## 3 Arizona 17472 84.3 20700 84406.
## 4 Arkansas 5948 79.8 7500 79307.
## 5 California 133497 87.3 152800 87367.
## 6 Colorado 13031 87.7 14900 87456.
# Display the resulting table, which now includes the Rate column
print(x = Week_2_HW1_rates_Guzman)
## # A tibble: 51 × 5
## Geography Cases Percent Population Rate
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama 13992 84.4 16600 84289.
## 2 Alaska 721 84.9 850 84824.
## 3 Arizona 17472 84.3 20700 84406.
## 4 Arkansas 5948 79.8 7500 79307.
## 5 California 133497 87.3 152800 87367.
## 6 Colorado 13031 87.7 14900 87456.
## 7 Connecticut 10391 91.7 11300 91956.
## 8 Delaware 3381 87.5 3900 86692.
## 9 District of Columbia 13655 94 14500 94172.
## 10 Florida 113398 86.7 130900 86629.
## # ℹ 41 more rows