library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Set the file path to your Excel file
file_path <- "C:/Users/bex/Desktop/R Workshop/Homeworks/Week 3/Data Manipulation with Dplyr (Week 3).xlsx"
# Read the Excel sheet into R
data <- read_excel(file_path)
## Warning: Expecting numeric in I2284 / R2284C9: got 'NA'
## Warning: Expecting numeric in I2285 / R2285C9: got 'NA'
## Warning: Expecting numeric in I2286 / R2286C9: got 'NA'
## Warning: Expecting numeric in I2287 / R2287C9: got 'NA'
## Warning: Expecting numeric in I2288 / R2288C9: got 'NA'
## Warning: Expecting numeric in I2289 / R2289C9: got 'NA'
## Warning: Expecting numeric in I2290 / R2290C9: got 'NA'
## Warning: Expecting numeric in I2291 / R2291C9: got 'NA'
## Warning: Expecting numeric in I2292 / R2292C9: got 'NA'
## Warning: Expecting numeric in I2293 / R2293C9: got 'NA'
## Warning: Expecting numeric in I2294 / R2294C9: got 'NA'
## Warning: Expecting numeric in I2295 / R2295C9: got 'NA'
## Warning: Expecting numeric in I2296 / R2296C9: got 'NA'
## Warning: Expecting numeric in I2297 / R2297C9: got 'NA'
## Warning: Expecting numeric in I2298 / R2298C9: got 'NA'
## Warning: Expecting numeric in I2299 / R2299C9: got 'NA'
## Warning: Expecting numeric in I2300 / R2300C9: got 'NA'
## Warning: Expecting numeric in I2301 / R2301C9: got 'NA'
## Warning: Expecting numeric in I2302 / R2302C9: got 'NA'
## Warning: Expecting numeric in I2303 / R2303C9: got 'NA'
## Warning: Expecting numeric in I2304 / R2304C9: got 'NA'
## Warning: Expecting numeric in I2305 / R2305C9: got 'NA'
## Warning: Expecting numeric in I2306 / R2306C9: got 'NA'
## Warning: Expecting numeric in I2307 / R2307C9: got 'NA'
## Warning: Expecting numeric in I2308 / R2308C9: got 'NA'
## Warning: Expecting numeric in I2309 / R2309C9: got 'NA'
## Warning: Expecting numeric in I2310 / R2310C9: got 'NA'
## Warning: Expecting numeric in I2311 / R2311C9: got 'NA'
## Warning: Expecting numeric in I4216 / R4216C9: got 'NA'
## Warning: Expecting numeric in I4217 / R4217C9: got 'NA'
## Warning: Expecting numeric in I4218 / R4218C9: got 'NA'
## Warning: Expecting numeric in I4219 / R4219C9: got 'NA'
## Warning: Expecting numeric in I4220 / R4220C9: got 'NA'
## Warning: Expecting numeric in I4221 / R4221C9: got 'NA'
## Warning: Expecting numeric in I4222 / R4222C9: got 'NA'
## Warning: Expecting numeric in I4223 / R4223C9: got 'NA'
## Warning: Expecting numeric in I4224 / R4224C9: got 'NA'
## Warning: Expecting numeric in I4225 / R4225C9: got 'NA'
## Warning: Expecting numeric in I4226 / R4226C9: got 'NA'
## Warning: Expecting numeric in I4227 / R4227C9: got 'NA'
## Warning: Expecting numeric in I4228 / R4228C9: got 'NA'
## Warning: Expecting numeric in I4229 / R4229C9: got 'NA'
## Warning: Expecting numeric in I4230 / R4230C9: got 'NA'
## Warning: Expecting numeric in I4231 / R4231C9: got 'NA'
## Warning: Expecting numeric in I4232 / R4232C9: got 'NA'
## Warning: Expecting numeric in I4233 / R4233C9: got 'NA'
## Warning: Expecting numeric in I4234 / R4234C9: got 'NA'
## Warning: Expecting numeric in I4235 / R4235C9: got 'NA'
## Warning: Expecting numeric in I4236 / R4236C9: got 'NA'
## Warning: Expecting numeric in I4237 / R4237C9: got 'NA'
## Warning: Expecting numeric in I4238 / R4238C9: got 'NA'
## Warning: Expecting numeric in I4239 / R4239C9: got 'NA'
## Warning: Expecting numeric in I4240 / R4240C9: got 'NA'
## Warning: Expecting numeric in I4241 / R4241C9: got 'NA'
## Warning: Expecting numeric in I4242 / R4242C9: got 'NA'
## Warning: Expecting numeric in I4243 / R4243C9: got 'NA'
## Warning: Expecting numeric in I4622 / R4622C9: got 'NA'
## Warning: Expecting numeric in I4623 / R4623C9: got 'NA'
## Warning: Expecting numeric in I4624 / R4624C9: got 'NA'
## Warning: Expecting numeric in I4625 / R4625C9: got 'NA'
## Warning: Expecting numeric in I4626 / R4626C9: got 'NA'
## Warning: Expecting numeric in I4627 / R4627C9: got 'NA'
## Warning: Expecting numeric in I4628 / R4628C9: got 'NA'
## Warning: Expecting numeric in I4629 / R4629C9: got 'NA'
## Warning: Expecting numeric in I4630 / R4630C9: got 'NA'
## Warning: Expecting numeric in I4631 / R4631C9: got 'NA'
## Warning: Expecting numeric in I4632 / R4632C9: got 'NA'
## Warning: Expecting numeric in I4633 / R4633C9: got 'NA'
## Warning: Expecting numeric in I4634 / R4634C9: got 'NA'
## Warning: Expecting numeric in I4635 / R4635C9: got 'NA'
## Warning: Expecting numeric in I4958 / R4958C9: got 'NA'
## Warning: Expecting numeric in I4959 / R4959C9: got 'NA'
## Warning: Expecting numeric in I4960 / R4960C9: got 'NA'
## Warning: Expecting numeric in I4961 / R4961C9: got 'NA'
## Warning: Expecting numeric in I4962 / R4962C9: got 'NA'
## Warning: Expecting numeric in I4963 / R4963C9: got 'NA'
## Warning: Expecting numeric in I4964 / R4964C9: got 'NA'
## Warning: Expecting numeric in I4965 / R4965C9: got 'NA'
## Warning: Expecting numeric in I4966 / R4966C9: got 'NA'
## Warning: Expecting numeric in I4967 / R4967C9: got 'NA'
## Warning: Expecting numeric in I4968 / R4968C9: got 'NA'
## Warning: Expecting numeric in I4969 / R4969C9: got 'NA'
## Warning: Expecting numeric in I4970 / R4970C9: got 'NA'
## Warning: Expecting numeric in I4971 / R4971C9: got 'NA'
## Warning: Expecting numeric in I4972 / R4972C9: got 'NA'
## Warning: Expecting numeric in I4973 / R4973C9: got 'NA'
## Warning: Expecting numeric in I4974 / R4974C9: got 'NA'
## Warning: Expecting numeric in I4975 / R4975C9: got 'NA'
## Warning: Expecting numeric in I4976 / R4976C9: got 'NA'
## Warning: Expecting numeric in I4977 / R4977C9: got 'NA'
## Warning: Expecting numeric in I4978 / R4978C9: got 'NA'
## Warning: Expecting numeric in I4979 / R4979C9: got 'NA'
## Warning: Expecting numeric in I4980 / R4980C9: got 'NA'
## Warning: Expecting numeric in I4981 / R4981C9: got 'NA'
## Warning: Expecting numeric in I4982 / R4982C9: got 'NA'
## Warning: Expecting numeric in I4983 / R4983C9: got 'NA'
## Warning: Expecting numeric in I4984 / R4984C9: got 'NA'
## Warning: Expecting numeric in I4985 / R4985C9: got 'NA'
## Warning: Expecting numeric in I4986 / R4986C9: got 'NA'
## Warning: Expecting numeric in I4987 / R4987C9: got 'NA'
## Warning: Expecting numeric in I4988 / R4988C9: got 'NA'
## Warning: Expecting numeric in I4989 / R4989C9: got 'NA'
## Warning: Expecting numeric in I4990 / R4990C9: got 'NA'
## Warning: Expecting numeric in I4991 / R4991C9: got 'NA'
## Warning: Expecting numeric in I4992 / R4992C9: got 'NA'
## Warning: Expecting numeric in I4993 / R4993C9: got 'NA'
## Warning: Expecting numeric in I4994 / R4994C9: got 'NA'
## Warning: Expecting numeric in I4995 / R4995C9: got 'NA'
## Warning: Expecting numeric in I4996 / R4996C9: got 'NA'
## Warning: Expecting numeric in I4997 / R4997C9: got 'NA'
## Warning: Expecting numeric in I4998 / R4998C9: got 'NA'
## Warning: Expecting numeric in I4999 / R4999C9: got 'NA'
## Warning: Expecting numeric in I5000 / R5000C9: got 'NA'
## Warning: Expecting numeric in I5001 / R5001C9: got 'NA'
## Warning: Expecting numeric in I5002 / R5002C9: got 'NA'
## Warning: Expecting numeric in I5003 / R5003C9: got 'NA'
## Warning: Expecting numeric in I5004 / R5004C9: got 'NA'
## Warning: Expecting numeric in I5005 / R5005C9: got 'NA'
## Warning: Expecting numeric in I5006 / R5006C9: got 'NA'
## Warning: Expecting numeric in I5007 / R5007C9: got 'NA'
## Warning: Expecting numeric in I5008 / R5008C9: got 'NA'
## Warning: Expecting numeric in I5009 / R5009C9: got 'NA'
## Warning: Expecting numeric in I5010 / R5010C9: got 'NA'
## Warning: Expecting numeric in I5011 / R5011C9: got 'NA'
## Warning: Expecting numeric in I5012 / R5012C9: got 'NA'
## Warning: Expecting numeric in I5013 / R5013C9: got 'NA'
warning = FALSE
# Preview the data
head(data)
## # A tibble: 6 × 9
## Indicator Year State County FIPS `Race/Ethnicity` Sex Cases Population
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 HIV diagnos… 2021 OH Akron 1.04e9 Multiracial Fema… 1 5960
## 2 HIV diagnos… 2021 OH Akron 1.04e9 Multiracial Male 1 5487
## 3 HIV diagnos… 2021 OH Akron 1.04e9 White Fema… 3 246500
## 4 HIV diagnos… 2021 OH Akron 1.04e9 White Male 21 235597
## 5 HIV diagnos… 2021 OH Akron 1.04e9 Native Hawaiian… Fema… Data… 87
## 6 HIV diagnos… 2021 OH Akron 1.04e9 Native Hawaiian… Male Data… 92
# Step 1: Clean the data and ensure numeric types
data_cleaned <- data %>%
mutate(
Cases = as.numeric(Cases),
Population = as.numeric(gsub(",", "", Population)) # Remove commas and convert to numeric
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Cases = as.numeric(Cases)`.
## Caused by warning:
## ! NAs introduced by coercion
# Step 2: Add the Case_Rate column
data_cleaned <- data_cleaned %>%
mutate(Case_Rate = Cases / Population * 100000)
# Preview the cleaned data
head(data_cleaned)
## # A tibble: 6 × 10
## Indicator Year State County FIPS `Race/Ethnicity` Sex Cases Population
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 HIV diagnos… 2021 OH Akron 1.04e9 Multiracial Fema… 1 5960
## 2 HIV diagnos… 2021 OH Akron 1.04e9 Multiracial Male 1 5487
## 3 HIV diagnos… 2021 OH Akron 1.04e9 White Fema… 3 246500
## 4 HIV diagnos… 2021 OH Akron 1.04e9 White Male 21 235597
## 5 HIV diagnos… 2021 OH Akron 1.04e9 Native Hawaiian… Fema… NA 87
## 6 HIV diagnos… 2021 OH Akron 1.04e9 Native Hawaiian… Male NA 92
## # ℹ 1 more variable: Case_Rate <dbl>
selected_data <- data_cleaned %>%
select(Year, State, County, Case_Rate)
filtered_data_2021 <- selected_data %>%
filter(Year == 2021)
total_Cases_by_location <- data_cleaned %>%
group_by(State, County) %>%
summarise(Total_Cases = sum(Cases, na.rm = TRUE))
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
arranged_by_case_rate <- selected_data %>%
arrange(desc(Case_Rate))
print(arranged_by_case_rate)
## # A tibble: 6,048 × 4
## Year State County Case_Rate
## <chr> <chr> <chr> <dbl>
## 1 2020 (COVID-19 Pandemic) IA Des Moines-West Des Moines 820.
## 2 2019 NC Winston-Salem 794.
## 3 2021 WI Milwaukee-Waukesha 725.
## 4 2020 (COVID-19 Pandemic) IN Indianapolis-Carmel-Anderson 717.
## 5 2021 NC Winston-Salem 606.
## 6 2019 NE-IA Omaha-Council Bluffs 439.
## 7 2021 NC Raleigh-Cary 412.
## 8 2019 IN Indianapolis-Carmel-Anderson 395.
## 9 2020 (COVID-19 Pandemic) PA Montgomery County-Bucks County-Ches… 392.
## 10 2020 (COVID-19 Pandemic) FL Miami-Miami Beach-Kendall 361.
## # ℹ 6,038 more rows