library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Set the file path to your Excel file
file_path <- "C:/Users/bex/Desktop/R Workshop/Homeworks/Week 3/Data Manipulation with Dplyr (Week 3).xlsx"

# Read the Excel sheet into R
data <- read_excel(file_path)
## Warning: Expecting numeric in I2284 / R2284C9: got 'NA'
## Warning: Expecting numeric in I2285 / R2285C9: got 'NA'
## Warning: Expecting numeric in I2286 / R2286C9: got 'NA'
## Warning: Expecting numeric in I2287 / R2287C9: got 'NA'
## Warning: Expecting numeric in I2288 / R2288C9: got 'NA'
## Warning: Expecting numeric in I2289 / R2289C9: got 'NA'
## Warning: Expecting numeric in I2290 / R2290C9: got 'NA'
## Warning: Expecting numeric in I2291 / R2291C9: got 'NA'
## Warning: Expecting numeric in I2292 / R2292C9: got 'NA'
## Warning: Expecting numeric in I2293 / R2293C9: got 'NA'
## Warning: Expecting numeric in I2294 / R2294C9: got 'NA'
## Warning: Expecting numeric in I2295 / R2295C9: got 'NA'
## Warning: Expecting numeric in I2296 / R2296C9: got 'NA'
## Warning: Expecting numeric in I2297 / R2297C9: got 'NA'
## Warning: Expecting numeric in I2298 / R2298C9: got 'NA'
## Warning: Expecting numeric in I2299 / R2299C9: got 'NA'
## Warning: Expecting numeric in I2300 / R2300C9: got 'NA'
## Warning: Expecting numeric in I2301 / R2301C9: got 'NA'
## Warning: Expecting numeric in I2302 / R2302C9: got 'NA'
## Warning: Expecting numeric in I2303 / R2303C9: got 'NA'
## Warning: Expecting numeric in I2304 / R2304C9: got 'NA'
## Warning: Expecting numeric in I2305 / R2305C9: got 'NA'
## Warning: Expecting numeric in I2306 / R2306C9: got 'NA'
## Warning: Expecting numeric in I2307 / R2307C9: got 'NA'
## Warning: Expecting numeric in I2308 / R2308C9: got 'NA'
## Warning: Expecting numeric in I2309 / R2309C9: got 'NA'
## Warning: Expecting numeric in I2310 / R2310C9: got 'NA'
## Warning: Expecting numeric in I2311 / R2311C9: got 'NA'
## Warning: Expecting numeric in I4216 / R4216C9: got 'NA'
## Warning: Expecting numeric in I4217 / R4217C9: got 'NA'
## Warning: Expecting numeric in I4218 / R4218C9: got 'NA'
## Warning: Expecting numeric in I4219 / R4219C9: got 'NA'
## Warning: Expecting numeric in I4220 / R4220C9: got 'NA'
## Warning: Expecting numeric in I4221 / R4221C9: got 'NA'
## Warning: Expecting numeric in I4222 / R4222C9: got 'NA'
## Warning: Expecting numeric in I4223 / R4223C9: got 'NA'
## Warning: Expecting numeric in I4224 / R4224C9: got 'NA'
## Warning: Expecting numeric in I4225 / R4225C9: got 'NA'
## Warning: Expecting numeric in I4226 / R4226C9: got 'NA'
## Warning: Expecting numeric in I4227 / R4227C9: got 'NA'
## Warning: Expecting numeric in I4228 / R4228C9: got 'NA'
## Warning: Expecting numeric in I4229 / R4229C9: got 'NA'
## Warning: Expecting numeric in I4230 / R4230C9: got 'NA'
## Warning: Expecting numeric in I4231 / R4231C9: got 'NA'
## Warning: Expecting numeric in I4232 / R4232C9: got 'NA'
## Warning: Expecting numeric in I4233 / R4233C9: got 'NA'
## Warning: Expecting numeric in I4234 / R4234C9: got 'NA'
## Warning: Expecting numeric in I4235 / R4235C9: got 'NA'
## Warning: Expecting numeric in I4236 / R4236C9: got 'NA'
## Warning: Expecting numeric in I4237 / R4237C9: got 'NA'
## Warning: Expecting numeric in I4238 / R4238C9: got 'NA'
## Warning: Expecting numeric in I4239 / R4239C9: got 'NA'
## Warning: Expecting numeric in I4240 / R4240C9: got 'NA'
## Warning: Expecting numeric in I4241 / R4241C9: got 'NA'
## Warning: Expecting numeric in I4242 / R4242C9: got 'NA'
## Warning: Expecting numeric in I4243 / R4243C9: got 'NA'
## Warning: Expecting numeric in I4622 / R4622C9: got 'NA'
## Warning: Expecting numeric in I4623 / R4623C9: got 'NA'
## Warning: Expecting numeric in I4624 / R4624C9: got 'NA'
## Warning: Expecting numeric in I4625 / R4625C9: got 'NA'
## Warning: Expecting numeric in I4626 / R4626C9: got 'NA'
## Warning: Expecting numeric in I4627 / R4627C9: got 'NA'
## Warning: Expecting numeric in I4628 / R4628C9: got 'NA'
## Warning: Expecting numeric in I4629 / R4629C9: got 'NA'
## Warning: Expecting numeric in I4630 / R4630C9: got 'NA'
## Warning: Expecting numeric in I4631 / R4631C9: got 'NA'
## Warning: Expecting numeric in I4632 / R4632C9: got 'NA'
## Warning: Expecting numeric in I4633 / R4633C9: got 'NA'
## Warning: Expecting numeric in I4634 / R4634C9: got 'NA'
## Warning: Expecting numeric in I4635 / R4635C9: got 'NA'
## Warning: Expecting numeric in I4958 / R4958C9: got 'NA'
## Warning: Expecting numeric in I4959 / R4959C9: got 'NA'
## Warning: Expecting numeric in I4960 / R4960C9: got 'NA'
## Warning: Expecting numeric in I4961 / R4961C9: got 'NA'
## Warning: Expecting numeric in I4962 / R4962C9: got 'NA'
## Warning: Expecting numeric in I4963 / R4963C9: got 'NA'
## Warning: Expecting numeric in I4964 / R4964C9: got 'NA'
## Warning: Expecting numeric in I4965 / R4965C9: got 'NA'
## Warning: Expecting numeric in I4966 / R4966C9: got 'NA'
## Warning: Expecting numeric in I4967 / R4967C9: got 'NA'
## Warning: Expecting numeric in I4968 / R4968C9: got 'NA'
## Warning: Expecting numeric in I4969 / R4969C9: got 'NA'
## Warning: Expecting numeric in I4970 / R4970C9: got 'NA'
## Warning: Expecting numeric in I4971 / R4971C9: got 'NA'
## Warning: Expecting numeric in I4972 / R4972C9: got 'NA'
## Warning: Expecting numeric in I4973 / R4973C9: got 'NA'
## Warning: Expecting numeric in I4974 / R4974C9: got 'NA'
## Warning: Expecting numeric in I4975 / R4975C9: got 'NA'
## Warning: Expecting numeric in I4976 / R4976C9: got 'NA'
## Warning: Expecting numeric in I4977 / R4977C9: got 'NA'
## Warning: Expecting numeric in I4978 / R4978C9: got 'NA'
## Warning: Expecting numeric in I4979 / R4979C9: got 'NA'
## Warning: Expecting numeric in I4980 / R4980C9: got 'NA'
## Warning: Expecting numeric in I4981 / R4981C9: got 'NA'
## Warning: Expecting numeric in I4982 / R4982C9: got 'NA'
## Warning: Expecting numeric in I4983 / R4983C9: got 'NA'
## Warning: Expecting numeric in I4984 / R4984C9: got 'NA'
## Warning: Expecting numeric in I4985 / R4985C9: got 'NA'
## Warning: Expecting numeric in I4986 / R4986C9: got 'NA'
## Warning: Expecting numeric in I4987 / R4987C9: got 'NA'
## Warning: Expecting numeric in I4988 / R4988C9: got 'NA'
## Warning: Expecting numeric in I4989 / R4989C9: got 'NA'
## Warning: Expecting numeric in I4990 / R4990C9: got 'NA'
## Warning: Expecting numeric in I4991 / R4991C9: got 'NA'
## Warning: Expecting numeric in I4992 / R4992C9: got 'NA'
## Warning: Expecting numeric in I4993 / R4993C9: got 'NA'
## Warning: Expecting numeric in I4994 / R4994C9: got 'NA'
## Warning: Expecting numeric in I4995 / R4995C9: got 'NA'
## Warning: Expecting numeric in I4996 / R4996C9: got 'NA'
## Warning: Expecting numeric in I4997 / R4997C9: got 'NA'
## Warning: Expecting numeric in I4998 / R4998C9: got 'NA'
## Warning: Expecting numeric in I4999 / R4999C9: got 'NA'
## Warning: Expecting numeric in I5000 / R5000C9: got 'NA'
## Warning: Expecting numeric in I5001 / R5001C9: got 'NA'
## Warning: Expecting numeric in I5002 / R5002C9: got 'NA'
## Warning: Expecting numeric in I5003 / R5003C9: got 'NA'
## Warning: Expecting numeric in I5004 / R5004C9: got 'NA'
## Warning: Expecting numeric in I5005 / R5005C9: got 'NA'
## Warning: Expecting numeric in I5006 / R5006C9: got 'NA'
## Warning: Expecting numeric in I5007 / R5007C9: got 'NA'
## Warning: Expecting numeric in I5008 / R5008C9: got 'NA'
## Warning: Expecting numeric in I5009 / R5009C9: got 'NA'
## Warning: Expecting numeric in I5010 / R5010C9: got 'NA'
## Warning: Expecting numeric in I5011 / R5011C9: got 'NA'
## Warning: Expecting numeric in I5012 / R5012C9: got 'NA'
## Warning: Expecting numeric in I5013 / R5013C9: got 'NA'
warning = FALSE

# Preview the data
head(data)
## # A tibble: 6 × 9
##   Indicator    Year  State County   FIPS `Race/Ethnicity` Sex   Cases Population
##   <chr>        <chr> <chr> <chr>   <dbl> <chr>            <chr> <chr>      <dbl>
## 1 HIV diagnos… 2021  OH    Akron  1.04e9 Multiracial      Fema… 1           5960
## 2 HIV diagnos… 2021  OH    Akron  1.04e9 Multiracial      Male  1           5487
## 3 HIV diagnos… 2021  OH    Akron  1.04e9 White            Fema… 3         246500
## 4 HIV diagnos… 2021  OH    Akron  1.04e9 White            Male  21        235597
## 5 HIV diagnos… 2021  OH    Akron  1.04e9 Native Hawaiian… Fema… Data…         87
## 6 HIV diagnos… 2021  OH    Akron  1.04e9 Native Hawaiian… Male  Data…         92
# Step 1: Clean the data and ensure numeric types
data_cleaned <- data %>%
  mutate(
    Cases = as.numeric(Cases), 
    Population = as.numeric(gsub(",", "", Population)) # Remove commas and convert to numeric
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Cases = as.numeric(Cases)`.
## Caused by warning:
## ! NAs introduced by coercion
# Step 2: Add the Case_Rate column
data_cleaned <- data_cleaned %>%
  mutate(Case_Rate = Cases / Population * 100000)

# Preview the cleaned data
head(data_cleaned)
## # A tibble: 6 × 10
##   Indicator    Year  State County   FIPS `Race/Ethnicity` Sex   Cases Population
##   <chr>        <chr> <chr> <chr>   <dbl> <chr>            <chr> <dbl>      <dbl>
## 1 HIV diagnos… 2021  OH    Akron  1.04e9 Multiracial      Fema…     1       5960
## 2 HIV diagnos… 2021  OH    Akron  1.04e9 Multiracial      Male      1       5487
## 3 HIV diagnos… 2021  OH    Akron  1.04e9 White            Fema…     3     246500
## 4 HIV diagnos… 2021  OH    Akron  1.04e9 White            Male     21     235597
## 5 HIV diagnos… 2021  OH    Akron  1.04e9 Native Hawaiian… Fema…    NA         87
## 6 HIV diagnos… 2021  OH    Akron  1.04e9 Native Hawaiian… Male     NA         92
## # ℹ 1 more variable: Case_Rate <dbl>
selected_data <- data_cleaned %>%
  select(Year, State, County, Case_Rate)
filtered_data_2021 <- selected_data %>%
  filter(Year == 2021)
total_Cases_by_location <- data_cleaned %>%
  group_by(State, County) %>%
  summarise(Total_Cases = sum(Cases, na.rm = TRUE))
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
arranged_by_case_rate <- selected_data %>%
  arrange(desc(Case_Rate))
print(arranged_by_case_rate)
## # A tibble: 6,048 × 4
##    Year                     State County                               Case_Rate
##    <chr>                    <chr> <chr>                                    <dbl>
##  1 2020 (COVID-19 Pandemic) IA    Des Moines-West Des Moines                820.
##  2 2019                     NC    Winston-Salem                             794.
##  3 2021                     WI    Milwaukee-Waukesha                        725.
##  4 2020 (COVID-19 Pandemic) IN    Indianapolis-Carmel-Anderson              717.
##  5 2021                     NC    Winston-Salem                             606.
##  6 2019                     NE-IA Omaha-Council Bluffs                      439.
##  7 2021                     NC    Raleigh-Cary                              412.
##  8 2019                     IN    Indianapolis-Carmel-Anderson              395.
##  9 2020 (COVID-19 Pandemic) PA    Montgomery County-Bucks County-Ches…      392.
## 10 2020 (COVID-19 Pandemic) FL    Miami-Miami Beach-Kendall                 361.
## # ℹ 6,038 more rows