# Use the cloud CRAN mirror so install.packages() can run without prompting
# for a repository.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Working directory ----
# The workbook is read with a relative path, so R has to be pointed at the
# folder that holds it. The folder below is specific to one machine; when it
# does not exist, stay in the current working directory (and say so) instead
# of aborting, so the script can still be run from a project folder elsewhere.
data_dir <- "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 5"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message(
    "Data directory not found: ", data_dir,
    "\nUsing current working directory: ", getwd()
  )
}

# Report the directory that relative paths will be resolved against.
getwd()
#load the packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)


# Import the "State totals" sheet of the AHEAD workbook. The path is
# relative, so the workbook must be in the current working directory.
Ahead_data <- read_excel("AHEAD_data 1.xlsx", sheet = "State totals")

# Open the spreadsheet-style viewer only in an interactive session; View()
# has no viewer to open when the script is run with Rscript or on a machine
# without a display.
if (interactive()) {
  View(Ahead_data)
}


# Keep only the rows where either:
#   1. the statistic is "percentage" (for any indicator), or
#   2. the statistic is "count" and the indicator is "Diagnoses" or
#      "Incidence".
# filter() also drops rows for which the condition evaluates to NA.
Filtered_data <- Ahead_data %>%
  filter(
    statistic == "percentage" |
      (statistic == "count" & indicator %in% c("Diagnoses", "Incidence"))
  )

# Inspect the filtered rows only when a viewer is available, i.e. in an
# interactive session.
if (interactive()) {
  View(Filtered_data)
}
# Install the plotting / mapping packages, but only the ones that are missing.
# Re-installing on every run is slow, needs network access, and can fail for a
# package that is already in use: a previous run reported "cannot remove prior
# installation of package 'sf'" (permission denied while copying sf.dll).
plot_pkgs <- c("ggplot2", "sf", "tigris")

# system.file() returns "" for a package that is not installed, and does not
# load the package's namespace while checking.
is_installed <- vapply(
  plot_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)

if (!all(is_installed)) {
  install.packages(plot_pkgs[!is_installed])
}
library(readxl)
library(ggplot2)
library(sf)
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
# Cache downloaded shapefiles between runs and request sf objects from tigris.
options(tigris_use_cache = TRUE, tigris_class = "sf")

# Download the state polygons from the cartographic boundary files
# (cb = TRUE). The year is pinned to 2021, the vintage an earlier run of this
# script retrieved by default, so the map does not silently change when a
# newer tigris release moves its default year.
States <- states(cb = TRUE, year = 2021)
# Reposition Alaska and Hawaii ----
# Each state's row is pulled out of States, converted to EPSG:4326, and its
# geometry is rewritten with plain arithmetic on the coordinates. The results
# are written back into States at the end of this section.
#
# NOTE(review): lapply() returns a plain list rather than the geometry
# column's original class; presumably sf re-wraps it when it is assigned
# back. Confirm the CRS survives the round trip.
# NOTE(review): States itself is not transformed in this section, so the
# rewritten EPSG:4326 coordinates are stored next to whatever CRS States
# already has. Confirm the two agree closely enough for this map.

# Alaska: shrink, then shift.

Alaska <- States[States$STUSPS == "AK", ]
Alaska <- st_transform(Alaska, crs = st_crs(4326))
Alaska$geometry <- lapply(Alaska$geometry, function(g) {
  g <- g * 0.35   # scale the coordinates by 0.35 (no re-centring is applied first)
  g <- g + c(-35, -5) # translate by (-35, -5); presumably degrees (x, y) given EPSG:4326
  g
})

# Hawaii: shift only, no scaling.

Hawaii <- States[States$STUSPS == "HI", ]
Hawaii <- st_transform(Hawaii, crs = st_crs(4326))
Hawaii$geometry <- lapply(Hawaii$geometry, function(g) {
  g <- g + c(54, -5) # translate by (54, -5); presumably degrees (x, y) given EPSG:4326
  g
})



# Overwrite the AK and HI geometries in States with the adjusted versions;
# every other row of States is left untouched.
States[States$STUSPS == "AK", ]$geometry <- Alaska$geometry
States[States$STUSPS == "HI", ]$geometry <- Hawaii$geometry


# Attach the AHEAD rows to the state polygons by state name. This is a left
# join, so every polygon in States is kept and is repeated once per matching
# AHEAD row.
# NOTE(review): this joins the full Ahead_data table, not Filtered_data.
# Confirm that is intended.
Map_data <- States %>%
  left_join(Ahead_data, by = c("NAME" = "state"))

# Keep only the rows to be mapped: the "Estimated incidence" metric reported
# as a count. Polygons with no such row drop out at this step.
plot_data <- filter(
  Map_data,
  metric == "Estimated incidence" & statistic == "count"
)

# Choropleth of the selected metric ----
# Each polygon is filled according to its `value` and outlined in white; the
# fill uses the continuous viridis palette, option "C".
P <- ggplot(data = plot_data) +
  geom_sf(mapping = aes(fill = value), colour = "white") +
  scale_fill_viridis_c(option = "C") +
  theme_minimal() +
  labs(
    title = "Map for Metric: Estimated Incidence",
    fill = "Value"
  )

# Draw the map on the active graphics device.
print(P)