# Use the CRAN cloud mirror so package installs do not prompt for a mirror.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Load the packages ----
library(dplyr)
library(readxl)

# Load the data ----
# Folder that holds the workbook. The folder is kept in one variable and
# combined with file.path() instead of calling setwd(), so running this script
# does not change the working directory of the R session.
data_dir <- "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 5"
ahead_path <- file.path(data_dir, "AHEAD_data 1.xlsx")

# Fail early with a clear message if the workbook is not where we expect it.
if (!file.exists(ahead_path)) {
  stop("Cannot find the AHEAD workbook at: ", ahead_path, call. = FALSE)
}

# Import the data from the "State totals" sheet
Ahead_data <- read_excel(ahead_path, sheet = "State totals")

# Open the data viewer only when someone is at the console; View() is a
# visual inspection aid and has no effect on the analysis itself.
if (interactive()) {
  View(Ahead_data)
}

# Filter the data to include only rows where:
# 1. the statistic is "percentage" (for any indicator), or
# 2. the statistic is "count" and the indicator is either "Diagnoses" or
#    "Incidence"
Filtered_data <- Ahead_data %>%
  filter(
    statistic == "percentage" |
      (statistic == "count" & indicator %in% c("Diagnoses", "Incidence"))
  )

# View the filtered data (interactive sessions only)
if (interactive()) {
  View(Filtered_data)
}
# Packages for the map ----
# Install only the packages that are not already available. Calling
# install.packages() unconditionally re-downloads them on every run and, on
# Windows, cannot overwrite a package whose DLL is already in use (this is what
# produced the "cannot remove prior installation of package 'sf'" warning).
plot_packages <- c("ggplot2", "sf", "tigris")
is_installed <- vapply(
  plot_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- plot_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(readxl)
library(ggplot2)
library(sf)
library(dplyr)
library(tigris)

# Cache downloaded shapefiles between runs and have tigris return sf objects.
options(tigris_use_cache = TRUE, tigris_class = "sf")
# Download the cartographic-boundary states shapefile
States <- states(cb = TRUE)

# Move Alaska and Hawaii next to the contiguous states ----
# tigris::shift_geometry() rescales and repositions Alaska, Hawaii and Puerto
# Rico in a US-wide sf object and returns the same rows and columns with the
# shifted geometry, in a projected (Albers equal-area) coordinate system.
# This replaces multiplying and offsetting raw longitude/latitude values by
# hand, which scales around the map origin rather than the state itself and
# scatters the Aleutian islands that lie across the 180-degree meridian.
States <- shift_geometry(States)
# Attach the AHEAD rows to the state polygons, matching the NAME column of
# States to the `state` column of Ahead_data
Map_data <- States %>%
  left_join(Ahead_data, by = c("NAME" = "state"))

# Keep only the rows for the metric and statistic shown on the map
plot_data <- filter(
  Map_data,
  metric == "Estimated incidence" & statistic == "count"
)

# Build the choropleth one layer at a time, then draw it
P <- ggplot(data = plot_data)
P <- P + geom_sf(mapping = aes(fill = value), color = "white")
P <- P + scale_fill_viridis_c(option = "C")
P <- P + theme_minimal()
P <- P + labs(fill = "Value", title = "Map for Metric: Estimated Incidence")
print(P)
