Data Cleaning

# Load the 2015 HIV data set from the CSV file in the working directory.
hiv <- read_csv(file = "HIV2015.csv")
Rows: 52 Columns: 3
-- Column specification --------------------------------------------------------
Delimiter: ","
chr (2): State, Midwest
dbl (1): Rate2015

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Here is some data cleaning to make coloring the bars a bit easier:

# Build the table used for plotting from `hiv`.
#   state_abr: x-axis label. The "US" row is labelled "US"; every other row
#              uses state2abbr(State).
#   state_fin: fill group. "WI" and "US" each get their own group; every
#              other row keeps its Midwest flag.
# Leading `<-` assignment keeps the name being defined visible at the top of
# the pipeline instead of hiding it after the last step.
hiv_update <- hiv %>%
  mutate(
    state_abr = ifelse(State == "US", "US", state2abbr(State)),
    # case_when() treats an NA condition as "no match", so a row with a
    # missing abbreviation falls through to its Midwest flag.
    state_fin = case_when(
      state_abr == "WI" ~ "WI",
      state_abr == "US" ~ "US",
      TRUE ~ Midwest
    )
  )
# Bar chart of the 2015 HIV diagnosis rate: one bar per state abbreviation,
# ordered from the highest rate to the lowest and filled by state_fin.
ggplot(hiv_update) +
  geom_col(
    aes(x = fct_reorder(state_abr, -Rate2015), y = Rate2015, fill = state_fin)
  ) +
  labs(title = "HIV diagnosis rates across the U.S",
       subtitle = "Estimated HIV diagnosis rate by state, 2015",
       x = "State of Residence at Diagnosis",
       y = "Rate per 100,000 Population") +
  # `guides(fill = FALSE)` is deprecated in ggplot2; "none" is the supported
  # way to drop the fill legend.
  guides(fill = "none") +
  # Colours are named so each one is tied to its fill group explicitly rather
  # than by position. Assumes the groups are exactly N, US, WI and Y
  # (TODO confirm against the Midwest column).
  scale_fill_manual(
    values = c(N = "#28458a", US = "#7ac4ab", WI = "#c775c6", Y = "#fcf995")
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90),
        plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  # No padding below zero, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0))
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.

Notice that Wisconsin (WI) and West Virginia (WV) do not appear in the same order as in the original graph. Why? If we look at the data, we see that the two states have the same rate:

# Show the Wisconsin and West Virginia rows side by side to compare them.
filter(hiv_update, state_abr %in% c("WI", "WV"))
# A tibble: 2 x 5
  State         Rate2015 Midwest state_abr state_fin
  <chr>            <dbl> <chr>   <chr>     <chr>    
1 West Virginia        4 N       WV        N        
2 Wisconsin            4 Y       WI        WI       
# Final version of the bar chart: WI is moved to a fixed position in the bar
# order, and rate labels are added for selected bars.
#
# The previous `mutate(state_abr == fct_relevel(state_abr, "WI"))` step was
# dropped: with `==` it only added an unused logical column and never
# reordered anything. The reordering is done in the x aesthetic below.
hiv_update %>%
  ggplot(aes(
    # Order bars by descending rate, then move "WI" to the slot after
    # position 41. The 41 is hard-coded; presumably it is the slot right
    # after WV, which has the same rate. Re-check if the data change.
    x = fct_relevel(fct_reorder(state_abr, -Rate2015), "WI", after = 41),
    y = Rate2015,
    fill = state_fin
  )) +
  geom_col() +
  labs(title = "HIV diagnosis rates across the U.S",
       subtitle = "Estimated HIV diagnosis rate by state, 2015",
       x = "State of Residence at Diagnosis",
       y = "Rate per 100,000 Population") +
  # `guides(fill = FALSE)` is deprecated in ggplot2; "none" is the supported
  # way to drop the fill legend.
  guides(fill = "none") +
  # Colours are named so each one is tied to its fill group explicitly rather
  # than by position. Assumes the groups are exactly N, US, WI and Y
  # (TODO confirm against the Midwest column).
  scale_fill_manual(
    values = c(N = "#28458a", US = "#7ac4ab", WI = "#c775c6", Y = "#fcf995")
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
        plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  # Fixed 0-60 range with no padding: the bars sit on the x axis and there is
  # headroom above them for the labels.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 60)) +
  # Label only the rows in the Y, US and WI fill groups, plus DC.
  geom_text_repel(
    data = hiv_update %>%
      filter(state_fin %in% c("Y", "US", "WI") | state_abr == "DC"),
    aes(label = Rate2015),
    point.padding = NA,
    nudge_y = 2.5
  )
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.