STAT 515-002 Redesign Project

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(dplyr)
library(leaflet)

Plot #1 - Bar Chart of Fox News data, but with labeled y-axis starting at 0

# Figures (in millions of people) as reported in the original Fox News chart.
dat <- data.frame(
  labels = c("People on Welfare", "People with a Full Time Job"),
  people = c(108.6, 101.7),
  stringsAsFactors = FALSE
)
# Fix the factor level order to the row order so the bars keep this order.
dat$labels <- factor(dat$labels, levels = unique(dat$labels))
dat

# Bar chart of the Fox News figures, one bar per category, with each bar
# labelled by its value. All bars share a single fixed fill colour, so the
# fill legend is suppressed.
ggplot(data = dat, aes(x = labels, y = people, fill = labels)) +
  geom_bar(color = "black", fill = "#FFD700", width = .5, stat = "identity") +
  # `guides(fill = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to drop a legend.
  guides(fill = "none") +
  xlab("Source: Census Bureau, 2011") + ylab("Millions of People") +
  ggtitle("Welfare vs. Full Time Jobs") +
  geom_text(aes(label = people), vjust = 1.5, color = "black")

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.

# Result: Even with Fox's original data, starting the y-axis at 0 shows how small the difference really is

Plot #2 - Revised Bar Chart

More accurate data (direct from US Census Bureau) and correct terminology

Employment Data: Civ Employment.xls

TANF (Welfare) Data: https://www.acf.hhs.gov/ofa/data/characteristics-and-financial-circumstances-tanf-recipients-fiscal-year-2009-t21

Note: Recession of 2008

# Revised figures (in millions of people) for the corrected comparison.
dat2 <- data.frame(
  labels = c("TANF Recipients", "People with a Full Time Job"),
  people = c(97.36, 139.9),
  stringsAsFactors = FALSE
)
# Keep the categories in row order rather than alphabetical order.
dat2$labels <- factor(dat2$labels, levels = unique(dat2$labels))
dat2

# Bar chart of the revised figures, one bar per category, with each bar
# labelled by its value. All bars share a single fixed fill colour, so the
# fill legend is suppressed.
ggplot(data = dat2, aes(x = labels, y = people, fill = labels)) +
  geom_bar(color = "black", fill = "#00BA38", width = .5, stat = "identity") +
  # `guides(fill = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to drop a legend.
  guides(fill = "none") +
  xlab("Source: Census Bureau, 2011") + ylab("Millions of People") +
  ggtitle("TANF Recipients vs. Full Time Jobs") +
  geom_text(aes(label = people), vjust = 1.5, color = "white")

Plot #3 - Scatterplot with TANF, Unemployed, and Employed Data (Stacked)

# Read the per-state 2007 statistics (State, TANF, Population, Unemployed,
# Employed) from the project's data folder.
census <- read.csv(
  file = "/Users/ryankelly/Desktop/Desktop - Ryan’s MacBook Pro/GMU/Spring 2023/STAT 515/Midterm Project/data/stats_by_state_2007.csv"
)

# Preview the first rows
head(census)

# Check column types
str(census)

## 'data.frame':    51 obs. of  5 variables:
##  $ State     : chr  "Alabama " "Alaska " "Arizona " "Arkansas " ...
##  $ TANF      : int  42 8 78 20 1161 25 36 9 14 75 ...
##  $ Population: int  4638 682 6362 2842 36226 4842 3489 865 586 18278 ...
##  $ Unemployed: int  77 21 119 71 960 104 84 15 18 369 ...
##  $ Employed  : int  2151 362 3152 1373 18250 2727 1890 438 337 9093 ...

# Make sure the TANF and Population counts are stored as integers.
census[c("TANF", "Population")] <-
  lapply(census[c("TANF", "Population")], as.integer)

# Preview the first rows after coercion
head(census)

# Re-check column types
str(census)

## 'data.frame':    51 obs. of  5 variables:
##  $ State     : chr  "Alabama " "Alaska " "Arizona " "Arkansas " ...
##  $ TANF      : int  42 8 78 20 1161 25 36 9 14 75 ...
##  $ Population: int  4638 682 6362 2842 36226 4842 3489 865 586 18278 ...
##  $ Unemployed: int  77 21 119 71 960 104 84 15 18 369 ...
##  $ Employed  : int  2151 362 3152 1373 18250 2727 1890 438 337 9093 ...

library(reshape2)  # Import package to change data to long format dataframe

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

# Reshape to long format: one row per State and measure, with the measure
# name in `variable` and its count in `value`. Argument names are spelled
# out in full rather than relying on partial matching of `id` / `measure`.
census_long <- melt(
  census,
  id.vars = "State",
  measure.vars = c("TANF", "Unemployed", "Employed")
)

# Reference: https://stackoverflow.com/questions/7570319/the-right-way-to-plot-multiple-y-values-as-separate-lines-with-ggplot2

#cbp1 <- c("#E69F00", "#56B4E9", "#009E73",
#                   "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
                   
# Colour palette for the scatterplots, given as hex codes.
cbp1 <- paste0(
  "#",
  c("CC79A7", "56B4E9", "009E73", "F0E442", "0072B2", "D55E00")
)

# Scatterplot of the raw counts: one point per state and measure, coloured
# by measure. State names are rotated to stay readable and the untitled
# legend sits below the plot.
ggplot(
  data = census_long,
  mapping = aes(x = State, y = value, colour = variable)
) +
  geom_point() +
  scale_colour_manual(values = cbp1) +
  labs(y = "Number of People (x1000)") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Colorblind-Friendly Palette: https://www.datanovia.com/en/blog/ggplot-colors-best-tricks-you-will-love/#use-a-colorblind-friendly-palette
# Reference: Vertical x-axis text https://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2

Plot #4 - Bar Plot (Transformed data for TANF, Unemployed, Employed)

# Re-read the per-state statistics so the scaled copy starts from the raw
# counts and leaves `census` untouched.
census_scaled <- read.csv("/Users/ryankelly/Desktop/Desktop - Ryan’s MacBook Pro/GMU/Spring 2023/STAT 515/Midterm Project/data/stats_by_state_2007.csv")
#head(census_scaled)
#str(census_scaled)

# Make sure the TANF and Population counts are stored as integers
census_scaled$TANF <- as.integer(census_scaled$TANF)
census_scaled$Population <- as.integer(census_scaled$Population)

# Express each measure as a fraction of the state's population so states of
# different sizes can be compared
census_scaled$TANF <- census_scaled$TANF / census_scaled$Population
census_scaled$Unemployed <- census_scaled$Unemployed / census_scaled$Population
census_scaled$Employed <- census_scaled$Employed / census_scaled$Population

# Reshape to long format (one row per State and measure). Argument names are
# spelled out in full rather than relying on partial matching.
census_scaled_long <- melt(
  census_scaled,
  id.vars = "State",
  measure.vars = c("TANF", "Unemployed", "Employed")
)

# Scatterplot of the population-scaled measures: one point per state and
# measure, coloured by measure, with rotated state names and the untitled
# legend below the plot.
ggplot(
  data = census_scaled_long,
  mapping = aes(x = State, y = value, colour = variable)
) +
  geom_point() +
  scale_colour_manual(values = cbp1) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )

## Create grouped bar chart with TANF, Unemployed, Employed data for comparison

# Establish colorblind-friendly colors for bars
bars <- c("#FBB829", "#882D17", "#1F78B4")
# Reference: https://www.datanovia.com/en/blog/ggplot-colors-best-tricks-you-will-love/#use-a-colorblind-friendly-palette


# Grouped bar chart: for each state, one bar per measure placed side by side.
# `value` holds fractions of the population (0-1), so the tick labels are
# converted to percentages to agree with the axis title.
ggplot(census_scaled_long, aes(x = State, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = function(x) paste0(100 * x, "%")) +
  ylab("Percentage of Population") +
  scale_fill_manual(values = bars) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank())

# Reference: https://statisticsglobe.com/draw-grouped-barplot-in-r#example-2-drawing-grouped-barchart-using-ggplot2-package

Load and transform data for TANF, Unemployed, Employed Maps

# Start again from the raw per-state counts for the map data.
census_scaled <- read.csv(
  file = "/Users/ryankelly/Desktop/Desktop - Ryan’s MacBook Pro/GMU/Spring 2023/STAT 515/Midterm Project/data/stats_by_state_2007.csv"
)
#head(census_scaled)
#str(census_scaled)

# Coerce the counts to integer, then divide each measure by the state's
# population. The statements run in order, so the divisions use the
# integer-coerced columns.
census_scaled <- within(census_scaled, {
  TANF <- as.integer(TANF)
  Population <- as.integer(Population)
  TANF <- TANF / Population
  Unemployed <- Unemployed / Population
  Employed <- Employed / Population
})

# Load state boundary polygons for the map; `region` holds lower-case state
# names
state_df <- map_data("state")

# Normalise state names in census_scaled to match state_df formatting:
# lower case, with the trailing space after each name removed. Only
# leading/trailing whitespace is trimmed -- removing every space would turn
# "new york" into "newyork", which no longer matches `region` in state_df, so
# multi-word states would be dropped by the join.
census_scaled$State <- trimws(tolower(census_scaled$State))

# Rename "State" column as "region" to match state_df data frame. Only that
# column is renamed, so this does not depend on the column order of the CSV.
names(census_scaled)[names(census_scaled) == "State"] <- "region"

# Verify structures of data frames prior to joining: census_scaled should now
# have a character `region` column alongside the scaled measures
str(census_scaled)

## 'data.frame':    51 obs. of  5 variables:
##  $ region    : chr  "alabama" "alaska" "arizona" "arkansas" ...
##  $ TANF      : num  0.00906 0.01173 0.01226 0.00704 0.03205 ...
##  $ Population: int  4638 682 6362 2842 36226 4842 3489 865 586 18278 ...
##  $ Unemployed: num  0.0166 0.0308 0.0187 0.025 0.0265 ...
##  $ Employed  : num  0.464 0.531 0.495 0.483 0.504 ...

# Inspect the map polygon data; its `region` column is the key used for the join
str(state_df)

## 'data.frame':    15537 obs. of  6 variables:
##  $ long     : num  -87.5 -87.5 -87.5 -87.5 -87.6 ...
##  $ lat      : num  30.4 30.4 30.4 30.3 30.3 ...
##  $ group    : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ order    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ region   : chr  "alabama" "alabama" "alabama" "alabama" ...
##  $ subregion: chr  NA NA NA NA ...

# Attach the scaled measures to every polygon vertex of the matching state;
# only regions present in both data frames are kept.
US <- state_df %>%
  inner_join(census_scaled, by = "region")

Plot #5 - Create map of TANF Percentages

# 200-step blue ramp used for the TANF fill scale
blue <- colorRampPalette(c("#F7FBFF", "#DEEBF7", "#C6DBEF", "#9ECAE1", "#6BAED6", "#4292C6", "#2171B5", "#08519C", "#08306B"))(200)

# Choropleth of the TANF share of population by state.
# `linewidth` replaces `size` for polygon outlines, which is deprecated as of
# ggplot2 3.4.0.
p <- ggplot(US, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = TANF),
               colour = alpha("white", 1/2), linewidth = 0.05) +
  geom_polygon(data = state_df, colour = "white", fill = NA) + # adding state borders
  ggtitle("TANF By State") +
  scale_fill_gradientn(colours = c("white", blue)) +
  theme_void()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

# Convert the TANF map to a Plotly object and display it
ggplotly(p)

Plot #6 - Create map of Unemployed Percentages

# 200-step red ramp used for the Unemployed fill scale
red <- colorRampPalette(c("#FFF5F0", "#FEE0D2", "#FCBBA1", "#FC9272", "#FB6A4A", "#EF3B2C", "#CB181D", "#A50F15", "#67000D"))(200)

# Choropleth of the Unemployed share of population by state.
# `linewidth` replaces `size` for polygon outlines, which is deprecated as of
# ggplot2 3.4.0.
q <- ggplot(US, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Unemployed),
               colour = alpha("white", 1/2), linewidth = 0.05) +
  geom_polygon(data = state_df, colour = "white", fill = NA) + # adding state borders
  ggtitle("Unemployment By State") +
  scale_fill_gradientn(colours = c("white", red)) +
  theme_void()

# Convert the map to a Plotly object and display it
ggplotly(q)

Plot #7 - Create map of Employed Percentages

# 200-step green ramp used for the Employed fill scale
green <- colorRampPalette(c("#F7FCF5", "#E5F5E0", "#C7E9C0", "#A1D99B", "#74C476", "#41AB5D", "#238B45", "#006D2C", "#00441B"))(200)

# Choropleth of the Employed share of population by state.
# `linewidth` replaces `size` for polygon outlines, which is deprecated as of
# ggplot2 3.4.0.
z <- ggplot(US, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Employed),
               colour = alpha("white", 1/2), linewidth = 0.05) +
  geom_polygon(data = state_df, colour = "white", fill = NA) + # adding state borders
  ggtitle("Employment By State") +
  scale_fill_gradientn(colours = c("white", green)) +
  theme_void()

# Convert the map to a Plotly object and display it
ggplotly(z)

Create Shiny plot of all 3 maps with a selection dropdown

library(shiny)

# Create the three individual Plotly maps
map1 <- ggplotly(p)  # map of TANF data
map2 <- ggplotly(q)  # map of Unemployed
map3 <- ggplotly(z)  # map of Employed

# Combine the three maps into a single row of subplots
combined_map <- subplot(map1, map2, map3, nrows = 1, shareX = TRUE, shareY = TRUE)

# subplot() already returns a Plotly object, so it needs no further
# ggplotly() conversion; the name is kept so existing references still work.
combined_plotly <- combined_map

# Define the Shiny app
# Page layout: a dropdown to pick the measure, with the chosen map below it.
ui <- fluidPage(
  selectInput(
    inputId = "map_selector",
    label = "Select a map",
    choices = c("TANF", "Unemployed", "Employed")
  ),
  plotlyOutput(outputId = "map")
)

# References:
  # R Shiny https://shiny.rstudio.com/reference/shiny/1.7.0/observeevent
  # https://stackoverflow.com/questions/69414472/select-plot-from-a-dropdown-menu-in-rshiny

# Server logic: show the pre-built Plotly map that matches the dropdown
# selection.
server <- function(input, output) {
  output$map <- renderPlotly({
    switch(input$map_selector,
      "TANF" = map1,
      "Unemployed" = map2,
      map3  # any other selection falls through to the Employed map
    )
  })
}

# Run the Shiny app
# Build the app object from the layout and server logic defined above
shinyApp(ui = ui, server = server)

Shiny applications not supported in static R Markdown documents

STAT 515-002 Redesign Project

Ryan V. Kelly

2023-03-09

Plot #1 - Bar Chart of Fox News data, but with labeled y-axis starting at 0

Plot #2 - Revised Bar Chart

More accurate data (direct from US Census Bureau) and correct terminology

Employment Data: Civ Employment.xls

TANF (Welfare) Data: https://www.acf.hhs.gov/ofa/data/characteristics-and-financial-circumstances-tanf-recipients-fiscal-year-2009-t21

Note: Recession of 2008

Plot #3 - Scatterplot with TANF, Unemployed, and Employed Data (Stacked)

Plot #4 - Bar Plot (Transformed data for TANF, Unemployed, Employed)

Load and transform data for TANF, Unemployed, Employed Maps

Plot #5 - Create map of TANF Percentages

Plot #6 - Create map of Unemployed Percentages

Plot #7 - Create map of Employed Percentages