County Spending

#install.packages("rlang", version = "1.1.0")
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.2.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.1.8
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(treemap)
library(viridis)
Loading required package: viridisLite
library(highcharter)
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
Highcharts (www.highcharts.com) is a Highsoft software product which is
not free for commercial and Governmental use
library(gganimate)
Warning: package 'gganimate' was built under R version 4.2.3
library(magic)
Loading required package: abind
library(patchwork)
Warning: package 'patchwork' was built under R version 4.2.3
library(RColorBrewer)
library(networkD3)
Warning: package 'networkD3' was built under R version 4.2.3

Attaching package: 'networkD3'

The following object is masked from 'package:highcharter':

    JS
library(highcharter) 

Set working directory and load data

setwd("C:/Users/jakea/OneDrive/Desktop/MC 2022/DATA-110")
CS <- read.csv("County_Spending.csv")
head(CS)
  Fiscal.Year.Period Fiscal.Year                           Service
1                  9        2016 Housing and Community Development
2                  6        2021                General Government
3                  7        2017                General Government
4                  8        2017 Housing and Community Development
5                 10        2017         Health and Human Services
6                  2        2017                General Government
                                    Department
1                Housing and Community Affairs
2                                      Finance
3 Technology and Enterprise Business Solutions
4                Housing and Community Affairs
5                    Health and Human Services
6                    Non-Departmental Accounts
                            Program                          Fund
1     Multi-Family Housing Programs Montgomery Housing Initiative
2 Economic Development Fund Program                    Grant Fund
3         Chief Information Officer                  General Fund
4     Multi-Family Housing Programs Montgomery Housing Initiative
5                   Trauma Services                  General Fund
6                         Utilities                  General Fund
            Category                   Expense.Category Account.Code
1 Operating Expenses                  Public Assistance        65116
2 Operating Expenses                              Loans        65214
3 Operating Expenses Other Supplies/Materials/Equipment        62896
4 Operating Expenses                  Public Assistance        65130
5 Operating Expenses                             Travel        64010
6 Operating Expenses                          Utilities        65014
                                      Invoice.Description
1                                       Public Assistance
2 Grants-Convertible To Loans (Economic Development Fund)
3                                          Parking Meters
4               Senior Rental Assistance Program Benefits
5                                Metropolitan Area Travel
6                                             Electricity
                               Vendor Vendor.Number        Zip Contract.Number
1                          1126022265    2301153206      75240      2301153206
2    HEALTH RESEARCH AND ANALYSIS LLC         65969      20814                
3  P-CARD PURCHASES (JP MORGAN CHASE)         50766 60197-4471                
4 MCG HHS - RENTAL ASSISTANCE PROGRAM         38715      20850                
5                       Cabrera, Mona         38562                           
6                               PEPCO         34863 20068-0001                
  PO.Number PO.Line         Invoice.Number Invoice.Line
1 737461995      NA             1993278089           10
2                NA             REOPEN4474            1
3                NA          2298496846001            1
4                NA             HHS|233002           10
5                NA                1321180            4
6                NA 5502240899521000136638            1
  Invoice.Distribution.Line   Amount Invoice.Date Payment.Method Payment.Date
1                         1  1546.00   03/24/2016          CHECK   03/25/2016
2                         1  5000.00   12/23/2020            EFT   03/09/2021
3                         1     5.32   11/30/2016            EFT   01/11/2017
4                         1   200.00   02/17/2017          CHECK   02/21/2017
5                         1     1.39   04/06/2017          CHECK   04/24/2017
6                         1 10798.79   08/04/2016            EFT   08/09/2016
  Payment.Number Payment.Status
1     3232348497     RECONCILED
2         631826     RECONCILED
3         461215     RECONCILED
4        2276923     RECONCILED
5        2287101     RECONCILED
6         447544     RECONCILED

Remove columns that would be used for my project

CS <- CS %>% 
  select(-Contract.Number, -PO.Number, -PO.Line, -Invoice.Line, -Invoice.Distribution.Line, -Invoice.Date, -Payment.Date)

Filter for any NA values

CS <- CS %>%
  filter(!is.na(Fiscal.Year) & !is.na(Service) & !is.na(Department) & !is.na(Program) &  !is.na(Fund) & !is.na(Category) & !is.na(Expense.Category) & !is.na(Account.Code) & !is.na(Invoice.Description) & !is.na(Vendor) & !is.na(Vendor.Number) & !is.na(Zip) & !is.na(Invoice.Number) & !is.na(Amount) & !is.na(Payment.Method) & !is.na(Payment.Status) & !is.na(Payment.Number))

Show the total spending for counties departments and services

CSTotal <- CS %>% # total spending for county each year
  group_by(Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount))
CS_department_total <- CS %>% # total spending per department
  group_by(Department) %>%
  summarize(Total_Amount = sum(Amount))
CS_service_total <- CS %>% # total spending per service
  group_by(Service) %>%
  summarize(Total_Amount = sum(Amount))
CS_department_year <- CS %>% # total spending per department each year
  group_by(Department, Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount))
`summarise()` has grouped output by 'Department'. You can override using the
`.groups` argument.
CS_service_year <- CS %>%  # total spending per service each year
  group_by(Service, Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount)) %>% 
  filter(Service != "General Government") %>% # General Government because it is to much of an out linear
  filter(Service != "Debt Service") %>% # Debt Services because it is to much of an out linear
  filter(Service != "Not Defined") # remove Not Defined because no information 
`summarise()` has grouped output by 'Service'. You can override using the
`.groups` argument.
CS_education <- CS %>% 
  filter(Service %in% c("Education")) 
CS_education_total <- CS_education %>%
  group_by(Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount))
CS_important <- CS %>%
  group_by(Department, Service, Program, Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount))
`summarise()` has grouped output by 'Department', 'Service', 'Program'. You can
override using the `.groups` argument.
CS_sankey <- CS %>% # data set need for sankey graph
  group_by(Department, Service, Program, Fiscal.Year) %>%
  summarize(Total_Amount = sum(Amount)) %>%
  filter(Service %in% c("General Government")) # only look at general government becuase we took out as out linear, debt services fund department debt services so not interest in looking deeper
`summarise()` has grouped output by 'Department', 'Service', 'Program'. You can
override using the `.groups` argument.
CS_GeneralG <- CS %>% 
  filter(Service %in% c("General Government")) 
CS_GeneralG_total_department <- CS_GeneralG %>%
  group_by(Department) %>%
  summarize(Total_Amount = sum(Amount))
CS_GeneralG_total_department_time <- CS_GeneralG %>%
  group_by(Fiscal.Year, Department) %>%
  summarize(Total_Amount = sum(Amount))
`summarise()` has grouped output by 'Fiscal.Year'. You can override using the
`.groups` argument.
library(ggplot2)

ggplot(CS_sankey, aes(x = Fiscal.Year, y = Total_Amount)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Correlation between Fiscal Year and Total Amount")
`geom_smooth()` using formula = 'y ~ x'

Removes scenitific notion

options(scipen = 999) 
hchart(CSTotal, "column", hcaes(x = Fiscal.Year, y = Total_Amount, color = Total_Amount)) %>% 
  hc_title(text = "Montgomery County Total Spending from 2014-2023") %>% 
  hc_xAxis(title = list(text = "Fiscal Year")) %>% 
  hc_yAxis(title = list(text = "$ Amount")) %>%
  hc_subtitle(text = "Source: dataMontgomery") %>%
  hc_add_theme(hc_theme_economist()) %>%
 hc_colors(viridis(length(unique(CSTotal$Total_Amount)))) 

Total Spending for Montgomery County 2014-2023

ggplot(CSTotal, aes(x = Fiscal.Year, y = Total_Amount, fill = Total_Amount)) +
  geom_col() +
  labs(title = "Montgomery County Total Spending from 2014-2023",
       x = "Fiscal Year", 
       y = "$ Amount",
       fill = "$ Amount") +
  guides()

Facet wrap of each serivce and total spending per year

# Create the plot
ggplot(CS_service_year, aes(x = Fiscal.Year, y = Total_Amount, fill = Total_Amount)) +
  geom_col() +
  scale_fill_gradientn(colors = brewer.pal(9, "Blues")) +
  scale_y_continuous(labels = scales::label_number( big.mark = ",", prefix = "$")) +
  labs(title = "Total Spending Per Service Each Year From 2014-2023",
       x = "Fiscal Year",
       y = "Total Amount ",
       fill = "Total Amount",
       caption = "Source: dataMontgomery") +
  theme_bw() +
  facet_wrap("Service") 

Animation of Total Spending for each Service from 2014-2023

ggplot(CS_service_year, aes(y = Service, x = Total_Amount, fill = Total_Amount)) +
  geom_col() +
  scale_x_continuous(labels = scales::dollar_format(prefix = "$"), expand = c(0, 0)) +
  scale_fill_gradientn(colors = viridis(8)) +
  labs(title = "Total Spending by Service (Year: {frame_time})", 
       x = "Total Amount", 
       y = NULL, fill = "Total Amount") +
  theme_minimal() +
  transition_time(Fiscal.Year) +
  ease_aes('linear') 

library(networkD3)
library(RColorBrewer)

# Create a data frame with the flow data
flows <- data.frame(from = CS_important$Service, 
                    to = CS_important$Department, 
                    value = CS_important$Total_Amount)

# Create a node data frame
nodes <- data.frame(name = unique(c(flows$from, flows$to)))

# Add a column to the flows data frame to indicate the ID of the source and target nodes
flows$IDsource <- match(flows$from, nodes$name) - 1
flows$IDtarget <- match(flows$to, nodes$name) - 1

# Define the color scale for the departments
colorScale <- colorRampPalette(brewer.pal(9, "Set1"))

sankeyNetwork(Links = flows, 
              Nodes = nodes, 
              Source = "IDsource", 
              Target = "IDtarget", 
              Value = "value", 
              NodeID = "name",
              sinksRight = FALSE)
# Create a data frame with the flow data
flows <- data.frame(
  source = CS_sankey$Service, 
  target = CS_sankey$Department, 
  value = CS_sankey$Total_Amount)

# Create a node data frame
nodes <- data.frame(name = unique(c(flows$source, flows$target)))

Add a column to the flows data frame to indicate the ID of the source and target nodes

flows$source_id <- match(flows$source, nodes$name) - 1
flows$target_id <- match(flows$target, nodes$name) - 1
colorScale <- colorRampPalette(brewer.pal(9, "Set1"))

# Create the Sankey diagram using plotly
fig <- plot_ly(
  type = "sankey",
  orientation = "l",
  node = list(
    label = nodes$name,
    color = colorScale(length(nodes$name)),
    pad = 15,
    thickness = 20,
    line = list(color = "black", width = 0.3)), 
    link = list(
    source = flows$source_id,
    target = flows$target_id,
    value = flows$value)) %>% 
  layout(title = "Sankey Diagram",
  font = list(size = 10))

fig