Plots

# distribution of total H ------------------------------------------------------

dh %>%
  select(-c(div, btwn.H, within.H)) %>%
  distinct() %>%
  ggplot() +
  geom_freqpoly(aes(x = total.H,
                    color = flow),
                size = 2) +
  ggtitle("Distribution of Theil H",
          "All CBSAs, by residential/flow type") +
  theme(legend.position = "bottom")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# btwn.H and total H scaling with n.divs ---------------------------------------

library(RColorBrewer)
pal <- RColorBrewer::brewer.pal(3, "Set2")

# how many divisions by division type? (deciles)
dh %>%
  tidyaux::split_and_key("div") %>%
  map( ~quantile(.$n.divs, seq(0,1,.1))) %>%
  tidyaux::rbind.key("div") %>% 
  knitr::kable(digits = 0)
div 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
county 1 1 1 1 2 2 3 3 4 6 40
int 1 1 1 2 2 2 2 3 4 6 21
ints.and.us 1 2 3 4 5 6 7 9 11 18 55
plc 2 3 3 4 5 6 9 13 18 35 566
school_dist 1 2 3 5 6 8 10 12 17 32 380
wtr 1 1 1 1 1 2 2 3 3 5 28
# Place and School dist have dramatically higher upper limits

# break out school and Place from full set of plots
school.plc.decompp <-
  dh %>%
  filter(div %in%
           c("plc", "school_dist")) %>%
  pivot_longer(cols = matches("\\.H$"),
               names_to = "H.component",
               values_to = "H.value") %>%
  #filter(H.component %in%            c("btwn.H", "total.H")) %>%
  ggplot(
    aes(x = n.divs,
        y = H.value,
        color = H.component)) +
  geom_point(
    alpha = .7,
    stroke = 0
    ,size = 2
    ,shape = 19) +
  geom_smooth(method='lm',
              formula= y~x) +
  facet_wrap( vars(flow, div),
              scales = "free_x"
  ) +
  scale_color_brewer(type = "qual",
                     palette = "Set2") +
  ggtitle("Total and Btwn-division component Theil H",
          "By CBSAs, for school districts and Places") +
  theme(legend.position = "bottom")


school.plc.decompp

# with upper outliers trimmed:
school.plc.decompp +
  scale_x_continuous(limits = c(0,100))

# for phys divisions
dh %>%
  filter(!div %in%
           c("plc", "school_dist"
             , "county")) %>%
  pivot_longer(cols = matches("\\.H$"),
               names_to = "H.component",
               values_to = "H.value") %>%
  #filter(H.component %in%           c("btwn.H", "total.H")) %>%
  ggplot(
    aes(x = n.divs,
        y = H.value,
        color = H.component)) +
  geom_point(
    alpha = .7,
    stroke = 0
    ,size = 2
    ,shape = 19
    #,shape = 16
    ) +
  geom_smooth(method='lm',
              formula= y~x) +
  facet_wrap( vars(flow, div),
              scales = "free_x"
              ) +
  scale_color_brewer(type = "qual",
                     palette = "Set2") +
  ggtitle("Total and Btwn-division component Theil H",
        "By CBSAs, for Physical divisions") +
  theme(legend.position = "bottom")