Final-Project.knit

# Location of the transformed data set. Set the HTD_CSV_PATH environment
# variable to run this script on another machine; when it is unset the
# original absolute path is used, so existing behaviour is unchanged.
htd_path <- Sys.getenv(
  "HTD_CSV_PATH",
  unset = "C:\\Users\\moore\\OneDrive\\Desktop\\Fall 2023\\Intro to statistics\\project\\Statistics Project\\Statistics Project\\htd_transformed.csv"
)
htd <- read.csv(htd_path)

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggthemes)

## Warning: package 'ggthemes' was built under R version 4.2.3

library(ggrepel)
library(boot)
library(broom)
library(lindia)

## Warning: package 'lindia' was built under R version 4.2.3

library(xts)

## Warning: package 'xts' was built under R version 4.2.3

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

library(tsibble)

## Warning: package 'tsibble' was built under R version 4.2.3

## 
## Attaching package: 'tsibble'

## The following object is masked from 'package:zoo':
## 
##     index

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:tsibble':
## 
##     interval

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Analysis-of-variance fit of CLEARED_COUNT on ACTUAL_COUNT; print its table.
anova <- aov(formula = CLEARED_COUNT ~ ACTUAL_COUNT, data = htd)
print(summary(anova))

##                Df    Sum Sq   Mean Sq F value Pr(>F)    
## ACTUAL_COUNT    1 256436013 256436013    7635 <2e-16 ***
## Residuals    3096 103980167     33585                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Analysis-of-variance fit of JUVENILE_CLEARED_COUNT on ACTUAL_COUNT;
# print its table.
anova2 <- aov(formula = JUVENILE_CLEARED_COUNT ~ ACTUAL_COUNT, data = htd)
print(summary(anova2))

##                Df   Sum Sq Mean Sq F value Pr(>F)    
## ACTUAL_COUNT    1   688205  688205   187.6 <2e-16 ***
## Residuals    3096 11360394    3669                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Linear regression of ACTUAL_COUNT on both cleared-count columns;
# show the fitted coefficients.
model2 <- lm(
  formula = ACTUAL_COUNT ~ JUVENILE_CLEARED_COUNT + CLEARED_COUNT,
  data = htd
)
coef(model2)

##            (Intercept) JUVENILE_CLEARED_COUNT          CLEARED_COUNT 
##             61.2066021             -0.3261605              1.7399224

# Scatter plot of ACTUAL_COUNT against CLEARED_COUNT, coloured by region,
# with one least-squares line per region.
htd |>
  ggplot(aes(x = CLEARED_COUNT, y = ACTUAL_COUNT, color = REGION_NAME)) +
  geom_point() +
  # The colour aesthetic already splits the smoother by region; a fixed
  # color = "blue" drew every region's line in the same colour, so the lines
  # could not be told apart. Let each line inherit its region's colour.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(title = "Scatter Plot and Regression Lines",
       subtitle = "",
       x = "CLEARED_COUNT", y = "ACTUAL_COUNT") +
  theme_minimal()

## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of ACTUAL_COUNT against JUVENILE_CLEARED_COUNT, coloured by
# region, with one least-squares line per region.
htd |>
  ggplot(aes(x = JUVENILE_CLEARED_COUNT, y = ACTUAL_COUNT, color = REGION_NAME)) +
  geom_point() +
  # The colour aesthetic already splits the smoother by region; a fixed
  # color = "blue" drew every region's line in the same colour, so the lines
  # could not be told apart. Let each line inherit its region's colour.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(title = "Scatter Plot and Regression Lines",
       subtitle = "",
       x = "Juvenile clear counts", y = "Actual counts") +
  theme_minimal()

## `geom_smooth()` using formula 'y ~ x'

# Represent each reporting year as a Date on 1 January of that year.
htd$DATA_YEAR <- as.Date(paste0(htd$DATA_YEAR, "-01-01"), format = "%Y-%m-%d")

# Totals of the three count columns per year and region.
# NOTE(review): distinct() removes rows that are identical across the selected
# columns, so two source records sharing year, region and all three counts are
# summed only once -- confirm this de-duplication is intended.
htd_1 <- htd |>
  select(
    DATA_YEAR, REGION_NAME,
    ACTUAL_COUNT, CLEARED_COUNT, JUVENILE_CLEARED_COUNT
  ) |>
  distinct() |>
  group_by(DATA_YEAR, REGION_NAME) |>
  summarise(
    SUM_ACTUAL_COUNT = sum(ACTUAL_COUNT),
    SUM_JUVENILE_CLEARED_COUNT = sum(JUVENILE_CLEARED_COUNT),
    SUM_CLEARED_COUNT = sum(CLEARED_COUNT),
    # Same grouping as summarise()'s default (result stays grouped by
    # DATA_YEAR), stated explicitly so the informational message goes away.
    .groups = "drop_last"
  )

## `summarise()` has grouped output by 'DATA_YEAR'. You can override using the
## `.groups` argument.

# Plain, ungrouped tibble of the per-year regional totals, used as the input
# of the per-region linear models.
# `as_tibble()` has no `index`/`key` arguments (those belong to
# tsibble::as_tsibble()); they were swallowed by `...` and had no effect, so
# they are dropped here. The resulting object is the same plain tibble.
htd_ts1 <- htd_1 |>
  ungroup() |>
  as_tibble()
# Fit and print a linear trend over DATA_YEAR for each of the three summed
# count columns of one region.
#
# Arguments:
#   region_name  Single region label matched against `REGION_NAME`.
#   data         Data frame with columns REGION_NAME, DATA_YEAR,
#                SUM_ACTUAL_COUNT, SUM_CLEARED_COUNT and
#                SUM_JUVENILE_CLEARED_COUNT. Defaults to the script-level
#                `htd_ts1`, so existing one-argument calls keep working.
#
# Prints a heading and `summary(lm(...))` for each outcome, in the order
# actual, cleared, juvenile cleared. Invisibly returns the summary of the last
# model (juvenile cleared counts), as the original implementation did.
# Stops with a clear error when no row matches `region_name`.
#
# When DATA_YEAR is a Date, lm() uses its numeric value (days), so the reported
# DATA_YEAR slope is a change per day, not per year.
fit_linear_models <- function(region_name, data = htd_ts1) {
  stopifnot(length(region_name) == 1L)

  # which() drops rows whose REGION_NAME is NA instead of returning NA rows.
  region_rows <- which(data$REGION_NAME == region_name)
  if (length(region_rows) == 0L) {
    stop("No rows found for region '", region_name, "'.", call. = FALSE)
  }
  region_data <- data[region_rows, , drop = FALSE]

  # Heading label -> response column, in the order the models are reported.
  outcomes <- c(
    "actual counts" = "SUM_ACTUAL_COUNT",
    "cleared counts" = "SUM_CLEARED_COUNT",
    "juvenile cleared counts" = "SUM_JUVENILE_CLEARED_COUNT"
  )

  last_summary <- NULL
  for (label in names(outcomes)) {
    model_formula <- reformulate("DATA_YEAR", response = outcomes[[label]])
    # do.call() with a quoted data symbol keeps the printed "Call:" line
    # readable: lm(formula = <response> ~ DATA_YEAR, data = region_data).
    model <- do.call(
      "lm",
      list(formula = model_formula, data = quote(region_data))
    )
    print(paste("Linear model for", label, "in", region_name))
    last_summary <- print(summary(model))
  }

  invisible(last_summary)
}

# Apply the function to each region, one call per region so that each
# region's printed model summaries follow its own call. This call: Midwest.
fit_linear_models("Midwest")

## [1] "Linear model for actual counts in Midwest"
## 
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4919.8 -2583.9  -869.4  2617.9  6868.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -66849.021  25882.883  -2.583   0.0363 *
## DATA_YEAR        4.602      1.505   3.057   0.0184 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4259 on 7 degrees of freedom
## Multiple R-squared:  0.5717, Adjusted R-squared:  0.5105 
## F-statistic: 9.345 on 1 and 7 DF,  p-value: 0.0184
## 
## [1] "Linear model for cleared counts in Midwest"
## 
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4407.8 -3226.3  -834.7  3016.6  5803.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29984.141  24656.445  -1.216    0.263
## DATA_YEAR        2.141      1.434   1.493    0.179
## 
## Residual standard error: 4057 on 7 degrees of freedom
## Multiple R-squared:  0.2415, Adjusted R-squared:  0.1331 
## F-statistic: 2.228 on 1 and 7 DF,  p-value: 0.1791
## 
## [1] "Linear model for juvenile cleared counts in Midwest"
## 
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63.06 -32.40  -8.58  45.88  72.66 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -141.81680  331.67200  -0.428    0.682
## DATA_YEAR      0.01304    0.01929   0.676    0.521
## 
## Residual standard error: 54.58 on 7 degrees of freedom
## Multiple R-squared:  0.06131,    Adjusted R-squared:  -0.07279 
## F-statistic: 0.4572 on 1 and 7 DF,  p-value: 0.5206

# Print the three DATA_YEAR trend models for the West region.
fit_linear_models("West")

## [1] "Linear model for actual counts in West"
## 
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -4739  -2434   -270   3266   6142 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.537e+05  2.512e+04  -6.119 0.000482 ***
## DATA_YEAR    9.916e+00  1.461e+00   6.788 0.000256 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4133 on 7 degrees of freedom
## Multiple R-squared:  0.8681, Adjusted R-squared:  0.8493 
## F-statistic: 46.08 on 1 and 7 DF,  p-value: 0.000256
## 
## [1] "Linear model for cleared counts in West"
## 
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2037.1  -302.3  -160.7   673.5  1790.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.503e+04  7.535e+03  -11.28 9.60e-06 ***
## DATA_YEAR    5.355e+00  4.383e-01   12.22 5.63e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1240 on 7 degrees of freedom
## Multiple R-squared:  0.9552, Adjusted R-squared:  0.9488 
## F-statistic: 149.3 on 1 and 7 DF,  p-value: 5.631e-06
## 
## [1] "Linear model for juvenile cleared counts in West"
## 
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -395.49 -314.36 -271.52  -96.65 1870.20 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  763.98004 4638.48274   0.165    0.874
## DATA_YEAR     -0.02346    0.26980  -0.087    0.933
## 
## Residual standard error: 763.3 on 7 degrees of freedom
## Multiple R-squared:  0.001079,   Adjusted R-squared:  -0.1416 
## F-statistic: 0.007562 on 1 and 7 DF,  p-value: 0.9331

# Print the three DATA_YEAR trend models for the South region.
fit_linear_models("South")

## [1] "Linear model for actual counts in South"
## 
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10267.9  -6994.8    793.4   2711.3  10528.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.527e+05  4.653e+04  -5.431 0.000975 ***
## DATA_YEAR    1.655e+01  2.706e+00   6.116 0.000484 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7656 on 7 degrees of freedom
## Multiple R-squared:  0.8424, Adjusted R-squared:  0.8198 
## F-statistic:  37.4 on 1 and 7 DF,  p-value: 0.0004835
## 
## [1] "Linear model for cleared counts in South"
## 
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6936.4 -3560.9  -911.8  2620.7  9860.0 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1.125e+05  3.235e+04  -3.479  0.01028 * 
## DATA_YEAR    7.404e+00  1.882e+00   3.935  0.00564 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5323 on 7 degrees of freedom
## Multiple R-squared:  0.6887, Adjusted R-squared:  0.6442 
## F-statistic: 15.48 on 1 and 7 DF,  p-value: 0.005639
## 
## [1] "Linear model for juvenile cleared counts in South"
## 
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1019.5  -820.5  -605.2  -355.7  4811.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6487.4069 11897.3594  -0.545    0.602
## DATA_YEAR       0.4606     0.6920   0.666    0.527
## 
## Residual standard error: 1958 on 7 degrees of freedom
## Multiple R-squared:  0.05953,    Adjusted R-squared:  -0.07482 
## F-statistic: 0.4431 on 1 and 7 DF,  p-value: 0.527

# Print the three DATA_YEAR trend models for the Northeast region.
fit_linear_models("Northeast")

## [1] "Linear model for actual counts in Northeast"
## 
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -633.0 -173.4  -77.0 1647.6  468.1 -904.3 -327.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.745e+04  8.414e+03  -2.074   0.0928 .
## DATA_YEAR    1.108e+00  4.795e-01   2.311   0.0688 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 926.9 on 5 degrees of freedom
## Multiple R-squared:  0.5164, Adjusted R-squared:  0.4197 
## F-statistic:  5.34 on 1 and 5 DF,  p-value: 0.06884
## 
## [1] "Linear model for cleared counts in Northeast"
## 
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -313.6 -287.6  469.0  253.0  523.9 -362.2 -282.5 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3948.1511  3958.0451  -0.998    0.364
## DATA_YEAR       0.2632     0.2256   1.167    0.296
## 
## Residual standard error: 436 on 5 degrees of freedom
## Multiple R-squared:  0.214,  Adjusted R-squared:  0.05681 
## F-statistic: 1.361 on 1 and 5 DF,  p-value: 0.2959
## 
## [1] "Linear model for juvenile cleared counts in Northeast"
## 
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
##  -4.577  -9.143 -13.721  45.712 -22.854  36.580 -31.998 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -201.03378  294.63139  -0.682    0.525
## DATA_YEAR      0.01251    0.01679   0.745    0.490
## 
## Residual standard error: 32.46 on 5 degrees of freedom
## Multiple R-squared:  0.09992,    Adjusted R-squared:  -0.08009 
## F-statistic: 0.5551 on 1 and 5 DF,  p-value: 0.4898

# Build the tsibble of per-year totals for one region from `htd_1`.
# The key has to identify the series (the region); the summed count columns
# are measurements, not identifiers, so they are no longer passed as `key`.
# ungroup() removes any grouping still attached to `htd_1` before conversion.
region_tsibble <- function(region) {
  htd_1 |>
    ungroup() |>
    filter(REGION_NAME == region) |>
    as_tsibble(index = DATA_YEAR, key = REGION_NAME)
}

htd_ts_midwest <- region_tsibble("Midwest")
htd_ts_west <- region_tsibble("West")
htd_ts_south <- region_tsibble("South")
htd_ts_northeast <- region_tsibble("Northeast")

# Midwest: actual counts per period as a line, with a loess trend on top.
htd_ts_midwest |>
  filter(!is.na(SUM_ACTUAL_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(title = "Average Actual Counts Over Time by midwest",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Midwest: juvenile cleared counts per period as a line, with a loess trend.
htd_ts_midwest |>
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average juvenile cleared Counts Over Time for midwest",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Midwest: cleared counts per period as a line, with a loess trend on top.
htd_ts_midwest |>
  filter(!is.na(SUM_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average  cleared Counts Over Time for midwest",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: actual counts per period as a line, with a loess trend on top.
htd_ts_west |>
  filter(!is.na(SUM_ACTUAL_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(title = "Average Actual Counts Over Time by west region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: juvenile cleared counts per period as a line, with a loess trend.
htd_ts_west |>
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average juvenile cleared Counts Over Time for west region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: cleared counts per period as a line, with a loess trend on top.
htd_ts_west |>
  filter(!is.na(SUM_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average  cleared Counts Over Time for west region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: actual counts per period as a line, with a loess trend on top.
htd_ts_south |>
  filter(!is.na(SUM_ACTUAL_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(title = "Average Actual Counts Over Time by south region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: juvenile cleared counts per period as a line, with a loess trend.
htd_ts_south |>
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average juvenile cleared Counts Over Time for south region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: cleared counts per period as a line, with a loess trend on top.
htd_ts_south |>
  filter(!is.na(SUM_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average  cleared Counts Over Time for south region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Northeast: actual counts per period as a line, with a loess trend on top.
htd_ts_northeast |>
  filter(!is.na(SUM_ACTUAL_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(title = "Average Actual Counts Over Time by northeast region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05

# Northeast: juvenile cleared counts per period as a line, with a loess trend.
htd_ts_northeast |>
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average juvenile cleared Counts Over Time for northeast region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05

# Northeast: cleared counts per period as a line, with a loess trend on top.
htd_ts_northeast |>
  filter(!is.na(SUM_CLEARED_COUNT)) |>
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) |>
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each loess window fewer points than the local fit needs
  # (the "span too small" warnings); the default span of 0.75 avoids that.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(title = "Average  cleared Counts Over Time for northeast region",
       subtitle = "(by half year)") +
  scale_x_date(date_breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05