# Load the transformed HTD data set.
# The CSV location can be overridden with the HTD_CSV_PATH environment
# variable so the script is not tied to a single machine; when the variable is
# unset or empty, the original absolute path is used.
default_htd_csv_path <- "C:\\Users\\moore\\OneDrive\\Desktop\\Fall 2023\\Intro to statistics\\project\\Statistics Project\\Statistics Project\\htd_transformed.csv"
htd_csv_path <- Sys.getenv("HTD_CSV_PATH", unset = default_htd_csv_path)
if (!nzchar(htd_csv_path)) {
  htd_csv_path <- default_htd_csv_path
}
# Fail early with an explicit message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(htd_csv_path)) {
  stop("HTD data file not found: ", htd_csv_path, call. = FALSE)
}
htd <- read.csv(htd_csv_path)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.2.3
library(ggrepel)
library(boot)
library(broom)
library(lindia)
## Warning: package 'lindia' was built under R version 4.2.3
library(xts)
## Warning: package 'xts' was built under R version 4.2.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.2.3
##
## Attaching package: 'tsibble'
## The following object is masked from 'package:zoo':
##
## index
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:tsibble':
##
## interval
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Fit an analysis-of-variance model with CLEARED_COUNT as the response and
# ACTUAL_COUNT as the only term, then show its summary table.
# NOTE(review): the object name `anova` is also the name of stats::anova();
# function calls such as anova(fit) still resolve to the function, but the
# shared name is easy to misread.
anova <- aov(CLEARED_COUNT ~ ACTUAL_COUNT, data = htd)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## ACTUAL_COUNT 1 256436013 256436013 7635 <2e-16 ***
## Residuals 3096 103980167 33585
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same analysis-of-variance fit as above, but with JUVENILE_CLEARED_COUNT as
# the response; the summary table is shown right after fitting.
anova2 <- aov(JUVENILE_CLEARED_COUNT ~ ACTUAL_COUNT, data = htd)
summary(anova2)
## Df Sum Sq Mean Sq F value Pr(>F)
## ACTUAL_COUNT 1 688205 688205 187.6 <2e-16 ***
## Residuals 3096 11360394 3669
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Linear regression of ACTUAL_COUNT on both clearance counts.
# The lm() call is kept verbatim so the call stored in the fitted object is
# unchanged; only the coefficient accessor differs.
model2 <- lm(ACTUAL_COUNT ~ JUVENILE_CLEARED_COUNT + CLEARED_COUNT, htd)
# Show the fitted intercept and the two slopes.
coef(model2)
## (Intercept) JUVENILE_CLEARED_COUNT CLEARED_COUNT
## 61.2066021 -0.3261605 1.7399224
# Scatter plot of ACTUAL_COUNT against CLEARED_COUNT, points coloured by
# region, with a linear-model smoother drawn in a fixed blue colour.
ggplot(
  data = htd,
  mapping = aes(x = CLEARED_COUNT, y = ACTUAL_COUNT, color = REGION_NAME)
) +
  geom_point() +
  # The fixed colour on this layer replaces the mapped colour for the smoother.
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    x = "CLEARED_COUNT",
    y = "ACTUAL_COUNT",
    title = "Scatter Plot and Regression Lines",
    subtitle = ""
  ) +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of ACTUAL_COUNT against JUVENILE_CLEARED_COUNT, points coloured
# by region, with a linear-model smoother drawn in a fixed blue colour.
ggplot(
  data = htd,
  mapping = aes(
    x = JUVENILE_CLEARED_COUNT,
    y = ACTUAL_COUNT,
    color = REGION_NAME
  )
) +
  geom_point() +
  # The fixed colour on this layer replaces the mapped colour for the smoother.
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    x = "Juvenile clear counts",
    y = "Actual counts",
    title = "Scatter Plot and Regression Lines",
    subtitle = ""
  ) +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'

# Turn the year column into a Date pinned to 1 January of that year so it can
# later serve as a time index.
htd$DATA_YEAR <- as.Date(
  paste0(htd$DATA_YEAR, "-01-01"),
  format = "%Y-%m-%d"
)

# Aggregate the three count columns to one row per (year, region).
# `.groups = "drop"` returns an ungrouped tibble, so no residual grouping by
# DATA_YEAR leaks into later steps and summarise() emits no grouping message.
htd_1 <- htd |>
  select(
    DATA_YEAR, REGION_NAME,
    ACTUAL_COUNT, CLEARED_COUNT, JUVENILE_CLEARED_COUNT
  ) |>
  # NOTE(review): distinct() removes any rows that share the same year, region
  # and all three counts. If such rows can come from different reporting
  # units, the sums below are understated -- confirm this de-duplication is
  # intended.
  distinct() |>
  group_by(DATA_YEAR, REGION_NAME) |>
  summarise(
    SUM_ACTUAL_COUNT = sum(ACTUAL_COUNT),
    SUM_JUVENILE_CLEARED_COUNT = sum(JUVENILE_CLEARED_COUNT),
    SUM_CLEARED_COUNT = sum(CLEARED_COUNT),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'DATA_YEAR'. You can override using the
## `.groups` argument.
# Plain (ungrouped) tibble of the yearly regional totals, used as the input
# for the per-region linear models.
# as_tibble() has no `index` / `key` arguments (those belong to
# tsibble::as_tsibble()), so the values previously passed here were silently
# ignored; they are dropped to stop the call from suggesting that a tsibble
# is being built.
htd_ts1 <- as_tibble(htd_1)
#' Fit and print linear time trends of the three summed counts for one region.
#'
#' Filters `data` to the rows whose REGION_NAME equals `region_name`, then
#' fits three simple regressions on DATA_YEAR (actual, cleared and juvenile
#' cleared counts) and prints a header line plus `summary()` for each.
#'
#' When DATA_YEAR is a Date, lm() treats it as a number of days, so the
#' reported slope is a change per day rather than per year.
#'
#' @param region_name Region label to match against the REGION_NAME column.
#' @param data Data frame with columns REGION_NAME, DATA_YEAR,
#'   SUM_ACTUAL_COUNT, SUM_CLEARED_COUNT and SUM_JUVENILE_CLEARED_COUNT.
#'   Defaults to the script-level `htd_ts1`, so existing one-argument calls
#'   keep working.
#' @return Invisibly, the summary of the juvenile cleared counts model (the
#'   last one printed), as before.
fit_linear_models <- function(region_name, data = htd_ts1) {
  region_data <- data %>% filter(REGION_NAME == region_name)

  # Without this guard an unknown region only surfaces as lm()'s generic
  # "0 (non-NA) cases" error.
  if (nrow(region_data) == 0) {
    stop("No rows found for region '", region_name, "'.", call. = FALSE)
  }

  # Print the header line followed by the model summary.
  report_model <- function(label, model) {
    print(paste("Linear model for", label, "in", region_name))
    print(summary(model))
  }

  # The lm() calls are written out in full so the "Call:" line in each
  # printed summary stays the same as before.
  report_model(
    "actual counts",
    lm(SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
  )
  report_model(
    "cleared counts",
    lm(SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
  )
  report_model(
    "juvenile cleared counts",
    lm(SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
  )
}
# Apply the function to each region
# Midwest: prints the actual, cleared and juvenile cleared trend models.
fit_linear_models("Midwest")
## [1] "Linear model for actual counts in Midwest"
##
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4919.8 -2583.9 -869.4 2617.9 6868.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -66849.021 25882.883 -2.583 0.0363 *
## DATA_YEAR 4.602 1.505 3.057 0.0184 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4259 on 7 degrees of freedom
## Multiple R-squared: 0.5717, Adjusted R-squared: 0.5105
## F-statistic: 9.345 on 1 and 7 DF, p-value: 0.0184
##
## [1] "Linear model for cleared counts in Midwest"
##
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4407.8 -3226.3 -834.7 3016.6 5803.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29984.141 24656.445 -1.216 0.263
## DATA_YEAR 2.141 1.434 1.493 0.179
##
## Residual standard error: 4057 on 7 degrees of freedom
## Multiple R-squared: 0.2415, Adjusted R-squared: 0.1331
## F-statistic: 2.228 on 1 and 7 DF, p-value: 0.1791
##
## [1] "Linear model for juvenile cleared counts in Midwest"
##
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.06 -32.40 -8.58 45.88 72.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -141.81680 331.67200 -0.428 0.682
## DATA_YEAR 0.01304 0.01929 0.676 0.521
##
## Residual standard error: 54.58 on 7 degrees of freedom
## Multiple R-squared: 0.06131, Adjusted R-squared: -0.07279
## F-statistic: 0.4572 on 1 and 7 DF, p-value: 0.5206
# West: prints the actual, cleared and juvenile cleared trend models.
fit_linear_models("West")
## [1] "Linear model for actual counts in West"
##
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4739 -2434 -270 3266 6142
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.537e+05 2.512e+04 -6.119 0.000482 ***
## DATA_YEAR 9.916e+00 1.461e+00 6.788 0.000256 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4133 on 7 degrees of freedom
## Multiple R-squared: 0.8681, Adjusted R-squared: 0.8493
## F-statistic: 46.08 on 1 and 7 DF, p-value: 0.000256
##
## [1] "Linear model for cleared counts in West"
##
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2037.1 -302.3 -160.7 673.5 1790.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.503e+04 7.535e+03 -11.28 9.60e-06 ***
## DATA_YEAR 5.355e+00 4.383e-01 12.22 5.63e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1240 on 7 degrees of freedom
## Multiple R-squared: 0.9552, Adjusted R-squared: 0.9488
## F-statistic: 149.3 on 1 and 7 DF, p-value: 5.631e-06
##
## [1] "Linear model for juvenile cleared counts in West"
##
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -395.49 -314.36 -271.52 -96.65 1870.20
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 763.98004 4638.48274 0.165 0.874
## DATA_YEAR -0.02346 0.26980 -0.087 0.933
##
## Residual standard error: 763.3 on 7 degrees of freedom
## Multiple R-squared: 0.001079, Adjusted R-squared: -0.1416
## F-statistic: 0.007562 on 1 and 7 DF, p-value: 0.9331
# South: prints the actual, cleared and juvenile cleared trend models.
fit_linear_models("South")
## [1] "Linear model for actual counts in South"
##
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10267.9 -6994.8 793.4 2711.3 10528.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.527e+05 4.653e+04 -5.431 0.000975 ***
## DATA_YEAR 1.655e+01 2.706e+00 6.116 0.000484 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7656 on 7 degrees of freedom
## Multiple R-squared: 0.8424, Adjusted R-squared: 0.8198
## F-statistic: 37.4 on 1 and 7 DF, p-value: 0.0004835
##
## [1] "Linear model for cleared counts in South"
##
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6936.4 -3560.9 -911.8 2620.7 9860.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.125e+05 3.235e+04 -3.479 0.01028 *
## DATA_YEAR 7.404e+00 1.882e+00 3.935 0.00564 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5323 on 7 degrees of freedom
## Multiple R-squared: 0.6887, Adjusted R-squared: 0.6442
## F-statistic: 15.48 on 1 and 7 DF, p-value: 0.005639
##
## [1] "Linear model for juvenile cleared counts in South"
##
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1019.5 -820.5 -605.2 -355.7 4811.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6487.4069 11897.3594 -0.545 0.602
## DATA_YEAR 0.4606 0.6920 0.666 0.527
##
## Residual standard error: 1958 on 7 degrees of freedom
## Multiple R-squared: 0.05953, Adjusted R-squared: -0.07482
## F-statistic: 0.4431 on 1 and 7 DF, p-value: 0.527
# Northeast: prints the actual, cleared and juvenile cleared trend models.
fit_linear_models("Northeast")
## [1] "Linear model for actual counts in Northeast"
##
## Call:
## lm(formula = SUM_ACTUAL_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -633.0 -173.4 -77.0 1647.6 468.1 -904.3 -327.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.745e+04 8.414e+03 -2.074 0.0928 .
## DATA_YEAR 1.108e+00 4.795e-01 2.311 0.0688 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 926.9 on 5 degrees of freedom
## Multiple R-squared: 0.5164, Adjusted R-squared: 0.4197
## F-statistic: 5.34 on 1 and 5 DF, p-value: 0.06884
##
## [1] "Linear model for cleared counts in Northeast"
##
## Call:
## lm(formula = SUM_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -313.6 -287.6 469.0 253.0 523.9 -362.2 -282.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3948.1511 3958.0451 -0.998 0.364
## DATA_YEAR 0.2632 0.2256 1.167 0.296
##
## Residual standard error: 436 on 5 degrees of freedom
## Multiple R-squared: 0.214, Adjusted R-squared: 0.05681
## F-statistic: 1.361 on 1 and 5 DF, p-value: 0.2959
##
## [1] "Linear model for juvenile cleared counts in Northeast"
##
## Call:
## lm(formula = SUM_JUVENILE_CLEARED_COUNT ~ DATA_YEAR, data = region_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -4.577 -9.143 -13.721 45.712 -22.854 36.580 -31.998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -201.03378 294.63139 -0.682 0.525
## DATA_YEAR 0.01251 0.01679 0.745 0.490
##
## Residual standard error: 32.46 on 5 degrees of freedom
## Multiple R-squared: 0.09992, Adjusted R-squared: -0.08009
## F-statistic: 0.5551 on 1 and 5 DF, p-value: 0.4898
# Build one tsibble per region from the yearly regional totals in `htd_1`.
#
# The key of a tsibble identifies the series, so it is REGION_NAME. The three
# SUM_* columns are measurements and were previously declared as the key by
# mistake. ungroup() makes sure no grouping carried by `htd_1` ends up in the
# tsibble.
make_region_tsibble <- function(region) {
  htd_1 |>
    ungroup() |>
    filter(REGION_NAME == region) |>
    as_tsibble(index = DATA_YEAR, key = REGION_NAME)
}

htd_ts_midwest <- make_region_tsibble("Midwest")
htd_ts_west <- make_region_tsibble("West")
htd_ts_south <- make_region_tsibble("South")
htd_ts_northeast <- make_region_tsibble("Northeast")
# Midwest: SUM_ACTUAL_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_midwest %>%
  filter(!is.na(SUM_ACTUAL_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(
    title = "Average Actual Counts Over Time by midwest",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = function(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Midwest: SUM_JUVENILE_CLEARED_COUNT over time, averaged per six-month
# bucket, with a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_midwest %>%
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average juvenile cleared Counts Over Time for midwest",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Midwest: SUM_CLEARED_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_midwest %>%
  filter(!is.na(SUM_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average cleared Counts Over Time for midwest",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: SUM_ACTUAL_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_west %>%
  filter(!is.na(SUM_ACTUAL_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(
    title = "Average Actual Counts Over Time by west region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = function(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: SUM_JUVENILE_CLEARED_COUNT over time, averaged per six-month bucket,
# with a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_west %>%
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average juvenile cleared Counts Over Time for west region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# West: SUM_CLEARED_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_west %>%
  filter(!is.na(SUM_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average cleared Counts Over Time for west region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: SUM_ACTUAL_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_south %>%
  filter(!is.na(SUM_ACTUAL_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(
    title = "Average Actual Counts Over Time by south region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = function(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: SUM_JUVENILE_CLEARED_COUNT over time, averaged per six-month bucket,
# with a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_south %>%
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average juvenile cleared Counts Over Time for south region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# South: SUM_CLEARED_COUNT over time, averaged per six-month bucket, with a
# loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_south %>%
  filter(!is.na(SUM_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average cleared Counts Over Time for south region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 379.61
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.4486e+05

# Northeast: SUM_ACTUAL_COUNT over time, averaged per six-month bucket, with
# a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_northeast %>%
  filter(!is.na(SUM_ACTUAL_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_actual_counts = mean(SUM_ACTUAL_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_actual_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75, se = FALSE) +
  labs(
    title = "Average Actual Counts Over Time by northeast region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = function(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05

# Northeast: SUM_JUVENILE_CLEARED_COUNT over time, averaged per six-month
# bucket, with a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_northeast %>%
  filter(!is.na(SUM_JUVENILE_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(
    avg_juvenile_counts = mean(SUM_JUVENILE_CLEARED_COUNT, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = half_year, y = avg_juvenile_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average juvenile cleared Counts Over Time for northeast region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05

# Northeast: SUM_CLEARED_COUNT over time, averaged per six-month bucket, with
# a loess trend line.
# NOTE(review): if DATA_YEAR only ever holds 1 January dates, the six-month
# bucketing and the mean are no-ops and the "(by half year)" subtitle is
# misleading -- confirm.
htd_ts_northeast %>%
  filter(!is.na(SUM_CLEARED_COUNT)) %>%
  index_by(half_year = floor_date(DATA_YEAR, "6 months")) %>%
  summarise(avg_cleared_counts = mean(SUM_CLEARED_COUNT, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = half_year, y = avg_cleared_counts)) +
  geom_line() +
  # span = 0.3 gave each local fit fewer points than it has parameters
  # ("span too small" and singularity warnings). The loess default of 0.75
  # keeps enough neighbours for a series with only a handful of points.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.75,
    color = "blue", se = FALSE
  ) +
  labs(
    title = "Average cleared Counts Over Time for northeast region",
    subtitle = "(by half year)"
  ) +
  scale_x_date(breaks = "1 year", labels = \(x) year(x)) +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 16425
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 375.96
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.421e+05
