library(pastecs)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the HHS Unaccompanied Alien Children Program records from the CSV file.
data <- read.csv(
  file = "C:/Users/atg516/Desktop/Project/HHS_Unaccompanied_Alien_Children_Program.csv"
)

# First look at the data: per-column summary statistics, including NA counts.
summary(data)
## Date Children.apprehended.and.placed.in.CBP.custody.
## Length:1015 Min. : 1.00
## Class :character 1st Qu.: 59.75
## Mode :character Median :123.00
## Mean :116.70
## 3rd Qu.:167.00
## Max. :333.00
## NA's :701
## Children.in.CBP.custody Children.transferred.out.of.CBP.custody
## Min. : 7.00 Min. : 0.00
## 1st Qu.: 82.25 1st Qu.: 84.75
## Median :233.50 Median :176.00
## Mean :218.11 Mean :164.52
## 3rd Qu.:300.25 3rd Qu.:227.50
## Max. :531.00 Max. :440.00
## NA's :701 NA's :701
## Children.in.HHS.Care Children.discharged.from.HHS.Care
## Min. : 2109 Min. : 2.0
## 1st Qu.: 6940 1st Qu.:196.0
## Median : 8998 Median :279.0
## Mean : 9196 Mean :283.3
## 3rd Qu.:10766 3rd Qu.:360.0
## Max. :22557 Max. :827.0
##
# Keep only rows where both the discharge count and the in-care count are
# present, then derive the discharge rate: children discharged from HHS care
# divided by children in HHS care on the same row.
data_clean <- data %>%
  filter(
    !is.na(Children.discharged.from.HHS.Care),
    !is.na(Children.in.HHS.Care)
  ) %>%
  mutate(
    discharge_rate = Children.discharged.from.HHS.Care / Children.in.HHS.Care
  )

# View() opens a spreadsheet-style data viewer, which is only meaningful in an
# interactive session; skip it when the script is run or rendered in batch mode.
if (interactive()) {
  View(data_clean)
}

# Descriptive statistics for the discharge rate (count, range, mean, SE,
# 95% CI half-width, variance, std. dev., coefficient of variation).
# pastecs is already attached at the top of the script, so it is not reloaded.
stat.desc(data_clean$discharge_rate)
## nbr.val nbr.null nbr.na min max range
## 1.015000e+03 0.000000e+00 0.000000e+00 9.140768e-04 6.640268e-02 6.548860e-02
## sum median mean SE.mean CI.mean.0.95 var
## 3.033856e+01 2.989412e-02 2.989021e-02 3.086596e-04 6.056846e-04 9.669979e-05
## std.dev coef.var
## 9.833605e-03 3.289908e-01
The variable discharge_rate represents the proportion of children discharged from HHS care each day, relative to the total number of children in HHS care on that day. The updated dataset includes 1,015 daily records spanning 2021 to 2025. The discharge rate ranges from 0.091% to 6.64%, with both the mean and median around 2.99%.
The standard deviation of approximately 0.98% indicates low-to-moderate day-to-day variation in discharge activity. A coefficient of variation of 0.33 shows moderate relative variability. The 95% confidence interval for the mean discharge rate, ranging from 2.93% to 3.05%, is narrow, so the program’s average daily discharge efficiency under its operational standards is estimated precisely; note that this interval describes uncertainty in the mean, not the spread of individual daily rates.
library(ggplot2)

# Histogram of the raw discharge rate, grouped into 20 bins.
ggplot(data = data_clean, mapping = aes(x = discharge_rate)) +
  geom_histogram(bins = 20, color = "white", fill = "mediumorchid") +
  ggtitle("Histogram of Daily Discharge Rate") +
  xlab("Discharge Rate (Proportion of Children Discharged)") +
  ylab("Frequency") +
  theme_minimal()
# Add the natural log of the discharge rate as a new column. With the 1e-6
# offset, a rate of exactly zero would still produce a finite value.
data_clean <- mutate(
  data_clean,
  log_discharge_rate = log(discharge_rate + 1e-6)
)

# Histogram of the log-transformed discharge rate, grouped into 20 bins.
ggplot(data = data_clean, mapping = aes(x = log_discharge_rate)) +
  geom_histogram(bins = 20, color = "white", fill = "goldenrod") +
  labs(
    y = "Frequency",
    x = "Log of Discharge Rate",
    title = "Histogram of Log-Transformed Discharge Rate"
  ) +
  theme_minimal()