library(pastecs)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw HHS Unaccompanied Alien Children Program export from disk.
csv_path <- "C:/Users/atg516/Desktop/Project/HHS_Unaccompanied_Alien_Children_Program.csv"
data <- read.csv(file = csv_path)

# Per-column overview (quartiles, means, NA counts) of the raw table.
summary(data)
##      Date           Children.apprehended.and.placed.in.CBP.custody.
##  Length:1015        Min.   :  1.00                                 
##  Class :character   1st Qu.: 59.75                                 
##  Mode  :character   Median :123.00                                 
##                     Mean   :116.70                                 
##                     3rd Qu.:167.00                                 
##                     Max.   :333.00                                 
##                     NA's   :701                                    
##  Children.in.CBP.custody Children.transferred.out.of.CBP.custody
##  Min.   :  7.00          Min.   :  0.00                         
##  1st Qu.: 82.25          1st Qu.: 84.75                         
##  Median :233.50          Median :176.00                         
##  Mean   :218.11          Mean   :164.52                         
##  3rd Qu.:300.25          3rd Qu.:227.50                         
##  Max.   :531.00          Max.   :440.00                         
##  NA's   :701             NA's   :701                            
##  Children.in.HHS.Care Children.discharged.from.HHS.Care
##  Min.   : 2109        Min.   :  2.0                    
##  1st Qu.: 6940        1st Qu.:196.0                    
##  Median : 8998        Median :279.0                    
##  Mean   : 9196        Mean   :283.3                    
##  3rd Qu.:10766        3rd Qu.:360.0                    
##  Max.   :22557        Max.   :827.0                    
## 
# Build the analysis table: keep rows where both HHS counts are present and
# the denominator is positive, then derive the daily discharge rate
# (children discharged from HHS care / children in HHS care).
data_clean <- data %>%
  filter(
    !is.na(Children.discharged.from.HHS.Care),
    !is.na(Children.in.HHS.Care),
    # A zero caseload would make the rate Inf/NaN and distort later summaries.
    Children.in.HHS.Care > 0
  ) %>%
  mutate(
    discharge_rate = Children.discharged.from.HHS.Care / Children.in.HHS.Care
  )

# View() opens a data viewer and is only useful in an interactive session;
# skip it when the script is run or rendered non-interactively.
if (interactive()) {
  View(data_clean)
}

library(pastecs)
# Descriptive statistics (count, range, mean, SE, CI, variance, CV) for the
# daily discharge rate.
data_clean %>%
  pull(discharge_rate) %>%
  stat.desc()
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 1.015000e+03 0.000000e+00 0.000000e+00 9.140768e-04 6.640268e-02 6.548860e-02 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 3.033856e+01 2.989412e-02 2.989021e-02 3.086596e-04 6.056846e-04 9.669979e-05 
##      std.dev     coef.var 
## 9.833605e-03 3.289908e-01

The variable discharge_rate is the number of children discharged from HHS care on a given day divided by the number of children in HHS care that day, i.e., the daily proportion of the HHS caseload that is discharged. The updated dataset includes 1,015 daily records spanning 2021 to 2025. The discharge rate ranges from about 0.09% to 6.64%, with both the mean and the median at approximately 2.99%.

The standard deviation of approximately 0.98 percentage points indicates low-to-moderate day-to-day variation in discharge activity. A coefficient of variation of 0.33 shows moderate relative variability. The 95% confidence interval for the mean discharge rate, 2.93% to 3.05%, is narrow, so the program’s average daily discharge rate is estimated precisely; note that this interval describes uncertainty in the mean, not the range of rates observed on typical days.

library(ggplot2)

# Distribution of the daily discharge rate, binned into 20 bars.
data_clean %>%
  ggplot(mapping = aes(x = discharge_rate)) +
  geom_histogram(bins = 20, colour = "white", fill = "mediumorchid") +
  ggtitle("Histogram of Daily Discharge Rate") +
  xlab("Discharge Rate (Proportion of Children Discharged)") +
  ylab("Frequency") +
  theme_minimal()

# Natural log of the discharge rate; the 1e-6 offset keeps log() finite if a
# rate is exactly zero.
data_clean[["log_discharge_rate"]] <-
  log(data_clean[["discharge_rate"]] + 1e-6)

# Distribution of the log-transformed discharge rate, binned into 20 bars.
data_clean %>%
  ggplot(mapping = aes(x = log_discharge_rate)) +
  geom_histogram(bins = 20, colour = "white", fill = "goldenrod") +
  ggtitle("Histogram of Log-Transformed Discharge Rate") +
  xlab("Log of Discharge Rate") +
  ylab("Frequency") +
  theme_minimal()