This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Warning messages:
1: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
2: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
3: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
4: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
5: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
6: In normalizePath(quartoSrcFile, winslash = "/") :
  path[1]="": No such file or directory
# Load required libraries
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(scales)
library(caret)
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
# Load the dataset
df <- read.csv("~/Downloads/archive 2/children-in-foster-care-annually-beginning-1994.csv")

# Data preprocessing
df$Year <- as.integer(df$Year)
df$Total_Days_In_Care <- as.integer(df$Total.Days.In.Care)
df$Admissions <- as.integer(df$Admissions)
df$Discharges <- as.integer(df$Discharges)
df$Children_In_Care <- as.integer(df$Children.In.Care)
df$Indicated_CPS_Reports <- as.integer(df$Indicated.CPS.Reports)

# Exploratory Data Analysis (EDA)

# Line plot for trends in foster care placements and CPS reports
trends_data <- df %>%
  group_by(Year) %>%
  summarize(Total_Admissions = sum(Admissions),
            Total_Discharges = sum(Discharges),
            Total_Children_In_Care = sum(Children_In_Care),
            Total_Indicated_CPS_Reports = sum(Indicated_CPS_Reports))

ggplot(trends_data, aes(x = Year)) +
  geom_line(aes(y = Total_Admissions, color = "Admissions"), linewidth = 1) +
  geom_line(aes(y = Total_Discharges, color = "Discharges"), linewidth = 1) +
  geom_line(aes(y = Total_Children_In_Care, color = "Children In Care"), linewidth = 1) +
  geom_line(aes(y = Total_Indicated_CPS_Reports, color = "Indicated CPS Reports"), linewidth = 1) +
  scale_color_manual(values = c("Admissions" = "blue", "Discharges" = "green",
                                "Children In Care" = "red", "Indicated CPS Reports" = "purple")) +
  labs(title = "Trends in Foster Care Placements and CPS Reports",
       x = "Year", y = "Count", color = "Variable") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  coord_cartesian(ylim = c(0, max(trends_data$Total_Children_In_Care) * 1.1))


# Bar plot for distribution of foster care placement types
placement_types <- df %>%
  select(Year, Adoptive.Home, Agency.Operated.Boarding.Home, Approved.Relative.Home,
         Foster.Boarding.Home, Group.Home, Group.Residence, Institution,
         Supervised.Independent.Living, Other) %>%
  pivot_longer(cols = -Year, names_to = "Placement_Type", values_to = "Total_Days") %>%
  group_by(Placement_Type) %>%
  summarize(Total_Days = sum(Total_Days))

ggplot(placement_types, aes(x = Placement_Type, y = Total_Days, fill = Placement_Type)) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Distribution of Foster Care Placement Types",
       x = "Placement Type", y = "Total Days", fill = "Placement Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") +
  coord_flip()


# Histogram for geographical disparities in average number of children in care
geo_data <- df %>%
  group_by(County) %>%
  summarize(Avg_Children_In_Care = mean(Children_In_Care))

ggplot(geo_data, aes(x = Avg_Children_In_Care, fill = ..count..)) +
  geom_histogram(binwidth = 20, color = "white", boundary = 0) +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Frequency") +
  labs(title = "Distribution of Average Number of Children in Care by County",
       x = "Average Number of Children in Care",
       y = "Number of Counties") +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 12),
        legend.title = element_text(size = 10),
        legend.position = "right") +
  coord_cartesian(xlim = c(0, quantile(geo_data$Avg_Children_In_Care, 0.90)))


# Machine Learning Model

# Prepare the data for modeling
model_data <- df %>%
  select(Children_In_Care, Admissions, Discharges, Indicated_CPS_Reports)

# Split the data into training and testing sets
set.seed(123)
train_index <- createDataPartition(model_data$Children_In_Care, p = 0.8, list = FALSE)
train_data <- model_data[train_index, ]
test_data <- model_data[-train_index, ]

# Train the linear regression model
lm_model <- lm(Children_In_Care ~ Admissions + Discharges + Indicated_CPS_Reports, data = train_data)

# Evaluate the model
predictions <- predict(lm_model, newdata = test_data)
r_squared <- summary(lm_model)$r.squared
mse <- mean((test_data$Children_In_Care - predictions)^2)

cat("R-squared:", r_squared, "\n")
R-squared: 0.9867581 
cat("Mean Squared Error (MSE):", mse, "\n")
Mean Squared Error (MSE): NA 
# Print the model coefficients
print(summary(lm_model))

Call:
lm(formula = Children_In_Care ~ Admissions + Discharges + Indicated_CPS_Reports, 
    data = train_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3614.3   -39.2    22.6    60.4  6728.4 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)           -36.134071  10.684469  -3.382 0.000744 ***
Admissions              1.540466   0.079870  19.287  < 2e-16 ***
Discharges              2.227439   0.061386  36.286  < 2e-16 ***
Indicated_CPS_Reports  -0.443983   0.009712 -45.717  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 347.7 on 1166 degrees of freedom
  (11 observations deleted due to missingness)
Multiple R-squared:  0.9868,    Adjusted R-squared:  0.9867 
F-statistic: 2.896e+04 on 3 and 1166 DF,  p-value: < 2.2e-16

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiRmluYWwgUHJvamVjdCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KCiMgTG9hZCByZXF1aXJlZCBsaWJyYXJpZXMKbGlicmFyeShkcGx5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHNjYWxlcykKbGlicmFyeShjYXJldCkKCiMgTG9hZCB0aGUgZGF0YXNldApkZiA8LSByZWFkLmNzdigifi9Eb3dubG9hZHMvYXJjaGl2ZSAyL2NoaWxkcmVuLWluLWZvc3Rlci1jYXJlLWFubnVhbGx5LWJlZ2lubmluZy0xOTk0LmNzdiIpCgojIERhdGEgcHJlcHJvY2Vzc2luZwpkZiRZZWFyIDwtIGFzLmludGVnZXIoZGYkWWVhcikKZGYkVG90YWxfRGF5c19Jbl9DYXJlIDwtIGFzLmludGVnZXIoZGYkVG90YWwuRGF5cy5Jbi5DYXJlKQpkZiRBZG1pc3Npb25zIDwtIGFzLmludGVnZXIoZGYkQWRtaXNzaW9ucykKZGYkRGlzY2hhcmdlcyA8LSBhcy5pbnRlZ2VyKGRmJERpc2NoYXJnZXMpCmRmJENoaWxkcmVuX0luX0NhcmUgPC0gYXMuaW50ZWdlcihkZiRDaGlsZHJlbi5Jbi5DYXJlKQpkZiRJbmRpY2F0ZWRfQ1BTX1JlcG9ydHMgPC0gYXMuaW50ZWdlcihkZiRJbmRpY2F0ZWQuQ1BTLlJlcG9ydHMpCgojIEV4cGxvcmF0b3J5IERhdGEgQW5hbHlzaXMgKEVEQSkKCiMgTGluZSBwbG90IGZvciB0cmVuZHMgaW4gZm9zdGVyIGNhcmUgcGxhY2VtZW50cyBhbmQgQ1BTIHJlcG9ydHMKdHJlbmRzX2RhdGEgPC0gZGYgJT4lCiAgZ3JvdXBfYnkoWWVhcikgJT4lCiAgc3VtbWFyaXplKFRvdGFsX0FkbWlzc2lvbnMgPSBzdW0oQWRtaXNzaW9ucyksCiAgICAgICAgICAgIFRvdGFsX0Rpc2NoYXJnZXMgPSBzdW0oRGlzY2hhcmdlcyksCiAgICAgICAgICAgIFRvdGFsX0NoaWxkcmVuX0luX0NhcmUgPSBzdW0oQ2hpbGRyZW5fSW5fQ2FyZSksCiAgICAgICAgICAgIFRvdGFsX0luZGljYXRlZF9DUFNfUmVwb3J0cyA9IHN1bShJbmRpY2F0ZWRfQ1BTX1JlcG9ydHMpKQoKZ2dwbG90KHRyZW5kc19kYXRhLCBhZXMoeCA9IFllYXIpKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gVG90YWxfQWRtaXNzaW9ucywgY29sb3IgPSAiQWRtaXNzaW9ucyIpLCBsaW5ld2lkdGggPSAxKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gVG90YWxfRGlzY2hhcmdlcywgY29sb3IgPSAiRGlzY2hhcmdlcyIpLCBsaW5ld2lkdGggPSAxKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gVG90YWxfQ2hpbGRyZW5fSW5fQ2FyZSwgY29sb3IgPSAiQ2hpbGRyZW4gSW4gQ2FyZSIpLCBsaW5ld2lkdGggPSAxKSArCiAgZ2VvbV9saW5lKGFlcyh5ID0gVG90YWxfSW5kaWNhdGVkX0NQU19SZXBvcnRzLCBjb2xvciA9ICJJbmRpY2F0ZWQgQ1BTIFJlcG9ydHMiKSwgbGluZXdpZHRoID0gMSkgKwogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXMgPSBjKCJBZG1pc3Npb25zIiA9ICJibHVlIiwgIkRpc2NoYXJnZXMiID0gImdyZWVuIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiQ2hpbGRyZW4gSW4gQ2FyZSIgPSAicmVkIiwgIkluZGljYXRlZCBDUFMgUmVwb3J0cyIgPSAicHVycGxlIikpICsKICBsYWJzKHRpdGxlID0gIlRyZW5kcyBpbiBGb3N0ZXIgQ2FyZSBQbGFjZW1lbnRzIGFuZCBDUFMgUmVwb3J0cyIsCiAgICAgICB4ID0gIlllYXIiLCB5ID0gIkNvdW50IiwgY29sb3IgPSAiVmFyaWFibGUiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikgKwogIGNvb3JkX2NhcnRlc2lhbih5bGltID0gYygwLCBtYXgodHJlbmRzX2RhdGEkVG90YWxfQ2hpbGRyZW5fSW5fQ2FyZSkgKiAxLjEpKQoKIyBCYXIgcGxvdCBmb3IgZGlzdHJpYnV0aW9uIG9mIGZvc3RlciBjYXJlIHBsYWNlbWVudCB0eXBlcwpwbGFjZW1lbnRfdHlwZXMgPC0gZGYgJT4lCiAgc2VsZWN0KFllYXIsIEFkb3B0aXZlLkhvbWUsIEFnZW5jeS5PcGVyYXRlZC5Cb2FyZGluZy5Ib21lLCBBcHByb3ZlZC5SZWxhdGl2ZS5Ib21lLAogICAgICAgICBGb3N0ZXIuQm9hcmRpbmcuSG9tZSwgR3JvdXAuSG9tZSwgR3JvdXAuUmVzaWRlbmNlLCBJbnN0aXR1dGlvbiwKICAgICAgICAgU3VwZXJ2aXNlZC5JbmRlcGVuZGVudC5MaXZpbmcsIE90aGVyKSAlPiUKICBwaXZvdF9sb25nZXIoY29scyA9IC1ZZWFyLCBuYW1lc190byA9ICJQbGFjZW1lbnRfVHlwZSIsIHZhbHVlc190byA9ICJUb3RhbF9EYXlzIikgJT4lCiAgZ3JvdXBfYnkoUGxhY2VtZW50X1R5cGUpICU+JQogIHN1bW1hcml6ZShUb3RhbF9EYXlzID0gc3VtKFRvdGFsX0RheXMpKQoKZ2dwbG90KHBsYWNlbWVudF90eXBlcywgYWVzKHggPSBQbGFjZW1lbnRfVHlwZSwgeSA9IFRvdGFsX0RheXMsIGZpbGwgPSBQbGFjZW1lbnRfVHlwZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGUgPSAiU2V0MyIpICsKICBsYWJzKHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBGb3N0ZXIgQ2FyZSBQbGFjZW1lbnQgVHlwZXMiLAogICAgICAgeCA9ICJQbGFjZW1lbnQgVHlwZSIsIHkgPSAiVG90YWwgRGF5cyIsIGZpbGwgPSAiUGxhY2VtZW50IFR5cGUiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpLCBsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpICsKICBjb29yZF9mbGlwKCkKCiMgSGlzdG9ncmFtIGZvciBnZW9ncmFwaGljYWwgZGlzcGFyaXRpZXMgaW4gYXZlcmFnZSBudW1iZXIgb2YgY2hpbGRyZW4gaW4gY2FyZQpnZW9fZGF0YSA8LSBkZiAlPiUKICBncm91cF9ieShDb3VudHkpICU+JQogIHN1bW1hcml6ZShBdmdfQ2hpbGRyZW5fSW5fQ2FyZSA9IG1lYW4oQ2hpbGRyZW5fSW5fQ2FyZSkpCgpnZ3Bsb3QoZ2VvX2RhdGEsIGFlcyh4ID0gQXZnX0NoaWxkcmVuX0luX0NhcmUsIGZpbGwgPSAuLmNvdW50Li4pKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAyMCwgY29sb3IgPSAid2hpdGUiLCBib3VuZGFyeSA9IDApICsKICBzY2FsZV9maWxsX2dyYWRpZW50KGxvdyA9ICJsaWdodGJsdWUiLCBoaWdoID0gImRhcmtibHVlIiwgbmFtZSA9ICJGcmVxdWVuY3kiKSArCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgQXZlcmFnZSBOdW1iZXIgb2YgQ2hpbGRyZW4gaW4gQ2FyZSBieSBDb3VudHkiLAogICAgICAgeCA9ICJBdmVyYWdlIE51bWJlciBvZiBDaGlsZHJlbiBpbiBDYXJlIiwKICAgICAgIHkgPSAiTnVtYmVyIG9mIENvdW50aWVzIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIGZhY2UgPSAiYm9sZCIpLAogICAgICAgIGF4aXMudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDEyKSwKICAgICAgICBsZWdlbmQudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDEwKSwKICAgICAgICBsZWdlbmQucG9zaXRpb24gPSAicmlnaHQiKSArCiAgY29vcmRfY2FydGVzaWFuKHhsaW0gPSBjKDAsIHF1YW50aWxlKGdlb19kYXRhJEF2Z19DaGlsZHJlbl9Jbl9DYXJlLCAwLjkwKSkpCgojIE1hY2hpbmUgTGVhcm5pbmcgTW9kZWwKCiMgUHJlcGFyZSB0aGUgZGF0YSBmb3IgbW9kZWxpbmcKbW9kZWxfZGF0YSA8LSBkZiAlPiUKICBzZWxlY3QoQ2hpbGRyZW5fSW5fQ2FyZSwgQWRtaXNzaW9ucywgRGlzY2hhcmdlcywgSW5kaWNhdGVkX0NQU19SZXBvcnRzKQoKIyBTcGxpdCB0aGUgZGF0YSBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0aW5nIHNldHMKc2V0LnNlZWQoMTIzKQp0cmFpbl9pbmRleCA8LSBjcmVhdGVEYXRhUGFydGl0aW9uKG1vZGVsX2RhdGEkQ2hpbGRyZW5fSW5fQ2FyZSwgcCA9IDAuOCwgbGlzdCA9IEZBTFNFKQp0cmFpbl9kYXRhIDwtIG1vZGVsX2RhdGFbdHJhaW5faW5kZXgsIF0KdGVzdF9kYXRhIDwtIG1vZGVsX2RhdGFbLXRyYWluX2luZGV4LCBdCgojIFRyYWluIHRoZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbApsbV9tb2RlbCA8LSBsbShDaGlsZHJlbl9Jbl9DYXJlIH4gQWRtaXNzaW9ucyArIERpc2NoYXJnZXMgKyBJbmRpY2F0ZWRfQ1BTX1JlcG9ydHMsIGRhdGEgPSB0cmFpbl9kYXRhKQoKIyBFdmFsdWF0ZSB0aGUgbW9kZWwKcHJlZGljdGlvbnMgPC0gcHJlZGljdChsbV9tb2RlbCwgbmV3ZGF0YSA9IHRlc3RfZGF0YSkKcl9zcXVhcmVkIDwtIHN1bW1hcnkobG1fbW9kZWwpJHIuc3F1YXJlZAptc2UgPC0gbWVhbigodGVzdF9kYXRhJENoaWxkcmVuX0luX0NhcmUgLSBwcmVkaWN0aW9ucyleMikKCmNhdCgiUi1zcXVhcmVkOiIsIHJfc3F1YXJlZCwgIlxuIikKY2F0KCJNZWFuIFNxdWFyZWQgRXJyb3IgKE1TRSk6IiwgbXNlLCAiXG4iKQoKIyBQcmludCB0aGUgbW9kZWwgY29lZmZpY2llbnRzCnByaW50KHN1bW1hcnkobG1fbW9kZWwpKQoKCgoKYGBgCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ21kK09wdGlvbitJKi4KCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ21kK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuIAoKVGhlIHByZXZpZXcgc2hvd3MgeW91IGEgcmVuZGVyZWQgSFRNTCBjb3B5IG9mIHRoZSBjb250ZW50cyBvZiB0aGUgZWRpdG9yLiBDb25zZXF1ZW50bHksIHVubGlrZSAqS25pdCosICpQcmV2aWV3KiBkb2VzIG5vdCBydW4gYW55IFIgY29kZSBjaHVua3MuIEluc3RlYWQsIHRoZSBvdXRwdXQgb2YgdGhlIGNodW5rIHdoZW4gaXQgd2FzIGxhc3QgcnVuIGluIHRoZSBlZGl0b3IgaXMgZGlzcGxheWVkLgoK