This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
Warning messages:
1: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
2: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
3: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
4: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
5: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
6: In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
# Load required libraries
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(scales)
library(caret)
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
method from
print.data.table
# Read the raw foster-care CSV, then coerce the count columns to integer in one
# pass. Year, Admissions and Discharges are converted in place; the dotted
# source columns are copied into underscore-named integer columns (the dotted
# originals are kept).
df <- read.csv("~/Downloads/archive 2/children-in-foster-care-annually-beginning-1994.csv") %>%
  mutate(
    Year = as.integer(Year),
    Total_Days_In_Care = as.integer(Total.Days.In.Care),
    Admissions = as.integer(Admissions),
    Discharges = as.integer(Discharges),
    Children_In_Care = as.integer(Children.In.Care),
    Indicated_CPS_Reports = as.integer(Indicated.CPS.Reports)
  )
# Exploratory Data Analysis (EDA)
# Line plot for trends in foster care placements and CPS reports
# Totals per year across all rows. na.rm = TRUE is required because the count
# columns contain missing values; without it a single NA turns that year's
# total into NA and the plotted line breaks at that year.
trends_data <- df %>%
  group_by(Year) %>%
  summarize(
    Total_Admissions = sum(Admissions, na.rm = TRUE),
    Total_Discharges = sum(Discharges, na.rm = TRUE),
    Total_Children_In_Care = sum(Children_In_Care, na.rm = TRUE),
    Total_Indicated_CPS_Reports = sum(Indicated_CPS_Reports, na.rm = TRUE)
  )
# One line per series; the colour is mapped to a constant label so that
# scale_color_manual() can build the legend from the names below.
ggplot(trends_data, aes(x = Year)) +
  geom_line(aes(y = Total_Admissions, color = "Admissions"), linewidth = 1) +
  geom_line(aes(y = Total_Discharges, color = "Discharges"), linewidth = 1) +
  geom_line(aes(y = Total_Children_In_Care, color = "Children In Care"), linewidth = 1) +
  geom_line(aes(y = Total_Indicated_CPS_Reports, color = "Indicated CPS Reports"), linewidth = 1) +
  scale_color_manual(values = c(
    "Admissions" = "blue", "Discharges" = "green",
    "Children In Care" = "red", "Indicated CPS Reports" = "purple"
  )) +
  labs(
    title = "Trends in Foster Care Placements and CPS Reports",
    x = "Year", y = "Count", color = "Variable"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  # Zoom the y-axis to 110% of the peak "Children In Care" total. This only
  # zooms the view; any series that rises above the limit is cut off at the
  # top of the panel.
  coord_cartesian(ylim = c(0, max(trends_data$Total_Children_In_Care, na.rm = TRUE) * 1.1))
# Bar plot for distribution of foster care placement types
# Reshape the placement-type columns to long form (one row per Year x type)
# and total the values for each placement type over all rows.
placement_types <- df %>%
  select(
    Year, Adoptive.Home, Agency.Operated.Boarding.Home, Approved.Relative.Home,
    Foster.Boarding.Home, Group.Home, Group.Residence, Institution,
    Supervised.Independent.Living, Other
  ) %>%
  pivot_longer(cols = -Year, names_to = "Placement_Type", values_to = "Total_Days") %>%
  group_by(Placement_Type) %>%
  # Sum as double: these columns are not converted during preprocessing, and an
  # integer sum that exceeds .Machine$integer.max silently becomes NA.
  # na.rm = TRUE stops one missing cell from blanking a whole placement type.
  summarize(Total_Days = sum(as.numeric(Total_Days), na.rm = TRUE))
ggplot(placement_types, aes(x = Placement_Type, y = Total_Days, fill = Placement_Type)) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Distribution of Foster Care Placement Types",
    x = "Placement Type", y = "Total Days", fill = "Placement Type"
  ) +
  theme_minimal() +
  # The legend would only repeat the axis labels, so it is hidden.
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") +
  # Horizontal bars keep the long placement-type names readable.
  coord_flip()
# Histogram for geographical disparities in average number of children in care
# Mean number of children in care per county over all of that county's rows.
# na.rm = TRUE keeps a county's mean defined if some of its rows are missing.
geo_data <- df %>%
  group_by(County) %>%
  summarize(Avg_Children_In_Care = mean(Children_In_Care, na.rm = TRUE))
# after_stat(count) replaces the deprecated `..count..` notation (ggplot2 >= 3.4
# warns on the old form); it colours each bar by the number of counties in it.
ggplot(geo_data, aes(x = Avg_Children_In_Care, fill = after_stat(count))) +
  geom_histogram(binwidth = 20, color = "white", boundary = 0) +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Frequency") +
  labs(
    title = "Distribution of Average Number of Children in Care by County",
    x = "Average Number of Children in Care",
    y = "Number of Counties"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    legend.title = element_text(size = 10),
    legend.position = "right"
  ) +
  # Zoom the x-axis to the 90th percentile so the largest counties do not
  # compress the rest of the distribution; counties above it are off-panel.
  # na.rm = TRUE because quantile() stops with an error on missing values.
  coord_cartesian(xlim = c(0, quantile(geo_data$Avg_Children_In_Care, 0.90, na.rm = TRUE)))
# Machine Learning Model
# Prepare the data for modeling: keep only the response (Children_In_Care)
# and the three predictors used in the regression formula below.
model_data <- df %>%
  select(Children_In_Care, Admissions, Discharges, Indicated_CPS_Reports)
# Split the data into training (80%) and testing (20%) sets.
# The fixed seed makes the random partition reproducible.
set.seed(123)
train_index <- createDataPartition(model_data$Children_In_Care, p = 0.8, list = FALSE)
train_data <- model_data[train_index, ]
test_data <- model_data[-train_index, ]
# Train the linear regression model.
# lm() drops training rows that have a missing value in any model variable.
lm_model <- lm(Children_In_Care ~ Admissions + Discharges + Indicated_CPS_Reports, data = train_data)
# Evaluate the model
predictions <- predict(lm_model, newdata = test_data)
# NOTE: this is the in-sample R-squared of the training fit, not a test-set score.
r_squared <- summary(lm_model)$r.squared
# predict() yields NA for test rows with a missing predictor; na.rm = TRUE
# excludes those rows so the test-set MSE is a number instead of NA.
mse <- mean((test_data$Children_In_Care - predictions)^2, na.rm = TRUE)
cat("R-squared:", r_squared, "\n")
R-squared: 0.9867581
# Report the mean squared error of the predictions on the held-out test rows
cat("Mean Squared Error (MSE):", mse, "\n")
Mean Squared Error (MSE): NA
# Display the fitted model's summary: call, residual quantiles, coefficient
# table and overall fit statistics
lm_model %>%
  summary() %>%
  print()
Call:
lm(formula = Children_In_Care ~ Admissions + Discharges + Indicated_CPS_Reports,
data = train_data)
Residuals:
Min 1Q Median 3Q Max
-3614.3 -39.2 22.6 60.4 6728.4
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -36.134071 10.684469 -3.382 0.000744 ***
Admissions 1.540466 0.079870 19.287 < 2e-16 ***
Discharges 2.227439 0.061386 36.286 < 2e-16 ***
Indicated_CPS_Reports -0.443983 0.009712 -45.717 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 347.7 on 1166 degrees of freedom
(11 observations deleted due to missingness)
Multiple R-squared: 0.9868, Adjusted R-squared: 0.9867
F-statistic: 2.896e+04 on 3 and 1166 DF, p-value: < 2.2e-16
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.