Adoption Subsidies

#pdf1
f <- "https://www.acf.hhs.gov/sites/default/files/cb/adoption_subsidy2012_2016.pdf"
tab = extract_tables(f)
temp = do.call(rbind, tab) # This is a matrix
# Extract the data portion which starts from row 5
final <- as.data.frame(temp[5:nrow(temp), ]) # Note: The first column "V1" is a bundle
# Separate the first column into 3 columns using the space (\\s) separator
L = strsplit(final$V1, "\\s")
State=sapply(L, function(v){toString(v[1:(length(v)-2)])})
x=sapply(L, function(v){tail(v,2)[1]}) 
y=sapply(L, function(v){tail(v,2)[2]})
tableone = cbind(State, x, y, final[2:18], final[20])
# Renaming columns
names(tableone)[-1] = paste0(rep("FY",20), rep(2012:2016, c(4,4,4,4,4)), rep("AdoptionSubsidy", 20), rep(c("Yes%", "No%", "Total", "Missing"), 5))
tableone[,4] = gsub(",", "", tableone[,4])
tableone[,8] = gsub(",", "", tableone[,8])
tableone[,12] = gsub(",", "", tableone[,12])
tableone[,16] = gsub(",", "", tableone[,16])
tableone[,20] = gsub(",", "", tableone[,20])
tableone[,-1] = apply(tableone[,-1], 2, function(x) as.numeric(x))
tableone <- tableone[-53,]

With subsidies

mTone <- tableone %>% 
  select(contains("Yes")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = as.numeric(value),))+
  geom_boxplot(fill = "yellow")+
  labs(title = "Proportion of adoptions with subsidies",y = "Percent %", x = NULL)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .7))
p

As we can see from the graph to our right. From 2012 to 2016 the proportion of guardians how took a subsidy for adopting a child seem to stay steady. And if we turn our attention to the mean we can see a very slight increase in the proportion of those who did take a subsidy when adopting.

With out subsites

mTone <- tableone %>% 
  select(contains("No")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = as.numeric(value),))+
  geom_boxplot(fill = "red")+
  labs(title = "Proportion of adoptions with out subsites",y = "Percent %", x = NULL)+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

From this graph, we can see the opposite of what was happening in the graph with subsidies. Where people are opting to take subsidies over no subsidies. This is simply because they are perfectly correlated. That is, if we add up and average each year in the graph with subsidies and without we will get 1 or 100%.

Total Number of children

mTone <- tableone %>% 
  select(contains("Total")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = as.numeric(value),))+
  geom_boxplot(fill = "green")+
  labs(title = "Total Number of Adopted Children Nation Wide",y = "Count", x = NULL)+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

From this graph we can see that there are a few states that have Way more adopted Children compared to others. However, we must note that those states have a greater population than the other states (California, Florida, Texas, Ect.) and therefore they ought to have proportionally more children adopted.

Missing Data

mTone <- tableone %>% 
  select(contains("Missing")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = as.numeric(value),))+
  geom_boxplot(fill = "blue")+
  labs(title = "Total Missing Data",y = "Count", x = NULL)+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

From this graph note that it is normal for states to have 0 missing data. Also note that those states that had a spike in missing data don’t seem to repeat it. For instance, the outlier in FY2014 was Arizona and the preceding years had significantly less missing data.

Family Structure

#pdf2
f2 <- "https://www.acf.hhs.gov/sites/default/files/cb/family_structure2016.pdf"
tab2 = extract_tables(f2)
temp2 = do.call(rbind, tab2) # This is a matrix
# Extract the data portion which starts from row 7
table2 <- as.data.frame(temp2[7:nrow(temp2), ])
# Renaming columns
names(table2)[-1] = paste0( rep(c("Married", "UnMarried", "SingleFemale", "SingleMale","Total","Missing"), 1))
table2 <- rename(table2, "States" = V1)
table2[,6] = gsub(",", "", table2[,6])
table2 <- table2[1:51,]
#setting values as numeric
table2[,-1] = apply(table2[,-1], 2, function(x) as.numeric(x))

Distribution of Family Structure

mTone <- table2 %>% 
  select(c("Married", "UnMarried", "SingleFemale", "SingleMale")) %>% 
  gather(var,value)

p <- ggplot(mTone,aes(x = var,y = value))+
  geom_boxplot(fill = "blue")+
  labs(title = "Distribution of the Adoptive Family Structre",x = NULL, y = "Percent %")+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

From the graph we can see that it is way more likely for a Married couple to Adopt a child vs. the other family structures. The Next likely event is a single Mother. Then it seems that the unmarried couple has a higher proportion compares to single men.

Finalization Age

#pdf3
f3 <- "https://www.acf.hhs.gov/sites/default/files/cb/final_age2016.pdf"
tab3 = extract_tables(f3)
temp3 = do.call(rbind, tab3) # This is a matrix
# Extract the data portion which starts from row 5
temp3 <- strsplit(temp3,"\r")
temp3 <- temp3[2*(1:30)]
temp3 <- temp3[- c(2,29,30)]
temp3[[24]] = gsub(",", "", temp3[[24]])
table3 <- as.data.frame(temp3)

# Renaming columns
names(table3) = paste0( rep(c("States","< 1", "1", "2", "3","4","5","6","7","8","0-8","9","10","11","12","13","14","15","16","17","18","19","20","TotalNChildren","TotalMiss","TMeanAge","TMedianAge")))

table3[,-1] = apply(table3[,-1], 2, function(x) as.numeric(x))

Distribution of Finalization Age

mTone <- table3 %>% 
  select(c("< 1", "1", "2", "3","4","5","6","7","8","0-8","9","10","11","12","13","14","15","16","17","18","19","20")) %>% 
  gather(var,value)

p <- ggplot(mTone,aes(x = reorder(var,value),y = value))+
  geom_boxplot(fill = "green")+
  labs(title = "Distribution of Finalization Age given Adopted",x = "Years", y = "Percent %")
p

As we can see from the graph, on average the lower the age is the higher proportion of children had their adoption Finalized. And the box for 0-8 is for the proportion of children with adoption finalization ages 0-8. And as we can see from this chart. It is not likely that those greater than 8 will get their adoption finalized.

Parent(s) Relationship

#PDF4
f4 <- "https://www.acf.hhs.gov/sites/default/files/cb/prior_relation2016.pdf"
tab4 = extract_tables(f4)
temp4 = do.call(rbind, tab4) # This is a matrix
temp4 <- strsplit(temp4,"\r")
temp4 <- temp4[2*(1:10)]
temp4 <- temp4[- c(2,9,10)]
temp4[[6]] = gsub(",", "", temp4[[6]])
# Extract the data portion which starts from row 5
table4 <- as.data.frame(temp4) 
# Renaming columns
names(table4) = paste0( rep(c("States", "Non-Relative", "FosterParent", "StepParent","OtherRelative","Total","Missing/NA")))
table4[,-1] = apply(table4[,-1], 2, function(x) as.numeric(x))
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
mTone <- table4 %>% 
  select(c("Non-Relative", "FosterParent", "StepParent","OtherRelative")) %>% 
  gather(var,value)

p <- ggplot(mTone,aes(x = var,y = value))+
  geom_boxplot(fill = "blue")+
  labs(title = "Distribution of the Adoptive Parent(s) Relationship to child",x = NULL, y = "Percent %")
p

Note from the graph, Foster Parenting(which is like adoptive parents but without the full legal custody and rights) is on average more popular than the other types of parent relationship.The next noticeable box is the other relative box. This makes sense to be relatively large since the Foster care system tries to Keep children close to family. Next we have non-relitive, which we just discussed why it is lower on average than foster parent(s) and other relatives. Lastly is Step-Parent which is very uncommon in all the states.

Child Race

#PDF5
f5 <- "https://www.acf.hhs.gov/sites/default/files/cb/race2016.pdf"
tab5 = extract_tables(f5, pages = c(1,2))
#Page1
final5P1 <- as.data.frame(tab5[[1]][8:52, ])
final5P1 <- final5P1[,-c(3,4,8,10,15)]
#Page 2
temp5 <- strsplit(tab5[[2]],"\r")
temp5 <- temp5[-c(5,6)]
temp5 <- temp5[2*(2:12)]
temp5[[10]] = gsub(",", "", temp5[[10]])
final5P2 <- as.data.frame(temp5) 
# Renaming columns
names(final5P1) = paste0( rep(c("States","AmericanIndian/Alaskan", "Asain", "African", "Hawaiian","Hispanic","White","Undetermined",">=2","Total","MissData")))
names(final5P2) = paste0( rep(c("States","AmericanIndian/Alaskan", "Asain", "African", "Hawaiian","Hispanic","White","Undetermined",">=2","Total","MissData")))
#binding the two pages
table5 <- rbind.data.frame(final5P1,final5P2)
table5[,-1] = apply(table5[,-1], 2, function(x) as.numeric(x))
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
mTone <- table5 %>% 
  select(c("AmericanIndian/Alaskan", "Asain", "African", "Hawaiian","Hispanic","White","Undetermined",">=2")) %>% 
  gather(var,value)

p <- ggplot(mTone,aes(x = reorder(var,value),y = value, fill = var))+
  geom_boxplot(alpha=0.3) +
    theme(legend.position="none")+
  labs(title = "Race/Ethnicity Distribution of Children Adopted",x = NULL, y = "Percent %")+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

As we can see from the graph of the unweighted race/ethnicity distribution of children, more white children get adopted over other races and ethnicities. in the next few come in close with each other and they are greater than two races/ethnicities, hispanic and african americans. The last four groups account for a relatively small percentages of the overall children that were adopted in terms of race/ethnicity.

Childs Gender

#PDF6
f6 <- "https://www.acf.hhs.gov/sites/default/files/cb/gender2012_2016.pdf"
tab6 = extract_tables(f6)
temp6 = do.call(rbind, tab6) # This is a matrix
# Extract the data portion which starts from row 5
temp6 <- strsplit(temp6,"\r")
temp6 <- temp6[2*(1:22)]
temp6 <- temp6[-2]
temp6[[4]] = gsub(",", "", temp6[[4]])
temp6[[8]] = gsub(",", "", temp6[[8]])
temp6[[12]] = gsub(",", "", temp6[[12]])
temp6[[16]] = gsub(",", "", temp6[[16]])
temp6[[20]] = gsub(",", "", temp6[[20]])
table6 <- as.data.frame(temp6)
# Renaming columns
names(table6)[-1] = paste0(rep("FY",20), rep(2012:2016, c(4,4,4,4,4)), rep("ChildSex", 20), rep(c("Male", "Female", "Total", "MissingData"), 5))
names(table6)[1] = "State"
table6[,-1] = apply(table6[,-1], 2, function(x) as.numeric(x))

Male

mTone <- table6 %>% 
  select(c("FY2012ChildSexMale","FY2013ChildSexMale","FY2014ChildSexMale",
           "FY2015ChildSexMale","FY2016ChildSexMale")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = value,fill = var))+
  geom_boxplot(alpha = .3)+
  labs(title = "Gender Distribution of Children Adopted",y = "Percent %", x = NULL)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .7),
        legend.position="none")
p

Note from the graph that the proportion of those adopted that were male is around 50%. This makes sense because there shold be an equal number of males getting adopted compared to females.

Female

mTone <- table6 %>% 
  select(contains("Female")) %>% 
  gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = value,fill = var))+
  geom_boxplot(alpha = .3)+
  labs(title = "Female Distribution of Children Adopted",y = "Percent %", x = NULL)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .7),
        legend.position="none")
p

As noted in the Male distribution, we should see on averge the boxes hover around 50%. Which is the case here. From the graph, the proportion of females adopted is around 50%.

Special Needs

f7 <- "https://www.acf.hhs.gov/sites/default/files/cb/special_needs2012_2016.pdf"
tab7 = extract_tables(f7)
temp7 = do.call(rbind, tab7) # This is a matrix
# Extract the data portion which starts from row 5
temp7 <- strsplit(temp7,"\r")
temp7 <- temp7[2*(1:22)]
temp7 <- temp7[-2]
temp7[[4]] = gsub(",", "", temp7[[4]])
temp7[[8]] = gsub(",", "", temp7[[8]])
temp7[[12]] = gsub(",", "", temp7[[12]])
temp7[[16]] = gsub(",", "", temp7[[16]])
temp7[[20]] = gsub(",", "", temp7[[20]])
table7 <- as.data.frame(temp7)
# Renaming columns
names(table7)[-1] = paste0(rep("FY",20), rep(2012:2016, c(4,4,4,4,4)), rep("SpecialNeeds", 20), rep(c("Yes%", "No%", "Total", "MissingData"), 5))
names(table7)[1] = "State"
table7[,-1] = apply(table7[,-1], 2, function(x) as.numeric(x))

With Special Needs

mTone <- table7 %>% select(contains("Yes")) %>% gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = value,fill = var))+
  geom_boxplot(alpha = .3)+
  labs(title = "% of Children Adopted with Special Needs",y = "Percent %", x = NULL)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .7),
        legend.position="none")
p

Note how high the proportion of special needs children that were adopted. In some States we get 100% of adoptions were made with a child that is identified as being special needs (like Alaska).

Without Special Needs

mTone <- table7 %>% select(contains("No")) %>% gather(var,value) 

p <- ggplot(mTone,aes(x = var,y = value,fill = var))+
  geom_boxplot(alpha = .3)+
  labs(title = "% of Children Adopted without Special Needs",y = "Percent %", x = NULL)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .7),
        legend.position="none")
p

Note this graph is simply the flipped graph of Children with Special needs. Also note that it seems like the mean is trending downwards on the graph. That is, It seems that Children without special needs are being adopted less on average.

Termination of Parental Rights (TPR)

f8 <- "https://www.acf.hhs.gov/sites/default/files/cb/tpr2016.pdf"
tab8 = extract_tables(f8, pages = c(1,2))
final8P1 <- as.data.frame(tab8[[1]][8:51, ])
final8P2 <- as.data.frame(tab8[[2]][8:15, ])
final8P1 <- final8P1[-13]
final8P2 <- final8P2[- c(2,14)]
# Renaming columns
names(final8P1) = paste0( rep(c("States","< 1Month", "1-5Mos", "6-11Mos", "12-17Mos","18-23Mos","24-29Mos","30-35Mos","3-4Years","5+Years","Total","MissData","Mean#Mos","Median#mos")))
names(final8P2) = paste0( rep(c("States","< 1Month", "1-5Mos", "6-11Mos", "12-17Mos","18-23Mos","24-29Mos","30-35Mos","3-4Years","5+Years","Total","MissData","Mean#Mos","Median#mos")))
#binding the two pages/dataframes
table8 <- rbind.data.frame(final8P1,final8P2)
table8[,11] <- gsub(",","",table8[,11])
table8[,-1] = apply(table8[,-1], 2, function(x) as.numeric(x))
mTone <- table8 %>% 
  select(c("< 1Month", "1-5Mos", "6-11Mos", "12-17Mos","18-23Mos","24-29Mos","30-35Mos","3-4Years","5+Years")) %>% 
  gather(var,value)

p <- ggplot(mTone,aes(x = var,y = value, fill = var))+
  geom_boxplot(alpha=0.3) +
    theme(legend.position="none")+
  labs(title = "Time between TPR and Adoption Finalization of children 2016",x = NULL, y = "Percent %")+
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = .6))
p

Note from the graph that in the first month the proportion of TPR and finalization is quite small. Though it has a huge jump between 1-5 Months and then slowly goes down as the time between TPR and Finalization get greater. That is, for the first month the guardian(s) don’t seem to Terminate their parental rights on average. However After that first month there is a huge spike in TPR. though this trend slowly goes down. Meaning, the longer the child stays with a guardian(s) the lower the likelihood of the guardian(s) terminating their parental rights.