Data607_Project2 - Marriage/Divorce Rates

Read in data

mdDf <- read.csv("https://raw.githubusercontent.com/xiaoxiaogao-DD/DATA607-Project1/master/national_marriage_divorce_rates_00-16.csv", stringsAsFactors = FALSE, skip = 2, header = TRUE, colClasses = "character")

Remove blank columns

# Confirm all values are blank for last columns
stopifnot(nrow(mdDf %>% filter_if(str_detect(colnames(.), "X"), all_vars(. != ""))) == 0)

# Remove these rows
mdDf <- mdDf[, !str_detect(colnames(mdDf), "X")]

Split apart separate dataframes

marrDf <- mdDf[1:27,]
divDf <- mdDf[29:nrow(mdDf),]

Adust column names

colnames(divDf) <- divDf[1, ]

Extract footnote data into dataframe

# Save out total row numbers to confirm logic to split off footnotes was reasonable
marrRows <- nrow(marrDf)
divRows <- nrow(divDf)

marrFtDf <- marrDf %>% filter(!str_detect(Year, "^20."))
divFtDf <- divDf %>% filter(!str_detect(Year, "^20."))

marrDf <- marrDf %>% filter(str_detect(Year, "^20."))
divDf <- divDf %>% filter(str_detect(Year, "^20."))

stopifnot(marrRows == nrow(marrFtDf) + nrow(marrDf))
stopifnot(divRows == nrow(divFtDf) + nrow(divDf))

Parse footnote dataframes

marrFtDf <- marrFtDf %>%
  rename(raw = Year) %>%
  select(raw) %>%
  filter(str_trim(raw) != "") %>%
  mutate(footId = as.numeric(str_extract(raw, "^[:digit:]+")),
         footnote = str_replace(raw, "^[:digit:]+/", "")) %>%
  filter(!is.na(footId)) %>%
  select(footId, footnote)

divFtDf <- divFtDf %>%
  rename(raw = Year) %>%
  select(raw) %>%
  filter(str_trim(raw) != "") %>%
  mutate(footId = as.numeric(str_extract(raw, "^[:digit:]+")),
         footnote = str_replace(raw, "^[:digit:]+ ", "")) %>%
  filter(!is.na(footId)) %>%
  select(footId, footnote)

Parse numerics in main dataframes

# Extract footnote information from Year column
marrDf$footId <- str_extract(marrDf$Year, "/.")
marrDf$Year <- str_replace(marrDf$Year, "/.", "")
marrDf$footId <- str_replace(marrDf$footId, "/", "")

divDf$footId <- str_extract(divDf$Year, "/.")
divDf$Year <- str_replace(divDf$Year, "/.", "")
divDf$footId <- str_replace(divDf$footId, "/", "")

# remove commas
marrDf <- marrDf %>% mutate_all(funs(str_replace_all(., ",", "")))
divDf <- divDf %>% mutate_all(funs(str_replace_all(., ",", "")))

# parse all columns as numerics
marrDf <- marrDf %>% mutate_all(as.numeric)
divDf <- divDf %>% mutate_all(as.numeric)

Merge in footnotes

marrDf <- left_join(marrDf, marrFtDf, by="footId")
divDf <- left_join(divDf, divFtDf, by="footId")
rm(mdDf, marrFtDf, divFtDf)

Merge marraige and divorce dataframes

marrDf <- marrDf %>%
  rename(marriages = Marriages,
         marrPopulation = Population,
         marrRatePer1000 = Rate.per.1.000.total.population,
         marrFootnote = footnote) %>%
  select(-footId)

divDf <- divDf %>%
  rename(divPopulation = Population,
         divRatePer1000 = `Rate per 1,000 total population`,
         divFootnote = footnote,
         divAnnul = `Divorces & annulments`) %>%
  select(-footId)

finalDf <- full_join(marrDf, divDf, by = "Year") %>%
  select(Year, marriages, divAnnul, marrPopulation, divPopulation, marrRatePer1000, divRatePer1000,
         marrFootnote, divFootnote)

rm(divDf, marrDf)

Suggested Analyses

Is the decrease in the divorce rate due to the decrease in the marriage rate?

As noted, there are pretty large differences in population between the two data sets… So it’s not easy to compare marriage and divorce rates per year, but we can look at the data broadly.

Since couples may be unlikely to divorce just in the first year of marriage, maybe it would be useful to look at overlaid line graphs to see if there’s any trends that seem to occur on a delay of several years

ggplot(finalDf, aes(Year)) + 
  geom_line(aes(y = marrRatePer1000, colour = "marrRatePer1000")) + 
  geom_line(aes(y = divRatePer1000, colour = "divRatePer1000"))  +
  labs(title = "Marriage/Divorce Rates", x = "Year", y = "Rate Per 1000") +
  theme(plot.title = element_text(hjust = 0.5))

From the above, it’s not easy to discern subtle trends, but overall, it does look like a decrease in marriages is at least correlated with a decrease in divorce rates.

Data607_Project2 - Marriage/Divorce Rates

Ryan Weber

March 11, 2018

Read in data

Remove blank columns

Split apart separate dataframes

Adust column names

Extract footnote data into dataframe

Parse footnote dataframes

Parse numerics in main dataframes

Merge in footnotes

Merge marraige and divorce dataframes

Suggested Analyses