Import Marriage Rate Data

# Read the FiveThirtyEight "both_sexes" marriage CSV straight from GitHub
# and preview its first rows.
marriage_rate_data <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/marriage/both_sexes.csv"
# Spell out FALSE: `F` is an ordinary variable that can be reassigned,
# which would silently flip this option.
mr <- read.csv(marriage_rate_data, stringsAsFactors = FALSE)
head(mr)
##   X year       date  all_2534   HS_2534   SC_2534  BAp_2534  BAo_2534
## 1 1 1960 1960-01-01 0.1233145 0.1095332 0.1522818 0.2389952 0.2389952
## 2 2 1970 1970-01-01 0.1269715 0.1094000 0.1495096 0.2187031 0.2187031
## 3 3 1980 1980-01-01 0.1991767 0.1617313 0.2236916 0.2881646 0.2881646
## 4 4 1990 1990-01-01 0.2968306 0.2777491 0.2780912 0.3612968 0.3656655
## 5 5 2000 2000-01-01 0.3450087 0.3316545 0.3249205 0.3874906 0.3939579
## 6 6 2001 2001-01-01 0.3527767 0.3446069 0.3341101 0.3835686 0.3925148
##     GD_2534 White_2534 Black_2534 Hisp_2534   NE_2534   MA_2534
## 1        NA  0.1164848  0.1621855 0.1393736 0.1504184 0.1628934
## 2        NA  0.1179043  0.1855163 0.1298769 0.1517231 0.1640680
## 3        NA  0.1824126  0.3137500 0.1885440 0.2414327 0.2505925
## 4 0.3474505  0.2639256  0.4838556 0.2962372 0.3500384 0.3623321
## 5 0.3691740  0.3127149  0.5144994 0.3180681 0.4091852 0.4175565
## 6 0.3590304  0.3183506  0.5437985 0.3321214 0.4200581 0.4294281
##   Midwest_2534 South_2534 Mountain_2534 Pacific_2534 poor_2534   mid_2534
## 1    0.1121467  0.1090562    0.09152117    0.1198758 0.1371597 0.07514929
## 2    0.1153741  0.1126220    0.10293602    0.1374964 0.1717202 0.08159207
## 3    0.1828339  0.1688435    0.17434230    0.2334279 0.3100591 0.14825303
## 4    0.2755046  0.2639794    0.25264326    0.3319579 0.4199108 0.24320008
## 5    0.3308022  0.3099712    0.30621032    0.3753061 0.5033676 0.30202036
## 6    0.3344332  0.3182688    0.30980779    0.3844799 0.5178771 0.31716118
##   rich_2534   all_3544    HS_3544    SC_3544  BAp_3544  BAo_3544   GD_3544
## 1 0.2066776 0.07058157 0.06860309 0.06663695 0.1326265 0.1326265        NA
## 2 0.1724093 0.06732520 0.06511964 0.06271724 0.1116899 0.1116899        NA
## 3 0.1851082 0.06883378 0.06429102 0.06531333 0.1056102 0.1056102        NA
## 4 0.2783226 0.11191800 0.11210043 0.09699372 0.1285172 0.1258567 0.1328018
## 5 0.2717386 0.15605881 0.16993703 0.13800404 0.1541238 0.1536299 0.1550970
## 6 0.2532041 0.15642529 0.16870156 0.13986044 0.1548151 0.1524923 0.1595169
##   White_3544 Black_3544  Hisp_3544    NE_3544    MA_3544 Midwest_3544
## 1 0.06825586 0.08836728 0.07307651 0.09194322 0.09347468   0.06863360
## 2 0.06250372 0.10290904 0.07070500 0.08570110 0.09040725   0.06156272
## 3 0.05966739 0.13140081 0.08110790 0.07997323 0.09744428   0.06070641
## 4 0.09611312 0.22010298 0.12194206 0.12785915 0.14354989   0.10157576
## 5 0.13207032 0.30239381 0.15469520 0.17327422 0.18819256   0.14539201
## 6 0.13287455 0.30857796 0.14953050 0.16653497 0.18315109   0.14794407
##   South_3544 Mountain_3544 Pacific_3544 poor_3544   mid_3544  rich_3544
## 1 0.06026353    0.04739747   0.05822486 0.1019749 0.04717272 0.08553870
## 2 0.05966057    0.04651163   0.06347796 0.1117548 0.04566838 0.06499159
## 3 0.05914089    0.04880077   0.07552538 0.1291426 0.05050321 0.04445951
## 4 0.09637035    0.09189904   0.13134638 0.2012208 0.09024739 0.06573916
## 5 0.14230600    0.13584194   0.17480047 0.2813137 0.12815751 0.08622046
## 6 0.14312592    0.13943820   0.17694864 0.2919112 0.13267625 0.06803283
##     all_4554    HS_4554    SC_4554   BAp_4554   BAo_4554    GD_4554
## 1 0.07254649 0.06840792 0.07903755 0.15360889 0.15360889         NA
## 2 0.05968794 0.05833439 0.05443478 0.10466047 0.10466047         NA
## 3 0.05250871 0.05036563 0.04816180 0.08623774 0.08623774         NA
## 4 0.05947824 0.05988244 0.04654087 0.07301884 0.06416529 0.08394886
## 5 0.08804394 0.09442809 0.07558786 0.09208417 0.09097472 0.09362802
## 6 0.08823342 0.09189007 0.07795481 0.09333365 0.09313480 0.09362876
##   White_4554 Black_4554  Hisp_4554    NE_4554    MA_4554 Midwest_4554
## 1 0.07246692 0.06913249 0.06636058 0.10236412 0.09264788   0.07285321
## 2 0.05754799 0.07899168 0.05810740 0.08028082 0.07860635   0.05791163
## 3 0.04765354 0.08624602 0.06522951 0.06930253 0.07508466   0.04807290
## 4 0.05092552 0.11617699 0.07613556 0.07047502 0.08373134   0.05398391
## 5 0.07578174 0.17587334 0.09418009 0.10232170 0.11269659   0.08302437
## 6 0.07516912 0.18154531 0.09409896 0.09868408 0.10953635   0.08207629
##   South_4554 Mountain_4554 Pacific_4554 poor_4554   mid_4554  rich_4554
## 1 0.05977295    0.04754183   0.05996993 0.1030055 0.05364421 0.07908591
## 2 0.05174462    0.03970134   0.04826312 0.1016489 0.04221637 0.05142867
## 3 0.04485348    0.03374438   0.04958992 0.1003011 0.03830266 0.03311296
## 4 0.05043636    0.04459411   0.06461875 0.1148335 0.04562332 0.03136386
## 5 0.07631858    0.07637774   0.09896832 0.1718976 0.07055672 0.03897342
## 6 0.07886513    0.07405971   0.10119511 0.1759369 0.07407508 0.02857320
##   nokids_all_2534 kids_all_2534 nokids_HS_2534 nokids_SC_2534
## 1       0.4640564   0.002820625      0.4430148      0.5000402
## 2       0.4309043   0.009868596      0.4246779      0.4333479
## 3       0.4464304   0.025285667      0.4319342      0.4505900
## 4       0.5425242   0.060277451      0.5464881      0.5238446
## 5       0.5714531   0.099472713      0.5711395      0.5700042
## 6       0.5852213   0.110178467      0.6045475      0.5810912
##   nokids_BAp_2534 nokids_BAo_2534 nokids_GD_2534 kids_HS_2534 kids_SC_2534
## 1       0.5619099       0.5619099             NA  0.003318886  0.001150824
## 2       0.4554766       0.4554766             NA  0.012465915  0.003699982
## 3       0.4719700       0.4719700             NA  0.031930752  0.018135401
## 4       0.5560765       0.5633301      0.5332628  0.078470444  0.052032702
## 5       0.5729677       0.5862213      0.5367160  0.127193577  0.097625310
## 6       0.5698644       0.5864967      0.5258800  0.141395652  0.110030662
##   kids_BAp_2534 kids_BAo_2534 kids_GD_2534 nokids_poor_2534
## 1  0.0005751073  0.0005751073           NA        0.4933061
## 2  0.0014683425  0.0014683425           NA        0.5097742
## 3  0.0062544364  0.0062544364           NA        0.5740402
## 4  0.0171241042  0.0181766027   0.01374234        0.6546908
## 5  0.0370024452  0.0401009875   0.02761467        0.7055451
## 6  0.0399801447  0.0445838012   0.02645041        0.7147334
##   nokids_mid_2534 nokids_rich_2534 kids_poor_2534 kids_mid_2534
## 1       0.4100080        0.4921184    0.008722711  0.0007532065
## 2       0.3764538        0.4288948    0.029974945  0.0033771145
## 3       0.3998250        0.3848089    0.077926214  0.0102368871
## 4       0.5186604        0.4750156    0.170763774  0.0274655254
## 5       0.5690228        0.4458023    0.256281918  0.0597845173
## 6       0.5864741        0.4461111    0.280146488  0.0677954572
##   kids_rich_2534
## 1   0.0008027331
## 2   0.0030435661
## 3   0.0068317224
## 4   0.0182329127
## 5   0.0295644698
## 6   0.0336540502

Tidy Marriage Rate Data

Display column names

# List the name of every column in `mr`
names(mr)
##  [1] "X"                "year"             "date"            
##  [4] "all_2534"         "HS_2534"          "SC_2534"         
##  [7] "BAp_2534"         "BAo_2534"         "GD_2534"         
## [10] "White_2534"       "Black_2534"       "Hisp_2534"       
## [13] "NE_2534"          "MA_2534"          "Midwest_2534"    
## [16] "South_2534"       "Mountain_2534"    "Pacific_2534"    
## [19] "poor_2534"        "mid_2534"         "rich_2534"       
## [22] "all_3544"         "HS_3544"          "SC_3544"         
## [25] "BAp_3544"         "BAo_3544"         "GD_3544"         
## [28] "White_3544"       "Black_3544"       "Hisp_3544"       
## [31] "NE_3544"          "MA_3544"          "Midwest_3544"    
## [34] "South_3544"       "Mountain_3544"    "Pacific_3544"    
## [37] "poor_3544"        "mid_3544"         "rich_3544"       
## [40] "all_4554"         "HS_4554"          "SC_4554"         
## [43] "BAp_4554"         "BAo_4554"         "GD_4554"         
## [46] "White_4554"       "Black_4554"       "Hisp_4554"       
## [49] "NE_4554"          "MA_4554"          "Midwest_4554"    
## [52] "South_4554"       "Mountain_4554"    "Pacific_4554"    
## [55] "poor_4554"        "mid_4554"         "rich_4554"       
## [58] "nokids_all_2534"  "kids_all_2534"    "nokids_HS_2534"  
## [61] "nokids_SC_2534"   "nokids_BAp_2534"  "nokids_BAo_2534" 
## [64] "nokids_GD_2534"   "kids_HS_2534"     "kids_SC_2534"    
## [67] "kids_BAp_2534"    "kids_BAo_2534"    "kids_GD_2534"    
## [70] "nokids_poor_2534" "nokids_mid_2534"  "nokids_rich_2534"
## [73] "kids_poor_2534"   "kids_mid_2534"    "kids_rich_2534"

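**Replace the '_' in front of the trailing age-group digits with a '.' so the two parts are easier to separate later (other underscores, e.g. in `nokids_all`, are left alone).**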

# Turn the underscore that precedes the trailing run of digits into a dot,
# e.g. "all_2534" -> "all.2534". The pattern is anchored at the end of the
# name, so at most one replacement happens per column name and underscores
# earlier in the name ("nokids_all_2534") are untouched.
names(mr) <- sub("_(\\d+)$", ".\\1", names(mr))
colnames(mr)
##  [1] "X"                "year"             "date"            
##  [4] "all.2534"         "HS.2534"          "SC.2534"         
##  [7] "BAp.2534"         "BAo.2534"         "GD.2534"         
## [10] "White.2534"       "Black.2534"       "Hisp.2534"       
## [13] "NE.2534"          "MA.2534"          "Midwest.2534"    
## [16] "South.2534"       "Mountain.2534"    "Pacific.2534"    
## [19] "poor.2534"        "mid.2534"         "rich.2534"       
## [22] "all.3544"         "HS.3544"          "SC.3544"         
## [25] "BAp.3544"         "BAo.3544"         "GD.3544"         
## [28] "White.3544"       "Black.3544"       "Hisp.3544"       
## [31] "NE.3544"          "MA.3544"          "Midwest.3544"    
## [34] "South.3544"       "Mountain.3544"    "Pacific.3544"    
## [37] "poor.3544"        "mid.3544"         "rich.3544"       
## [40] "all.4554"         "HS.4554"          "SC.4554"         
## [43] "BAp.4554"         "BAo.4554"         "GD.4554"         
## [46] "White.4554"       "Black.4554"       "Hisp.4554"       
## [49] "NE.4554"          "MA.4554"          "Midwest.4554"    
## [52] "South.4554"       "Mountain.4554"    "Pacific.4554"    
## [55] "poor.4554"        "mid.4554"         "rich.4554"       
## [58] "nokids_all.2534"  "kids_all.2534"    "nokids_HS.2534"  
## [61] "nokids_SC.2534"   "nokids_BAp.2534"  "nokids_BAo.2534" 
## [64] "nokids_GD.2534"   "kids_HS.2534"     "kids_SC.2534"    
## [67] "kids_BAp.2534"    "kids_BAo.2534"    "kids_GD.2534"    
## [70] "nokids_poor.2534" "nokids_mid.2534"  "nokids_rich.2534"
## [73] "kids_poor.2534"   "kids_mid.2534"    "kids_rich.2534"

Gather the rate columns into a key column and a 'marriage_rate' value column, then split the key into 'category' and 'age_group'

# Reshape from wide to long.
#   1. gather(): every column whose name ends in a character followed by
#      digits is stacked into a key column ("age_group") and a value column
#      ("marriage_rate"); X, year and date are kept as identifiers.
#   2. separate(): the key (e.g. "nokids_all.2534") is split on the literal
#      dot into "category" and "age_group".
marriage_rates <- separate(
  gather(mr, "age_group", "marriage_rate", matches(".(\\d+)$")),
  "age_group",
  c("category", "age_group"),
  sep = '\\.'
)

head(marriage_rates)
##   X year       date category age_group marriage_rate
## 1 1 1960 1960-01-01      all      2534     0.1233145
## 2 2 1970 1970-01-01      all      2534     0.1269715
## 3 3 1980 1980-01-01      all      2534     0.1991767
## 4 4 1990 1990-01-01      all      2534     0.2968306
## 5 5 2000 2000-01-01      all      2534     0.3450087
## 6 6 2001 2001-01-01      all      2534     0.3527767

Adjust the age-group values and marriage_rate values

unique(marriage_rates$age_group) # before
## [1] "2534" "3544" "4554"
# Put a hyphen in front of the last two digits ("2534" -> "25-34"); the
# pattern is end-anchored, so there is exactly one substitution per value.
marriage_rates$age_group <- sub("(\\d\\d)$", "-\\1", marriage_rates$age_group)
# Store the rate as a percentage rounded to one decimal place, then drop the
# raw proportion column.
marriage_rates <- select(
  mutate(marriage_rates, marriage_rate_percentage = round(marriage_rate * 100, 1)),
  -marriage_rate
)
unique(marriage_rates$age_group) # after
## [1] "25-34" "35-44" "45-54"

Analyze Marriage Rate Data

# Subsets used by the plots below.
#   all    - overall rate, every age group
#   race   - White / Black / Hisp categories, ages 25-34 only
#   region - the six regional categories, ages 25-34 only
# The regional subset gets its own name; it was previously assigned to
# `race`, which overwrote the race subset so that both the race plot and the
# region plot showed regional data.
all <- filter(marriage_rates, category == "all")
race <- filter(
  marriage_rates,
  category %in% c("White", "Black", "Hisp"),
  age_group == "25-34"
)
region <- filter(
  marriage_rates,
  category %in% c("NE", "MA", "Midwest", "South", "Mountain", "Pacific"),
  age_group == "25-34"
)


# Overall marriage-rate percentage over time, one line per age group.
ggplot(data = all, aes(x = year, y = marriage_rate_percentage, group = age_group, colour = age_group)) +
    geom_line() +
    geom_point(size = 1, shape = 21) +
    ggtitle("Marriage Rate by Age Group")

# Ages 25-34, one line per race/ethnicity category.
ggplot(data = race, aes(x = year, y = marriage_rate_percentage, group = category, colour = category)) +
    geom_line() +
    geom_point(size = 1, shape = 21) +
    ggtitle("Marriage Rate by Race: Ages 25-34")

# Ages 25-34, one line per region.
ggplot(data = region, aes(x = year, y = marriage_rate_percentage, group = category, colour = category)) +
    geom_line() +
    geom_point(size = 1, shape = 21) +
    ggtitle("Marriage Rate by Region")