# Location of the FiveThirtyEight "both_sexes" marriage CSV on GitHub.
marriage_rate_data <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/marriage/both_sexes.csv"

# Read the CSV straight from the URL. Spell out FALSE rather than the
# reassignable shorthand `F` so text columns reliably stay character.
mr <- read.csv(marriage_rate_data, stringsAsFactors = FALSE)

# Preview the first rows of the wide-format table.
head(mr)
## X year date all_2534 HS_2534 SC_2534 BAp_2534 BAo_2534
## 1 1 1960 1960-01-01 0.1233145 0.1095332 0.1522818 0.2389952 0.2389952
## 2 2 1970 1970-01-01 0.1269715 0.1094000 0.1495096 0.2187031 0.2187031
## 3 3 1980 1980-01-01 0.1991767 0.1617313 0.2236916 0.2881646 0.2881646
## 4 4 1990 1990-01-01 0.2968306 0.2777491 0.2780912 0.3612968 0.3656655
## 5 5 2000 2000-01-01 0.3450087 0.3316545 0.3249205 0.3874906 0.3939579
## 6 6 2001 2001-01-01 0.3527767 0.3446069 0.3341101 0.3835686 0.3925148
## GD_2534 White_2534 Black_2534 Hisp_2534 NE_2534 MA_2534
## 1 NA 0.1164848 0.1621855 0.1393736 0.1504184 0.1628934
## 2 NA 0.1179043 0.1855163 0.1298769 0.1517231 0.1640680
## 3 NA 0.1824126 0.3137500 0.1885440 0.2414327 0.2505925
## 4 0.3474505 0.2639256 0.4838556 0.2962372 0.3500384 0.3623321
## 5 0.3691740 0.3127149 0.5144994 0.3180681 0.4091852 0.4175565
## 6 0.3590304 0.3183506 0.5437985 0.3321214 0.4200581 0.4294281
## Midwest_2534 South_2534 Mountain_2534 Pacific_2534 poor_2534 mid_2534
## 1 0.1121467 0.1090562 0.09152117 0.1198758 0.1371597 0.07514929
## 2 0.1153741 0.1126220 0.10293602 0.1374964 0.1717202 0.08159207
## 3 0.1828339 0.1688435 0.17434230 0.2334279 0.3100591 0.14825303
## 4 0.2755046 0.2639794 0.25264326 0.3319579 0.4199108 0.24320008
## 5 0.3308022 0.3099712 0.30621032 0.3753061 0.5033676 0.30202036
## 6 0.3344332 0.3182688 0.30980779 0.3844799 0.5178771 0.31716118
## rich_2534 all_3544 HS_3544 SC_3544 BAp_3544 BAo_3544 GD_3544
## 1 0.2066776 0.07058157 0.06860309 0.06663695 0.1326265 0.1326265 NA
## 2 0.1724093 0.06732520 0.06511964 0.06271724 0.1116899 0.1116899 NA
## 3 0.1851082 0.06883378 0.06429102 0.06531333 0.1056102 0.1056102 NA
## 4 0.2783226 0.11191800 0.11210043 0.09699372 0.1285172 0.1258567 0.1328018
## 5 0.2717386 0.15605881 0.16993703 0.13800404 0.1541238 0.1536299 0.1550970
## 6 0.2532041 0.15642529 0.16870156 0.13986044 0.1548151 0.1524923 0.1595169
## White_3544 Black_3544 Hisp_3544 NE_3544 MA_3544 Midwest_3544
## 1 0.06825586 0.08836728 0.07307651 0.09194322 0.09347468 0.06863360
## 2 0.06250372 0.10290904 0.07070500 0.08570110 0.09040725 0.06156272
## 3 0.05966739 0.13140081 0.08110790 0.07997323 0.09744428 0.06070641
## 4 0.09611312 0.22010298 0.12194206 0.12785915 0.14354989 0.10157576
## 5 0.13207032 0.30239381 0.15469520 0.17327422 0.18819256 0.14539201
## 6 0.13287455 0.30857796 0.14953050 0.16653497 0.18315109 0.14794407
## South_3544 Mountain_3544 Pacific_3544 poor_3544 mid_3544 rich_3544
## 1 0.06026353 0.04739747 0.05822486 0.1019749 0.04717272 0.08553870
## 2 0.05966057 0.04651163 0.06347796 0.1117548 0.04566838 0.06499159
## 3 0.05914089 0.04880077 0.07552538 0.1291426 0.05050321 0.04445951
## 4 0.09637035 0.09189904 0.13134638 0.2012208 0.09024739 0.06573916
## 5 0.14230600 0.13584194 0.17480047 0.2813137 0.12815751 0.08622046
## 6 0.14312592 0.13943820 0.17694864 0.2919112 0.13267625 0.06803283
## all_4554 HS_4554 SC_4554 BAp_4554 BAo_4554 GD_4554
## 1 0.07254649 0.06840792 0.07903755 0.15360889 0.15360889 NA
## 2 0.05968794 0.05833439 0.05443478 0.10466047 0.10466047 NA
## 3 0.05250871 0.05036563 0.04816180 0.08623774 0.08623774 NA
## 4 0.05947824 0.05988244 0.04654087 0.07301884 0.06416529 0.08394886
## 5 0.08804394 0.09442809 0.07558786 0.09208417 0.09097472 0.09362802
## 6 0.08823342 0.09189007 0.07795481 0.09333365 0.09313480 0.09362876
## White_4554 Black_4554 Hisp_4554 NE_4554 MA_4554 Midwest_4554
## 1 0.07246692 0.06913249 0.06636058 0.10236412 0.09264788 0.07285321
## 2 0.05754799 0.07899168 0.05810740 0.08028082 0.07860635 0.05791163
## 3 0.04765354 0.08624602 0.06522951 0.06930253 0.07508466 0.04807290
## 4 0.05092552 0.11617699 0.07613556 0.07047502 0.08373134 0.05398391
## 5 0.07578174 0.17587334 0.09418009 0.10232170 0.11269659 0.08302437
## 6 0.07516912 0.18154531 0.09409896 0.09868408 0.10953635 0.08207629
## South_4554 Mountain_4554 Pacific_4554 poor_4554 mid_4554 rich_4554
## 1 0.05977295 0.04754183 0.05996993 0.1030055 0.05364421 0.07908591
## 2 0.05174462 0.03970134 0.04826312 0.1016489 0.04221637 0.05142867
## 3 0.04485348 0.03374438 0.04958992 0.1003011 0.03830266 0.03311296
## 4 0.05043636 0.04459411 0.06461875 0.1148335 0.04562332 0.03136386
## 5 0.07631858 0.07637774 0.09896832 0.1718976 0.07055672 0.03897342
## 6 0.07886513 0.07405971 0.10119511 0.1759369 0.07407508 0.02857320
## nokids_all_2534 kids_all_2534 nokids_HS_2534 nokids_SC_2534
## 1 0.4640564 0.002820625 0.4430148 0.5000402
## 2 0.4309043 0.009868596 0.4246779 0.4333479
## 3 0.4464304 0.025285667 0.4319342 0.4505900
## 4 0.5425242 0.060277451 0.5464881 0.5238446
## 5 0.5714531 0.099472713 0.5711395 0.5700042
## 6 0.5852213 0.110178467 0.6045475 0.5810912
## nokids_BAp_2534 nokids_BAo_2534 nokids_GD_2534 kids_HS_2534 kids_SC_2534
## 1 0.5619099 0.5619099 NA 0.003318886 0.001150824
## 2 0.4554766 0.4554766 NA 0.012465915 0.003699982
## 3 0.4719700 0.4719700 NA 0.031930752 0.018135401
## 4 0.5560765 0.5633301 0.5332628 0.078470444 0.052032702
## 5 0.5729677 0.5862213 0.5367160 0.127193577 0.097625310
## 6 0.5698644 0.5864967 0.5258800 0.141395652 0.110030662
## kids_BAp_2534 kids_BAo_2534 kids_GD_2534 nokids_poor_2534
## 1 0.0005751073 0.0005751073 NA 0.4933061
## 2 0.0014683425 0.0014683425 NA 0.5097742
## 3 0.0062544364 0.0062544364 NA 0.5740402
## 4 0.0171241042 0.0181766027 0.01374234 0.6546908
## 5 0.0370024452 0.0401009875 0.02761467 0.7055451
## 6 0.0399801447 0.0445838012 0.02645041 0.7147334
## nokids_mid_2534 nokids_rich_2534 kids_poor_2534 kids_mid_2534
## 1 0.4100080 0.4921184 0.008722711 0.0007532065
## 2 0.3764538 0.4288948 0.029974945 0.0033771145
## 3 0.3998250 0.3848089 0.077926214 0.0102368871
## 4 0.5186604 0.4750156 0.170763774 0.0274655254
## 5 0.5690228 0.4458023 0.256281918 0.0597845173
## 6 0.5864741 0.4461111 0.280146488 0.0677954572
## kids_rich_2534
## 1 0.0008027331
## 2 0.0030435661
## 3 0.0068317224
## 4 0.0182329127
## 5 0.0295644698
## 6 0.0336540502
Display column names
# List every column name of the wide-format data frame.
names(mr)
## [1] "X" "year" "date"
## [4] "all_2534" "HS_2534" "SC_2534"
## [7] "BAp_2534" "BAo_2534" "GD_2534"
## [10] "White_2534" "Black_2534" "Hisp_2534"
## [13] "NE_2534" "MA_2534" "Midwest_2534"
## [16] "South_2534" "Mountain_2534" "Pacific_2534"
## [19] "poor_2534" "mid_2534" "rich_2534"
## [22] "all_3544" "HS_3544" "SC_3544"
## [25] "BAp_3544" "BAo_3544" "GD_3544"
## [28] "White_3544" "Black_3544" "Hisp_3544"
## [31] "NE_3544" "MA_3544" "Midwest_3544"
## [34] "South_3544" "Mountain_3544" "Pacific_3544"
## [37] "poor_3544" "mid_3544" "rich_3544"
## [40] "all_4554" "HS_4554" "SC_4554"
## [43] "BAp_4554" "BAo_4554" "GD_4554"
## [46] "White_4554" "Black_4554" "Hisp_4554"
## [49] "NE_4554" "MA_4554" "Midwest_4554"
## [52] "South_4554" "Mountain_4554" "Pacific_4554"
## [55] "poor_4554" "mid_4554" "rich_4554"
## [58] "nokids_all_2534" "kids_all_2534" "nokids_HS_2534"
## [61] "nokids_SC_2534" "nokids_BAp_2534" "nokids_BAo_2534"
## [64] "nokids_GD_2534" "kids_HS_2534" "kids_SC_2534"
## [67] "kids_BAp_2534" "kids_BAo_2534" "kids_GD_2534"
## [70] "nokids_poor_2534" "nokids_mid_2534" "nokids_rich_2534"
## [73] "kids_poor_2534" "kids_mid_2534" "kids_rich_2534"
**Replace the final '_' (the one just before the age-group digits) with a '.' to make it easier to separate later.**
# Swap only the trailing "_<digits>" separator for ".<digits>"; underscores
# earlier in a name (e.g. the "nokids_" prefix) are left untouched because the
# pattern is anchored to the end of the string.
names(mr) <- sub("_(\\d+)$", ".\\1", names(mr))
colnames(mr)
## [1] "X" "year" "date"
## [4] "all.2534" "HS.2534" "SC.2534"
## [7] "BAp.2534" "BAo.2534" "GD.2534"
## [10] "White.2534" "Black.2534" "Hisp.2534"
## [13] "NE.2534" "MA.2534" "Midwest.2534"
## [16] "South.2534" "Mountain.2534" "Pacific.2534"
## [19] "poor.2534" "mid.2534" "rich.2534"
## [22] "all.3544" "HS.3544" "SC.3544"
## [25] "BAp.3544" "BAo.3544" "GD.3544"
## [28] "White.3544" "Black.3544" "Hisp.3544"
## [31] "NE.3544" "MA.3544" "Midwest.3544"
## [34] "South.3544" "Mountain.3544" "Pacific.3544"
## [37] "poor.3544" "mid.3544" "rich.3544"
## [40] "all.4554" "HS.4554" "SC.4554"
## [43] "BAp.4554" "BAo.4554" "GD.4554"
## [46] "White.4554" "Black.4554" "Hisp.4554"
## [49] "NE.4554" "MA.4554" "Midwest.4554"
## [52] "South.4554" "Mountain.4554" "Pacific.4554"
## [55] "poor.4554" "mid.4554" "rich.4554"
## [58] "nokids_all.2534" "kids_all.2534" "nokids_HS.2534"
## [61] "nokids_SC.2534" "nokids_BAp.2534" "nokids_BAo.2534"
## [64] "nokids_GD.2534" "kids_HS.2534" "kids_SC.2534"
## [67] "kids_BAp.2534" "kids_BAo.2534" "kids_GD.2534"
## [70] "nokids_poor.2534" "nokids_mid.2534" "nokids_rich.2534"
## [73] "kids_poor.2534" "kids_mid.2534" "kids_rich.2534"
Gather the rate columns into long format, then split each former column name into two columns, ‘category’ and ‘age_group’
# Reshape from wide to long: every column whose name ends in ".NNNN" becomes a
# (age_group, marriage_rate) pair, then the former column name is split at the
# "." into `category` (text before the dot) and `age_group` (the four digits).
marriage_rates <- mr %>%
  # The dot is escaped and the digit count fixed at four so that only the
  # renamed "<category>.<age group>" columns are selected.
  gather("age_group", "marriage_rate", matches("\\.\\d{4}$")) %>%
  # Split on the literal "." introduced by the earlier rename.
  separate("age_group", c("category", "age_group"), sep = "\\.")
head(marriage_rates)
## X year date category age_group marriage_rate
## 1 1 1960 1960-01-01 all 2534 0.1233145
## 2 2 1970 1970-01-01 all 2534 0.1269715
## 3 3 1980 1980-01-01 all 2534 0.1991767
## 4 4 1990 1990-01-01 all 2534 0.2968306
## 5 5 2000 2000-01-01 all 2534 0.3450087
## 6 6 2001 2001-01-01 all 2534 0.3527767
Reformat the age-group values (e.g. "2534" becomes "25-34") and convert marriage_rate to a percentage rounded to one decimal place
unique(marriage_rates$age_group) # before: age groups are still unhyphenated 4-digit codes
## [1] "2534" "3544" "4554"
# In one pass: put a hyphen before the last two digits of the age-group code,
# add the rate as a percentage rounded to one decimal place, and drop the raw
# proportion column.
marriage_rates <- marriage_rates %>%
  mutate(
    age_group = sub("(\\d\\d)$", "-\\1", age_group),
    marriage_rate_percentage = round(marriage_rate * 100, 1)
  ) %>%
  select(-marriage_rate)

# Show the age-group labels after the change.
unique(marriage_rates$age_group)
## [1] "25-34" "35-44" "45-54"
# Overall series: the "all" category for every age group.
all <- filter(marriage_rates, category == "all")

# Race/ethnicity series, restricted to the 25-34 age group.
race <- filter(
  marriage_rates,
  category %in% c("White", "Black", "Hisp"),
  age_group == "25-34"
)

# Regional series, restricted to the 25-34 age group. Kept in its own object:
# it used to be assigned to `race`, overwriting the subset above, so the
# "by Race" chart actually drew regions.
region <- filter(
  marriage_rates,
  category %in% c("NE", "MA", "Midwest", "South", "Mountain", "Pacific"),
  age_group == "25-34"
)

# One line per age group for the overall series.
ggplot(
  data = all,
  aes(x = year, y = marriage_rate_percentage, group = age_group, colour = age_group)
) +
  geom_line() +
  geom_point(size = 1, shape = 21) +
  ggtitle("Marriage Rate by Age Group")

# One line per race/ethnicity category (ages 25-34).
ggplot(
  data = race,
  aes(x = year, y = marriage_rate_percentage, group = category, colour = category)
) +
  geom_line() +
  geom_point(size = 1, shape = 21) +
  ggtitle("Marriage Rate by Race: Ages 25-34")

# One line per region (ages 25-34, per the filter above).
ggplot(
  data = region,
  aes(x = year, y = marriage_rate_percentage, group = category, colour = category)
) +
  geom_line() +
  geom_point(size = 1, shape = 21) +
  ggtitle("Marriage Rate by Region")