Harold Nelson
3/4/2019
library(tidyverse)
## ── Attaching packages ────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the tab-delimited CDC WONDER natality export into a tibble.
# readr reports parsing failures for rows that hold a single column instead
# of ten; inspect them with problems(Natality_2007_2017).
Natality_2007_2017 <- read_delim(
  file = "~/Downloads/Natality, 2007-2017.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## Notes = col_character(),
## State = col_character(),
## SCode = col_character(),
## Year = col_integer(),
## YearCode = col_integer(),
## Age = col_character(),
## AgeCode = col_character(),
## Births = col_integer(),
## Fpop = col_character(),
## Rate = col_character()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 47 parsing failures.
## # A tibble: 5 x 5
##     row col   expected   actual    file
##   <int> <chr> <chr>      <chr>     <chr>
## 1   414 <NA>  10 columns 1 columns '~/Downloads/Natality, 2007-2017.txt'
## 2   415 <NA>  10 columns 1 columns '~/Downloads/Natality, 2007-2017.txt'
## 3   416 <NA>  10 columns 1 columns '~/Downloads/Natality, 2007-2017.txt'
## 4   417 <NA>  10 columns 1 columns '~/Downloads/Natality, 2007-2017.txt'
## 5   418 <NA>  10 columns 1 columns '~/Downloads/Natality, 2007-2017.txt'
## ... (remaining parsing failures not shown)
## See problems(...) for more details.
For each age group of interest, extract the birth rate for each state, and sort the dataframe by that rate.
# Per-state rate for the 25-29 age group, ordered from lowest to highest.
# Rate is read in as character, so it is converted to numeric here.
srate25_29 <- Natality_2007_2017 %>%
  filter(AgeCode == "25-29") %>%
  transmute(State, Rate25_29 = as.numeric(Rate)) %>%
  arrange(Rate25_29)

# Print the result.
srate25_29
## # A tibble: 51 x 2
## State Rate25_29
## <chr> <dbl>
## 1 District of Columbia 44.9
## 2 Massachusetts 69.5
## 3 New York 79.0
## 4 Rhode Island 79.7
## 5 California 82.6
## 6 Connecticut 83.8
## 7 Oregon 85.8
## 8 Colorado 85.8
## 9 New Jersey 89.5
## 10 Maryland 91.7
## # … with 41 more rows
# Per-state rate for the 20-24 age group, ordered from lowest to highest.
# Rate is read in as character, so it is converted to numeric here.
srate20_24 <- Natality_2007_2017 %>%
  filter(AgeCode == "20-24") %>%
  transmute(State, Rate20_24 = as.numeric(Rate)) %>%
  arrange(Rate20_24)

# Print the result.
srate20_24
## # A tibble: 51 x 2
## State Rate20_24
## <chr> <dbl>
## 1 Massachusetts 32.4
## 2 Connecticut 38.2
## 3 Vermont 40.0
## 4 New Hampshire 40.8
## 5 Rhode Island 46.4
## 6 New Jersey 47.8
## 7 District of Columbia 49.6
## 8 New York 52.4
## 9 Minnesota 54.3
## 10 California 57.8
## # … with 41 more rows
# Combine the two per-state tables into one row per state. No `by` is given,
# so the join falls back to the column the two tables share.
both <- full_join(srate20_24, srate25_29)
## Joining, by = "State"
both
## # A tibble: 51 x 3
## State Rate20_24 Rate25_29
## <chr> <dbl> <dbl>
## 1 Massachusetts 32.4 69.5
## 2 Connecticut 38.2 83.8
## 3 Vermont 40.0 93.4
## 4 New Hampshire 40.8 92.2
## 5 Rhode Island 46.4 79.7
## 6 New Jersey 47.8 89.5
## 7 District of Columbia 49.6 44.9
## 8 New York 52.4 79.0
## 9 Minnesota 54.3 114.
## 10 California 57.8 82.6
## # … with 41 more rows
# Static scatterplot: one point per row of `both`, with the 20-24 rate on
# the x axis and the 25-29 rate on the y axis.
p <- ggplot(both, aes(x = Rate20_24, y = Rate25_29)) +
  geom_point()
p
# Interactive with plotly
# Same scatterplot as above, with State added to the aesthetics via `group`
# (presumably so the state name shows up in the hover tooltip -- confirm).
pI <- ggplot(both, aes(x = Rate20_24, y = Rate25_29, group = State)) +
  geom_point()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(pI)
We’ve downloaded one year of data from CDC WONDER and examined birth rates for two age groups.
We want more years of data. We want a comprehensive births metric.