library(dplyr)
library(readr)
library(knitr)
library(ggplot2)
library(gtable)
library(wesanderson)
# Show the working directory; list.files() below searches it by default.
getwd()
## [1] "C:/Users/Chris Iyer/Documents/R/Mortality"
# Read every CSV file in the working directory and stack them into a single
# data frame. `pattern` is a regular expression, not a shell glob, so match
# the literal ".csv" extension at the end of the file name.
list_file <- list.files(pattern = "\\.csv$") %>%
  lapply(read.csv, stringsAsFactors = FALSE) %>%
  bind_rows()
# Preview the first rows of the combined data as a table.
kable(head(list_file))
measure_id measure_name location_id location_name FIPS sex_id sex age_id age_name year_id metric val upper lower
26 Life expectancy 523 Alabama 1 1 Male 161 0 1980 Years 68.11951 68.23987 67.99309
26 Life expectancy 523 Alabama 1 1 Male 161 0 1981 Years 68.43191 68.55604 68.31208
26 Life expectancy 523 Alabama 1 1 Male 161 0 1982 Years 68.89942 69.01831 68.77751
26 Life expectancy 523 Alabama 1 1 Male 161 0 1983 Years 69.16916 69.29072 69.05122
26 Life expectancy 523 Alabama 1 1 Male 161 0 1984 Years 69.21699 69.33221 69.10116
26 Life expectancy 523 Alabama 1 1 Male 161 0 1985 Years 69.18182 69.30204 69.06820

Plots life expectancy by state.

# Alphabetically sorted state names taken from the built-in `state.name`
# vector; used further down to keep state-level rows and drop county rows.
new_names <- sort(state.name)
print(new_names)
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
# Size of the combined data before any filtering.
dim(list_file)
## [1] 2012220      14
# Keep the sex-specific life-expectancy rows whose location is one of the
# state names, and round the estimate to two decimal places.
x <- list_file %>%
  filter(
    measure_name == "Life expectancy",
    sex != "Both",
    location_name %in% new_names
  ) %>%
  mutate(val = round(val, 2))
head(x)
##   measure_id    measure_name location_id location_name FIPS sex_id  sex
## 1         26 Life expectancy         523       Alabama    1      1 Male
## 2         26 Life expectancy         523       Alabama    1      1 Male
## 3         26 Life expectancy         523       Alabama    1      1 Male
## 4         26 Life expectancy         523       Alabama    1      1 Male
## 5         26 Life expectancy         523       Alabama    1      1 Male
## 6         26 Life expectancy         523       Alabama    1      1 Male
##   age_id age_name year_id metric   val    upper    lower
## 1    161        0    1980  Years 68.12 68.23987 67.99309
## 2    161        0    1981  Years 68.43 68.55604 68.31208
## 3    161        0    1982  Years 68.90 69.01831 68.77751
## 4    161        0    1983  Years 69.17 69.29072 69.05122
## 5    161        0    1984  Years 69.22 69.33221 69.10116
## 6    161        0    1985  Years 69.18 69.30204 69.06820
# Size of the state-level subset.
dim(x)
## [1] 3500   14
# One panel per state: life expectancy over time, one coloured line per sex.
yy <- ggplot(data = x, mapping = aes(x = year_id, y = val, colour = sex)) +
  geom_line() +
  facet_wrap(~ location_name, ncol = 5)
yy

But the plots above don’t show how each state compares to the US average.

# National life expectancy by sex and year, used as the reference series for
# every state.
USA <- list_file %>%
  filter(
    grepl("United", location_name),
    measure_name == "Life expectancy",
    sex != "Both"
  ) %>%
  select(sex, year_id, val) %>%
  rename(Sex = sex, Year = year_id, US_Average = val)
# The join below needs exactly one national value per sex and year; duplicates
# would silently multiply the state rows.
stopifnot(anyDuplicated(USA[, c("Sex", "Year")]) == 0)
# Attach the national value to each state row by matching on sex and year.
# (The earlier rep(USA, 50) + cbind approach repeated the columns rather than
# the rows and only lined up through recycling and an assumed row order.)
StateUS <- left_join(x, USA, by = c("sex" = "Sex", "year_id" = "Year"))
class(StateUS)
## [1] "data.frame"
# Keep only the columns needed for the comparison, selected by name instead of
# by position, and round the national value like the state value.
New <- StateUS %>%
  select(year_id, sex, location_name, val, US_Average) %>%
  mutate(US_Average = round(US_Average, digits = 2))
head(New %>% mutate(Difference = US_Average - val))
##   year_id  sex location_name   val US_Average Difference
## 1    1980 Male       Alabama 68.12      69.99       1.87
## 2    1981 Male       Alabama 68.43      70.35       1.92
## 3    1982 Male       Alabama 68.90      70.77       1.87
## 4    1983 Male       Alabama 69.17      71.02       1.85
## 5    1984 Male       Alabama 69.22      71.16       1.94
## 6    1985 Male       Alabama 69.18      71.17       1.99
# Per-state gap between the mean US average and the mean state value,
# returned as a plain data frame with the state name in `state`.
xxx <- New %>%
  group_by(location_name) %>%
  summarise(Difference = mean(US_Average) - mean(val)) %>%
  rename(state = location_name) %>%
  as.data.frame()
# Overall gap between the mean US average and the mean state value, computed
# across all rows of `New`.
Difference_Variable <- mean(New$US_Average) - mean(New$val)
# Base plot: one panel per state, state life expectancy over time by sex.
# A plot can have only one colour scale. The earlier version added
# scale_color_manual() and then scale_color_hue(), so the hue scale replaced
# the manual palette (with a ggplot2 message). Only the effective scale is
# kept, which leaves the rendered colours unchanged.
# The legend title is blanked, and the legend key size is overridden to 3.
aa <- ggplot(New, aes(x = year_id, y = val, color = sex)) +
  geom_line(size = 1.05) +
  facet_wrap(~ location_name, ncol = 5) +
  labs(colour = "") +
  scale_color_hue(c = 75, l = 60) +
  guides(colour = guide_legend(override.aes = list(size = 3)))

# Overlay the US average as small points on every panel, then apply the
# black-and-white theme, move the legend to the top and add the titles.
aa +
  geom_point(mapping = aes(y = US_Average), size = 0.5, alpha = 1) +
  scale_shape_discrete(solid = FALSE) +
  theme_bw() +
  theme(legend.position = "top") +
  ggtitle(
    label = "Life Expectancy by State",
    subtitle = "State Average: Solid Line\nUS Average: Dotted line  "
  )