##Paquetes

library(dslabs)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Dataset ----
# Contagious-disease records shipped with the dslabs package. The pipelines in
# this script use its state, year, disease, count and population columns.
# NOTE(review): `data` is also the name of utils::data(); the name is kept
# because the queries in this script read from it.
data <- dslabs::us_contagious_diseases

# Earliest year present in the data for each disease ----
# One output row per disease; `start` is the minimum of `year` in that group.
data %>%
  group_by(disease) %>%
  summarise(start = min(year))
## # A tibble: 7 × 2
##   disease     start
##   <fct>       <dbl>
## 1 Hepatitis A  1966
## 2 Measles      1928
## 3 Mumps        1968
## 4 Pertussis    1938
## 5 Polio        1928
## 6 Rubella      1966
## 7 Smallpox     1928

# Disease with the most cumulative cases in each state ----
data %>%
  group_by(state, disease) %>%
  # Total the counts per state/disease pair. `.groups` is stated explicitly so
  # summarise() keeps only the `state` grouping without printing its
  # "has grouped output by" message.
  summarise(total_cases = sum(count), .groups = "drop_last") %>%
  # Within each state keep the single row with the largest total;
  # with_ties = FALSE guarantees exactly one row per state.
  slice_max(total_cases, n = 1, with_ties = FALSE) %>%
  # Hand back an ungrouped tibble so later verbs do not run per state.
  ungroup()
## # A tibble: 51 × 3
##    state                disease total_cases
##    <fct>                <fct>         <dbl>
##  1 Alabama              Measles      209582
##  2 Alaska               Measles       16908
##  3 Arizona              Measles      160810
##  4 Arkansas             Measles      134250
##  5 California           Measles     1376847
##  6 Colorado             Measles      245619
##  7 Connecticut          Measles      372473
##  8 Delaware             Measles       34197
##  9 District Of Columbia Measles       63082
## 10 Florida              Measles      165020
## # ℹ 41 more rows

# How measles cases in California changed over time ----
# Keeps only the California measles rows, totals `count` per year and lists
# the years in ascending order.
data %>%
  filter(disease == "Measles", state == "California") %>%
  group_by(year) %>%
  summarise(total_cases = sum(count)) %>%
  arrange(year)
## # A tibble: 75 × 2
##     year total_cases
##    <dbl>       <dbl>
##  1  1928        3698
##  2  1929        4024
##  3  1930       43416
##  4  1931       27807
##  5  1932       12618
##  6  1933       26551
##  7  1934       25650
##  8  1935       28799
##  9  1936       49050
## 10  1937        5107
## # ℹ 65 more rows

# Total polio cases per state in 1950 ----
# Keeps only the 1950 polio rows, totals `count` per state and sorts the
# states from the largest total to the smallest.
data %>%
  filter(year == 1950, disease == "Polio") %>%
  group_by(state) %>%
  summarise(total_cases = sum(count)) %>%
  arrange(desc(total_cases))
## # A tibble: 51 × 2
##    state        total_cases
##    <fct>              <dbl>
##  1 New York            4231
##  2 Texas               2747
##  3 California          2090
##  4 Michigan            2014
##  5 Illinois            1916
##  6 Ohio                1802
##  7 Pennsylvania        1334
##  8 Iowa                1282
##  9 Virginia            1217
## 10 Wisconsin            953
## # ℹ 41 more rows

# Measles cases per 10,000 inhabitants by state in 1942 ----
# (the original note describes 1942 as the year measles peaked)
data %>%
  filter(year == 1942, disease == "Measles") %>%
  # Row-level rate: cases divided by population, scaled to 10,000 people.
  mutate(cases_per_10k = 10000 * (count / population)) %>%
  group_by(state) %>%
  # Add up the row-level rates within each state.
  summarise(cases_per_10k = sum(cases_per_10k)) %>%
  # Highest rate first.
  arrange(desc(cases_per_10k))
## # A tibble: 51 × 2
##    state        cases_per_10k
##    <fct>                <dbl>
##  1 Utah                 262. 
##  2 California           128. 
##  3 Vermont              102. 
##  4 Nevada                95.8
##  5 Wisconsin             75.0
##  6 Arizona               73.8
##  7 Washington            67.2
##  8 Rhode Island          62.9
##  9 Wyoming               62.1
## 10 Connecticut           60.3
## # ℹ 41 more rows