##Paquetes
library(dslabs)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Working copy of the `us_contagious_diseases` data set from dslabs
data <- dslabs::us_contagious_diseases
## First year on record for each disease (minimum `year` per `disease`)
data %>%
  group_by(disease) %>%
  summarise(start = min(year), .groups = "drop")
## # A tibble: 7 × 2
## disease start
## <fct> <dbl>
## 1 Hepatitis A 1966
## 2 Measles 1928
## 3 Mumps 1968
## 4 Pertussis 1938
## 5 Polio 1928
## 6 Rubella 1966
## 7 Smallpox 1928
## Disease with the most total cases in each state.
## Counts are summed per (state, disease). `.groups = "drop_last"` states
## explicitly that the summary stays grouped by state, which is what the
## per-state pick below relies on (and it silences the regrouping message).
## `slice_max()` with `with_ties = FALSE` returns exactly one row per state,
## replacing the previous full sort followed by `slice(1)`.
data %>%
  group_by(state, disease) %>%
  summarise(total_cases = sum(count), .groups = "drop_last") %>%
  slice_max(total_cases, n = 1, with_ties = FALSE)
## # A tibble: 51 × 3
## # Groups: state [51]
## state disease total_cases
## <fct> <fct> <dbl>
## 1 Alabama Measles 209582
## 2 Alaska Measles 16908
## 3 Arizona Measles 160810
## 4 Arkansas Measles 134250
## 5 California Measles 1376847
## 6 Colorado Measles 245619
## 7 Connecticut Measles 372473
## 8 Delaware Measles 34197
## 9 District Of Columbia Measles 63082
## 10 Florida Measles 165020
## # ℹ 41 more rows
## How measles cases in California changed over time:
## total `count` per year, listed in chronological order
data %>%
  filter(state == "California" & disease == "Measles") %>%
  group_by(year) %>%
  summarise(total_cases = sum(count), .groups = "drop") %>%
  arrange(year)
## # A tibble: 75 × 2
## year total_cases
## <dbl> <dbl>
## 1 1928 3698
## 2 1929 4024
## 3 1930 43416
## 4 1931 27807
## 5 1932 12618
## 6 1933 26551
## 7 1934 25650
## 8 1935 28799
## 9 1936 49050
## 10 1937 5107
## # ℹ 65 more rows
## Total polio cases per state in 1950, largest totals first
data %>%
  filter(disease == "Polio" & year == 1950) %>%
  group_by(state) %>%
  summarise(total_cases = sum(count), .groups = "drop") %>%
  arrange(desc(total_cases))
## # A tibble: 51 × 2
## state total_cases
## <fct> <dbl>
## 1 New York 4231
## 2 Texas 2747
## 3 California 2090
## 4 Michigan 2014
## 5 Illinois 1916
## 6 Ohio 1802
## 7 Pennsylvania 1334
## 8 Iowa 1282
## 9 Virginia 1217
## 10 Wisconsin 953
## # ℹ 41 more rows
## Measles cases per 10,000 inhabitants by state in 1942, highest rate first.
## NOTE(review): the original note calls 1942 the measles peak year; that is
## not checked anywhere in this script.
## The rate is count / population * 10000, summed within each state. No
## `na.rm` is used, so an NA in `count` or `population` makes that state's
## rate NA.
data %>%
  filter(disease == "Measles" & year == 1942) %>%
  group_by(state) %>%
  summarise(
    cases_per_10k = sum((count / population) * 10000),
    .groups = "drop"
  ) %>%
  arrange(desc(cases_per_10k))
## # A tibble: 51 × 2
## state cases_per_10k
## <fct> <dbl>
## 1 Utah 262.
## 2 California 128.
## 3 Vermont 102.
## 4 Nevada 95.8
## 5 Wisconsin 75.0
## 6 Arizona 73.8
## 7 Washington 67.2
## 8 Rhode Island 62.9
## 9 Wyoming 62.1
## 10 Connecticut 60.3
## # ℹ 41 more rows