library(socsci)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: rlang
##
##
## Attaching package: 'rlang'
##
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
##
##
## Loading required package: scales
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Loading required package: broom
##
## Loading required package: glue
# Read the raw telehealth CSV into a tibble.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists.
tele <- read_csv(
  file = "C:/Users/ryanb/Downloads/teleheath data.csv"
)
## Rows: 12720 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): State, TelehealthType, ServiceType, ServiceCount, RatePer1000Benefi...
## dbl (2): Year, Month
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Okay, this data needs a lot of help.
First, let’s make those column names a lot easier to work with.
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Tidy the column names with janitor, then print the tibble to inspect them.
tele <- clean_names(tele)
tele
## # A tibble: 12,720 × 8
## state year month telehealth_type service_type service_count
## <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Alabama 2018 201801 Live audio/video All 6,905
## 2 Alabama 2018 201801 Other telehealth All -
## 3 Alabama 2018 201801 Remote patient monitoring All -
## 4 Alabama 2018 201801 Store and forward All -
## 5 Alabama 2018 201802 Live audio/video All 4,821
## 6 Alabama 2018 201802 Other telehealth All -
## 7 Alabama 2018 201802 Remote patient monitoring All -
## 8 Alabama 2018 201802 Store and forward All -
## 9 Alabama 2018 201803 Live audio/video All 2,269
## 10 Alabama 2018 201803 Other telehealth All -
## # ℹ 12,710 more rows
## # ℹ 2 more variables: rate_per1000beneficiaries <chr>, data_quality <chr>
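Better. But there’s an issue here. See how the service_count column says `<chr>`? That means R is treating it as text rather than numbers, because the values contain commas and use "-" as a placeholder. We need to convert it to a numeric column.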
# Convert service_count from text to numeric:
#   1. turn the "-" placeholder into "0",
#   2. strip the thousands separators,
#   3. coerce the cleaned strings to numbers.
# Any value that is still not a number after these steps becomes NA (with a
# coercion warning).
tele <- tele %>%
  mutate(
    service_count = as.numeric(
      service_count %>%
        replace(. == "-", "0") %>%
        gsub(",", "", x = .)
    )
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `service_count = as.numeric(...)`.
## Caused by warning:
## ! NAs introduced by coercion
# Print the tibble again to check the converted service_count column.
tele
## # A tibble: 12,720 × 8
## state year month telehealth_type service_type service_count
## <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 Alabama 2018 201801 Live audio/video All 6905
## 2 Alabama 2018 201801 Other telehealth All 0
## 3 Alabama 2018 201801 Remote patient monitoring All 0
## 4 Alabama 2018 201801 Store and forward All 0
## 5 Alabama 2018 201802 Live audio/video All 4821
## 6 Alabama 2018 201802 Other telehealth All 0
## 7 Alabama 2018 201802 Remote patient monitoring All 0
## 8 Alabama 2018 201802 Store and forward All 0
## 9 Alabama 2018 201803 Live audio/video All 2269
## 10 Alabama 2018 201803 Other telehealth All 0
## # ℹ 12,710 more rows
## # ℹ 2 more variables: rate_per1000beneficiaries <chr>, data_quality <chr>
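See how it now says `<dbl>`? The column is numeric. The warning above means at least one value still could not be read as a number, so it was turned into NA.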
Now, we need to calculate the number of services per state per year. I taught you part of this already.
# Total services per state per year.
# I didn't teach you the summarise() part: it adds together the service_count
# column for each state in each year and ignores the NA values.
# The result is still grouped by state after summarising.
graph <- summarise(
  group_by(tele, state, year),
  total = sum(service_count, na.rm = TRUE)
)
## `summarise()` has grouped output by 'state'. You can override using the
## `.groups` argument.
# Print the per-state, per-year totals.
graph
## # A tibble: 265 × 3
## # Groups: state [53]
## state year total
## <chr> <dbl> <dbl>
## 1 Alabama 2018 33837
## 2 Alabama 2019 28987
## 3 Alabama 2020 756325
## 4 Alabama 2021 541948
## 5 Alabama 2022 361041
## 6 Alaska 2018 17770
## 7 Alaska 2019 23141
## 8 Alaska 2020 329553
## 9 Alaska 2021 303796
## 10 Alaska 2022 219381
## # ℹ 255 more rows
And now we are a lot closer to where we want to be. We just need to graph this out with the geofacet package.
library(geofacet)
# Plot yearly totals as a line, one panel per state.
# facet_geo() places each panel according to geofacet's grid; states whose
# names are not in that grid are dropped with a message.
ggplot(data = graph, mapping = aes(x = year, y = total)) +
  geom_line() +
  facet_geo(~ state)
## Some values in the specified facet_geo column 'state' do not match the
## 'name' column of the specified grid and will be removed: Puerto Rico,
## Virgin Islands