library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(skimr)
library(ggthemes)

Importing the data

# Observed annual average temperatures for the Lower 48 states
tempState <- read_csv("https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/climdiv_state_year.csv")
## Rows: 6000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): fips
## dbl (3): year, temp, tempc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Observed annual average temperatures for counties in the Lower 48 states
tempCounty <- read_csv("https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/climdiv_county_year.csv")
## Rows: 388375 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): fips
## dbl (3): year, temp, tempc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Temperature change estimates for each of the Lower 48 states
tempChangeState <- read_csv("https://github.com/washingtonpost/data-2C-beyond-the-limit-usa/raw/main/data/processed/model_state.csv")
## Rows: 48 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): fips, max_warming_season, STUSAB, STATE_NAME, STATENS
## dbl (5): Fall, Spring, Summer, Winter, Annual
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Temperature change estimates for counties in the contiguous U.S.
tempChangeCounty <- read_csv("https://github.com/washingtonpost/data-2C-beyond-the-limit-usa/raw/main/data/processed/model_county.csv")
## Rows: 3107 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): fips, max_warming_season, CTYNAME, STNAME
## dbl (5): Fall, Spring, Summer, Winter, Annual
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tempState)
## Rows: 6,000
## Columns: 4
## $ fips  <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01"…
## $ year  <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp  <dbl> 61.64167, 64.26667, 64.19167, 62.98333, 63.10000, 63.40833, 61.3…
## $ tempc <dbl> 16.46759, 17.92593, 17.88426, 17.21296, 17.27778, 17.44907, 16.3…
glimpse(tempCounty)
## Rows: 388,375
## Columns: 4
## $ fips  <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01001", "…
## $ year  <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp  <dbl> 62.63333, 65.34167, 65.15000, 63.81667, 63.92500, 64.17500, 62.2…
## $ tempc <dbl> 17.01852, 18.52315, 18.41667, 17.67593, 17.73611, 17.87500, 16.8…
glimpse(tempChangeState)
## Rows: 48
## Columns: 10
## $ fips               <chr> "01", "04", "05", "06", "08", "09", "10", "12", "13…
## $ Fall               <dbl> -0.19566843, 1.20395062, -0.04253968, 1.57092063, 1…
## $ Spring             <dbl> -0.1058624, 1.3844797, 0.2663986, 1.4492416, 1.4369…
## $ Summer             <dbl> -0.32500882, 1.27445503, 0.05859612, 1.47833510, 1.…
## $ Winter             <dbl> 0.4585256, 1.3883880, 0.5322469, 1.4124303, 1.83875…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Fall", "Winter", "Wi…
## $ Annual             <dbl> -0.03504762, 1.31988007, 0.21407407, 1.48056085, 1.…
## $ STUSAB             <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA…
## $ STATE_NAME         <chr> "Alabama", "Arizona", "Arkansas", "California", "Co…
## $ STATENS            <chr> "01779775", "01779777", "00068085", "01779778", "01…
glimpse(tempChangeCounty)
## Rows: 3,107
## Columns: 9
## $ fips               <chr> "01001", "01003", "01005", "01007", "01009", "01011…
## $ Fall               <dbl> -0.248564374, 0.049693122, 0.179485009, -0.39816578…
## $ Spring             <dbl> -0.073735450, 0.060035273, 0.127492063, -0.21007407…
## $ Summer             <dbl> -0.307132275, -0.007407407, -0.061220459, -0.576084…
## $ Winter             <dbl> 0.2700388, 0.4445855, 0.8911323, 0.5307937, 0.73578…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Winter", "Winter", "…
## $ Annual             <dbl> -0.079968254, 0.142994709, 0.300250441, -0.15266666…
## $ CTYNAME            <chr> "Autauga County", "Baldwin County", "Barbour County…
## $ STNAME             <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabam…
skim(tempState)
skim(tempCounty)
skim(tempChangeState)
skim(tempChangeCounty)

There are no missing values

line chart

tempChangeState %>% select(fips, STATE_NAME)%>% right_join(tempState)%>% filter(STATE_NAME %in% c("Michigan", "Minnesota")) %>% ggplot(aes(x = year, y = temp, color = STATE_NAME)) +
  geom_line() +
  scale_color_colorblind() +
  labs(x = "Year", y =  "Temperature (F)", title = "Temerature of Michigan and Minnesota, 1895-2019", caption = "Data source: The Washington Post and the NOAA nCLimDiv and nClimGrid data sets", color = "") + 
  theme_bw() +
  theme(legend.position = "bottom")
## Joining, by = "fips"

pivoting to long format

tempChangeStateLong <-tempChangeState %>% 
  pivot_longer(cols = c(Fall:Winter, Annual),
               names_to = "Period",
               values_to = "Change")

tempChangeCountyLong <-tempChangeCounty %>% 
  pivot_longer(cols = c(Fall:Winter, Annual),
               names_to = "Period",
               values_to = "Change")
tempChangeCountyLong %>% slice_max(Change, n =10)
## # A tibble: 10 × 6
##    fips  max_warming_season CTYNAME          STNAME       Period Change
##    <chr> <chr>              <chr>            <chr>        <chr>   <dbl>
##  1 27135 Winter             Roseau County    Minnesota    Winter   3.74
##  2 27069 Winter             Kittson County   Minnesota    Winter   3.65
##  3 38015 Winter             Burleigh County  North Dakota Winter   3.60
##  4 38041 Winter             Hettinger County North Dakota Winter   3.56
##  5 27061 Winter             Itasca County    Minnesota    Winter   3.54
##  6 38009 Winter             Bottineau County North Dakota Winter   3.54
##  7 38105 Winter             Williams County  North Dakota Winter   3.54
##  8 38079 Winter             Rolette County   North Dakota Winter   3.53
##  9 38053 Winter             McKenzie County  North Dakota Winter   3.52
## 10 38083 Winter             Sheridan County  North Dakota Winter   3.52

bot plot

tempChangeStateLong %>% filter(Period != "Annual") %>% ggplot(aes(x = fct_relevel(Period, "Fall", "Winter", "Spring", "Summer"), y = Change)) + 
  geom_boxplot(fill = "dodgerblue") +
  labs(title = "State-level temperature changes",
       subtitle = "The lower 48 continuous United States, 1895-2019", x = "", y = "Temperature change (f)", caption = "Data Source: The Washington Post and the NOAA nClimDiv and nClimGrid data sets", color = "") +
  theme_bw()

## pivoting data to a wider format

data(us_rent_income, package = "tidyr")
glimpse(us_rent_income)
## Rows: 104
## Columns: 5
## $ GEOID    <chr> "01", "01", "02", "02", "04", "04", "05", "05", "06", "06", "…
## $ NAME     <chr> "Alabama", "Alabama", "Alaska", "Alaska", "Arizona", "Arizona…
## $ variable <chr> "income", "rent", "income", "rent", "income", "rent", "income…
## $ estimate <dbl> 24476, 747, 32940, 1200, 27517, 972, 23789, 709, 29454, 1358,…
## $ moe      <dbl> 136, 3, 508, 13, 148, 4, 165, 5, 109, 3, 109, 5, 195, 5, 247,…
us_rent_income_wide <- us_rent_income %>%pivot_wider(id_cols = GEOID:NAME, names_from = variable, values_from = c(estimate, moe))

##bar plot

ratios <- us_rent_income_wide %>% mutate(rent_income_ratio = estimate_rent / estimate_income)

ratios %>% slice_min(rent_income_ratio, n = 10) %>%
  bind_rows(ratios %>% slice_max(rent_income_ratio, n =10)) %>% ggplot(aes(x = fct_reorder(NAME, rent_income_ratio),
                                   y = rent_income_ratio)) +
  geom_col(fill = "dodgerblue", color = "black") +
  labs(title = "Rent to income ratio, Inited States, 2017",
       x = "",
       y = "Median rent to median income ratio",
       caption = "Data source: The tidycensus R package & the american Cummunity Survey") +
  coord_flip() +
  theme_bw()