library(tidyverse)
library(skimr)
library(ggthemes)

First, import climate change data from GitHub into R using the code below

# All four files live in the same folder of the Washington Post repository.
# Build every URL from one base so they all use the direct
# raw.githubusercontent.com host (the github.com/<repo>/raw/... form only works
# through an HTTP redirect to this same location).
data_url <- "https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed"

# Observed annual average temperatures for the Lower 48 states
tempState <- read_csv(file.path(data_url, "climdiv_state_year.csv"))
# Observed annual average temperatures for counties in the Lower 48 states
tempCounty <- read_csv(file.path(data_url, "climdiv_county_year.csv"))
# Temperature change estimates for each of the Lower 48 states
tempChangeState <- read_csv(file.path(data_url, "model_state.csv"))
# Temperature change estimates for counties in the contiguous U.S.
tempChangeCounty <- read_csv(file.path(data_url, "model_county.csv"))

Using glimpse() to explore the four data sets

# Use glimpse() to inspect the structure (row count, columns, column types) of each of the four data sets
# Observed annual average temperatures for the Lower 48 states
glimpse(tempState)
## Rows: 6,000
## Columns: 4
## $ fips  <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01"…
## $ year  <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp  <dbl> 61.64167, 64.26667, 64.19167, 62.98333, 63.10000, 63.40833, 61.3…
## $ tempc <dbl> 16.46759, 17.92593, 17.88426, 17.21296, 17.27778, 17.44907, 16.3…
# Observed annual average temperatures for counties in the Lower 48 states
glimpse(tempCounty)
## Rows: 388,375
## Columns: 4
## $ fips  <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01001", "…
## $ year  <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp  <dbl> 62.63333, 65.34167, 65.15000, 63.81667, 63.92500, 64.17500, 62.2…
## $ tempc <dbl> 17.01852, 18.52315, 18.41667, 17.67593, 17.73611, 17.87500, 16.8…
# Temperature change estimates for each of the Lower 48 states
glimpse(tempChangeState)
## Rows: 48
## Columns: 10
## $ fips               <chr> "01", "04", "05", "06", "08", "09", "10", "12", "13…
## $ Fall               <dbl> -0.19566843, 1.20395062, -0.04253968, 1.57092063, 1…
## $ Spring             <dbl> -0.1058624, 1.3844797, 0.2663986, 1.4492416, 1.4369…
## $ Summer             <dbl> -0.32500882, 1.27445503, 0.05859612, 1.47833510, 1.…
## $ Winter             <dbl> 0.4585256, 1.3883880, 0.5322469, 1.4124303, 1.83875…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Fall", "Winter", "Wi…
## $ Annual             <dbl> -0.03504762, 1.31988007, 0.21407407, 1.48056085, 1.…
## $ STUSAB             <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA…
## $ STATE_NAME         <chr> "Alabama", "Arizona", "Arkansas", "California", "Co…
## $ STATENS            <chr> "01779775", "01779777", "00068085", "01779778", "01…
# Temperature change estimates for counties in the contiguous U.S.
glimpse(tempChangeCounty)
## Rows: 3,107
## Columns: 9
## $ fips               <chr> "01001", "01003", "01005", "01007", "01009", "01011…
## $ Fall               <dbl> -0.248564374, 0.049693122, 0.179485009, -0.39816578…
## $ Spring             <dbl> -0.073735450, 0.060035273, 0.127492063, -0.21007407…
## $ Summer             <dbl> -0.307132275, -0.007407407, -0.061220459, -0.576084…
## $ Winter             <dbl> 0.2700388, 0.4445855, 0.8911323, 0.5307937, 0.73578…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Winter", "Winter", "…
## $ Annual             <dbl> -0.079968254, 0.142994709, 0.300250441, -0.15266666…
## $ CTYNAME            <chr> "Autauga County", "Baldwin County", "Barbour County…
## $ STNAME             <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabam…
# Are there any notable patterns of missing data in any of the data sets?
# skim() reports n_missing and complete_rate for every column, alongside
# summary statistics, so it doubles as a missing-data check.
# State-level observed temperatures:
skim(tempState)
Data summary
Name tempState
Number of rows 6000
Number of columns 4
_______________________
Column type frequency:
character 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
fips 0 1 2 2 0 48 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 1957.00 36.09 1895.00 1926.00 1957.00 1988.00 2019.00 ▇▇▇▇▇
temp 0 1 51.62 8.01 34.90 45.18 50.77 57.56 73.36 ▃▇▇▅▁
tempc 0 1 10.90 4.45 1.61 7.32 10.43 14.20 22.98 ▃▇▇▅▁
# Missing-data / summary check for the county-level observed temperatures
skim(tempCounty)
Data summary
Name tempCounty
Number of rows 388375
Number of columns 4
_______________________
Column type frequency:
character 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
fips 0 1 5 5 0 3107 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 1957.00 36.08 1895.00 1926.00 1957.00 1988.00 2019.00 ▇▇▇▇▇
temp 0 1 54.00 8.43 30.51 47.65 53.91 60.53 78.82 ▁▆▇▆▁
tempc 0 1 12.22 4.68 -0.83 8.69 12.17 15.85 26.01 ▁▆▇▆▁
# Missing-data / summary check for the state-level temperature change estimates
skim(tempChangeState)
Data summary
Name tempChangeState
Number of rows 48
Number of columns 10
_______________________
Column type frequency:
character 5
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
fips 0 1 2 2 0 48 0
max_warming_season 0 1 4 6 0 2 0
STUSAB 0 1 2 2 0 48 0
STATE_NAME 0 1 4 14 0 48 0
STATENS 0 1 8 8 0 48 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Fall 0 1 0.79 0.52 -0.20 0.36 0.77 1.16 1.66 ▅▇▆▇▇
Spring 0 1 1.00 0.48 -0.11 0.72 1.07 1.36 1.76 ▂▂▃▇▅
Summer 0 1 0.77 0.63 -0.33 0.21 0.87 1.28 2.11 ▇▇▆▇▂
Winter 0 1 1.67 0.71 0.34 1.19 1.51 2.27 3.15 ▂▇▃▃▃
Annual 0 1 1.06 0.55 -0.04 0.64 1.10 1.52 2.04 ▃▅▆▇▃
# Missing-data / summary check for the county-level temperature change estimates
skim(tempChangeCounty)
Data summary
Name tempChangeCounty
Number of rows 3107
Number of columns 9
_______________________
Column type frequency:
character 4
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
fips 0 1 5 5 0 3107 0
max_warming_season 0 1 4 6 0 4 0
CTYNAME 0 1 10 27 0 1842 0
STNAME 0 1 4 20 0 49 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Fall 0 1 0.54 0.54 -0.85 0.15 0.50 0.90 2.53 ▂▇▇▂▁
Spring 0 1 0.83 0.54 -0.61 0.44 0.83 1.21 2.79 ▂▇▇▂▁
Summer 0 1 0.46 0.62 -0.94 -0.01 0.34 0.87 2.58 ▂▇▅▂▁
Winter 0 1 1.41 0.71 -0.20 0.95 1.32 1.80 3.74 ▂▇▆▂▁
Annual 0 1 0.81 0.55 -0.57 0.40 0.74 1.20 2.54 ▂▇▇▃▁

There are no missing values in any of the four data sets: every variable has n_missing = 0 and a complete_rate of 1, so the data are complete.

Line chart

# Reproduce the line chart displaying the annual temperature (Fahrenheit) for
# Michigan and Minnesota from 1895 onward (the observations run through 2019).
# tempState only carries FIPS codes, so the full state names are attached from
# tempChangeState with a right_join().
tempChangeState %>%
  select(fips, STATE_NAME) %>%
  # Join explicitly on the FIPS code instead of relying on the implicit
  # natural join (which also prints a "Joining with `by = ...`" message).
  right_join(tempState, by = "fips") %>%
  filter(STATE_NAME %in% c("Michigan", "Minnesota")) %>%
  ggplot(aes(x = year, y = temp, color = STATE_NAME)) +
  geom_line() +
  scale_color_colorblind() +
  # labs() argument names are case-sensitive: it must be `caption`, not
  # `Caption`, otherwise the caption is never drawn.
  labs(
    title = "Temperature of Michigan and Minnesota, 1895-2019",
    x = "year",
    y = "Temperature (F)",
    caption = "The Washington Post and NOAA nClimDiv and nClimGrid data sets"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

Pivoting to long format

# Long-format state table: one row per state and period (the four seasons plus
# Annual), with the period name in `Period` and the estimate in `Change`.
tempChangeStateLong <- pivot_longer(
  tempChangeState,
  cols = all_of(c("Fall", "Spring", "Summer", "Winter", "Annual")),
  names_to = "Period",
  values_to = "Change"
)

# Long-format county table, reshaped the same way as the state table.
tempChangeCountyLong <- pivot_longer(
  tempChangeCounty,
  cols = all_of(c("Fall", "Spring", "Summer", "Winter", "Annual")),
  names_to = "Period",
  values_to = "Change"
)

# Looking at the ten largest temperature increases across all seasons and counties, are there any noticeable patterns?

# n = 10 so the result actually contains the "ten largest" increases asked for
# above (the previous n = 8 returned only eight rows).
tempChangeCountyLong %>%
  slice_max(Change, n = 10)
# NOTE: the printed output below was captured from an earlier run with n = 8;
# re-run the chunk to see all ten rows.
## # A tibble: 8 × 6
##   fips  max_warming_season CTYNAME          STNAME       Period Change
##   <chr> <chr>              <chr>            <chr>        <chr>   <dbl>
## 1 27135 Winter             Roseau County    Minnesota    Winter   3.74
## 2 27069 Winter             Kittson County   Minnesota    Winter   3.65
## 3 38015 Winter             Burleigh County  North Dakota Winter   3.60
## 4 38041 Winter             Hettinger County North Dakota Winter   3.56
## 5 27061 Winter             Itasca County    Minnesota    Winter   3.54
## 6 38009 Winter             Bottineau County North Dakota Winter   3.54
## 7 38105 Winter             Williams County  North Dakota Winter   3.54
## 8 38079 Winter             Rolette County   North Dakota Winter   3.53

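Looking at the ten largest temperature increases across all seasons and counties, are there any noticeable patterns? Yes: every one of the largest increases shown above occurs in winter, and all of them are in counties in Minnesota and North Dakota. The strongest warming is therefore concentrated in the northern Plains and Upper Midwest, and it is happening in winter rather than in the other seasons.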

Side-by-side box plot

# Next, create a side-by-side boxplot showing the state-level temperature changes for each season (Fall, Spring, Summer, and Winter).
# Use the state-level long table: the plot is titled and described as state
# level, so the county-level table (tempChangeCountyLong) is the wrong input.
tempChangeStateLong %>%
  # Drop the Annual rows so only the four seasons are compared.
  filter(Period != "Annual") %>%
  ggplot(aes(
    x = fct_relevel(Period, "Fall", "Winter", "Spring", "Summer"),
    y = Change
  )) +
  geom_boxplot(fill = "dodgerblue") +
  # labs() argument names are case-sensitive: it must be `caption`, not
  # `Caption`, otherwise the caption is never drawn.
  labs(
    title = "State Level temperature changes",
    subtitle = "The lower 48 contiguous United States, 1895-2019",
    x = "",
    y = "Temperature Change (F)",
    caption = "The Washington Post and NOAA nClimDiv and nClimGrid data sets"
  ) +
  theme_bw()

Pivoting Wider

# Load the us_rent_income data set that ships with the tidyr package
data(us_rent_income, package = "tidyr")

# Use glimpse() to view the us_rent_income data set.
# It is in long format: each GEOID/NAME appears once per `variable`
# ("income" and "rent"), with the value in `estimate` and its margin in `moe`.
glimpse(us_rent_income)
## Rows: 104
## Columns: 5
## $ GEOID    <chr> "01", "01", "02", "02", "04", "04", "05", "05", "06", "06", "…
## $ NAME     <chr> "Alabama", "Alabama", "Alaska", "Alaska", "Arizona", "Arizona…
## $ variable <chr> "income", "rent", "income", "rent", "income", "rent", "income…
## $ estimate <dbl> 24476, 747, 32940, 1200, 27517, 972, 23789, 709, 29454, 1358,…
## $ moe      <dbl> 136, 3, 508, 13, 148, 4, 165, 5, 109, 3, 109, 5, 195, 5, 247,…
# Build us_rent_income_wide: one row per GEOID/NAME, with the `estimate` and
# `moe` values spread into separate income and rent columns
# (estimate_income, estimate_rent, moe_income, moe_rent).
us_rent_income_wide <- pivot_wider(
  us_rent_income,
  id_cols = c(GEOID, NAME),
  names_from = variable,
  values_from = c(estimate, moe)
)
# Create a bar chart displaying the ratio of the median monthly rent and median
# monthly income of each state. Only include states with the ten highest and
# ten smallest ratios.

# In us_rent_income, "income" is the median *yearly* income while "rent" is the
# median *monthly* rent, so income is divided by 12 to put both on a monthly
# basis before taking the ratio.
ratios <- us_rent_income_wide %>%
  mutate(rent_income_ratio = estimate_rent / (estimate_income / 12))

ratios %>%
  slice_max(rent_income_ratio, n = 10) %>%
  bind_rows(ratios %>% slice_min(rent_income_ratio, n = 10)) %>%
  ggplot(aes(x = fct_reorder(NAME, rent_income_ratio), y = rent_income_ratio)) +
  geom_col(fill = "dodgerblue", color = "brown") +
  coord_flip() +
  # Axis labels follow the mapped aesthetics, not the flipped screen position:
  # x is the state name and y is the ratio.
  labs(
    title = "Rent to income ratio, United States, 2017",
    x = "State",
    y = "Rent to income ratio",
    caption = "Data Source: The tidycensus R package and the American Community Survey"
  ) +
  theme_bw()