Using glimpse() to explore the four data sets
# Use glimpse() to inspect each of the four data sets in turn.
# tempState: observed annual average temperatures for the Lower 48 states
tempState %>% glimpse()
## Rows: 6,000
## Columns: 4
## $ fips <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01"…
## $ year <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp <dbl> 61.64167, 64.26667, 64.19167, 62.98333, 63.10000, 63.40833, 61.3…
## $ tempc <dbl> 16.46759, 17.92593, 17.88426, 17.21296, 17.27778, 17.44907, 16.3…
# tempCounty: observed annual average temperatures for every county in the
# Lower 48 states
tempCounty %>% glimpse()
## Rows: 388,375
## Columns: 4
## $ fips <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01001", "…
## $ year <dbl> 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905…
## $ temp <dbl> 62.63333, 65.34167, 65.15000, 63.81667, 63.92500, 64.17500, 62.2…
## $ tempc <dbl> 17.01852, 18.52315, 18.41667, 17.67593, 17.73611, 17.87500, 16.8…
# tempChangeState: estimated temperature change for each of the Lower 48
# states (one row per state)
tempChangeState %>% glimpse()
## Rows: 48
## Columns: 10
## $ fips <chr> "01", "04", "05", "06", "08", "09", "10", "12", "13…
## $ Fall <dbl> -0.19566843, 1.20395062, -0.04253968, 1.57092063, 1…
## $ Spring <dbl> -0.1058624, 1.3844797, 0.2663986, 1.4492416, 1.4369…
## $ Summer <dbl> -0.32500882, 1.27445503, 0.05859612, 1.47833510, 1.…
## $ Winter <dbl> 0.4585256, 1.3883880, 0.5322469, 1.4124303, 1.83875…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Fall", "Winter", "Wi…
## $ Annual <dbl> -0.03504762, 1.31988007, 0.21407407, 1.48056085, 1.…
## $ STUSAB <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA…
## $ STATE_NAME <chr> "Alabama", "Arizona", "Arkansas", "California", "Co…
## $ STATENS <chr> "01779775", "01779777", "00068085", "01779778", "01…
# tempChangeCounty: estimated temperature change for each county in the
# contiguous U.S. (one row per county)
tempChangeCounty %>% glimpse()
## Rows: 3,107
## Columns: 9
## $ fips <chr> "01001", "01003", "01005", "01007", "01009", "01011…
## $ Fall <dbl> -0.248564374, 0.049693122, 0.179485009, -0.39816578…
## $ Spring <dbl> -0.073735450, 0.060035273, 0.127492063, -0.21007407…
## $ Summer <dbl> -0.307132275, -0.007407407, -0.061220459, -0.576084…
## $ Winter <dbl> 0.2700388, 0.4445855, 0.8911323, 0.5307937, 0.73578…
## $ max_warming_season <chr> "Winter", "Winter", "Winter", "Winter", "Winter", "…
## $ Annual <dbl> -0.079968254, 0.142994709, 0.300250441, -0.15266666…
## $ CTYNAME <chr> "Autauga County", "Baldwin County", "Barbour County…
## $ STNAME <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabam…
# Are there any notable patterns of missing data in any of the data sets?
# skim() prints a per-variable summary of the data frame; the missing-value
# count and complete rate it reports are what answer this question.
# The data frame is passed by name (not piped) so the printed summary shows
# "tempState" in its Name field.
skim(tempState)
Data summary
| Name |
tempState |
| Number of rows |
6000 |
| Number of columns |
4 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
3 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
Variable type: numeric
| year |
0 |
1 |
1957.00 |
36.09 |
1895.00 |
1926.00 |
1957.00 |
1988.00 |
2019.00 |
▇▇▇▇▇ |
| temp |
0 |
1 |
51.62 |
8.01 |
34.90 |
45.18 |
50.77 |
57.56 |
73.36 |
▃▇▇▅▁ |
| tempc |
0 |
1 |
10.90 |
4.45 |
1.61 |
7.32 |
10.43 |
14.20 |
22.98 |
▃▇▇▅▁ |
Data summary
| Name |
tempCounty |
| Number of rows |
388375 |
| Number of columns |
4 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
3 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
Variable type: numeric
| year |
0 |
1 |
1957.00 |
36.08 |
1895.00 |
1926.00 |
1957.00 |
1988.00 |
2019.00 |
▇▇▇▇▇ |
| temp |
0 |
1 |
54.00 |
8.43 |
30.51 |
47.65 |
53.91 |
60.53 |
78.82 |
▁▆▇▆▁ |
| tempc |
0 |
1 |
12.22 |
4.68 |
-0.83 |
8.69 |
12.17 |
15.85 |
26.01 |
▁▆▇▆▁ |
Data summary
| Name |
tempChangeState |
| Number of rows |
48 |
| Number of columns |
10 |
| _______________________ |
|
| Column type frequency: |
|
| character |
5 |
| numeric |
5 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| fips |
0 |
1 |
2 |
2 |
0 |
48 |
0 |
| max_warming_season |
0 |
1 |
4 |
6 |
0 |
2 |
0 |
| STUSAB |
0 |
1 |
2 |
2 |
0 |
48 |
0 |
| STATE_NAME |
0 |
1 |
4 |
14 |
0 |
48 |
0 |
| STATENS |
0 |
1 |
8 |
8 |
0 |
48 |
0 |
Variable type: numeric
| Fall |
0 |
1 |
0.79 |
0.52 |
-0.20 |
0.36 |
0.77 |
1.16 |
1.66 |
▅▇▆▇▇ |
| Spring |
0 |
1 |
1.00 |
0.48 |
-0.11 |
0.72 |
1.07 |
1.36 |
1.76 |
▂▂▃▇▅ |
| Summer |
0 |
1 |
0.77 |
0.63 |
-0.33 |
0.21 |
0.87 |
1.28 |
2.11 |
▇▇▆▇▂ |
| Winter |
0 |
1 |
1.67 |
0.71 |
0.34 |
1.19 |
1.51 |
2.27 |
3.15 |
▂▇▃▃▃ |
| Annual |
0 |
1 |
1.06 |
0.55 |
-0.04 |
0.64 |
1.10 |
1.52 |
2.04 |
▃▅▆▇▃ |
Data summary
| Name |
tempChangeCounty |
| Number of rows |
3107 |
| Number of columns |
9 |
| _______________________ |
|
| Column type frequency: |
|
| character |
4 |
| numeric |
5 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| fips |
0 |
1 |
5 |
5 |
0 |
3107 |
0 |
| max_warming_season |
0 |
1 |
4 |
6 |
0 |
4 |
0 |
| CTYNAME |
0 |
1 |
10 |
27 |
0 |
1842 |
0 |
| STNAME |
0 |
1 |
4 |
20 |
0 |
49 |
0 |
Variable type: numeric
| Fall |
0 |
1 |
0.54 |
0.54 |
-0.85 |
0.15 |
0.50 |
0.90 |
2.53 |
▂▇▇▂▁ |
| Spring |
0 |
1 |
0.83 |
0.54 |
-0.61 |
0.44 |
0.83 |
1.21 |
2.79 |
▂▇▇▂▁ |
| Summer |
0 |
1 |
0.46 |
0.62 |
-0.94 |
-0.01 |
0.34 |
0.87 |
2.58 |
▂▇▅▂▁ |
| Winter |
0 |
1 |
1.41 |
0.71 |
-0.20 |
0.95 |
1.32 |
1.80 |
3.74 |
▂▇▆▂▁ |
| Annual |
0 |
1 |
0.81 |
0.55 |
-0.57 |
0.40 |
0.74 |
1.20 |
2.54 |
▂▇▇▃▁ |
There are no missing values in any of the data sets. The data are
complete.
Line chart
# Line chart of the annual average temperature (Fahrenheit) for Michigan and
# Minnesota over the years available in tempState (1895-2019).
# tempState only carries FIPS codes, so the full state names are attached from
# tempChangeState with a right_join() on fips, which keeps every row of
# tempState.
tempChangeState %>%
  select(fips, STATE_NAME) %>%
  # Name the key explicitly instead of relying on the natural-join guess.
  right_join(tempState, by = "fips") %>%
  filter(STATE_NAME %in% c("Michigan", "Minnesota")) %>%
  ggplot(aes(x = year, y = temp, color = STATE_NAME)) +
  geom_line() +
  scale_color_colorblind() +
  labs(
    title = "Temperature of Michigan and Minnesota, 1895-2019",
    x = "year",
    y = "Temperature (F)",
    # labs() arguments are case-sensitive: `Caption` is silently ignored,
    # only lower-case `caption` adds the caption to the plot.
    caption = "The Washington Post and NOAA nClimDiv and nClimGrid data sets"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

Side-by-side box plot
# Side-by-side boxplots of the state-level temperature changes for each season
# (Fall, Winter, Spring, Summer).
# The plot is built from tempChangeState (one row per state), not the county
# data, so that it matches the "State Level" title. Only the four seasonal
# columns are pivoted to long form, which leaves the Annual column out.
tempChangeState %>%
  pivot_longer(
    cols = c(Fall, Spring, Summer, Winter),
    names_to = "Period",
    values_to = "Change"
  ) %>%
  ggplot(aes(
    x = fct_relevel(Period, "Fall", "Winter", "Spring", "Summer"),
    y = Change
  )) +
  geom_boxplot(fill = "dodgerblue") +
  labs(
    title = "State Level temperature changes",
    subtitle = "The lower 48 contiguous United States 1895-2019",
    x = "",
    y = "Temperature Change (F)",
    # labs() arguments are case-sensitive: `Caption` is silently ignored,
    # only lower-case `caption` adds the caption to the plot.
    caption = "The Washington Post and NOAA nClimDiv and nClimGrid data sets"
  ) +
  theme_bw()

Pivoting Wider
# Load the us_rent_income example data set that ships with tidyr
data("us_rent_income", package = "tidyr")
# Take a quick look at its columns and types
us_rent_income %>% glimpse()
## Rows: 104
## Columns: 5
## $ GEOID <chr> "01", "01", "02", "02", "04", "04", "05", "05", "06", "06", "…
## $ NAME <chr> "Alabama", "Alabama", "Alaska", "Alaska", "Arizona", "Arizona…
## $ variable <chr> "income", "rent", "income", "rent", "income", "rent", "income…
## $ estimate <dbl> 24476, 747, 32940, 1200, 27517, 972, 23789, 709, 29454, 1358,…
## $ moe <dbl> 136, 3, 508, 13, 148, 4, 165, 5, 109, 3, 109, 5, 195, 5, 247,…
# Build us_rent_income_wide, a "wider" version of us_rent_income with one row
# per GEOID/NAME and separate income and rent columns for both the estimate
# and its margin of error (estimate_income, estimate_rent, moe_income,
# moe_rent).
us_rent_income_wide <- pivot_wider(
  us_rent_income,
  id_cols = c(GEOID, NAME),
  names_from = variable,
  values_from = c(estimate, moe)
)
# Bar chart of the ratio of median monthly rent to median monthly income,
# showing only the ten highest and ten lowest ratios.
# In us_rent_income the income estimate is a yearly figure while rent is
# monthly, so income is divided by 12 to put both on a monthly basis.
ratios <- us_rent_income_wide %>%
  mutate(rent_income_ratio = estimate_rent / (estimate_income / 12))

ratios %>%
  slice_max(rent_income_ratio, n = 10) %>%
  bind_rows(slice_min(ratios, rent_income_ratio, n = 10)) %>%
  ggplot(aes(
    x = fct_reorder(NAME, rent_income_ratio),
    y = rent_income_ratio
  )) +
  geom_col(fill = "dodgerblue", color = "brown") +
  coord_flip() +
  labs(
    title = "Rent to income ratio, United States, 2017",
    # Labels follow the aesthetics, not the screen position: after
    # coord_flip() the x aesthetic (state name) is drawn on the vertical axis.
    x = "State",
    y = "Rent to income ratio",
    caption = "Data Source: The tidycensus R package and the American Community Survey"
  ) +
  theme_bw()
