BAIS 660 R Assignment 3

libraries to load in

library(tidyverse)
library(magrittr)
library(lubridate)
library(glue)
library(dplyr)

This code reads in the file

acs <- read_csv("acs_2015_county_data_revised.csv")

## Rows: 3142 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): state, county
## dbl (33): census_id, total_pop, men, women, hispanic, white, black, native, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

nrow(acs)

## [1] 3142

ncol(acs)

## [1] 35

There are 3,142 rows and 35 columns in the dataset

Checks the values datatypes to see if any needs changed

str(acs)

## spc_tbl_ [3,142 × 35] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ census_id     : num [1:3142] 1001 1003 1005 1007 1009 ...
##  $ state         : chr [1:3142] "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ county        : chr [1:3142] "Autauga" "Baldwin" "Barbour" "Bibb" ...
##  $ total_pop     : num [1:3142] 55221 195121 26932 22604 57710 ...
##  $ men           : num [1:3142] 26745 95314 14497 12073 28512 ...
##  $ women         : num [1:3142] 28476 99807 12435 10531 29198 ...
##  $ hispanic      : num [1:3142] 2.6 4.5 4.6 2.2 8.6 4.4 1.2 3.5 0.4 1.5 ...
##  $ white         : num [1:3142] 75.8 83.1 46.2 74.5 87.9 22.2 53.3 73 57.3 91.7 ...
##  $ black         : num [1:3142] 18.5 9.5 46.7 21.4 1.5 70.7 43.8 20.3 40.3 4.8 ...
##  $ native        : num [1:3142] 0.4 0.6 0.2 0.4 0.3 1.2 0.1 0.2 0.2 0.6 ...
##  $ asian         : num [1:3142] 1 0.7 0.4 0.1 0.1 0.2 0.4 0.9 0.8 0.3 ...
##  $ pacific       : num [1:3142] 0 0 0 0 0 0 0 0 0 0 ...
##  $ citizen       : num [1:3142] 40725 147695 20714 17495 42345 ...
##  $ income        : num [1:3142] 51281 50254 32964 38678 45813 ...
##  $ income_per_cap: num [1:3142] 24974 27317 16824 18431 20532 ...
##  $ poverty       : num [1:3142] 12.9 13.4 26.7 16.8 16.7 24.6 25.4 20.5 21.6 19.2 ...
##  $ child_poverty : num [1:3142] 18.6 19.2 45.3 27.9 27.2 38.4 39.2 31.6 37.2 30.1 ...
##  $ professional  : num [1:3142] 33.2 33.1 26.8 21.5 28.5 18.8 27.5 27.3 23.3 29.3 ...
##  $ service       : num [1:3142] 17 17.7 16.1 17.9 14.1 15 16.6 17.7 14.5 16 ...
##  $ office        : num [1:3142] 24.2 27.1 23.1 17.8 23.9 19.7 21.9 24.2 26.3 19.5 ...
##  $ construction  : num [1:3142] 8.6 10.8 10.8 19 13.5 20.1 10.3 10.5 11.5 13.7 ...
##  $ production    : num [1:3142] 17.1 11.2 23.1 23.7 19.9 26.4 23.7 20.4 24.4 21.5 ...
##  $ drive         : num [1:3142] 87.5 84.7 83.8 83.2 84.9 74.9 84.5 85.3 85.1 83.9 ...
##  $ carpool       : num [1:3142] 8.8 8.8 10.9 13.5 11.2 14.9 12.4 9.4 11.9 12.1 ...
##  $ transit       : num [1:3142] 0.1 0.1 0.4 0.5 0.4 0.7 0 0.2 0.2 0.2 ...
##  $ walk          : num [1:3142] 0.5 1 1.8 0.6 0.9 5 0.8 1.2 0.3 0.6 ...
##  $ other_transp  : num [1:3142] 1.3 1.4 1.5 1.5 0.4 1.7 0.6 1.2 0.4 0.7 ...
##  $ work_at_home  : num [1:3142] 1.8 3.9 1.6 0.7 2.3 2.8 1.7 2.7 2.1 2.5 ...
##  $ mean_commute  : num [1:3142] 26.5 26.4 24.1 28.8 34.9 27.5 24.6 24.1 25.1 27.4 ...
##  $ employed      : num [1:3142] 23986 85953 8597 8294 22189 ...
##  $ private_work  : num [1:3142] 73.6 81.5 71.8 76.8 82 79.5 77.4 74.1 85.1 73.1 ...
##  $ public_work   : num [1:3142] 20.9 12.3 20.8 16.1 13.5 15.1 16.2 20.8 12.1 18.5 ...
##  $ self_employed : num [1:3142] 5.5 5.8 7.3 6.7 4.2 5.4 6.2 5 2.8 7.9 ...
##  $ family_work   : num [1:3142] 0 0.4 0.1 0.4 0.4 0 0.2 0.1 0 0.5 ...
##  $ unemployment  : num [1:3142] 7.6 7.5 17.6 8.3 7.7 18 10.9 12.3 8.9 7.9 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   census_id = col_double(),
##   ..   state = col_character(),
##   ..   county = col_character(),
##   ..   total_pop = col_double(),
##   ..   men = col_double(),
##   ..   women = col_double(),
##   ..   hispanic = col_double(),
##   ..   white = col_double(),
##   ..   black = col_double(),
##   ..   native = col_double(),
##   ..   asian = col_double(),
##   ..   pacific = col_double(),
##   ..   citizen = col_double(),
##   ..   income = col_double(),
##   ..   income_per_cap = col_double(),
##   ..   poverty = col_double(),
##   ..   child_poverty = col_double(),
##   ..   professional = col_double(),
##   ..   service = col_double(),
##   ..   office = col_double(),
##   ..   construction = col_double(),
##   ..   production = col_double(),
##   ..   drive = col_double(),
##   ..   carpool = col_double(),
##   ..   transit = col_double(),
##   ..   walk = col_double(),
##   ..   other_transp = col_double(),
##   ..   work_at_home = col_double(),
##   ..   mean_commute = col_double(),
##   ..   employed = col_double(),
##   ..   private_work = col_double(),
##   ..   public_work = col_double(),
##   ..   self_employed = col_double(),
##   ..   family_work = col_double(),
##   ..   unemployment = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Convert census_id to factor since it is a unique ID and not an actual number

acs$census_id <- as.factor(acs$census_id)

Convert men, women, and citizen to integer since they should be only whole numbers

acs$men <- as.integer(acs$men)
acs$women <- as.integer(acs$women)
acs$citizen <- as.integer(acs$citizen)

These variables should be whole numbers since you cannot have half a person.

Glimpse Command to show the new variable types

glimpse(acs)

## Rows: 3,142
## Columns: 35
## $ census_id      <fct> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1…
## $ state          <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", …
## $ county         <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "Bul…
## $ total_pop      <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664…
## $ men            <int> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1…
## $ women          <int> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, …
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, …
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9…
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8…
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, …
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, …
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizen        <int> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,…
## $ income         <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,…
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,…
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1…
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3…
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2…
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1…
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1…
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13…
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2…
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8…
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1…
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, …
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, …
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, …
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, …
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2…
## $ employed       <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136…
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7…
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1…
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, …
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, …
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9…

Missing values (this code checks for missing values)

colSums(is.na(acs))

##      census_id          state         county      total_pop            men 
##              0              0              0              0              0 
##          women       hispanic          white          black         native 
##              0              0              0              0              0 
##          asian        pacific        citizen         income income_per_cap 
##              0              0              0              1              0 
##        poverty  child_poverty   professional        service         office 
##              0              1              0              0              0 
##   construction     production          drive        carpool        transit 
##              0              0              0              0              0 
##           walk   other_transp   work_at_home   mean_commute       employed 
##              0              0              0              0              0 
##   private_work    public_work  self_employed    family_work   unemployment 
##              0              0              0              0              0

Summary

summary(acs)

##    census_id       state              county            total_pop       
##  1001   :   1   Length:3142        Length:3142        Min.   :      85  
##  1003   :   1   Class :character   Class :character   1st Qu.:   11028  
##  1005   :   1   Mode  :character   Mode  :character   Median :   25768  
##  1007   :   1                                         Mean   :  100737  
##  1009   :   1                                         3rd Qu.:   67552  
##  1011   :   1                                         Max.   :10038388  
##  (Other):3136                                                           
##       men              women            hispanic          white      
##  Min.   :     42   Min.   :     43   Min.   : 0.000   Min.   : 0.90  
##  1st Qu.:   5546   1st Qu.:   5466   1st Qu.: 1.900   1st Qu.:65.60  
##  Median :  12826   Median :  12907   Median : 3.700   Median :84.60  
##  Mean   :  49565   Mean   :  51172   Mean   : 8.826   Mean   :77.28  
##  3rd Qu.:  33319   3rd Qu.:  34122   3rd Qu.: 9.000   3rd Qu.:93.30  
##  Max.   :4945351   Max.   :5093037   Max.   :98.700   Max.   :99.80  
##                                                                      
##      black            native           asian           pacific        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.200   1st Qu.: 0.00000  
##  Median : 2.100   Median : 0.300   Median : 0.500   Median : 0.00000  
##  Mean   : 8.879   Mean   : 1.766   Mean   : 1.258   Mean   : 0.08475  
##  3rd Qu.:10.175   3rd Qu.: 0.600   3rd Qu.: 1.200   3rd Qu.: 0.00000  
##  Max.   :85.900   Max.   :92.100   Max.   :41.600   Max.   :35.30000  
##                                                                       
##     citizen            income       income_per_cap     poverty    
##  Min.   :     80   Min.   : 19328   Min.   : 8292   Min.   : 1.4  
##  1st Qu.:   8254   1st Qu.: 38826   1st Qu.:20471   1st Qu.:12.0  
##  Median :  19434   Median : 45111   Median :23577   Median :16.0  
##  Mean   :  70804   Mean   : 46830   Mean   :24338   Mean   :16.7  
##  3rd Qu.:  50728   3rd Qu.: 52250   3rd Qu.:27138   3rd Qu.:20.3  
##  Max.   :6046749   Max.   :123453   Max.   :65600   Max.   :53.3  
##                    NA's   :1                                      
##  child_poverty    professional      service          office     
##  Min.   : 0.00   Min.   :13.50   Min.   : 5.00   Min.   : 4.10  
##  1st Qu.:16.10   1st Qu.:26.70   1st Qu.:15.90   1st Qu.:20.20  
##  Median :22.50   Median :30.00   Median :18.00   Median :22.40  
##  Mean   :23.29   Mean   :31.04   Mean   :18.26   Mean   :22.13  
##  3rd Qu.:29.50   3rd Qu.:34.40   3rd Qu.:20.20   3rd Qu.:24.30  
##  Max.   :72.30   Max.   :74.00   Max.   :36.60   Max.   :35.40  
##  NA's   :1                                                      
##   construction     production        drive          carpool     
##  Min.   : 1.70   Min.   : 0.00   Min.   : 5.20   Min.   : 0.00  
##  1st Qu.: 9.80   1st Qu.:11.53   1st Qu.:76.60   1st Qu.: 8.50  
##  Median :12.20   Median :15.40   Median :80.60   Median : 9.90  
##  Mean   :12.74   Mean   :15.82   Mean   :79.08   Mean   :10.33  
##  3rd Qu.:15.00   3rd Qu.:19.40   3rd Qu.:83.60   3rd Qu.:11.88  
##  Max.   :40.30   Max.   :55.60   Max.   :94.60   Max.   :29.90  
##                                                                 
##     transit             walk         other_transp     work_at_home   
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.1000   1st Qu.: 1.400   1st Qu.: 0.900   1st Qu.: 2.800  
##  Median : 0.4000   Median : 2.400   Median : 1.300   Median : 4.000  
##  Mean   : 0.9675   Mean   : 3.307   Mean   : 1.614   Mean   : 4.697  
##  3rd Qu.: 0.8000   3rd Qu.: 4.000   3rd Qu.: 1.900   3rd Qu.: 5.700  
##  Max.   :61.7000   Max.   :71.200   Max.   :39.100   Max.   :37.200  
##                                                                      
##   mean_commute      employed        private_work    public_work   
##  Min.   : 4.90   Min.   :     62   Min.   :25.00   Min.   : 5.80  
##  1st Qu.:19.30   1st Qu.:   4524   1st Qu.:70.90   1st Qu.:13.10  
##  Median :22.90   Median :  10644   Median :75.80   Median :16.10  
##  Mean   :23.15   Mean   :  46387   Mean   :74.44   Mean   :17.35  
##  3rd Qu.:26.60   3rd Qu.:  29254   3rd Qu.:79.80   3rd Qu.:20.10  
##  Max.   :44.00   Max.   :4635465   Max.   :88.30   Max.   :66.20  
##                                                                   
##  self_employed     family_work      unemployment   
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.: 5.400   1st Qu.:0.1000   1st Qu.: 5.500  
##  Median : 6.900   Median :0.2000   Median : 7.500  
##  Mean   : 7.921   Mean   :0.2915   Mean   : 7.815  
##  3rd Qu.: 9.400   3rd Qu.:0.3000   3rd Qu.: 9.700  
##  Max.   :36.600   Max.   :9.8000   Max.   :29.400  
##

After using the summary function, I did not find any unusual variables in this dataset so I left them all as is.

Women vs Men count by County

acs$women_men <- ifelse(acs$men > acs$women, "Men",
                     ifelse(acs$men < acs$women, "Women", "Equal"))

result <- acs[, c("county", "women_men")]
print(result)

## # A tibble: 3,142 × 2
##    county   women_men
##    <chr>    <chr>    
##  1 Autauga  Women    
##  2 Baldwin  Women    
##  3 Barbour  Men      
##  4 Bibb     Men      
##  5 Blount   Women    
##  6 Bullock  Men      
##  7 Butler   Women    
##  8 Calhoun  Women    
##  9 Chambers Women    
## 10 Cherokee Women    
## # ℹ 3,132 more rows

The code above creates a variable that labels if there are more men or more women in a county. Then prints by county if there are more men or women.

Unemployment Rate

num_counties <- acs %>%
  filter(unemployment < 10) %>%
  summarise(total = n())

print(num_counties)

## # A tibble: 1 × 1
##   total
##   <int>
## 1  2420

The code above creates a vector that filters out the counties where unemployment is less than 10% and then prints that number. There are 2,420 counties that have unemployment rates less than 10%.

7 find top 10 counties with highest mean commute

top10_commute <- acs %>%
  top_n(n = 10, wt = mean_commute) %>%
  arrange(desc(mean_commute)) %>%
  select(census_id, county, state, mean_commute)

print(top10_commute)

## # A tibble: 10 × 4
##    census_id county       state         mean_commute
##    <fct>     <chr>        <chr>                <dbl>
##  1 42103     Pike         Pennsylvania          44  
##  2 36005     Bronx        New York              43  
##  3 24017     Charles      Maryland              42.8
##  4 51187     Warren       Virginia              42.7
##  5 36081     Queens       New York              42.6
##  6 36085     Richmond     New York              42.6
##  7 51193     Westmoreland Virginia              42.5
##  8 8093      Park         Colorado              42.4
##  9 36047     Kings        New York              41.7
## 10 54015     Clay         West Virginia         41.4

the code above creates a vector with the top ten counties with the highest mean it then selects the census_id, county, state, and mean commute of those top ten counties. Then that is printed and displayed.

8

create % women

acs_women_percentage <- acs %>%
  mutate(pct_women = women / (women + men) * 100)

get top 10 counties with the lowest % women

lowest10 <- acs_women_percentage %>%
  arrange(pct_women) %>%
  select(census_id, county, state, pct_women) %>%
  head(10)

print(lowest10)

## # A tibble: 10 × 4
##    census_id county                 state        pct_women
##    <fct>     <chr>                  <chr>            <dbl>
##  1 42053     Forest                 Pennsylvania      26.8
##  2 8011      Bent                   Colorado          31.4
##  3 51183     Sussex                 Virginia          31.5
##  4 13309     Wheeler                Georgia           32.1
##  5 6035      Lassen                 California        33.2
##  6 48095     Concho                 Texas             33.3
##  7 13053     Chattahoochee          Georgia           33.4
##  8 2013      Aleutians East Borough Alaska            33.5
##  9 22125     West Feliciana         Louisiana         33.6
## 10 32027     Pershing               Nevada            33.7

The code above creates percentage of women, then arrange the percent of women and selects the census id, county, state, and percentage of women columns and then prints the counties with the lowest percentage of women.

##9

acs_nine <- acs %>%
  mutate(race_sum = hispanic + white + black + native + asian + pacific)

The code above creates a variable called race_sum that calculates the total of the races in this dataset.

9a

lowest10_counties <- acs_nine %>%
  arrange(race_sum) %>%
  select(census_id, county, state, race_sum) %>%
  head(10)

print(lowest10_counties)

## # A tibble: 10 × 4
##    census_id county                   state     race_sum
##    <fct>     <chr>                    <chr>        <dbl>
##  1 15001     Hawaii                   Hawaii        76.4
##  2 15009     Maui                     Hawaii        79.2
##  3 40097     Mayes                    Oklahoma      79.7
##  4 15003     Honolulu                 Hawaii        81.5
##  5 40123     Pontotoc                 Oklahoma      82.8
##  6 47061     Grundy                   Tennessee     83  
##  7 2282      Yakutat City and Borough Alaska        83.4
##  8 40069     Johnston                 Oklahoma      84  
##  9 15007     Kauai                    Hawaii        84.1
## 10 40003     Alfalfa                  Oklahoma      85.1

The code above arranges the new column I created and selects the columns census_id, county, state, and race sum. It then displays the counties with the lowest sum of race percentage.

9b

state_lowest_avg <- acs_nine %>%
  group_by(state) %>%
  summarise(avg_race_sum = mean(race_sum, na.rm = TRUE)) %>%
  arrange(avg_race_sum) %>%
  slice(1)   

print(state_lowest_avg)

## # A tibble: 1 × 2
##   state  avg_race_sum
##   <chr>         <dbl>
## 1 Hawaii           84

The code above displays which state has the lowest race sum out of all the states. The state with the lowest is Hawaii.

9c

any_over_100 <- any(acs_nine$race_sum > 100, na.rm = TRUE)
print(any_over_100)

## [1] TRUE

The code above finds any counties that have a race sum of over 100% and then prints them.

9d

num_states_exact_100 <- acs_nine %>%
  group_by(state) %>%
  filter(race_sum == 100) %>%
  summarise(counties_equal_100 = n()) %>%
  filter(counties_equal_100 > 0) %>%
  nrow()

print(num_states_exact_100)

## [1] 13

The code above groups by state and filters the data to show only instances where race sum is equal to 100. Next it prints the number which is 13 states.

10

acs_ten <- acs %>%
  mutate(carpool_rank = min_rank(desc(carpool)))

This code creates the carpool_rank variable

Top ten counties for carpooling

top10_counties <- acs_ten %>%
  arrange(carpool_rank) %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  head(10)

top10_counties

## # A tibble: 10 × 5
##    census_id county   state    carpool carpool_rank
##    <fct>     <chr>    <chr>      <dbl>        <int>
##  1 13061     Clay     Georgia     29.9            1
##  2 18087     LaGrange Indiana     27              2
##  3 13165     Jenkins  Georgia     25.3            3
##  4 5133      Sevier   Arkansas    24.4            4
##  5 20175     Seward   Kansas      23.4            5
##  6 48079     Cochran  Texas       22.8            6
##  7 48247     Jim Hogg Texas       22.6            7
##  8 48393     Roberts  Texas       22.4            8
##  9 39075     Holmes   Ohio        21.8            9
## 10 21197     Powell   Kentucky    21.6           10

# bottom 10 counties for carpooling

bottom10_counties <- acs_ten %>%
  arrange(desc(carpool_rank)) %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  head(10)

bottom10_counties

## # A tibble: 10 × 5
##    census_id county   state        carpool carpool_rank
##    <fct>     <chr>    <chr>          <dbl>        <int>
##  1 48261     Kenedy   Texas            0           3141
##  2 48269     King     Texas            0           3141
##  3 48235     Irion    Texas            0.9         3140
##  4 31183     Wheeler  Nebraska         1.3         3139
##  5 36061     New York New York         1.9         3138
##  6 13309     Wheeler  Georgia          2.3         3136
##  7 38029     Emmons   North Dakota     2.3         3136
##  8 30019     Daniels  Montana          2.6         3134
##  9 31057     Dundy    Nebraska         2.6         3134
## 10 46069     Hyde     South Dakota     2.8         3132

state average rank (lower average = better)

state_avg_rank <- acs_ten %>%
  group_by(state) %>%
  summarise(avg_rank = mean(carpool_rank, na.rm = TRUE)) %>%
  arrange(avg_rank)

state_avg_rank

## # A tibble: 51 × 2
##    state      avg_rank
##    <chr>         <dbl>
##  1 Arizona        971.
##  2 Utah          1019.
##  3 Arkansas      1055.
##  4 Hawaii        1072.
##  5 Alaska        1087.
##  6 Nevada        1100.
##  7 Texas         1106.
##  8 Wyoming       1107.
##  9 California    1122.
## 10 Missouri      1133.
## # ℹ 41 more rows

top 5 states for carpooling

top5_states <- state_avg_rank %>%
  head(5)

top5_states

## # A tibble: 5 × 2
##   state    avg_rank
##   <chr>       <dbl>
## 1 Arizona      971.
## 2 Utah        1019.
## 3 Arkansas    1055.
## 4 Hawaii      1072.
## 5 Alaska      1087.