Introduction

The United States Census Bureau conducts an annual survey called the American Community Survey (ACS) where the bureau contacts over 3 million randomly selected households across the country to gather information on income, sex, race, poverty, employment, transportation, and other variables. The dataset is obtained from Kaggle shows five-year estimates for variables in the dataset for each politically designated county in the United States.

Libraries Used

library(tidyverse) 

Data Importing and Cleaning

1.

acs_2015_county_data_revised.csv has been imported.

# set working directory
path_loc <- "E:/Data_Wrangling/Week 4/Week 4/homework3"
setwd(path_loc)
#import dataset and load to tibble acs
acs<- read_csv("acs_2015_county_data_revised.csv")

2.

Data Type change needs to be done for census_id, county, state columns. Since census_id is a discrete variable we shall update the data type to factor, similarly we can update the data type from character to factor for county & state columns.

#change datatypes
acs <- acs %>% mutate(across(census_id,as.factor))
acs <- acs %>% mutate(across(county,as.factor))
acs <- acs %>% mutate(across(state,as.factor))
glimpse(acs)
## Rows: 3,142
## Columns: 35
## $ census_id      <fct> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1~
## $ state          <fct> Alabama, Alabama, Alabama, Alabama, Alabama, Alabama, A~
## $ county         <fct> Autauga, Baldwin, Barbour, Bibb, Blount, Bullock, Butle~
## $ total_pop      <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664~
## $ men            <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1~
## $ women          <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, ~
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, ~
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9~
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8~
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, ~
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, ~
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ~
## $ citizen        <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,~
## $ income         <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,~
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,~
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1~
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3~
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2~
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1~
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1~
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13~
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2~
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8~
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1~
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, ~
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, ~
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, ~
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, ~
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2~
## $ employed       <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136~
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7~
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1~
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, ~
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, ~
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9~

3.

Yes, there are missing values in income & child_poverty fields with 1 record in each field. Since the count of missing data is small compared to size of the dataset we can remove these observations from our analysis. One more approach to handle these missing values could be to impute the mean of the respective columns of the particular state. But considering the small count of missing data we are removing the observations.

#counting missing values
colSums(is.na(acs))
##      census_id          state         county      total_pop            men 
##              0              0              0              0              0 
##          women       hispanic          white          black         native 
##              0              0              0              0              0 
##          asian        pacific        citizen         income income_per_cap 
##              0              0              0              1              0 
##        poverty  child_poverty   professional        service         office 
##              0              1              0              0              0 
##   construction     production          drive        carpool        transit 
##              0              0              0              0              0 
##           walk   other_transp   work_at_home   mean_commute       employed 
##              0              0              0              0              0 
##   private_work    public_work  self_employed    family_work   unemployment 
##              0              0              0              0              0
colnames(acs)[apply(is.na(acs), 2, any)]
## [1] "income"        "child_poverty"
#removing observations with missing values in columns "income" & "child_poverty"
acs <- acs %>% drop_na()
glimpse(acs)
## Rows: 3,140
## Columns: 35
## $ census_id      <fct> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1~
## $ state          <fct> Alabama, Alabama, Alabama, Alabama, Alabama, Alabama, A~
## $ county         <fct> Autauga, Baldwin, Barbour, Bibb, Blount, Bullock, Butle~
## $ total_pop      <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664~
## $ men            <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1~
## $ women          <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, ~
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, ~
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9~
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8~
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, ~
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, ~
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ~
## $ citizen        <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,~
## $ income         <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,~
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,~
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1~
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3~
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2~
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1~
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1~
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13~
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2~
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8~
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1~
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, ~
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, ~
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, ~
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, ~
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2~
## $ employed       <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136~
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7~
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1~
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, ~
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, ~
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9~

After removing the missing values we can observe the count of rows has been dropped by 2 from 3142 to 3140.

4.

#Summary of the cleaned data
acs %>% summary()
##    census_id         state             county       total_pop       
##  1001   :   1   Texas   : 253   Washington:  31   Min.   :     267  
##  1003   :   1   Georgia : 159   Jefferson :  26   1st Qu.:   11036  
##  1005   :   1   Virginia: 133   Franklin  :  25   Median :   25793  
##  1007   :   1   Kentucky: 120   Jackson   :  24   Mean   :  100801  
##  1009   :   1   Missouri: 115   Lincoln   :  24   3rd Qu.:   67620  
##  1011   :   1   Kansas  : 105   Madison   :  20   Max.   :10038388  
##  (Other):3134   (Other) :2255   (Other)   :2990                     
##       men              women            hispanic          white      
##  Min.   :    136   Min.   :    131   Min.   : 0.000   Min.   : 0.90  
##  1st Qu.:   5551   1st Qu.:   5488   1st Qu.: 1.900   1st Qu.:65.67  
##  Median :  12838   Median :  12916   Median : 3.700   Median :84.65  
##  Mean   :  49597   Mean   :  51204   Mean   : 8.819   Mean   :77.31  
##  3rd Qu.:  33328   3rd Qu.:  34123   3rd Qu.: 9.000   3rd Qu.:93.33  
##  Max.   :4945351   Max.   :5093037   Max.   :98.700   Max.   :99.80  
##                                                                      
##      black            native           asian           pacific        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.200   1st Qu.: 0.00000  
##  Median : 2.100   Median : 0.300   Median : 0.500   Median : 0.00000  
##  Mean   : 8.885   Mean   : 1.763   Mean   : 1.253   Mean   : 0.07357  
##  3rd Qu.:10.200   3rd Qu.: 0.600   3rd Qu.: 1.200   3rd Qu.: 0.00000  
##  Max.   :85.900   Max.   :92.100   Max.   :41.600   Max.   :11.10000  
##                                                                       
##     citizen            income       income_per_cap     poverty    
##  Min.   :    199   Min.   : 19328   Min.   : 8292   Min.   : 1.4  
##  1st Qu.:   8276   1st Qu.: 38826   1st Qu.:20470   1st Qu.:12.0  
##  Median :  19454   Median : 45095   Median :23575   Median :16.0  
##  Mean   :  70849   Mean   : 46824   Mean   :24331   Mean   :16.7  
##  3rd Qu.:  50795   3rd Qu.: 52248   3rd Qu.:27138   3rd Qu.:20.3  
##  Max.   :6046749   Max.   :123453   Max.   :65600   Max.   :53.3  
##                                                                   
##  child_poverty    professional      service          office     
##  Min.   : 0.00   Min.   :13.50   Min.   : 5.00   Min.   : 4.10  
##  1st Qu.:16.10   1st Qu.:26.70   1st Qu.:15.90   1st Qu.:20.20  
##  Median :22.50   Median :30.00   Median :18.00   Median :22.40  
##  Mean   :23.29   Mean   :31.05   Mean   :18.25   Mean   :22.13  
##  3rd Qu.:29.50   3rd Qu.:34.42   3rd Qu.:20.20   3rd Qu.:24.30  
##  Max.   :72.30   Max.   :74.00   Max.   :36.60   Max.   :35.40  
##                                                                 
##   construction     production        drive         carpool     
##  Min.   : 1.70   Min.   : 0.00   Min.   : 5.2   Min.   : 0.00  
##  1st Qu.: 9.80   1st Qu.:11.50   1st Qu.:76.6   1st Qu.: 8.50  
##  Median :12.20   Median :15.40   Median :80.6   Median : 9.90  
##  Mean   :12.75   Mean   :15.82   Mean   :79.1   Mean   :10.33  
##  3rd Qu.:15.00   3rd Qu.:19.40   3rd Qu.:83.6   3rd Qu.:11.90  
##  Max.   :40.30   Max.   :55.60   Max.   :94.6   Max.   :29.90  
##                                                                
##     transit             walk         other_transp    work_at_home   
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.000  
##  1st Qu.: 0.1000   1st Qu.: 1.400   1st Qu.: 0.90   1st Qu.: 2.800  
##  Median : 0.4000   Median : 2.400   Median : 1.30   Median : 4.000  
##  Mean   : 0.9681   Mean   : 3.294   Mean   : 1.61   Mean   : 4.694  
##  3rd Qu.: 0.8000   3rd Qu.: 4.000   3rd Qu.: 1.90   3rd Qu.: 5.700  
##  Max.   :61.7000   Max.   :71.200   Max.   :39.10   Max.   :37.200  
##                                                                     
##   mean_commute      employed        private_work    public_work   
##  Min.   : 4.90   Min.   :    166   Min.   :29.50   Min.   : 5.80  
##  1st Qu.:19.30   1st Qu.:   4532   1st Qu.:70.90   1st Qu.:13.07  
##  Median :22.90   Median :  10657   Median :75.85   Median :16.10  
##  Mean   :23.15   Mean   :  46416   Mean   :74.45   Mean   :17.33  
##  3rd Qu.:26.60   3rd Qu.:  29272   3rd Qu.:79.80   3rd Qu.:20.10  
##  Max.   :44.00   Max.   :4635465   Max.   :88.30   Max.   :66.20  
##                                                                   
##  self_employed     family_work      unemployment   
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.: 5.400   1st Qu.:0.1000   1st Qu.: 5.500  
##  Median : 6.900   Median :0.2000   Median : 7.500  
##  Mean   : 7.922   Mean   :0.2917   Mean   : 7.815  
##  3rd Qu.: 9.400   3rd Qu.:0.3000   3rd Qu.: 9.700  
##  Max.   :36.600   Max.   :9.8000   Max.   :29.400  
## 

After observing the summary we can see the mean & median values is far from each other for few fields for example, total_pop , men , women , hispanic , black , citizen. This suggests there are outlines in the data and that the data is not distributed equally across all the categories of people.

Data Manipulation and Insights

5.

women_summary <- acs %>% 
  mutate(womengreaterthanmen = women > men) %>% 
  summarise(womgrmen_county_count = sum(womengreaterthanmen))
knitr::kable(women_summary)
womgrmen_county_count
1984

There are 1984 counties that have more women than men.

6.

emp_summary <- acs %>% 
  mutate(unemployment_10below = unemployment < 10) %>% 
  summarise(low_umemprate_county_count = sum(unemployment_10below))
knitr::kable(emp_summary)
low_umemprate_county_count
2419

There are 2419 counties that have unemployment rate less than 10%

7.

highest_mean_commute_tbl <- acs %>% 
  select(census_id,county,state,mean_commute) %>% 
  arrange(desc(mean_commute)) %>% 
  top_n(10)
knitr::kable(highest_mean_commute_tbl)
census_id county state mean_commute
42103 Pike Pennsylvania 44.0
36005 Bronx New York 43.0
24017 Charles Maryland 42.8
51187 Warren Virginia 42.7
36081 Queens New York 42.6
36085 Richmond New York 42.6
51193 Westmoreland Virginia 42.5
8093 Park Colorado 42.4
36047 Kings New York 41.7
54015 Clay West Virginia 41.4

Above table contains the top 10 counties with highest mean commute.

8.

pct_women_summary <- acs %>% 
  mutate(pct_women = round((women / total_pop)*100,2)) %>% 
  select(census_id,county,state,pct_women) %>% 
  arrange(pct_women) %>% 
  top_n(-10)
knitr::kable(pct_women_summary)
census_id county state pct_women
42053 Forest Pennsylvania 26.78
8011 Bent Colorado 31.37
51183 Sussex Virginia 31.47
13309 Wheeler Georgia 32.10
6035 Lassen California 33.17
48095 Concho Texas 33.28
13053 Chattahoochee Georgia 33.36
2013 Aleutians East Borough Alaska 33.47
22125 West Feliciana Louisiana 33.65
32027 Pershing Nevada 33.73

Above table shows the top 10 counties with lowest percentage of women.

9(a).

race_pct_tbl <- acs %>% 
  mutate(race_pct = hispanic + white + black + native + asian + pacific) %>% 
  select(census_id,county,state,race_pct) %>% 
  arrange(race_pct) %>% 
  top_n(-10)
knitr::kable(race_pct_tbl)
census_id county state race_pct
15001 Hawaii Hawaii 76.4
15009 Maui Hawaii 79.2
40097 Mayes Oklahoma 79.7
15003 Honolulu Hawaii 81.5
40123 Pontotoc Oklahoma 82.8
47061 Grundy Tennessee 83.0
2282 Yakutat City and Borough Alaska 83.4
40069 Johnston Oklahoma 84.0
15007 Kauai Hawaii 84.1
40003 Alfalfa Oklahoma 85.1

Above table shows the top 10 counties with lowest sum of race percentage variables.

9(b).

mean_race_pct_summ <- acs %>% 
  group_by(state) %>% 
  mutate(race_pct = hispanic + white + black + native + asian + pacific) %>% 
  summarise(mean_race_pct = mean(race_pct)) %>% 
  arrange(mean_race_pct) %>% 
  top_n(-1)
knitr::kable(mean_race_pct_summ)
state mean_race_pct
Hawaii 80.3

On an average, state of Hawaii has the lowest sum of race percentage variables at 80.3.

9(c).

race_pct_100_above <- acs %>% 
  mutate(race_pct = round((hispanic + white + black + native + asian + pacific),2)) %>%
  select(census_id,county,state,race_pct) %>% 
  filter(race_pct > 100)
knitr::kable(race_pct_100_above)
census_id county state race_pct
31073 Gosper Nebraska 100.1
31091 Hooker Nebraska 100.1
31125 Nance Nebraska 100.1
48017 Bailey Texas 100.1
48137 Edwards Texas 100.1

Yes, there are 5 counties that have sum of race percentage variables greater than 100%. Above are the details.

9(d).

race_pct_100 <- acs %>% 
  mutate(race_pct = round((hispanic + white + black + native + asian + pacific),2)) %>%
  select(census_id,county,state,race_pct) %>% 
  filter(race_pct==100)
knitr::kable(race_pct_100)
census_id county state race_pct
1065 Hale Alabama 100
1131 Wilcox Alabama 100
13201 Miller Georgia 100
13307 Webster Georgia 100
20199 Wallace Kansas 100
21031 Butler Kentucky 100
28021 Claiborne Mississippi 100
28125 Sharkey Mississippi 100
30019 Daniels Montana 100
30069 Petroleum Montana 100
30109 Wibaux Montana 100
31075 Grant Nebraska 100
31163 Sherman Nebraska 100
31183 Wheeler Nebraska 100
32011 Eureka Nevada 100
35033 Mora New Mexico 100
37095 Hyde North Carolina 100
38091 Steele North Dakota 100
41021 Gilliam Oregon 100
46049 Faulk South Dakota 100
46075 Jones South Dakota 100
48047 Brooks Texas 100
48127 Dimmit Texas 100
48131 Duval Texas 100
48173 Glasscock Texas 100
48261 Kenedy Texas 100
48263 Kent Texas 100
48269 King Texas 100
48271 Kinney Texas 100
48311 McMullen Texas 100
48319 Mason Texas 100
48369 Parmer Texas 100
48377 Presidio Texas 100
48443 Terrell Texas 100
48479 Webb Texas 100
48505 Zapata Texas 100
49001 Beaver Utah 100
54031 Hardy West Virginia 100

Yes, there are 38 counties that have sum of race percentage variables exactly equal to 100%. Above are the details.

10(a/b).

car_pool_rank_tbl <- acs %>% 
  mutate(carpool_rank = min_rank(-carpool)) %>% 
  select(census_id,county,state,carpool,carpool_rank) %>% 
  arrange(carpool_rank) %>%
  top_n(-10)
knitr::kable(car_pool_rank_tbl)
census_id county state carpool carpool_rank
13061 Clay Georgia 29.9 1
18087 LaGrange Indiana 27.0 2
13165 Jenkins Georgia 25.3 3
5133 Sevier Arkansas 24.4 4
20175 Seward Kansas 23.4 5
48079 Cochran Texas 22.8 6
48247 Jim Hogg Texas 22.6 7
48393 Roberts Texas 22.4 8
39075 Holmes Ohio 21.8 9
21197 Powell Kentucky 21.6 10

Above table shows the 10 highest ranked counties for carpooling.

10(c).

car_pool_rank_tbl_1 <- acs %>% 
  mutate(carpool_rank = min_rank(-carpool)) %>% 
  select(census_id,county,state,carpool,carpool_rank) %>% 
  arrange(carpool_rank) %>%
  top_n(10)
# calculating reverse
rev_data_frame <- apply(car_pool_rank_tbl_1, 2, rev)
# converting the result to dataframe
rev_data_frame <- as.data.frame(rev_data_frame)
knitr::kable(rev_data_frame)
census_id county state carpool carpool_rank
48269 King Texas 0.0 3139
48261 Kenedy Texas 0.0 3139
48235 Irion Texas 0.9 3138
31183 Wheeler Nebraska 1.3 3137
36061 New York New York 1.9 3136
38029 Emmons North Dakota 2.3 3134
13309 Wheeler Georgia 2.3 3134
31057 Dundy Nebraska 2.6 3132
30019 Daniels Montana 2.6 3132
51720 Norton city Virginia 2.8 3130
46069 Hyde South Dakota 2.8 3130

Above table shows the 10 lowest ranked counties for carpooling.

10(d).

car_pool_rank_state <- acs %>% 
  group_by(state) %>% 
  summarise(mean_carpool = round(mean(carpool),2)) %>% 
  mutate(carpool_rank = min_rank(-mean_carpool)) %>% 
  arrange(carpool_rank) %>% 
  top_n(-1)
knitr::kable(car_pool_rank_state)
state mean_carpool carpool_rank
Hawaii 12.75 1

On average, the state of Hawaii is best ranked for carpooling.

10(e).

car_pool_rank_summ <- acs %>% 
  group_by(state) %>% 
  summarise(mean_carpool = round(mean(carpool),2)) %>%  
  mutate(carpool_rank = min_rank(-mean_carpool)) %>% 
  arrange(carpool_rank) %>% 
  top_n(-5)
knitr::kable(car_pool_rank_summ)
state mean_carpool carpool_rank
Hawaii 12.75 1
Alaska 12.13 2
Arkansas 11.90 3
Utah 11.87 4
Texas 11.83 5

Above table shows the top 5 ranked states for carpooling.