Importing Data and Data Cleaning
Import the package
library(tidyverse)
## -- Attaching packages ---------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.5
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Reading the data:
acs_data <- read_csv("C:/Users/rohit/OneDrive/Desktop/acs_2015_county_data_revised.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## state = col_character(),
## county = col_character()
## )
## See spec(...) for full column specifications.
head(acs_data,6)
## # A tibble: 6 x 35
## census_id state county total_pop men women hispanic white black native asian
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1001 Alab~ Autau~ 55221 26745 28476 2.6 75.8 18.5 0.4 1
## 2 1003 Alab~ Baldw~ 195121 95314 99807 4.5 83.1 9.5 0.6 0.7
## 3 1005 Alab~ Barbo~ 26932 14497 12435 4.6 46.2 46.7 0.2 0.4
## 4 1007 Alab~ Bibb 22604 12073 10531 2.2 74.5 21.4 0.4 0.1
## 5 1009 Alab~ Blount 57710 28512 29198 8.6 87.9 1.5 0.3 0.1
## 6 1011 Alab~ Bullo~ 10678 5660 5018 4.4 22.2 70.7 1.2 0.2
## # ... with 24 more variables: pacific <dbl>, citizen <dbl>, income <dbl>,
## # income_per_cap <dbl>, poverty <dbl>, child_poverty <dbl>,
## # professional <dbl>, service <dbl>, office <dbl>, construction <dbl>,
## # production <dbl>, drive <dbl>, carpool <dbl>, transit <dbl>, walk <dbl>,
## # other_transp <dbl>, work_at_home <dbl>, mean_commute <dbl>, employed <dbl>,
## # private_work <dbl>, public_work <dbl>, self_employed <dbl>,
## # family_work <dbl>, unemployment <dbl>
dim(acs_data)
## [1] 3142 35
We can see that we have 3142 rows and 35 columns in this dataset.
We’ll take a look at the datatypes using str() command
str(acs_data)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 3142 obs. of 35 variables:
## $ census_id : num 1001 1003 1005 1007 1009 ...
## $ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ county : chr "Autauga" "Baldwin" "Barbour" "Bibb" ...
## $ total_pop : num 55221 195121 26932 22604 57710 ...
## $ men : num 26745 95314 14497 12073 28512 ...
## $ women : num 28476 99807 12435 10531 29198 ...
## $ hispanic : num 2.6 4.5 4.6 2.2 8.6 4.4 1.2 3.5 0.4 1.5 ...
## $ white : num 75.8 83.1 46.2 74.5 87.9 22.2 53.3 73 57.3 91.7 ...
## $ black : num 18.5 9.5 46.7 21.4 1.5 70.7 43.8 20.3 40.3 4.8 ...
## $ native : num 0.4 0.6 0.2 0.4 0.3 1.2 0.1 0.2 0.2 0.6 ...
## $ asian : num 1 0.7 0.4 0.1 0.1 0.2 0.4 0.9 0.8 0.3 ...
## $ pacific : num 0 0 0 0 0 0 0 0 0 0 ...
## $ citizen : num 40725 147695 20714 17495 42345 ...
## $ income : num 51281 50254 32964 38678 45813 ...
## $ income_per_cap: num 24974 27317 16824 18431 20532 ...
## $ poverty : num 12.9 13.4 26.7 16.8 16.7 24.6 25.4 20.5 21.6 19.2 ...
## $ child_poverty : num 18.6 19.2 45.3 27.9 27.2 38.4 39.2 31.6 37.2 30.1 ...
## $ professional : num 33.2 33.1 26.8 21.5 28.5 18.8 27.5 27.3 23.3 29.3 ...
## $ service : num 17 17.7 16.1 17.9 14.1 15 16.6 17.7 14.5 16 ...
## $ office : num 24.2 27.1 23.1 17.8 23.9 19.7 21.9 24.2 26.3 19.5 ...
## $ construction : num 8.6 10.8 10.8 19 13.5 20.1 10.3 10.5 11.5 13.7 ...
## $ production : num 17.1 11.2 23.1 23.7 19.9 26.4 23.7 20.4 24.4 21.5 ...
## $ drive : num 87.5 84.7 83.8 83.2 84.9 74.9 84.5 85.3 85.1 83.9 ...
## $ carpool : num 8.8 8.8 10.9 13.5 11.2 14.9 12.4 9.4 11.9 12.1 ...
## $ transit : num 0.1 0.1 0.4 0.5 0.4 0.7 0 0.2 0.2 0.2 ...
## $ walk : num 0.5 1 1.8 0.6 0.9 5 0.8 1.2 0.3 0.6 ...
## $ other_transp : num 1.3 1.4 1.5 1.5 0.4 1.7 0.6 1.2 0.4 0.7 ...
## $ work_at_home : num 1.8 3.9 1.6 0.7 2.3 2.8 1.7 2.7 2.1 2.5 ...
## $ mean_commute : num 26.5 26.4 24.1 28.8 34.9 27.5 24.6 24.1 25.1 27.4 ...
## $ employed : num 23986 85953 8597 8294 22189 ...
## $ private_work : num 73.6 81.5 71.8 76.8 82 79.5 77.4 74.1 85.1 73.1 ...
## $ public_work : num 20.9 12.3 20.8 16.1 13.5 15.1 16.2 20.8 12.1 18.5 ...
## $ self_employed : num 5.5 5.8 7.3 6.7 4.2 5.4 6.2 5 2.8 7.9 ...
## $ family_work : num 0 0.4 0.1 0.4 0.4 0 0.2 0.1 0 0.5 ...
## $ unemployment : num 7.6 7.5 17.6 8.3 7.7 18 10.9 12.3 8.9 7.9 ...
## - attr(*, "spec")=
## .. cols(
## .. census_id = col_double(),
## .. state = col_character(),
## .. county = col_character(),
## .. total_pop = col_double(),
## .. men = col_double(),
## .. women = col_double(),
## .. hispanic = col_double(),
## .. white = col_double(),
## .. black = col_double(),
## .. native = col_double(),
## .. asian = col_double(),
## .. pacific = col_double(),
## .. citizen = col_double(),
## .. income = col_double(),
## .. income_per_cap = col_double(),
## .. poverty = col_double(),
## .. child_poverty = col_double(),
## .. professional = col_double(),
## .. service = col_double(),
## .. office = col_double(),
## .. construction = col_double(),
## .. production = col_double(),
## .. drive = col_double(),
## .. carpool = col_double(),
## .. transit = col_double(),
## .. walk = col_double(),
## .. other_transp = col_double(),
## .. work_at_home = col_double(),
## .. mean_commute = col_double(),
## .. employed = col_double(),
## .. private_work = col_double(),
## .. public_work = col_double(),
## .. self_employed = col_double(),
## .. family_work = col_double(),
## .. unemployment = col_double()
## .. )
I’ll make the state and country as factors as that would be more relevant.
acs_data$state <- as.factor(acs_data$state)
acs_data$county <- as.factor(acs_data$county)
glimpse(acs_data)
## Observations: 3,142
## Variables: 35
## $ census_id <dbl> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017...
## $ state <fct> Alabama, Alabama, Alabama, Alabama, Alabama, Alabama...
## $ county <fct> Autauga, Baldwin, Barbour, Bibb, Blount, Bullock, Bu...
## $ total_pop <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11...
## $ men <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274...
## $ women <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 6037...
## $ hispanic <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7....
## $ white <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3...
## $ black <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, ...
## $ native <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0....
## $ asian <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0....
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....
## $ citizen <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 886...
## $ income <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 417...
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 213...
## $ poverty <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6...
## $ child_poverty <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2...
## $ professional <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3...
## $ service <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5...
## $ office <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3...
## $ construction <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5,...
## $ production <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4...
## $ drive <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1...
## $ carpool <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 1...
## $ transit <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0....
## $ walk <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1....
## $ other_transp <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1....
## $ work_at_home <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1....
## $ mean_commute <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1...
## $ employed <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, ...
## $ private_work <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1...
## $ public_work <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1...
## $ self_employed <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4....
## $ family_work <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0....
## $ unemployment <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9...
All the variable look appropriate now.
sum(is.na(acs_data))
## [1] 2
apply(is.na(acs_data),2,sum)
## census_id state county total_pop men
## 0 0 0 0 0
## women hispanic white black native
## 0 0 0 0 0
## asian pacific citizen income income_per_cap
## 0 0 0 1 0
## poverty child_poverty professional service office
## 0 1 0 0 0
## construction production drive carpool transit
## 0 0 0 0 0
## walk other_transp work_at_home mean_commute employed
## 0 0 0 0 0
## private_work public_work self_employed family_work unemployment
## 0 0 0 0 0
As seen there are missing values in ‘child_poverty’ and ‘income’ fields and both have one missing values.
Imputing values in such a small dataset wont make sense, so I’ll remove those observations.
acs_data <- drop_na(acs_data)
sum(is.na(acs_data))
## [1] 0
summary(acs_data)
## census_id state county total_pop
## Min. : 1001 Texas : 253 Washington: 31 Min. : 267
## 1st Qu.:18179 Georgia : 159 Jefferson : 26 1st Qu.: 11036
## Median :29176 Virginia: 133 Franklin : 25 Median : 25793
## Mean :30383 Kentucky: 120 Jackson : 24 Mean : 100801
## 3rd Qu.:45080 Missouri: 115 Lincoln : 24 3rd Qu.: 67620
## Max. :56045 Kansas : 105 Madison : 20 Max. :10038388
## (Other) :2255 (Other) :2990
## men women hispanic white
## Min. : 136 Min. : 131 Min. : 0.000 Min. : 0.90
## 1st Qu.: 5551 1st Qu.: 5488 1st Qu.: 1.900 1st Qu.:65.67
## Median : 12838 Median : 12916 Median : 3.700 Median :84.65
## Mean : 49597 Mean : 51204 Mean : 8.819 Mean :77.31
## 3rd Qu.: 33328 3rd Qu.: 34123 3rd Qu.: 9.000 3rd Qu.:93.33
## Max. :4945351 Max. :5093037 Max. :98.700 Max. :99.80
##
## black native asian pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.600 1st Qu.: 0.100 1st Qu.: 0.200 1st Qu.: 0.00000
## Median : 2.100 Median : 0.300 Median : 0.500 Median : 0.00000
## Mean : 8.885 Mean : 1.763 Mean : 1.253 Mean : 0.07357
## 3rd Qu.:10.200 3rd Qu.: 0.600 3rd Qu.: 1.200 3rd Qu.: 0.00000
## Max. :85.900 Max. :92.100 Max. :41.600 Max. :11.10000
##
## citizen income income_per_cap poverty
## Min. : 199 Min. : 19328 Min. : 8292 Min. : 1.4
## 1st Qu.: 8276 1st Qu.: 38826 1st Qu.:20470 1st Qu.:12.0
## Median : 19454 Median : 45095 Median :23575 Median :16.0
## Mean : 70849 Mean : 46824 Mean :24331 Mean :16.7
## 3rd Qu.: 50795 3rd Qu.: 52248 3rd Qu.:27138 3rd Qu.:20.3
## Max. :6046749 Max. :123453 Max. :65600 Max. :53.3
##
## child_poverty professional service office
## Min. : 0.00 Min. :13.50 Min. : 5.00 Min. : 4.10
## 1st Qu.:16.10 1st Qu.:26.70 1st Qu.:15.90 1st Qu.:20.20
## Median :22.50 Median :30.00 Median :18.00 Median :22.40
## Mean :23.29 Mean :31.05 Mean :18.25 Mean :22.13
## 3rd Qu.:29.50 3rd Qu.:34.42 3rd Qu.:20.20 3rd Qu.:24.30
## Max. :72.30 Max. :74.00 Max. :36.60 Max. :35.40
##
## construction production drive carpool
## Min. : 1.70 Min. : 0.00 Min. : 5.2 Min. : 0.00
## 1st Qu.: 9.80 1st Qu.:11.50 1st Qu.:76.6 1st Qu.: 8.50
## Median :12.20 Median :15.40 Median :80.6 Median : 9.90
## Mean :12.75 Mean :15.82 Mean :79.1 Mean :10.33
## 3rd Qu.:15.00 3rd Qu.:19.40 3rd Qu.:83.6 3rd Qu.:11.90
## Max. :40.30 Max. :55.60 Max. :94.6 Max. :29.90
##
## transit walk other_transp work_at_home
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.1000 1st Qu.: 1.400 1st Qu.: 0.90 1st Qu.: 2.800
## Median : 0.4000 Median : 2.400 Median : 1.30 Median : 4.000
## Mean : 0.9681 Mean : 3.294 Mean : 1.61 Mean : 4.694
## 3rd Qu.: 0.8000 3rd Qu.: 4.000 3rd Qu.: 1.90 3rd Qu.: 5.700
## Max. :61.7000 Max. :71.200 Max. :39.10 Max. :37.200
##
## mean_commute employed private_work public_work
## Min. : 4.90 Min. : 166 Min. :29.50 Min. : 5.80
## 1st Qu.:19.30 1st Qu.: 4532 1st Qu.:70.90 1st Qu.:13.07
## Median :22.90 Median : 10657 Median :75.85 Median :16.10
## Mean :23.15 Mean : 46416 Mean :74.45 Mean :17.33
## 3rd Qu.:26.60 3rd Qu.: 29272 3rd Qu.:79.80 3rd Qu.:20.10
## Max. :44.00 Max. :4635465 Max. :88.30 Max. :66.20
##
## self_employed family_work unemployment
## Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 5.400 1st Qu.:0.1000 1st Qu.: 5.500
## Median : 6.900 Median :0.2000 Median : 7.500
## Mean : 7.922 Mean :0.2917 Mean : 7.815
## 3rd Qu.: 9.400 3rd Qu.:0.3000 3rd Qu.: 9.700
## Max. :36.600 Max. :9.8000 Max. :29.400
##
As visible from the summary() function, we have a columns with skewed variables, indicating outliers. We would need to perform additional analysis before we can make any solid conclusions.
nrow(subset(acs_data, women > men))
## [1] 1984
There are 1984 counties with more women than men.
6.How many counties have an unemployment rate lower than 10%?
nrow(subset(acs_data, unemployment < 10.0))
## [1] 2419
There are 2419 counties that have unemployment less than 10%.
acs_data %>%
arrange(desc(mean_commute)) %>%
select(census_id, state, county, mean_commute) %>%
top_n(n = 10)
## Selecting by mean_commute
## # A tibble: 10 x 4
## census_id state county mean_commute
## <dbl> <fct> <fct> <dbl>
## 1 42103 Pennsylvania Pike 44
## 2 36005 New York Bronx 43
## 3 24017 Maryland Charles 42.8
## 4 51187 Virginia Warren 42.7
## 5 36081 New York Queens 42.6
## 6 36085 New York Richmond 42.6
## 7 51193 Virginia Westmoreland 42.5
## 8 8093 Colorado Park 42.4
## 9 36047 New York Kings 41.7
## 10 54015 West Virginia Clay 41.4
Top 10 counties with the highest mean commute are shown above in desc order.
acs_data$Women_percentage = (acs_data$women/acs_data$total_pop)*100
women_data <- acs_data[order(acs_data$Women_percentage),]
women_data <- select(women_data,census_id,state,county,Women_percentage)
head(women_data,10)
## # A tibble: 10 x 4
## census_id state county Women_percentage
## <dbl> <fct> <fct> <dbl>
## 1 42053 Pennsylvania Forest 26.8
## 2 8011 Colorado Bent 31.4
## 3 51183 Virginia Sussex 31.5
## 4 13309 Georgia Wheeler 32.1
## 5 6035 California Lassen 33.2
## 6 48095 Texas Concho 33.3
## 7 13053 Georgia Chattahoochee 33.4
## 8 2013 Alaska Aleutians East Borough 33.5
## 9 22125 Louisiana West Feliciana 33.6
## 10 32027 Nevada Pershing 33.7
acs_data <- mutate(acs_data, race_percent = hispanic + white + black + native + asian + pacific)
acs_data %>%
select(race_percent, everything()) %>%
arrange(race_percent) %>%
top_n(10)
## Selecting by Women_percentage
## # A tibble: 10 x 37
## race_percent census_id state county total_pop men women hispanic white
## <dbl> <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 95.3 35011 New ~ De Ba~ 2020 907 1113 45.1 50.2
## 2 97.3 51790 Virg~ Staun~ 24193 10861 13332 2.6 81.6
## 3 97.5 13235 Geor~ Pulas~ 11590 4866 6724 4.7 59.1
## 4 98.3 51720 Virg~ Norto~ 4007 1626 2381 2.4 86.6
## 5 98.4 51620 Virg~ Frank~ 8457 3766 4691 1 38.2
## 6 99 29117 Miss~ Livin~ 15042 6787 8255 1.5 93.4
## 7 99.7 1119 Alab~ Sumter 13341 5905 7436 0.4 24.8
## 8 99.9 51091 Virg~ Highl~ 2244 1003 1241 0 99.5
## 9 100 28125 Miss~ Shark~ 4805 2139 2666 0.6 27.7
## 10 100. 48137 Texas Edwar~ 1906 854 1052 51 48.8
## # ... with 28 more variables: black <dbl>, native <dbl>, asian <dbl>,
## # pacific <dbl>, citizen <dbl>, income <dbl>, income_per_cap <dbl>,
## # poverty <dbl>, child_poverty <dbl>, professional <dbl>, service <dbl>,
## # office <dbl>, construction <dbl>, production <dbl>, drive <dbl>,
## # carpool <dbl>, transit <dbl>, walk <dbl>, other_transp <dbl>,
## # work_at_home <dbl>, mean_commute <dbl>, employed <dbl>, private_work <dbl>,
## # public_work <dbl>, self_employed <dbl>, family_work <dbl>,
## # unemployment <dbl>, Women_percentage <dbl>
The top 10 counties with the lowest sum of these race percentage variables are shown above.
acs_data %>%
select(race_percent, everything()) %>%
group_by(state) %>%
summarise(average_rp = mean(race_percent)) %>%
arrange(average_rp) %>%
top_n(1)
## Selecting by average_rp
## # A tibble: 1 x 2
## state average_rp
## <fct> <dbl>
## 1 Mississippi 99.2
acs_data %>%
filter(race_percent > 100) %>%
nrow()
## [1] 11
As seen above, there are 11 obs that have sum greater than 100%
acs_data %>%
select(race_percent, everything()) %>%
group_by(state) %>%
filter(race_percent == 100) %>%
nrow()
## [1] 27
acs_data$carpool_rank = min_rank(-(acs_data$carpool))
acs_carpool_data = acs_data[order(acs_data$carpool_rank),]
acs_carpool <- select(acs_carpool_data,census_id, county,state, carpool, carpool_rank)
head(acs_carpool,10)
## # A tibble: 10 x 5
## census_id county state carpool carpool_rank
## <dbl> <fct> <fct> <dbl> <int>
## 1 13061 Clay Georgia 29.9 1
## 2 18087 LaGrange Indiana 27 2
## 3 13165 Jenkins Georgia 25.3 3
## 4 5133 Sevier Arkansas 24.4 4
## 5 20175 Seward Kansas 23.4 5
## 6 48079 Cochran Texas 22.8 6
## 7 48247 Jim Hogg Texas 22.6 7
## 8 48393 Roberts Texas 22.4 8
## 9 39075 Holmes Ohio 21.8 9
## 10 21197 Powell Kentucky 21.6 10
The output above shows 10 highest ranked counties for carpooling
acs_carpool <- select(acs_carpool_data,census_id, county,state, carpool, carpool_rank)
tail(acs_carpool,10)
## # A tibble: 10 x 5
## census_id county state carpool carpool_rank
## <dbl> <fct> <fct> <dbl> <int>
## 1 51720 Norton city Virginia 2.8 3130
## 2 30019 Daniels Montana 2.6 3132
## 3 31057 Dundy Nebraska 2.6 3132
## 4 13309 Wheeler Georgia 2.3 3134
## 5 38029 Emmons North Dakota 2.3 3134
## 6 36061 New York New York 1.9 3136
## 7 31183 Wheeler Nebraska 1.3 3137
## 8 48235 Irion Texas 0.9 3138
## 9 48261 Kenedy Texas 0 3139
## 10 48269 King Texas 0 3139
The output above shows 10 lowest ranked counties for carpooling
acs_carpool_statewise = group_by(acs_data, state) %>% summarize(avg_carpool_rt = mean(carpool))
acs_carpool_statewise = acs_carpool_statewise[order(-acs_carpool_statewise$avg_carpool_rt),]
head(acs_carpool_statewise,1)
## # A tibble: 1 x 2
## state avg_carpool_rt
## <fct> <dbl>
## 1 Hawaii 12.8
Hawaii is the best state for carpooling
head(acs_carpool_statewise,5)
## # A tibble: 5 x 2
## state avg_carpool_rt
## <fct> <dbl>
## 1 Hawaii 12.8
## 2 Alaska 12.1
## 3 Arkansas 11.9
## 4 Utah 11.9
## 5 Texas 11.8
the result above shows the top 5 states for carpooling