library(tidyverse);
## -- Attaching packages ---------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl);
acs<- read_excel("C:/Users/anger/OneDrive - University of Cincinnati/BANA 7025/Week 4/homework3/acs_2015_county_data_revised.xlsx");
summary(acs);
## census_id state county total_pop
## Min. : 1001 Length:3142 Length:3142 Min. : 85
## 1st Qu.:18178 Class :character Class :character 1st Qu.: 11028
## Median :29176 Mode :character Mode :character Median : 25768
## Mean :30384 Mean : 100737
## 3rd Qu.:45081 3rd Qu.: 67552
## Max. :56045 Max. :10038388
##
## men women hispanic white
## Min. : 42 Min. : 43 Min. : 0.000 Min. : 0.90
## 1st Qu.: 5546 1st Qu.: 5466 1st Qu.: 1.900 1st Qu.:65.60
## Median : 12826 Median : 12907 Median : 3.700 Median :84.60
## Mean : 49565 Mean : 51171 Mean : 8.826 Mean :77.28
## 3rd Qu.: 33319 3rd Qu.: 34122 3rd Qu.: 9.000 3rd Qu.:93.30
## Max. :4945351 Max. :5093037 Max. :98.700 Max. :99.80
##
## black native asian pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.600 1st Qu.: 0.100 1st Qu.: 0.200 1st Qu.: 0.00000
## Median : 2.100 Median : 0.300 Median : 0.500 Median : 0.00000
## Mean : 8.879 Mean : 1.766 Mean : 1.258 Mean : 0.08475
## 3rd Qu.:10.175 3rd Qu.: 0.600 3rd Qu.: 1.200 3rd Qu.: 0.00000
## Max. :85.900 Max. :92.100 Max. :41.600 Max. :35.30000
##
## citizen income income_per_cap poverty
## Min. : 80 Min. : 19328 Min. : 8292 Min. : 1.4
## 1st Qu.: 8254 1st Qu.: 38826 1st Qu.:20471 1st Qu.:12.0
## Median : 19434 Median : 45111 Median :23577 Median :16.0
## Mean : 70804 Mean : 46830 Mean :24338 Mean :16.7
## 3rd Qu.: 50728 3rd Qu.: 52250 3rd Qu.:27138 3rd Qu.:20.3
## Max. :6046749 Max. :123453 Max. :65600 Max. :53.3
## NA's :1
## child_poverty professional service office
## Length:3142 Min. :13.50 Min. : 5.00 Min. : 4.10
## Class :character 1st Qu.:26.70 1st Qu.:15.90 1st Qu.:20.20
## Mode :character Median :30.00 Median :18.00 Median :22.40
## Mean :31.04 Mean :18.26 Mean :22.13
## 3rd Qu.:34.40 3rd Qu.:20.20 3rd Qu.:24.30
## Max. :74.00 Max. :36.60 Max. :35.40
##
## construction production drive carpool
## Min. : 1.70 Min. : 0.00 Min. : 5.20 Min. : 0.00
## 1st Qu.: 9.80 1st Qu.:11.53 1st Qu.:76.60 1st Qu.: 8.50
## Median :12.20 Median :15.40 Median :80.60 Median : 9.90
## Mean :12.74 Mean :15.82 Mean :79.08 Mean :10.33
## 3rd Qu.:15.00 3rd Qu.:19.40 3rd Qu.:83.60 3rd Qu.:11.88
## Max. :40.30 Max. :55.60 Max. :94.60 Max. :29.90
##
## transit walk other_transp work_at_home
## Min. : 0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.1000 1st Qu.: 1.400 1st Qu.: 0.900 1st Qu.: 2.800
## Median : 0.4000 Median : 2.400 Median : 1.300 Median : 4.000
## Mean : 0.9675 Mean : 3.307 Mean : 1.614 Mean : 4.697
## 3rd Qu.: 0.8000 3rd Qu.: 4.000 3rd Qu.: 1.900 3rd Qu.: 5.700
## Max. :61.7000 Max. :71.200 Max. :39.100 Max. :37.200
##
## mean_commute employed private_work public_work
## Min. : 4.90 Min. : 62 Min. :25.00 Min. : 5.80
## 1st Qu.:19.30 1st Qu.: 4524 1st Qu.:70.90 1st Qu.:13.10
## Median :22.90 Median : 10644 Median :75.80 Median :16.10
## Mean :23.15 Mean : 46387 Mean :74.44 Mean :17.35
## 3rd Qu.:26.60 3rd Qu.: 29254 3rd Qu.:79.80 3rd Qu.:20.10
## Max. :44.00 Max. :4635465 Max. :88.30 Max. :66.20
##
## self_employed family_work unemployment
## Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 5.400 1st Qu.:0.1000 1st Qu.: 5.500
## Median : 6.900 Median :0.2000 Median : 7.500
## Mean : 7.921 Mean :0.2915 Mean : 7.815
## 3rd Qu.: 9.400 3rd Qu.:0.3000 3rd Qu.: 9.700
## Max. :36.600 Max. :9.8000 Max. :29.400
##
str(acs)
## tibble [3,142 x 35] (S3: tbl_df/tbl/data.frame)
## $ census_id : num [1:3142] 1001 1003 1005 1007 1009 ...
## $ state : chr [1:3142] "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ county : chr [1:3142] "Autauga" "Baldwin" "Barbour" "Bibb" ...
## $ total_pop : num [1:3142] 55221 195121 26932 22604 57710 ...
## $ men : num [1:3142] 26745 95314 14497 12073 28512 ...
## $ women : num [1:3142] 28476 99807 12435 10531 29198 ...
## $ hispanic : num [1:3142] 2.6 4.5 4.6 2.2 8.6 4.4 1.2 3.5 0.4 1.5 ...
## $ white : num [1:3142] 75.8 83.1 46.2 74.5 87.9 22.2 53.3 73 57.3 91.7 ...
## $ black : num [1:3142] 18.5 9.5 46.7 21.4 1.5 70.7 43.8 20.3 40.3 4.8 ...
## $ native : num [1:3142] 0.4 0.6 0.2 0.4 0.3 1.2 0.1 0.2 0.2 0.6 ...
## $ asian : num [1:3142] 1 0.7 0.4 0.1 0.1 0.2 0.4 0.9 0.8 0.3 ...
## $ pacific : num [1:3142] 0 0 0 0 0 0 0 0 0 0 ...
## $ citizen : num [1:3142] 40725 147695 20714 17495 42345 ...
## $ income : num [1:3142] 51281 50254 32964 38678 45813 ...
## $ income_per_cap: num [1:3142] 24974 27317 16824 18431 20532 ...
## $ poverty : num [1:3142] 12.9 13.4 26.7 16.8 16.7 24.6 25.4 20.5 21.6 19.2 ...
## $ child_poverty : chr [1:3142] "18.600000000000001" "19.2" "45.3" "27.9" ...
## $ professional : num [1:3142] 33.2 33.1 26.8 21.5 28.5 18.8 27.5 27.3 23.3 29.3 ...
## $ service : num [1:3142] 17 17.7 16.1 17.9 14.1 15 16.6 17.7 14.5 16 ...
## $ office : num [1:3142] 24.2 27.1 23.1 17.8 23.9 19.7 21.9 24.2 26.3 19.5 ...
## $ construction : num [1:3142] 8.6 10.8 10.8 19 13.5 20.1 10.3 10.5 11.5 13.7 ...
## $ production : num [1:3142] 17.1 11.2 23.1 23.7 19.9 26.4 23.7 20.4 24.4 21.5 ...
## $ drive : num [1:3142] 87.5 84.7 83.8 83.2 84.9 74.9 84.5 85.3 85.1 83.9 ...
## $ carpool : num [1:3142] 8.8 8.8 10.9 13.5 11.2 14.9 12.4 9.4 11.9 12.1 ...
## $ transit : num [1:3142] 0.1 0.1 0.4 0.5 0.4 0.7 0 0.2 0.2 0.2 ...
## $ walk : num [1:3142] 0.5 1 1.8 0.6 0.9 5 0.8 1.2 0.3 0.6 ...
## $ other_transp : num [1:3142] 1.3 1.4 1.5 1.5 0.4 1.7 0.6 1.2 0.4 0.7 ...
## $ work_at_home : num [1:3142] 1.8 3.9 1.6 0.7 2.3 2.8 1.7 2.7 2.1 2.5 ...
## $ mean_commute : num [1:3142] 26.5 26.4 24.1 28.8 34.9 27.5 24.6 24.1 25.1 27.4 ...
## $ employed : num [1:3142] 23986 85953 8597 8294 22189 ...
## $ private_work : num [1:3142] 73.6 81.5 71.8 76.8 82 79.5 77.4 74.1 85.1 73.1 ...
## $ public_work : num [1:3142] 20.9 12.3 20.8 16.1 13.5 15.1 16.2 20.8 12.1 18.5 ...
## $ self_employed : num [1:3142] 5.5 5.8 7.3 6.7 4.2 5.4 6.2 5 2.8 7.9 ...
## $ family_work : num [1:3142] 0 0.4 0.1 0.4 0.4 0 0.2 0.1 0 0.5 ...
## $ unemployment : num [1:3142] 7.6 7.5 17.6 8.3 7.7 18 10.9 12.3 8.9 7.9 ...
There are 35 columns and 3142 rows
Child poverty needs to be changed from a character to a numeric variable
acs$child_poverty <- as.numeric(as.character(acs$child_poverty))
glimpse(acs)
## Rows: 3,142
## Columns: 35
## $ census_id <dbl> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017...
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama...
## $ county <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "...
## $ total_pop <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11...
## $ men <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274...
## $ women <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 6037...
## $ hispanic <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7....
## $ white <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3...
## $ black <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, ...
## $ native <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0....
## $ asian <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0....
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....
## $ citizen <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 886...
## $ income <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 417...
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 213...
## $ poverty <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6...
## $ child_poverty <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2...
## $ professional <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3...
## $ service <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5...
## $ office <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3...
## $ construction <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5,...
## $ production <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4...
## $ drive <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1...
## $ carpool <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 1...
## $ transit <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0....
## $ walk <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1....
## $ other_transp <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1....
## $ work_at_home <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1....
## $ mean_commute <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1...
## $ employed <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, ...
## $ private_work <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1...
## $ public_work <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1...
## $ self_employed <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4....
## $ family_work <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0....
## $ unemployment <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9...
Out of all 3142 there are only 2 missing values. Since the number of missing values is insignificant, the two missing values can be ignored because they will have minimal, if any effect on the data.
sum(is.na(acs))
## [1] 2
summary(acs)
## census_id state county total_pop
## Min. : 1001 Length:3142 Length:3142 Min. : 85
## 1st Qu.:18178 Class :character Class :character 1st Qu.: 11028
## Median :29176 Mode :character Mode :character Median : 25768
## Mean :30384 Mean : 100737
## 3rd Qu.:45081 3rd Qu.: 67552
## Max. :56045 Max. :10038388
##
## men women hispanic white
## Min. : 42 Min. : 43 Min. : 0.000 Min. : 0.90
## 1st Qu.: 5546 1st Qu.: 5466 1st Qu.: 1.900 1st Qu.:65.60
## Median : 12826 Median : 12907 Median : 3.700 Median :84.60
## Mean : 49565 Mean : 51171 Mean : 8.826 Mean :77.28
## 3rd Qu.: 33319 3rd Qu.: 34122 3rd Qu.: 9.000 3rd Qu.:93.30
## Max. :4945351 Max. :5093037 Max. :98.700 Max. :99.80
##
## black native asian pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.600 1st Qu.: 0.100 1st Qu.: 0.200 1st Qu.: 0.00000
## Median : 2.100 Median : 0.300 Median : 0.500 Median : 0.00000
## Mean : 8.879 Mean : 1.766 Mean : 1.258 Mean : 0.08475
## 3rd Qu.:10.175 3rd Qu.: 0.600 3rd Qu.: 1.200 3rd Qu.: 0.00000
## Max. :85.900 Max. :92.100 Max. :41.600 Max. :35.30000
##
## citizen income income_per_cap poverty
## Min. : 80 Min. : 19328 Min. : 8292 Min. : 1.4
## 1st Qu.: 8254 1st Qu.: 38826 1st Qu.:20471 1st Qu.:12.0
## Median : 19434 Median : 45111 Median :23577 Median :16.0
## Mean : 70804 Mean : 46830 Mean :24338 Mean :16.7
## 3rd Qu.: 50728 3rd Qu.: 52250 3rd Qu.:27138 3rd Qu.:20.3
## Max. :6046749 Max. :123453 Max. :65600 Max. :53.3
## NA's :1
## child_poverty professional service office
## Min. : 0.00 Min. :13.50 Min. : 5.00 Min. : 4.10
## 1st Qu.:16.10 1st Qu.:26.70 1st Qu.:15.90 1st Qu.:20.20
## Median :22.50 Median :30.00 Median :18.00 Median :22.40
## Mean :23.29 Mean :31.04 Mean :18.26 Mean :22.13
## 3rd Qu.:29.50 3rd Qu.:34.40 3rd Qu.:20.20 3rd Qu.:24.30
## Max. :72.30 Max. :74.00 Max. :36.60 Max. :35.40
## NA's :1
## construction production drive carpool
## Min. : 1.70 Min. : 0.00 Min. : 5.20 Min. : 0.00
## 1st Qu.: 9.80 1st Qu.:11.53 1st Qu.:76.60 1st Qu.: 8.50
## Median :12.20 Median :15.40 Median :80.60 Median : 9.90
## Mean :12.74 Mean :15.82 Mean :79.08 Mean :10.33
## 3rd Qu.:15.00 3rd Qu.:19.40 3rd Qu.:83.60 3rd Qu.:11.88
## Max. :40.30 Max. :55.60 Max. :94.60 Max. :29.90
##
## transit walk other_transp work_at_home
## Min. : 0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.1000 1st Qu.: 1.400 1st Qu.: 0.900 1st Qu.: 2.800
## Median : 0.4000 Median : 2.400 Median : 1.300 Median : 4.000
## Mean : 0.9675 Mean : 3.307 Mean : 1.614 Mean : 4.697
## 3rd Qu.: 0.8000 3rd Qu.: 4.000 3rd Qu.: 1.900 3rd Qu.: 5.700
## Max. :61.7000 Max. :71.200 Max. :39.100 Max. :37.200
##
## mean_commute employed private_work public_work
## Min. : 4.90 Min. : 62 Min. :25.00 Min. : 5.80
## 1st Qu.:19.30 1st Qu.: 4524 1st Qu.:70.90 1st Qu.:13.10
## Median :22.90 Median : 10644 Median :75.80 Median :16.10
## Mean :23.15 Mean : 46387 Mean :74.44 Mean :17.35
## 3rd Qu.:26.60 3rd Qu.: 29254 3rd Qu.:79.80 3rd Qu.:20.10
## Max. :44.00 Max. :4635465 Max. :88.30 Max. :66.20
##
## self_employed family_work unemployment
## Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 5.400 1st Qu.:0.1000 1st Qu.: 5.500
## Median : 6.900 Median :0.2000 Median : 7.500
## Mean : 7.921 Mean :0.2915 Mean : 7.815
## 3rd Qu.: 9.400 3rd Qu.:0.3000 3rd Qu.: 9.700
## Max. :36.600 Max. :9.8000 Max. :29.400
##
There is a significant difference between the median and mean for total population. The median value is 25,768, the mean value is 100,737, and the 3rd quartile is 67,552 indicating the presence of multiple extreme large values. The means for the men and women variables are also significantly larger than the medians because these two variables comprise the total population variable. Extreme values will not be removed because a large portion of the population would be completely ommitted from the data.
All of the variables with irregularities have means that are larger rather than smaller than the median and that are larger than the third quartile. This trend is most likely due the difference in demographics in different areas. As we know, counties across the country are not homogeneous. Different counties can have vastly different populations with different races, ethnicities, ages, gender, etc. Overall, I do not recommend removing any irregular data to ensure the entire population is accounted for.
##5 Gender
filter(acs, women > men)
## # A tibble: 1,985 x 35
## census_id state county total_pop men women hispanic white black native
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1001 Alab~ Autau~ 55221 26745 28476 2.6 75.8 18.5 0.4
## 2 1003 Alab~ Baldw~ 195121 95314 99807 4.5 83.1 9.5 0.6
## 3 1009 Alab~ Blount 57710 28512 29198 8.6 87.9 1.5 0.3
## 4 1013 Alab~ Butler 20354 9502 10852 1.2 53.3 43.8 0.1
## 5 1015 Alab~ Calho~ 116648 56274 60374 3.5 73 20.3 0.2
## 6 1017 Alab~ Chamb~ 34079 16258 17821 0.4 57.3 40.3 0.2
## 7 1019 Alab~ Chero~ 26008 12975 13033 1.5 91.7 4.8 0.6
## 8 1021 Alab~ Chilt~ 43819 21619 22200 7.6 80.5 10.2 0.4
## 9 1023 Alab~ Choct~ 13395 6382 7013 0.4 55.9 42.9 0
## 10 1025 Alab~ Clarke 25070 11834 13236 0.3 53.4 45.3 0
## # ... with 1,975 more rows, and 25 more variables: asian <dbl>, pacific <dbl>,
## # citizen <dbl>, income <dbl>, income_per_cap <dbl>, poverty <dbl>,
## # child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## # construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## # transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## # mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## # self_employed <dbl>, family_work <dbl>, unemployment <dbl>
As you can see by the filtered data above, 1,985 counties have more women than men.
filter(acs, unemployment < 10)
## # A tibble: 2,420 x 35
## census_id state county total_pop men women hispanic white black native
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1001 Alab~ Autau~ 55221 26745 28476 2.6 75.8 18.5 0.4
## 2 1003 Alab~ Baldw~ 195121 95314 99807 4.5 83.1 9.5 0.6
## 3 1007 Alab~ Bibb 22604 12073 10531 2.2 74.5 21.4 0.4
## 4 1009 Alab~ Blount 57710 28512 29198 8.6 87.9 1.5 0.3
## 5 1017 Alab~ Chamb~ 34079 16258 17821 0.4 57.3 40.3 0.2
## 6 1019 Alab~ Chero~ 26008 12975 13033 1.5 91.7 4.8 0.6
## 7 1021 Alab~ Chilt~ 43819 21619 22200 7.6 80.5 10.2 0.4
## 8 1027 Alab~ Clay 13537 6671 6866 3.2 79.9 14.4 0.7
## 9 1029 Alab~ Clebu~ 15002 7334 7668 2.3 92.5 2.9 0.2
## 10 1031 Alab~ Coffee 50884 25174 25710 6.4 71.5 17.2 0.8
## # ... with 2,410 more rows, and 25 more variables: asian <dbl>, pacific <dbl>,
## # citizen <dbl>, income <dbl>, income_per_cap <dbl>, poverty <dbl>,
## # child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## # construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## # transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## # mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## # self_employed <dbl>, family_work <dbl>, unemployment <dbl>
2,420 counties have an unemployment rate less than 10%.
##7 Commute
top_n(acs, 10, mean_commute)
## # A tibble: 10 x 35
## census_id state county total_pop men women hispanic white black native
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8093 Colo~ Park 16189 8.52e3 7.66e3 5.7 90.4 0 1.1
## 2 24017 Mary~ Charl~ 152754 7.37e4 7.90e4 5 45.4 41.8 0.7
## 3 36005 New ~ Bronx 1428357 6.72e5 7.56e5 54.6 10.3 29.6 0.2
## 4 36047 New ~ Kings 2595259 1.23e6 1.37e6 19.6 35.7 31.2 0.2
## 5 36081 New ~ Queens 2301139 1.12e6 1.19e6 27.9 26.1 17.4 0.2
## 6 36085 New ~ Richm~ 472481 2.29e5 2.44e5 17.8 62.8 9.6 0.1
## 7 42103 Penn~ Pike 56632 2.82e4 2.84e4 9.9 81.8 5.5 0.2
## 8 51187 Virg~ Warren 38481 1.93e4 1.92e4 3.9 87.9 4 0.2
## 9 51193 Virg~ Westm~ 17557 8.58e3 8.98e3 6.1 63.5 27.9 0.2
## 10 54015 West~ Clay 9141 4.58e3 4.56e3 0.1 97.9 0 0.4
## # ... with 25 more variables: asian <dbl>, pacific <dbl>, citizen <dbl>,
## # income <dbl>, income_per_cap <dbl>, poverty <dbl>, child_poverty <dbl>,
## # professional <dbl>, service <dbl>, office <dbl>, construction <dbl>,
## # production <dbl>, drive <dbl>, carpool <dbl>, transit <dbl>, walk <dbl>,
## # other_transp <dbl>, work_at_home <dbl>, mean_commute <dbl>, employed <dbl>,
## # private_work <dbl>, public_work <dbl>, self_employed <dbl>,
## # family_work <dbl>, unemployment <dbl>
The top 10 counties with the highest mean commute in no particular order are Park, Charles, Bronx, Kings, Queens, Richmond, Pike, Warren, Westmoreland, and Clay county. The average mean commute in these counties ranges from 41.4 to 44 minutes.
acs %>%
mutate(acs,percent_women = women/total_pop) %>%
select(census_id,state,county,percent_women) %>%
arrange(percent_women)
## # A tibble: 3,142 x 4
## census_id state county percent_women
## <dbl> <chr> <chr> <dbl>
## 1 42053 Pennsylvania Forest 0.268
## 2 8011 Colorado Bent 0.314
## 3 51183 Virginia Sussex 0.315
## 4 13309 Georgia Wheeler 0.321
## 5 6035 California Lassen 0.332
## 6 48095 Texas Concho 0.333
## 7 13053 Georgia Chattahoochee 0.334
## 8 2013 Alaska Aleutians East Borough 0.335
## 9 22125 Louisiana West Feliciana 0.336
## 10 32027 Nevada Pershing 0.337
## # ... with 3,132 more rows
The 10 counties with the lowest percentage of women, in no particular order are Forest, Bent, Sussez, Wheeler, Lassen, Concho, Chattahoochee, Aleutians East Borough, West Feliciana, Pershing county.
acs %>%
mutate(acs, percent_all_races = hispanic + white + black + native + asian + pacific) %>%
select(census_id, state, county, percent_all_races) %>%
arrange(percent_all_races)
## # A tibble: 3,142 x 4
## census_id state county percent_all_races
## <dbl> <chr> <chr> <dbl>
## 1 15001 Hawaii Hawaii 76.4
## 2 15009 Hawaii Maui 79.2
## 3 40097 Oklahoma Mayes 79.7
## 4 15003 Hawaii Honolulu 81.5
## 5 40123 Oklahoma Pontotoc 82.8
## 6 47061 Tennessee Grundy 83.
## 7 2282 Alaska Yakutat City and Borough 83.4
## 8 40069 Oklahoma Johnston 84
## 9 15007 Hawaii Kauai 84.1
## 10 40003 Oklahoma Alfalfa 85.1
## # ... with 3,132 more rows
acs %>%
mutate(acs, percent_all_races = hispanic + white + black + native + asian + pacific) %>%
select(state, percent_all_races) %>%
group_by(state) %>%
summarise(state_mean = mean(percent_all_races, na.rm = TRUE)) %>%
arrange(state_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
## state state_mean
## <chr> <dbl>
## 1 Hawaii 84
## 2 Alaska 92.7
## 3 Oklahoma 92.8
## 4 Washington 96.7
## 5 California 96.9
## 6 Oregon 97.1
## 7 Delaware 97.3
## 8 Massachusetts 97.5
## 9 Maryland 97.6
## 10 District of Columbia 97.6
## # ... with 41 more rows
acs %>%
mutate(acs, percent_all_races = hispanic + white + black + native + asian + pacific) %>%
select(state, percent_all_races) %>%
group_by(state) %>%
summarise(state_mean = mean(percent_all_races, na.rm = TRUE)) %>%
arrange(state_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
## state state_mean
## <chr> <dbl>
## 1 Hawaii 84
## 2 Alaska 92.7
## 3 Oklahoma 92.8
## 4 Washington 96.7
## 5 California 96.9
## 6 Oregon 97.1
## 7 Delaware 97.3
## 8 Massachusetts 97.5
## 9 Maryland 97.6
## 10 District of Columbia 97.6
## # ... with 41 more rows
acs %>%
mutate(acs, percent_all_races = hispanic + white + black + native + asian + pacific) %>%
select(state,county,percent_all_races) %>%
filter(percent_all_races > 100.0) %>%
arrange(desc(percent_all_races))
## # A tibble: 11 x 3
## state county percent_all_races
## <chr> <chr> <dbl>
## 1 Nebraska Gosper 100.
## 2 Nebraska Hooker 100.
## 3 Texas Bailey 100.
## 4 Texas Edwards 100.
## 5 Nebraska Nance 100.
## 6 Mississippi Claiborne 100.
## 7 Texas Duval 100.
## 8 Texas Kenedy 100.
## 9 Texas Kent 100.
## 10 Texas Presidio 100.
## 11 Utah Beaver 100.
acs %>%
mutate(acs, percent_all_races = hispanic + white + black + native + asian + pacific) %>%
select(county,percent_all_races) %>%
group_by(county) %>%
summarise(county_mean = mean(percent_all_races,na.rm = TRUE)) %>%
arrange(county_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1,851 x 2
## county county_mean
## <chr> <dbl>
## 1 Hawaii 76.4
## 2 Maui 79.2
## 3 Mayes 79.7
## 4 Honolulu 81.5
## 5 Yakutat City and Borough 83.4
## 6 Kauai 84.1
## 7 Alfalfa 85.1
## 8 Sequoyah 86.4
## 9 Coal 86.6
## 10 Bennett 86.9
## # ... with 1,841 more rows
####A The top 10 counties with the lowest sum of percentages are Hawaii, Maui, Mayes, Honolulu, Pontotoc, Grundy, Yakutat City and Borough, Johnston, Kauai, Alfalfa county.
####B Hawaii has the lowest percentage of all races with only 84%.
####C Five counties have a sum greater than 100: Gosper, Hooker, Bailey, Edwards, and Nance.
####D Six counties have a sum exactly equal to 100: Claiborne, Duval, Kenedy, Kent, Presidio, and Beaver.
acs %>%
select(census_id, state, county, carpool) %>%
mutate(carpool_rank = min_rank(carpool)) %>%
arrange(carpool_rank)
## # A tibble: 3,142 x 5
## census_id state county carpool carpool_rank
## <dbl> <chr> <chr> <dbl> <int>
## 1 48261 Texas Kenedy 0 1
## 2 48269 Texas King 0 1
## 3 48235 Texas Irion 0.9 3
## 4 31183 Nebraska Wheeler 1.3 4
## 5 36061 New York New York 1.9 5
## 6 13309 Georgia Wheeler 2.3 6
## 7 38029 North Dakota Emmons 2.3 6
## 8 30019 Montana Daniels 2.6 8
## 9 31057 Nebraska Dundy 2.6 8
## 10 46069 South Dakota Hyde 2.8 10
## # ... with 3,132 more rows
acs %>%
select(census_id, state, county, carpool) %>%
mutate(carpool_rank = min_rank(carpool)) %>%
arrange(desc(carpool_rank))
## # A tibble: 3,142 x 5
## census_id state county carpool carpool_rank
## <dbl> <chr> <chr> <dbl> <int>
## 1 13061 Georgia Clay 29.9 3142
## 2 18087 Indiana LaGrange 27 3141
## 3 13165 Georgia Jenkins 25.3 3140
## 4 5133 Arkansas Sevier 24.4 3139
## 5 20175 Kansas Seward 23.4 3138
## 6 48079 Texas Cochran 22.8 3137
## 7 48247 Texas Jim Hogg 22.6 3136
## 8 48393 Texas Roberts 22.4 3135
## 9 39075 Ohio Holmes 21.8 3134
## 10 21197 Kentucky Powell 21.6 3133
## # ... with 3,132 more rows
acs %>%
mutate(state_carpool_rank = min_rank(carpool)) %>%
select(state, state_carpool_rank) %>%
group_by(state) %>%
summarise(mean_carpool_rank = mean(state_carpool_rank, na.rm = TRUE)) %>%
arrange(mean_carpool_rank)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
## state mean_carpool_rank
## <chr> <dbl>
## 1 District of Columbia 72
## 2 Massachusetts 540.
## 3 Connecticut 604.
## 4 Rhode Island 670.
## 5 New Jersey 687.
## 6 Ohio 825.
## 7 Delaware 831.
## 8 New Hampshire 870.
## 9 New York 1019.
## 10 Wisconsin 1112.
## # ... with 41 more rows
The ten highest ranked counties for carpooling are: Kenedy, King, Irion, Wheeler, New York, Wheeler, Emmons, Daniels, Dundy, and Hyde.
The ten lowest ranked counties for carpooling are: Clay, LaGrange, Jenkins, Sevier, Seward, Cochran, Jim Hogg, Roberts, Holmes, and Powell.
District of Columbia is the top ranked state for carpooling.
The top 5 states are District of Columbia, Massachusetts, Connecticut, Rhode Island, and New Jersey.