Import Data and Data Cleaning

1 Import

library(tidyverse);
## -- Attaching packages ---------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl);
# Read the ACS 2015 county workbook into `acs`, then print summary
# statistics for every column.
acs <- read_excel(
  path = "C:/Users/anger/OneDrive - University of Cincinnati/BANA 7025/Week 4/homework3/acs_2015_county_data_revised.xlsx"
)
summary(acs)
##    census_id        state              county            total_pop       
##  Min.   : 1001   Length:3142        Length:3142        Min.   :      85  
##  1st Qu.:18178   Class :character   Class :character   1st Qu.:   11028  
##  Median :29176   Mode  :character   Mode  :character   Median :   25768  
##  Mean   :30384                                         Mean   :  100737  
##  3rd Qu.:45081                                         3rd Qu.:   67552  
##  Max.   :56045                                         Max.   :10038388  
##                                                                          
##       men              women            hispanic          white      
##  Min.   :     42   Min.   :     43   Min.   : 0.000   Min.   : 0.90  
##  1st Qu.:   5546   1st Qu.:   5466   1st Qu.: 1.900   1st Qu.:65.60  
##  Median :  12826   Median :  12907   Median : 3.700   Median :84.60  
##  Mean   :  49565   Mean   :  51171   Mean   : 8.826   Mean   :77.28  
##  3rd Qu.:  33319   3rd Qu.:  34122   3rd Qu.: 9.000   3rd Qu.:93.30  
##  Max.   :4945351   Max.   :5093037   Max.   :98.700   Max.   :99.80  
##                                                                      
##      black            native           asian           pacific        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.200   1st Qu.: 0.00000  
##  Median : 2.100   Median : 0.300   Median : 0.500   Median : 0.00000  
##  Mean   : 8.879   Mean   : 1.766   Mean   : 1.258   Mean   : 0.08475  
##  3rd Qu.:10.175   3rd Qu.: 0.600   3rd Qu.: 1.200   3rd Qu.: 0.00000  
##  Max.   :85.900   Max.   :92.100   Max.   :41.600   Max.   :35.30000  
##                                                                       
##     citizen            income       income_per_cap     poverty    
##  Min.   :     80   Min.   : 19328   Min.   : 8292   Min.   : 1.4  
##  1st Qu.:   8254   1st Qu.: 38826   1st Qu.:20471   1st Qu.:12.0  
##  Median :  19434   Median : 45111   Median :23577   Median :16.0  
##  Mean   :  70804   Mean   : 46830   Mean   :24338   Mean   :16.7  
##  3rd Qu.:  50728   3rd Qu.: 52250   3rd Qu.:27138   3rd Qu.:20.3  
##  Max.   :6046749   Max.   :123453   Max.   :65600   Max.   :53.3  
##                    NA's   :1                                      
##  child_poverty       professional      service          office     
##  Length:3142        Min.   :13.50   Min.   : 5.00   Min.   : 4.10  
##  Class :character   1st Qu.:26.70   1st Qu.:15.90   1st Qu.:20.20  
##  Mode  :character   Median :30.00   Median :18.00   Median :22.40  
##                     Mean   :31.04   Mean   :18.26   Mean   :22.13  
##                     3rd Qu.:34.40   3rd Qu.:20.20   3rd Qu.:24.30  
##                     Max.   :74.00   Max.   :36.60   Max.   :35.40  
##                                                                    
##   construction     production        drive          carpool     
##  Min.   : 1.70   Min.   : 0.00   Min.   : 5.20   Min.   : 0.00  
##  1st Qu.: 9.80   1st Qu.:11.53   1st Qu.:76.60   1st Qu.: 8.50  
##  Median :12.20   Median :15.40   Median :80.60   Median : 9.90  
##  Mean   :12.74   Mean   :15.82   Mean   :79.08   Mean   :10.33  
##  3rd Qu.:15.00   3rd Qu.:19.40   3rd Qu.:83.60   3rd Qu.:11.88  
##  Max.   :40.30   Max.   :55.60   Max.   :94.60   Max.   :29.90  
##                                                                 
##     transit             walk         other_transp     work_at_home   
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.1000   1st Qu.: 1.400   1st Qu.: 0.900   1st Qu.: 2.800  
##  Median : 0.4000   Median : 2.400   Median : 1.300   Median : 4.000  
##  Mean   : 0.9675   Mean   : 3.307   Mean   : 1.614   Mean   : 4.697  
##  3rd Qu.: 0.8000   3rd Qu.: 4.000   3rd Qu.: 1.900   3rd Qu.: 5.700  
##  Max.   :61.7000   Max.   :71.200   Max.   :39.100   Max.   :37.200  
##                                                                      
##   mean_commute      employed        private_work    public_work   
##  Min.   : 4.90   Min.   :     62   Min.   :25.00   Min.   : 5.80  
##  1st Qu.:19.30   1st Qu.:   4524   1st Qu.:70.90   1st Qu.:13.10  
##  Median :22.90   Median :  10644   Median :75.80   Median :16.10  
##  Mean   :23.15   Mean   :  46387   Mean   :74.44   Mean   :17.35  
##  3rd Qu.:26.60   3rd Qu.:  29254   3rd Qu.:79.80   3rd Qu.:20.10  
##  Max.   :44.00   Max.   :4635465   Max.   :88.30   Max.   :66.20  
##                                                                   
##  self_employed     family_work      unemployment   
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.: 5.400   1st Qu.:0.1000   1st Qu.: 5.500  
##  Median : 6.900   Median :0.2000   Median : 7.500  
##  Mean   : 7.921   Mean   :0.2915   Mean   : 7.815  
##  3rd Qu.: 9.400   3rd Qu.:0.3000   3rd Qu.: 9.700  
##  Max.   :36.600   Max.   :9.8000   Max.   :29.400  
## 
# Print the compact structure of acs: dimensions, column types, first values.
acs %>%
  str()
## tibble [3,142 x 35] (S3: tbl_df/tbl/data.frame)
##  $ census_id     : num [1:3142] 1001 1003 1005 1007 1009 ...
##  $ state         : chr [1:3142] "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ county        : chr [1:3142] "Autauga" "Baldwin" "Barbour" "Bibb" ...
##  $ total_pop     : num [1:3142] 55221 195121 26932 22604 57710 ...
##  $ men           : num [1:3142] 26745 95314 14497 12073 28512 ...
##  $ women         : num [1:3142] 28476 99807 12435 10531 29198 ...
##  $ hispanic      : num [1:3142] 2.6 4.5 4.6 2.2 8.6 4.4 1.2 3.5 0.4 1.5 ...
##  $ white         : num [1:3142] 75.8 83.1 46.2 74.5 87.9 22.2 53.3 73 57.3 91.7 ...
##  $ black         : num [1:3142] 18.5 9.5 46.7 21.4 1.5 70.7 43.8 20.3 40.3 4.8 ...
##  $ native        : num [1:3142] 0.4 0.6 0.2 0.4 0.3 1.2 0.1 0.2 0.2 0.6 ...
##  $ asian         : num [1:3142] 1 0.7 0.4 0.1 0.1 0.2 0.4 0.9 0.8 0.3 ...
##  $ pacific       : num [1:3142] 0 0 0 0 0 0 0 0 0 0 ...
##  $ citizen       : num [1:3142] 40725 147695 20714 17495 42345 ...
##  $ income        : num [1:3142] 51281 50254 32964 38678 45813 ...
##  $ income_per_cap: num [1:3142] 24974 27317 16824 18431 20532 ...
##  $ poverty       : num [1:3142] 12.9 13.4 26.7 16.8 16.7 24.6 25.4 20.5 21.6 19.2 ...
##  $ child_poverty : chr [1:3142] "18.600000000000001" "19.2" "45.3" "27.9" ...
##  $ professional  : num [1:3142] 33.2 33.1 26.8 21.5 28.5 18.8 27.5 27.3 23.3 29.3 ...
##  $ service       : num [1:3142] 17 17.7 16.1 17.9 14.1 15 16.6 17.7 14.5 16 ...
##  $ office        : num [1:3142] 24.2 27.1 23.1 17.8 23.9 19.7 21.9 24.2 26.3 19.5 ...
##  $ construction  : num [1:3142] 8.6 10.8 10.8 19 13.5 20.1 10.3 10.5 11.5 13.7 ...
##  $ production    : num [1:3142] 17.1 11.2 23.1 23.7 19.9 26.4 23.7 20.4 24.4 21.5 ...
##  $ drive         : num [1:3142] 87.5 84.7 83.8 83.2 84.9 74.9 84.5 85.3 85.1 83.9 ...
##  $ carpool       : num [1:3142] 8.8 8.8 10.9 13.5 11.2 14.9 12.4 9.4 11.9 12.1 ...
##  $ transit       : num [1:3142] 0.1 0.1 0.4 0.5 0.4 0.7 0 0.2 0.2 0.2 ...
##  $ walk          : num [1:3142] 0.5 1 1.8 0.6 0.9 5 0.8 1.2 0.3 0.6 ...
##  $ other_transp  : num [1:3142] 1.3 1.4 1.5 1.5 0.4 1.7 0.6 1.2 0.4 0.7 ...
##  $ work_at_home  : num [1:3142] 1.8 3.9 1.6 0.7 2.3 2.8 1.7 2.7 2.1 2.5 ...
##  $ mean_commute  : num [1:3142] 26.5 26.4 24.1 28.8 34.9 27.5 24.6 24.1 25.1 27.4 ...
##  $ employed      : num [1:3142] 23986 85953 8597 8294 22189 ...
##  $ private_work  : num [1:3142] 73.6 81.5 71.8 76.8 82 79.5 77.4 74.1 85.1 73.1 ...
##  $ public_work   : num [1:3142] 20.9 12.3 20.8 16.1 13.5 15.1 16.2 20.8 12.1 18.5 ...
##  $ self_employed : num [1:3142] 5.5 5.8 7.3 6.7 4.2 5.4 6.2 5 2.8 7.9 ...
##  $ family_work   : num [1:3142] 0 0.4 0.1 0.4 0.4 0 0.2 0.1 0 0.5 ...
##  $ unemployment  : num [1:3142] 7.6 7.5 17.6 8.3 7.7 18 10.9 12.3 8.9 7.9 ...

There are 35 columns and 3142 rows

2

Child poverty needs to be changed from a character to a numeric variable

# child_poverty was read in as text; convert it to numeric so that numeric
# summaries can be computed for it. The column is already character, so no
# intermediate as.character() call is needed.
acs$child_poverty <- as.numeric(acs$child_poverty)

# Re-inspect the column types to confirm the conversion.
glimpse(acs)
## Rows: 3,142
## Columns: 35
## $ census_id      <dbl> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017...
## $ state          <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama...
## $ county         <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "...
## $ total_pop      <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11...
## $ men            <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274...
## $ women          <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 6037...
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7....
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3...
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, ...
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0....
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0....
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....
## $ citizen        <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 886...
## $ income         <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 417...
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 213...
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6...
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2...
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3...
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5...
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3...
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5,...
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4...
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1...
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 1...
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0....
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1....
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1....
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1....
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1...
## $ employed       <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, ...
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1...
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1...
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4....
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0....
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9...

3 Missing Values

Out of all 3,142 rows there are only 2 missing values: one in income and one in child_poverty. Because so few values are missing, they can be ignored; they will have minimal, if any, effect on the analysis.

# Count the missing cells across the whole table.
acs %>%
  is.na() %>%
  sum()
## [1] 2

4 Summary of Data

# Per-column summary statistics, now that child_poverty is numeric.
acs %>%
  summary()
##    census_id        state              county            total_pop       
##  Min.   : 1001   Length:3142        Length:3142        Min.   :      85  
##  1st Qu.:18178   Class :character   Class :character   1st Qu.:   11028  
##  Median :29176   Mode  :character   Mode  :character   Median :   25768  
##  Mean   :30384                                         Mean   :  100737  
##  3rd Qu.:45081                                         3rd Qu.:   67552  
##  Max.   :56045                                         Max.   :10038388  
##                                                                          
##       men              women            hispanic          white      
##  Min.   :     42   Min.   :     43   Min.   : 0.000   Min.   : 0.90  
##  1st Qu.:   5546   1st Qu.:   5466   1st Qu.: 1.900   1st Qu.:65.60  
##  Median :  12826   Median :  12907   Median : 3.700   Median :84.60  
##  Mean   :  49565   Mean   :  51171   Mean   : 8.826   Mean   :77.28  
##  3rd Qu.:  33319   3rd Qu.:  34122   3rd Qu.: 9.000   3rd Qu.:93.30  
##  Max.   :4945351   Max.   :5093037   Max.   :98.700   Max.   :99.80  
##                                                                      
##      black            native           asian           pacific        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.200   1st Qu.: 0.00000  
##  Median : 2.100   Median : 0.300   Median : 0.500   Median : 0.00000  
##  Mean   : 8.879   Mean   : 1.766   Mean   : 1.258   Mean   : 0.08475  
##  3rd Qu.:10.175   3rd Qu.: 0.600   3rd Qu.: 1.200   3rd Qu.: 0.00000  
##  Max.   :85.900   Max.   :92.100   Max.   :41.600   Max.   :35.30000  
##                                                                       
##     citizen            income       income_per_cap     poverty    
##  Min.   :     80   Min.   : 19328   Min.   : 8292   Min.   : 1.4  
##  1st Qu.:   8254   1st Qu.: 38826   1st Qu.:20471   1st Qu.:12.0  
##  Median :  19434   Median : 45111   Median :23577   Median :16.0  
##  Mean   :  70804   Mean   : 46830   Mean   :24338   Mean   :16.7  
##  3rd Qu.:  50728   3rd Qu.: 52250   3rd Qu.:27138   3rd Qu.:20.3  
##  Max.   :6046749   Max.   :123453   Max.   :65600   Max.   :53.3  
##                    NA's   :1                                      
##  child_poverty    professional      service          office     
##  Min.   : 0.00   Min.   :13.50   Min.   : 5.00   Min.   : 4.10  
##  1st Qu.:16.10   1st Qu.:26.70   1st Qu.:15.90   1st Qu.:20.20  
##  Median :22.50   Median :30.00   Median :18.00   Median :22.40  
##  Mean   :23.29   Mean   :31.04   Mean   :18.26   Mean   :22.13  
##  3rd Qu.:29.50   3rd Qu.:34.40   3rd Qu.:20.20   3rd Qu.:24.30  
##  Max.   :72.30   Max.   :74.00   Max.   :36.60   Max.   :35.40  
##  NA's   :1                                                      
##   construction     production        drive          carpool     
##  Min.   : 1.70   Min.   : 0.00   Min.   : 5.20   Min.   : 0.00  
##  1st Qu.: 9.80   1st Qu.:11.53   1st Qu.:76.60   1st Qu.: 8.50  
##  Median :12.20   Median :15.40   Median :80.60   Median : 9.90  
##  Mean   :12.74   Mean   :15.82   Mean   :79.08   Mean   :10.33  
##  3rd Qu.:15.00   3rd Qu.:19.40   3rd Qu.:83.60   3rd Qu.:11.88  
##  Max.   :40.30   Max.   :55.60   Max.   :94.60   Max.   :29.90  
##                                                                 
##     transit             walk         other_transp     work_at_home   
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.1000   1st Qu.: 1.400   1st Qu.: 0.900   1st Qu.: 2.800  
##  Median : 0.4000   Median : 2.400   Median : 1.300   Median : 4.000  
##  Mean   : 0.9675   Mean   : 3.307   Mean   : 1.614   Mean   : 4.697  
##  3rd Qu.: 0.8000   3rd Qu.: 4.000   3rd Qu.: 1.900   3rd Qu.: 5.700  
##  Max.   :61.7000   Max.   :71.200   Max.   :39.100   Max.   :37.200  
##                                                                      
##   mean_commute      employed        private_work    public_work   
##  Min.   : 4.90   Min.   :     62   Min.   :25.00   Min.   : 5.80  
##  1st Qu.:19.30   1st Qu.:   4524   1st Qu.:70.90   1st Qu.:13.10  
##  Median :22.90   Median :  10644   Median :75.80   Median :16.10  
##  Mean   :23.15   Mean   :  46387   Mean   :74.44   Mean   :17.35  
##  3rd Qu.:26.60   3rd Qu.:  29254   3rd Qu.:79.80   3rd Qu.:20.10  
##  Max.   :44.00   Max.   :4635465   Max.   :88.30   Max.   :66.20  
##                                                                   
##  self_employed     family_work      unemployment   
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.: 5.400   1st Qu.:0.1000   1st Qu.: 5.500  
##  Median : 6.900   Median :0.2000   Median : 7.500  
##  Mean   : 7.921   Mean   :0.2915   Mean   : 7.815  
##  3rd Qu.: 9.400   3rd Qu.:0.3000   3rd Qu.: 9.700  
##  Max.   :36.600   Max.   :9.8000   Max.   :29.400  
## 

There is a significant difference between the median and the mean for total population. The median is 25,768, the mean is 100,737, and the 3rd quartile is 67,552, indicating the presence of multiple extremely large values. The means for the men and women variables are also much larger than their medians because these two variables make up the total population variable. Extreme values will not be removed because a large portion of the population would be completely omitted from the data.

All of the variables with irregularities have means that are larger, rather than smaller, than the median and that are also larger than the third quartile. This trend is most likely due to differences in demographics between areas. As we know, counties across the country are not homogeneous. Different counties can have vastly different populations with different races, ethnicities, ages, genders, etc. Overall, I do not recommend removing any irregular data, to ensure the entire population is accounted for.

Data Manipulation & Insights

5 Gender

# Keep only the counties where women outnumber men.
acs %>%
  filter(women > men)
## # A tibble: 1,985 x 35
##    census_id state county total_pop   men women hispanic white black native
##        <dbl> <chr> <chr>      <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>  <dbl>
##  1      1001 Alab~ Autau~     55221 26745 28476      2.6  75.8  18.5    0.4
##  2      1003 Alab~ Baldw~    195121 95314 99807      4.5  83.1   9.5    0.6
##  3      1009 Alab~ Blount     57710 28512 29198      8.6  87.9   1.5    0.3
##  4      1013 Alab~ Butler     20354  9502 10852      1.2  53.3  43.8    0.1
##  5      1015 Alab~ Calho~    116648 56274 60374      3.5  73    20.3    0.2
##  6      1017 Alab~ Chamb~     34079 16258 17821      0.4  57.3  40.3    0.2
##  7      1019 Alab~ Chero~     26008 12975 13033      1.5  91.7   4.8    0.6
##  8      1021 Alab~ Chilt~     43819 21619 22200      7.6  80.5  10.2    0.4
##  9      1023 Alab~ Choct~     13395  6382  7013      0.4  55.9  42.9    0  
## 10      1025 Alab~ Clarke     25070 11834 13236      0.3  53.4  45.3    0  
## # ... with 1,975 more rows, and 25 more variables: asian <dbl>, pacific <dbl>,
## #   citizen <dbl>, income <dbl>, income_per_cap <dbl>, poverty <dbl>,
## #   child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## #   construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## #   transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## #   mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## #   self_employed <dbl>, family_work <dbl>, unemployment <dbl>

As you can see by the filtered data above, 1,985 counties have more women than men.

6 Unemployment rate

# Keep only the counties whose unemployment value is below 10.
acs %>%
  filter(unemployment < 10)
## # A tibble: 2,420 x 35
##    census_id state county total_pop   men women hispanic white black native
##        <dbl> <chr> <chr>      <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>  <dbl>
##  1      1001 Alab~ Autau~     55221 26745 28476      2.6  75.8  18.5    0.4
##  2      1003 Alab~ Baldw~    195121 95314 99807      4.5  83.1   9.5    0.6
##  3      1007 Alab~ Bibb       22604 12073 10531      2.2  74.5  21.4    0.4
##  4      1009 Alab~ Blount     57710 28512 29198      8.6  87.9   1.5    0.3
##  5      1017 Alab~ Chamb~     34079 16258 17821      0.4  57.3  40.3    0.2
##  6      1019 Alab~ Chero~     26008 12975 13033      1.5  91.7   4.8    0.6
##  7      1021 Alab~ Chilt~     43819 21619 22200      7.6  80.5  10.2    0.4
##  8      1027 Alab~ Clay       13537  6671  6866      3.2  79.9  14.4    0.7
##  9      1029 Alab~ Clebu~     15002  7334  7668      2.3  92.5   2.9    0.2
## 10      1031 Alab~ Coffee     50884 25174 25710      6.4  71.5  17.2    0.8
## # ... with 2,410 more rows, and 25 more variables: asian <dbl>, pacific <dbl>,
## #   citizen <dbl>, income <dbl>, income_per_cap <dbl>, poverty <dbl>,
## #   child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## #   construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## #   transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## #   mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## #   self_employed <dbl>, family_work <dbl>, unemployment <dbl>

2,420 counties have an unemployment rate less than 10%.

7 Commute

# Rows holding the ten largest mean_commute values (ties are included).
# top_n() filters without sorting, so rows keep their original order.
acs %>%
  top_n(n = 10, wt = mean_commute)
## # A tibble: 10 x 35
##    census_id state county total_pop    men  women hispanic white black native
##        <dbl> <chr> <chr>      <dbl>  <dbl>  <dbl>    <dbl> <dbl> <dbl>  <dbl>
##  1      8093 Colo~ Park       16189 8.52e3 7.66e3      5.7  90.4   0      1.1
##  2     24017 Mary~ Charl~    152754 7.37e4 7.90e4      5    45.4  41.8    0.7
##  3     36005 New ~ Bronx    1428357 6.72e5 7.56e5     54.6  10.3  29.6    0.2
##  4     36047 New ~ Kings    2595259 1.23e6 1.37e6     19.6  35.7  31.2    0.2
##  5     36081 New ~ Queens   2301139 1.12e6 1.19e6     27.9  26.1  17.4    0.2
##  6     36085 New ~ Richm~    472481 2.29e5 2.44e5     17.8  62.8   9.6    0.1
##  7     42103 Penn~ Pike       56632 2.82e4 2.84e4      9.9  81.8   5.5    0.2
##  8     51187 Virg~ Warren     38481 1.93e4 1.92e4      3.9  87.9   4      0.2
##  9     51193 Virg~ Westm~     17557 8.58e3 8.98e3      6.1  63.5  27.9    0.2
## 10     54015 West~ Clay        9141 4.58e3 4.56e3      0.1  97.9   0      0.4
## # ... with 25 more variables: asian <dbl>, pacific <dbl>, citizen <dbl>,
## #   income <dbl>, income_per_cap <dbl>, poverty <dbl>, child_poverty <dbl>,
## #   professional <dbl>, service <dbl>, office <dbl>, construction <dbl>,
## #   production <dbl>, drive <dbl>, carpool <dbl>, transit <dbl>, walk <dbl>,
## #   other_transp <dbl>, work_at_home <dbl>, mean_commute <dbl>, employed <dbl>,
## #   private_work <dbl>, public_work <dbl>, self_employed <dbl>,
## #   family_work <dbl>, unemployment <dbl>

The top 10 counties with the highest mean commute in no particular order are Park, Charles, Bronx, Kings, Queens, Richmond, Pike, Warren, Westmoreland, and Clay county. The average mean commute in these counties ranges from 41.4 to 44 minutes.

8 New Variable: Percentage of Women

# Share of women in each county, sorted from the smallest share upwards.
# percent_women is a proportion of total_pop (0-1 scale), not a 0-100 value.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
acs %>%
  mutate(percent_women = women / total_pop) %>%
  select(census_id, state, county, percent_women) %>%
  arrange(percent_women)
## # A tibble: 3,142 x 4
##    census_id state        county                 percent_women
##        <dbl> <chr>        <chr>                          <dbl>
##  1     42053 Pennsylvania Forest                         0.268
##  2      8011 Colorado     Bent                           0.314
##  3     51183 Virginia     Sussex                         0.315
##  4     13309 Georgia      Wheeler                        0.321
##  5      6035 California   Lassen                         0.332
##  6     48095 Texas        Concho                         0.333
##  7     13053 Georgia      Chattahoochee                  0.334
##  8      2013 Alaska       Aleutians East Borough         0.335
##  9     22125 Louisiana    West Feliciana                 0.336
## 10     32027 Nevada       Pershing                       0.337
## # ... with 3,132 more rows

The 10 counties with the lowest percentage of women, listed from lowest to highest, are Forest, Bent, Sussex, Wheeler, Lassen, Concho, Chattahoochee, Aleutians East Borough, West Feliciana, and Pershing.

9 New Variable: Percentage of all races

Code A-E

# Add up the six race/ethnicity percentage columns for each county and list
# the counties from the smallest total upwards.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
acs %>%
  mutate(
    percent_all_races = hispanic + white + black + native + asian + pacific
  ) %>%
  select(census_id, state, county, percent_all_races) %>%
  arrange(percent_all_races)
## # A tibble: 3,142 x 4
##    census_id state     county                   percent_all_races
##        <dbl> <chr>     <chr>                                <dbl>
##  1     15001 Hawaii    Hawaii                                76.4
##  2     15009 Hawaii    Maui                                  79.2
##  3     40097 Oklahoma  Mayes                                 79.7
##  4     15003 Hawaii    Honolulu                              81.5
##  5     40123 Oklahoma  Pontotoc                              82.8
##  6     47061 Tennessee Grundy                                83. 
##  7      2282 Alaska    Yakutat City and Borough              83.4
##  8     40069 Oklahoma  Johnston                              84  
##  9     15007 Hawaii    Kauai                                 84.1
## 10     40003 Oklahoma  Alfalfa                               85.1
## # ... with 3,132 more rows
# Average the per-county race-percentage total within each state and sort
# the states from the lowest average upwards.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
acs %>%
  mutate(
    percent_all_races = hispanic + white + black + native + asian + pacific
  ) %>%
  select(state, percent_all_races) %>%
  group_by(state) %>%
  summarise(state_mean = mean(percent_all_races, na.rm = TRUE)) %>%
  arrange(state_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
##    state                state_mean
##    <chr>                     <dbl>
##  1 Hawaii                     84  
##  2 Alaska                     92.7
##  3 Oklahoma                   92.8
##  4 Washington                 96.7
##  5 California                 96.9
##  6 Oregon                     97.1
##  7 Delaware                   97.3
##  8 Massachusetts              97.5
##  9 Maryland                   97.6
## 10 District of Columbia       97.6
## # ... with 41 more rows
# State-level average of the per-county race-percentage total, lowest first.
# This chunk runs the same pipeline as the one immediately before it.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
acs %>%
  mutate(
    percent_all_races = hispanic + white + black + native + asian + pacific
  ) %>%
  select(state, percent_all_races) %>%
  group_by(state) %>%
  summarise(state_mean = mean(percent_all_races, na.rm = TRUE)) %>%
  arrange(state_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
##    state                state_mean
##    <chr>                     <dbl>
##  1 Hawaii                     84  
##  2 Alaska                     92.7
##  3 Oklahoma                   92.8
##  4 Washington                 96.7
##  5 California                 96.9
##  6 Oregon                     97.1
##  7 Delaware                   97.3
##  8 Massachusetts              97.5
##  9 Maryland                   97.6
## 10 District of Columbia       97.6
## # ... with 41 more rows
# Counties whose race-percentage total exceeds 100, largest total first.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
# NOTE(review): the total is a sum of doubles, so a county whose true total
# is exactly 100 may pass `> 100` through rounding error alone; confirm
# whether a tolerance-based check such as dplyr::near() is wanted here.
acs %>%
  mutate(
    percent_all_races = hispanic + white + black + native + asian + pacific
  ) %>%
  select(state, county, percent_all_races) %>%
  filter(percent_all_races > 100.0) %>%
  arrange(desc(percent_all_races))
## # A tibble: 11 x 3
##    state       county    percent_all_races
##    <chr>       <chr>                 <dbl>
##  1 Nebraska    Gosper                 100.
##  2 Nebraska    Hooker                 100.
##  3 Texas       Bailey                 100.
##  4 Texas       Edwards                100.
##  5 Nebraska    Nance                  100.
##  6 Mississippi Claiborne              100.
##  7 Texas       Duval                  100.
##  8 Texas       Kenedy                 100.
##  9 Texas       Kent                   100.
## 10 Texas       Presidio               100.
## 11 Utah        Beaver                 100.
# Average the race-percentage total by county name, lowest average first.
# The piped data frame is already mutate()'s first argument, so it is not
# passed a second time.
# NOTE(review): grouping uses the county name alone, so same-named counties
# in different states are pooled (3,142 rows collapse to 1,851 groups);
# confirm this is intended rather than grouping by state and county.
acs %>%
  mutate(
    percent_all_races = hispanic + white + black + native + asian + pacific
  ) %>%
  select(county, percent_all_races) %>%
  group_by(county) %>%
  summarise(county_mean = mean(percent_all_races, na.rm = TRUE)) %>%
  arrange(county_mean)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1,851 x 2
##    county                   county_mean
##    <chr>                          <dbl>
##  1 Hawaii                          76.4
##  2 Maui                            79.2
##  3 Mayes                           79.7
##  4 Honolulu                        81.5
##  5 Yakutat City and Borough        83.4
##  6 Kauai                           84.1
##  7 Alfalfa                         85.1
##  8 Sequoyah                        86.4
##  9 Coal                            86.6
## 10 Bennett                         86.9
## # ... with 1,841 more rows

A. The 10 counties with the lowest sum of percentages are Hawaii, Maui, Mayes, Honolulu, Pontotoc, Grundy, Yakutat City and Borough, Johnston, Kauai, and Alfalfa.

B. Hawaii has the lowest average sum of race percentages, at only 84%.

C. Five counties have a sum greater than 100: Gosper, Hooker, Bailey, Edwards, and Nance.

D. Six counties have a sum exactly equal to 100: Claiborne, Duval, Kenedy, Kent, Presidio, and Beaver.

10 Carpool A-E

# Rank counties by carpool value and list them starting from rank 1.
# min_rank() ranks in ascending order, so rank 1 is the smallest carpool
# value and tied counties share the same rank.
acs %>%
  transmute(
    census_id,
    state,
    county,
    carpool,
    carpool_rank = min_rank(carpool)
  ) %>%
  arrange(carpool_rank)
## # A tibble: 3,142 x 5
##    census_id state        county   carpool carpool_rank
##        <dbl> <chr>        <chr>      <dbl>        <int>
##  1     48261 Texas        Kenedy       0              1
##  2     48269 Texas        King         0              1
##  3     48235 Texas        Irion        0.9            3
##  4     31183 Nebraska     Wheeler      1.3            4
##  5     36061 New York     New York     1.9            5
##  6     13309 Georgia      Wheeler      2.3            6
##  7     38029 North Dakota Emmons       2.3            6
##  8     30019 Montana      Daniels      2.6            8
##  9     31057 Nebraska     Dundy        2.6            8
## 10     46069 South Dakota Hyde         2.8           10
## # ... with 3,132 more rows
# Same ascending carpool ranking, listed from the largest rank number
# (largest carpool value) downwards.
acs %>%
  mutate(carpool_rank = min_rank(carpool)) %>%
  select(census_id, state, county, carpool, carpool_rank) %>%
  arrange(desc(carpool_rank))
## # A tibble: 3,142 x 5
##    census_id state    county   carpool carpool_rank
##        <dbl> <chr>    <chr>      <dbl>        <int>
##  1     13061 Georgia  Clay        29.9         3142
##  2     18087 Indiana  LaGrange    27           3141
##  3     13165 Georgia  Jenkins     25.3         3140
##  4      5133 Arkansas Sevier      24.4         3139
##  5     20175 Kansas   Seward      23.4         3138
##  6     48079 Texas    Cochran     22.8         3137
##  7     48247 Texas    Jim Hogg    22.6         3136
##  8     48393 Texas    Roberts     22.4         3135
##  9     39075 Ohio     Holmes      21.8         3134
## 10     21197 Kentucky Powell      21.6         3133
## # ... with 3,132 more rows
# Rank every county by carpool value across the whole table, then average
# those county ranks within each state and sort from the lowest mean rank.
acs %>%
  transmute(state, state_carpool_rank = min_rank(carpool)) %>%
  group_by(state) %>%
  summarise(mean_carpool_rank = mean(state_carpool_rank, na.rm = TRUE)) %>%
  arrange(mean_carpool_rank)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 51 x 2
##    state                mean_carpool_rank
##    <chr>                            <dbl>
##  1 District of Columbia               72 
##  2 Massachusetts                     540.
##  3 Connecticut                       604.
##  4 Rhode Island                      670.
##  5 New Jersey                        687.
##  6 Ohio                              825.
##  7 Delaware                          831.
##  8 New Hampshire                     870.
##  9 New York                         1019.
## 10 Wisconsin                        1112.
## # ... with 41 more rows

B

The ten highest ranked counties for carpooling (rank 1 is the lowest carpool percentage) are: Kenedy, King, Irion, Wheeler (Nebraska), New York, Wheeler (Georgia), Emmons, Daniels, Dundy, and Hyde.

C

The ten lowest ranked counties for carpooling are: Clay, LaGrange, Jenkins, Sevier, Seward, Cochran, Jim Hogg, Roberts, Holmes, and Powell.

D

District of Columbia is the top ranked state for carpooling.

E

The top 5 states are District of Columbia, Massachusetts, Connecticut, Rhode Island, and New Jersey.