- 3,142 rows and 35 columns
#load tidyverse library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#read the CSV file with read_csv() and store the result in county_data
county_data <- read_csv("data/acs_2015_county_data_revised.csv")
## Rows: 3142 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, county
## dbl (33): census_id, total_pop, men, women, hispanic, white, black, native, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#use glimpse to see the number of observations (rows) and variables (columns)
glimpse(county_data)
## Rows: 3,142
## Columns: 35
## $ census_id <dbl> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1…
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", …
## $ county <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "Bul…
## $ total_pop <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664…
## $ men <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1…
## $ women <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, …
## $ hispanic <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, …
## $ white <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9…
## $ black <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8…
## $ native <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, …
## $ asian <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, …
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizen <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,…
## $ income <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,…
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,…
## $ poverty <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1…
## $ child_poverty <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3…
## $ professional <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2…
## $ service <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1…
## $ office <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1…
## $ construction <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13…
## $ production <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2…
## $ drive <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8…
## $ carpool <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1…
## $ transit <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, …
## $ walk <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, …
## $ other_transp <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, …
## $ work_at_home <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, …
## $ mean_commute <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2…
## $ employed <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136…
## $ private_work <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7…
## $ public_work <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1…
## $ self_employed <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, …
## $ family_work <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, …
## $ unemployment <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9…
- The following columns should be changed into integers: census_id,
total_pop, men, women, citizen, income, income_per_cap, and
employed
#convert the whole-number columns from double to integer in one step
county_data <- county_data %>%
  mutate(across(
    c(
      census_id, total_pop, men, women,
      citizen, income, income_per_cap, employed
    ),
    as.integer
  ))
#use glimpse to confirm the new column types
glimpse(county_data)
## Rows: 3,142
## Columns: 35
## $ census_id <int> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1…
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", …
## $ county <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "Bul…
## $ total_pop <int> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664…
## $ men <int> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1…
## $ women <int> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, …
## $ hispanic <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, …
## $ white <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9…
## $ black <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8…
## $ native <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, …
## $ asian <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, …
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizen <int> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,…
## $ income <int> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,…
## $ income_per_cap <int> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,…
## $ poverty <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1…
## $ child_poverty <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3…
## $ professional <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2…
## $ service <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1…
## $ office <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1…
## $ construction <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13…
## $ production <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2…
## $ drive <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8…
## $ carpool <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1…
## $ transit <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, …
## $ walk <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, …
## $ other_transp <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, …
## $ work_at_home <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, …
## $ mean_commute <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2…
## $ employed <int> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136…
## $ private_work <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7…
## $ public_work <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1…
## $ self_employed <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, …
## $ family_work <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, …
## $ unemployment <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9…
- There are two null values: one in income and one in child_poverty.
There were only 2 nulls across 2 observations, so removing them would not
interfere too drastically with the data.
#use colSums with is.na to count the missing values in each column
county_data %>%
  is.na() %>%
  colSums()
## census_id state county total_pop men
## 0 0 0 0 0
## women hispanic white black native
## 0 0 0 0 0
## asian pacific citizen income income_per_cap
## 0 0 0 1 0
## poverty child_poverty professional service office
## 0 1 0 0 0
## construction production drive carpool transit
## 0 0 0 0 0
## walk other_transp work_at_home mean_commute employed
## 0 0 0 0 0
## private_work public_work self_employed family_work unemployment
## 0 0 0 0 0
#drop every row that contains a missing value
county_data <- county_data %>%
  na.omit()
#check work: every column should now report zero missing values
county_data %>%
  is.na() %>%
  colSums()
## census_id state county total_pop men
## 0 0 0 0 0
## women hispanic white black native
## 0 0 0 0 0
## asian pacific citizen income income_per_cap
## 0 0 0 0 0
## poverty child_poverty professional service office
## 0 0 0 0 0
## construction production drive carpool transit
## 0 0 0 0 0
## walk other_transp work_at_home mean_commute employed
## 0 0 0 0 0
## private_work public_work self_employed family_work unemployment
## 0 0 0 0 0
- The max for the following seem a bit high: unemployed, transit,
women, men, total_pop, citizen, income and walk. In visualizations I can
adjust the axis to exclude outliers.
#print summary statistics (min, quartiles, mean, max) for every column
county_data %>%
  summary()
## census_id state county total_pop
## Min. : 1001 Length:3140 Length:3140 Min. : 267
## 1st Qu.:18179 Class :character Class :character 1st Qu.: 11036
## Median :29176 Mode :character Mode :character Median : 25793
## Mean :30383 Mean : 100801
## 3rd Qu.:45080 3rd Qu.: 67621
## Max. :56045 Max. :10038388
## men women hispanic white
## Min. : 136 Min. : 131 Min. : 0.000 Min. : 0.90
## 1st Qu.: 5551 1st Qu.: 5488 1st Qu.: 1.900 1st Qu.:65.67
## Median : 12838 Median : 12916 Median : 3.700 Median :84.65
## Mean : 49597 Mean : 51204 Mean : 8.819 Mean :77.31
## 3rd Qu.: 33328 3rd Qu.: 34123 3rd Qu.: 9.000 3rd Qu.:93.33
## Max. :4945351 Max. :5093037 Max. :98.700 Max. :99.80
## black native asian pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.600 1st Qu.: 0.100 1st Qu.: 0.200 1st Qu.: 0.00000
## Median : 2.100 Median : 0.300 Median : 0.500 Median : 0.00000
## Mean : 8.885 Mean : 1.763 Mean : 1.253 Mean : 0.07357
## 3rd Qu.:10.200 3rd Qu.: 0.600 3rd Qu.: 1.200 3rd Qu.: 0.00000
## Max. :85.900 Max. :92.100 Max. :41.600 Max. :11.10000
## citizen income income_per_cap poverty
## Min. : 199 Min. : 19328 Min. : 8292 Min. : 1.4
## 1st Qu.: 8276 1st Qu.: 38826 1st Qu.:20470 1st Qu.:12.0
## Median : 19455 Median : 45095 Median :23575 Median :16.0
## Mean : 70849 Mean : 46824 Mean :24331 Mean :16.7
## 3rd Qu.: 50795 3rd Qu.: 52248 3rd Qu.:27138 3rd Qu.:20.3
## Max. :6046749 Max. :123453 Max. :65600 Max. :53.3
## child_poverty professional service office
## Min. : 0.00 Min. :13.50 Min. : 5.00 Min. : 4.10
## 1st Qu.:16.10 1st Qu.:26.70 1st Qu.:15.90 1st Qu.:20.20
## Median :22.50 Median :30.00 Median :18.00 Median :22.40
## Mean :23.29 Mean :31.05 Mean :18.25 Mean :22.13
## 3rd Qu.:29.50 3rd Qu.:34.42 3rd Qu.:20.20 3rd Qu.:24.30
## Max. :72.30 Max. :74.00 Max. :36.60 Max. :35.40
## construction production drive carpool
## Min. : 1.70 Min. : 0.00 Min. : 5.2 Min. : 0.00
## 1st Qu.: 9.80 1st Qu.:11.50 1st Qu.:76.6 1st Qu.: 8.50
## Median :12.20 Median :15.40 Median :80.6 Median : 9.90
## Mean :12.75 Mean :15.82 Mean :79.1 Mean :10.33
## 3rd Qu.:15.00 3rd Qu.:19.40 3rd Qu.:83.6 3rd Qu.:11.90
## Max. :40.30 Max. :55.60 Max. :94.6 Max. :29.90
## transit walk other_transp work_at_home
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.1000 1st Qu.: 1.400 1st Qu.: 0.90 1st Qu.: 2.800
## Median : 0.4000 Median : 2.400 Median : 1.30 Median : 4.000
## Mean : 0.9681 Mean : 3.294 Mean : 1.61 Mean : 4.694
## 3rd Qu.: 0.8000 3rd Qu.: 4.000 3rd Qu.: 1.90 3rd Qu.: 5.700
## Max. :61.7000 Max. :71.200 Max. :39.10 Max. :37.200
## mean_commute employed private_work public_work
## Min. : 4.90 Min. : 166 Min. :29.50 Min. : 5.80
## 1st Qu.:19.30 1st Qu.: 4532 1st Qu.:70.90 1st Qu.:13.07
## Median :22.90 Median : 10657 Median :75.85 Median :16.10
## Mean :23.15 Mean : 46416 Mean :74.45 Mean :17.33
## 3rd Qu.:26.60 3rd Qu.: 29272 3rd Qu.:79.80 3rd Qu.:20.10
## Max. :44.00 Max. :4635465 Max. :88.30 Max. :66.20
## self_employed family_work unemployment
## Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 5.400 1st Qu.:0.1000 1st Qu.: 5.500
## Median : 6.900 Median :0.2000 Median : 7.500
## Mean : 7.922 Mean :0.2917 Mean : 7.815
## 3rd Qu.: 9.400 3rd Qu.:0.3000 3rd Qu.: 9.700
## Max. :36.600 Max. :9.8000 Max. :29.400
- 1,984 counties have more women than men
#count the counties where women outnumber men
#the comparison is vectorized so every row is checked, including the last
#one (a while loop bounded by i < nrow(county_data) stops one row short)
more_women <- sum(county_data$women > county_data$men)
#display the count
print(more_women)
## [1] 1984
- 2,418 counties have unemployment less than 10%
#count the counties whose unemployment rate is below 10%
#the comparison is vectorized so every row is checked, including the last
#one (a while loop bounded by i < nrow(county_data) stops one row short)
unemp <- sum(county_data$unemployment < 10)
#display the count
print(unemp)
## [1] 2418
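- The 10 counties with the highest commute are: Conecuh, Allendale,
Quitman, Crowley, Humphreys, Corson, Ziebach, Oglala Lakota, Todd,
Dewey, Kusilvak Census Area (note: the output below reports
"Selecting by unemployment", so these 11 rows were picked by unemployment
rather than by mean_commute and the list should be re-checked)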
#load the dplyr package
library(dplyr)
#keep the ten counties with the longest mean commute
#slice_max() names the ranking column explicitly; top_n(10) without a wt
#argument ranks by the last column of the table instead of mean_commute
#slice_max() also returns the rows ordered from longest to shortest commute
county_data %>%
  slice_max(mean_commute, n = 10) %>%
  select(census_id, county, state, mean_commute)
## Selecting by unemployment
## # A tibble: 11 × 4
## census_id county state mean_commute
## <int> <chr> <chr> <dbl>
## 1 1035 Conecuh Alabama 29.7
## 2 45005 Allendale South Carolina 27.5
## 3 28119 Quitman Mississippi 23.7
## 4 8025 Crowley Colorado 21.9
## 5 28053 Humphreys Mississippi 21.4
## 6 46031 Corson South Dakota 19.7
## 7 46137 Ziebach South Dakota 19.6
## 8 46102 Oglala Lakota South Dakota 17.7
## 9 46121 Todd South Dakota 16.5
## 10 46041 Dewey South Dakota 16.3
## 11 2158 Kusilvak Census Area Alaska 5.9
#create new variable for the percentage of women in each county
#the division is vectorized so every row is filled, including the last one
#(a while loop bounded by i < nrow(county_data) leaves the final row at 0)
county_data$percent_women <- (county_data$women / county_data$total_pop) * 100
#show new data: the ten highest percentages, listed in ascending order
#slice_max() ranks by percent_women explicitly rather than relying on
#top_n()'s default of using whichever column happens to be last
county_data %>%
  slice_max(percent_women, n = 10) %>%
  arrange(percent_women) %>%
  select(census_id, county, state, percent_women)
## Selecting by percent_women
## # A tibble: 10 × 4
## census_id county state percent_women
## <int> <chr> <chr> <dbl>
## 1 29117 Livingston Missouri 54.9
## 2 35011 De Baca New Mexico 55.1
## 3 51790 Staunton city Virginia 55.1
## 4 48137 Edwards Texas 55.2
## 5 51091 Highland Virginia 55.3
## 6 51620 Franklin city Virginia 55.5
## 7 28125 Sharkey Mississippi 55.5
## 8 1119 Sumter Alabama 55.7
## 9 13235 Pulaski Georgia 58.0
## 10 51720 Norton city Virginia 59.4
- Claiborne, Duval, Kenedy, Kent, Presidio, Beaver, Nance, Hooker,
Bailey, Edwards, Gosper
- Texas
- Nance, Hooker, Bailey, Edwards, Gosper
- 3
#create new variable for total race: the sum of the six race percentages
#the columns are added as whole vectors so every row is filled, including
#the last one (a while loop bounded by i < nrow(county_data) skips it)
county_data$total_race <- county_data$hispanic + county_data$white +
  county_data$black + county_data$native + county_data$asian +
  county_data$pacific
#show new data: the ten highest sums, listed in ascending order
#slice_max() ranks by total_race explicitly rather than relying on
#top_n()'s default of using whichever column happens to be last
county_data %>%
  slice_max(total_race, n = 10) %>%
  arrange(total_race) %>%
  select(census_id, county, state, total_race)
## Selecting by total_race
## # A tibble: 11 × 4
## census_id county state total_race
## <int> <chr> <chr> <dbl>
## 1 28021 Claiborne Mississippi 100
## 2 48131 Duval Texas 100
## 3 48261 Kenedy Texas 100
## 4 48263 Kent Texas 100
## 5 48377 Presidio Texas 100
## 6 49001 Beaver Utah 100
## 7 31125 Nance Nebraska 100.
## 8 31091 Hooker Nebraska 100.
## 9 48017 Bailey Texas 100.
## 10 48137 Edwards Texas 100.
## 11 31073 Gosper Nebraska 100.
#add carpool_rank: rank 1 goes to the county with the highest carpool value
county_data <- county_data %>%
  mutate(carpool_rank = min_rank(desc(carpool)))
- Clay, LaGrange, Jenkins, Sevier, Seward, Cochran, Jim Hogg, Roberts,
Holmes, Powell
#read documentation
?min_rank()
#sort by rank so the best-ranked carpool counties are printed first
#(top_n() is left out because it returned the rows out of order)
select(
  arrange(county_data, carpool_rank),
  census_id, county, state, carpool, carpool_rank
)
## # A tibble: 3,140 × 5
## census_id county state carpool carpool_rank
## <int> <chr> <chr> <dbl> <int>
## 1 13061 Clay Georgia 29.9 1
## 2 18087 LaGrange Indiana 27 2
## 3 13165 Jenkins Georgia 25.3 3
## 4 5133 Sevier Arkansas 24.4 4
## 5 20175 Seward Kansas 23.4 5
## 6 48079 Cochran Texas 22.8 6
## 7 48247 Jim Hogg Texas 22.6 7
## 8 48393 Roberts Texas 22.4 8
## 9 39075 Holmes Ohio 21.8 9
## 10 21197 Powell Kentucky 21.6 10
## # ℹ 3,130 more rows
- Kenedy, King, Irion, Wheeler(Nebraska), New York, Wheeler(Georgia),
Emmons, Daniels, Dundy, Hyde
#read documentation
?min_rank()
#sort by descending rank so the worst-ranked carpool counties print first
#(top_n() is left out because it returned the rows out of order)
select(
  arrange(county_data, desc(carpool_rank)),
  census_id, county, state, carpool, carpool_rank
)
## # A tibble: 3,140 × 5
## census_id county state carpool carpool_rank
## <int> <chr> <chr> <dbl> <int>
## 1 48261 Kenedy Texas 0 3139
## 2 48269 King Texas 0 3139
## 3 48235 Irion Texas 0.9 3138
## 4 31183 Wheeler Nebraska 1.3 3137
## 5 36061 New York New York 1.9 3136
## 6 13309 Wheeler Georgia 2.3 3134
## 7 38029 Emmons North Dakota 2.3 3134
## 8 30019 Daniels Montana 2.6 3132
## 9 31057 Dundy Nebraska 2.6 3132
## 10 46069 Hyde South Dakota 2.8 3130
## # ℹ 3,130 more rows
- Texas
#treat ranks below 50 as the top-ranked counties, to see which states
#appear most often among them; rows are listed from the highest of those
#rank numbers down to rank 1
#which.max doesnt work here
arrange(
  filter(county_data, carpool_rank < 50),
  desc(carpool_rank)
)
## # A tibble: 52 × 38
## census_id state county total_pop men women hispanic white black native
## <int> <chr> <chr> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2130 Alaska Ketch… 13699 7038 6661 4.8 65 0.4 13.6
## 2 12027 Florida DeSoto 34957 19756 15201 30.5 55.2 12.9 0.1
## 3 12067 Florida Lafay… 8801 5265 3536 12 69.4 17.7 0
## 4 29017 Missouri Bolli… 12356 6184 6172 0.9 96.9 0.2 0.1
## 5 29151 Missouri Osage 13758 7190 6568 0.7 97.6 0.8 0.3
## 6 46121 South Dak… Todd 9942 4862 5080 3.7 10 0.3 74.4
## 7 49045 Utah Tooele 60893 30737 30156 11.8 83.7 0.6 1.1
## 8 5127 Arkansas Scott 10870 5557 5313 7.5 84.4 0.2 1.6
## 9 16007 Idaho Bear … 5939 2981 2958 4.1 93.8 0.5 0.1
## 10 29059 Missouri Dallas 16564 8284 8280 1.8 94.6 0.1 0.2
## # ℹ 42 more rows
## # ℹ 28 more variables: asian <dbl>, pacific <dbl>, citizen <int>, income <int>,
## # income_per_cap <int>, poverty <dbl>, child_poverty <dbl>,
## # professional <dbl>, service <dbl>, office <dbl>, construction <dbl>,
## # production <dbl>, drive <dbl>, carpool <dbl>, transit <dbl>, walk <dbl>,
## # other_transp <dbl>, work_at_home <dbl>, mean_commute <dbl>, employed <int>,
## # private_work <dbl>, public_work <dbl>, self_employed <dbl>, …
#find the state that appears most often among the top-ranked counties
#only rows with carpool_rank < 50 are tabulated; tabulating all of
#county_data would just return the state that has the most counties
names(which.max(table(county_data$state[county_data$carpool_rank < 50])))
## [1] "Texas"
- Georgia, Indiana, Texas, Arkansas, Kansas