Assignment 3

3,142 rows and 35 columns

#load tidyverse library
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the revised 2015 ACS county CSV (path is relative to the working
# directory) and keep the resulting tibble in `county_data`
county_data <- read_csv(file = "data/acs_2015_county_data_revised.csv")

## Rows: 3142 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): state, county
## dbl (33): census_id, total_pop, men, women, hispanic, white, black, native, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Use glimpse() to see the number of observations (rows) and variables
# (columns), plus each column's type and first few values
county_data %>%
  glimpse()

## Rows: 3,142
## Columns: 35
## $ census_id      <dbl> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1…
## $ state          <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", …
## $ county         <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "Bul…
## $ total_pop      <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664…
## $ men            <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1…
## $ women          <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, …
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, …
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9…
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8…
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, …
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, …
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizen        <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,…
## $ income         <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,…
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,…
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1…
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3…
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2…
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1…
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1…
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13…
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2…
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8…
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1…
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, …
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, …
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, …
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, …
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2…
## $ employed       <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136…
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7…
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1…
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, …
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, …
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9…

The following columns should be changed into integers: census_id, total_pop, men, women, citizen, income, income_per_cap, and employed

# Convert the whole-number ID/count columns from double to integer in a
# single pass instead of one assignment per column
county_data <- county_data %>%
  mutate(
    across(
      c(
        census_id, total_pop, men, women,
        citizen, income, income_per_cap, employed
      ),
      as.integer
    )
  )

# Re-inspect the column types to confirm the integer conversion took effect
county_data %>%
  glimpse()

## Rows: 3,142
## Columns: 35
## $ census_id      <int> 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017, 1…
## $ state          <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", …
## $ county         <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount", "Bul…
## $ total_pop      <int> 55221, 195121, 26932, 22604, 57710, 10678, 20354, 11664…
## $ men            <int> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 56274, 1…
## $ women          <int> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60374, …
## $ hispanic       <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, 7.6, …
## $ white          <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57.3, 9…
## $ black          <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3, 4.8…
## $ native         <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, 0.4, …
## $ asian          <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, 0.3, …
## $ pacific        <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizen        <int> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 88612,…
## $ income         <int> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 41703,…
## $ income_per_cap <int> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 21374,…
## $ poverty        <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21.6, 1…
## $ child_poverty  <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37.2, 3…
## $ professional   <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23.3, 2…
## $ service        <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14.5, 1…
## $ office         <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26.3, 1…
## $ construction   <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.5, 13…
## $ production     <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24.4, 2…
## $ drive          <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85.1, 8…
## $ carpool        <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9, 12.1…
## $ transit        <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, 0.2, …
## $ walk           <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, 1.1, …
## $ other_transp   <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, 1.4, …
## $ work_at_home   <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, 1.9, …
## $ mean_commute   <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25.1, 2…
## $ employed       <int> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401, 136…
## $ private_work   <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85.1, 7…
## $ public_work    <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12.1, 1…
## $ self_employed  <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, 4.1, …
## $ family_work    <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, 0.5, …
## $ unemployment   <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7.9, 9…

There are two null (NA) values: one in income and one in child_poverty. Because there were only 2 nulls across 2 observations, removing them would not interfere too drastically with the data.

# Count missing (NA) values per column: is.na() gives a logical matrix and
# colSums() totals the TRUEs in each column
county_data %>%
  is.na() %>%
  colSums()

##      census_id          state         county      total_pop            men 
##              0              0              0              0              0 
##          women       hispanic          white          black         native 
##              0              0              0              0              0 
##          asian        pacific        citizen         income income_per_cap 
##              0              0              0              1              0 
##        poverty  child_poverty   professional        service         office 
##              0              1              0              0              0 
##   construction     production          drive        carpool        transit 
##              0              0              0              0              0 
##           walk   other_transp   work_at_home   mean_commute       employed 
##              0              0              0              0              0 
##   private_work    public_work  self_employed    family_work   unemployment 
##              0              0              0              0              0

# Drop every row that contains at least one missing (NA) value
county_data <- county_data %>%
  na.omit()

# Verify the cleanup: every per-column NA count should now be zero
county_data %>%
  is.na() %>%
  colSums()

##      census_id          state         county      total_pop            men 
##              0              0              0              0              0 
##          women       hispanic          white          black         native 
##              0              0              0              0              0 
##          asian        pacific        citizen         income income_per_cap 
##              0              0              0              0              0 
##        poverty  child_poverty   professional        service         office 
##              0              0              0              0              0 
##   construction     production          drive        carpool        transit 
##              0              0              0              0              0 
##           walk   other_transp   work_at_home   mean_commute       employed 
##              0              0              0              0              0 
##   private_work    public_work  self_employed    family_work   unemployment 
##              0              0              0              0              0

The max for the following seems a bit high: unemployment, transit, women, men, total_pop, citizen, income, and walk. In visualizations I can adjust the axis to exclude outliers.

# Print per-column summary statistics (min, quartiles, mean, max for numeric
# columns) to look for implausible values
county_data %>%
  summary()

##    census_id        state              county            total_pop       
##  Min.   : 1001   Length:3140        Length:3140        Min.   :     267  
##  1st Qu.:18179   Class :character   Class :character   1st Qu.:   11036  
##  Median :29176   Mode  :character   Mode  :character   Median :   25793  
##  Mean   :30383                                         Mean   :  100801  
##  3rd Qu.:45080                                         3rd Qu.:   67621  
##  Max.   :56045                                         Max.   :10038388  
##       men              women            hispanic          white      
##  Min.   :    136   Min.   :    131   Min.   : 0.000   Min.   : 0.90  
##  1st Qu.:   5551   1st Qu.:   5488   1st Qu.: 1.900   1st Qu.:65.67  
##  Median :  12838   Median :  12916   Median : 3.700   Median :84.65  
##  Mean   :  49597   Mean   :  51204   Mean   : 8.819   Mean   :77.31  
##  3rd Qu.:  33328   3rd Qu.:  34123   3rd Qu.: 9.000   3rd Qu.:93.33  
##  Max.   :4945351   Max.   :5093037   Max.   :98.700   Max.   :99.80  
##      black            native           asian           pacific        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.600   1st Qu.: 0.100   1st Qu.: 0.200   1st Qu.: 0.00000  
##  Median : 2.100   Median : 0.300   Median : 0.500   Median : 0.00000  
##  Mean   : 8.885   Mean   : 1.763   Mean   : 1.253   Mean   : 0.07357  
##  3rd Qu.:10.200   3rd Qu.: 0.600   3rd Qu.: 1.200   3rd Qu.: 0.00000  
##  Max.   :85.900   Max.   :92.100   Max.   :41.600   Max.   :11.10000  
##     citizen            income       income_per_cap     poverty    
##  Min.   :    199   Min.   : 19328   Min.   : 8292   Min.   : 1.4  
##  1st Qu.:   8276   1st Qu.: 38826   1st Qu.:20470   1st Qu.:12.0  
##  Median :  19455   Median : 45095   Median :23575   Median :16.0  
##  Mean   :  70849   Mean   : 46824   Mean   :24331   Mean   :16.7  
##  3rd Qu.:  50795   3rd Qu.: 52248   3rd Qu.:27138   3rd Qu.:20.3  
##  Max.   :6046749   Max.   :123453   Max.   :65600   Max.   :53.3  
##  child_poverty    professional      service          office     
##  Min.   : 0.00   Min.   :13.50   Min.   : 5.00   Min.   : 4.10  
##  1st Qu.:16.10   1st Qu.:26.70   1st Qu.:15.90   1st Qu.:20.20  
##  Median :22.50   Median :30.00   Median :18.00   Median :22.40  
##  Mean   :23.29   Mean   :31.05   Mean   :18.25   Mean   :22.13  
##  3rd Qu.:29.50   3rd Qu.:34.42   3rd Qu.:20.20   3rd Qu.:24.30  
##  Max.   :72.30   Max.   :74.00   Max.   :36.60   Max.   :35.40  
##   construction     production        drive         carpool     
##  Min.   : 1.70   Min.   : 0.00   Min.   : 5.2   Min.   : 0.00  
##  1st Qu.: 9.80   1st Qu.:11.50   1st Qu.:76.6   1st Qu.: 8.50  
##  Median :12.20   Median :15.40   Median :80.6   Median : 9.90  
##  Mean   :12.75   Mean   :15.82   Mean   :79.1   Mean   :10.33  
##  3rd Qu.:15.00   3rd Qu.:19.40   3rd Qu.:83.6   3rd Qu.:11.90  
##  Max.   :40.30   Max.   :55.60   Max.   :94.6   Max.   :29.90  
##     transit             walk         other_transp    work_at_home   
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.000  
##  1st Qu.: 0.1000   1st Qu.: 1.400   1st Qu.: 0.90   1st Qu.: 2.800  
##  Median : 0.4000   Median : 2.400   Median : 1.30   Median : 4.000  
##  Mean   : 0.9681   Mean   : 3.294   Mean   : 1.61   Mean   : 4.694  
##  3rd Qu.: 0.8000   3rd Qu.: 4.000   3rd Qu.: 1.90   3rd Qu.: 5.700  
##  Max.   :61.7000   Max.   :71.200   Max.   :39.10   Max.   :37.200  
##   mean_commute      employed        private_work    public_work   
##  Min.   : 4.90   Min.   :    166   Min.   :29.50   Min.   : 5.80  
##  1st Qu.:19.30   1st Qu.:   4532   1st Qu.:70.90   1st Qu.:13.07  
##  Median :22.90   Median :  10657   Median :75.85   Median :16.10  
##  Mean   :23.15   Mean   :  46416   Mean   :74.45   Mean   :17.33  
##  3rd Qu.:26.60   3rd Qu.:  29272   3rd Qu.:79.80   3rd Qu.:20.10  
##  Max.   :44.00   Max.   :4635465   Max.   :88.30   Max.   :66.20  
##  self_employed     family_work      unemployment   
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.: 5.400   1st Qu.:0.1000   1st Qu.: 5.500  
##  Median : 6.900   Median :0.2000   Median : 7.500  
##  Mean   : 7.922   Mean   :0.2917   Mean   : 7.815  
##  3rd Qu.: 9.400   3rd Qu.:0.3000   3rd Qu.: 9.700  
##  Max.   :36.600   Max.   :9.8000   Max.   :29.400

1,984 counties have more women than men

# Count the counties where women outnumber men.
# The comparison is vectorised, so every row is checked -- including the
# last one, which a `while (i < nrow(county_data))` loop would never reach.
# sum() over a logical vector counts the TRUE entries.
more_women <- sum(county_data$women > county_data$men)

# display the count
print(more_women)

## [1] 1984

2,418 counties have unemployment less than 10%

# Count the counties whose unemployment rate is below 10 (percent).
# The comparison is vectorised, so every row is checked -- including the
# last one, which a `while (i < nrow(county_data))` loop would never reach.
# sum() over a logical vector counts the TRUE entries.
unemp <- sum(county_data$unemployment < 10)

# display the count
print(unemp)

## [1] 2418

The 10 counties with the highest commute are : Conecuh, Allendale, Quitman, Crowley, Humphreys, Corson, Ziebach, Oglala Lakota, Todd, Dewey, Kusilvak Census Area

# load the dplyr package
library(dplyr)

# Keep the 10 counties with the longest mean commute.
# slice_max() is told explicitly which column to rank by and returns the rows
# sorted from highest to lowest. A bare top_n(10) instead ranks by the LAST
# column of the table and discards the preceding arrange() order.
# Ties at the cut-off are kept, so more than 10 rows can be returned.
county_data %>%
  slice_max(mean_commute, n = 10) %>%
  select(census_id, county, state, mean_commute)

## Selecting by unemployment

## # A tibble: 11 × 4
##    census_id county               state          mean_commute
##        <int> <chr>                <chr>                 <dbl>
##  1      1035 Conecuh              Alabama                29.7
##  2     45005 Allendale            South Carolina         27.5
##  3     28119 Quitman              Mississippi            23.7
##  4      8025 Crowley              Colorado               21.9
##  5     28053 Humphreys            Mississippi            21.4
##  6     46031 Corson               South Dakota           19.7
##  7     46137 Ziebach              South Dakota           19.6
##  8     46102 Oglala Lakota        South Dakota           17.7
##  9     46121 Todd                 South Dakota           16.5
## 10     46041 Dewey                South Dakota           16.3
## 11      2158 Kusilvak Census Area Alaska                  5.9

# Create a new variable holding the percentage of women in each county.
# The division is vectorised, so every row gets a value -- including the
# last one, which a `while (i < nrow(county_data))` loop would leave at 0.
county_data$percent_women <-
  (county_data$women / county_data$total_pop) * 100

# Show the 10 counties with the highest share of women, lowest first.
# The ranking column is named explicitly rather than relying on top_n()'s
# default of "whatever column is last". Ties at the cut-off are kept.
county_data %>%
  slice_max(percent_women, n = 10) %>%
  arrange(percent_women) %>%
  select(census_id, county, state, percent_women)

## Selecting by percent_women

## # A tibble: 10 × 4
##    census_id county        state       percent_women
##        <int> <chr>         <chr>               <dbl>
##  1     29117 Livingston    Missouri             54.9
##  2     35011 De Baca       New Mexico           55.1
##  3     51790 Staunton city Virginia             55.1
##  4     48137 Edwards       Texas                55.2
##  5     51091 Highland      Virginia             55.3
##  6     51620 Franklin city Virginia             55.5
##  7     28125 Sharkey       Mississippi          55.5
##  8      1119 Sumter        Alabama              55.7
##  9     13235 Pulaski       Georgia              58.0
## 10     51720 Norton city   Virginia             59.4

1. Claiborne, Duval, Kenedy, Kent, Presidio, Beaver, Nance, Hooker, Bailey, Edwards, Gosper
2. Texas
3. Nance, Hooker, Bailey, Edwards, Gosper
4. 3

# Create a new variable with the sum of the six race/ethnicity percentages.
# The addition is vectorised, so every row gets a value -- including the
# last one, which a `while (i < nrow(county_data))` loop would leave at 0.
county_data <- county_data %>%
  mutate(
    total_race = hispanic + white + black + native + asian + pacific
  )

# Show the counties with the 10 highest totals, lowest first.
# The ranking column is named explicitly rather than relying on top_n()'s
# default of "whatever column is last". Ties at the cut-off are kept, so more
# than 10 rows can be returned.
county_data %>%
  slice_max(total_race, n = 10) %>%
  arrange(total_race) %>%
  select(census_id, county, state, total_race)

## Selecting by total_race

## # A tibble: 11 × 4
##    census_id county    state       total_race
##        <int> <chr>     <chr>            <dbl>
##  1     28021 Claiborne Mississippi       100 
##  2     48131 Duval     Texas             100 
##  3     48261 Kenedy    Texas             100 
##  4     48263 Kent      Texas             100 
##  5     48377 Presidio  Texas             100 
##  6     49001 Beaver    Utah              100 
##  7     31125 Nance     Nebraska          100.
##  8     31091 Hooker    Nebraska          100.
##  9     48017 Bailey    Texas             100.
## 10     48137 Edwards   Texas             100.
## 11     31073 Gosper    Nebraska          100.

# Add carpool_rank: rank 1 goes to the county with the highest carpool value;
# min_rank() gives tied counties the same (lowest) rank
county_data <- county_data %>%
  mutate(carpool_rank = min_rank(desc(carpool)))

Clay, LaGrange, Jenkins, Sevier, Seward, Cochran, Jim Hogg, Roberts, Holmes, Powell

# Open the help page for min_rank() (interactive use only; this launches the
# local help server and does not change any data)
?min_rank()

## starting httpd help server ... done

# List counties from carpool rank 1 downwards; the tibble print shows the
# first 10 rows, i.e. the 10 highest-ranked counties.
# top_n(10) is deliberately not used: it does not sort its result, and with
# no `wt` argument it picks rows by the last column of the table.
county_data %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  arrange(carpool_rank)

## # A tibble: 3,140 × 5
##    census_id county   state    carpool carpool_rank
##        <int> <chr>    <chr>      <dbl>        <int>
##  1     13061 Clay     Georgia     29.9            1
##  2     18087 LaGrange Indiana     27              2
##  3     13165 Jenkins  Georgia     25.3            3
##  4      5133 Sevier   Arkansas    24.4            4
##  5     20175 Seward   Kansas      23.4            5
##  6     48079 Cochran  Texas       22.8            6
##  7     48247 Jim Hogg Texas       22.6            7
##  8     48393 Roberts  Texas       22.4            8
##  9     39075 Holmes   Ohio        21.8            9
## 10     21197 Powell   Kentucky    21.6           10
## # ℹ 3,130 more rows

Kenedy, King, Irion, Wheeler (Nebraska), New York, Wheeler (Georgia), Emmons, Daniels, Dundy, Hyde

# Open the help page for min_rank() again (interactive use only; does not
# change any data)
?min_rank()

# List counties from the largest carpool_rank number (lowest carpool value)
# upwards; the tibble print shows the first 10 rows, i.e. the 10
# lowest-ranked counties.
# top_n(10) is deliberately not used: it does not sort its result, and with
# no `wt` argument it picks rows by the last column of the table.
county_data %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  arrange(desc(carpool_rank))

## # A tibble: 3,140 × 5
##    census_id county   state        carpool carpool_rank
##        <int> <chr>    <chr>          <dbl>        <int>
##  1     48261 Kenedy   Texas            0           3139
##  2     48269 King     Texas            0           3139
##  3     48235 Irion    Texas            0.9         3138
##  4     31183 Wheeler  Nebraska         1.3         3137
##  5     36061 New York New York         1.9         3136
##  6     13309 Wheeler  Georgia          2.3         3134
##  7     38029 Emmons   North Dakota     2.3         3134
##  8     30019 Daniels  Montana          2.6         3132
##  9     31057 Dundy    Nebraska         2.6         3132
## 10     46069 Hyde     South Dakota     2.8         3130
## # ℹ 3,130 more rows

Texas

# Treat a carpool_rank below 50 as "top ranked" and list those counties,
# largest rank number first, to see which states appear in the table
county_data %>%
  arrange(desc(carpool_rank)) %>%
  filter(carpool_rank < 50)

## # A tibble: 52 × 38
##    census_id state      county total_pop   men women hispanic white black native
##        <int> <chr>      <chr>      <int> <int> <int>    <dbl> <dbl> <dbl>  <dbl>
##  1      2130 Alaska     Ketch…     13699  7038  6661      4.8  65     0.4   13.6
##  2     12027 Florida    DeSoto     34957 19756 15201     30.5  55.2  12.9    0.1
##  3     12067 Florida    Lafay…      8801  5265  3536     12    69.4  17.7    0  
##  4     29017 Missouri   Bolli…     12356  6184  6172      0.9  96.9   0.2    0.1
##  5     29151 Missouri   Osage      13758  7190  6568      0.7  97.6   0.8    0.3
##  6     46121 South Dak… Todd        9942  4862  5080      3.7  10     0.3   74.4
##  7     49045 Utah       Tooele     60893 30737 30156     11.8  83.7   0.6    1.1
##  8      5127 Arkansas   Scott      10870  5557  5313      7.5  84.4   0.2    1.6
##  9     16007 Idaho      Bear …      5939  2981  2958      4.1  93.8   0.5    0.1
## 10     29059 Missouri   Dallas     16564  8284  8280      1.8  94.6   0.1    0.2
## # ℹ 42 more rows
## # ℹ 28 more variables: asian <dbl>, pacific <dbl>, citizen <int>, income <int>,
## #   income_per_cap <int>, poverty <dbl>, child_poverty <dbl>,
## #   professional <dbl>, service <dbl>, office <dbl>, construction <dbl>,
## #   production <dbl>, drive <dbl>, carpool <dbl>, transit <dbl>, walk <dbl>,
## #   other_transp <dbl>, work_at_home <dbl>, mean_commute <dbl>, employed <int>,
## #   private_work <dbl>, public_work <dbl>, self_employed <dbl>, …

  # names(which.max(table(county_data$state)))

# Name of the state with the most counties among the top-ranked carpool
# counties (carpool_rank < 50).
# The tabulation is restricted to those counties on purpose: tabulating
# county_data$state for the whole data set only reports the state with the
# most counties overall, regardless of carpool rank.
top_carpool_states <- county_data$state[county_data$carpool_rank < 50]
names(which.max(table(top_carpool_states)))

## [1] "Texas"

Georgia, Indiana, Texas, Arkansas, Kansas

Assignment 3

Oyindamola Okunoye

2025-09-30