library(tidyverse)
library(acs)
library(stringr) # to pad fips codes
library(openintro) # Use this as an example to follow when organizing raw data imported from Census.Bureau
Select variables to be used for clustering (ones that are fixed in the short term) The goal is to provide a role model for towns: the town is very similar to us. Why can’t we be like them?! As an experiment, let’s start with pop2010, density, age_over_65, white, bachelors.
Using population density
# Create a new variable, rural
countyComplete$rural <- ifelse(countyComplete$density < 300, "rural", "urban")
#300 excludes three less rural counties in southern NH: justify
countyComplete$rural <- factor(countyComplete$rural)
str(countyComplete)
## 'data.frame': 3143 obs. of 54 variables:
## $ state : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 1877 levels "Abbeville County",..: 83 90 101 151 166 227 237 250 298 320 ...
## $ FIPS : num 1001 1003 1005 1007 1009 ...
## $ pop2010 : num 54571 182265 27457 22915 57322 ...
## $ pop2000 : num 43671 140415 29038 20826 51024 ...
## $ age_under_5 : num 6.6 6.1 6.2 6 6.3 6.8 6.5 6.1 5.7 5.3 ...
## $ age_under_18 : num 26.8 23 21.9 22.7 24.6 22.3 24.1 22.9 22.5 21.4 ...
## $ age_over_65 : num 12 16.8 14.2 12.7 14.7 13.5 16.7 14.3 16.7 17.9 ...
## $ female : num 51.3 51.1 46.9 46.3 50.5 45.8 53 51.8 52.2 50.4 ...
## $ white : num 78.5 85.7 48 75.8 92.6 23 54.4 74.9 58.8 92.7 ...
## $ black : num 17.7 9.4 46.9 22 1.3 70.2 43.4 20.6 38.7 4.6 ...
## $ native : num 0.4 0.7 0.4 0.3 0.5 0.2 0.3 0.5 0.2 0.5 ...
## $ asian : num 0.9 0.7 0.4 0.1 0.2 0.2 0.8 0.7 0.5 0.2 ...
## $ pac_isl : num NA NA NA NA NA NA 0 0.1 0 0 ...
## $ two_plus_races : num 1.6 1.5 0.9 0.9 1.2 0.8 0.8 1.7 1.1 1.5 ...
## $ hispanic : num 2.4 4.4 5.1 1.8 8.1 7.1 0.9 3.3 1.6 1.2 ...
## $ white_not_hispanic : num 77.2 83.5 46.8 75 88.9 21.9 54.1 73.6 58.1 92.1 ...
## $ no_move_in_one_plus_year : num 86.3 83 83 90.5 87.2 88.5 92.8 82.9 86.2 88.1 ...
## $ foreign_born : num 2 3.6 2.8 0.7 4.7 1.1 1.1 2.5 0.9 0.5 ...
## $ foreign_spoken_at_home : num 3.7 5.5 4.7 1.5 7.2 3.8 1.6 4.5 1.6 1.4 ...
## $ hs_grad : num 85.3 87.6 71.9 74.5 74.7 74.7 74.8 78.5 71.8 73.4 ...
## $ bachelors : num 21.7 26.8 13.5 10 12.5 12 11 16.1 10.8 10.5 ...
## $ veterans : num 5817 20396 2327 1883 4072 ...
## $ mean_work_travel : num 25.1 25.8 23.8 28.3 33.2 28.1 25.1 22.1 23.6 26.2 ...
## $ housing_units : num 22135 104061 11829 8981 23887 ...
## $ home_ownership : num 77.5 76.7 68 82.9 82 76.9 69 70.7 71.4 77.5 ...
## $ housing_multi_unit : num 7.2 22.6 11.1 6.6 3.7 9.9 13.7 14.3 8.7 4.3 ...
## $ median_val_owner_occupied : num 133900 177200 88200 81200 113700 ...
## $ households : num 19718 69476 9795 7441 20605 ...
## $ persons_per_household : num 2.7 2.5 2.52 3.02 2.73 2.85 2.58 2.46 2.51 2.22 ...
## $ per_capita_income : num 24568 26469 15875 19918 21070 ...
## $ median_household_income : num 53255 50147 33219 41770 45549 ...
## $ poverty : num 10.6 12.2 25 12.6 13.4 25.3 25 19.5 20.3 17.6 ...
## $ private_nonfarm_establishments : num 877 4812 522 318 749 ...
## $ private_nonfarm_employment : num 10628 52233 7990 2927 6968 ...
## $ percent_change_private_nonfarm_employment: num 16.6 17.4 -27 -14 -11.4 -18.5 2.1 -5.6 -45.8 5.4 ...
## $ nonemployment_establishments : num 2971 14175 1527 1192 3501 ...
## $ firms : num 4067 19035 1667 1385 4458 ...
## $ black_owned_firms : num 15.2 2.7 NA 14.9 NA NA NA 7.2 NA NA ...
## $ native_owned_firms : num NA 0.4 NA NA NA NA NA NA NA NA ...
## $ asian_owned_firms : num 1.3 1 NA NA NA NA 3.3 1.6 NA NA ...
## $ pac_isl_owned_firms : num NA NA NA NA NA NA NA NA NA NA ...
## $ hispanic_owned_firms : num 0.7 1.3 NA NA NA NA NA 0.5 NA NA ...
## $ women_owned_firms : num 31.7 27.3 27 NA 23.2 38.8 NA 24.7 29.3 14.5 ...
## $ manufacturer_shipments_2007 : num NA 1410273 NA 0 341544 ...
## $ mercent_whole_sales_2007 : num NA NA NA NA NA ...
## $ sales : num 598175 2966489 188337 124707 319700 ...
## $ sales_per_capita : num 12003 17166 6334 5804 5622 ...
## $ accommodation_food_service : num 88157 436955 NA 10757 20941 ...
## $ building_permits : num 191 696 10 8 18 1 3 107 10 6 ...
## $ fed_spending : num 331142 1119082 240308 163201 294114 ...
## $ area : num 594 1590 885 623 645 ...
## $ density : num 91.8 114.6 31 36.8 88.9 ...
## $ rural : Factor w/ 2 levels "rural","urban": 1 1 1 1 1 1 1 1 1 1 ...
countyRural <-
countyComplete %>%
filter(rural == "rural") %>%
select("state", "name", "FIPS", "pop2010", "density", "age_over_65", "bachelors",
"median_household_income", "no_move_in_one_plus_year", "poverty")
str(countyRural)
## 'data.frame': 2747 obs. of 10 variables:
## $ state : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 1877 levels "Abbeville County",..: 83 90 101 151 166 227 237 250 298 320 ...
## $ FIPS : num 1001 1003 1005 1007 1009 ...
## $ pop2010 : num 54571 182265 27457 22915 57322 ...
## $ density : num 91.8 114.6 31 36.8 88.9 ...
## $ age_over_65 : num 12 16.8 14.2 12.7 14.7 13.5 16.7 14.3 16.7 17.9 ...
## $ bachelors : num 21.7 26.8 13.5 10 12.5 12 11 16.1 10.8 10.5 ...
## $ median_household_income : num 53255 50147 33219 41770 45549 ...
## $ no_move_in_one_plus_year: num 86.3 83 83 90.5 87.2 88.5 92.8 82.9 86.2 88.1 ...
## $ poverty : num 10.6 12.2 25 12.6 13.4 25.3 25 19.5 20.3 17.6 ...
head(countyRural)
## state name FIPS pop2010 density age_over_65 bachelors
## 1 Alabama Autauga County 1001 54571 91.8 12.0 21.7
## 2 Alabama Baldwin County 1003 182265 114.6 16.8 26.8
## 3 Alabama Barbour County 1005 27457 31.0 14.2 13.5
## 4 Alabama Bibb County 1007 22915 36.8 12.7 10.0
## 5 Alabama Blount County 1009 57322 88.9 14.7 12.5
## 6 Alabama Bullock County 1011 10914 17.5 13.5 12.0
## median_household_income no_move_in_one_plus_year poverty
## 1 53255 86.3 10.6
## 2 50147 83.0 12.2
## 3 33219 83.0 25.0
## 4 41770 90.5 12.6
## 5 45549 87.2 13.4
## 6 31602 88.5 25.3
summary(countyRural)
## state name FIPS
## Texas : 235 Washington County: 24 Min. : 1001
## Georgia : 134 Franklin County : 23 1st Qu.:19022
## Kentucky: 113 Lincoln County : 23 Median :29133
## Missouri: 108 Jackson County : 22 Mean :30201
## Kansas : 101 Jefferson County : 21 3rd Qu.:45068
## Iowa : 97 Madison County : 17 Max. :56045
## (Other) :1959 (Other) :2617
## pop2010 density age_over_65 bachelors
## Min. : 82 Min. : 0.00 Min. : 3.50 Min. : 3.70
## 1st Qu.: 9982 1st Qu.: 13.80 1st Qu.:13.70 1st Qu.:12.60
## Median : 21704 Median : 36.80 Median :16.00 Median :15.90
## Mean : 40735 Mean : 57.25 Mean :16.33 Mean :17.36
## 3rd Qu.: 45664 3rd Qu.: 75.80 3rd Qu.:18.60 3rd Qu.:20.20
## Max. :2035210 Max. :299.80 Max. :43.40 Max. :64.00
##
## median_household_income no_move_in_one_plus_year poverty
## Min. : 19351 Min. : 51.60 Min. : 0.00
## 1st Qu.: 36353 1st Qu.: 83.60 1st Qu.:11.40
## Median : 41437 Median : 86.70 Median :15.10
## Mean : 42386 Mean : 86.09 Mean :15.91
## 3rd Qu.: 47175 3rd Qu.: 89.20 3rd Qu.:19.30
## Max. :103643 Max. :100.00 Max. :53.50
##
lapply(countyRural[, -c(1:3)], hist)
## $pop2010
## $breaks
## [1] 0 200000 400000 600000 800000 1000000 1200000 1400000
## [9] 1600000 1800000 2000000 2200000
##
## $counts
## [1] 2692 43 6 1 3 0 0 0 0 1 1
##
## $density
## [1] 4.899891e-06 7.826720e-08 1.092100e-08 1.820167e-09 5.460502e-09
## [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.820167e-09
## [11] 1.820167e-09
##
## $mids
## [1] 100000 300000 500000 700000 900000 1100000 1300000 1500000
## [9] 1700000 1900000 2100000
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $density
## $breaks
## [1] 0 20 40 60 80 100 120 140 160 180 200 220 240 260 280 300
##
## $counts
## [1] 880 583 381 249 172 112 85 56 49 42 33 34 25 22 24
##
## $density
## [1] 0.0160174736 0.0106115763 0.0069348380 0.0045322170 0.0031306880
## [6] 0.0020385876 0.0015471423 0.0010192938 0.0008918821 0.0007644703
## [11] 0.0006006553 0.0006188569 0.0004550419 0.0004004368 0.0004368402
##
## $mids
## [1] 10 30 50 70 90 110 130 150 170 190 210 230 250 270 290
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $age_over_65
## $breaks
## [1] 0 5 10 15 20 25 30 35 40 45
##
## $counts
## [1] 4 125 956 1194 383 74 10 0 1
##
## $density
## [1] 0.0002912268 0.0091008373 0.0696032035 0.0869311977 0.0278849654
## [6] 0.0053876957 0.0007280670 0.0000000000 0.0000728067
##
## $mids
## [1] 2.5 7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $bachelors
## $breaks
## [1] 0 5 10 15 20 25 30 35 40 45 50 55 60 65
##
## $counts
## [1] 4 239 950 851 371 178 83 32 21 13 3 1 1
##
## $density
## [1] 0.0002912268 0.0174008009 0.0691663633 0.0619585002 0.0270112850
## [6] 0.0129595923 0.0060429560 0.0023298143 0.0015289407 0.0009464871
## [11] 0.0002184201 0.0000728067 0.0000728067
##
## $mids
## [1] 2.5 7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5 52.5 57.5 62.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $median_household_income
## $breaks
## [1] 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000
## [11] 65000 70000 75000 80000 85000 90000 95000 100000 105000
##
## $counts
## [1] 3 39 128 373 624 684 415 227 121 67 35 15 7 8 0 0 0
## [18] 1
##
## $density
## [1] 2.184201e-07 2.839461e-06 9.319257e-06 2.715690e-05 4.543138e-05
## [6] 4.979978e-05 3.021478e-05 1.652712e-05 8.809610e-06 4.878049e-06
## [11] 2.548234e-06 1.092100e-06 5.096469e-07 5.824536e-07 0.000000e+00
## [16] 0.000000e+00 0.000000e+00 7.280670e-08
##
## $mids
## [1] 17500 22500 27500 32500 37500 42500 47500 52500 57500 62500
## [11] 67500 72500 77500 82500 87500 92500 97500 102500
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $no_move_in_one_plus_year
## $breaks
## [1] 50 55 60 65 70 75 80 85 90 95 100
##
## $counts
## [1] 1 2 1 13 50 182 724 1275 471 28
##
## $density
## [1] 0.0000728067 0.0001456134 0.0000728067 0.0009464871 0.0036403349
## [6] 0.0132508191 0.0527120495 0.0928285402 0.0342919549 0.0020385876
##
## $mids
## [1] 52.5 57.5 62.5 67.5 72.5 77.5 82.5 87.5 92.5 97.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $poverty
## $breaks
## [1] 0 5 10 15 20 25 30 35 40 45 50 55
##
## $counts
## [1] 25 431 914 781 375 138 54 15 8 5 1
##
## $density
## [1] 0.0018201675 0.0313796869 0.0665453222 0.0568620313 0.0273025118
## [6] 0.0100473244 0.0039315617 0.0010921005 0.0005824536 0.0003640335
## [11] 0.0000728067
##
## $mids
## [1] 2.5 7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5 52.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
#hist(countyRural$median_household_income, breaks = 100)
# Copy customer data into new data frame
new_data = countyRural[ , c("pop2010", "density", "age_over_65", "bachelors")]
# Remove customer id as a variable, store it as row names
head(new_data)
## pop2010 density age_over_65 bachelors
## 1 54571 91.8 12.0 21.7
## 2 182265 114.6 16.8 26.8
## 3 27457 31.0 14.2 13.5
## 4 22915 36.8 12.7 10.0
## 5 57322 88.9 14.7 12.5
## 6 10914 17.5 13.5 12.0
row.names(new_data) = new_data$FIPS
new_data$FIPS = NULL
head(new_data)
## pop2010 density age_over_65 bachelors
## 1 54571 91.8 12.0 21.7
## 2 182265 114.6 16.8 26.8
## 3 27457 31.0 14.2 13.5
## 4 22915 36.8 12.7 10.0
## 5 57322 88.9 14.7 12.5
## 6 10914 17.5 13.5 12.0
# Take the log-transform of the data, and plot
#new_data$median_household_income = log(new_data$median_household_income)
new_data <- data.frame(lapply(new_data, log))
lapply(new_data, hist)
## $pop2010
## $breaks
## [1] 4 5 6 7 8 9 10 11 12 13 14 15
##
## $counts
## [1] 2 1 35 124 374 853 860 412 78 6 2
##
## $density
## [1] 0.0007280670 0.0003640335 0.0127411722 0.0451401529 0.1361485257
## [6] 0.3105205679 0.3130688023 0.1499817983 0.0283946123 0.0021842009
## [11] 0.0007280670
##
## $mids
## [1] 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 12.5 13.5 14.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $density
## $breaks
## [1] -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
## [15] 4.5 5.0 5.5 6.0
##
## $counts
## [1] 5 2 10 17 35 54 89 128 116 168 255 398 484 430 294 199 62
##
## $density
## [1] 0.003641661 0.001456664 0.007283321 0.012381646 0.025491624
## [6] 0.039329934 0.064821559 0.093226511 0.084486526 0.122359796
## [11] 0.185724690 0.289876184 0.352512746 0.313182811 0.214129643
## [16] 0.144938092 0.045156591
##
## $mids
## [1] -2.25 -1.75 -1.25 -0.75 -0.25 0.25 0.75 1.25 1.75 2.25 2.75
## [12] 3.25 3.75 4.25 4.75 5.25 5.75
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $age_over_65
## $breaks
## [1] 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8
##
## $counts
## [1] 2 2 6 17 42 148 404 908 750 372 84 11 1
##
## $density
## [1] 0.003640335 0.003640335 0.010921005 0.030942847 0.076447033
## [6] 0.269384783 0.735347652 1.652712050 1.365125592 0.677102293
## [11] 0.152894066 0.020021842 0.001820167
##
## $mids
## [1] 1.3 1.5 1.7 1.9 2.1 2.3 2.5 2.7 2.9 3.1 3.3 3.5 3.7
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $bachelors
## $breaks
## [1] 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2
##
## $counts
## [1] 1 3 5 21 114 249 457 628 566 356 191 95 39 20 2
##
## $density
## [1] 0.001820167 0.005460502 0.009100837 0.038223517 0.207499090
## [6] 0.453221696 0.831816527 1.143065162 1.030214780 0.647979614
## [11] 0.347651984 0.172915908 0.070986531 0.036403349 0.003640335
##
## $mids
## [1] 1.3 1.5 1.7 1.9 2.1 2.3 2.5 2.7 2.9 3.1 3.3 3.5 3.7 3.9 4.1
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Standardize variables
new_data = scale(new_data)
head(new_data)
## pop2010 density age_over_65 bachelors
## [1,] 0.81147855 NaN -1.0625413 0.7929683
## [2,] 1.82507608 NaN 0.2333486 1.3624904
## [3,] 0.23416099 NaN -0.4142143 -0.4875685
## [4,] 0.08217551 NaN -0.8441848 -1.2972539
## [5,] 0.85281548 NaN -0.2809345 -0.6952102
## [6,] -0.54125465 NaN -0.6089115 -0.8053484
# Compute distance metrics on standardized data
# This will likely generate an error on most machines
# d = dist(new_data)
# Take a 10% sample
#sample = seq(1, 18417, by = 10)
#head(sample)
#customers_sample = customers[sample, ]
#new_data_sample = new_data[sample, ]
# Compute distance metrics on standardized data
d = dist(new_data)
# Perform hierarchical clustering on distance metrics
c = hclust(d, method="ward.D2")
# Plot de dendogram
plot(c)
# Cut at 9 segments
members = cutree(c, k = 5) #Dendogram appears to show that there 5 distinctive groups
# Show 30 first customers, frequency table
members[1:30]
## [1] 1 1 2 3 3 3 3 1 3 3 2 3 3 3 3 1 4 3 3 3 3 4 4 4 3 1 3 4 3 3
table(members)
## members
## 1 2 3 4 5
## 513 233 734 541 726
# Show profile of each segment
aggregate(countyRural[, -c(1:3)], by = list(members), mean)
## Group.1 pop2010 density age_over_65 bachelors
## 1 1 110628.487 123.80136 12.96257 26.19025
## 2 2 32140.403 52.88455 11.44163 14.60558
## 3 3 20805.881 41.90327 15.65831 11.47888
## 4 4 49537.026 76.54048 17.21645 18.52680
## 5 5 7693.536 12.75647 20.30482 17.06983
## median_household_income no_move_in_one_plus_year poverty
## 1 50254.31 82.57758 13.89006
## 2 43935.20 84.55966 18.47811
## 3 36723.44 87.00395 19.53365
## 4 43963.12 86.27079 14.16599
## 5 40877.81 88.00413 14.15014
The outliers should provide role models for struggling towns in each cluster.
library(knitr) # for kable()
library(kableExtra)
countyRural <- cbind(countyRural, members)
head(countyRural)
## state name FIPS pop2010 density age_over_65 bachelors
## 1 Alabama Autauga County 1001 54571 91.8 12.0 21.7
## 2 Alabama Baldwin County 1003 182265 114.6 16.8 26.8
## 3 Alabama Barbour County 1005 27457 31.0 14.2 13.5
## 4 Alabama Bibb County 1007 22915 36.8 12.7 10.0
## 5 Alabama Blount County 1009 57322 88.9 14.7 12.5
## 6 Alabama Bullock County 1011 10914 17.5 13.5 12.0
## median_household_income no_move_in_one_plus_year poverty members
## 1 53255 86.3 10.6 1
## 2 50147 83.0 12.2 1
## 3 33219 83.0 25.0 2
## 4 41770 90.5 12.6 3
## 5 45549 87.2 13.4 3
## 6 31602 88.5 25.3 3
ggplot(data = countyRural,
aes(x = factor(members), y = median_household_income)) +
geom_boxplot()
countyRural %>%
group_by(members) %>%
top_n(3, median_household_income) %>%
arrange(members, desc(median_household_income)) %>%
kable()
state | name | FIPS | pop2010 | density | age_over_65 | bachelors | median_household_income | no_move_in_one_plus_year | poverty | members |
---|---|---|---|---|---|---|---|---|---|---|
New Mexico | Los Alamos County | 35028 | 17950 | 164.4 | 15.0 | 64.0 | 103643 | 86.0 | 2.4 | 1 |
Virginia | Fauquier County | 51061 | 65203 | 100.7 | 12.7 | 30.8 | 83877 | 90.1 | 5.4 | 1 |
Massachusetts | Nantucket County | 25019 | 10172 | 226.2 | 12.1 | 40.2 | 83347 | 89.7 | 7.2 | 1 |
Alaska | Bristol Bay Borough | 2060 | 997 | 2.0 | 8.3 | 15.9 | 84000 | 77.7 | 5.0 | 2 |
Colorado | Elbert County | 8039 | 23086 | 12.5 | 9.5 | 29.2 | 78958 | 90.9 | 3.5 | 2 |
Virginia | King George County | 51099 | 23584 | 131.3 | 10.2 | 30.5 | 76241 | 83.3 | 7.1 | 2 |
Nevada | Lander County | 32015 | 5775 | 1.1 | 11.8 | 12.9 | 66525 | 80.6 | 12.2 | 3 |
Louisiana | Cameron Parish | 22023 | 6839 | 5.3 | 12.9 | 11.7 | 59555 | 87.6 | 11.6 | 3 |
Nevada | Pershing County | 32027 | 6753 | 1.1 | 13.0 | 12.4 | 56491 | 81.2 | 13.7 | 3 |
Virginia | Fluvanna County | 51065 | 25691 | 89.8 | 15.7 | 25.8 | 68223 | 91.7 | 7.5 | 4 |
Virginia | King William County | 51101 | 15935 | 58.2 | 12.4 | 19.8 | 64946 | 91.8 | 7.6 | 4 |
Virginia | Botetourt County | 51023 | 33148 | 61.2 | 16.4 | 22.9 | 64724 | 91.6 | 5.6 | 4 |
Texas | Loving County | 48301 | 82 | 0.1 | 14.6 | 17.9 | 83889 | 92.7 | 0.0 | 5 |
Colorado | Hinsdale County | 8053 | 843 | 0.8 | 17.4 | 42.0 | 74659 | 90.2 | 3.7 | 5 |
Texas | Hartley County | 48205 | 6062 | 4.1 | 12.5 | 19.8 | 66583 | 79.0 | 9.3 | 5 |
countyRural %>%
filter(state == "New Hampshire")
## state name FIPS pop2010 density age_over_65
## 1 New Hampshire Belknap County 33001 60088 150.1 16.7
## 2 New Hampshire Carroll County 33003 47818 51.4 20.6
## 3 New Hampshire Cheshire County 33005 77117 109.1 14.7
## 4 New Hampshire Coos County 33007 33055 18.4 19.4
## 5 New Hampshire Grafton County 33009 89118 52.2 15.5
## 6 New Hampshire Merrimack County 33013 146445 156.8 13.7
## 7 New Hampshire Sullivan County 33019 43742 81.4 16.5
## bachelors median_household_income no_move_in_one_plus_year poverty
## 1 26.9 54929 88.7 8.6
## 2 30.3 49897 89.4 9.6
## 3 29.8 53828 84.8 10.0
## 4 16.1 41534 85.4 12.1
## 5 35.3 53075 84.0 9.8
## 6 32.7 63012 85.2 8.1
## 7 25.9 50689 87.0 10.0
## members
## 1 1
## 2 4
## 3 1
## 4 3
## 5 1
## 6 1
## 7 1