1. Download the dataset and read the CSV file: HW1_US_Counties.csv
# Some housekeeping:
# Set the working directory. Paths that were used on other machines:
# 1) For Windows:
#setwd("C:/Users/Andrew/OneDrive/AGZ_Home/workspace_R")
# 2) For Mac:
#setwd("/Volumes/AGZ_Home/workspace_R")
# 3) For Linux:
#setwd("/mnt/AGZ_Home_vmwin10/workspace_R")
# NOTE: this path is machine-specific; change project_dir to run elsewhere.
project_dir <- "c:/AGZ1/GD_AGZ1117/AGZ_Home/workspace_R"
setwd(project_dir)
list.files()
getwd()
#dirpath <- "C:/Users/Andrew/SkyDrive/workspace_R/data"
#dir(dirpath, full.names = TRUE)
# Download HW1_US_Counties.csv and save it in the "data" folder of the
# project directory (dirpath below).
# Read the file:
dirpath <- file.path(project_dir, "data")
# List the folder to make sure the CSV file is there. The argument is spelled
# out as full.names; `full=` only worked through partial argument matching.
dir(dirpath, full.names = TRUE)
# Build the file name from dirpath so the read does not depend on the
# current working directory.
HW1_US_Counties <- read.csv(
  file.path(dirpath, "HW1_US_Counties.csv"),
  stringsAsFactors = FALSE
)
# Verify:
dim(HW1_US_Counties)
names(HW1_US_Counties)
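# Some preparation: load the packages.
# NOTE: plyr is attached after dplyr below, so plyr's arrange(), desc(),
# mutate(), rename() and summarise() mask the dplyr versions for the rest of
# this script (see the startup messages that follow).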
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Convert to a tibble ("local data frame") to make the printed output more
# readable and presentable.
# as_tibble() is used because tbl_df() is deprecated in current dplyr.
USCounties_df <- as_tibble(HW1_US_Counties)
USCounties_df
# HW1_US_Counties
# A few utilities to get to know the data:
head(USCounties_df)
tail(USCounties_df)
names(USCounties_df)
length(USCounties_df)
str(USCounties_df)
class(USCounties_df)
dim(USCounties_df)
# Random samples: 5 rows, then 25% of the rows drawn with replacement.
USCounties_df %>% sample_n(5)
# `replace` is spelled out; `replac=` only worked through partial argument
# matching.
USCounties_df %>% sample_frac(0.25, replace = TRUE)
glimpse(USCounties_df)
2. Rename Column-headers with clearer Names.
For example:
Rename col #9 from "POPESTIMATE2013" to "POPEST2013".
# 1. Base R approach: assign new values to names().
# Rename col #8 from "CENSUS2010POP" to "CENSUS2010" and col #12 from
# "NATURALINC2013" to "INC2013".
# Note: this rename is saved into the data set directly!
names(USCounties_df) <- replace(
  names(USCounties_df),
  names(USCounties_df) == "CENSUS2010POP",
  "CENSUS2010"
)
names(USCounties_df) <- replace(
  names(USCounties_df),
  names(USCounties_df) == "NATURALINC2013",
  "INC2013"
)
# Several columns can also be renamed in one step, by position:
# names(uscounties)[c(1,2,8)] <- c("State_Name", "County_Name", "Pop_2010")
USCounties_df %>% head()
## # A tibble: 6 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 Alabama Autauga County 50 3 6 1 1 54571
## 2 Alabama Baldwin County 50 3 6 1 3 182265
## 3 Alabama Barbour County 50 3 6 1 5 27457
## 4 Alabama Bibb County 50 3 6 1 7 22915
## 5 Alabama Blount County 50 3 6 1 9 57322
## 6 Alabama Bullock County 50 3 6 1 11 10914
## # ... with 8 more variables: POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, ALAND_SQMI <dbl>, AWATER_SQMI <dbl>
# 2. plyr approach: rename() with a named vector c("old" = "new").
# Rename col #9 from "POPESTIMATE2013" to "POPEST2013" and col #13 from
# "INTERNATIONALMIG2013" to "INTER'LMIG2013".
# The c(old = new) syntax belongs to plyr::rename(), not to base R or dplyr.
# The call is namespace-qualified so it does not depend on which of
# plyr/dplyr happens to be attached last.
# Note: the result is only printed; it is not saved into the data set.
plyr::rename(
  USCounties_df,
  c(
    "POPESTIMATE2013" = "POPEST2013",
    "INTERNATIONALMIG2013" = "INTER'LMIG2013"
  )
)
## # A tibble: 3,144 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 Alabama Autauga County 50 3 6 1 1 54571
## 2 Alabama Baldwin County 50 3 6 1 3 182265
## 3 Alabama Barbour County 50 3 6 1 5 27457
## 4 Alabama Bibb County 50 3 6 1 7 22915
## 5 Alabama Blount County 50 3 6 1 9 57322
## 6 Alabama Bullock County 50 3 6 1 11 10914
## 7 Alabama Butler County 50 3 6 1 13 20947
## 8 Alabama Calhoun County 50 3 6 1 15 118572
## 9 Alabama Chambers County 50 3 6 1 17 34215
## 10 Alabama Cherokee County 50 3 6 1 19 25989
## # ... with 3,134 more rows, and 8 more variables: POPEST2013 <int>,
## # BIRTHS2013 <int>, DEATHS2013 <int>, INC2013 <int>,
## # `INTER'LMIG2013` <int>, USPS <chr>, ALAND_SQMI <dbl>,
## # AWATER_SQMI <dbl>
# The rename() call above was not assigned, so the original names are still
# in place:
names(USCounties_df)
## [1] "STNAME" "CTYNAME" "SUMLEV"
## [4] "REGION" "DIVISION" "STATE"
## [7] "COUNTY" "CENSUS2010" "POPESTIMATE2013"
## [10] "BIRTHS2013" "DEATHS2013" "INC2013"
## [13] "INTERNATIONALMIG2013" "USPS" "ALAND_SQMI"
## [16] "AWATER_SQMI"
# Find the land-area column by name instead of relying on the hard-coded
# position 15, so the rename still hits the right column if the layout changes.
area_col <- match("ALAND_SQMI", names(USCounties_df))
names(USCounties_df)[area_col]
## [1] "ALAND_SQMI"
## Tam's Solution: We can do this in one command, by passing the column numbers to the names() function.
# The guard makes the rename a no-op when the column is absent, e.g. when this
# section is re-run after the column has already been renamed.
if (!is.na(area_col)) {
  names(USCounties_df)[area_col] <- "Area"
}
names(USCounties_df)
## [1] "STNAME" "CTYNAME" "SUMLEV"
## [4] "REGION" "DIVISION" "STATE"
## [7] "COUNTY" "CENSUS2010" "POPESTIMATE2013"
## [10] "BIRTHS2013" "DEATHS2013" "INC2013"
## [13] "INTERNATIONALMIG2013" "USPS" "Area"
## [16] "AWATER_SQMI"
3. Sort All US Counties by Decreasing Area and print it out
# Base R can sort a data frame as well: index the rows with order()
# (see "Ram's way" further down).
# arrange()/desc() approach, tried first on the county name (Z to A):
USCounties_df %>% arrange(desc(CTYNAME))
## # A tibble: 3,144 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## <chr> <chr> <int> <int> <int> <int>
## 1 South Dakota Ziebach County 50 2 4 46
## 2 Texas Zavala County 50 3 7 48
## 3 Texas Zapata County 50 3 7 48
## 4 Arizona Yuma County 50 4 8 4
## 5 Colorado Yuma County 50 4 8 8
## 6 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 7 California Yuba County 50 4 9 6
## 8 Texas Young County 50 3 7 48
## 9 Maine York County 50 1 1 23
## 10 Nebraska York County 50 2 4 31
## # ... with 3,134 more rows, and 10 more variables: COUNTY <int>,
## # CENSUS2010 <int>, POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, Area <dbl>, AWATER_SQMI <dbl>
# The actual task: largest area first.
USCounties_df %>% arrange(desc(Area))
## # A tibble: 3,144 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## <chr> <chr> <int> <int> <int> <int>
## 1 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 2 Alaska North Slope Borough 50 4 9 2
## 3 Alaska Bethel Census Area 50 4 9 2
## 4 Alaska Northwest Arctic Borough 50 4 9 2
## 5 Alaska Valdez-Cordova Census Area 50 4 9 2
## 6 Alaska Southeast Fairbanks Census Area 50 4 9 2
## 7 Alaska Matanuska-Susitna Borough 50 4 9 2
## 8 Alaska Lake and Peninsula Borough 50 4 9 2
## 9 Alaska Nome Census Area 50 4 9 2
## 10 California San Bernardino County 50 4 9 6
## # ... with 3,134 more rows, and 10 more variables: COUNTY <int>,
## # CENSUS2010 <int>, POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, Area <dbl>, AWATER_SQMI <dbl>
## Ram's way:
# Base R sorts a data frame by indexing its rows with order().
# The sorted result can be stored in another data frame if you wish.
# (The original note says "Use HW1_US_Counties"; the call here works on
# USCounties_df.)
with(USCounties_df, USCounties_df[order(Area, decreasing = TRUE), ])
## # A tibble: 3,144 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## <chr> <chr> <int> <int> <int> <int>
## 1 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 2 Alaska North Slope Borough 50 4 9 2
## 3 Alaska Bethel Census Area 50 4 9 2
## 4 Alaska Northwest Arctic Borough 50 4 9 2
## 5 Alaska Valdez-Cordova Census Area 50 4 9 2
## 6 Alaska Southeast Fairbanks Census Area 50 4 9 2
## 7 Alaska Matanuska-Susitna Borough 50 4 9 2
## 8 Alaska Lake and Peninsula Borough 50 4 9 2
## 9 Alaska Nome Census Area 50 4 9 2
## 10 California San Bernardino County 50 4 9 6
## # ... with 3,134 more rows, and 10 more variables: COUNTY <int>,
## # CENSUS2010 <int>, POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, Area <dbl>, AWATER_SQMI <dbl>
# Only the ten largest counties by area.
USCounties_df[order(USCounties_df$Area, decreasing = TRUE), ] %>% head(10)
## # A tibble: 10 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## <chr> <chr> <int> <int> <int> <int>
## 1 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 2 Alaska North Slope Borough 50 4 9 2
## 3 Alaska Bethel Census Area 50 4 9 2
## 4 Alaska Northwest Arctic Borough 50 4 9 2
## 5 Alaska Valdez-Cordova Census Area 50 4 9 2
## 6 Alaska Southeast Fairbanks Census Area 50 4 9 2
## 7 Alaska Matanuska-Susitna Borough 50 4 9 2
## 8 Alaska Lake and Peninsula Borough 50 4 9 2
## 9 Alaska Nome Census Area 50 4 9 2
## 10 California San Bernardino County 50 4 9 6
## # ... with 10 more variables: COUNTY <int>, CENSUS2010 <int>,
## # POPESTIMATE2013 <int>, BIRTHS2013 <int>, DEATHS2013 <int>,
## # INC2013 <int>, INTERNATIONALMIG2013 <int>, USPS <chr>, Area <dbl>,
## # AWATER_SQMI <dbl>
# Or: sort the original data frame HW1_US_Counties by "CTYNAME" (Z to A).
# The ordering is computed from HW1_US_Counties itself; indexing one data
# frame with an ordering taken from another only works while both happen to
# have identical row order.
head(HW1_US_Counties[order(HW1_US_Counties$CTYNAME, decreasing = TRUE), ], 10)
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## 2429 South Dakota Ziebach County 50 2 4 46
## 2778 Texas Zavala County 50 3 7 48
## 2777 Texas Zapata County 50 3 7 48
## 111 Arizona Yuma County 50 4 8 4
## 308 Colorado Yuma County 50 4 8 8
## 96 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 244 California Yuba County 50 4 9 6
## 2776 Texas Young County 50 3 7 48
## 1194 Maine York County 50 1 1 23
## 1748 Nebraska York County 50 2 4 31
## COUNTY CENSUS2010POP POPESTIMATE2013 BIRTHS2013 DEATHS2013
## 2429 137 2801 2834 39 19
## 2778 507 11677 12156 204 90
## 2777 505 14018 14390 283 94
## 111 27 195751 201201 3147 1414
## 308 125 10043 10151 154 91
## 96 290 5588 5695 107 65
## 244 115 72155 73340 1282 471
## 2776 503 18550 18341 222 231
## 1194 31 197131 199431 1831 1739
## 1748 185 13665 13883 177 130
## NATURALINC2013 INTERNATIONALMIG2013 USPS ALAND_SQMI AWATER_SQMI
## 2429 20 2 SD 1961.386 9.348
## 2778 114 12 TX 1297.406 4.328
## 2777 189 3 TX 998.412 59.603
## 111 1733 219 AZ 5513.997 5.087
## 308 63 15 CO 2364.394 4.299
## 96 42 0 AK 145505.165 2299.769
## 244 811 91 CA 632.020 11.969
## 2776 -9 -2 TX 914.469 16.389
## 1194 92 59 ME 990.748 278.965
## 1748 47 8 NE 572.510 3.319
# The tibble is sorted the same way with order(); here by decreasing Area
# again (not by county name):
USCounties_df[order(USCounties_df[["Area"]], decreasing = TRUE), ]
## # A tibble: 3,144 × 16
## STNAME CTYNAME SUMLEV REGION DIVISION STATE
## <chr> <chr> <int> <int> <int> <int>
## 1 Alaska Yukon-Koyukuk Census Area 50 4 9 2
## 2 Alaska North Slope Borough 50 4 9 2
## 3 Alaska Bethel Census Area 50 4 9 2
## 4 Alaska Northwest Arctic Borough 50 4 9 2
## 5 Alaska Valdez-Cordova Census Area 50 4 9 2
## 6 Alaska Southeast Fairbanks Census Area 50 4 9 2
## 7 Alaska Matanuska-Susitna Borough 50 4 9 2
## 8 Alaska Lake and Peninsula Borough 50 4 9 2
## 9 Alaska Nome Census Area 50 4 9 2
## 10 California San Bernardino County 50 4 9 6
## # ... with 3,134 more rows, and 10 more variables: COUNTY <int>,
## # CENSUS2010 <int>, POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, Area <dbl>, AWATER_SQMI <dbl>
5. Create a new column to calculate the population density of each County.
# 1. Base R approach: add the new column by assignment
# (2010 census population divided by Area). This modifies the data set.
USCounties_df[["POPDENSITY"]] <- with(USCounties_df, CENSUS2010 / Area)
names(USCounties_df)
## [1] "STNAME" "CTYNAME" "SUMLEV"
## [4] "REGION" "DIVISION" "STATE"
## [7] "COUNTY" "CENSUS2010" "POPESTIMATE2013"
## [10] "BIRTHS2013" "DEATHS2013" "INC2013"
## [13] "INTERNATIONALMIG2013" "USPS" "Area"
## [16] "AWATER_SQMI" "POPDENSITY"
# First rows, now with the POPDENSITY column appended.
USCounties_df %>% head()
## # A tibble: 6 × 17
## STNAME CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 Alabama Autauga County 50 3 6 1 1 54571
## 2 Alabama Baldwin County 50 3 6 1 3 182265
## 3 Alabama Barbour County 50 3 6 1 5 27457
## 4 Alabama Bibb County 50 3 6 1 7 22915
## 5 Alabama Blount County 50 3 6 1 9 57322
## 6 Alabama Bullock County 50 3 6 1 11 10914
## # ... with 9 more variables: POPESTIMATE2013 <int>, BIRTHS2013 <int>,
## # DEATHS2013 <int>, INC2013 <int>, INTERNATIONALMIG2013 <int>,
## # USPS <chr>, Area <dbl>, AWATER_SQMI <dbl>, POPDENSITY <dbl>
# 2. dplyr-style approach: compute the same ratio inside a pipeline.
USCounties_df %>%
  # Optional step: keep only the columns of interest so that the new column
  # is visible in the printed output.
  select(STNAME, CTYNAME, COUNTY, CENSUS2010, POPDENSITY, Area) %>%
  mutate(POPDENSITY2 = CENSUS2010 / Area)
## # A tibble: 3,144 × 7
## STNAME CTYNAME COUNTY CENSUS2010 POPDENSITY Area
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 Alabama Autauga County 1 54571 91.80283 594.437
## 2 Alabama Baldwin County 3 182265 114.64361 1589.840
## 3 Alabama Barbour County 5 27457 31.02921 884.876
## 4 Alabama Bibb County 7 22915 36.80634 622.583
## 5 Alabama Blount County 9 57322 88.89792 644.807
## 6 Alabama Bullock County 11 10914 17.52394 622.805
## 7 Alabama Butler County 13 20947 26.96478 776.828
## 8 Alabama Calhoun County 15 118572 195.70664 605.866
## 9 Alabama Chambers County 17 34215 57.35662 596.531
## 10 Alabama Cherokee County 19 25989 46.93527 553.720
## # ... with 3,134 more rows, and 1 more variables: POPDENSITY2 <dbl>
# Note: unlike the base R assignment, the pipeline result is not saved;
# POPDENSITY2 does not appear among the columns.
colnames(USCounties_df)
## [1] "STNAME" "CTYNAME" "SUMLEV"
## [4] "REGION" "DIVISION" "STATE"
## [7] "COUNTY" "CENSUS2010" "POPESTIMATE2013"
## [10] "BIRTHS2013" "DEATHS2013" "INC2013"
## [13] "INTERNATIONALMIG2013" "USPS" "Area"
## [16] "AWATER_SQMI" "POPDENSITY"
6. How many counties are there in the US?
# Rows and columns of the table; the first number is the number of rows.
c(nrow(USCounties_df), ncol(USCounties_df))
## [1] 3144 17
# There are 3144 counties (one per row).
7. Store just the names of all US counties in a new vector.
# Pull the county-name column out as a plain character vector.
USCounties_df_name <- USCounties_df[["CTYNAME"]]
USCounties_df_name %>% head()
## [1] "Autauga County" "Baldwin County" "Barbour County" "Bibb County"
## [5] "Blount County" "Bullock County"
8. Which counties have the most and the fewest people living in them? (Use column “POPESTIMATE2013”)
# Part One: use the dplyr package.
## Find the min and max of "POPESTIMATE2013".
# summarise_each() and funs() are deprecated in current dplyr, so the two
# statistics are computed with summarise() directly. The call is written as
# dplyr::summarise() because plyr was attached after dplyr and masks
# summarise().
# Earlier attempts, kept for reference (the county name did not display
# correctly with them):
#group_by(POPESTIMATE2013) %>%
# summarise_each(funs(min(., na.rm=TRUE), max(., na.rm=TRUE)), select = c(STNAME, CTYNAME, CENSUS2010), matches("CENSUS2010"))
USCounties_df %>%
  dplyr::summarise(
    min = min(POPESTIMATE2013, na.rm = TRUE),
    max = max(POPESTIMATE2013, na.rm = TRUE)
  )
## # A tibble: 1 × 2
## min max
## <int> <int>
## 1 90 10017068
# 2015/01/13:
# View() opens a data viewer window, so it is only called in an interactive
# session; this keeps a batch or knitted run from stopping here.
if (interactive()) {
  View(USCounties_df)
}
# Result: max: 10017068; min: 90
# Having found the max of "POPESTIMATE2013", look up the county that has it
# (Los Angeles County).
# The extreme values are recomputed rather than typed in by hand, so the
# lookup keeps working if the data changes; which() skips missing values.
county_pop <- USCounties_df$POPESTIMATE2013
USCounties_df$CTYNAME[which(county_pop == max(county_pop, na.rm = TRUE))]
## [1] "Los Angeles County"
# Likewise for the min of "POPESTIMATE2013" (Kalawao County).
USCounties_df$CTYNAME[which(county_pop == min(county_pop, na.rm = TRUE))]
## [1] "Kalawao County"
# Part 2: Ram's way (gives the county name but does not show the number):
maxrow <- which.max(USCounties_df[["POPESTIMATE2013"]])
USCounties_df[["CTYNAME"]][maxrow]
## [1] "Los Angeles County"
# The same lookup as a single expression:
with(USCounties_df, CTYNAME[which.max(POPESTIMATE2013)])
## [1] "Los Angeles County"
with(USCounties_df, CTYNAME[which.min(POPESTIMATE2013)])
## [1] "Kalawao County"
9. List of counties with an area greater than 5000 sq. miles
# Keep the rows whose Area exceeds 5000; which() leaves out rows where the
# comparison is NA.
bigCounties <- USCounties_df[which(USCounties_df$Area > 5000), ]
bigCounties[["CTYNAME"]]
## [1] "Aleutians East Borough" "Bethel Census Area"
## [3] "Denali Borough" "Dillingham Census Area"
## [5] "Fairbanks North Star Borough" "Hoonah-Angoon Census Area"
## [7] "Kenai Peninsula Borough" "Kodiak Island Borough"
## [9] "Lake and Peninsula Borough" "Matanuska-Susitna Borough"
## [11] "Nome Census Area" "North Slope Borough"
## [13] "Northwest Arctic Borough" "Southeast Fairbanks Census Area"
## [15] "Valdez-Cordova Census Area" "Wade Hampton Census Area"
## [17] "Yakutat City and Borough" "Yukon-Koyukuk Census Area"
## [19] "Apache County" "Cochise County"
## [21] "Coconino County" "Maricopa County"
## [23] "Mohave County" "Navajo County"
## [25] "Pima County" "Pinal County"
## [27] "Yavapai County" "Yuma County"
## [29] "Fresno County" "Inyo County"
## [31] "Kern County" "Riverside County"
## [33] "San Bernardino County" "Siskiyou County"
## [35] "Idaho County" "Owyhee County"
## [37] "Aroostook County" "St. Louis County"
## [39] "Beaverhead County" "Flathead County"
## [41] "Phillips County" "Rosebud County"
## [43] "Cherry County" "Clark County"
## [45] "Elko County" "Humboldt County"
## [47] "Lander County" "Lincoln County"
## [49] "Nye County" "Pershing County"
## [51] "Washoe County" "White Pine County"
## [53] "Catron County" "Chaves County"
## [55] "McKinley County" "Otero County"
## [57] "Rio Arriba County" "San Juan County"
## [59] "Socorro County" "Douglas County"
## [61] "Harney County" "Klamath County"
## [63] "Lake County" "Malheur County"
## [65] "Brewster County" "Box Elder County"
## [67] "Garfield County" "Millard County"
## [69] "San Juan County" "Tooele County"
## [71] "Okanogan County" "Carbon County"
## [73] "Fremont County" "Natrona County"
## [75] "Park County" "Sweetwater County"
# Or keep just the state, county name and area columns.
# (The stray trailing comma, which passed an empty extra argument to
# subset(), has been removed.)
subset(USCounties_df, Area > 5000, select = c(STNAME, CTYNAME, Area))
## # A tibble: 76 × 3
## STNAME CTYNAME Area
## <chr> <chr> <dbl>
## 1 Alaska Aleutians East Borough 6981.867
## 2 Alaska Bethel Census Area 40570.001
## 3 Alaska Denali Borough 12751.719
## 4 Alaska Dillingham Census Area 18568.839
## 5 Alaska Fairbanks North Star Borough 7338.518
## 6 Alaska Hoonah-Angoon Census Area 7524.904
## 7 Alaska Kenai Peninsula Borough 16075.352
## 8 Alaska Kodiak Island Borough 6549.726
## 9 Alaska Lake and Peninsula Borough 23652.048
## 10 Alaska Matanuska-Susitna Borough 24608.141
## # ... with 66 more rows
# Total 76
10. Population distribution by Quartiles (Which counties are in the bottom 25%, top 25% etc)
# Population estimates as a plain vector, then its five-number cut points.
popByQuartiles <- USCounties_df[["POPESTIMATE2013"]]
# probs is written out; these are quantile()'s default probabilities.
quantile(popByQuartiles, probs = seq(0, 1, by = 0.25))
## 0% 25% 50% 75% 100%
## 90.00 11015.75 25733.00 67582.25 10017068.00
# We can ask for specific quantiles; here the three quartile boundaries.
quantile(USCounties_df$POPESTIMATE2013, probs = (1:3) / 4)
## 25% 50% 75%
## 11015.75 25733.00 67582.25
# Now we know where the boundaries are, according to which the counties have
# to be grouped.
# The quartiles are computed once and picked out by name, instead of calling
# quantile() three times and relying on the position of each result.
pop_quartiles <- quantile(
  USCounties_df$POPESTIMATE2013,
  probs = c(0.25, 0.5, 0.75)
)
firstQ <- pop_quartiles["25%"]
med <- pop_quartiles["50%"]
thirdQ <- pop_quartiles["75%"]
# Topmost quartile: counties whose estimate lies above the third quartile.
USCounties_df[
  USCounties_df$POPESTIMATE2013 > thirdQ,
  c("CTYNAME", "POPESTIMATE2013")
] %>%
  head()
## # A tibble: 6 × 2
## CTYNAME POPESTIMATE2013
## <chr> <int>
## 1 Baldwin County 195540
## 2 Calhoun County 116736
## 3 Cullman County 80811
## 4 DeKalb County 71013
## 5 Elmore County 80902
## 6 Etowah County 103931
# Counties between the second and third quartile: the interval (med, thirdQ].
# The upper bound is inclusive so that a county sitting exactly on the third
# quartile is not left out of both this group and the topmost one (which
# uses > thirdQ).
head(
  USCounties_df$CTYNAME[
    (USCounties_df$POPESTIMATE2013 > med) &
      (USCounties_df$POPESTIMATE2013 <= thirdQ)
  ]
)
## [1] "Autauga County" "Barbour County" "Blount County" "Chambers County"
## [5] "Cherokee County" "Chilton County"
11. I would like to find some small towns to live in, where the population is around 10,000 (here: counties whose 2010 census population is between 10,000 and 10,100)
# Counties with a 2010 census population strictly between 10000 and 10100,
# showing only the state, county name and population.
USCounties_df %>%
  filter(CENSUS2010 > 10000, CENSUS2010 < 10100) %>%
  select(STNAME, CTYNAME, CENSUS2010)
## # A tibble: 7 × 3
## STNAME CTYNAME CENSUS2010
## <chr> <chr> <int>
## 1 Colorado Yuma County 10043
## 2 Georgia Lanier County 10078
## 3 Kansas Rice County 10083
## 4 Kentucky Metcalfe County 10099
## 5 Montana Carbon County 10078
## 6 Nevada White Pine County 10030
## 7 Virginia Cumberland County 10052