1. Download the dataset and read the CSV file: HW1_US_Counties.csv

# Housekeeping:
# Set the working directory. Files written later with relative paths
# (the .csv/.Rda exports) end up in this folder.

setwd("C:/Users/Andrew/SkyDrive/workspace_R")

dirpath <- "C:/Users/Andrew/SkyDrive/workspace_R/data"
# List the data folder to make sure the CSV file is really there.
# `full.names` is spelled out instead of relying on partial matching of `full`.
dir(dirpath, full.names = TRUE)

# Download and save the HW1_US_Counties.csv in the "C:/Users/Andrew/SkyDrive/workspace_R/data"
# Read the file. The path is built from `dirpath`, so the read itself does not
# depend on the current working directory.

csv_path <- file.path(dirpath, "HW1_US_Counties.csv")
if (!file.exists(csv_path)) {
  stop("Cannot find ", csv_path, " - download the CSV into the data folder first.",
       call. = FALSE)
}
HW1_US_Counties <- read.csv(csv_path, stringsAsFactors = FALSE)
# Verify: dimensions and column names of what was read.
dim(HW1_US_Counties)
names(HW1_US_Counties)


# some preparation
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# NOTE: plyr is attached after dplyr, so plyr's arrange, desc, mutate, rename,
# summarise, ... mask the dplyr versions (see the attach message below).
# Code further down was written against this masking (e.g. the c(old = new)
# vector passed to rename()), so re-check those calls before changing the
# load order.
library(plyr)
## Warning: package 'plyr' was built under R version 3.1.2
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Convert to a local data frame (tbl_df) to make it more readable & presentable
# when printed.
USCounties_df <- tbl_df(HW1_US_Counties)
USCounties_df

# A few utilities to get to know the data:
head(USCounties_df)     # first rows
tail(USCounties_df)     # last rows
names(USCounties_df)    # column names
length(USCounties_df)   # for a data frame: the number of columns
str(USCounties_df)      # structure: column types plus a few values
class(USCounties_df)
dim(USCounties_df)      # rows, columns

# Random samples: 5 rows, then 25% of the rows drawn with replacement.
# `replace` is written out in full (the original relied on partial matching
# of the misspelled `replac`).
USCounties_df %>% sample_n(5)
USCounties_df %>% sample_frac(0.25, replace = TRUE)
glimpse(USCounties_df)

2. Rename Column-headers with clearer Names.

For example:
Rename col #9 from "POPESTIMATE2013" to "POPEST2013".
# 1. Base R: rename columns by matching on their current names.
#    "CENSUS2010POP" becomes "CENSUS2010" and "NATURALINC2013" becomes "INC2013".
# Note: the new names are written straight back into USCounties_df.
colnames(USCounties_df)[colnames(USCounties_df) %in% "CENSUS2010POP"] <- "CENSUS2010"
colnames(USCounties_df)[colnames(USCounties_df) %in% "NATURALINC2013"] <- "INC2013"
# Several columns can also be renamed in a single step by position:
# names(uscounties)[c(1,2,8)] <- c("State_Name", "County_Name", "Pop_2010")
head(USCounties_df, n = 6)
## Source: local data frame [6 x 16]
## 
##    STNAME        CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## 1 Alabama Autauga County     50      3        6     1      1      54571
## 2 Alabama Baldwin County     50      3        6     1      3     182265
## 3 Alabama Barbour County     50      3        6     1      5      27457
## 4 Alabama    Bibb County     50      3        6     1      7      22915
## 5 Alabama  Blount County     50      3        6     1      9      57322
## 6 Alabama Bullock County     50      3        6     1     11      10914
## Variables not shown: POPESTIMATE2013 (int), BIRTHS2013 (int), DEATHS2013
##   (int), INC2013 (int), INTERNATIONALMIG2013 (int), USPS (chr), ALAND_SQMI
##   (dbl), AWATER_SQMI (dbl)
# 2. Use plyr's rename(), which takes a named vector of the form c(old = new):
#    "POPESTIMATE2013" -> "POPEST2013" and
#    "INTERNATIONALMIG2013" -> "INTER'LMIG2013".
# The call is namespace-qualified because both dplyr and plyr export rename();
# this c(old = new) form is plyr's, so it should not depend on which package
# happens to mask the other.
# Note: the result is only printed, not assigned, so USCounties_df keeps its
# current column names.
plyr::rename(
  USCounties_df,
  c("POPESTIMATE2013" = "POPEST2013", "INTERNATIONALMIG2013" = "INTER'LMIG2013")
)
## Source: local data frame [3,144 x 16]
## 
##     STNAME         CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## 1  Alabama  Autauga County     50      3        6     1      1      54571
## 2  Alabama  Baldwin County     50      3        6     1      3     182265
## 3  Alabama  Barbour County     50      3        6     1      5      27457
## 4  Alabama     Bibb County     50      3        6     1      7      22915
## 5  Alabama   Blount County     50      3        6     1      9      57322
## 6  Alabama  Bullock County     50      3        6     1     11      10914
## 7  Alabama   Butler County     50      3        6     1     13      20947
## 8  Alabama  Calhoun County     50      3        6     1     15     118572
## 9  Alabama Chambers County     50      3        6     1     17      34215
## 10 Alabama Cherokee County     50      3        6     1     19      25989
## ..     ...             ...    ...    ...      ...   ...    ...        ...
## Variables not shown: POPEST2013 (int), BIRTHS2013 (int), DEATHS2013 (int),
##   INC2013 (int), INTER'LMIG2013 (int), USPS (chr), ALAND_SQMI (dbl),
##   AWATER_SQMI (dbl)
# The names are unchanged: the rename() result above was not assigned.
names(USCounties_df)
##  [1] "STNAME"               "CTYNAME"              "SUMLEV"              
##  [4] "REGION"               "DIVISION"             "STATE"               
##  [7] "COUNTY"               "CENSUS2010"           "POPESTIMATE2013"     
## [10] "BIRTHS2013"           "DEATHS2013"           "INC2013"             
## [13] "INTERNATIONALMIG2013" "USPS"                 "ALAND_SQMI"          
## [16] "AWATER_SQMI"
names(USCounties_df)[15]
## [1] "ALAND_SQMI"
## Tam's Solution: We can do this in one command, by assigning through the names() function.
# The column is located by its current name rather than by position 15, so the
# rename still hits the right column if the column order of the CSV changes.
names(USCounties_df)[names(USCounties_df) == "ALAND_SQMI"] <- "Area"
names(USCounties_df)
##  [1] "STNAME"               "CTYNAME"              "SUMLEV"              
##  [4] "REGION"               "DIVISION"             "STATE"               
##  [7] "COUNTY"               "CENSUS2010"           "POPESTIMATE2013"     
## [10] "BIRTHS2013"           "DEATHS2013"           "INC2013"             
## [13] "INTERNATIONALMIG2013" "USPS"                 "Area"                
## [16] "AWATER_SQMI"

3. Sort All US Counties by Decreasing Area and print it out

# Warm-up with arrange()/desc(): list the counties by name, Z to A.
# (A base R way of sorting, based on order(), is shown further down.)
USCounties_df %>%
  arrange(desc(CTYNAME))
## Source: local data frame [3,144 x 16]
## 
##          STNAME                   CTYNAME SUMLEV REGION DIVISION STATE
## 1  South Dakota            Ziebach County     50      2        4    46
## 2         Texas             Zavala County     50      3        7    48
## 3         Texas             Zapata County     50      3        7    48
## 4       Arizona               Yuma County     50      4        8     4
## 5      Colorado               Yuma County     50      4        8     8
## 6        Alaska Yukon-Koyukuk Census Area     50      4        9     2
## 7    California               Yuba County     50      4        9     6
## 8         Texas              Young County     50      3        7    48
## 9         Maine               York County     50      1        1    23
## 10     Nebraska               York County     50      2        4    31
## ..          ...                       ...    ...    ...      ...   ...
## Variables not shown: COUNTY (int), CENSUS2010 (int), POPESTIMATE2013
##   (int), BIRTHS2013 (int), DEATHS2013 (int), INC2013 (int),
##   INTERNATIONALMIG2013 (int), USPS (chr), Area (dbl), AWATER_SQMI (dbl)
# The sort the exercise asks for: largest Area first.
USCounties_df %>%
  arrange(desc(Area))
## Source: local data frame [3,144 x 16]
## 
##        STNAME                         CTYNAME SUMLEV REGION DIVISION STATE
## 1      Alaska       Yukon-Koyukuk Census Area     50      4        9     2
## 2      Alaska             North Slope Borough     50      4        9     2
## 3      Alaska              Bethel Census Area     50      4        9     2
## 4      Alaska        Northwest Arctic Borough     50      4        9     2
## 5      Alaska      Valdez-Cordova Census Area     50      4        9     2
## 6      Alaska Southeast Fairbanks Census Area     50      4        9     2
## 7      Alaska       Matanuska-Susitna Borough     50      4        9     2
## 8      Alaska      Lake and Peninsula Borough     50      4        9     2
## 9      Alaska                Nome Census Area     50      4        9     2
## 10 California           San Bernardino County     50      4        9     6
## ..        ...                             ...    ...    ...      ...   ...
## Variables not shown: COUNTY (int), CENSUS2010 (int), POPESTIMATE2013
##   (int), BIRTHS2013 (int), DEATHS2013 (int), INC2013 (int),
##   INTERNATIONALMIG2013 (int), USPS (chr), Area (dbl), AWATER_SQMI (dbl)
## Ram's way: base R sorting.
# order() returns the row positions in sorted order; indexing the rows with
# that vector yields the sorted data frame.
# Assign the result to another data frame if the sorted copy should be kept.

with(USCounties_df, USCounties_df[order(Area, decreasing = TRUE), ])
## Source: local data frame [3,144 x 16]
## 
##        STNAME                         CTYNAME SUMLEV REGION DIVISION STATE
## 1      Alaska       Yukon-Koyukuk Census Area     50      4        9     2
## 2      Alaska             North Slope Borough     50      4        9     2
## 3      Alaska              Bethel Census Area     50      4        9     2
## 4      Alaska        Northwest Arctic Borough     50      4        9     2
## 5      Alaska      Valdez-Cordova Census Area     50      4        9     2
## 6      Alaska Southeast Fairbanks Census Area     50      4        9     2
## 7      Alaska       Matanuska-Susitna Borough     50      4        9     2
## 8      Alaska      Lake and Peninsula Borough     50      4        9     2
## 9      Alaska                Nome Census Area     50      4        9     2
## 10 California           San Bernardino County     50      4        9     6
## ..        ...                             ...    ...    ...      ...   ...
## Variables not shown: COUNTY (int), CENSUS2010 (int), POPESTIMATE2013
##   (int), BIRTHS2013 (int), DEATHS2013 (int), INC2013 (int),
##   INTERNATIONALMIG2013 (int), USPS (chr), Area (dbl), AWATER_SQMI (dbl)
# Only the ten largest counties by Area.
head(USCounties_df[order(USCounties_df[["Area"]], decreasing = TRUE), ], n = 10)
## Source: local data frame [10 x 16]
## 
##        STNAME                         CTYNAME SUMLEV REGION DIVISION STATE
## 1      Alaska       Yukon-Koyukuk Census Area     50      4        9     2
## 2      Alaska             North Slope Borough     50      4        9     2
## 3      Alaska              Bethel Census Area     50      4        9     2
## 4      Alaska        Northwest Arctic Borough     50      4        9     2
## 5      Alaska      Valdez-Cordova Census Area     50      4        9     2
## 6      Alaska Southeast Fairbanks Census Area     50      4        9     2
## 7      Alaska       Matanuska-Susitna Borough     50      4        9     2
## 8      Alaska      Lake and Peninsula Borough     50      4        9     2
## 9      Alaska                Nome Census Area     50      4        9     2
## 10 California           San Bernardino County     50      4        9     6
## Variables not shown: COUNTY (int), CENSUS2010 (int), POPESTIMATE2013
##   (int), BIRTHS2013 (int), DEATHS2013 (int), INC2013 (int),
##   INTERNATIONALMIG2013 (int), USPS (chr), Area (dbl), AWATER_SQMI (dbl)
# Or: sort the plain data frame HW1_US_Counties by "CTYNAME", Z to A.
# The ordering is computed from HW1_US_Counties itself, so the row index and
# the rows being indexed come from the same object.
head(HW1_US_Counties[order(HW1_US_Counties$CTYNAME, decreasing = TRUE), ], 10)
##            STNAME                   CTYNAME SUMLEV REGION DIVISION STATE
## 2429 South Dakota            Ziebach County     50      2        4    46
## 2778        Texas             Zavala County     50      3        7    48
## 2777        Texas             Zapata County     50      3        7    48
## 111       Arizona               Yuma County     50      4        8     4
## 308      Colorado               Yuma County     50      4        8     8
## 96         Alaska Yukon-Koyukuk Census Area     50      4        9     2
## 244    California               Yuba County     50      4        9     6
## 2776        Texas              Young County     50      3        7    48
## 1194        Maine               York County     50      1        1    23
## 1748     Nebraska               York County     50      2        4    31
##      COUNTY CENSUS2010POP POPESTIMATE2013 BIRTHS2013 DEATHS2013
## 2429    137          2801            2834         39         19
## 2778    507         11677           12156        204         90
## 2777    505         14018           14390        283         94
## 111      27        195751          201201       3147       1414
## 308     125         10043           10151        154         91
## 96      290          5588            5695        107         65
## 244     115         72155           73340       1282        471
## 2776    503         18550           18341        222        231
## 1194     31        197131          199431       1831       1739
## 1748    185         13665           13883        177        130
##      NATURALINC2013 INTERNATIONALMIG2013 USPS ALAND_SQMI AWATER_SQMI
## 2429             20                    2   SD   1961.386       9.348
## 2778            114                   12   TX   1297.406       4.328
## 2777            189                    3   TX    998.412      59.603
## 111            1733                  219   AZ   5513.997       5.087
## 308              63                   15   CO   2364.394       4.299
## 96               42                    0   AK 145505.165    2299.769
## 244             811                   91   CA    632.020      11.969
## 2776             -9                   -2   TX    914.469      16.389
## 1194             92                   59   ME    990.748     278.965
## 1748             47                    8   NE    572.510       3.319
# The Area ordering applied to the tbl_df once more:

USCounties_df[order(USCounties_df[["Area"]], decreasing = TRUE), ]
## Source: local data frame [3,144 x 16]
## 
##        STNAME                         CTYNAME SUMLEV REGION DIVISION STATE
## 1      Alaska       Yukon-Koyukuk Census Area     50      4        9     2
## 2      Alaska             North Slope Borough     50      4        9     2
## 3      Alaska              Bethel Census Area     50      4        9     2
## 4      Alaska        Northwest Arctic Borough     50      4        9     2
## 5      Alaska      Valdez-Cordova Census Area     50      4        9     2
## 6      Alaska Southeast Fairbanks Census Area     50      4        9     2
## 7      Alaska       Matanuska-Susitna Borough     50      4        9     2
## 8      Alaska      Lake and Peninsula Borough     50      4        9     2
## 9      Alaska                Nome Census Area     50      4        9     2
## 10 California           San Bernardino County     50      4        9     6
## ..        ...                             ...    ...    ...      ...   ...
## Variables not shown: COUNTY (int), CENSUS2010 (int), POPESTIMATE2013
##   (int), BIRTHS2013 (int), DEATHS2013 (int), INC2013 (int),
##   INTERNATIONALMIG2013 (int), USPS (chr), Area (dbl), AWATER_SQMI (dbl)

4. Save the sorted data frame in two formats: as a csv file, & as .Rda

# None of the sorts above were assigned, so build the sorted copy (largest
# Area first) here and export that, rather than the unsorted USCounties_df.
sorted_counties <- USCounties_df[order(USCounties_df$Area, decreasing = TRUE), ]
write.csv(sorted_counties, file = "USCounties_df_new.csv")
# save() stores objects under their variable names. The sorted copy is placed
# in a scratch environment under the name "USCounties_df" so that load() of
# the .Rda still yields an object called USCounties_df, while the in-session
# USCounties_df keeps its original row order.
sorted_env <- new.env()
assign("USCounties_df", sorted_counties, envir = sorted_env)
save(list = "USCounties_df", file = "USCounties_df_new.Rda", envir = sorted_env)

5. Create a new column to calculate the population density of each County.

# 1. Base R approach: store the density as a new column of USCounties_df.
#    POPDENSITY = CENSUS2010 divided by Area (the renamed ALAND_SQMI column).

USCounties_df[["POPDENSITY"]] <- with(USCounties_df, CENSUS2010 / Area)
colnames(USCounties_df)
##  [1] "STNAME"               "CTYNAME"              "SUMLEV"              
##  [4] "REGION"               "DIVISION"             "STATE"               
##  [7] "COUNTY"               "CENSUS2010"           "POPESTIMATE2013"     
## [10] "BIRTHS2013"           "DEATHS2013"           "INC2013"             
## [13] "INTERNATIONALMIG2013" "USPS"                 "Area"                
## [16] "AWATER_SQMI"          "POPDENSITY"
head(USCounties_df, n = 6)
## Source: local data frame [6 x 17]
## 
##    STNAME        CTYNAME SUMLEV REGION DIVISION STATE COUNTY CENSUS2010
## 1 Alabama Autauga County     50      3        6     1      1      54571
## 2 Alabama Baldwin County     50      3        6     1      3     182265
## 3 Alabama Barbour County     50      3        6     1      5      27457
## 4 Alabama    Bibb County     50      3        6     1      7      22915
## 5 Alabama  Blount County     50      3        6     1      9      57322
## 6 Alabama Bullock County     50      3        6     1     11      10914
## Variables not shown: POPESTIMATE2013 (int), BIRTHS2013 (int), DEATHS2013
##   (int), INC2013 (int), INTERNATIONALMIG2013 (int), USPS (chr), Area
##   (dbl), AWATER_SQMI (dbl), POPDENSITY (dbl)
# 2. select()/mutate() approach, written as nested calls instead of a pipe.
# The inner select() only narrows the printed columns and could be left out;
# mutate() adds POPDENSITY2 computed from the same two columns.
mutate(
  select(USCounties_df, STNAME, CTYNAME, COUNTY, CENSUS2010, POPDENSITY, Area),
  POPDENSITY2 = CENSUS2010 / Area
)
## Source: local data frame [3,144 x 7]
## 
##     STNAME         CTYNAME COUNTY CENSUS2010 POPDENSITY     Area
## 1  Alabama  Autauga County      1      54571   91.80283  594.437
## 2  Alabama  Baldwin County      3     182265  114.64361 1589.840
## 3  Alabama  Barbour County      5      27457   31.02921  884.876
## 4  Alabama     Bibb County      7      22915   36.80634  622.583
## 5  Alabama   Blount County      9      57322   88.89792  644.807
## 6  Alabama  Bullock County     11      10914   17.52394  622.805
## 7  Alabama   Butler County     13      20947   26.96478  776.828
## 8  Alabama  Calhoun County     15     118572  195.70664  605.866
## 9  Alabama Chambers County     17      34215   57.35662  596.531
## 10 Alabama Cherokee County     19      25989   46.93527  553.720
## ..     ...             ...    ...        ...        ...      ...
## Variables not shown: POPDENSITY2 (dbl)
# Note: unlike the base R assignment, the mutate() result was only printed and
# never stored, so USCounties_df has no POPDENSITY2 column.

colnames(USCounties_df)
##  [1] "STNAME"               "CTYNAME"              "SUMLEV"              
##  [4] "REGION"               "DIVISION"             "STATE"               
##  [7] "COUNTY"               "CENSUS2010"           "POPESTIMATE2013"     
## [10] "BIRTHS2013"           "DEATHS2013"           "INC2013"             
## [13] "INTERNATIONALMIG2013" "USPS"                 "Area"                
## [16] "AWATER_SQMI"          "POPDENSITY"

6. How many counties are there in the US?

c(nrow(USCounties_df), ncol(USCounties_df))
## [1] 3144   17
# The first number is the row count: 3144 counties.

7. Store just the names of all US counties in a new vector.

# Pull the county-name column out as a plain character vector.
USCounties_df_name <- USCounties_df[["CTYNAME"]]
head(USCounties_df_name, n = 6)
## [1] "Autauga County" "Baldwin County" "Barbour County" "Bibb County"   
## [5] "Blount County"  "Bullock County"

8. Which counties have the most and the least people living in them? (Use column “POPESTIMATE2013”)

# Part One: Use the dplyr package.

## Smallest and largest value of "POPESTIMATE2013". This yields the numbers
## only; the county names are looked up in a separate step afterwards.
## (Attempts to group by POPESTIMATE2013, or to also pull STNAME/CTYNAME/
## CENSUS2010 through summarise_each(), did not display the name correctly.)
summarise_each(
  USCounties_df,
  funs(min(., na.rm = TRUE), max(., na.rm = TRUE)),
  matches("POPESTIMATE2013")
)
## Source: local data frame [1 x 2]
## 
##   min      max
## 1  90 10017068
# Result: max: 10017068; min: 90
# Look the counties up from the data itself instead of retyping the numbers
# from the output above, so the lookup stays correct if the data changes.
# which() drops rows where the comparison is NA; ties return every match.
pop2013 <- USCounties_df$POPESTIMATE2013
# County with the largest "POPESTIMATE2013" (Los Angeles County):
USCounties_df$CTYNAME[which(pop2013 == max(pop2013, na.rm = TRUE))]
## [1] "Los Angeles County"
# County with the smallest "POPESTIMATE2013" (Kalawao County):
USCounties_df$CTYNAME[which(pop2013 == min(pop2013, na.rm = TRUE))]
## [1] "Kalawao County"
# Part 2: Ram's way. which.max()/which.min() give the row position directly
# (the population figure itself is not shown):
maxrow <- which.max(USCounties_df[["POPESTIMATE2013"]])
USCounties_df[["CTYNAME"]][maxrow]
## [1] "Los Angeles County"
# The same lookups as one-liners:
with(USCounties_df, CTYNAME[which.max(POPESTIMATE2013)])
## [1] "Los Angeles County"
with(USCounties_df, CTYNAME[which.min(POPESTIMATE2013)])
## [1] "Kalawao County"

9. List of counties with an area greater than 5000 sq miles

# Keep the rows whose Area exceeds 5000, then list their county names.
bigCounties <- USCounties_df[which(USCounties_df$Area > 5000), ]
bigCounties[["CTYNAME"]]
##  [1] "Aleutians East Borough"          "Bethel Census Area"             
##  [3] "Denali Borough"                  "Dillingham Census Area"         
##  [5] "Fairbanks North Star Borough"    "Hoonah-Angoon Census Area"      
##  [7] "Kenai Peninsula Borough"         "Kodiak Island Borough"          
##  [9] "Lake and Peninsula Borough"      "Matanuska-Susitna Borough"      
## [11] "Nome Census Area"                "North Slope Borough"            
## [13] "Northwest Arctic Borough"        "Southeast Fairbanks Census Area"
## [15] "Valdez-Cordova Census Area"      "Wade Hampton Census Area"       
## [17] "Yakutat City and Borough"        "Yukon-Koyukuk Census Area"      
## [19] "Apache County"                   "Cochise County"                 
## [21] "Coconino County"                 "Maricopa County"                
## [23] "Mohave County"                   "Navajo County"                  
## [25] "Pima County"                     "Pinal County"                   
## [27] "Yavapai County"                  "Yuma County"                    
## [29] "Fresno County"                   "Inyo County"                    
## [31] "Kern County"                     "Riverside County"               
## [33] "San Bernardino County"           "Siskiyou County"                
## [35] "Idaho County"                    "Owyhee County"                  
## [37] "Aroostook County"                "St. Louis County"               
## [39] "Beaverhead County"               "Flathead County"                
## [41] "Phillips County"                 "Rosebud County"                 
## [43] "Cherry County"                   "Clark County"                   
## [45] "Elko County"                     "Humboldt County"                
## [47] "Lander County"                   "Lincoln County"                 
## [49] "Nye County"                      "Pershing County"                
## [51] "Washoe County"                   "White Pine County"              
## [53] "Catron County"                   "Chaves County"                  
## [55] "McKinley County"                 "Otero County"                   
## [57] "Rio Arriba County"               "San Juan County"                
## [59] "Socorro County"                  "Douglas County"                 
## [61] "Harney County"                   "Klamath County"                 
## [63] "Lake County"                     "Malheur County"                 
## [65] "Brewster County"                 "Box Elder County"               
## [67] "Garfield County"                 "Millard County"                 
## [69] "San Juan County"                 "Tooele County"                  
## [71] "Okanogan County"                 "Carbon County"                  
## [73] "Fremont County"                  "Natrona County"                 
## [75] "Park County"                     "Sweetwater County"
# Same filter, keeping only the state, county name and area columns.
# (The stray trailing empty argument of the original call is removed.)
subset(USCounties_df, Area > 5000, select = c(STNAME, CTYNAME, Area))
## Source: local data frame [76 x 3]
## 
##        STNAME                         CTYNAME       Area
## 1      Alaska          Aleutians East Borough   6981.867
## 2      Alaska              Bethel Census Area  40570.001
## 3      Alaska                  Denali Borough  12751.719
## 4      Alaska          Dillingham Census Area  18568.839
## 5      Alaska    Fairbanks North Star Borough   7338.518
## 6      Alaska       Hoonah-Angoon Census Area   7524.904
## 7      Alaska         Kenai Peninsula Borough  16075.352
## 8      Alaska           Kodiak Island Borough   6549.726
## 9      Alaska      Lake and Peninsula Borough  23652.048
## 10     Alaska       Matanuska-Susitna Borough  24608.141
## 11     Alaska                Nome Census Area  22961.699
## 12     Alaska             North Slope Borough  88695.579
## 13     Alaska        Northwest Arctic Borough  35767.822
## 14     Alaska Southeast Fairbanks Census Area  24768.569
## 15     Alaska      Valdez-Cordova Census Area  34239.955
## 16     Alaska        Wade Hampton Census Area  17081.277
## 17     Alaska        Yakutat City and Borough   7649.325
## 18     Alaska       Yukon-Koyukuk Census Area 145505.165
## 19    Arizona                   Apache County  11197.521
## 20    Arizona                  Cochise County   6165.661
## 21    Arizona                 Coconino County  18618.869
## 22    Arizona                 Maricopa County   9200.075
## 23    Arizona                   Mohave County  13310.980
## 24    Arizona                   Navajo County   9950.435
## 25    Arizona                     Pima County   9187.004
## 26    Arizona                    Pinal County   5365.665
## 27    Arizona                  Yavapai County   8123.477
## 28    Arizona                     Yuma County   5513.997
## 29 California                   Fresno County   5958.869
## 30 California                     Inyo County  10180.938
## 31 California                     Kern County   8131.951
## 32 California                Riverside County   7206.435
## 33 California           San Bernardino County  20057.108
## 34 California                 Siskiyou County   6277.888
## 35      Idaho                    Idaho County   8477.342
## 36      Idaho                   Owyhee County   7665.540
## 37      Maine                Aroostook County   6671.091
## 38  Minnesota                St. Louis County   6247.568
## 39    Montana               Beaverhead County   5541.607
## 40    Montana                 Flathead County   5087.670
## 41    Montana                 Phillips County   5140.086
## 42    Montana                  Rosebud County   5010.315
## 43   Nebraska                   Cherry County   5960.555
## 44     Nevada                    Clark County   7891.493
## 45     Nevada                     Elko County  17169.822
## 46     Nevada                 Humboldt County   9640.776
## 47     Nevada                   Lander County   5490.092
## 48     Nevada                  Lincoln County  10633.200
## 49     Nevada                      Nye County  18181.934
## 50     Nevada                 Pershing County   6036.599
## 51     Nevada                   Washoe County   6302.369
## 52     Nevada               White Pine County   8875.650
## 53 New Mexico                   Catron County   6923.674
## 54 New Mexico                   Chaves County   6065.277
## 55 New Mexico                 McKinley County   5449.820
## 56 New Mexico                    Otero County   6613.211
## 57 New Mexico               Rio Arriba County   5860.839
## 58 New Mexico                 San Juan County   5513.091
## 59 New Mexico                  Socorro County   6646.581
## 60     Oregon                  Douglas County   5036.459
## 61     Oregon                   Harney County  10133.215
## 62     Oregon                  Klamath County   5941.048
## 63     Oregon                     Lake County   8138.978
## 64     Oregon                  Malheur County   9887.555
## 65      Texas                 Brewster County   6183.776
## 66       Utah                Box Elder County   5745.552
## 67       Utah                 Garfield County   5175.096
## 68       Utah                  Millard County   6595.899
## 69       Utah                 San Juan County   7820.107
## 70       Utah                   Tooele County   6941.140
## 71 Washington                 Okanogan County   5267.998
## 72    Wyoming                   Carbon County   7897.550
## 73    Wyoming                  Fremont County   9183.843
## 74    Wyoming                  Natrona County   5340.512
## 75    Wyoming                     Park County   6942.034
## 76    Wyoming               Sweetwater County  10426.637
# Total 76

10. Population distribution by Quartiles (Which counties are in the bottom 25%, top 25% etc)

popByQuartiles <- USCounties_df$POPESTIMATE2013
# Compute the five default quantiles once and reuse them below.
pop_quantiles <- quantile(popByQuartiles)
pop_quantiles
##          0%         25%         50%         75%        100% 
##       90.00    11015.75    25733.00    67582.25 10017068.00
# We can specify which quantiles we want:
quantile(popByQuartiles, probs = c(0.25, 0.5, 0.75))
##      25%      50%      75% 
## 11015.75 25733.00 67582.25
# So we know where the boundaries are, according to which we have to group the counties.
# Picked by name from the quantile() result rather than by position.
firstQ <- pop_quantiles["25%"]
med <- pop_quantiles["50%"]
thirdQ <- pop_quantiles["75%"]

# Quartile membership. Each group includes its upper boundary, so every county
# lands in exactly one group: <= firstQ, (firstQ, med], (med, thirdQ], > thirdQ.
pop_est <- USCounties_df$POPESTIMATE2013
# bottom quartile
head(USCounties_df[pop_est <= firstQ, c("CTYNAME", "POPESTIMATE2013")])
# Counties between first quartile and median
head(USCounties_df[pop_est > firstQ & pop_est <= med, c("CTYNAME", "POPESTIMATE2013")])
# topmost quartile
head(USCounties_df[pop_est > thirdQ, c("CTYNAME", "POPESTIMATE2013")])
## Source: local data frame [6 x 2]
## 
##          CTYNAME POPESTIMATE2013
## 1 Baldwin County          195540
## 2 Calhoun County          116736
## 3 Cullman County           80811
## 4  DeKalb County           71013
## 5  Elmore County           80902
## 6  Etowah County          103931
# Counties between second and third quartile (above the median, up to and
# including the third quartile)
head(USCounties_df$CTYNAME[(pop_est <= thirdQ) & (pop_est > med)])
## [1] "Autauga County"  "Barbour County"  "Blount County"   "Chambers County"
## [5] "Cherokee County" "Chilton County"

11. I would like to find some small towns to live in, where the population is around 10000

# Counties whose 2010 census count lies strictly between 10000 and 10100,
# i.e. just above 10000; only state, county name and count are shown.
USCounties_df[
  which(USCounties_df$CENSUS2010 > 10000 & USCounties_df$CENSUS2010 < 10100),
  c("STNAME", "CTYNAME", "CENSUS2010")
]
## Source: local data frame [7 x 3]
## 
##     STNAME           CTYNAME CENSUS2010
## 1 Colorado       Yuma County      10043
## 2  Georgia     Lanier County      10078
## 3   Kansas       Rice County      10083
## 4 Kentucky   Metcalfe County      10099
## 5  Montana     Carbon County      10078
## 6   Nevada White Pine County      10030
## 7 Virginia Cumberland County      10052