Let’s read population data from github, check descriptives with summary command. Output is limited by customized CSS in Markdown.

USpop<-read.csv("https://raw.githubusercontent.com/sigmasigmaiota/USpopulation/master/Population DataSet.csv")

#Look at summary table.
summary(USpop)
##        X          SUMLEV      REGION    DIVISION      STATE      
##  Min.   : 1   Min.   :10.00   0: 1   5      : 9   Min.   : 0.00  
##  1st Qu.:15   1st Qu.:40.00   1:10   8      : 8   1st Qu.:12.00  
##  Median :29   Median :40.00   2:13   4      : 7   Median :27.00  
##  Mean   :29   Mean   :38.07   3:18   1      : 6   Mean   :27.18  
##  3rd Qu.:43   3rd Qu.:40.00   4:14   0      : 5   3rd Qu.:41.00  
##  Max.   :57   Max.   :40.00   X: 1   3      : 5   Max.   :72.00  
##                                      (Other):17                  
##          NAME    ESTIMATESBASE2010   POPESTIMATE2010    
##  Alabama   : 1   Min.   :   563773   Min.   :   564483  
##  Alaska    : 1   1st Qu.:  1853001   1st Qu.:  1854214  
##  Arizona   : 1   Median :  4625381   Median :  4635656  
##  Arkansas  : 1   Mean   : 16315798   Mean   : 16345610  
##  California: 1   3rd Qu.:  9535736   3rd Qu.:  9574293  
##  Colorado  : 1   Max.   :308758105   Max.   :309326085  
##  (Other)   :51                                          
##  POPESTIMATE2011     POPESTIMATE2012     POPESTIMATE2013    
##  Min.   :   567224   Min.   :   576270   Min.   :   582123  
##  1st Qu.:  1856074   1st Qu.:  1856764   1st Qu.:  1865414  
##  Median :  4671422   Median :  4717112   Median :  4764153  
##  Mean   : 16463487   Mean   : 16583459   Mean   : 16697654  
##  3rd Qu.:  9656754   3rd Qu.:  9749123   3rd Qu.:  9843599  
##  Max.   :311580009   Max.   :313874218   Max.   :316057727  
##                                                             
##  POPESTIMATE2014     POPESTIMATE2015     POPESTIMATE2016    
##  Min.   :   582548   Min.   :   585668   Min.   :   584290  
##  1st Qu.:  1879522   1st Qu.:  1891507   1st Qu.:  1905924  
##  Median :  4823793   Median :  4853160   Median :  4864745  
##  Mean   : 16819195   Mean   : 16942126   Mean   : 17063518  
##  3rd Qu.:  9930589   3rd Qu.:  9932573   3rd Qu.:  9951890  
##  Max.   :318386421   Max.   :320742673   Max.   :323071342  
##                                                             
##  POPESTIMATE2017     POPESTIMATE2018      NPOPCHG_2010   
##  Min.   :   578934   Min.   :   577737   Min.   : -6582  
##  1st Qu.:  1917575   1st Qu.:  1929268   1st Qu.:  2570  
##  Median :  4875120   Median :  4887871   Median :  6457  
##  Mean   : 17171340   Mean   : 17275394   Mean   : 29812  
##  3rd Qu.:  9976447   3rd Qu.:  9995915   3rd Qu.: 18362  
##  Max.   :325147121   Max.   :327167434   Max.   :567980  
##                                                          
##   NPOPCHG_2011      NPOPCHG_2012      NPOPCHG_2013      NPOPCHG_2014    
##  Min.   : -42793   Min.   : -44244   Min.   : -41411   Min.   : -58203  
##  1st Qu.:   8898   1st Qu.:  10012   1st Qu.:   8774   1st Qu.:   7386  
##  Median :  21288   Median :  16893   Median :  18436   Median :  17240  
##  Mean   : 117877   Mean   : 119972   Mean   : 114195   Mean   : 121542  
##  3rd Qu.:  65723   3rd Qu.:  71221   3rd Qu.:  53494   3rd Qu.:  59023  
##  Max.   :2253924   Max.   :2294209   Max.   :2183509   Max.   :2328694  
##                                                                         
##   NPOPCHG_2015      NPOPCHG_2016      NPOPCHG_2017      NPOPCHG_2018    
##  Min.   : -61708   Min.   : -66671   Min.   : -81494   Min.   :-129848  
##  1st Qu.:   4089   1st Qu.:   3647   1st Qu.:   1365   1st Qu.:   3341  
##  Median :  14763   Median :  13364   Median :  14027   Median :  17827  
##  Mean   : 122931   Mean   : 121392   Mean   : 107822   Mean   : 104054  
##  3rd Qu.:  50831   3rd Qu.:  60116   3rd Qu.:  60505   3rd Qu.:  61216  
##  Max.   :2356252   Max.   :2328669   Max.   :2075779   Max.   :2020313  
##                                                                         
##   PPOPCHG_2010       PPOPCHG_2011      PPOPCHG_2012      PPOPCHG_2013    
##  Min.   :-0.12431   Min.   :-1.1499   Min.   :-1.2027   Min.   :-1.1394  
##  1st Qu.: 0.09317   1st Qu.: 0.2671   1st Qu.: 0.2745   1st Qu.: 0.2512  
##  Median : 0.17818   Median : 0.6560   Median : 0.6946   Median : 0.6524  
##  Mean   : 0.17896   Mean   : 0.6490   Mean   : 0.6857   Mean   : 0.6626  
##  3rd Qu.: 0.24368   3rd Qu.: 0.8968   3rd Qu.: 1.0440   3rd Qu.: 0.9712  
##  Max.   : 0.55154   Max.   : 2.3992   Max.   : 2.4408   Max.   : 2.9785  
##                                                                          
##   PPOPCHG_2014      PPOPCHG_2015      PPOPCHG_2016      PPOPCHG_2017     
##  Min.   :-1.6199   Min.   :-1.7457   Min.   :-1.9196   Min.   :-2.39231  
##  1st Qu.: 0.1930   1st Qu.: 0.1282   1st Qu.: 0.1233   1st Qu.: 0.05405  
##  Median : 0.5311   Median : 0.4792   Median : 0.4030   Median : 0.37844  
##  Mean   : 0.6018   Mean   : 0.6056   Mean   : 0.5883   Mean   : 0.51042  
##  3rd Qu.: 0.9699   3rd Qu.: 1.1043   3rd Qu.: 1.0899   3rd Qu.: 1.05089  
##  Max.   : 2.1306   Max.   : 2.2566   Max.   : 2.0156   Max.   : 2.13758  
##                                                                          
##   PPOPCHG_2018     NRANK_ESTBASE2010 NRANK_POPEST2010 NRANK_POPEST2011
##  Min.   :-3.9052   1      : 2        1      : 2       1      : 2      
##  1st Qu.: 0.1299   2      : 2        2      : 2       2      : 2      
##  Median : 0.3979   3      : 2        3      : 2       3      : 2      
##  Mean   : 0.4897   4      : 2        4      : 2       4      : 2      
##  3rd Qu.: 0.9723   X      : 2        X      : 2       X      : 2      
##  Max.   : 2.0854   10     : 1        10     : 1       10     : 1      
##                    (Other):46        (Other):46       (Other):46      
##  NRANK_POPEST2012 NRANK_POPEST2013 NRANK_POPEST2014 NRANK_POPEST2015
##  1      : 2       1      : 2       1      : 2       1      : 2      
##  2      : 2       2      : 2       2      : 2       2      : 2      
##  3      : 2       3      : 2       3      : 2       3      : 2      
##  4      : 2       4      : 2       4      : 2       4      : 2      
##  X      : 2       X      : 2       X      : 2       X      : 2      
##  10     : 1       10     : 1       10     : 1       10     : 1      
##  (Other):46       (Other):46       (Other):46       (Other):46      
##  NRANK_POPEST2016 NRANK_POPEST2017 NRANK_POPEST2018 NRANK_NPCHG2010
##  1      : 2       1      : 2       1      : 2       1      : 2     
##  2      : 2       2      : 2       2      : 2       2      : 2     
##  3      : 2       3      : 2       3      : 2       3      : 2     
##  4      : 2       4      : 2       4      : 2       4      : 2     
##  X      : 2       X      : 2       X      : 2       X      : 2     
##  10     : 1       10     : 1       10     : 1       10     : 1     
##  (Other):46       (Other):46       (Other):46       (Other):46     
##  NRANK_NPCHG2011 NRANK_NPCHG2012 NRANK_NPCHG2013 NRANK_NPCHG2014
##  1      : 2      1      : 2      1      : 2      1      : 2     
##  2      : 2      2      : 2      2      : 2      2      : 2     
##  3      : 2      3      : 2      3      : 2      3      : 2     
##  4      : 2      4      : 2      4      : 2      4      : 2     
##  X      : 2      X      : 2      X      : 2      X      : 2     
##  10     : 1      10     : 1      10     : 1      10     : 1     
##  (Other):46      (Other):46      (Other):46      (Other):46     
##  NRANK_NPCHG2015 NRANK_NPCHG2016 NRANK_NPCHG2017 NRANK_NPCHG2018
##  1      : 2      1      : 2      1      : 2      1      : 2     
##  2      : 2      2      : 2      2      : 2      2      : 2     
##  3      : 2      3      : 2      3      : 2      3      : 2     
##  4      : 2      4      : 2      4      : 2      4      : 2     
##  X      : 2      X      : 2      X      : 2      X      : 2     
##  10     : 1      10     : 1      10     : 1      10     : 1     
##  (Other):46      (Other):46      (Other):46      (Other):46     
##  NRANK_PPCHG2010 NRANK_PPCHG2011 NRANK_PPCHG2012 NRANK_PPCHG2013
##  1      : 2      1      : 2      1      : 2      1      : 2     
##  2      : 2      2      : 2      2      : 2      2      : 2     
##  3      : 2      3      : 2      3      : 2      3      : 2     
##  4      : 2      4      : 2      4      : 2      4      : 2     
##  X      : 2      X      : 2      X      : 2      X      : 2     
##  10     : 1      10     : 1      10     : 1      10     : 1     
##  (Other):46      (Other):46      (Other):46      (Other):46     
##  NRANK_PPCHG2014 NRANK_PPCHG2015 NRANK_PPCHG2016 NRANK_PPCHG2017
##  1      : 2      1      : 2      1      : 2      1      : 2     
##  2      : 2      2      : 2      2      : 2      2      : 2     
##  3      : 2      3      : 2      3      : 2      3      : 2     
##  4      : 2      4      : 2      4      : 2      4      : 2     
##  X      : 2      X      : 2      X      : 2      X      : 2     
##  10     : 1      10     : 1      10     : 1      10     : 1     
##  (Other):46      (Other):46      (Other):46      (Other):46     
##  NRANK_PPCHG2018
##  1      : 2     
##  2      : 2     
##  3      : 2     
##  4      : 2     
##  X      : 2     
##  10     : 1     
##  (Other):46

We’ll need to remove the divisions listed at the top of the dataset by creating a subset; let’s remove the first five rows, wherein data at the region and national level lives.

#subset the data.
USpop.StateTerr<-USpop[6:57,]
head(USpop.StateTerr)

We now have information only on the state/territory level. Let’s transform and create a row for each year. Additionally, let’s omit the first two columns, as SUMLEV is consistent on the state/terr level.

#remove first two columns
USpop.StateTerr[1:2]<-NULL
library(tidyr)
library(dplyr)
#Create row for each statistic.
USpop.YearList<-gather(USpop.StateTerr,"Statistic","Value",5:length(colnames(USpop.StateTerr)))
head(USpop.YearList)

Let’s extract the year from our new “Year” variable and create a “Statistic” variable.

library(stringr)
USpop.YearList$Year<-as.numeric(str_extract(USpop.YearList$Statistic,"([0-9]+)"))
USpop.YearList$Statistic<-gsub("([0-9])|(_)","",USpop.YearList$Statistic)

#Rename value column to specifically address state/terr level data.
colnames(USpop.YearList)[6]<-"StateValue"

head(USpop.YearList)

Let’s do the same for division information.

USdiv.YearList<-gather(USpop[2:5,],"Statistic","Value",7:length(colnames(USpop)))

#Remove first two columns.
USdiv.YearList[1:2]<-NULL

#We also have no need for state or division information at Region level.
USdiv.YearList[2:3]<-NULL

#Rename NAME column.
colnames(USdiv.YearList)[2]<-"RegionName"

#Rename Value column in anticipation of merge with state data.
colnames(USdiv.YearList)[4]<-"RegionValue"

#Clean year and statistic.
USdiv.YearList$Year<-as.numeric(str_extract(USdiv.YearList$Statistic,"([0-9]+)"))
USdiv.YearList$Statistic<-gsub("([0-9])|(_)","",USdiv.YearList$Statistic)

#Shorten RegionName.
USdiv.YearList$RegionName<-gsub("( Region)","",USdiv.YearList$RegionName)

head(USdiv.YearList)

Nice. Let’s merge by REGION number, Year, and Statistic. We must use full_join in order to keep Puerto Rico, which is omitted from Region categorization.

USpop2<-full_join(USdiv.YearList,USpop.YearList,by=c("REGION","Statistic","Year"))

head(USpop2)

We need to pair this with a list of divisions, which differs from Region. A list exists on wikipedia; I’ve edited from https://en.wikipedia.org/wiki/List_of_regions_of_the_United_States.

DIVISION<-as.factor(c(1,2,3,4,5,6,7,8,9))
DivisionName<-c("New England","Mid-Atlantic","East North Central","West North Central","South Atlantic","East South Central","West South Central","Mountain","Pacific")
Divisions<-data.frame(DIVISION,DivisionName)

Merge Divisions with master dataset and check.

USpop3<-full_join(Divisions,USpop2,by=c("DIVISION"))
head(USpop3)

Let’s look at Puerto Rico in particular.

PR<-USpop3[which(USpop3$NAME == "Puerto Rico"),]
head(PR)

Puerto Rico is not ranked among the other states and has no division assignment. This is problematic for anyone completing comparative analysis on a national level. Since puerto rico has been assigned no region, there is no regional data available. Let’s recode NA as Puerto Rico in RegionName.

USpop3$RegionName[which(USpop3$NAME=="Puerto Rico")]<-"Puerto Rico"

#look at Puerto Rico again, replace subset.
PR<-USpop3[which(USpop3$NAME == "Puerto Rico"),]
head(PR)

Let’s add a column for National statistics.

#National data only.
USpop.Nat<-USpop[1,3:ncol(USpop)]

#gather, as before
USpop.Nat<-gather(USpop.Nat,"Statistic","Value",5:length(colnames(USpop.Nat)))

#Clean year and statistic.
USpop.Nat$Year<-as.numeric(str_extract(USpop.Nat$Statistic,"([0-9]+)"))
USpop.Nat$Statistic<-gsub("([0-9])|(_)","",USpop.Nat$Statistic)

#omit columns and unnecessary rows.
USpop.Nat<-USpop.Nat[,5:7]
USpop.Nat<-USpop.Nat[which(USpop.Nat$Value!="X"),]

#rename coluns in preparation for merge.
colnames(USpop.Nat)[2]<-"NationalValue"

#merge into master data.
USpop4<-full_join(USpop.Nat,USpop3,by=c("Statistic","Year"))
head(USpop4)

Let’s sort the data by type of statistic.

USpopESTIMATESBASE<-USpop4[which(USpop4$Statistic=="ESTIMATESBASE"),]
USpopESTIMATE<-USpop4[which(USpop4$Statistic=="POPESTIMATE"),]
USpopPOPCHG<-USpop4[which(USpop4$Statistic=="NPOPCHG"),]
USpopPPOPCHG<-USpop4[which(USpop4$Statistic=="PPOPCHG"),]

Finally, let’s map our results. Let’s map population increases in 2018.

USpopPOPCHG2018<-USpop4[which(USpop4$Statistic=="NPOPCHG"&USpop4$Year==2018),]
USpopPOPCHG2018$StateValue<-as.numeric(USpopPOPCHG2018$StateValue)
USpopPOPCHG2018$StateValue[which(USpopPOPCHG2018$StateValue<0)]<-0


library(ggplot2)
library(fiftystater)
library(ggthemes)
library(tidyverse)
library(viridis)

USpopPOPCHG2018$statefull<-tolower(USpopPOPCHG2018$NAME)

data("fifty_states")

library(mapdata)
library(mapproj)

#Puerto Rico color must be set manually; there has been a decrease in population, so we've matched the color representing zero growth.
pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0) 
prmap<-ggplot(USpopPOPCHG2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="lemonchiffon1")+
  coord_fixed(1.0)+
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())

Total_plot<-ggplot(USpopPOPCHG2018, aes(map_id=statefull)) + 
  geom_map(aes(fill=StateValue), map=fifty_states) + 
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
  scale_x_continuous(breaks = NULL) + 
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())+
  scale_fill_viridis(breaks=c(-100000,-25000,-5000,0,5000,25000,100000,300000),
labels=c('-100K--25K','-25K--5K','-25K-0','0-5000','5k-25k','25K-100K','100K-300K','300K+'),begin=1,end=.25,option="magma")+
  guides(fill=guide_legend(title="Growth by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
  ggtitle("Population Increases by State, 2018")

library(grid)
library(grDevices)

png(file="C://MSDS/Project2a.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1) 
print(prmap,vp=v4)
dev.off()
## png 
##   2
knitr::include_graphics("/MSDS/Project2a.png")

The greatest estimated population increase in 2018, by far, occurred in Texas and Florida.


Let’s map population losses.

USpopPOPCHG2018<-USpop4[which(USpop4$Statistic=="NPOPCHG"&USpop4$Year==2018),]
USpopPOPCHG2018$StateValue<-as.numeric(USpopPOPCHG2018$StateValue)
USpopPOPCHG2018$StateValue[which(USpopPOPCHG2018$StateValue>0)]<-0

USpopPOPCHG2018$statefull<-tolower(USpopPOPCHG2018$NAME)

data("fifty_states")

pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0) 
prmap<-ggplot(USpopPOPCHG2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="khaki3")+
  coord_fixed(1.0)+
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())

Total_plot<-ggplot(USpopPOPCHG2018, aes(map_id=statefull)) + 
  geom_map(aes(fill=StateValue), map=fifty_states) + 
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
  scale_x_continuous(breaks = NULL) + 
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())+
  scale_fill_viridis(breaks=c(-300000,-100000,-25000,-5000,0),
labels=c('300K+','100K-300K','25K-100K','5K-25K','0-25K'),begin=1,end=0,option="cividis")+
  guides(fill=guide_legend(title="Decrease pop by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
  ggtitle("Population Decreases by State, 2018")

library(grid)
library(grDevices)

png(file="C://MSDS/Project2b.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1) 
print(prmap,vp=v4)
dev.off()
## png 
##   2
knitr::include_graphics("/MSDS/Project2b.png")

It’s easy to see that West Virginia and Louisiana is estimated to have experienced a decrease in population in 2018, as well as Illinois and New York. Puerto Rico is estimated to have experienced the greatest decrease in population.


Let’s calculate percentage of national and division population resides in each state or territory using projected population data for 2018. Let’s map percentage of national population by state.

USpopESTIMATE2018<-USpop4[which(USpopESTIMATE$Year==2018),]
USpopESTIMATE2018$StateValue<-as.numeric(USpopESTIMATE2018$StateValue)
USpopESTIMATE2018$RegionValue<-as.numeric(USpopESTIMATE2018$RegionValue)
USpopESTIMATE2018$NationalValue<-as.numeric(USpopESTIMATE2018$NationalValue)
USpopESTIMATE2018$NatPer <- USpopESTIMATE2018$StateValue/USpopESTIMATE2018$NationalValue
USpopESTIMATE2018$RegPer <- USpopESTIMATE2018$StateValue/USpopESTIMATE2018$RegionValue

USpopESTIMATE2018$statefull<-tolower(USpopESTIMATE2018$NAME)

data("fifty_states")

pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0) 
prmap<-ggplot(USpopESTIMATE2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="grey49")+
  coord_fixed(1.0)+
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())

Total_plot<-ggplot(USpopESTIMATE2018, aes(map_id=statefull)) + 
  geom_map(aes(fill=NatPer), map=fifty_states) + 
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
  scale_x_continuous(breaks = NULL) + 
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())+
  scale_fill_viridis(breaks=c(.05,.1,.15,.20),
labels=c('0-5%','6-10%','11-15%','15% +'),begin=.3,end=1,option="cividis")+
  guides(fill=guide_legend(title="Nat Pop % by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
  ggtitle("National Population Percentage by State, 2018")

library(grid)
library(grDevices)

png(file="C://MSDS/Project2c.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1) 
print(prmap,vp=v4)
dev.off()
## png 
##   2
knitr::include_graphics("/MSDS/Project2c.png")

It’s apparent that the states with greatest populations are California, Texas, Florida and New York. With New York’s estimated population decreasing, it’s curious that it still accounts for one of the greatest percentages of national population.


Let’s map state percentage by region.

data("fifty_states")

pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0) 
prmap<-ggplot(USpopESTIMATE2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="grey98")+
  coord_fixed(1.0)+
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())

Total_plot<-ggplot(USpopESTIMATE2018, aes(map_id=statefull)) + 
  geom_map(aes(fill=RegPer), map=fifty_states) + 
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
  scale_x_continuous(breaks = NULL) + 
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        rect = element_blank())+
  scale_fill_viridis(breaks=c(.05,.1,.15,.20),
labels=c('0-5%','6-10%','11-15%','15% +'),begin=.3,end=1,option="cividis")+
  guides(fill=guide_legend(title="Region Pop % by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
  ggtitle("Regional Population Percentage by State, 2018")

library(grid)
library(grDevices)

png(file="C://MSDS/Project2d.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1) 
print(prmap,vp=v4)
dev.off()
## png 
##   2
knitr::include_graphics("/MSDS/Project2d.png")

Omitting Puerto Rico due to the fact that there is no region or division assignment, we see that California, as expected, dominates the West Region, while New York dominates the Northeast to a lesser degree. Texas is most populous in the South, and Illinois carries the Midwest by only a slight margin.

Such data could be used to form an argument for representation in Congress. Should a state such as California, with 12.1% of the national population, be represented by only 2 senators?