Let’s read population data from github, check descriptives with summary command. Output is limited by customized CSS in Markdown.
USpop<-read.csv("https://raw.githubusercontent.com/sigmasigmaiota/USpopulation/master/Population DataSet.csv")
#Look at summary table.
summary(USpop)## X SUMLEV REGION DIVISION STATE
## Min. : 1 Min. :10.00 0: 1 5 : 9 Min. : 0.00
## 1st Qu.:15 1st Qu.:40.00 1:10 8 : 8 1st Qu.:12.00
## Median :29 Median :40.00 2:13 4 : 7 Median :27.00
## Mean :29 Mean :38.07 3:18 1 : 6 Mean :27.18
## 3rd Qu.:43 3rd Qu.:40.00 4:14 0 : 5 3rd Qu.:41.00
## Max. :57 Max. :40.00 X: 1 3 : 5 Max. :72.00
## (Other):17
## NAME ESTIMATESBASE2010 POPESTIMATE2010
## Alabama : 1 Min. : 563773 Min. : 564483
## Alaska : 1 1st Qu.: 1853001 1st Qu.: 1854214
## Arizona : 1 Median : 4625381 Median : 4635656
## Arkansas : 1 Mean : 16315798 Mean : 16345610
## California: 1 3rd Qu.: 9535736 3rd Qu.: 9574293
## Colorado : 1 Max. :308758105 Max. :309326085
## (Other) :51
## POPESTIMATE2011 POPESTIMATE2012 POPESTIMATE2013
## Min. : 567224 Min. : 576270 Min. : 582123
## 1st Qu.: 1856074 1st Qu.: 1856764 1st Qu.: 1865414
## Median : 4671422 Median : 4717112 Median : 4764153
## Mean : 16463487 Mean : 16583459 Mean : 16697654
## 3rd Qu.: 9656754 3rd Qu.: 9749123 3rd Qu.: 9843599
## Max. :311580009 Max. :313874218 Max. :316057727
##
## POPESTIMATE2014 POPESTIMATE2015 POPESTIMATE2016
## Min. : 582548 Min. : 585668 Min. : 584290
## 1st Qu.: 1879522 1st Qu.: 1891507 1st Qu.: 1905924
## Median : 4823793 Median : 4853160 Median : 4864745
## Mean : 16819195 Mean : 16942126 Mean : 17063518
## 3rd Qu.: 9930589 3rd Qu.: 9932573 3rd Qu.: 9951890
## Max. :318386421 Max. :320742673 Max. :323071342
##
## POPESTIMATE2017 POPESTIMATE2018 NPOPCHG_2010
## Min. : 578934 Min. : 577737 Min. : -6582
## 1st Qu.: 1917575 1st Qu.: 1929268 1st Qu.: 2570
## Median : 4875120 Median : 4887871 Median : 6457
## Mean : 17171340 Mean : 17275394 Mean : 29812
## 3rd Qu.: 9976447 3rd Qu.: 9995915 3rd Qu.: 18362
## Max. :325147121 Max. :327167434 Max. :567980
##
## NPOPCHG_2011 NPOPCHG_2012 NPOPCHG_2013 NPOPCHG_2014
## Min. : -42793 Min. : -44244 Min. : -41411 Min. : -58203
## 1st Qu.: 8898 1st Qu.: 10012 1st Qu.: 8774 1st Qu.: 7386
## Median : 21288 Median : 16893 Median : 18436 Median : 17240
## Mean : 117877 Mean : 119972 Mean : 114195 Mean : 121542
## 3rd Qu.: 65723 3rd Qu.: 71221 3rd Qu.: 53494 3rd Qu.: 59023
## Max. :2253924 Max. :2294209 Max. :2183509 Max. :2328694
##
## NPOPCHG_2015 NPOPCHG_2016 NPOPCHG_2017 NPOPCHG_2018
## Min. : -61708 Min. : -66671 Min. : -81494 Min. :-129848
## 1st Qu.: 4089 1st Qu.: 3647 1st Qu.: 1365 1st Qu.: 3341
## Median : 14763 Median : 13364 Median : 14027 Median : 17827
## Mean : 122931 Mean : 121392 Mean : 107822 Mean : 104054
## 3rd Qu.: 50831 3rd Qu.: 60116 3rd Qu.: 60505 3rd Qu.: 61216
## Max. :2356252 Max. :2328669 Max. :2075779 Max. :2020313
##
## PPOPCHG_2010 PPOPCHG_2011 PPOPCHG_2012 PPOPCHG_2013
## Min. :-0.12431 Min. :-1.1499 Min. :-1.2027 Min. :-1.1394
## 1st Qu.: 0.09317 1st Qu.: 0.2671 1st Qu.: 0.2745 1st Qu.: 0.2512
## Median : 0.17818 Median : 0.6560 Median : 0.6946 Median : 0.6524
## Mean : 0.17896 Mean : 0.6490 Mean : 0.6857 Mean : 0.6626
## 3rd Qu.: 0.24368 3rd Qu.: 0.8968 3rd Qu.: 1.0440 3rd Qu.: 0.9712
## Max. : 0.55154 Max. : 2.3992 Max. : 2.4408 Max. : 2.9785
##
## PPOPCHG_2014 PPOPCHG_2015 PPOPCHG_2016 PPOPCHG_2017
## Min. :-1.6199 Min. :-1.7457 Min. :-1.9196 Min. :-2.39231
## 1st Qu.: 0.1930 1st Qu.: 0.1282 1st Qu.: 0.1233 1st Qu.: 0.05405
## Median : 0.5311 Median : 0.4792 Median : 0.4030 Median : 0.37844
## Mean : 0.6018 Mean : 0.6056 Mean : 0.5883 Mean : 0.51042
## 3rd Qu.: 0.9699 3rd Qu.: 1.1043 3rd Qu.: 1.0899 3rd Qu.: 1.05089
## Max. : 2.1306 Max. : 2.2566 Max. : 2.0156 Max. : 2.13758
##
## PPOPCHG_2018 NRANK_ESTBASE2010 NRANK_POPEST2010 NRANK_POPEST2011
## Min. :-3.9052 1 : 2 1 : 2 1 : 2
## 1st Qu.: 0.1299 2 : 2 2 : 2 2 : 2
## Median : 0.3979 3 : 2 3 : 2 3 : 2
## Mean : 0.4897 4 : 2 4 : 2 4 : 2
## 3rd Qu.: 0.9723 X : 2 X : 2 X : 2
## Max. : 2.0854 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46
## NRANK_POPEST2012 NRANK_POPEST2013 NRANK_POPEST2014 NRANK_POPEST2015
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_POPEST2016 NRANK_POPEST2017 NRANK_POPEST2018 NRANK_NPCHG2010
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_NPCHG2011 NRANK_NPCHG2012 NRANK_NPCHG2013 NRANK_NPCHG2014
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_NPCHG2015 NRANK_NPCHG2016 NRANK_NPCHG2017 NRANK_NPCHG2018
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_PPCHG2010 NRANK_PPCHG2011 NRANK_PPCHG2012 NRANK_PPCHG2013
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_PPCHG2014 NRANK_PPCHG2015 NRANK_PPCHG2016 NRANK_PPCHG2017
## 1 : 2 1 : 2 1 : 2 1 : 2
## 2 : 2 2 : 2 2 : 2 2 : 2
## 3 : 2 3 : 2 3 : 2 3 : 2
## 4 : 2 4 : 2 4 : 2 4 : 2
## X : 2 X : 2 X : 2 X : 2
## 10 : 1 10 : 1 10 : 1 10 : 1
## (Other):46 (Other):46 (Other):46 (Other):46
## NRANK_PPCHG2018
## 1 : 2
## 2 : 2
## 3 : 2
## 4 : 2
## X : 2
## 10 : 1
## (Other):46
We’ll need to remove the divisions listed at the top of the dataset by creating a subset; let’s remove the first five rows, wherein data at the region and national level lives.
#subset the data.
USpop.StateTerr<-USpop[6:57,]
head(USpop.StateTerr)We now have information only on the state/territory level. Let’s transform and create a row for each year. Additionally, let’s omit the first two columns, as SUMLEV is consistent on the state/terr level.
#remove first two columns
USpop.StateTerr[1:2]<-NULL
library(tidyr)
library(dplyr)
#Create row for each statistic.
USpop.YearList<-gather(USpop.StateTerr,"Statistic","Value",5:length(colnames(USpop.StateTerr)))
head(USpop.YearList)Let’s extract the year from our new “Year” variable and create a “Statistic” variable.
library(stringr)
USpop.YearList$Year<-as.numeric(str_extract(USpop.YearList$Statistic,"([0-9]+)"))
USpop.YearList$Statistic<-gsub("([0-9])|(_)","",USpop.YearList$Statistic)
#Rename value column to specifically address state/terr level data.
colnames(USpop.YearList)[6]<-"StateValue"
head(USpop.YearList)Let’s do the same for division information.
USdiv.YearList<-gather(USpop[2:5,],"Statistic","Value",7:length(colnames(USpop)))
#Remove first two columns.
USdiv.YearList[1:2]<-NULL
#We also have no need for state or division information at Region level.
USdiv.YearList[2:3]<-NULL
#Rename NAME column.
colnames(USdiv.YearList)[2]<-"RegionName"
#Rename Value column in anticipation of merge with state data.
colnames(USdiv.YearList)[4]<-"RegionValue"
#Clean year and statistic.
USdiv.YearList$Year<-as.numeric(str_extract(USdiv.YearList$Statistic,"([0-9]+)"))
USdiv.YearList$Statistic<-gsub("([0-9])|(_)","",USdiv.YearList$Statistic)
#Shorten RegionName.
USdiv.YearList$RegionName<-gsub("( Region)","",USdiv.YearList$RegionName)
head(USdiv.YearList)Nice. Let’s merge by REGION number, Year, and Statistic. We must use full_join in order to keep Puerto Rico, which is omitted from Region categorization.
USpop2<-full_join(USdiv.YearList,USpop.YearList,by=c("REGION","Statistic","Year"))
head(USpop2)We need to pair this with a list of divisions, which differs from Region. A list exists on wikipedia; I’ve edited from https://en.wikipedia.org/wiki/List_of_regions_of_the_United_States.
DIVISION<-as.factor(c(1,2,3,4,5,6,7,8,9))
DivisionName<-c("New England","Mid-Atlantic","East North Central","West North Central","South Atlantic","East South Central","West South Central","Mountain","Pacific")
Divisions<-data.frame(DIVISION,DivisionName)Merge Divisions with master dataset and check.
USpop3<-full_join(Divisions,USpop2,by=c("DIVISION"))
head(USpop3)Let’s look at Puerto Rico in particular.
PR<-USpop3[which(USpop3$NAME == "Puerto Rico"),]
head(PR)Puerto Rico is not ranked among the other states and has no division assignment. This is problematic for anyone completing comparative analysis on a national level. Since puerto rico has been assigned no region, there is no regional data available. Let’s recode NA as Puerto Rico in RegionName.
USpop3$RegionName[which(USpop3$NAME=="Puerto Rico")]<-"Puerto Rico"
#look at Puerto Rico again, replace subset.
PR<-USpop3[which(USpop3$NAME == "Puerto Rico"),]
head(PR)Let’s add a column for National statistics.
#National data only.
USpop.Nat<-USpop[1,3:ncol(USpop)]
#gather, as before
USpop.Nat<-gather(USpop.Nat,"Statistic","Value",5:length(colnames(USpop.Nat)))
#Clean year and statistic.
USpop.Nat$Year<-as.numeric(str_extract(USpop.Nat$Statistic,"([0-9]+)"))
USpop.Nat$Statistic<-gsub("([0-9])|(_)","",USpop.Nat$Statistic)
#omit columns and unnecessary rows.
USpop.Nat<-USpop.Nat[,5:7]
USpop.Nat<-USpop.Nat[which(USpop.Nat$Value!="X"),]
#rename coluns in preparation for merge.
colnames(USpop.Nat)[2]<-"NationalValue"
#merge into master data.
USpop4<-full_join(USpop.Nat,USpop3,by=c("Statistic","Year"))
head(USpop4)Let’s sort the data by type of statistic.
USpopESTIMATESBASE<-USpop4[which(USpop4$Statistic=="ESTIMATESBASE"),]
USpopESTIMATE<-USpop4[which(USpop4$Statistic=="POPESTIMATE"),]
USpopPOPCHG<-USpop4[which(USpop4$Statistic=="NPOPCHG"),]
USpopPPOPCHG<-USpop4[which(USpop4$Statistic=="PPOPCHG"),]Finally, let’s map our results. Let’s map population increases in 2018.
USpopPOPCHG2018<-USpop4[which(USpop4$Statistic=="NPOPCHG"&USpop4$Year==2018),]
USpopPOPCHG2018$StateValue<-as.numeric(USpopPOPCHG2018$StateValue)
USpopPOPCHG2018$StateValue[which(USpopPOPCHG2018$StateValue<0)]<-0
library(ggplot2)
library(fiftystater)
library(ggthemes)
library(tidyverse)
library(viridis)
USpopPOPCHG2018$statefull<-tolower(USpopPOPCHG2018$NAME)
data("fifty_states")
library(mapdata)
library(mapproj)
#Puerto Rico color must be set manually; there has been a decrease in population, so we've matched the color representing zero growth.
pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0)
prmap<-ggplot(USpopPOPCHG2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="lemonchiffon1")+
coord_fixed(1.0)+
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())
Total_plot<-ggplot(USpopPOPCHG2018, aes(map_id=statefull)) +
geom_map(aes(fill=StateValue), map=fifty_states) +
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())+
scale_fill_viridis(breaks=c(-100000,-25000,-5000,0,5000,25000,100000,300000),
labels=c('-100K--25K','-25K--5K','-25K-0','0-5000','5k-25k','25K-100K','100K-300K','300K+'),begin=1,end=.25,option="magma")+
guides(fill=guide_legend(title="Growth by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
ggtitle("Population Increases by State, 2018")
library(grid)
library(grDevices)
png(file="C://MSDS/Project2a.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1)
print(prmap,vp=v4)
dev.off()## png
## 2
knitr::include_graphics("/MSDS/Project2a.png")The greatest estimated population increase in 2018, by far, occurred in Texas and Florida.
Let’s map population losses.
USpopPOPCHG2018<-USpop4[which(USpop4$Statistic=="NPOPCHG"&USpop4$Year==2018),]
USpopPOPCHG2018$StateValue<-as.numeric(USpopPOPCHG2018$StateValue)
USpopPOPCHG2018$StateValue[which(USpopPOPCHG2018$StateValue>0)]<-0
USpopPOPCHG2018$statefull<-tolower(USpopPOPCHG2018$NAME)
data("fifty_states")
pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0)
prmap<-ggplot(USpopPOPCHG2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="khaki3")+
coord_fixed(1.0)+
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())
Total_plot<-ggplot(USpopPOPCHG2018, aes(map_id=statefull)) +
geom_map(aes(fill=StateValue), map=fifty_states) +
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())+
scale_fill_viridis(breaks=c(-300000,-100000,-25000,-5000,0),
labels=c('300K+','100K-300K','25K-100K','5K-25K','0-25K'),begin=1,end=0,option="cividis")+
guides(fill=guide_legend(title="Decrease pop by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
ggtitle("Population Decreases by State, 2018")
library(grid)
library(grDevices)
png(file="C://MSDS/Project2b.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1)
print(prmap,vp=v4)
dev.off()## png
## 2
knitr::include_graphics("/MSDS/Project2b.png") It’s easy to see that West Virginia and Louisiana is estimated to have experienced a decrease in population in 2018, as well as Illinois and New York. Puerto Rico is estimated to have experienced the greatest decrease in population.
Let’s calculate percentage of national and division population resides in each state or territory using projected population data for 2018. Let’s map percentage of national population by state.
USpopESTIMATE2018<-USpop4[which(USpopESTIMATE$Year==2018),]
USpopESTIMATE2018$StateValue<-as.numeric(USpopESTIMATE2018$StateValue)
USpopESTIMATE2018$RegionValue<-as.numeric(USpopESTIMATE2018$RegionValue)
USpopESTIMATE2018$NationalValue<-as.numeric(USpopESTIMATE2018$NationalValue)
USpopESTIMATE2018$NatPer <- USpopESTIMATE2018$StateValue/USpopESTIMATE2018$NationalValue
USpopESTIMATE2018$RegPer <- USpopESTIMATE2018$StateValue/USpopESTIMATE2018$RegionValue
USpopESTIMATE2018$statefull<-tolower(USpopESTIMATE2018$NAME)
data("fifty_states")
pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0)
prmap<-ggplot(USpopESTIMATE2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="grey49")+
coord_fixed(1.0)+
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())
Total_plot<-ggplot(USpopESTIMATE2018, aes(map_id=statefull)) +
geom_map(aes(fill=NatPer), map=fifty_states) +
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())+
scale_fill_viridis(breaks=c(.05,.1,.15,.20),
labels=c('0-5%','6-10%','11-15%','15% +'),begin=.3,end=1,option="cividis")+
guides(fill=guide_legend(title="Nat Pop % by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
ggtitle("National Population Percentage by State, 2018")
library(grid)
library(grDevices)
png(file="C://MSDS/Project2c.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1)
print(prmap,vp=v4)
dev.off()## png
## 2
knitr::include_graphics("/MSDS/Project2c.png")It’s apparent that the states with greatest populations are California, Texas, Florida and New York. With New York’s estimated population decreasing, it’s curious that it still accounts for one of the greatest percentages of national population.
Let’s map state percentage by region.
data("fifty_states")
pr<-map_data('worldHires','Puerto Rico')
pr<-subset(pr,long<0)
prmap<-ggplot(USpopESTIMATE2018)+geom_polygon(data=pr,aes(long,lat,group=group),fill="grey98")+
coord_fixed(1.0)+
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-68, -65), ylim = c(18.6,17.8))+
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())
Total_plot<-ggplot(USpopESTIMATE2018, aes(map_id=statefull)) +
geom_map(aes(fill=RegPer), map=fifty_states) +
expand_limits(x = fifty_states$long, y = fifty_states$lat) +
coord_map(projection = "mercator", xlim = c(-125, -65), ylim = c(50,23)) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
labs(x = "", y = "") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
rect = element_blank())+
scale_fill_viridis(breaks=c(.05,.1,.15,.20),
labels=c('0-5%','6-10%','11-15%','15% +'),begin=.3,end=1,option="cividis")+
guides(fill=guide_legend(title="Region Pop % by state/terr",size="legend",title.theme=element_text(size=9,angle=0)))+
ggtitle("Regional Population Percentage by State, 2018")
library(grid)
library(grDevices)
png(file="C://MSDS/Project2d.png",w=4000,h=4000,res=500,bg="transparent")
grid.newpage()
v1<-viewport(width = 1, height = 1, x = 0.5, y = 0.5) #plot area for the main map
v4<-viewport(width = 0.12, height = 0.12, x = 0.48, y = 0.30) #plot area for the inset map)
print(Total_plot,vp=v1)
print(prmap,vp=v4)
dev.off()## png
## 2
knitr::include_graphics("/MSDS/Project2d.png") Omitting Puerto Rico due to the fact that there is no region or division assignment, we see that California, as expected, dominates the West Region, while New York dominates the Northeast to a lesser degree. Texas is most populous in the South, and Illinois carries the Midwest by only a slight margin.
Such data could be used to form an argument for representation in Congress. Should a state such as California, with 12.1% of the national population, be represented by only 2 senators?