2010 Census Data: http://pasdc.hbg.psu.edu/Data/PAStats/tabid/1014/Default.aspx
require(ggplot2)
county <- c('Adams', 'Allegheny', 'Armstrong', 'Beaver', 'Bedford', 'Berks', 'Blair',
'Bradford', 'Bucks', 'Butler', 'Cambria', 'Cameron', 'Carbon', 'Centre',
'Chester', 'Clarion', 'Clearfield', 'Clinton', 'Columbia', 'Crawford',
'Cumberland', 'Dauphin', 'Delaware', 'Elk', 'Erie', 'Fayette', 'Forest',
'Franklin', 'Fulton', 'Greene', 'Huntingdon', 'Indiana', 'Jefferson',
'Juniata', 'Lackawanna', 'Lancaster', 'Lawrence', 'Lebanon', 'Lehigh',
'Luzerne', 'Lycoming', 'McKean', 'Mercer', 'Mifflin', 'Monroe', 'Montgomery',
'Montour', 'Northampton', 'Northumberland', 'Perry', 'Philadelphia', 'Pike',
'Potter', 'Schuylkill', 'Snyder', 'Somerset', 'Sullivan', 'Susquehanna',
'Tioga', 'Union', 'Venango', 'Warren', 'Washington', 'Wayne',
'Westmoreland', 'Wyoming', 'York')
mage <- c(41.3, 41.3, 44.5, 44.4, 43.9, 39.1, 42, 43.4, 42, 41.5, 43.8, 48.2, 43.9, 28.7, 39.3,
39.4, 42.9, 38.5, 39.5, 41.7, 40.3, 39.4, 38.7, 45.1, 38.6, 43.6, 43, 40.1, 41.8, 41.1,
41.2, 38.3, 43, 40.9, 41.8, 38.2, 43.6, 41, 39.4, 42.5, 41.1, 41.5, 42.8, 42.5, 40.3,
40.6, 43.9, 40.9, 43.4, 41.1, 33.5, 43.7, 44.9, 43.2, 39.2, 44.3, 49.9, 45.1, 42.4,
38.5, 44.3, 45.1, 43.6, 45.9, 45.1, 42.3, 40.1)
dat <- data.frame(county = county, mage = mage)
summary(dat$mage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28.7 40.2 41.8 41.8 43.7 49.9
summ <- data.frame(dat[which(dat$county %in% c("Centre", "Lackawanna", "Philadelphia",
"Allegheny", "Perry", "Sullivan")),])
print(summ[order(summ$mage),])
## county mage
## 14 Centre 28.7
## 51 Philadelphia 33.5
## 50 Perry 41.1
## 2 Allegheny 41.3
## 35 Lackawanna 41.8
## 57 Sullivan 49.9
g <- ggplot(dat, aes(x=mage)) +
geom_histogram(aes(y = ..density..), binwidth = 1) +
geom_density(linetype = "dashed", col="lightslategray") +
geom_vline(xintercept=median(dat$mage), size=4, alpha=0.4, col="black") +
geom_vline(xintercept=dat[which(dat$county == "Perry"), "mage"],
col = "blue") +
geom_vline(xintercept=dat[which(dat$county == "Lackawanna"), "mage"],
col = "red") +
geom_vline(xintercept=dat[which(dat$county == "Allegheny"), "mage"],
col = "orange") +
geom_vline(xintercept=dat[which(dat$county == "Centre"), "mage"],
col = "green") +
geom_vline(xintercept=dat[which(dat$county == "Philadelphia"), "mage"],
col = "purple") +
geom_vline(xintercept=dat[which(dat$county == "Sullivan"), "mage"],
col = "yellow4") +
theme_bw() +
theme(axis.title.x = element_text(vjust=-0.45),
axis.title.y = element_text(vjust=1.2),
legend.position="none",
text = element_text(size=16, family="Times New Roman"),
axis.text.x = element_text(face = "bold", size=18)) +
annotate("text", x = dat[which(dat$county == "Perry"), "mage"]-1,
y = 0.19, color = "blue", size = 4,
label = "Perry", family="Times New Roman") +
annotate("text", x = dat[which(dat$county == "Lackawanna"), "mage"]+2,
y = 0.20, color = "red", size = 4,
label = "Lackawanna", family="Times New Roman") +
annotate("text", x = dat[which(dat$county == "Allegheny"), "mage"],
y = 0.21, color = "orange", size = 4,
label = "Allegheny", family="Times New Roman") +
annotate("text", x = dat[which(dat$county == "Centre"), "mage"],
y = 0.15, color = "darkgreen", size = 4,
label = "Centre", family="Times New Roman") +
annotate("text", x = dat[which(dat$county == "Philadelphia"), "mage"],
y = 0.15, color = "purple", size = 4,
label = "Philadelphia", family="Times New Roman") +
annotate("text", x = dat[which(dat$county == "Sullivan"), "mage"],
y = 0.15, color = "yellow4", size = 4,
label = "Sullivan", family="Times New Roman") +
annotate("text", x = median(dat$mage),
y = 0.05, color = "lightgray", size = 4,
label = "Median", family="Times New Roman") +
labs(x = "Median Age", title = "Median Age of Pennsylvania Counties")
plot(g)