ggplot2-Exercise3

1

画出《ggplot2: Elegant Graphics for Data Analysis》一书第86-87页的几张图。注意书中提供的代码有瑕疵，请修正后执行。

失业率曲线：

# Attach the plotting dependencies. library() stops with an error when a
# package is missing, whereas require() only returns FALSE and lets later
# code fail with a less obvious message. TRUE is spelled out because T can be
# reassigned.
library(ggplot2, quietly = TRUE)
library(scales, quietly = TRUE) # provides date_breaks() and date_format()

# Base plot: US unemployment (economics$unemploy) drawn as a line over time.
# qplot() is deprecated since ggplot2 3.4.0, so the plot is built explicitly
# with ggplot() + geom_line(). The x/y mappings are kept at plot level, as
# qplot() did, so layers added to `unemp` later can inherit them.
# NOTE (original author's observation): the `limits` of scale_x_date() behave
# a little oddly here -- with a given year as the lower limit, the first label
# displayed is the third year after it.
# The outer parentheses print the plot in addition to assigning it.
(unemp <- ggplot(economics, aes(x = date, y = unemploy)) +
   geom_line() +
   labs(x = "", y = "No. unemployed (1000s)") +
   scale_x_date(limits = as.Date(c("1964-06-30", "2009-06-30")),
                labels = date_format("%Y"),
                breaks = date_breaks("5 years")) +
   scale_y_continuous(breaks = seq(0, 12000, 2000))
)

plot of chunk ggplot2ex

加上纵线：

# Drop the first three rows of `presidential` (the earliest presidents).
presid <- presidential[-seq_len(3), ]

# Ranges of the date and unemployment columns of `economics`.
xrng <- range(economics$date)
yrng <- range(economics$unemploy)

# Draw a vertical line at the start date of each remaining presidency.
unemp +
  geom_vline(
    data = presid,
    mapping = aes(xintercept = as.numeric(start))
  )

plot of chunk unnamed-chunk-1

绘制表示政党执政期间的透明色块：

# Shade each presidency with a translucent rectangle coloured by party.
# alpha = 0.2 has to stay outside aes(); inside it would be treated as a
# mapped aesthetic and enter the legend together with fill. Reference:
# http://stackoverflow.com/questions/11714951/remove-extra-legends-in-ggplot2
# The two leading NULLs in aes() are its positional x and y arguments.
unemp +
  geom_rect(
    data = presid,
    mapping = aes(NULL, NULL, fill = party, xmin = start, xmax = end),
    ymin = yrng[1], ymax = yrng[2],
    alpha = 0.2
  ) +
  scale_fill_manual(
    values = c(Republican = "red", Democratic = "blue")
  )

plot of chunk unnamed-chunk-2

加上总统名字：

# Add each president's name to the most recently created plot, positioned at
# the start of the term and at the lower end of the unemployment range.
last_plot() +
  geom_text(
    data = presid,
    mapping = aes(x = start, y = yrng[1], label = name),
    hjust = 0, vjust = 0, size = 3
  )

plot of chunk unnamed-chunk-3

加上图片说明：

# strwrap() re-flows the text into lines using a width of 40; the resulting
# pieces are then joined into a single string separated by newlines.
caption <- paste0(
  strwrap(
    "Unemployment rates in the US have
varied a lot over the years",
    width = 40
  ),
  collapse = "\n"
)

# Put the caption in the top-right corner of the original unemployment plot.
# The anchor point is (upper end of xrng, upper end of yrng); hjust = 1 and
# vjust = 1 shift the text horizontally and vertically so that it sits in the
# top-right corner relative to that point.
unemp +
  geom_text(
    data = data.frame(x = xrng[2], y = yrng[2]),
    mapping = aes(x, y, label = caption),
    vjust = 1, hjust = 1, size = 4
  )

plot of chunk unnamed-chunk-4

标出失业率最高点：

# Rows of `economics` where unemployment equals its maximum. which() discards
# NA comparisons, matching what subset() does.
highest <- economics[which(economics$unemploy == max(economics$unemploy)), ]

# Highlight the peak with a semi-transparent red point on the base plot.
unemp +
  geom_point(
    data = highest,
    colour = "red", alpha = 0.5, size = 3
  )

plot of chunk unnamed-chunk-5

2

在美国地图上画出us.cities数据集中所有城市的位置，用适当的方式表达其人口（例如散点的直径），如果发生遮盖的情况也请适当地处理使展现的信息更全面，更合理，更美观。

library(maps)
# Load the us.cities data set.
data(us.cities)

# Drop Alaska ("AK") and Hawaii ("HI") so that only cities in the contiguous
# United States remain.
continental <- subset(
  us.cities,
  !(country.etc == "AK" | country.etc == "HI")
)

# Sort cities by ascending population. This is the order in which their
# points are drawn, so points of populous cities are not covered by points of
# smaller ones.
continental <- continental[order(continental[["pop"]]), ]

# Base map: city coordinates as the plot-level mapping, empty axis titles, a
# darker grey panel background, state borders, and the title text anchored at
# the maximum longitude/latitude of the data (right- and top-justified).
usmap <- ggplot(data = continental, mapping = aes(x = long, y = lat)) +
  labs(x = "", y = "") +
  theme(panel.background = element_rect(fill = "gray70")) +
  borders("state", size = 0.5) +
  geom_text(
    data = data.frame(
      x = max(continental$long),
      y = max(continental$lat)
    ),
    mapping = aes(x, y, label = "American Cities' Population"),
    size = 8, hjust = 1, vjust = 1
  )

# Draw the cities on the base map: colour encodes population on a log10
# continuous gradient, point shape encodes the city type.
# Per the maps::us.cities documentation, `capital` is 0 for ordinary cities,
# 1 for the national capital and 2 for state capitals. The previous breaks
# c(0, 2) left level "1" drawn with its own shape but absent from the legend,
# so every level is now listed. Breaks are given as character values because
# factor(capital) has character levels.
# guides() fixes the legend order (colour bar first, shapes second) and
# reverses the order of the shape keys.
usmap +
  geom_point(aes(shape = factor(capital), colour = pop), size = 3) +
  scale_colour_gradient("population", trans = "log10",
                        low = "white", high = "red3") +
  scale_shape_discrete(name = "city type",
                       breaks = c("0", "1", "2"),
                       labels = c("others", "national capital",
                                  "state capitals")) +
  guides(colour = guide_colourbar(order = 1),
         shape = guide_legend(order = 2, reverse = TRUE))

plot of chunk unnamed-chunk-6