# 数据采集
library(RCurl)
library(XML)
# Download the BLS "The Economics Daily" article and read its first HTML
# table into `tables`.
web <- "https://www.bls.gov/opub/ted/2018/1-point-4-million-people-unemployed-27-weeks-or-more-in-july-2018-22-7-percent-of-all-unemployed.htm"
webcode <- getURL(web)
# `asText = TRUE` tells htmlParse() that `webcode` is the HTML itself rather
# than a file name or URL.
webhtml <- htmlParse(webcode, asText = TRUE)
# Column 1 is kept as text; the remaining four columns go through XML's
# "FormattedNumber" converter so comma-formatted figures become numeric.
tables <- readHTMLTable(
  webhtml,
  header = TRUE,
  colClasses = c("character", rep("FormattedNumber", 4))
)
# readHTMLTable() returns a list with one element per <table>; keep the first.
tables <- tables[[1]]
head(tables)
# 数据清理
# Clean the scraped table: rename the columns, turn the month labels into
# Date values, reshape to long format and rescale the counts.
library(tidyr)

names(tables) <- c("month", "<5W", "5-14W", "15-26W", ">27W")

# Prefix a day-of-month so every label is a complete date that matches the
# "%d-%b %Y" format used below.
tables$month <- paste("01", as.character(tables$month), sep = "-")

# `%b` is matched against the month names of the current LC_TIME locale, so
# English labels parse to NA under e.g. a Chinese locale. The "C" locale
# provides English month names on every platform, whereas the locale name
# "us" is only understood on Windows. LC_TIME is intentionally left changed
# for the rest of the session.
Sys.setlocale("LC_TIME", "C")
tables$month <- as.Date(tables$month, format = "%d-%b %Y")
head(tables)

# Wide -> long: every column except the first (month) becomes a
# (class, values) pair.
tables <- gather(tables, key = "class", value = "values", -1)
# Divide by one million to match the "Million" axis label used when plotting.
tables$values <- tables$values / 1000000
head(tables)
## 一定要避开as.Date的坑
# Demonstration of the as.Date() locale pitfall: show the locale settings,
# switch only the LC_TIME category, then show the settings again.
# The lines starting with "#[1]" are recorded console output.

# Before the change: every category, including LC_TIME, is Chinese.
Sys.getlocale()
#[1] "LC_COLLATE=Chinese (Simplified)_China.936;LC_CTYPE=Chinese (Simplified)_China.936;LC_MONETARY=Chinese (Simplified)_China.936;LC_NUMERIC=C;LC_TIME=Chinese (Simplified)_China.936"
# Change LC_TIME only; the call returns the name of the locale now in effect.
# NOTE(review): "us" looks like a Windows-style locale name - confirm it is
# accepted on other platforms before relying on it there.
Sys.setlocale("LC_TIME","us")
#[1] "English_United States.1252"
# After the change: only LC_TIME differs from the first listing.
Sys.getlocale()
#[1] "LC_COLLATE=Chinese (Simplified)_China.936;LC_CTYPE=Chinese (Simplified)_China.936;LC_MONETARY=Chinese (Simplified)_China.936;LC_NUMERIC=C;LC_TIME=English_United States.1252"
# 绘图
library(ggplot2)
# Stacked area chart of `values` over `month`, with one band per `class`.

# Use the classic theme as the session default, then define the tweaks
# applied to this plot: legend on top, laid out vertically, all text bold.
theme_set(theme_classic())
mytheme <- theme(
  legend.position = "top",
  legend.direction = "vertical",
  text = element_text(face = "bold")
)

# `class` drives both the outline colour and the fill colour.
p <- ggplot(
  tables,
  aes(x = month, y = values, colour = class, fill = class)
)

# The lines are stacked with the same position as the areas so each outline
# follows the top edge of its band. The y axis is forced to include 0 and has
# no padding, so the areas sit directly on the x axis.
p +
  geom_line(position = "stack", size = 1) +
  geom_area(position = "stack", alpha = 0.7) +
  labs(y = "Million", x = "Year") +
  mytheme +
  expand_limits(y = 0) +
  ggtitle("Number of unemployed by duration of unemployment, seasonally adjusted ") +
  scale_y_continuous(expand = c(0, 0))

LS0tDQp0aXRsZTogImdncGxvdCBwcmFjdGljZTQtbGluZXBsb3QiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIOaVsOaNrumHh+mbhg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoUkN1cmwpDQpsaWJyYXJ5KFhNTCkNCndlYjwtImh0dHBzOi8vd3d3LmJscy5nb3Yvb3B1Yi90ZWQvMjAxOC8xLXBvaW50LTQtbWlsbGlvbi1wZW9wbGUtdW5lbXBsb3llZC0yNy13ZWVrcy1vci1tb3JlLWluLWp1bHktMjAxOC0yMi03LXBlcmNlbnQtb2YtYWxsLXVuZW1wbG95ZWQuaHRtIg0Kd2ViY29kZTwtIGdldFVSTCh3ZWIpDQp3ZWJodG1sPC0gaHRtbFBhcnNlKHdlYmNvZGUgLCBhc1RleHQgPSBUKQ0KdGFibGVzPC0gcmVhZEhUTUxUYWJsZSh3ZWJodG1sLCBoZWFkZXIgPSAgVCAsIGNvbENsYXNzZXMgPSBjKCJjaGFyYWN0ZXIiICwgIkZvcm1hdHRlZE51bWJlciIgLCJGb3JtYXR0ZWROdW1iZXIiICwiRm9ybWF0dGVkTnVtYmVyIiwiRm9ybWF0dGVkTnVtYmVyIiApKQ0KdGFibGVzPC0gdGFibGVzW1sxXV0NCmhlYWQodGFibGVzKQ0KDQpgYGANCg0KIyDmlbDmja7muIXnkIYNCmBgYHtyfQ0KbmFtZXModGFibGVzKTwtIGMoIm1vbnRoIiAsICI8NVciLCAiNS0xNFciLCIxNS0yNlciICwiPjI3VyIpDQp0YWJsZXMkbW9udGg8LWFzLmNoYXJhY3Rlcih0YWJsZXMkbW9udGgpDQp0YWJsZXMkbW9udGg8LSBwYXN0ZSgiMDEiICwgdGFibGVzJG1vbnRoICwgc2VwID0gJy0nKQ0KU3lzLnNldGxvY2FsZSgiTENfVElNRSIsInVzIikgDQp0YWJsZXMkbW9udGg8LSBhcy5EYXRlKHRhYmxlcyRtb250aCwgZm9ybWF0ID0gIiVkLSViICVZIikNCmhlYWQodGFibGVzKQ0KbGlicmFyeSh0aWR5cikNCnRhYmxlczwtIGdhdGhlcih0YWJsZXMgLCBrZXkgPSAiY2xhc3MiICx2YWx1ZSA9ICJ2YWx1ZXMiICwgLTEpDQp0YWJsZXMkdmFsdWVzPC0gKHRhYmxlcyR2YWx1ZXMpLzEwMDAwMDANCmhlYWQodGFibGVzKQ0KYGBgDQoNCiMjIOS4gOWumuimgemBv+W8gGFzLkRhdGXnmoTlnZENCmBgYHtyfQ0KU3lzLmdldGxvY2FsZSgpDQojWzFdICJMQ19DT0xMQVRFPUNoaW5lc2UgKFNpbXBsaWZpZWQpX0NoaW5hLjkzNjtMQ19DVFlQRT1DaGluZXNlIChTaW1wbGlmaWVkKV9DaGluYS45MzY7TENfTU9ORVRBUlk9Q2hpbmVzZSAoU2ltcGxpZmllZClfQ2hpbmEuOTM2O0xDX05VTUVSSUM9QztMQ19USU1FPUNoaW5lc2UgKFNpbXBsaWZpZWQpX0NoaW5hLjkzNiINClN5cy5zZXRsb2NhbGUoIkxDX1RJTUUiLCJ1cyIpIA0KI1sxXSAiRW5nbGlzaF9Vbml0ZWQgU3RhdGVzLjEyNTIiDQogU3lzLmdldGxvY2FsZSgpDQojWzFdICJMQ19DT0xMQVRFPUNoaW5lc2UgKFNpbXBsaWZpZWQpX0NoaW5hLjkzNjtMQ19DVFlQRT1DaGluZXNlIChTaW1wbGlmaWVkKV9DaGluYS45MzY7TENfTU9ORVRBUlk9Q2hpbmVzZSAoU2ltcGxpZmllZClfQ2hpbmEuOTM2O0xDX05VTUVSSUM9QztMQ19USU1FPUVuZ2xpc2hfVW5pdGVkIFN0YXRlcy4xMjUyIg0K
YGBgDQoNCiMg57uY5Zu+DQoNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KdGhlbWVfc2V0KHRoZW1lX2NsYXNzaWMoKSkNCm15dGhlbWU8LSB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAidG9wIiAsIGxlZ2VuZC5kaXJlY3Rpb24gPSAidmVydGljYWwiICx0ZXh0ID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIpKQ0KcDwtIGdncGxvdCh0YWJsZXMgLCBhZXMoeCA9IG1vbnRoLCB5ID0gdmFsdWVzICwgY29sID0gY2xhc3MsIGZpbGwgPSBjbGFzcykpDQpwKyBnZW9tX2xpbmUocG9zaXRpb24gPSAic3RhY2siICxzaXplID0gMSkgK2dlb21fYXJlYShwb3NpdGlvbiA9ICJzdGFjayIgLGFscGhhPSAwLjcpICsgeWxhYigiTWlsbGlvbiIpICt4bGFiKCJZZWFyIikrbXl0aGVtZSArIGV4cGFuZF9saW1pdHMoeSA9IDAgKSArZ2d0aXRsZSgiTnVtYmVyIG9mIHVuZW1wbG95ZWQgYnkgZHVyYXRpb24gb2YgdW5lbXBsb3ltZW50LCBzZWFzb25hbGx5IGFkanVzdGVkICIpICtzY2FsZV95X2NvbnRpbnVvdXMoZXhwYW5kID0gYygwLDApKQ0KYGBgDQoNCg==