7.3 加強基本圖表
library(ggplot2)
# Load the regional share of international students from intl.csv.
# stringsAsFactors = TRUE keeps Region a factor on R >= 4.0 as well, so the
# structure printed by str() matches the recorded output that follows.
intl <- read.csv("data/intl.csv", stringsAsFactors = TRUE)
str(intl)
'data.frame': 8 obs. of 2 variables:
$ Region : Factor w/ 8 levels "Africa","Asia",..: 2 3 6 4 5 1 7 8
$ PercentOfIntl: num 0.531 0.201 0.098 0.09 0.054 0.02 0.015 0.002
7.3.1 Bar Plot with Quantities
# Bar chart: one bar per Region, bar height taken directly from
# PercentOfIntl, and each bar labelled with its raw value.
ggplot(data = intl, mapping = aes(x = Region, y = PercentOfIntl)) +
  geom_col() +
  geom_text(mapping = aes(label = PercentOfIntl))

7.3.2 Reorder by Column
# Turn Region into a factor whose levels run from the largest share to the
# smallest (reorder() sorts the levels by the negated percentage).
intl$Region <- with(intl, reorder(Region, -PercentOfIntl))
# Express the shares on a 0-100 scale rather than as fractions.
intl$PercentOfIntl <- 100 * intl$PercentOfIntl
# Dark blue bars in the new level order, value labels nudged above each bar,
# x tick labels rotated 45 degrees, and the x-axis title removed.
ggplot(data = intl, mapping = aes(x = Region, y = PercentOfIntl)) +
  geom_col(fill = "dark blue") +
  geom_text(mapping = aes(label = PercentOfIntl), vjust = -0.4) +
  ylab("Percent of International Students") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

7.3 全球國際學生數量
7.3.3 Data for Int'l Students
library(ggmap)
# Read the per-country international student counts, keeping text columns
# as plain character vectors.
intlall <- read.csv(file = "data/intlall.csv", stringsAsFactors = FALSE)
# A first look at the raw rows is left disabled:
# head(intlall)
# The NAs really stand for zero, so overwrite every NA cell with 0.
intlall[is.na(intlall)] <- 0
# Show the first rows again after the replacement.
head(intlall)
7.3.4 World Map
# Fetch the world map outlines as a data frame and inspect its structure.
world_map <- map_data(map = "world")
str(world_map)
'data.frame': 99338 obs. of 6 variables:
$ long : num -69.9 -69.9 -69.9 -70 -70.1 ...
$ lat : num 12.5 12.4 12.4 12.5 12.5 ...
$ group : num 1 1 1 1 1 1 1 1 1 1 ...
$ order : int 1 2 3 4 5 6 7 8 9 10 ...
$ region : chr "Aruba" "Aruba" "Aruba" "Aruba" ...
$ subregion: chr NA NA NA NA ...
7.3.5 Merge Map with Data
# Join the student counts onto the map rows: the map's `region` column is
# matched against the `Citizenship` column of intlall.
world_map <- merge(
  x = world_map,
  y = intlall,
  by.x = "region",
  by.y = "Citizenship"
)
str(world_map)
'data.frame': 63634 obs. of 12 variables:
$ region : chr "Albania" "Albania" "Albania" "Albania" ...
$ long : num 20.5 20.4 19.5 20.5 20.4 ...
$ lat : num 41.3 39.8 42.5 40.1 41.5 ...
$ group : num 6 6 6 6 6 6 6 6 6 6 ...
$ order : int 789 822 870 815 786 821 818 779 879 795 ...
$ subregion : chr NA NA NA NA ...
$ UG : num 3 3 3 3 3 3 3 3 3 3 ...
$ G : num 1 1 1 1 1 1 1 1 1 1 ...
$ SpecialUG : num 0 0 0 0 0 0 0 0 0 0 ...
$ SpecialG : num 0 0 0 0 0 0 0 0 0 0 ...
$ ExhangeVisiting: num 0 0 0 0 0 0 0 0 0 0 ...
$ Total : int 4 4 4 4 4 4 4 4 4 4 ...
7.3.6 Plot the Map
# Draw every polygon group as a white shape with a black outline, using a
# Mercator projection.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_map(projection = "mercator")

7.3.7 Polygon points need to be ordered by Group
# Sort the rows by polygon group, then by the `order` column within each
# group, so the outline points are drawn in sequence.
world_map <- world_map[order(world_map[["group"]], world_map[["order"]]), ]
# Redraw the polygons from the sorted rows.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# + coord_map("mercator")  # points are no longer all joined; only the outer outline is connected
7.3.8 Identify and Fix Mismatches between Map and Data
# Find how China is spelled in the student data (case-insensitive match,
# returning the matching values rather than their positions).
# TRUE is spelled out because the shorthand T is an ordinary, reassignable
# variable.
grep("China", intlall$Citizenship, ignore.case = TRUE, value = TRUE)
[1] "China (People's Republic Of)"
# Run the same search over the distinct region names of the world map.
# TRUE is spelled out because the shorthand T is an ordinary, reassignable
# variable.
grep("China", unique(map_data("world")$region),
  ignore.case = TRUE, value = TRUE
)
[1] "China"
# China is missing from the map because the two tables spell its name
# differently.
# Rename it in intlall so that it matches the map's region name.
intlall[["Citizenship"]] <- replace(
  intlall[["Citizenship"]],
  intlall[["Citizenship"]] == "China (People's Republic Of)",
  "China"
)
# Rebuild the merged table from a fresh copy of the world map, then sort the
# rows by group and by the `order` column again.
world_map <- merge(
  x = map_data("world"),
  y = intlall,
  by.x = "region",
  by.y = "Citizenship"
)
world_map <- world_map[order(world_map[["group"]], world_map[["order"]]), ]
# Shade each polygon by the Total column.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), color = "black") # +

# coord_map("mercator")
7.3.9 Different Orientations
# Other projections are available; this one uses the "ortho" projection
# with orientation = c(20, 30, 0).
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), color = "black") +
  coord_map(projection = "ortho", orientation = c(20, 30, 0))

# The same map drawn with orientation = c(-37, 175, 0).
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), color = "black") +
  coord_map(projection = "ortho", orientation = c(-37, 175, 0))
7.3 資料結構轉換
7.3.10 Reshaping before Plotting
library(ggplot2)
library(reshape2)
# Read the household composition table and inspect its columns.
households <- read.csv(file = "data/households.csv")
str(households)
'data.frame': 8 obs. of 7 variables:
$ Year : int 1970 1980 1990 1995 2000 2005 2010 2012
$ MarriedWChild : num 40.3 30.9 26.3 25.5 24.1 22.9 20.9 19.6
$ MarriedWOChild: num 30.3 29.9 29.8 28.9 28.7 28.3 28.8 29.1
$ OtherFamily : num 10.6 12.9 14.8 15.6 16 16.7 17.4 17.8
$ MenAlone : num 5.6 8.6 9.7 10.2 10.7 11.3 11.9 12.3
$ WomenAlone : num 11.5 14 14.9 14.7 14.8 15.3 14.8 15.2
$ OtherNonfamily: num 1.7 3.6 4.6 5 5.7 5.6 6.2 6.1
# Reshape households to long form, keeping Year as the identifier (melt()
# stores the former column names in `variable` and their numbers in
# `value`), then draw one coloured line with point markers per category.
# The nested call avoids relying on a pipe operator that neither ggplot2 nor
# reshape2 provides, and `id.vars` is spelled out instead of being partially
# matched from `id`.
ggplot(
  data = melt(households, id.vars = "Year"),
  mapping = aes(x = Year, y = value, color = variable)
) +
  geom_line(size = 2) +
  geom_point(size = 5) +
  ylab("Percentage of Households")

LS0tDQp0aXRsZTogIkFTNy0wQyDkuJbnlYzlnLDlnJYiDQphdXRob3I6ICLmnY7liq3nq5EgTTA2NDAyMDAyMyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCjxicj4NCg0KYGBge3IgZWNobz1ULCBtZXNzYWdlPUYsIGNhY2hlPUYsIHdhcm5pbmc9Rn0NClN5cy5zZXRsb2NhbGUoIkxDX0FMTCIsIkMiKQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpvcHRpb25zKGRpZ2l0cz00LCBzY2lwZW49MTIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShtYXBzKQ0KbGlicmFyeShnZ21hcCkNCmxpYnJhcnkocmVzaGFwZTIpDQpgYGANCg0KLSAtIC0NCg0KIyMjIDcuMyDliqDlvLfln7rmnKzlnJbooagNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQoNCiMgTG9hZCBvdXIgZGF0YSwgd2hpY2ggbGl2ZXMgaW4gaW50bC5jc3YNCmludGwgPSByZWFkLmNzdigiZGF0YS9pbnRsLmNzdiIpDQpzdHIoaW50bCkNCmBgYA0KDQojIyMjIyA3LjMuMSBCYXIgUGxvdCB3aXRoIFF1YW50aXRpZXMNCmBgYHtyfQ0KIyBXZSB3YW50IHRvIG1ha2UgYSBiYXIgcGxvdCB3aXRoIHJlZ2lvbiBvbiB0aGUgWCBheGlzDQojIGFuZCBQZXJjZW50YWdlIG9uIHRoZSB5LWF4aXMuDQpnZ3Bsb3QoaW50bCwgYWVzKHg9UmVnaW9uLCB5PVBlcmNlbnRPZkludGwpKSArDQogIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKw0KICBnZW9tX3RleHQoYWVzKGxhYmVsPVBlcmNlbnRPZkludGwpKSAgICAj5qKd54uA5ZyWDQpgYGANCg0KIyMjIyMgNy4zLjIgUmVvcmRlciBieSBDb2x1bW4NCmBgYHtyfQ0KIyBNYWtlIFJlZ2lvbiBhbiBvcmRlcmVkIGZhY3Rvcg0KIyBXZSBjYW4gZG8gdGhpcyB3aXRoIHRoZSByZS1vcmRlciBjb21tYW5kIGFuZCB0cmFuc2Zvcm0gY29tbWFuZC4gDQppbnRsID0gdHJhbnNmb3JtKGludGwsIFJlZ2lvbiA9IHJlb3JkZXIoUmVnaW9uLCAtUGVyY2VudE9mSW50bCkpIA0KDQojIE1ha2UgdGhlIHBlcmNlbnRhZ2VzIG91dCBvZiAxMDAgaW5zdGVhZCBvZiBmcmFjdGlvbnMNCmludGwkUGVyY2VudE9mSW50bCA9IGludGwkUGVyY2VudE9mSW50bCAqIDEwMCAgI+eZvuWIhuavlA0KDQojIE1ha2UgdGhlIHBsb3QNCmdncGxvdChpbnRsLCBhZXMoeD1SZWdpb24sIHk9UGVyY2VudE9mSW50bCkpICsNCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiLCBmaWxsPSJkYXJrIGJsdWUiKSArDQogIGdlb21fdGV4dChhZXMobGFiZWw9UGVyY2VudE9mSW50bCksIHZqdXN0PS0wLjQpICsNCiAgeWxhYigiUGVyY2VudCBvZiBJbnRlcm5hdGlvbmFsIFN0dWRlbnRzIikgKw0KICB0aGVtZShheGlzLnRpdGxlLnggPSBlbGVtZW50X2JsYW5rKCksIA0KICAgICAgICBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQ0KYGBgDQo8YnI+DQoNCi0gLSAtDQoNCiMjIyA3LjMg5YWo55CD5ZyL6Zqb5a2455Sf5pW46YePDQoNCiMjIyMjIDcuMy4zIERhdGEgZm9yIEludGwnIFN0dWRlbnRzDQpgYGB7cn0N
CmxpYnJhcnkoZ2dtYXApDQoNCiMgTG9hZCBpbiB0aGUgaW50ZXJuYXRpb25hbCBzdHVkZW50IGRhdGENCmludGxhbGwgPSByZWFkLmNzdigiZGF0YS9pbnRsYWxsLmNzdiIsc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSkNCg0KIyBMZXRzIGxvb2sgYXQgdGhlIGZpcnN0IGZldyByb3dzDQojIGhlYWQoaW50bGFsbCkNCg0KIyBUaG9zZSBOQXMgYXJlIHJlYWxseSAwcywgYW5kIHdlIGNhbiByZXBsYWNlIHRoZW0gZWFzaWx5DQppbnRsYWxsW2lzLm5hKGludGxhbGwpXSA9IDANCg0KIyBOb3cgbGV0cyBsb29rIGFnYWluDQpoZWFkKGludGxhbGwpIA0KYGBgDQoNCiMjIyMjIDcuMy40IFdvcmxkIE1hcA0KYGBge3J9DQojIExvYWQgdGhlIHdvcmxkIG1hcA0Kd29ybGRfbWFwID0gbWFwX2RhdGEoIndvcmxkIikNCnN0cih3b3JsZF9tYXApDQpgYGANCg0KIyMjIyMgNy4zLjUgTWVyZ2UgTWFwIHdpdGggRGF0YQ0KYGBge3J9DQojIExldHMgbWVyZ2UgaW50bGFsbCBpbnRvIHdvcmxkX21hcCB1c2luZyB0aGUgbWVyZ2UgY29tbWFuZA0Kd29ybGRfbWFwID0gbWVyZ2Uod29ybGRfbWFwLCBpbnRsYWxsLCBieS54ID0icmVnaW9uIiwgYnkueSA9ICJDaXRpemVuc2hpcCIpICAgI+S7pXjnmoRyZWdpb24geeeahENpdGl6ZW5zaGlw5YGa5ZCI5L21DQpzdHIod29ybGRfbWFwKQ0KYGBgDQoNCiMjIyMjIDcuMy42IFBsb3QgdGhlIE1hcA0KYGBge3J9DQpnZ3Bsb3Qod29ybGRfbWFwLCBhZXMoeD1sb25nLCB5PWxhdCwgZ3JvdXA9Z3JvdXApKSArDQogIGdlb21fcG9seWdvbihmaWxsPSJ3aGl0ZSIsIGNvbG9yPSJibGFjayIpICsNCiAgY29vcmRfbWFwKCJtZXJjYXRvciIpDQpgYGANCg0KIyMjIyMgNy4zLjcgUG9seWdvbiBwb2ludHMgbmVlZCB0byBiZSBvcmRlcmVkIGJ5IEdyb3VwDQpgYGB7cn0NCiMgUmVvcmRlciB0aGUgZGF0YQ0Kd29ybGRfbWFwID0gd29ybGRfbWFwW29yZGVyKHdvcmxkX21hcCRncm91cCwgd29ybGRfbWFwJG9yZGVyKSxdDQoNCiMgUmVkbyB0aGUgcGxvdA0KZ2dwbG90KHdvcmxkX21hcCwgYWVzKHg9bG9uZywgeT1sYXQsIGdyb3VwPWdyb3VwKSkgKw0KICBnZW9tX3BvbHlnb24oZmlsbD0id2hpdGUiLCBjb2xvcj0iYmxhY2siKQ0KICAjICsgY29vcmRfbWFwKCJtZXJjYXRvciIpICPkuI3lsIflkITlgIvpu57pg73nm7jpgKPvvIzlj6rpgKPmnIDlpJblnIgNCmBgYA0KDQojIyMjIyA3LjMuOCBJZGVudGlmeSBhbmQgRml4IE1pc21hdGNocyBiZXR3ZWVuIE1hcCBhbmQgRGF0YQ0KYGBge3J9DQojIExldHMgbG9vayBmb3IgQ2hpbmENCmdyZXAoIkNoaW5hIiwgaW50bGFsbCRDaXRpemVuc2hpcCwgaWdub3JlLmNhc2U9VCwgdmFsdWU9VCkNCmdyZXAoIkNoaW5hIiwgdW5pcXVlKG1hcF9kYXRhKCJ3b3JsZCIpJHJlZ2lvbiksIGlnbm9yZS5jYXNlPVQsIHZhbHVlPVQpDQoj5Zyw5ZyW5Zyo5Lit5ZyL6YKj5aGK57y65LqGDQoj5YWp5YCL6KGo55qE5ZCN5a2X5LiN5ZCMDQpgYGANCg0KYGBge3J9DQojIExldHMgImZpeCIgdGhhdCBpbiB0
aGUgaW50bGFsbCBkYXRhc2V0DQppbnRsYWxsJENpdGl6ZW5zaGlwW2ludGxhbGwkQ2l0aXplbnNoaXA9PSJDaGluYSAoUGVvcGxlJ3MgUmVwdWJsaWMgT2YpIl0gPSANCiAgIkNoaW5hIg0KDQojIFdlJ2xsIHJlcGVhdCBvdXIgbWVyZ2UgYW5kIG9yZGVyIGZyb20gYmVmb3JlDQp3b3JsZF9tYXAgPSBtZXJnZShtYXBfZGF0YSgid29ybGQiKSwgaW50bGFsbCwgDQogICAgICAgICAgICAgICAgICBieS54ID0icmVnaW9uIiwNCiAgICAgICAgICAgICAgICAgIGJ5LnkgPSAiQ2l0aXplbnNoaXAiKQ0Kd29ybGRfbWFwID0gd29ybGRfbWFwW29yZGVyKHdvcmxkX21hcCRncm91cCwgd29ybGRfbWFwJG9yZGVyKSxdDQoNCmdncGxvdCh3b3JsZF9tYXAsIGFlcyh4PWxvbmcsIHk9bGF0LCBncm91cD1ncm91cCkpICsNCiAgZ2VvbV9wb2x5Z29uKGFlcyhmaWxsPVRvdGFsKSwgY29sb3I9ImJsYWNrIikgIysNCiAgI2Nvb3JkX21hcCgibWVyY2F0b3IiKQ0KYGBgDQoNCiMjIyMjIDcuMy45IERpZmZlcmVudCBPcmllbnRhdGlvbnMNCmBgYHtyfQ0KIyBXZSBjYW4gdHJ5IG90aGVyIHByb2plY3Rpb25zIC0gdGhpcyBvbmUgaXMgdmlzdWFsbHkgaW50ZXJlc3RpbmcNCmdncGxvdCh3b3JsZF9tYXAsIGFlcyh4PWxvbmcsIHk9bGF0LCBncm91cD1ncm91cCkpICsNCiAgZ2VvbV9wb2x5Z29uKGFlcyhmaWxsPVRvdGFsKSwgY29sb3I9ImJsYWNrIikgKw0KICBjb29yZF9tYXAoIm9ydGhvIiwgb3JpZW50YXRpb249YygyMCwgMzAsIDApKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KHdvcmxkX21hcCwgYWVzKHg9bG9uZywgeT1sYXQsIGdyb3VwPWdyb3VwKSkgKw0KICBnZW9tX3BvbHlnb24oYWVzKGZpbGw9VG90YWwpLCBjb2xvcj0iYmxhY2siKSArDQogIGNvb3JkX21hcCgib3J0aG8iLCBvcmllbnRhdGlvbj1jKC0zNywgMTc1LCAwKSkNCmBgYA0KPGJyPg0KDQotIC0gLQ0KDQojIyMgNy4zIOizh+aWmee1kOani+i9ieaPmw0KDQojIyMjIyA3LjMuMTAgUmVzaGFwaW5nIGJlZm9yZSBQbG90aW5nDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocmVzaGFwZTIpDQojIE5vdyBsZXRzIGxvYWQgb3VyIGRhdGFmcmFtZQ0KaG91c2Vob2xkcyA9IHJlYWQuY3N2KCJkYXRhL2hvdXNlaG9sZHMuY3N2IikNCnN0cihob3VzZWhvbGRzKQ0KYGBgDQoNCmBgYHtyfQ0KIyBQbG90IGl0DQptZWx0KGhvdXNlaG9sZHMsIGlkPSJZZWFyIikgJT4lIA0KICBnZ3Bsb3QoYWVzKHg9WWVhciwgeT12YWx1ZSwgY29sb3I9dmFyaWFibGUpKSArDQogIGdlb21fbGluZShzaXplPTIpICsgZ2VvbV9wb2ludChzaXplPTUpICsgIA0KICB5bGFiKCJQZXJjZW50YWdlIG9mIEhvdXNlaG9sZHMiKQ0KYGBgDQo8YnI+DQoNCi0gLSAtDQoNCjxicj48YnI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCi5jYXB0aW9uIHsNCiAgY29sb3I6ICM3Nzc7DQogIG1hcmdpbi10b3A6IDEwcHg7DQp9DQpwIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnByZSB7DQogIHdvcmQt
YnJlYWs6IG5vcm1hbDsNCiAgd29yZC13cmFwOiBub3JtYWw7DQogIGxpbmUtaGVpZ2h0OiAxOw0KfQ0KcHJlIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnAsbGkgew0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KLnJ7DQogIGxpbmUtaGVpZ2h0OiAxLjI7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDg4MDA7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoM3sNCiAgY29sb3I6ICNiMzZiMDA7DQogIGJhY2tncm91bmQ6ICNmZmUwYjM7DQogIGxpbmUtaGVpZ2h0OiAyOw0KICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KaDV7DQogIGNvbG9yOiAjMDA2MDAwOw0KICBiYWNrZ3JvdW5kOiAjZmZmZmUwOw0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmVtew0KICBjb2xvcjogIzAwMDBjMDsNCiAgYmFja2dyb3VuZDogI2YwZjBmMDsNCiAgfQ0KPC9zdHlsZT4NCg0K