Load Libraries
# Attach the plotting and mapping packages used throughout this document.
# library() stops with an error when a package is not installed, whereas
# require() only warns and returns FALSE, deferring the failure to the first
# call that needs the package.
library(ggplot2)
library(maps)
library(ggmap)
Load and have a look at Data
# Read the WHO indicators from the working directory and inspect the
# structure of the resulting data frame.
WHO <- read.csv(file = "WHO.csv")
str(WHO)
## 'data.frame': 194 obs. of 13 variables:
## $ Country : Factor w/ 194 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Region : Factor w/ 6 levels "Africa","Americas",..: 3 4 1 4 1 2 2 4 6 4 ...
## $ Population : int 29825 3162 38482 78 20821 89 41087 2969 23050 8464 ...
## $ Under15 : num 47.4 21.3 27.4 15.2 47.6 ...
## $ Over60 : num 3.82 14.93 7.17 22.86 3.84 ...
## $ FertilityRate : num 5.4 1.75 2.83 NA 6.1 2.12 2.2 1.74 1.89 1.44 ...
## $ LifeExpectancy : int 60 74 73 82 51 75 76 71 82 81 ...
## $ ChildMortality : num 98.5 16.7 20 3.2 163.5 ...
## $ CellularSubscribers : num 54.3 96.4 99 75.5 48.4 ...
## $ LiteracyRate : num NA NA NA NA 70.1 99 97.8 99.6 NA NA ...
## $ GNI : num 1140 8820 8310 NA 5230 ...
## $ PrimarySchoolEnrollmentMale : num NA NA 98.2 78.4 93.1 91.1 NA NA 96.9 NA ...
## $ PrimarySchoolEnrollmentFemale: num NA NA 96.4 79.4 78.2 84.5 NA NA 97.5 NA ...
Make a plot with base R
# Base-graphics scatterplot: GNI on the x-axis, FertilityRate on the y-axis.
plot(WHO$GNI, WHO$FertilityRate)
Same plot with ggplot2, which looks nicer
# Keep the data and the x/y mapping in one object so that the following
# examples only have to add a layer to it.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))
# Draw one point per row.
scatterplot +
  geom_point()
Use geom_line instead of geom_point
# Same mapping, but connect the observations with a line instead of points.
scatterplot +
  geom_line()
Go back to using geom_point
# Point layer again, with default colour, size and shape.
scatterplot +
  geom_point()
Change color and shape
# Fixed (non-mapped) appearance: blue, size 3, point shape 17.
scatterplot +
  geom_point(
    color = "blue",
    size = 3,
    shape = 17
  )
Experiment with color and shape
# Another fixed appearance: dark red, size 3, point shape 8.
scatterplot +
  geom_point(
    color = "darkred",
    size = 3,
    shape = 8
  )
Set title
# Blue size-3 points (shape 17) with a plot title added on top.
scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")
Create pdf file
# Store the titled plot in a variable so it can be printed to a file device.
# The title is written on a single line: a line break inside the string
# literal would be kept as a newline in the rendered title.
fertilityGNIplot <- scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")
# Open the PDF device, draw the plot into it, then close the device so the
# file is written out.
pdf("MyPlot.pdf")
print(fertilityGNIplot)
dev.off()
## png
## 2
Create svg file
# Same export as above, this time through the SVG device: open, draw, close.
svg(filename = "MyPlot.svg")
print(fertilityGNIplot)
dev.off()
## png
## 2
Same graph in red with stars for point marks
# Titled plot drawn with dark red star markers (shape 8) instead of the blue
# triangles, so that the code matches what this step describes.
scatterplot +
  geom_point(colour = "darkred", size = 3, shape = 8) +
  ggtitle("Fertility Rate vs. Gross National Income")
Color by region (categorical variable)
# Map the Region factor to point colour; each of its levels gets its own hue.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, color = Region)
) +
  geom_point()
Color by life expectancy (continuous variable)
# Map the integer LifeExpectancy column to point colour.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, color = LifeExpectancy)
) +
  geom_point()
Check relationship between FertilityRate and Under15
# Scatterplot of the share of population under 15 against the fertility rate.
ggplot(data = WHO, mapping = aes(x = FertilityRate, y = Under15)) +
  geom_point()
Use log to obtain linear relationship
# Same scatterplot with the x variable log-transformed inside the mapping.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point()
Perform a linear regression and check the model
# Fit Under15 as a linear function of log(FertilityRate) and print the
# coefficient table and fit statistics.
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)
##
## Call:
## lm(formula = Under15 ~ log(FertilityRate), data = WHO)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3131 -1.7742 0.0446 1.7440 7.7174
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6540 0.4478 17.09 <2e-16 ***
## log(FertilityRate) 22.0547 0.4175 52.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 181 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.9391, Adjusted R-squared: 0.9387
## F-statistic: 2790 on 1 and 181 DF, p-value: < 2.2e-16
Add regression line to our graph
# Scatterplot with a fitted linear-model smoother layered on top.
ggplot(WHO, aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm")
Same with 0.99 confidence level
# Linear smoother with the confidence level set to 0.99.
ggplot(WHO, aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.99)
Hide confidence level
# Linear smoother with the confidence band switched off (se = FALSE).
ggplot(WHO, aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
Colored regression line
# Linear smoother drawn in orange.
ggplot(WHO, aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", colour = "orange")
Load data and have a look
# Read the theft records, keeping text columns as character vectors so the
# Date column can be parsed afterwards.
mvt <- read.csv(file = "mvt.csv", stringsAsFactors = FALSE)
str(mvt)
## 'data.frame': 191641 obs. of 3 variables:
## $ Date : chr "12/31/12 23:15" "12/31/12 22:00" "12/31/12 22:00" "12/31/12 22:00" ...
## $ Latitude : num 41.8 41.9 42 41.8 41.8 ...
## $ Longitude: num -87.6 -87.7 -87.8 -87.7 -87.6 ...
Convert Date to a date-time and add the variables Weekday and Hour
# Parse the "month/day/2-digit year hour:minute" strings into date-times.
mvt$Date <- strptime(mvt$Date, format = "%m/%d/%y %H:%M")
# Name of the weekday for every record.
mvt$Weekday <- weekdays(mvt$Date)
# The parsed value is a POSIXlt object, so its hour component can be read
# directly.
mvt$Hour <- mvt$Date$hour
str(mvt)
## 'data.frame': 191641 obs. of 5 variables:
## $ Date : POSIXlt, format: "2012-12-31 23:15:00" "2012-12-31 22:00:00" ...
## $ Latitude : num 41.8 41.9 42 41.8 41.8 ...
## $ Longitude: num -87.6 -87.7 -87.8 -87.7 -87.6 ...
## $ Weekday : chr "Monday" "Monday" "Monday" "Monday" ...
## $ Hour : int 23 22 22 22 21 20 20 20 19 18 ...
Check thefts per day
# Count the records for each weekday name.
table(mvt$Weekday)
##
## Friday Monday Saturday Sunday Thursday Tuesday Wednesday
## 29284 27397 27118 26316 27319 26791 27416
Create variable WeekdayCounts to store this info
# Turn the weekday frequency table into a data frame (columns Var1 and Freq)
# so it can be handed to ggplot.
WeekdayCounts <- as.data.frame(
  table(mvt$Weekday)
)
str(WeekdayCounts)
## 'data.frame': 7 obs. of 2 variables:
## $ Var1: Factor w/ 7 levels "Friday","Monday",..: 1 2 3 4 5 6 7
## $ Freq: int 29284 27397 27118 26316 27319 26791 27416
Make a plot
# group = 1 puts all weekdays into one group so they are joined by a single
# line.
ggplot(WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1))
The plot was messy because the days were ordered alphabetically; set the factor levels explicitly to fix it
# Rebuild Var1 as an ordered factor whose levels run Monday through Sunday,
# then redraw the line plot with the days in that order.
WeekdayCounts$Var1 <- factor(
  WeekdayCounts$Var1,
  ordered = TRUE,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
ggplot(WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1))
Same graph but add labels
# Weekday line plot with descriptive axis labels. The y label is kept on one
# line: a line break inside the string literal would be kept as a newline in
# the rendered label.
ggplot(WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1)) +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")
Add hour variable
# Cross-tabulate the records: one row per weekday, one column per hour of day.
table(mvt$Weekday, mvt$Hour)
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## Friday 1873 932 743 560 473 602 839 1203 1268 1286 938 822
## Monday 1900 825 712 527 415 542 772 1123 1323 1235 971 737
## Saturday 2050 1267 985 836 652 508 541 650 858 1039 946 789
## Sunday 2028 1236 1019 838 607 461 478 483 615 864 884 787
## Thursday 1856 816 696 508 400 534 799 1135 1298 1301 932 731
## Tuesday 1691 777 603 464 414 520 845 1118 1175 1174 948 786
## Wednesday 1814 790 619 469 396 561 862 1140 1329 1237 947 763
##
## 12 13 14 15 16 17 18 19 20 21 22 23
## Friday 1207 857 937 1140 1165 1318 1623 1652 1736 1881 2308 1921
## Monday 1129 824 958 1059 1136 1252 1518 1503 1622 1815 2009 1490
## Saturday 1204 767 963 1086 1055 1084 1348 1390 1570 1702 2078 1750
## Sunday 1192 789 959 1037 1083 1160 1389 1342 1706 1696 2079 1584
## Thursday 1093 752 831 1044 1131 1258 1510 1537 1668 1776 2134 1579
## Tuesday 1108 762 908 1071 1090 1274 1553 1496 1696 1816 2044 1458
## Wednesday 1225 804 863 1075 1076 1289 1580 1507 1718 1748 2093 1511
# Convert the weekday-by-hour table to a data frame with one row per
# (weekday, hour) combination.
DayHourCounts <- as.data.frame(
  table(mvt$Weekday, mvt$Hour)
)
str(DayHourCounts)
## 'data.frame': 168 obs. of 3 variables:
## $ Var1: Factor w/ 7 levels "Friday","Monday",..: 1 2 3 4 5 6 7 1 2 3 ...
## $ Var2: Factor w/ 24 levels "0","1","2","3",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ Freq: int 1873 1900 2050 2028 1856 1691 1814 932 825 1267 ...
# Var2 is a factor with levels "0", "1", ...; converting through character
# yields the hour written in the label rather than the factor's integer code.
DayHourCounts$Hour <- as.numeric(as.character(DayHourCounts$Var2))
# One line per weekday (Var1) across the hours of the day.
ggplot(DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1))
Change color and thickness of lines to make the graph more readable
# Colour each weekday's line differently and draw the lines thicker.
ggplot(DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(
    aes(group = Var1, color = Var1),
    size = 2
  )
On Saturday and Sunday there are fewer thefts in the morning hours
Make lines transparent
# Same coloured lines, made half transparent so overlapping lines stay visible.
ggplot(DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(
    aes(group = Var1, color = Var1),
    size = 2,
    alpha = 0.5
  )
Put the weekday levels of DayHourCounts in chronological order and make a heatmap
# Order the weekday factor Monday through Sunday so the heatmap rows follow
# the calendar rather than the alphabet.
DayHourCounts$Var1 <- factor(
  DayHourCounts$Var1,
  ordered = TRUE,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
# One tile per (hour, weekday) cell, filled according to the count.
ggplot(DayHourCounts, aes(x = Hour, y = Var1)) +
  geom_tile(aes(fill = Freq))
Many thefts take place around midnight, especially on weekends
Improve our graph
# Give the fill legend a readable title and drop the y-axis title.
ggplot(DayHourCounts, aes(x = Hour, y = Var1)) +
  geom_tile(aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts") +
  theme(axis.title.y = element_blank())
Change colour
# Same heatmap with a white-to-red fill gradient.
ggplot(DayHourCounts, aes(x = Hour, y = Var1)) +
  geom_tile(aes(fill = Freq)) +
  scale_fill_gradient(
    name = "Total MV Thefts",
    low = "white",
    high = "red"
  ) +
  theme(axis.title.y = element_blank())
Friday night is very busy for thieves…
Heatmaps with ggplot
load chicago map
# Fetch a map for the location "chicago" at zoom level 11 and draw it.
chicago <- get_map(location = "chicago", zoom = 11)
ggmap(chicago)
load athens map
# Fetch a map for the location "athens" at zoom level 11 and draw it.
athens <- get_map(location = "athens", zoom = 11)
ggmap(athens)
back to chicago
# The Chicago map fetched earlier is still held in `chicago` (nothing has
# overwritten it), so redraw it instead of downloading the same map again.
ggmap(chicago)
add the first 100 thefts to the map (plotting all ~190,000 would produce one big blob)
# Overlay the first 100 rows of mvt as points on the Chicago map.
ggmap(chicago) +
  geom_point(
    data = mvt[1:100, ],
    mapping = aes(x = Longitude, y = Latitude)
  )
Round coordinates to check thefts in regions and check our data
# Round both coordinates to two decimals and count the records in every
# (longitude, latitude) cell.
LatLonCounts <- as.data.frame(
  table(round(mvt$Longitude, 2), round(mvt$Latitude, 2))
)
str(LatLonCounts)
## 'data.frame': 1638 obs. of 3 variables:
## $ Var1: Factor w/ 42 levels "-87.93","-87.92",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Var2: Factor w/ 39 levels "41.64","41.65",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Freq: int 0 0 0 0 0 0 0 0 0 0 ...
Convert (longitude) and (latitude) to numerical and make a plot coloured by frequency
# Var1 / Var2 are factors whose labels are the rounded coordinates; convert
# through character to recover the numeric values.
LatLonCounts$Long <- as.numeric(as.character(LatLonCounts$Var1))
LatLonCounts$Lat <- as.numeric(as.character(LatLonCounts$Var2))
# One point per cell, with both colour and size driven by the count.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, color = Freq, size = Freq)
  )
Change colour
# Same point map with a yellow-to-red colour gradient.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  scale_colour_gradient(low = "yellow", high = "red")
Use the geom_tile geometry to make the graph look like a typical heatmap
# Red tiles over the map; the count drives each tile's transparency.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )
load the data
# Read the per-state murder data and inspect its structure.
murders <- read.csv(file = "murders.csv")
str(murders)
## 'data.frame': 51 obs. of 6 variables:
## $ State : Factor w/ 51 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Population : int 4779736 710231 6392017 2915918 37253956 5029196 3574097 897934 601723 19687653 ...
## $ PopulationDensity: num 94.65 1.26 57.05 56.43 244.2 ...
## $ Murders : int 199 31 352 130 1811 117 131 48 131 987 ...
## $ GunMurders : int 135 19 232 93 1257 65 97 38 99 669 ...
## $ GunOwnership : num 0.517 0.578 0.311 0.553 0.213 0.347 0.167 0.255 0.036 0.245 ...
load us map
# Polygon outline data for the "state" map, as a data frame.
statesMap <- map_data(map = "state")
check statesMap
# Inspect the columns of the state outline data.
str(statesMap)
## 'data.frame': 15537 obs. of 6 variables:
## $ long : num -87.5 -87.5 -87.5 -87.5 -87.6 ...
## $ lat : num 30.4 30.4 30.4 30.3 30.3 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "alabama" "alabama" "alabama" "alabama" ...
## $ subregion: chr NA NA NA NA ...
plot the map
# Draw every polygon group with a white fill and a black outline, using a
# Mercator projection.
ggplot(statesMap, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    fill = "white",
    color = "black"
  ) +
  coord_map("mercator")
convert state names to lower and merge our datasets
# statesMap stores state names in lower case in `region`; create a matching
# key in murders and join the two tables on it.
murders$region <- tolower(murders$State)
murderMap <- merge(x = statesMap, y = murders, by = "region")
str(murderMap)
## 'data.frame': 15537 obs. of 12 variables:
## $ region : chr "alabama" "alabama" "alabama" "alabama" ...
## $ long : num -87.5 -87.5 -87.5 -87.5 -87.6 ...
## $ lat : num 30.4 30.4 30.4 30.3 30.3 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ subregion : chr NA NA NA NA ...
## $ State : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Population : int 4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 ...
## $ PopulationDensity: num 94.7 94.7 94.7 94.7 94.7 ...
## $ Murders : int 199 199 199 199 199 199 199 199 199 199 ...
## $ GunMurders : int 135 135 135 135 135 135 135 135 135 135 ...
## $ GunOwnership : num 0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 ...
murder counts in us map
# Fill each state by its murder count on a black-to-red gradient.
ggplot(
  murderMap,
  aes(x = long, y = lat, group = group, fill = Murders)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    low = "black",
    high = "red",
    guide = "legend"
  )
each state is coloured by its murder count: the more murders, the brighter the red (states with few murders are nearly black)
we see that California and Texas have the largest counts, but isn’t that simply because they are the most populous states?
Let’s make a new map showing each state’s population
# Same map, filled by population instead of murder count.
ggplot(
  murderMap,
  aes(x = long, y = lat, group = group, fill = Population)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    low = "black",
    high = "red",
    guide = "legend"
  )
the two maps look very similar
Let’s make a new variable, the murder count per 100,000 of population, and reconstruct our map
# Murders per 100,000 inhabitants.
murderMap$MurderRate <- with(murderMap, Murders / Population * 100000)
# Redraw the map, filled by the rate.
ggplot(
  murderMap,
  aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    low = "black",
    high = "red",
    guide = "legend"
  )
our map is still too dark, why?
Washington DC is an outlier with a very high murder rate, but because of its small size it cannot be seen on the map
Let’s make our map again, removing states with MurderRate > 10
# Same rate map with a titled legend and the fill scale restricted to the
# range 0.9 to 10.
ggplot(
  murderMap,
  aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    low = "black",
    high = "red",
    guide = "legend",
    name = "Murder Rate per 100k",
    limits = c(0.9, 10)
  )
now we have a variety of colors
Louisiana has a high murder rate
load the data, using stringsAsFactors=FALSE
# Read the international student counts, keeping Citizenship as character,
# and show the first rows.
intlall <- read.csv(file = "intlall.csv", stringsAsFactors = FALSE)
head(intlall)
## Citizenship UG G SpecialUG SpecialG ExhangeVisiting Total
## 1 Albania 3 1 0 0 0 4
## 2 Antigua and Barbuda NA NA NA 1 NA 1
## 3 Argentina NA 19 NA NA NA 19
## 4 Armenia 3 2 NA NA NA 5
## 5 Australia 6 32 NA NA 1 39
## 6 Austria NA 11 NA NA 5 16
convert NAs to 0
# Replace every missing cell in the data frame with 0, then re-check the
# first rows.
intlall[is.na(intlall)] <- 0
head(intlall)
## Citizenship UG G SpecialUG SpecialG ExhangeVisiting Total
## 1 Albania 3 1 0 0 0 4
## 2 Antigua and Barbuda 0 0 0 1 0 1
## 3 Argentina 0 19 0 0 0 19
## 4 Armenia 3 2 0 0 0 5
## 5 Australia 6 32 0 0 1 39
## 6 Austria 0 11 0 0 5 16
load world Atlas and have a look
# Polygon outline data for the "world" map, as a data frame.
world_map <- map_data(map = "world")
str(world_map)
## 'data.frame': 101913 obs. of 6 variables:
## $ long : num -69.9 -69.9 -69.9 -70 -70.1 ...
## $ lat : num 12.5 12.4 12.4 12.5 12.5 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "Aruba" "Aruba" "Aruba" "Aruba" ...
## $ subregion: chr NA NA NA NA ...
merge our datasets
# Join the outlines with the student counts: `region` in the map data is
# matched against `Citizenship` in intlall.
world_map <- merge(
  world_map, intlall,
  by.x = "region",
  by.y = "Citizenship"
)
str(world_map)
## 'data.frame': 65153 obs. of 12 variables:
## $ region : chr "Albania" "Albania" "Albania" "Albania" ...
## $ long : num 20.5 19.4 20.6 19.4 19.4 ...
## $ lat : num 41.3 42.3 40.1 42.1 42.3 ...
## $ group : num 6 6 6 6 6 6 6 6 6 6 ...
## $ order : int 789 871 813 864 873 818 823 822 874 869 ...
## $ subregion : chr NA NA NA NA ...
## $ UG : num 3 3 3 3 3 3 3 3 3 3 ...
## $ G : num 1 1 1 1 1 1 1 1 1 1 ...
## $ SpecialUG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SpecialG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ExhangeVisiting: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Total : int 4 4 4 4 4 4 4 4 4 4 ...
make a map with geom_polygon
# Draw the merged outlines with a white fill and black borders, Mercator
# projection.
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    fill = "white",
    color = "black"
  ) +
  coord_map("mercator")
something is not right, merging our datasets reordered our data
Reorder our data points by the group variable (one polygon each) and, within each group, by the order variable
# Sort the rows by polygon group and, within a group, by the point order.
world_map <- world_map[order(world_map$group, world_map$order), ]
Now construct our map again
# Redraw the outlines now that the rows are back in drawing order.
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    fill = "white",
    color = "black"
  ) +
  coord_map("mercator")
The US is missing because US students do not count as international students
Some other countries are missing after the merge because they have different names in the intlall and world_map datasets
Make a table to check what is going on
# List every Citizenship value in intlall together with its number of rows.
table(intlall$Citizenship)
##
## Albania Antigua and Barbuda
## 1 1
## Argentina Armenia
## 1 1
## Australia Austria
## 1 1
## Bahrain Bangladesh
## 1 1
## Belarus Belgium
## 1 1
## Bolivia Bosnia-Hercegovina
## 1 1
## Brazil Bulgaria
## 1 1
## Cambodia Cameroon
## 1 1
## Canada Chile
## 1 1
## China (People's Republic Of) Colombia
## 1 1
## Costa Rica Cote d'Ivoire
## 1 1
## Croatia Cyprus
## 1 1
## Czech Republic Denmark
## 1 1
## Ecuador Egypt
## 1 1
## El Salvador Estonia
## 1 1
## Ethiopia Finland
## 1 1
## France Georgia
## 1 1
## Germany Ghana
## 1 1
## Greece Guatemala
## 1 1
## Haiti Hong Kong
## 1 1
## Hungary Iceland
## 1 1
## India Indonesia
## 1 1
## Iran Iraq
## 1 1
## Ireland Israel
## 1 1
## Italy Jamaica
## 1 1
## Japan Jordan
## 1 1
## Kazakhstan Kenya
## 1 1
## Korea, South Kuwait
## 1 1
## Latvia Lebanon
## 1 1
## Lithuania Macedonia
## 1 1
## Malaysia Mauritius
## 1 1
## Mexico Moldova
## 1 1
## Mongolia Montenegro
## 1 1
## Morocco Nepal
## 1 1
## Netherlands New Zealand
## 1 1
## Nigeria Norway
## 1 1
## Pakistan Paraguay
## 1 1
## Peru Philippines
## 1 1
## Poland Portugal
## 1 1
## Qatar Romania
## 1 1
## Russia Rwanda
## 1 1
## Saudi Arabia Serbia
## 1 1
## Sierra Leone Singapore
## 1 1
## Slovakia Somalia
## 1 1
## South Africa Spain
## 1 1
## Sri Lanka St. Lucia
## 1 1
## St. Vincent & The Grenadines Sudan
## 1 1
## Sweden Switzerland
## 1 1
## Syria Taiwan
## 1 1
## Tanzania Thailand
## 1 1
## Trinidad & Tobago Tunisia
## 1 1
## Turkey Uganda
## 1 1
## Ukraine United Arab Emirates
## 1 1
## United Kingdom Unknown
## 1 1
## Uruguay Venezuela
## 1 1
## Vietnam West Bank
## 1 1
## Zambia Zimbabwe
## 1 1
In intlall, China is written “China (People’s Republic Of)”, while in the world map data it is plain “China”
Update the intlall data frame and check again
# Rename the China entry so that it matches the spelling used in the map data.
intlall$Citizenship[
  intlall$Citizenship == "China (People's Republic Of)"
] <- "China"
table(intlall$Citizenship)
##
## Albania Antigua and Barbuda
## 1 1
## Argentina Armenia
## 1 1
## Australia Austria
## 1 1
## Bahrain Bangladesh
## 1 1
## Belarus Belgium
## 1 1
## Bolivia Bosnia-Hercegovina
## 1 1
## Brazil Bulgaria
## 1 1
## Cambodia Cameroon
## 1 1
## Canada Chile
## 1 1
## China Colombia
## 1 1
## Costa Rica Cote d'Ivoire
## 1 1
## Croatia Cyprus
## 1 1
## Czech Republic Denmark
## 1 1
## Ecuador Egypt
## 1 1
## El Salvador Estonia
## 1 1
## Ethiopia Finland
## 1 1
## France Georgia
## 1 1
## Germany Ghana
## 1 1
## Greece Guatemala
## 1 1
## Haiti Hong Kong
## 1 1
## Hungary Iceland
## 1 1
## India Indonesia
## 1 1
## Iran Iraq
## 1 1
## Ireland Israel
## 1 1
## Italy Jamaica
## 1 1
## Japan Jordan
## 1 1
## Kazakhstan Kenya
## 1 1
## Korea, South Kuwait
## 1 1
## Latvia Lebanon
## 1 1
## Lithuania Macedonia
## 1 1
## Malaysia Mauritius
## 1 1
## Mexico Moldova
## 1 1
## Mongolia Montenegro
## 1 1
## Morocco Nepal
## 1 1
## Netherlands New Zealand
## 1 1
## Nigeria Norway
## 1 1
## Pakistan Paraguay
## 1 1
## Peru Philippines
## 1 1
## Poland Portugal
## 1 1
## Qatar Romania
## 1 1
## Russia Rwanda
## 1 1
## Saudi Arabia Serbia
## 1 1
## Sierra Leone Singapore
## 1 1
## Slovakia Somalia
## 1 1
## South Africa Spain
## 1 1
## Sri Lanka St. Lucia
## 1 1
## St. Vincent & The Grenadines Sudan
## 1 1
## Sweden Switzerland
## 1 1
## Syria Taiwan
## 1 1
## Tanzania Thailand
## 1 1
## Trinidad & Tobago Tunisia
## 1 1
## Turkey Uganda
## 1 1
## Ukraine United Arab Emirates
## 1 1
## United Kingdom Unknown
## 1 1
## Uruguay Venezuela
## 1 1
## Vietnam West Bank
## 1 1
## Zambia Zimbabwe
## 1 1
Remerge our data frames, reorder data rows and reconstruct our map
# Start again from fresh world outline data, join the corrected student
# counts, and restore the drawing order.
world_map <- merge(
  map_data("world"), intlall,
  by.x = "region",
  by.y = "Citizenship"
)
world_map <- world_map[order(world_map$group, world_map$order), ]
# Outline-only map of the matched countries.
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    fill = "white",
    color = "black"
  ) +
  coord_map("mercator")
Reconstruct our map, filling each country by its total number of students
# Fill each country polygon according to the Total column.
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    aes(fill = Total),
    color = "black"
  ) +
  coord_map("mercator")
Russia and the UK are still missing, again due to naming conventions
Same map using an orthographic projection
# Same filled map in the "ortho" projection with orientation c(20, 30, 0).
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    aes(fill = Total),
    color = "black"
  ) +
  coord_map("ortho", orientation = c(20, 30, 0))
A different view
# "ortho" projection again, with orientation c(-37, 175, 0).
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(
    aes(fill = Total),
    color = "black"
  ) +
  coord_map("ortho", orientation = c(-37, 175, 0))