Load Libraries

# Attach the plotting and mapping packages used throughout this report.
# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less obvious error.
library(ggplot2)
library(maps)
library(ggmap)

Who Dataset

Load and have a look at Data

# Read the WHO country indicators and inspect the column types.
WHO <- read.csv(file = "WHO.csv")
str(WHO)

## 'data.frame':    194 obs. of  13 variables:
##  $ Country                      : Factor w/ 194 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Region                       : Factor w/ 6 levels "Africa","Americas",..: 3 4 1 4 1 2 2 4 6 4 ...
##  $ Population                   : int  29825 3162 38482 78 20821 89 41087 2969 23050 8464 ...
##  $ Under15                      : num  47.4 21.3 27.4 15.2 47.6 ...
##  $ Over60                       : num  3.82 14.93 7.17 22.86 3.84 ...
##  $ FertilityRate                : num  5.4 1.75 2.83 NA 6.1 2.12 2.2 1.74 1.89 1.44 ...
##  $ LifeExpectancy               : int  60 74 73 82 51 75 76 71 82 81 ...
##  $ ChildMortality               : num  98.5 16.7 20 3.2 163.5 ...
##  $ CellularSubscribers          : num  54.3 96.4 99 75.5 48.4 ...
##  $ LiteracyRate                 : num  NA NA NA NA 70.1 99 97.8 99.6 NA NA ...
##  $ GNI                          : num  1140 8820 8310 NA 5230 ...
##  $ PrimarySchoolEnrollmentMale  : num  NA NA 98.2 78.4 93.1 91.1 NA NA 96.9 NA ...
##  $ PrimarySchoolEnrollmentFemale: num  NA NA 96.4 79.4 78.2 84.5 NA NA 97.5 NA ...

Make a plot with base R

# Base-graphics scatter plot of fertility rate against gross national income.
plot(x = WHO$GNI, y = WHO$FertilityRate)

Same plot with ggplot, quite nicer

# Build the ggplot object (data + aesthetic mapping) once, then add a point layer.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))
scatterplot +
  geom_point()

Use geom_line instead of geom_point

# Draw the same mapping as a connected line instead of points.
scatterplot +
  geom_line()

Go back to using geom_point

# Draw the mapping as individual points.
scatterplot +
  geom_point()

Change color and shape

# Blue, larger points drawn with shape 17.
scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17)

Experiment with color and shape

# Dark red, larger points drawn with shape 8.
scatterplot +
  geom_point(colour = "darkred", size = 3, shape = 8)

Set title

# Same blue points, now with a plot title.
scatterplot +
  geom_point(color = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")

Create pdf file

# Store the titled plot, then render it into a PDF file.
# The title is kept on a single source line: a line break inside the string
# literal would become a literal newline in the rendered title.
fertilityGNIplot <- scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")
pdf("MyPlot.pdf")
print(fertilityGNIplot)
# dev.off() closes the PDF device so the file is completely written.
dev.off()

## png 
##   2

Create svg file

# Render the stored plot into an SVG file; dev.off() closes the device.
svg(filename = "MyPlot.svg")
print(fertilityGNIplot)
dev.off()

## png 
##   2

Same graph in red with stars for point marks

# Red points drawn as stars (shape 8), matching the heading of this step;
# the previous code repeated the blue-triangle plot unchanged.
scatterplot +
  geom_point(colour = "red", size = 3, shape = 8) +
  ggtitle("Fertility Rate vs. Gross National Income")

Color by region (categorical variable)

# Map the Region column to point colour.
ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate, colour = Region)) +
  geom_point()

Color by life expectancy (numerical variable, so ggplot uses a continuous colour gradient)

# Map the LifeExpectancy column to point colour.
ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate, colour = LifeExpectancy)) +
  geom_point()

Check relationship between FertilityRate and Under15

# Share of population under 15 against fertility rate.
ggplot(data = WHO, mapping = aes(x = FertilityRate, y = Under15)) +
  geom_point()

Use log to obtain linear relationship

# Same plot with the fertility rate on a natural-log scale.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point()

Perform a linear regression and check the model

# Fit Under15 as a linear function of log(FertilityRate) and print the fit summary.
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)

## 
## Call:
## lm(formula = Under15 ~ log(FertilityRate), data = WHO)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3131  -1.7742   0.0446   1.7440   7.7174 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.6540     0.4478   17.09   <2e-16 ***
## log(FertilityRate)  22.0547     0.4175   52.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 181 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.9391, Adjusted R-squared:  0.9387 
## F-statistic:  2790 on 1 and 181 DF,  p-value: < 2.2e-16

Add regression line to our graph

# Overlay a fitted linear-model line on the scatter plot.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm")

Same with 0.99 confidence level

# Fitted line with the confidence level set to 0.99.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.99)

Hide confidence level

# Fitted line with the confidence band switched off (se = FALSE).
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

Colored regression line

# Fitted line drawn in orange.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", color = "orange")

MVT dataset

Load data and have a look

# Read the motor-vehicle-theft records, keeping text columns as character.
mvt <- read.csv(file = "mvt.csv", stringsAsFactors = FALSE)
str(mvt)

## 'data.frame':    191641 obs. of  3 variables:
##  $ Date     : chr  "12/31/12 23:15" "12/31/12 22:00" "12/31/12 22:00" "12/31/12 22:00" ...
##  $ Latitude : num  41.8 41.9 42 41.8 41.8 ...
##  $ Longitude: num  -87.6 -87.7 -87.8 -87.7 -87.6 ...

Convert Date to a date-time and add the variables Weekday and Hour

# Parse the "month/day/2-digit-year hour:minute" strings; strptime() returns POSIXlt.
mvt$Date = strptime(mvt$Date, format="%m/%d/%y %H:%M")
# Day name of each timestamp (weekdays() returns names in the current locale).
mvt$Weekday = weekdays(mvt$Date)
# POSIXlt exposes its components as fields, so $hour yields the hour of day.
mvt$Hour = mvt$Date$hour
# Inspect the data frame with the converted and newly added columns.
str(mvt)

## 'data.frame':    191641 obs. of  5 variables:
##  $ Date     : POSIXlt, format: "2012-12-31 23:15:00" "2012-12-31 22:00:00" ...
##  $ Latitude : num  41.8 41.9 42 41.8 41.8 ...
##  $ Longitude: num  -87.6 -87.7 -87.8 -87.7 -87.6 ...
##  $ Weekday  : chr  "Monday" "Monday" "Monday" "Monday" ...
##  $ Hour     : int  23 22 22 22 21 20 20 20 19 18 ...

Check thefts per day

# Count thefts per weekday name.
table(mvt[["Weekday"]])

## 
##    Friday    Monday  Saturday    Sunday  Thursday   Tuesday Wednesday 
##     29284     27397     27118     26316     27319     26791     27416

Create variable WeekdayCounts to store this info

# Turn the per-weekday counts into a data frame (columns Var1 and Freq) for plotting.
WeekdayCounts <- as.data.frame(table(mvt$Weekday))
str(WeekdayCounts)

## 'data.frame':    7 obs. of  2 variables:
##  $ Var1: Factor w/ 7 levels "Friday","Monday",..: 1 2 3 4 5 6 7
##  $ Freq: int  29284 27397 27118 26316 27319 26791 27416

Make a plot

# group = 1 puts all rows in one group so a single line connects the weekdays.
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))

The plot was messy because the days were ordered alphabetically; set the factor levels explicitly to fix it

# Re-create Var1 as an ordered factor running Monday..Sunday so the x axis
# follows calendar order, then redraw the line plot.
WeekdayCounts$Var1 <- factor(
  WeekdayCounts$Var1,
  ordered = TRUE,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))

Same graph but add labels

# Weekday line plot with axis labels.
# The y label is kept on one source line: a line break inside the string
# literal would show up as a literal newline in the axis title.
ggplot(WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1)) +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

Add hour variable

# Cross-tabulate thefts by weekday (rows) and hour of day (columns).
table(mvt[["Weekday"]], mvt[["Hour"]])

##            
##                0    1    2    3    4    5    6    7    8    9   10   11
##   Friday    1873  932  743  560  473  602  839 1203 1268 1286  938  822
##   Monday    1900  825  712  527  415  542  772 1123 1323 1235  971  737
##   Saturday  2050 1267  985  836  652  508  541  650  858 1039  946  789
##   Sunday    2028 1236 1019  838  607  461  478  483  615  864  884  787
##   Thursday  1856  816  696  508  400  534  799 1135 1298 1301  932  731
##   Tuesday   1691  777  603  464  414  520  845 1118 1175 1174  948  786
##   Wednesday 1814  790  619  469  396  561  862 1140 1329 1237  947  763
##            
##               12   13   14   15   16   17   18   19   20   21   22   23
##   Friday    1207  857  937 1140 1165 1318 1623 1652 1736 1881 2308 1921
##   Monday    1129  824  958 1059 1136 1252 1518 1503 1622 1815 2009 1490
##   Saturday  1204  767  963 1086 1055 1084 1348 1390 1570 1702 2078 1750
##   Sunday    1192  789  959 1037 1083 1160 1389 1342 1706 1696 2079 1584
##   Thursday  1093  752  831 1044 1131 1258 1510 1537 1668 1776 2134 1579
##   Tuesday   1108  762  908 1071 1090 1274 1553 1496 1696 1816 2044 1458
##   Wednesday 1225  804  863 1075 1076 1289 1580 1507 1718 1748 2093 1511

# Long-format weekday x hour counts: Var1 = weekday, Var2 = hour, Freq = count.
DayHourCounts <- as.data.frame(table(mvt$Weekday, mvt$Hour))
str(DayHourCounts)

## 'data.frame':    168 obs. of  3 variables:
##  $ Var1: Factor w/ 7 levels "Friday","Monday",..: 1 2 3 4 5 6 7 1 2 3 ...
##  $ Var2: Factor w/ 24 levels "0","1","2","3",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ Freq: int  1873 1900 2050 2028 1856 1691 1814 932 825 1267 ...

# Var2 is a factor; go through character so we get the hour value itself
# rather than the factor's internal level codes.
DayHourCounts$Hour <- as.numeric(as.character(DayHourCounts$Var2))
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1))

Change color and thickness of lines to make the graph more readable

# One thicker line per weekday, coloured by weekday.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1, colour = Var1), size = 2)

On Saturday and Sunday there are fewer thefts in the morning hours

Make lines transparent

# Same lines at 50% opacity so overlapping weekdays remain visible.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1, colour = Var1), size = 2, alpha = 0.5)

Put the weekdays of DayHourCounts in calendar order and make a heatmap

# Order the weekday factor Monday..Sunday, then draw one tile per
# (hour, weekday) cell filled by the theft count.
DayHourCounts$Var1 <- factor(
  DayHourCounts$Var1,
  ordered = TRUE,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq))

Many thefts take place around midnight, especially on weekends

Improve our graph

# Heatmap with a legend title and without the y-axis title.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts") +
  theme(axis.title.y = element_blank())

Change colour

# Heatmap with a white-to-red fill gradient.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts", low = "white", high = "red") +
  theme(axis.title.y = element_blank())

Friday night is very busy for thieves…

Heatmaps with ggplot

load chicago map

# Fetch a base map for the location string "chicago" at zoom level 11 and draw it.
chicago <- get_map(location = "chicago", zoom = 11)
ggmap(chicago)

load athens map

# Fetch a base map for the location string "athens" at zoom level 11 and draw it.
athens <- get_map(location = "athens", zoom = 11)
ggmap(athens)

back to chicago

# Fetch the "chicago" base map again and draw it.
chicago <- get_map(location = "chicago", zoom = 11)
ggmap(chicago)

add the first 100 thefts in the data to the map (if we plotted all 190,000 it would be one big blob)

# Plot the first 100 rows of mvt as points on the Chicago base map.
ggmap(chicago) +
  geom_point(
    data = mvt[seq_len(100), ],
    mapping = aes(x = Longitude, y = Latitude)
  )

Round coordinates to check thefts in regions and check our data

# Bin coordinates to two decimals and count thefts per (longitude, latitude) cell.
LatLonCounts <- as.data.frame(
  table(
    round(mvt$Longitude, digits = 2),
    round(mvt$Latitude, digits = 2)
  )
)
str(LatLonCounts)

## 'data.frame':    1638 obs. of  3 variables:
##  $ Var1: Factor w/ 42 levels "-87.93","-87.92",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Var2: Factor w/ 39 levels "41.64","41.65",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq: int  0 0 0 0 0 0 0 0 0 0 ...

Convert (longitude) and (latitude) to numerical and make a plot coloured by frequency

# The binned coordinates are factors; convert via character to recover the
# coordinate values, then plot one point per cell sized and coloured by count.
LatLonCounts$Long <- as.numeric(as.character(LatLonCounts$Var1))
LatLonCounts$Lat <- as.numeric(as.character(LatLonCounts$Var2))
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, colour = Freq, size = Freq)
  )

Change colour

# Same point map with a yellow-to-red colour gradient.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, colour = Freq, size = Freq)
  ) +
  scale_colour_gradient(low = "yellow", high = "red")

Use the geom_tile geometry to make our graph look like a typical heatmap

# Red tiles whose opacity is mapped to the theft count of each cell.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )

Murders Dataset

load the data

# Read the per-state murder statistics and inspect the columns.
murders <- read.csv(file = "murders.csv")
str(murders)

## 'data.frame':    51 obs. of  6 variables:
##  $ State            : Factor w/ 51 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Population       : int  4779736 710231 6392017 2915918 37253956 5029196 3574097 897934 601723 19687653 ...
##  $ PopulationDensity: num  94.65 1.26 57.05 56.43 244.2 ...
##  $ Murders          : int  199 31 352 130 1811 117 131 48 131 987 ...
##  $ GunMurders       : int  135 19 232 93 1257 65 97 38 99 669 ...
##  $ GunOwnership     : num  0.517 0.578 0.311 0.553 0.213 0.347 0.167 0.255 0.036 0.245 ...

load us map

# Polygon outlines of the US states as a data frame.
statesMap <- map_data(map = "state")

check statesMap

# Inspect the polygon data: coordinates, polygon group, drawing order, region name.
str(object = statesMap)

## 'data.frame':    15537 obs. of  6 variables:
##  $ long     : num  -87.5 -87.5 -87.5 -87.5 -87.6 ...
##  $ lat      : num  30.4 30.4 30.4 30.3 30.3 ...
##  $ group    : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ order    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ region   : chr  "alabama" "alabama" "alabama" "alabama" ...
##  $ subregion: chr  NA NA NA NA ...

plot the map

# Draw every state outline; group = group keeps each polygon separate.
ggplot(data = statesMap, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_map(projection = "mercator")

convert state names to lower and merge our datasets

# statesMap uses lower-case region names, so add a lower-case key to murders.
murders$region <- tolower(murders$State)
# Attach the per-state statistics to every polygon vertex.
murderMap <- merge(statesMap, murders, by = "region")
# merge() does not promise to keep the vertex order inside each polygon;
# restore it explicitly so geom_polygon() draws correct outlines.
murderMap <- murderMap[order(murderMap$group, murderMap$order), ]
str(murderMap)

## 'data.frame':    15537 obs. of  12 variables:
##  $ region           : chr  "alabama" "alabama" "alabama" "alabama" ...
##  $ long             : num  -87.5 -87.5 -87.5 -87.5 -87.6 ...
##  $ lat              : num  30.4 30.4 30.4 30.3 30.3 ...
##  $ group            : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ order            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ subregion        : chr  NA NA NA NA ...
##  $ State            : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Population       : int  4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 4779736 ...
##  $ PopulationDensity: num  94.7 94.7 94.7 94.7 94.7 ...
##  $ Murders          : int  199 199 199 199 199 199 199 199 199 199 ...
##  $ GunMurders       : int  135 135 135 135 135 135 135 135 135 135 ...
##  $ GunOwnership     : num  0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 0.517 ...

murder counts in us map

# Fill each state by its murder count on a black-to-red gradient.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Murders)
) +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

each state is coloured by its murder count: the more murders, the redder the colour (the scale runs from black to red)

we see that California and Texas have the largest counts, but isn’t that because they are the most populous states?

Let’s make a new map showing each state’s population

# Fill each state by its population on the same black-to-red gradient.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Population)
) +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

the two plots look identical

Let’s make a new variable, murders per 100,000 population, and reconstruct our map

# Murders per 100,000 inhabitants, then the map filled by that rate.
murderMap$MurderRate <- murderMap$Murders / murderMap$Population * 100000
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

our map is still too dark why?

Washington DC is an outlier with a very high murder rate, but because of its small area it cannot be seen on the map

Let’s make our map again removing states with MurderRate>10

# Murder-rate map with the fill scale restricted to [0, 10].
# Values outside the scale limits are treated as missing, so only states with
# a rate above 10 are dropped from the gradient. The lower limit is 0 (not
# 0.9) so that states with very low rates keep their colour as well.
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = MurderRate)) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    low = "black", high = "red", guide = "legend",
    name = "Murder Rate per 100k", limits = c(0, 10)
  )

now we have a variety of colors

Louisiana has a high murder rate

International Students dataset

load the data, using stringsAsFactors = FALSE

# Read the international-student counts, keeping country names as character.
intlall <- read.csv(file = "intlall.csv", stringsAsFactors = FALSE)
head(intlall)

##           Citizenship UG  G SpecialUG SpecialG ExhangeVisiting Total
## 1             Albania  3  1         0        0               0     4
## 2 Antigua and Barbuda NA NA        NA        1              NA     1
## 3           Argentina NA 19        NA       NA              NA    19
## 4             Armenia  3  2        NA       NA              NA     5
## 5           Australia  6 32        NA       NA               1    39
## 6             Austria NA 11        NA       NA               5    16

convert NAs to 0

# Replace every missing cell in the data frame with 0.
intlall[is.na(intlall)] <- 0
head(intlall)

##           Citizenship UG  G SpecialUG SpecialG ExhangeVisiting Total
## 1             Albania  3  1         0        0               0     4
## 2 Antigua and Barbuda  0  0         0        1               0     1
## 3           Argentina  0 19         0        0               0    19
## 4             Armenia  3  2         0        0               0     5
## 5           Australia  6 32         0        0               1    39
## 6             Austria  0 11         0        0               5    16

load world Atlas and have a look

# Polygon outlines of the world's countries as a data frame.
world_map <- map_data(map = "world")
str(world_map)

## 'data.frame':    101913 obs. of  6 variables:
##  $ long     : num  -69.9 -69.9 -69.9 -70 -70.1 ...
##  $ lat      : num  12.5 12.4 12.4 12.5 12.5 ...
##  $ group    : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ order    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ region   : chr  "Aruba" "Aruba" "Aruba" "Aruba" ...
##  $ subregion: chr  NA NA NA NA ...

merge our datasets

# Join student counts onto the polygons: map "region" matches student "Citizenship".
world_map <- merge(
  x = world_map,
  y = intlall,
  by.x = "region",
  by.y = "Citizenship"
)
str(world_map)

## 'data.frame':    65153 obs. of  12 variables:
##  $ region         : chr  "Albania" "Albania" "Albania" "Albania" ...
##  $ long           : num  20.5 19.4 20.6 19.4 19.4 ...
##  $ lat            : num  41.3 42.3 40.1 42.1 42.3 ...
##  $ group          : num  6 6 6 6 6 6 6 6 6 6 ...
##  $ order          : int  789 871 813 864 873 818 823 822 874 869 ...
##  $ subregion      : chr  NA NA NA NA ...
##  $ UG             : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ G              : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ SpecialUG      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SpecialG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ExhangeVisiting: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Total          : int  4 4 4 4 4 4 4 4 4 4 ...

make a map with geom_polygon

# Draw the merged country outlines.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_map(projection = "mercator")

something is not right, merging our datasets reordered our data

Reorder our data points by group variable(country)

# Sort rows by polygon group, then by vertex order within each polygon.
world_map <- world_map[order(world_map$group, world_map$order), ]

Now construct our map again

# Draw the country outlines from the reordered rows.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_map(projection = "mercator")

US is missing because us students do not count as international students

some countries are missing after the merge because they have different names in the intlall and world_map datasets

Make a table to check what is going on

# List every citizenship label that occurs in the student data.
table(intlall[["Citizenship"]])

## 
##                      Albania          Antigua and Barbuda 
##                            1                            1 
##                    Argentina                      Armenia 
##                            1                            1 
##                    Australia                      Austria 
##                            1                            1 
##                      Bahrain                   Bangladesh 
##                            1                            1 
##                      Belarus                      Belgium 
##                            1                            1 
##                      Bolivia           Bosnia-Hercegovina 
##                            1                            1 
##                       Brazil                     Bulgaria 
##                            1                            1 
##                     Cambodia                     Cameroon 
##                            1                            1 
##                       Canada                        Chile 
##                            1                            1 
## China (People's Republic Of)                     Colombia 
##                            1                            1 
##                   Costa Rica                Cote d'Ivoire 
##                            1                            1 
##                      Croatia                       Cyprus 
##                            1                            1 
##               Czech Republic                      Denmark 
##                            1                            1 
##                      Ecuador                        Egypt 
##                            1                            1 
##                  El Salvador                      Estonia 
##                            1                            1 
##                     Ethiopia                      Finland 
##                            1                            1 
##                       France                      Georgia 
##                            1                            1 
##                      Germany                        Ghana 
##                            1                            1 
##                       Greece                    Guatemala 
##                            1                            1 
##                        Haiti                    Hong Kong 
##                            1                            1 
##                      Hungary                      Iceland 
##                            1                            1 
##                        India                    Indonesia 
##                            1                            1 
##                         Iran                         Iraq 
##                            1                            1 
##                      Ireland                       Israel 
##                            1                            1 
##                        Italy                      Jamaica 
##                            1                            1 
##                        Japan                       Jordan 
##                            1                            1 
##                   Kazakhstan                        Kenya 
##                            1                            1 
##                 Korea, South                       Kuwait 
##                            1                            1 
##                       Latvia                      Lebanon 
##                            1                            1 
##                    Lithuania                    Macedonia 
##                            1                            1 
##                     Malaysia                    Mauritius 
##                            1                            1 
##                       Mexico                      Moldova 
##                            1                            1 
##                     Mongolia                   Montenegro 
##                            1                            1 
##                      Morocco                        Nepal 
##                            1                            1 
##                  Netherlands                  New Zealand 
##                            1                            1 
##                      Nigeria                       Norway 
##                            1                            1 
##                     Pakistan                     Paraguay 
##                            1                            1 
##                         Peru                  Philippines 
##                            1                            1 
##                       Poland                     Portugal 
##                            1                            1 
##                        Qatar                      Romania 
##                            1                            1 
##                       Russia                       Rwanda 
##                            1                            1 
##                 Saudi Arabia                       Serbia 
##                            1                            1 
##                 Sierra Leone                    Singapore 
##                            1                            1 
##                     Slovakia                      Somalia 
##                            1                            1 
##                 South Africa                        Spain 
##                            1                            1 
##                    Sri Lanka                    St. Lucia 
##                            1                            1 
## St. Vincent & The Grenadines                        Sudan 
##                            1                            1 
##                       Sweden                  Switzerland 
##                            1                            1 
##                        Syria                       Taiwan 
##                            1                            1 
##                     Tanzania                     Thailand 
##                            1                            1 
##            Trinidad & Tobago                      Tunisia 
##                            1                            1 
##                       Turkey                       Uganda 
##                            1                            1 
##                      Ukraine         United Arab Emirates 
##                            1                            1 
##               United Kingdom                      Unknown 
##                            1                            1 
##                      Uruguay                    Venezuela 
##                            1                            1 
##                      Vietnam                    West Bank 
##                            1                            1 
##                       Zambia                     Zimbabwe 
##                            1                            1

China is written “China (People’s Republic Of)” in intlall, while in the world map data it is plain “China”

Update the intlall data frame and check again

# Rename the long label to the plain name "China", then list the labels again.
intlall$Citizenship[
  intlall$Citizenship == "China (People's Republic Of)"
] <- "China"
table(intlall$Citizenship)

## 
##                      Albania          Antigua and Barbuda 
##                            1                            1 
##                    Argentina                      Armenia 
##                            1                            1 
##                    Australia                      Austria 
##                            1                            1 
##                      Bahrain                   Bangladesh 
##                            1                            1 
##                      Belarus                      Belgium 
##                            1                            1 
##                      Bolivia           Bosnia-Hercegovina 
##                            1                            1 
##                       Brazil                     Bulgaria 
##                            1                            1 
##                     Cambodia                     Cameroon 
##                            1                            1 
##                       Canada                        Chile 
##                            1                            1 
##                        China                     Colombia 
##                            1                            1 
##                   Costa Rica                Cote d'Ivoire 
##                            1                            1 
##                      Croatia                       Cyprus 
##                            1                            1 
##               Czech Republic                      Denmark 
##                            1                            1 
##                      Ecuador                        Egypt 
##                            1                            1 
##                  El Salvador                      Estonia 
##                            1                            1 
##                     Ethiopia                      Finland 
##                            1                            1 
##                       France                      Georgia 
##                            1                            1 
##                      Germany                        Ghana 
##                            1                            1 
##                       Greece                    Guatemala 
##                            1                            1 
##                        Haiti                    Hong Kong 
##                            1                            1 
##                      Hungary                      Iceland 
##                            1                            1 
##                        India                    Indonesia 
##                            1                            1 
##                         Iran                         Iraq 
##                            1                            1 
##                      Ireland                       Israel 
##                            1                            1 
##                        Italy                      Jamaica 
##                            1                            1 
##                        Japan                       Jordan 
##                            1                            1 
##                   Kazakhstan                        Kenya 
##                            1                            1 
##                 Korea, South                       Kuwait 
##                            1                            1 
##                       Latvia                      Lebanon 
##                            1                            1 
##                    Lithuania                    Macedonia 
##                            1                            1 
##                     Malaysia                    Mauritius 
##                            1                            1 
##                       Mexico                      Moldova 
##                            1                            1 
##                     Mongolia                   Montenegro 
##                            1                            1 
##                      Morocco                        Nepal 
##                            1                            1 
##                  Netherlands                  New Zealand 
##                            1                            1 
##                      Nigeria                       Norway 
##                            1                            1 
##                     Pakistan                     Paraguay 
##                            1                            1 
##                         Peru                  Philippines 
##                            1                            1 
##                       Poland                     Portugal 
##                            1                            1 
##                        Qatar                      Romania 
##                            1                            1 
##                       Russia                       Rwanda 
##                            1                            1 
##                 Saudi Arabia                       Serbia 
##                            1                            1 
##                 Sierra Leone                    Singapore 
##                            1                            1 
##                     Slovakia                      Somalia 
##                            1                            1 
##                 South Africa                        Spain 
##                            1                            1 
##                    Sri Lanka                    St. Lucia 
##                            1                            1 
## St. Vincent & The Grenadines                        Sudan 
##                            1                            1 
##                       Sweden                  Switzerland 
##                            1                            1 
##                        Syria                       Taiwan 
##                            1                            1 
##                     Tanzania                     Thailand 
##                            1                            1 
##            Trinidad & Tobago                      Tunisia 
##                            1                            1 
##                       Turkey                       Uganda 
##                            1                            1 
##                      Ukraine         United Arab Emirates 
##                            1                            1 
##               United Kingdom                      Unknown 
##                            1                            1 
##                      Uruguay                    Venezuela 
##                            1                            1 
##                      Vietnam                    West Bank 
##                            1                            1 
##                       Zambia                     Zimbabwe 
##                            1                            1

Remerge our data frames, reorder data rows and reconstruct our map

# Rebuild the merged map from a fresh world outline, restore the polygon
# vertex order, and draw the outlines.
world_map <- merge(
  x = map_data("world"),
  y = intlall,
  by.x = "region",
  by.y = "Citizenship"
)
world_map <- world_map[order(world_map$group, world_map$order), ]
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_map(projection = "mercator")

Reconstruct our map and fill each country by its total number of students

# Fill each country by the Total column.
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), colour = "black") +
  coord_map(projection = "mercator")

Russia and the UK are missing, again due to naming conventions

Same map using an orthographic projection (a globe-like view)

# Same fill, drawn with the "ortho" projection at orientation (20, 30, 0).
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), colour = "black") +
  coord_map(projection = "ortho", orientation = c(20, 30, 0))

A different view

# Same "ortho" projection at orientation (-37, 175, 0).
ggplot(data = world_map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = Total), colour = "black") +
  coord_map(projection = "ortho", orientation = c(-37, 175, 0))