# Start from an empty workspace, including hidden (dot-prefixed) objects.
# NOTE(review): clearing the global environment is kept so the script behaves
# as before, but it also discards anything the caller had loaded; running the
# script in a fresh R session is the safer way to get the same effect.
rm(list = ls(all.names = TRUE))

# Print numbers with 4 significant digits and bias printing strongly towards
# fixed rather than scientific notation.
options(digits = 4, scipen = 12)
library(dplyr)
library(ggplot2)
library(maps)
library(ggmap)
library(reshape2)
library(ggplot2)
# Read data/intl.csv into `intl` and print the structure of the result.
intl <- read.csv(file = "data/intl.csv")
str(intl)
#> 'data.frame': 8 obs. of 2 variables:
#> $ Region : Factor w/ 8 levels "Africa","Asia",..: 2 3 6 4 5 1 7 8
#> $ PercentOfIntl: num 0.531 0.201 0.098 0.09 0.054 0.02 0.015 0.002
# Bar chart with one bar per Region on the x axis and PercentOfIntl as the bar
# height on the y axis; every bar is also labelled with its PercentOfIntl value.
ggplot(data = intl, mapping = aes(x = Region, y = PercentOfIntl)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = PercentOfIntl))
# Now let's look again: show the first rows of intlall.
# NOTE(review): no visible statement creates `intlall`; the line that loads it
# appears to be missing from this script -- restore it before running.
head(intlall)
#> Citizenship UG G SpecialUG SpecialG ExhangeVisiting Total
#> 1 Albania 3 1 0 0 0 4
#> 2 Antigua and Barbuda 0 0 0 1 0 1
#> 3 Argentina 0 19 0 0 0 19
#> 4 Armenia 3 2 0 0 0 5
#> 5 Australia 6 32 0 0 1 39
#> 6 Austria 0 11 0 0 5 16
# Load the world map polygons as a data frame and inspect its structure.
# NOTE(review): the statement that creates `world_map` was missing from this
# script. map_data("world") is restored here because the recorded str() output
# that follows (long, lat, group, order, region, subregion) has exactly the
# columns that call returns -- confirm against the original notebook.
world_map <- map_data("world")
str(world_map)
#> 'data.frame': 99338 obs. of 6 variables:
#> $ long : num -69.9 -69.9 -69.9 -70 -70.1 ...
#> $ lat : num 12.5 12.4 12.4 12.5 12.5 ...
#> $ group : num 1 1 1 1 1 1 1 1 1 1 ...
#> $ order : int 1 2 3 4 5 6 7 8 9 10 ...
#> $ region : chr "Aruba" "Aruba" "Aruba" "Aruba" ...
#> $ subregion: chr NA NA NA NA ...
# Attach the per-country columns of intlall to the map polygons, matching the
# map's `region` against intlall's `Citizenship`, then inspect the result.
# NOTE(review): this merge was missing from the script. It is reconstructed
# from the recorded str() output that follows: `region` is the first column,
# `Citizenship` is gone, and intlall's count columns are appended -- confirm.
# merge() keeps only regions with a matching Citizenship and does not keep the
# polygon rows in their original `order`, so re-sort before drawing polygons.
world_map <- merge(world_map, intlall, by.x = "region", by.y = "Citizenship")
str(world_map)
#> 'data.frame': 63634 obs. of 12 variables:
#> $ region : chr "Albania" "Albania" "Albania" "Albania" ...
#> $ long : num 20.5 20.4 19.5 20.5 20.4 ...
#> $ lat : num 41.3 39.8 42.5 40.1 41.5 ...
#> $ group : num 6 6 6 6 6 6 6 6 6 6 ...
#> $ order : int 789 822 870 815 786 821 818 779 879 795 ...
#> $ subregion : chr NA NA NA NA ...
#> $ UG : num 3 3 3 3 3 3 3 3 3 3 ...
#> $ G : num 1 1 1 1 1 1 1 1 1 1 ...
#> $ SpecialUG : num 0 0 0 0 0 0 0 0 0 0 ...
#> $ SpecialG : num 0 0 0 0 0 0 0 0 0 0 ...
#> $ ExhangeVisiting: num 0 0 0 0 0 0 0 0 0 0 ...
#> $ Total : int 4 4 4 4 4 4 4 4 4 4 ...
# Draw the country outlines from world_map on a map projection.
# The rows are sorted by group and then by order first: geom_polygon() connects
# points in row order, and the recorded str() output shows `order` is no longer
# increasing in world_map.
# NOTE(review): the original fill/color string literals were lost from this
# line (it read `fill=, color=`); white polygons with black borders are a
# reconstruction -- confirm the intended colours.
ggplot(
  world_map[order(world_map$group, world_map$order), ],
  aes(x = long, y = lat, group = group)
) +
  geom_polygon(fill = "white", color = "black") +
  coord_map()
# Count how many rows of intlall carry each Citizenship value.
table(intlall[["Citizenship"]])
#> Albania Antigua and Barbuda Argentina
#> 1 1 1
#> Armenia Australia Austria
#> 1 1 1
#> Bahrain Bangladesh Belarus
#> 1 1 1
#> Belgium Bolivia Bosnia-Hercegovina
#> 1 1 1
#> Brazil Bulgaria Cambodia
#> 1 1 1
#> Cameroon Canada Chile
#> 1 1 1
#> China Colombia Costa Rica
#> 1 1 1
#> Cote d'Ivoire Croatia Cyprus
#> 1 1 1
#> Czech Republic Denmark Ecuador
#> 1 1 1
#> Egypt El Salvador Estonia
#> 1 1 1
#> Ethiopia Finland France
#> 1 1 1
#> Georgia Germany Ghana
#> 1 1 1
#> Greece Guatemala Haiti
#> 1 1 1
#> Hong Kong Hungary Iceland
#> 1 1 1
#> India Indonesia Iran
#> 1 1 1
#> Iraq Ireland Israel
#> 1 1 1
#> Italy Jamaica Japan
#> 1 1 1
#> Jordan Kazakhstan Kenya
#> 1 1 1
#> Korea, South Kuwait Latvia
#> 1 1 1
#> Lebanon Lithuania Macedonia
#> 1 1 1
#> Malaysia Mauritius Mexico
#> 1 1 1
#> Moldova Mongolia Montenegro
#> 1 1 1
#> Morocco Nepal Netherlands
#> 1 1 1
#> New Zealand Nigeria Norway
#> 1 1 1
#> Pakistan Paraguay Peru
#> 1 1 1
#> Philippines Poland Portugal
#> 1 1 1
#> Qatar Romania Russia
#> 1 1 1
#> Rwanda Saudi Arabia Serbia
#> 1 1 1
#> Sierra Leone Singapore Slovakia
#> 1 1 1
#> Somalia South Africa Spain
#> 1 1 1
#> Sri Lanka St. Lucia St. Vincent & The Grenadines
#> 1 1 1
#> Sudan Sweden Switzerland
#> 1 1 1
#> Syria Taiwan Tanzania
#> 1 1 1
#> Thailand Trinidad & Tobago Tunisia
#> 1 1 1
#> Turkey Uganda Ukraine
#> 1 1 1
#> United Arab Emirates United Kingdom Unknown
#> 1 1 1
#> Uruguay Venezuela Vietnam
#> 1 1 1
#> West Bank Zambia Zimbabwe
#> 1 1 1
library(ggplot2)
library(reshape2)

# Now let's load our data frame and print its structure.
# NOTE(review): the path literal had lost its quotes and leading directory (it
# read `/households.csv`); "data/households.csv" is reconstructed by analogy
# with the "data/intl.csv" path used earlier in this script -- confirm.
households <- read.csv("data/households.csv")
str(households)
#> 'data.frame': 8 obs. of 7 variables:
#> $ Year : int 1970 1980 1990 1995 2000 2005 2010 2012
#> $ MarriedWChild : num 40.3 30.9 26.3 25.5 24.1 22.9 20.9 19.6
#> $ MarriedWOChild: num 30.3 29.9 29.8 28.9 28.7 28.3 28.8 29.1
#> $ OtherFamily : num 10.6 12.9 14.8 15.6 16 16.7 17.4 17.8
#> $ MenAlone : num 5.6 8.6 9.7 10.2 10.7 11.3 11.9 12.3
#> $ WomenAlone : num 11.5 14 14.9 14.7 14.8 15.3 14.8 15.2
#> $ OtherNonfamily: num 1.7 3.6 4.6 5 5.7 5.6 6.2 6.1
# Plot it: reshape households to long form (one row per Year and household
# type) and draw one coloured line, with point markers, per household type.
# Year is the id column because the plot uses it as x while taking `variable`
# and `value` from the melted columns.
# NOTE(review): the first word of the y-axis label was lost (the line read
# `ylab(of Households)`); "Percentage" is a reconstruction -- confirm.
melt(households, id = "Year") %>%
  ggplot(aes(x = Year, y = value, color = variable)) +
  geom_line(size = 2) +
  geom_point(size = 5) +
  ylab("Percentage of Households")