#INSTALLING AND LOADING PACKAGES
#You must install the "ggplot2" package before creating the following data visualizations.
#install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
#SETTING WORK DIRECTORY
#Create a folder on your desktop (outside of RStudio) called "ggplot2" and save the .csv files in it.
#In RStudio: Session > Set Working Directory > Choose Directory > choose the "ggplot2" folder (the setwd() call below does the same thing).
#In your workspace (upper right quadrant in RStudio), click "Import Dataset" and select "From Text File". Select your .csv file.
#We will be using health data from developing regions in South Asia (Afghanistan, Bangladesh, Bhutan, India, Maldives, Nepal, Pakistan, Sri Lanka).
#Point R at the folder that holds the .csv files; edit this path to match your machine.
setwd("~/Desktop/ggplot2")
#LOADING DATA SETS
#Read each .csv file into a data frame with a short name.
#hiv: read from "pakistan.childHIV.csv"; the plots below use its Year and childHIV columns.
#sa: read from "southasia.csv"; the plots below subset it by row and column position.
#The argument is spelled out as header (not head) so the call does not rely on partial argument matching.
hiv <- read.csv(file = "pakistan.childHIV.csv", header = TRUE, sep = ",")
sa <- read.csv(file = "southasia.csv", header = TRUE, sep = ",")
#TYPES OF GRAPHS YOU CAN MAKE WITH GGPLOT2
#Bar graph, understanding stat = identity
#stat = "identity" draws each bar at the childHIV value already stored in the data instead of counting rows.
#Written as ggplot() + geom_bar() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1))

#Histogram, understanding stat = bin
#stat = "bin" groups the childHIV values into bins 75 units wide and plots how many rows fall in each bin.
#Written as ggplot() + geom_histogram() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(stat = "bin", binwidth = 75) +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Children Living with HIV (Ages 0-14)",
       y = "Frequency")

#Line graph
#geom_line() plots the values as given, so no stat needs to be specified.
#Written as ggplot() + geom_line() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_line() +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1))

#Scatter plot (points)
#geom_point() plots the values as given, so no stat needs to be specified.
#Written as ggplot() + geom_point() because qplot() no longer accepts a stat argument;
#the stray unnamed aes(childHIV) argument has been dropped because x and y are already mapped.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_point() +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1))

#USING THE GGPLOT FUNCTION
#Build the plot one layer at a time, joining the layers with the + sign:
#data and aesthetic mappings, then the point geom, then the x-axis breaks, then the y-axis label.
ggplot(hiv, aes(x = Year, y = childHIV)) +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 2001, to = 2013, by = 1)) +
  ylab("Children Living with HIV (Ages 0-14)")

#Index a data frame as df[rows, columns] to pull out specific rows and columns.
#Here: rows 9 through 16 and the first two columns of sa (printed output shown below).
sa[seq(9, 16), c(1, 2)]
## Country Life.expectancy
## 9 Afghanistan 60.93141
## 10 Bangladesh 70.69339
## 11 Bhutan 68.30232
## 12 India 66.45624
## 13 Maldives 77.93537
## 14 Nepal 68.40385
## 15 Pakistan 66.58537
## 16 Sri Lanka 74.24029
#Density plot
#The kernel is set to "gaussian", which is another name for the normal distribution.
#Life expectancy at birth is the number of years a newborn infant would live
#if the mortality patterns prevailing at the time of its birth stayed the same throughout its life.
#The data are rows 9-16 and columns 1-2 of sa.
ggplot(sa[9:16, 1:2], aes(x = Life.expectancy)) +
  geom_density(kernel = "gaussian") +
  labs(x = "Life expectancy at birth, total(years) in 2013")

#Boxplot
#Life expectancy at birth taken from developing South Asia countries
#The data are rows 1-16 and columns 5-6 of sa; aes() expects these to be Year and Southasia.life.expectancy.avg.
#The x-axis label is suppressed by passing NULL.
ggplot(sa[1:16, 5:6], aes(x = Year, y = Southasia.life.expectancy.avg)) +
  geom_boxplot() +
  ggtitle("Life Expectancy in Developing South Asian Countries") +
  xlab(NULL) +
  ylab("Life Expectancy")

#COORDINATE SYSTEMS
#Default Cartesian system
#coord_cartesian() sets the visible window (x from -2000 to 2000, y from -5 to 5) without dropping any data.
#geom_histogram() is used because childHIV is continuous and geom_bar() no longer bins by binwidth.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_cartesian(xlim = c(-2000, 2000), ylim = c(-5, 5))

#Zoomed in
#Same histogram, with the visible window narrowed to x from 0 to 2000 and y from 0 to 5.
#geom_histogram() is used because childHIV is continuous and geom_bar() no longer bins by binwidth.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_cartesian(xlim = c(0, 2000), ylim = c(0, 5))

#Flipped Cartesian coordinates
#coord_flip() swaps the x and y axes; the limits are x from -2000 to 2000 and y from -5 to 5.
#geom_histogram() is used because childHIV is continuous and geom_bar() no longer bins by binwidth.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_flip(xlim = c(-2000, 2000), ylim = c(-5, 5))

#Zoomed in
#Same flipped histogram, with the limits narrowed to x from 0 to 2000 and y from 0 to 5.
#geom_histogram() is used because childHIV is continuous and geom_bar() no longer bins by binwidth.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_flip(xlim = c(0, 2000), ylim = c(0, 5))

#FACETING
#Out-of-pocket health expenditure (% of total expenditure on health)
#Number of infants dying before reaching one year of age
library(scales)
#Facet into columns: facet_grid(. ~ Country) draws one panel per Country, side by side.
#The data are rows 9-16 and columns 1-4 of sa.
ggplot(sa[9:16, 1:4], aes(x = Out.of.pocket.health.expenditure, y = Infant.deaths)) +
  geom_point() +
  facet_grid(. ~ Country) +
  #comma (from the scales package) formats the y-axis labels with thousands separators
  scale_y_continuous(labels = comma) +
  xlab("Out of Pocket Health Expenditure (% of total expenditure on health)") +
  ylab("Number of Infant Deaths") +
  #Rotate the x-axis tick labels by 90 degrees
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#Facet into rows: facet_grid(Country ~ .) draws one panel per Country, stacked vertically.
#The data are rows 9-16 and columns 1-4 of sa.
ggplot(sa[9:16, 1:4], aes(x = Out.of.pocket.health.expenditure, y = Infant.deaths)) +
  geom_point() +
  facet_grid(Country ~ .) +
  #Only three y-axis breaks are drawn; comma (from the scales package) adds thousands separators
  scale_y_continuous(breaks = c(0, 500000, 1000000), labels = comma) +
  xlab("Out of Pocket Health Expenditure (% of total expenditure on health)") +
  ylab("Number of Infant Deaths")

#AESTHETICS
#Change the interior coloring
#fill is set as a fixed parameter of the geom (outside aes()), so every bar is drawn in red and no legend is created.
#Passing fill = "red" through qplot() treated the string as data to map, so the bars were not actually red.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity", fill = "red") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#Add a color scale to a variable
#Mapping colour inside aes() colors each point by its Life.expectancy value and adds a legend.
#NOTE(review): this requires a Life.expectancy column in hiv - confirm the .csv file provides it.
#Written as ggplot() + geom_point() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = Year, y = childHIV, colour = Life.expectancy)) +
  geom_point() +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#THEMES FOR GGPLOT2
#Classic theme
#Adding theme_classic() to a plot applies ggplot2's built-in classic theme.
#Written as ggplot() + geom_bar() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  theme_classic()

#To access more ggplot2 themes, install the "ggthemes" package.
#install.packages("ggthemes")
#library() stops with an error if ggthemes is not installed, whereas require() would only warn and carry on.
library(ggthemes)
#Tufte theme
#theme_tufte() comes from the ggthemes package loaded above.
#Written as ggplot() + geom_bar() because qplot() no longer accepts a stat argument.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  theme_tufte()
