#INSTALLING AND LOADING PACKAGES
#You must install the "ggplot2" package before creating the following data visualizations. 
#install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
#SETTING THE WORKING DIRECTORY
#Create a folder on your desktop (this is outside of RStudio) called "mydata". 
#Session > Set Working Directory > Choose Directory > Choose "mydata" folder. 
#In your workspace (upper right quadrant in RStudio), click "Import Dataset" and select "From Text File". Select your .csv file. 
#We will be using health data from developing regions in South Asia (Afghanistan, Bangladesh, Bhutan, India, Maldives, Nepal, Pakistan, Sri Lanka). 
#NOTE(review): the setup comments above describe a Desktop folder called
#"mydata", but this path points at "~/Desktop/ggplot2" -- confirm which folder
#actually holds the .csv files before running.
setwd("~/Desktop/ggplot2")
#LOADING DATA SETS
#Each .csv file is read into a data frame with a short name (hiv, sa) that the
#plots below refer to. `header` is spelled out in full rather than relying on
#R's partial argument matching of `head`.
hiv <- read.csv(file = "pakistan.childHIV.csv", header = TRUE, sep = ",")
sa <- read.csv(file = "southasia.csv", header = TRUE, sep = ",")

#TYPES OF GRAPHS YOU CAN MAKE WITH GGPLOT2

#Bar graph, understanding stat = "identity"
#stat = "identity" makes each bar's height the childHIV value itself instead of
#a count of rows. The plot is built with ggplot() + geom_bar() because the
#`stat` argument of qplot() has been deprecated since ggplot2 2.0.0.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1)) +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#Histogram, understanding stat = "bin"
#stat = "bin" groups the childHIV values into bins (75 units wide here) and
#counts the rows in each bin. The plot is built with ggplot() +
#geom_histogram() because the `stat` argument of qplot() has been deprecated
#since ggplot2 2.0.0.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(stat = "bin", binwidth = 75) +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Children Living with HIV (Ages 0-14)",
       y = "Frequency")

#Line graph
#childHIV is drawn as-is against Year (stat = "identity"). The plot is built
#with ggplot() + geom_line() because the `stat` argument of qplot() has been
#deprecated since ggplot2 2.0.0.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_line(stat = "identity") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1)) +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#Scatter plot (points)
#childHIV is drawn as-is against Year (stat = "identity"). The plot is built
#with ggplot() + geom_point() because the `stat` argument of qplot() has been
#deprecated since ggplot2 2.0.0. The stray, unnamed aes(childHIV) argument of
#the old qplot() call is dropped: x and y are already mapped to Year and
#childHIV.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_point(stat = "identity") +
  #One axis break per year from 2001 to 2013
  scale_x_continuous(breaks = seq(2001, 2013, 1)) +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#USING THE GGPLOT FUNCTION
#A plot is assembled piece by piece; each piece is joined on with the + sign.
ggplot(data = hiv, mapping = aes(x = Year, y = childHIV)) +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 2001, to = 2013, by = 1)) +
  ylab("Children Living with HIV (Ages 0-14)")

#Index a data frame as [rows, columns] to pull out specific rows and columns.
#Here: rows 9 through 16 and the first two columns of sa.
sa[seq(9, 16), c(1, 2)]
##        Country Life.expectancy
## 9  Afghanistan        60.93141
## 10  Bangladesh        70.69339
## 11      Bhutan        68.30232
## 12       India        66.45624
## 13    Maldives        77.93537
## 14       Nepal        68.40385
## 15    Pakistan        66.58537
## 16   Sri Lanka        74.24029
#ggplot density
#The kernel is "gaussian"; Gaussian is another name for the normal distribution.
#Life expectancy at birth is the number of years a newborn infant would live if
#the mortality patterns prevailing at the time of its birth stayed the same
#throughout its life.
ggplot(data = sa[9:16, 1:2], mapping = aes(x = Life.expectancy)) +
  geom_density(kernel = "gaussian") +
  labs(x = "Life expectancy at birth, total(years) in 2013")

#ggplot boxplot
#Life expectancy at birth for developing South Asian countries, taken from
#rows 1-16 and columns 5-6 of sa. The x axis title is removed with x = NULL.
ggplot(data = sa[1:16, 5:6],
       mapping = aes(x = Year, y = Southasia.life.expectancy.avg)) +
  geom_boxplot() +
  labs(title = "Life Expectancy in Developing South Asian Countries",
       x = NULL,
       y = "Life Expectancy")

#COORDINATE SYSTEMS 

#Default Cartesian system
#geom_histogram() does the binning (bins 50 units wide): since ggplot2 2.0.0
#geom_bar() counts rows per x value and no longer bins by `binwidth`.
#coord_cartesian() sets the visible x and y ranges.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_cartesian(xlim = c(-2000, 2000), ylim = c(-5, 5))

#Zoomed in
#Same histogram with the visible ranges narrowed to start at 0 on both axes.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_cartesian(xlim = c(0, 2000), ylim = c(0, 5))

#Flipped Cartesian coordinates
#geom_histogram() does the binning (bins 50 units wide): since ggplot2 2.0.0
#geom_bar() counts rows per x value and no longer bins by `binwidth`.
#coord_flip() swaps the horizontal and vertical axes.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_flip(xlim = c(-2000, 2000), ylim = c(-5, 5))

#Zoomed in
#Same flipped histogram with both limits narrowed to start at 0.
ggplot(data = hiv, aes(x = childHIV)) +
  geom_histogram(binwidth = 50) +
  xlab("Children Living with HIV (Ages 0-14)") +
  coord_flip(xlim = c(0, 2000), ylim = c(0, 5))

#FACETING
#Out-of-pocket health expenditure (% of total expenditure on health)
#Number of infants dying before reaching one year of age
library(scales)

#Facet into columns
#facet_grid(. ~ Country) draws one panel per Country, laid out as columns.
#The x axis tick labels are rotated 90 degrees and the y axis labels are
#formatted with comma() from the scales package.
ggplot(data = sa[9:16, 1:4],
       mapping = aes(x = Out.of.pocket.health.expenditure,
                     y = Infant.deaths)) +
  geom_point() +
  facet_grid(. ~ Country) +
  scale_y_continuous(labels = comma) +
  labs(x = "Out of Pocket Health Expenditure (% of total expenditure on health)",
       y = "Number of Infant Deaths") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#Facet into rows
#facet_grid(Country ~ .) draws one panel per Country, laid out as rows.
#The y axis gets breaks at 0, 500,000 and 1,000,000, formatted with comma()
#from the scales package.
ggplot(data = sa[9:16, 1:4],
       mapping = aes(x = Out.of.pocket.health.expenditure,
                     y = Infant.deaths)) +
  geom_point() +
  facet_grid(Country ~ .) +
  scale_y_continuous(breaks = seq(0, 1000000, by = 500000), labels = comma) +
  labs(x = "Out of Pocket Health Expenditure (% of total expenditure on health)",
       y = "Number of Infant Deaths")

#AESTHETICS

#Change the interior coloring
#fill = "red" is given to geom_bar() outside aes(), so it is a fixed colour
#for every bar rather than a variable mapped to a fill scale. Inside qplot()
#the same argument is treated as a mapped aesthetic (qplot() needs I("red") for
#a constant), which is why the old call had to hide a fill legend. The
#deprecated qplot() `stat` argument and the stray aes(childHIV) are dropped.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity", fill = "red") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#Add a color scale to a variable
#Mapping color inside aes() colours each point by its Life.expectancy value
#and adds a legend for the scale. The plot is built with ggplot() +
#geom_point() because the `stat` argument of qplot() has been deprecated since
#ggplot2 2.0.0; the stray aes(childHIV) argument is dropped.
#NOTE(review): this assumes hiv has a Life.expectancy column -- only sa is
#shown with one in this script; confirm against pakistan.childHIV.csv.
ggplot(data = hiv, aes(x = Year, y = childHIV, color = Life.expectancy)) +
  geom_point() +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#Add a size scale to a variable
#Mapping size inside aes() scales each point by its Life.expectancy value and
#adds a legend for the scale. The old call mixed curly quotes with straight
#quotes, so the xlab/ylab text was swallowed into the title string and the axis
#labels were never set; title, x and y are now three separate strings. The
#deprecated qplot() `stat` argument and the stray aes(childHIV) are dropped.
#NOTE(review): this assumes hiv has a Life.expectancy column -- only sa is
#shown with one in this script; confirm against pakistan.childHIV.csv.
ggplot(data = hiv, aes(x = Year, y = childHIV, size = Life.expectancy)) +
  geom_point() +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)")

#THEMES FOR GGPLOT2
#Classic theme
#theme_classic() is added to the bar graph of childHIV by Year. The plot is
#built with ggplot() + geom_bar() because the `stat` argument of qplot() has
#been deprecated since ggplot2 2.0.0.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  theme_classic()

#To access more ggplot2 themes, install the "ggthemes" package.
#install.packages("ggthemes")
#library() stops with an error right here if the package is not installed;
#require() would only return FALSE and let the script carry on without it.
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.1.3
#Tufte theme
#theme_tufte() comes from the ggthemes package and is added to the bar graph
#of childHIV by Year. The plot is built with ggplot() + geom_bar() because the
#`stat` argument of qplot() has been deprecated since ggplot2 2.0.0.
ggplot(data = hiv, aes(x = Year, y = childHIV)) +
  geom_bar(stat = "identity") +
  labs(title = "Children Living with HIV in Pakistan from 2001 to 2013",
       x = "Year",
       y = "Children Living with HIV (Ages 0-14)") +
  theme_tufte()