library(ggplot2)
library(ggplot2movies)
library(ggthemes)
library(data.table)
library("plotly")
library(reshape2)

The layers are Data, Aesthetics, Geometries, Facets, Statistics, Coordinates, and Themes.

More info can be found here

A simple dot plot

# Data + aesthetics + geometry: horsepower plotted against miles per gallon.
g1 <- ggplot(data = mtcars, mapping = aes(x = mpg, y = hp)) +
  geom_point()
g1

If we want to add facets for each cyl

# Facet layer: one panel row per value of cyl.
g1 + facet_grid(cyl ~ .)

If we want to add a Statistics Layer, and in particular a Smoother

# Statistics layer: add a smoother on top of the faceted scatter plot.
g2 <- g1 +
  facet_grid(cyl ~ .) +
  stat_smooth()
g2

If we want to add a Coordinate Layer

# Coordinate layer: restrict the visible x range to 10-40.
g2 + coord_cartesian(xlim = c(10, 40))

If we want a Theme Layer

# Theme layer: same plot as above, drawn with the minimal theme.
g2 +
  coord_cartesian(xlim = c(10, 40)) +
  theme_minimal()

Histograms

# Data & aesthetics: distribution of movie ratings.
pl <- ggplot(movies, aes(x = rating))

# Geometry: default histogram (ggplot2 picks the number of bins).
pl + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Explicit bin width, then progressively add outline, fill and transparency.
pl + geom_histogram(binwidth = 0.1)

pl + geom_histogram(binwidth = 0.1, color = "red")

pl + geom_histogram(binwidth = 0.1, color = "red", fill = "pink")

pl + geom_histogram(binwidth = 0.1, color = "red", fill = "pink", alpha = 0.4)

# Adding labels: axis titles and a plot title on top of the styled histogram.
pl2 <- pl +
  geom_histogram(binwidth = 0.1, color = "red", fill = "pink", alpha = 0.4)
pl2 +
  xlab("Movie Rating") +
  ylab("Count") +
  ggtitle("This is the title")

# Fill each bar according to its computed count.
# NOTE(review): recent ggplot2 releases prefer after_stat(count) over the
# ..count.. notation -- confirm the installed version before switching.
pl + geom_histogram(binwidth = 0.1, aes(fill = ..count..))

Scatter Plots

# Plain scatter plot of weight against miles per gallon.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()

# Fixed point size (set outside aes(), so it applies to every point).
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(size = 5)

# Point size mapped to horsepower.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(size = hp))

# Point shape mapped to the number of cylinders (as a factor).
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(shape = factor(cyl)))

# Same plot drawn a second time, as in the original walkthrough.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(shape = factor(cyl)))

# Shape by cylinders, colour by horsepower, fixed size for all points.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(shape = factor(cyl), color = hp), size = 4)

Bar Plots

# Count of cars per vehicle class.
pl <- ggplot(mpg, aes(x = class))
pl + geom_bar()

# Fixed outline and fill colour.
pl + geom_bar(color = "blue", fill = "blue")

# Bars split by drive type: stacked (default), side by side, and proportional.
pl + geom_bar(aes(fill = drv))

pl + geom_bar(aes(fill = drv), position = "dodge")

pl + geom_bar(aes(fill = drv), position = "fill")

Box Plots

# Miles per gallon grouped by number of cylinders.
pl <- ggplot(mtcars, aes(x = factor(cyl), y = mpg))
pl + geom_boxplot()

# Horizontal boxes.
pl +
  geom_boxplot() +
  coord_flip()

# Single fill colour for every box.
pl + geom_boxplot(fill = "green")

# Fill mapped to the cylinder group, drawn with the black-and-white theme.
pl +
  geom_boxplot(aes(fill = factor(cyl))) +
  theme_bw()

Two Variable Plotting

# Movie rating against release year, summarised in two dimensions.
pl <- ggplot(movies, aes(x = year, y = rating))
pl + geom_bin2d()

# Custom fill gradient for the bin counts.
pl +
  geom_bin2d() +
  scale_fill_gradient(high = "red", low = "green")

# Explicit bin size: 3 units on x (year) by 1 unit on y (rating).
pl +
  geom_bin2d(binwidth = c(3, 1)) +
  scale_fill_gradient(high = "red", low = "blue")

# Contour lines of the 2D density estimate.
pl + geom_density_2d()

Coordinates and Faceting

# Highway mileage against engine displacement.
pl <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

# Zoom to a window of the plot.
pl + coord_cartesian(xlim = c(1, 4), ylim = c(15, 30))

# Fixed aspect ratio between the y and x units.
pl + coord_fixed(ratio = 1 / 3)

# Facet in columns by cyl, in rows by drv, and as a drv-by-cyl grid.
pl + facet_grid(. ~ cyl)

pl + facet_grid(drv ~ .)

pl + facet_grid(drv ~ cyl)

Themes

# The default is: theme_set(theme_grey())
# To change it globally: theme_set(theme_minimal())
pl <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()

# Apply the Wall Street Journal theme from ggthemes to this plot only.
pl + theme_wsj()

Exercise

# Peek at the mpg data set (console output kept below for reference).
head(mpg)
## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl      trans   drv   cty   hwy    fl
##          <chr> <chr> <dbl> <int> <int>      <chr> <chr> <int> <int> <chr>
## 1         audi    a4   1.8  1999     4   auto(l5)     f    18    29     p
## 2         audi    a4   1.8  1999     4 manual(m5)     f    21    29     p
## 3         audi    a4   2.0  2008     4 manual(m6)     f    20    31     p
## 4         audi    a4   2.0  2008     4   auto(av)     f    21    30     p
## 5         audi    a4   2.8  1999     6   auto(l5)     f    16    26     p
## 6         audi    a4   2.8  1999     6 manual(m5)     f    18    26     p
## # ... with 1 more variables: class <chr>

# Histogram of highway mileage with 20 bins.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(bins = 20, fill = "red", alpha = 0.5) +
  ggtitle("Histogram")

# Cars per manufacturer, stacked by number of cylinders.
ggplot(mpg, aes(x = manufacturer)) +
  geom_bar(aes(fill = factor(cyl))) +
  ggtitle("Bar Plot of Manufactures by Cyl")

# Peek at the txhousing data set (console output kept below for reference).
head(txhousing)
## # A tibble: 6 × 9
##      city  year month sales   volume median listings inventory     date
##     <chr> <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl>    <dbl>
## 1 Abilene  2000     1    72  5380000  71400      701       6.3 2000.000
## 2 Abilene  2000     2    98  6505000  58700      746       6.6 2000.083
## 3 Abilene  2000     3   130  9285000  58100      784       6.8 2000.167
## 4 Abilene  2000     4    98  9730000  68600      785       6.9 2000.250
## 5 Abilene  2000     5   141 10590000  67300      794       6.8 2000.333
## 6 Abilene  2000     6   156 13910000  66900      780       6.6 2000.417

# Sales against volume, semi-transparent to show overplotting.
ggplot(txhousing, aes(x = sales, y = volume)) +
  geom_point(color = "blue", alpha = 0.3) +
  ggtitle("Scatter Plot")

# Same scatter plot with a red smoother drawn on top.
ggplot(txhousing, aes(x = sales, y = volume)) +
  geom_point(color = "blue", alpha = 0.3) +
  ggtitle("Scatter Plot") +
  geom_smooth(color = "red")

# Economist plot: Human Development Index (HDI) against the Corruption
# Perceptions Index (CPI). The first CSV column is dropped on read.
df <- fread("Economist_Assignment_Data.csv", drop = 1)
head(df)
##        Country HDI.Rank   HDI CPI            Region
## 1: Afghanistan      172 0.398 1.5      Asia Pacific
## 2:     Albania       70 0.739 3.1 East EU Cemt Asia
## 3:     Algeria       96 0.698 2.9              MENA
## 4:      Angola      148 0.486 2.0               SSA
## 5:   Argentina       45 0.797 3.0          Americas
## 6:     Armenia       86 0.716 2.6 East EU Cemt Asia

# Colour mapped inside the point layer only.
ggplot(df, aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region))

# Alternative: ggplot(df, aes(x = CPI, y = HDI, color = Region)) + geom_point()
# group = 1 overrides the colour grouping so a single smoother is fitted.
ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1) +
  geom_smooth(aes(group = 1))

# Without the group override, one smoother is fitted per Region.
ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1) +
  geom_smooth()

# Single linear fit on log(CPI), no confidence band.
# se = FALSE is spelled out: the alias F can be reassigned.
ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1) +
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              se = FALSE, color = "red")

# Same plot with every country labelled.
ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1) +
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              se = FALSE, color = "red") +
  geom_text(aes(label = Country))

# Subset of countries to label on the plot. Names are matched exactly
# against the Country column, so every entry must be spelled as in the data
# ("Spain" was previously misspelled and therefore never labelled).
pointsToLabel <- c("Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
                   "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
                   "India", "Italy", "China", "South Africa", "Spain",
                   "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
                   "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
                   "New Zealand", "Singapore")


# Base Economist-style plot: open circles coloured by Region, a single
# log-linear fit, and text labels only for the countries in pointsToLabel.
# se = FALSE is spelled out: the alias F can be reassigned.
pl3 <- ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1) +
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              se = FALSE, color = "red") +
  geom_text(aes(label = Country), color = "gray20",
            data = subset(df, Country %in% pointsToLabel),
            check_overlap = TRUE)

# Labelled plot with a title.
pl3 + ggtitle("HDI vs CPI")

# Try to make it similar to the Economist chart: theme plus axis names/limits.
pl3 +
  theme_economist_white() +
  scale_x_continuous(name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
                     limits = c(0.9, 10.5), breaks = 1:10) +
  scale_y_continuous(name = "Human Development Index, 2011 (1=Best)",
                     limits = c(0.2, 1.0))

Interactive Visualization

# Build a static ggplot, then convert it into an interactive plotly widget.
pl <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point()

gpl <- ggplotly(pl)
gpl

# Random sample of 1000 diamonds, plotted with qplot and coloured by clarity.
dsamp <- diamonds[sample(nrow(diamonds), 1000), ]
qplot(carat, price, data = dsamp, colour = clarity)

# With no argument, ggplotly() converts the most recent ggplot.
ggplotly()
# Fix the RNG seed so the 1000-row sample of diamonds is reproducible.
set.seed(100)
d <- diamonds[sample(nrow(diamonds), 1000), ]

# Price against carat, one panel per cut, with a smoother per cut.
# The text aesthetic is passed through for use in the plotly tooltip.
p <- ggplot(data = d, aes(x = carat, y = price)) +
  geom_point(aes(text = paste("Clarity:", clarity)), size = 1) +
  geom_smooth(aes(colour = cut, fill = cut)) +
  facet_wrap(~ cut)

# Convert to plotly, keep the widget in gg, and display it.
gg <- ggplotly(p)
gg
# Learn about API authentication here: https://plot.ly/r/getting-started
# Find your api_key here: https://plot.ly/settings/api

# Create synthetic data: two groups of n points, each with its own linear
# relation between x and y plus standard normal noise.
set.seed(20130226)
n <- 200

# Group A: x centred on 2, positive slope.
x1 <- rnorm(n, mean = 2)
y1 <- 1.5 + 0.4 * x1 + rnorm(n)

# Group B: x centred on -1, negative slope.
x2 <- rnorm(n, mean = -1)
y2 <- 3.5 - 1.2 * x2 + rnorm(n)

# Group label: n times "A" followed by n times "B".
class <- rep(c("A", "B"), times = c(n, n))
df <- data.frame(
  x = c(x1, x2),
  y = c(y1, y2),
  colour = class
)



# Volcano: melt the volcano matrix into long form (Var1, Var2, value) and
# draw it as a tile heatmap, then convert to an interactive plot.
volcano_long <- melt(volcano)
p <- ggplot(volcano_long, aes(Var1, Var2, fill = value)) +
  geom_tile()

ggplotly(p)
## Bar Plot

# 19 x 5 numeric matrix: rows are the period labels ("1820-30" ... "2001-06"),
# columns are the source regions (Europe, Asia, Americas, Africa, Oceania).
# The values are listed column by column (R fills matrices in column-major order).
df <- structure(c(106487, 495681, 1597442, 2452577, 2065141, 2271925, 4735484, 3555352, 8056040, 4321887, 2463194, 347566, 621147, 1325727, 1123492, 800368, 761550, 1359737, 1073726, 36, 53, 141, 41538, 64759, 124160, 69942, 74862, 323543, 247236, 112059, 16595, 37028, 153249, 427642, 1588178, 2738157, 2795672, 2265696, 11951, 33424, 62469, 74720, 166607, 404044, 426967, 38972, 361888, 1143671, 1516716, 160037, 354804, 996944, 1716374, 1982735, 3615225, 4486806, 3037122, 17, 54, 55, 210, 312, 358, 857, 350, 7368, 8443, 6286, 1750, 7367, 14092, 28954, 80779, 176893, 354939, 446792, 33333, 69911, 53144, 29169, 18005, 11704, 13363, 18028, 46547, 14574, 8954, 2483, 14693, 25467, 25215, 41254, 46237, 98263, 185986), .Dim = c(19, 5), .Dimnames = list(c("1820-30", "1831-40", "1841-50", "1851-60", "1861-70", "1871-80", "1881-90", "1891-00", "1901-10", "1911-20", "1921-30", "1931-40", "1941-50", "1951-60", "1961-70", "1971-80", "1981-90", "1991-00", "2001-06"), c("Europe", "Asia", "Americas", "Africa", "Oceania")))
# Reshape to long form: one row per (row name, column name) pair plus its value.
df.m <- melt(df)
# Give the two identifier columns descriptive names; the value column keeps its name.
names(df.m)[1:2]<-c("Period", "Region")

# Stacked bar chart of migration per period, in millions, filled by region.
a <- ggplot(df.m, aes(x = Period, y = value / 1e+06, fill = Region)) +
  ggtitle("Migration to the United States by Source Region (1820-2006), In Millions")
b <- a + geom_bar(stat = "identity", position = "stack")

# Pass the plot explicitly rather than relying on ggplotly()'s default of
# the last ggplot, so the right chart is converted regardless of what was
# created or printed beforehand.
ggplotly(b)