ggplot2 Plotting System

Title: Main procedures and graphs to produce in exploratory data analysis using the ggplot2 plotting system

Synopsis: This document is aimed at helping to remember the main graphs to produce when performing exploratory data analysis.

What do my data look like?

ggplot2 system

Plotting 1 - simplest graph

# One-time setup, if the package is not installed yet:
# install.packages("ggplot2")
library(ggplot2)
data(mpg)  # the mpg dataset ships with ggplot2
# Simplest scatterplot: hwy plotted against displ.
qplot(x = displ, y = hwy, data = mpg)

Plotting 2 - grouping by colour

library(ggplot2)
# Same scatterplot, with the point colour mapped to the drv column.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  color = drv
)

Plotting 3 - adding geoms (points and a smoother)

library(ggplot2)
# Two geoms on the same axes: the raw points plus a smoother.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  geom = c("point", "smooth")
)

Plotting 4 - a single geom (points only)

# Only the point geom is requested.
qplot(x = displ, y = hwy, data = mpg, geom = "point")

Plotting 5 - a single geom (smoother only)

# Only the smooth geom is requested, so no individual points are drawn.
qplot(x = displ, y = hwy, data = mpg, geom = "smooth")

Plotting 6 - density geom

# One variable only: density curve of hwy.
qplot(x = hwy, data = mpg, geom = "density")

Plotting 7 - density geom grouped by colour

# Density of hwy with the line colour mapped to drv.
qplot(
  x = hwy,
  data = mpg,
  geom = "density",
  color = drv
)

Histograms

library(ggplot2)
# Only x is supplied and no geom is named, so qplot() chooses the geom itself
# (the heading above presents the result as a histogram); fill is mapped to drv.
qplot(x = hwy, data = mpg, fill = drv)

Without rows definition (1 row)

library(ggplot2)
# Facet formula ". ~ drv": nothing on the row side, drv on the column side.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  facets = . ~ drv
)

Without columns definition (1 column)

library(ggplot2)
# Facet formula "drv ~ .": drv on the row side, nothing on the column side.
qplot(
  x = hwy,
  data = mpg,
  facets = drv ~ .
)

Facets

    #install.packages("UsingR")
    library(UsingR)
## Loading required package: MASS
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## 
## 
## Attaching package: 'UsingR'
## 
## The following object is masked from 'package:survival':
## 
##     cancer
## 
## The following object is masked from 'package:ggplot2':
## 
##     movies
    data(Galton)
    galton <- Galton
    ## install.packages("reshape")
    library(ggplot2)
    library(reshape)
    # Reshape galton to long form; the plot below uses its value/variable columns.
    long <- melt(galton)
## Using  as id variables
    # Histogram of value (bin width 1, black outlines), filled by variable,
    # with the panels laid out by variable on the column side.
    g <- ggplot(long, aes(x = value, fill = variable)) +
      geom_histogram(colour = "black", binwidth = 1) +
      facet_grid(. ~ variable)
    g

Scatterplot (without color grouping)

library(ggplot2)
# Groups are distinguished by point shape only (no colour mapping).
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  shape = drv
)

Scatterplot (with color grouping)

library(ggplot2)
# drv is mapped to both the point shape and the point colour.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  shape = drv,
  color = drv
)

Scatterplot of multiple factors

library(ggplot2)
# Points plus a smoother, both coloured by drv.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  geom = c("point", "smooth"),
  color = drv
)

Scatter plot of multiple factors with linear regression line in each one (same graph)

library(ggplot2)
# Points coloured by drv with a linear-model ("lm") smoother on the same axes.
# The layers are built explicitly so that method = "lm" goes to geom_smooth()
# alone: qplot() forwards extra arguments to every geom it adds, and
# geom_point() does not accept a method parameter.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(method = "lm")

Multiple scatterplot lines of best fit at same window grouped by one factor

library(ggplot2)
# Points plus a smoother, coloured by drv and split into one panel per drv
# value on the column side of the facet formula.
qplot(
  x = displ,
  y = hwy,
  data = mpg,
  geom = c("point", "smooth"),
  color = drv,
  facets = . ~ drv
)

Multiple scatterplot linear regression lines at same window grouped by one factor

library(ggplot2)
# Points plus a linear-model ("lm") smoother, coloured by drv and split into
# panels by drv. Built with explicit layers so that method = "lm" reaches
# geom_smooth() only: qplot() forwards extra arguments to every geom it adds,
# and geom_point() does not accept a method parameter.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ drv)

Skeleton dataset tutorial

geom_point()

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): the other snippets in this document read "skeletondata.txt"
# (all lowercase). On a case-sensitive file system only one spelling can match
# the real file - confirm which one is correct.
skeleton <- read.table("SkeletonData.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Base plot (data + x/y mapping) with a point layer added on top.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point()
g

Line of best fit

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Point layer plus a smoother with its default settings (line of best fit).
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point() +
  geom_smooth()
g

Linear regression line

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Point layer plus a smoother fitted with method "lm" (linear model line).
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point() +
  geom_smooth(method = "lm")
g

Using facets (multiple graphs)

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Points and an "lm" smoother, split into panels by BMI on the column side.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ BMI)
g

Using facets (colors)

library(ggplot2)
# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
# Points and an "lm" smoother, with BMI mapped to colour and shape and also
# used to split the panels. Built with explicit layers so that method = "lm"
# reaches geom_smooth() only: qplot() forwards extra arguments to every geom
# it adds, and geom_point() does not accept a method parameter.
ggplot(
  skeleton,
  aes(x = Age, y = DGDifference, color = BMI, shape = BMI)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ BMI)

Facets by sex

library(ggplot2)
# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
# Sex must be categorical to be used for colour groups and panels below.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Points plus a smoother, coloured by Sex and split into one panel per Sex.
qplot(
  x = Age,
  y = DGDifference,
  data = skeleton,
  facets = . ~ Sex,
  geom = c("point", "smooth"),
  color = Sex
)

Controlling aesthetics: color, size, alpha

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# color, size and alpha are given as constants (outside aes()), so the same
# values apply to every point.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(color = "steelblue", size = 4, alpha = 1 / 2)
g

geom_point SIZE ALPHA AES

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# size and alpha are constants; color sits inside aes(), so it is mapped to
# the BMI column instead of being a fixed value.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(size = 4, alpha = 1 / 2, aes(color = BMI))
g

Adding Labels, x axis and y axis text

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Points coloured by BMI, with the plot title and both axis labels set.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(aes(color = BMI)) +
  labs(title = "Skeleton Distribution", x = "X Axis", y = "Y Axis")
g

Changing the appearance of the regression line

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Semi-transparent points coloured by BMI, plus an "lm" line drawn in yellow
# with size 4 and line type 3; se = FALSE switches off the confidence band.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(size = 4, alpha = 1 / 2, aes(color = BMI)) +
  geom_smooth(
    color = "yellow",
    size = 4,
    linetype = 3,
    method = "lm",
    se = FALSE
  )
g

Applying a theme

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Points coloured by BMI; theme_bw() replaces the default theme.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(aes(color = BMI)) +
  theme_bw()
g

Changing theme font

# Read the skeleton data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
skeleton <- read.table("skeletondata.txt", header = TRUE)
library(ggplot2)
# Treat Sex as a categorical variable with the two levels 1 and 2.
skeleton$Sex <- factor(skeleton$Sex, levels = c(1, 2))
# Points coloured by BMI; base_family sets the font family used by the theme.
g <- ggplot(skeleton, aes(Age, DGDifference)) +
  geom_point(aes(color = BMI)) +
  theme_bw(base_family = "Times")
g

Cartesian Coordinates

# 100 points: x runs from 1 to 100, y is random normal noise.
testdat <- data.frame(x = 1:100, y = rnorm(100))
testdat[50, 2] <- 100  # intentional outlier in row 50 of the y column
# Line plot of y against x, with the y axis restricted to [-3, 3] through
# coord_cartesian().
g <- ggplot(testdat, aes(x = x, y = y)) +
  geom_line() +
  coord_cartesian(ylim = c(-3, 3))
g