Simple ggplot examples

Andrew Heiss — Jan 28, 2014, 11:16 PM

# Load ggplot (install it first if you need to)
library(ggplot2)

# Load built-in data
# mtcars and movies are lazy-loaded datasets, so they can be used by name
# without first copying them into the workspace.
# NOTE(review): newer ggplot2 releases no longer bundle `movies` (it moved to
# the ggplot2movies package) - confirm against the installed version.

# Fix the random seed so the same 1000 movies are drawn on every run and the
# plots below are reproducible
set.seed(1234)
# Select 1000 of the movies
movies <- movies[sample(nrow(movies), 1000), ]

# If you want to use data from Stata, do the following:
# library(foreign)  # Lets you load data saved by other statistics programs
# read.dta(file.choose())
# read.dta() reads a .dta file, and file.choose() opens a file dialog so you
# can navigate to your .dta file. Assign the result to a name (with <-) so you
# can use the data afterwards.

# Alternatively, you can give the actual path to the file, like so:
# read.dta("data.dta")
# If your data is in a CSV file instead, you don't need the foreign library;
# base R can read it directly:
# read.csv(file.choose())


# Univariate stuff
# Continuous data
# Histograms
# Base plot mapping rating to the x axis; the layers added to `p` below reuse it
p <- ggplot(data = movies, mapping = aes(x = rating))
# Histogram with the default bins
p +
  geom_histogram()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1

# Big bins: each bar spans two units of the x variable
p +
  geom_histogram(binwidth = 2)

plot of chunk unnamed-chunk-1

# Small bins: each bar spans a tenth of a unit of the x variable
p +
  geom_histogram(binwidth = 0.1)
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# Density plots
# Density curve of the variable mapped in `p`, filled dark grey
p +
  geom_density(fill = "darkgrey")

plot of chunk unnamed-chunk-1


# Boxplot
# A single boxplot has no natural grouping variable, so a constant factor
# stands in for the x aesthetic
m <- ggplot(data = movies, mapping = aes(x = factor(0), y = rating))
# coord_flip() swaps the axes so the box is drawn horizontally
m +
  geom_boxplot() +
  coord_flip()

plot of chunk unnamed-chunk-1



# Bar charts (categorical)
# Bar chart of the number of cars per cylinder count; factor() makes cyl
# discrete so each distinct value gets its own bar.
# The plot object is deliberately not named `c`, which would shadow the name
# of base R's c() function in the workspace.
cyl_bars <- ggplot(mtcars, aes(x = factor(cyl)))
cyl_bars + geom_bar()

plot of chunk unnamed-chunk-1



# Bivariate stuff
# Continuous + continuous
# Base scatterplot: wt on the x axis, mpg on the y axis
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
# size is set outside aes(), so every point gets the same fixed size
p +
  geom_point(size = 4)

plot of chunk unnamed-chunk-1


# Add other variables
# Map two more variables onto the points: colour by cyl (as a discrete
# factor) and point size by qsec
p +
  geom_point(
    mapping = aes(colour = factor(cyl), size = qsec)
  )

plot of chunk unnamed-chunk-1


# Continuous + categorical
# hwy against cyl, with cyl converted to a factor so the x axis is categorical
p <- ggplot(data = mpg, mapping = aes(x = factor(cyl), y = hwy))
# Overlaid dots: identical observations are drawn on top of one another
p +
  geom_point(size = 4)

plot of chunk unnamed-chunk-1

# Jittered dots: the "jitter" position adjustment spreads overlapping points
p +
  geom_point(size = 4, position = "jitter")

plot of chunk unnamed-chunk-1

# Transparent dots: with alpha below 1, stacked points show up darker
p +
  geom_point(size = 4, alpha = 0.2)

plot of chunk unnamed-chunk-1


# Violin plots
# hwy per cylinder class; the same factor also drives the fill colour
p <- ggplot(
  data = mpg,
  mapping = aes(x = factor(cyl), y = hwy, fill = factor(cyl))
)
# scale = "width" draws every violin with the same maximum width
p +
  geom_violin(scale = "width")

plot of chunk unnamed-chunk-1

# Add jittered dots for fun
# Violins as before, with the individual observations overlaid as small
# jittered points
p +
  geom_violin(scale = "width") +
  geom_point(size = 2, position = "jitter")

plot of chunk unnamed-chunk-1



# Categorical stuff 
# Mosaic plot of Sex against Survived, both taken from the Titanic table
mosaicplot(
  ~ Sex + Survived,
  data = Titanic,
  color = TRUE
)

plot of chunk unnamed-chunk-1

# Mosaic plot of the whole Titanic table rather than a two-variable formula
mosaicplot(
  Titanic,
  color = TRUE
)

plot of chunk unnamed-chunk-1



# Coefficient plots
library(coefplot)  # Install this package
# Linear model of mpg on wt, disp and hp, with cyl entered as a factor so
# each cylinder count gets its own coefficient
model1 <- lm(
  formula = mpg ~ wt + disp + hp + factor(cyl),
  data = mtcars
)
# Print the coefficient table and overall fit statistics
summary(model1)

Call:
lm(formula = mpg ~ wt + disp + hp + factor(cyl), data = mtcars)

Residuals:
   Min     1Q Median     3Q    Max 
-4.274 -1.035 -0.383  0.981  5.419 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   36.0024     2.1307   16.90  1.5e-15 ***
wt            -3.4286     1.0555   -3.25   0.0032 ** 
disp           0.0042     0.0129    0.33   0.7477    
hp            -0.0235     0.0122   -1.93   0.0652 .  
factor(cyl)6  -3.4660     1.4630   -2.37   0.0255 *  
factor(cyl)8  -3.7532     2.8140   -1.33   0.1938    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.48 on 26 degrees of freedom
Multiple R-squared:  0.858, Adjusted R-squared:  0.83 
F-statistic: 31.4 on 5 and 26 DF,  p-value: 3.18e-10
# Coefficient plot for model1, with the title removed and the
# black-and-white theme applied
coefplot(model1) +
  labs(title = NULL) +
  theme_bw()

plot of chunk unnamed-chunk-1


# Scatterplot matrix
library(car)
# Scatterplot matrix of mpg, wt, disp and hp from mtcars; the term after `|`
# names cyl as the grouping variable, and main = NULL supplies no title
scatterplotMatrix(
  ~ mpg + wt + disp + hp | cyl,
  data = mtcars,
  main = NULL
)

plot of chunk unnamed-chunk-1