Building Graphs in R…

J.Korn

January 22, 2019

library(gridExtra)
library(grid)
library(ggplot2)
library(datasets)
library(dplyr)
library(nycflights13)
library(reshape2)

The Dataset to Create the Scatterplot with:

# Treat the cylinder count as a categorical variable so it can later be
# mapped to discrete aesthetics (shape, colour, fill).
mtcars$cyl <- as.factor(mtcars$cyl)
library(printr)
library(knitr)
# str() prints the structure itself and returns NULL, so wrapping it in
# knitr::kable() formats nothing; call it directly.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

Basic Scatter Plot:

library(ggplot2)
# Minimal scatter plot: wt on the x-axis, mpg on the y-axis
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

# Same plot with an explicit point shape (23) and point size (2)
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(shape = 23, size = 2)

Basic Scatter Plot: Change the Point Size

# Scale each point's size by the value of qsec
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(size = qsec))

Basic Scatter Plot: Label points

# Draw the points, then print each row name of mtcars at its point
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  geom_text(label = rownames(mtcars))

Basic Scatter Plot: Add regression lines

`geom_smooth(method = lm)` adds a fitted linear regression line, drawn with its confidence band by default, to a scatter plot:

# Overlay a linear-model (lm) fit on the scatter plot
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = lm)

Basic Scatter Plot: Change the appearance of points and lines

# Points: shape 18 in blue.
# Fit line: dashed, dark red, drawn without the confidence band (se = FALSE).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    se = FALSE,
    color = "darkred",
    linetype = "dashed"
  )

Basic Scatter Plot: Change the confidence interval

# Keep the confidence band and give it a blue fill
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    color = "darkred",
    fill = "blue",
    linetype = "dashed"
  )

Scatter Plot with Multiple Groups: Change the point shapes by a specific column

# One point shape per level of cyl
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, shape = cyl)) +
  geom_point()

Scatter Plot with Multiple Groups: Change the point shape and color

# Map both shape and colour to cyl
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, shape = cyl, color = cyl)
) +
  geom_point()

Scatter Plot with Multiple Groups: Change the point shape and color and sizes

# Map shape, colour and size to cyl
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, shape = cyl, color = cyl, size = cyl)
) +
  geom_point()

Scatter Plot with Multiple Groups: Add regression lines

# With colour and shape mapped to cyl, geom_smooth() fits one lm line per
# cyl group
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = cyl, shape = cyl)
) +
  geom_point() +
  geom_smooth(method = lm)

Scatter Plot with Multiple Groups: Add regression lines and remove CI

# se = FALSE drops the confidence bands;
# fullrange = TRUE extends each regression line
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = cyl, shape = cyl)
) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE)

Scatter Plot with Multiple Groups: Change the Fill Color of the CI Bands

# Map the fill of each confidence band to cyl as well
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = cyl, shape = cyl)
) +
  geom_point() +
  geom_smooth(method = lm, mapping = aes(fill = cyl))

The Dataset to Create the Line Graphs with:

# Keep the hourly weather observations for the first 15 days of January.
# No condition on `origin` is applied here, so rows for every origin in
# `weather` are kept; the grouped line plots rely on that.
df <- weather %>% filter(month == 1 & day <= 15)
library(printr)
library(knitr)
# str() prints the structure itself and returns NULL, so wrapping it in
# knitr::kable() formats nothing; call it directly.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1074 obs. of  15 variables:
##  $ origin    : chr  "EWR" "EWR" "EWR" "EWR" ...
##  $ year      : num  2013 2013 2013 2013 2013 ...
##  $ month     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ day       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hour      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ temp      : num  39 39 39 39.9 39 ...
##  $ dewp      : num  26.1 27 28 28 28 ...
##  $ humid     : num  59.4 61.6 64.4 62.2 64.4 ...
##  $ wind_dir  : num  270 250 240 250 260 240 240 250 260 260 ...
##  $ wind_speed: num  10.36 8.06 11.51 12.66 12.66 ...
##  $ wind_gust : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ precip    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pressure  : num  1012 1012 1012 1012 1012 ...
##  $ visib     : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ time_hour : POSIXct, format: "2013-01-01 01:00:00" "2013-01-01 02:00:00" ...

Basic Line Plot:

# Line graph of hourly temperature using geom_line().
# `df` is not filtered by `origin`, so drawing it as one ungrouped series
# would join readings from different origins at the same hour and make the
# line zig-zag. Restrict the single-line plot to Newark (EWR).
ggplot(
  data = df %>% filter(origin == "EWR"),
  mapping = aes(x = time_hour, y = temp)
) +
  geom_line()

Basic Line Plot: Change the line type

# Dashed line with a point at every observation.
# group = 1 forces a single series, so the data is limited to one origin
# (EWR); otherwise the line would jump between origins at every hour.
ggplot(
  data = df %>% filter(origin == "EWR"),
  mapping = aes(x = time_hour, y = temp, group = 1)
) +
  geom_line(linetype = "dashed") +
  geom_point()

Basic Line Plot: Change the color

# Red line with a point at every observation.
# group = 1 forces a single series, so the data is limited to one origin
# (EWR); otherwise the line would jump between origins at every hour.
ggplot(
  data = df %>% filter(origin == "EWR"),
  mapping = aes(x = time_hour, y = temp, group = 1)
) +
  geom_line(color = "red") +
  geom_point()

Basic Line Plot: Add an arrow to the line

# Line ending in an arrow head, with a point at every observation.
# group = 1 forces a single series, so the data is limited to one origin
# (EWR); otherwise the line would jump between origins at every hour.
ggplot(
  data = df %>% filter(origin == "EWR"),
  mapping = aes(x = time_hour, y = temp, group = 1)
) +
  geom_line(arrow = arrow()) +
  geom_point()

Line Plot with Multiple Groups:

# One line (with points) per origin: grouping by `origin` keeps each
# series separate
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line() +
  geom_point()

Line Plot with Multiple Groups: Change line types

# Same grouping, with fixed styling for every group:
# dashed blue lines of size 1.2 and red points of size 3
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(color = "blue", linetype = "dashed", size = 1.2) +
  geom_point(color = "red", size = 3)

Line Plot with Multiple Groups: Change line types by groups

# Give each origin its own line type
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(mapping = aes(linetype = origin)) +
  geom_point()

Line Plot with Multiple Groups: Change line types and point shapes

# Give each origin its own line type and its own point shape
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(mapping = aes(linetype = origin)) +
  geom_point(mapping = aes(shape = origin))

Line Plot with Multiple Groups: Change line colors by groups

# Colour both the lines and the points by origin
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(mapping = aes(color = origin)) +
  geom_point(mapping = aes(color = origin))

The Dataset to Create the Bar Graphs with:

# Two-row table for the basic bar graphs: one total_bill value per meal time.
# The factor levels are given explicitly so "Lunch" is ordered before
# "Dinner" instead of alphabetically.
df <- data.frame(
  time = factor(c("Lunch", "Dinner"), levels = c("Lunch", "Dinner")),
  total_bill = c(14.89, 17.23)
)
## 'data.frame':    2 obs. of  2 variables:
##  $ time      : Factor w/ 2 levels "Lunch","Dinner": 1 2
##  $ total_bill: num  14.9 17.2

Basic Bargraph:

# stat = "identity": bar heights are taken directly from total_bill
ggplot(data = df, mapping = aes(x = time, y = total_bill)) +
  geom_bar(stat = "identity")

Basic Bargraph: Grouping by color

# Fill each bar according to its time level
ggplot(data = df, mapping = aes(x = time, y = total_bill, fill = time)) +
  geom_bar(stat = "identity")

Basic Bargraph: Add a black outline

# Filled bars with a black outline
ggplot(data = df, mapping = aes(x = time, y = total_bill, fill = time)) +
  geom_bar(stat = "identity", colour = "black")

Basic Bargraph:

To get a bar graph of counts, don't map a variable to y, and use stat="count" (which is the default for geom_bar()) instead of stat="identity":

# No y aesthetic: stat = "count" makes each bar the number of rows per day
ggplot(data = tips, mapping = aes(x = day)) +
  geom_bar(stat = "count")

The Dataset to Create the Bar Graphs with More Groups:

# Four-row table: one total_bill value per sex x time combination.
# `time` lists its levels explicitly so "Lunch" is ordered before "Dinner".
dat1 <- data.frame(
  sex = factor(rep(c("Female", "Male"), each = 2)),
  time = factor(
    rep(c("Lunch", "Dinner"), times = 2),
    levels = c("Lunch", "Dinner")
  ),
  total_bill = c(13.53, 16.81, 16.24, 17.42)
)
## 'data.frame':    4 obs. of  3 variables:
##  $ sex       : Factor w/ 2 levels "Female","Male": 1 1 2 2
##  $ time      : Factor w/ 2 levels "Lunch","Dinner": 1 2 1 2
##  $ total_bill: num  13.5 16.8 16.2 17.4

Bargraph with More Groups: Stacked bargraph

# Bars filled by sex, with no position argument given to geom_bar()
ggplot(data = dat1, mapping = aes(x = time, y = total_bill, fill = sex)) +
  geom_bar(stat = "identity")

Bargraph with More Groups: time on x-axis, color fill grouped

# position_dodge() places the bars for each sex side by side
ggplot(data = dat1, mapping = aes(x = time, y = total_bill, fill = sex)) +
  geom_bar(position = position_dodge(), stat = "identity")

The Dataset to Create the Histograms:

# Make sure cyl is a factor before plotting
mtcars[["cyl"]] <- as.factor(mtcars[["cyl"]])
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

Basic Histogram:

# Quick histogram of mpg via qplot()
qplot(x = mtcars$mpg, geom = "histogram")

Basic Histogram: You can also use the ggplot() function to make the same histogram:

# Histogram of mpg built with ggplot().
# The column is referenced by bare name inside aes(): `data = mtcars` is
# already supplied, so aes(mtcars$mpg) would bypass it.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram()

Basic Histogram: Add Density

# Histogram of mpg with a density curve on top.
# The histogram's y is mapped to the computed density rather than the default
# count, so the bars and the density curve share one scale; on a count scale
# the curve would be squashed near zero.
# `..density..` is used for compatibility with older ggplot2 releases; on
# ggplot2 >= 3.3.0 the equivalent spelling is after_stat(density).
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(mapping = aes(y = ..density..)) +
  geom_density(col = 3)

Basic Histogram: Change the Bin Width

# Histogram of mpg with bins one unit wide.
# The column is referenced by bare name inside aes() since `data = mtcars` is
# supplied; stat = "bin" is what geom_histogram() uses by default, so it is
# not repeated.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 1)

Laying Out Multiple Plots on a Page:

# Build two plots, then place them side by side in a single row
p1 <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(shape = 23, size = 2)
p2 <- qplot(x = mtcars$mpg, geom = "histogram")
grid.arrange(p1, p2, nrow = 1)

Laying Out Multiple Plots on a Page: Something Better!

# Left: a single scatter plot coloured by cyl.
# Right: the same data split into one panel per cyl (two columns, free
# scales), with the colour legend turned off.
p1 <- ggplot(data = mtcars, mapping = aes(mpg, wt, colour = factor(cyl))) +
  geom_point()
p2 <- ggplot(data = mtcars, mapping = aes(mpg, wt, colour = factor(cyl))) +
  geom_point() +
  facet_wrap(~ cyl, ncol = 2, scales = "free") +
  guides(colour = "none") +
  theme()
grid.arrange(p1, p2, nrow = 1)