R Graphics (Ch 1,2)

Topics

Reading Files

Libraries

  • Install the two libraries used in all the examples: install.packages(c("ggplot2","gcookbook"))
  • Once installed, load the packages with library(ggplot2); library(gcookbook)

Reading Files

  • CSV files with column names
    data = read.csv("datafile.csv",header=TRUE)
  • CSV files without column names
    data = read.csv("datafile.csv", header=FALSE)
  • Manually assign the header names
    names(data) = c("Column1","Column2","Column3")
  • Tab-delimited text files
    data = read.csv("datafile.csv", sep="\t")

Reading Files

  • To ensure that text values are not converted into factors
    data = read.csv("datafile.csv", stringsAsFactors=FALSE)
  • To convert to factor
    data$Sex = factor(data$Sex)

Reading Excel files (.xlsx, .xls)

install.packages("gdata")
library(gdata)

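  • Read the worksheet (read.xlsx() is from the xlsx package, which must be installed and loaded separately; read.xls() is from gdata)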
    data = read.xlsx("datafile.xlsx", sheetIndex=2)
    data = read.xlsx("datafile.xls", sheetName="Revenues")
    data = read.xls("datafile.xls", sheet=2)

Reading SPSS/Stata files

install.packages("foreign")
library(foreign)
data = read.spss("datafile.sav")
data = read.dta("datafile.dta")

Quickly Exploring Data

Scatter plot - Basic

plot(mtcars$wt, mtcars$mpg)   

plot of chunk unnamed-chunk-1

Using qplot

library(ggplot2)
qplot(mtcars$wt, mtcars$mpg) #qplot(wt, mpg, data=mtcars)

plot of chunk unnamed-chunk-2

Using ggplot

ggplot(mtcars, aes(x=wt, y=mpg)) + geom_point()

plot of chunk unnamed-chunk-3

Basic Line Plot

plot(pressure$temperature, pressure$pressure, type="l")

plot of chunk unnamed-chunk-4

Basic Line Plot with points added

plot(pressure$temperature, pressure$pressure, type="l")
points(pressure$temperature, pressure$pressure)

plot of chunk unnamed-chunk-5

To add a new line and points

plot(pressure$temperature, pressure$pressure, type="l")
lines(pressure$temperature, pressure$pressure/2, col="red")
points(pressure$temperature, pressure$pressure/2, col="red")

plot of chunk unnamed-chunk-6

Using ggplot

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()

plot of chunk unnamed-chunk-7

Using ggplot with points added

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point()

plot of chunk unnamed-chunk-8

Creating a Bar Graph

barplot(BOD$demand, names.arg=BOD$Time)

plot of chunk unnamed-chunk-9

Creating a Bar Graph

table(mtcars$cyl)

 4  6  8 
11  7 14 
# Generate a table of counts
barplot(table(mtcars$cyl))

plot of chunk unnamed-chunk-10

Using qplot (ggplot2)

library(ggplot2)
qplot(BOD$Time, BOD$demand, geom="bar", stat="identity")

plot of chunk unnamed-chunk-11

Using qplot (ggplot2) with a discrete x variable

# Convert the x variable to a factor, so that it is treated as discrete
qplot(factor(BOD$Time), BOD$demand, geom="bar", stat="identity")

plot of chunk unnamed-chunk-12

Plotting counts

# cyl is continuous here
qplot(mtcars$cyl)

plot of chunk unnamed-chunk-13

Plotting counts

# Treat cyl as discrete
qplot(factor(mtcars$cyl))

plot of chunk unnamed-chunk-14

Using ggplot

ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")

plot of chunk unnamed-chunk-15

Using ggplot to create Bar graph of counts

# Bar graph of counts
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar()

plot of chunk unnamed-chunk-16

Creating a histogram

hist(mtcars$mpg)

plot of chunk unnamed-chunk-17

Histogram with breaks

# Specify approximate number of bins with breaks
hist(mtcars$mpg, breaks=10)

plot of chunk unnamed-chunk-18

Using ggplot

library(ggplot2)
ggplot(mtcars, aes(x=mpg)) + geom_histogram(binwidth=4)

plot of chunk unnamed-chunk-19

Creating a Box Plot

plot(ToothGrowth$supp, ToothGrowth$len)

plot of chunk unnamed-chunk-20

Creating a Box Plot

# Formula syntax
boxplot(len ~ supp, data = ToothGrowth)

plot of chunk unnamed-chunk-21

Creating a Box Plot

# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)

plot of chunk unnamed-chunk-22

Box Plot using ggplot

ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot()

plot of chunk unnamed-chunk-23

Box Plot using ggplot

ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()

plot of chunk unnamed-chunk-24

Plotting a function curve

curve(x^3 - 5*x, from=-4, to=4)

plot of chunk unnamed-chunk-25