Create variables

# assign values to two variables (`<-` is the idiomatic assignment operator in R)
var1 <- 0
var2 <- 10
# addition
var1 + var2
## [1] 10
# multiplication
var1 * var2
## [1] 0

Basic computations

# basic arithmetic
10 * 2
## [1] 20
# operators of equal precedence are evaluated left to right: (6 / 2) * 3
6 / 2 * (2 + 1)
## [1] 9
# exponential function, e^5
exp(5)
## [1] 148.4132
# square root
sqrt(4)
## [1] 2
# functions accept variables as well as literal numbers
exp(var1)
## [1] 1
# logarithm of 10 in base 2
log(10, base = 2)
## [1] 3.321928
# open the help page for log() (same as typing ?log)
help("log")

Vectors

x <- c(1, 2, 3, 4, 5, 6)
# 5 is added to each element in the vector x
x + 5
## [1]  6  7  8  9 10 11
# each element in x is multiplied by 5
x * 5
## [1]  5 10 15 20 25 30
# each element in x is divided by 5
x / 5
## [1] 0.2 0.4 0.6 0.8 1.0 1.2
# y is shorter than x, so its elements are recycled: 2, 3, 2, 3, 2, 3
y <- c(2, 3)
x + y
## [1] 3 5 5 7 7 9
x * y
## [1]  2  6  6 12 10 18
z <- c(1, 3, 5, 6, 5, 3)
# inner product of z and x; t() is the transpose function
t(z) %*% x
##      [,1]
## [1,]   89
# element-wise product
z * x
## [1]  1  6 15 24 25 18
# number of elements in z
length(z)
## [1] 6
# append 10 to the end of z
z <- c(z, 10)
z[1] # first entry
## [1] 1
z[7] # 7th entry
## [1] 10
z[length(z)] # last entry
## [1] 10

Matrices

# byrow = TRUE fills the matrix one row at a time
A <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
A
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
# byrow = FALSE fills the matrix one column at a time
A2 <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = FALSE)
A2
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# matrix product
A %*% A
##      [,1] [,2]
## [1,]    7   10
## [2,]   15   22
# element-wise product
A * A
##      [,1] [,2]
## [1,]    1    4
## [2,]    9   16
A[2, 2] # 2nd row, 2nd col
## [1] 4
A[1, 2] # 1st row, 2nd col
## [1] 2

Lists

# integer sequence from 1 to 6
v <- 1:6
v
## [1] 1 2 3 4 5 6
# 2 x 2 matrix filled by row (TRUE is spelled out: T can be reassigned)
m <- matrix(c(1, 0, 0, 1), byrow = TRUE, nrow = 2)
m
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
# a list can hold objects of different types and sizes
l <- list(v, m)
l
## [[1]]
## [1] 1 2 3 4 5 6
## 
## [[2]]
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1

We can index specific elements like this:

l[[2]][2, 1] # 2nd list element (the matrix): row 2, column 1
## [1] 0
l[[1]][4] # 1st list element (the vector): 4th entry
## [1] 4
v2 <- 3:4 # create another vector
l[[3]] <- v2 # add the newly created vector as a third list element

Install and load packages

# install ggplot2 only when it is missing, so re-running the script does not
# download and reinstall the package every time
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# attach the package for the current session
library(ggplot2)

Read in data

# open the documentation for the mpg data set (same as typing ?mpg)
help("mpg")
mpg {ggplot2} — R Documentation

Fuel economy data from 1999 to 2008 for 38 popular models of cars

Description

This dataset contains a subset of the fuel economy data that the EPA makes available on http://fueleconomy.gov. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.

Format

A data frame with 234 rows and 11 variables:

manufacturer

manufacturer name

model

model name

displ

engine displacement, in litres

year

year of manufacture

cyl

number of cylinders

trans

type of transmission

drv

the type of drive train, where f = front-wheel drive, r = rear wheel drive, 4 = 4wd

cty

city miles per gallon

hwy

highway miles per gallon

fl

fuel type

class

“type” of car

# load the mpg data set that ships with ggplot2
data("mpg")

Summarize data

# str() gives a compact overview: dimensions, then the type and first few
# values of every column
str(mpg) # structure of the data set
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# summary() reports min/quartiles/mean/max for numeric columns; character
# columns only show their length, class and mode
summary(mpg) # column-by-column summary of the data set
##  manufacturer          model               displ            year     
##  Length:234         Length:234         Min.   :1.600   Min.   :1999  
##  Class :character   Class :character   1st Qu.:2.400   1st Qu.:1999  
##  Mode  :character   Mode  :character   Median :3.300   Median :2004  
##                                        Mean   :3.472   Mean   :2004  
##                                        3rd Qu.:4.600   3rd Qu.:2008  
##                                        Max.   :7.000   Max.   :2008  
##       cyl           trans               drv                 cty       
##  Min.   :4.000   Length:234         Length:234         Min.   : 9.00  
##  1st Qu.:4.000   Class :character   Class :character   1st Qu.:14.00  
##  Median :6.000   Mode  :character   Mode  :character   Median :17.00  
##  Mean   :5.889                                         Mean   :16.86  
##  3rd Qu.:8.000                                         3rd Qu.:19.00  
##  Max.   :8.000                                         Max.   :35.00  
##       hwy             fl               class          
##  Min.   :12.00   Length:234         Length:234        
##  1st Qu.:18.00   Class :character   Class :character  
##  Median :24.00   Mode  :character   Mode  :character  
##  Mean   :23.44                                        
##  3rd Qu.:27.00                                        
##  Max.   :44.00

Tabulations

  • Obtain tabulations using table()
# one-way frequency table; manufacturer is a categorical variable
table(mpg$manufacturer)
## 
##       audi  chevrolet      dodge       ford      honda    hyundai       jeep 
##         18         19         37         25          9         14          8 
## land rover    lincoln    mercury     nissan    pontiac     subaru     toyota 
##          4          3          4         13          5         14         34 
## volkswagen 
##         27
# one-way frequency table of the year column
table(mpg$year)
## 
## 1999 2008 
##  117  117
# two-way table: manufacturers in rows, years in columns
table(mpg$manufacturer, mpg$year)
##             
##              1999 2008
##   audi          9    9
##   chevrolet     7   12
##   dodge        16   21
##   ford         15   10
##   honda         5    4
##   hyundai       6    8
##   jeep          2    6
##   land rover    2    2
##   lincoln       2    1
##   mercury       2    2
##   nissan        6    7
##   pontiac       3    2
##   subaru        6    8
##   toyota       20   14
##   volkswagen   16   11

Simple Descriptive Stats

  • Compute the mean, standard deviation, range, etc.
# arithmetic mean of engine displacement
mean(mpg$displ)
## [1] 3.471795
# standard deviation of engine displacement
sd(mpg$displ)
## [1] 1.291959
# range() returns the minimum and the maximum as a length-2 vector
range(mpg$displ)
## [1] 1.6 7.0

Simple plots

Histogram

# histogram of engine displacement
hist(
  mpg$displ,
  main = "Engine displacement (in liters)", # title
  col = terrain.colors(20),                 # color
  xlim = c(0, 8)                            # range of x axis
)

  • Learn more about how to change the attributes of the plot with ?hist
  • Learn more about using color in R with ?colors and ?terrain.colors

Boxplot

# single boxplot of engine displacement over the whole data set
boxplot(mpg$displ, main = "Engine displacement (in liters)")

# one boxplot per manufacturer (formula syntax: values ~ grouping variable)
boxplot(
  mpg$displ ~ mpg$manufacturer,
  main = "Engine displacement (in liters) by Manufacturer"
)

Barplot

  • Use the output of table() to produce a bar plot
# bar heights are the per-manufacturer counts returned by table()
barplot(
  table(mpg$manufacturer),
  main = "Counts by Manufacturer",
  ylim = c(0, 40) # range of y axis
)

  • Version 2: horizontal bars, so that the manufacturer labels are displayed
# same counts as a horizontal bar plot
barplot(
  table(mpg$manufacturer),
  main = "Counts by Manufacturer",
  xlim = c(0, 40), # counts are on the x axis once the bars are horizontal
  horiz = TRUE,    # flip axes
  las = 1          # draw axis labels horizontally
)