MKT500V Introduction to R Day 1: Basics

Sally Chen

8/25/2018

Slides

http://rpubs.com/sallychen/301448

What is R & RStudio

Course Objectives

Data Classes and Objects

Assign value to object

x1 = 0.02;   #assign values to object
y1 = "0.01"
z1=TRUE;
class(x1);  
## [1] "numeric"
class(y1);
## [1] "character"
class(z1);
## [1] "logical"
X1 = 100;  #Case sensitive x1 != X1
print(X1)
## [1] 100
print(x1)
## [1] 0.02
1x = 200;  # name should not start with number
## Error: <text>:1:2: unexpected symbol
## 1: 1x
##      ^
x1 = 200;  # Overwritten
print(x1)
## [1] 200
2 + "2"
## Error in 2 + "2": non-numeric argument to binary operator

Data Structures

Vector

x = c(1,2,3,4);  #numeric vector
y = c("1","2","hello","R") #character vector
str(x)
##  num [1:4] 1 2 3 4
str(y)
##  chr [1:4] "1" "2" "hello" "R"
numeric(5); # a numeric vector of length 5, filled with zeros
## [1] 0 0 0 0 0
character(3); # a character vector of length 3, filled with empty strings
## [1] "" "" ""
c(1,"hello",TRUE)   # mixed types: every element is coerced to character
## [1] "1"     "hello" "TRUE"

Other ways of constructing numeric vectors

a = 3:7; 
print(a);
## [1] 3 4 5 6 7
a = rep(3,4);  # replicate 3 for 4 times
print(a);
## [1] 3 3 3 3

Some functions for vectors & vector calculation

x
## [1] 1 2 3 4
length(x)  # return the length of x
## [1] 4
sd(x);  # return the standard deviation of x
## [1] 1.290994
min(x);   #return the minimal of x
## [1] 1
x+1;  # add 1 to each element
## [1] 2 3 4 5
exp(x)  # take the exponential of each element
## [1]  2.718282  7.389056 20.085537 54.598150
x[1] = x[1]+10  # add 10 to the first element only
x
## [1] 11  2  3  4
y = c(2,3,4,5)
x
## [1] 11  2  3  4
x*y    # * is by element operation
## [1] 22  6 12 20
sum(x*y) 
## [1] 60

Accessing data from numeric or character vectors

x;
## [1] 11  2  3  4
x[1];
## [1] 11
x[1:3];
## [1] 11  2  3
x[c(1,3)];
## [1] 11  3
x[-3]
## [1] 11  2  4

In-class exercise

Data Type Coercion: Class really matters!

z = c("1","2","3") # create a character vector
class(z)
## [1] "character"
mean(z)  # calculate mean
## Warning in mean.default(z): argument is not numeric or logical: returning
## NA
## [1] NA
is.numeric(z)  # check whether it is numeric
## [1] FALSE
z = as.numeric(z)  # transform data type
z
## [1] 1 2 3
mean(z)
## [1] 2

Missing Values in Vector

x = c(1,4,7,NA,12,19,15,21,20)
mean(x)
## [1] NA
is.na(x)
## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
mean(x,na.rm = TRUE)  # remove NA from x, then calculate the mean()
## [1] 12.375

Function

# General shape of a function definition (a syntax template):
#   arg1, arg2, ... : the arguments the caller passes in
#   statements      : appears to be a placeholder for the body of the function
#   object          : appears to be a placeholder for the value passed to return()
myfunction <- function(arg1, arg2, ... ){
statements
return(object)
}

Built-in Function Example

s = seq(from = 1.1, to = 3.5, by = 0.3); 
print(s);
## [1] 1.1 1.4 1.7 2.0 2.3 2.6 2.9 3.2 3.5

Argument Matching: How to call a function?

seq(from = 1.1, to = 3.5, by = 0.3);
## [1] 1.1 1.4 1.7 2.0 2.3 2.6 2.9 3.2 3.5
seq(1.1,3.5,0.3)
## [1] 1.1 1.4 1.7 2.0 2.3 2.6 2.9 3.2 3.5
seq(by = 0.3,from = 1.1, to = 3.5)
## [1] 1.1 1.4 1.7 2.0 2.3 2.6 2.9 3.2 3.5

Default values in functions

e = rnorm(n=10); # 10 values from N(0,1): mean and sd are not given, so their defaults (0 and 1) apply
print(e)
##  [1]  0.48066306 -0.73348538 -1.23786307 -1.43250440  1.16290540
##  [6]  1.01817997 -0.01531768 -1.97142729  0.80005467 -0.80021797
plot(e)

e1 = rnorm(n=10, mean = 100, sd = 1); # 10 values from a normal distribution with mean 100 and sd 1
print(e1)
##  [1]  99.07570 100.44479 100.47408  99.79697 101.49242 100.54867  98.97238
##  [8]  98.09214  99.62634 100.55920
plot(e1)

Constructing a function

# simplesum: add two values.
#   a, b : the arguments (the two operands)
#   returns a + b
simplesum <- function(a, b) {
  # the value of the last expression evaluated is the function's return value
  a + b
}
print(simplesum(a=10,b=20))
## [1] 30

Constructing a function

# simpledif: subtract the second argument from the first.
#   a : the value to subtract from
#   b : the value to subtract
#   returns a - b, so the order of the arguments matters
simpledif <- function(a, b) {
  # the value of the last expression evaluated is the function's return value
  a - b
}
print(simpledif(10,20))
## [1] -10
print(simpledif(20,10))
## [1] 10

Matrix

Example: constructing a matrix

matrix(data=1:6,nrow = 3,ncol = 2);
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
matrix(data=1:6,nrow = 3,ncol = 2,byrow=TRUE);
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6
matrix(data=1:6);
##      [,1]
## [1,]    1
## [2,]    2
## [3,]    3
## [4,]    4
## [5,]    5
## [6,]    6
a1 = 1:3;
a2 = 4:6;
cbind(a1,a2);
##      a1 a2
## [1,]  1  4
## [2,]  2  5
## [3,]  3  6
a2 = 4:5  # what if a1 and a2 have different lengths? The shorter one is recycled, with a warning
cbind(a1,a2)
## Warning in cbind(a1, a2): number of rows of result is not a multiple of
## vector length (arg 2)
##      a1 a2
## [1,]  1  4
## [2,]  2  5
## [3,]  3  4
rbind(a1,a2)
## Warning in rbind(a1, a2): number of columns of result is not a multiple of
## vector length (arg 2)
##    [,1] [,2] [,3]
## a1    1    2    3
## a2    4    5    4
matrix(c(1,2,3,"4",5,"hello")) # matrix() will coerce everything to character
##      [,1]   
## [1,] "1"    
## [2,] "2"    
## [3,] "3"    
## [4,] "4"    
## [5,] "5"    
## [6,] "hello"

Accessing data from matrix objects

a = matrix(1:6,nrow=3,ncol=2) #create a 3*2 matrix
print(a)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
print(a[1,1]) # access the first cell
## [1] 1
print(a[1,]) # access the first row
## [1] 1 4
print(a[,2]) # access the second column
## [1] 4 5 6
print(a[1:2,1:2]) # access the first 2 rows and first 2 columns (the top 2x2 block)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5

Matrix Calculations

a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
a+1  #by-element add
##      [,1] [,2]
## [1,]    2    5
## [2,]    3    6
## [3,]    4    7
a*a   # by element multiplication
##      [,1] [,2]
## [1,]    1   16
## [2,]    4   25
## [3,]    9   36
sum(a)
## [1] 21
apply(a,sum,MARGIN = 1)  # calculate the sum of each row (MARGIN = 1 means "by row")
## [1] 5 7 9
apply(a,sum,MARGIN = 2) # sum of each column (MARGIN = 2 means "by column")
## [1]  6 15

In-class exercise

Data.frame

Example: Creating a data.frame object

# Build a small data.frame from three column vectors of equal length (one row per player).
name = c("Messi","Ronaldo","Neymar")
age = c(30,32,25) 
golden_ball = c(TRUE,TRUE,FALSE)
# stringsAsFactors = TRUE is spelled out because its default changed to FALSE in R 4.0.0.
# The output recorded later for players$name (the "Levels:" lines) needs name to be a factor.
players = data.frame(name,age,golden_ball, stringsAsFactors = TRUE)
head(players)
##      name age golden_ball
## 1   Messi  30        TRUE
## 2 Ronaldo  32        TRUE
## 3  Neymar  25       FALSE
golden_ball = c(TRUE,TRUE) # golden_ball is one value short: 2 values for 3 players
data.frame(name,age,golden_ball) # data.frame() raises an error here because the column lengths (3 and 2) do not match
## Error in data.frame(name, age, golden_ball): arguments imply differing number of rows: 3, 2
golden_ball = c(TRUE,TRUE,NA) # use NA to indicate missing value 
data.frame(name,age,golden_ball) 
##      name age golden_ball
## 1   Messi  30        TRUE
## 2 Ronaldo  32        TRUE
## 3  Neymar  25          NA
matrix(1:6,nrow=2,ncol=3)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
as.data.frame(matrix(1:6,nrow=2,ncol=3))
##   V1 V2 V3
## 1  1  3  5
## 2  2  4  6

Accessing data from a data.frame

players[1,]
##    name age golden_ball
## 1 Messi  30        TRUE
players[,1]
## [1] Messi   Ronaldo Neymar 
## Levels: Messi Neymar Ronaldo
players$name
## [1] Messi   Ronaldo Neymar 
## Levels: Messi Neymar Ronaldo
players$name[2]
## [1] Ronaldo
## Levels: Messi Neymar Ronaldo

Operations on data.frame

players$age+2   # add 2 to all the element of the age column
## [1] 32 34 27
players$goals = c(20,30,10)  # add one column
players
##      name age golden_ball goals
## 1   Messi  30        TRUE    20
## 2 Ronaldo  32        TRUE    30
## 3  Neymar  25       FALSE    10
new_player = data.frame(name = "Suarez", age = 30, golden_ball= FALSE, goals = 40) # add a new row to existing data.frame
rbind(players,new_player)
##      name age golden_ball goals
## 1   Messi  30        TRUE    20
## 2 Ronaldo  32        TRUE    30
## 3  Neymar  25       FALSE    10
## 4  Suarez  30       FALSE    40
players<-rbind(players,new_player)

In-class exercise

List

my_list = list(players,c(1,2,3),matrix(1:6))
my_list
## [[1]]
##      name age golden_ball goals
## 1   Messi  30        TRUE    20
## 2 Ronaldo  32        TRUE    30
## 3  Neymar  25       FALSE    10
## 4  Suarez  30       FALSE    40
## 
## [[2]]
## [1] 1 2 3
## 
## [[3]]
##      [,1]
## [1,]    1
## [2,]    2
## [3,]    3
## [4,]    4
## [5,]    5
## [6,]    6
my_list[[1]]
##      name age golden_ball goals
## 1   Messi  30        TRUE    20
## 2 Ronaldo  32        TRUE    30
## 3  Neymar  25       FALSE    10
## 4  Suarez  30       FALSE    40

Factor

direction = c("North","West","North","East","South","West") #create a character vector
direction
## [1] "North" "West"  "North" "East"  "South" "West"
class(direction)
## [1] "character"
summary(direction)
##    Length     Class      Mode 
##         6 character character
factor_direction = factor(direction)  # create a factor of direction
factor_direction
## [1] North West  North East  South West 
## Levels: East North South West
class(factor_direction)
## [1] "factor"
summary(factor_direction)
##  East North South  West 
##     1     2     1     2