R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Values can be bound to names with three operators: `=` (equal),
# `<-` (leftward) and `->` (rightward).
# print() displays one value; cat() joins several items into a single
# continuous line of output.

# Equal-sign assignment.
var.1 = c(0, 1, 2, 3)
# Leftward assignment.
var.2 <- c("learn", "R")
# Rightward assignment. TRUE is coerced to 1 because a vector has one type.
c(TRUE, 1) -> var.3

print(var.1)
## [1] 0 1 2 3
cat("var.1 is ", var.1, "\n")
## var.1 is  0 1 2 3
cat("var.2 is ", var.2, "\n")
## var.2 is  learn R
cat("var.3 is ", var.3, "\n")
## var.3 is  1 1

# The class of a variable follows whatever value it currently holds.
var_x <- "Hello"
cat("The class of var_x is ", class(var_x), "\n")
## The class of var_x is  character
var_x <- 34.5
cat(" Now the class of var_x is ", class(var_x), "\n")
##  Now the class of var_x is  numeric
var_x <- 27L
cat(" Next the class of var_x becomes ", class(var_x), "\n")
##  Next the class of var_x becomes  integer

now we run code 2

# Element-wise arithmetic on two numeric vectors of equal length.
# The operands never change, so they are defined once and reused.
# NOTE: the variable `t` has the same name as base::t(); calls such as t(m)
# still resolve to the function.
v <- c(2, 5.5, 6)
t <- c(8, 3, 4)

# Addition.
print(v + t)
## [1] 10.0  8.5 10.0

# Subtraction.
print(v - t)
## [1] -6.0  2.5  2.0

# Multiplication.
print(v * t)
## [1] 16.0 16.5 24.0

# Division.
print(v / t)
## [1] 0.250000 1.833333 1.500000

# Modulo: remainder of dividing the first operand by the second.
print(v %% t)
## [1] 2.0 2.5 2.0

# Integer division: quotient of dividing the first operand by the second.
print(v %/% t)
## [1] 0 1 1

# Exponentiation: the first operand raised to the power of the second.
print(v^t)
## [1]  256.000  166.375 1296.000

Now run code 3

# Start with a number and update it in place.
x <- 5
print(x)
## [1] 5
x <- x + 1
print(x)
## [1] 6
# Referring to `X` here was removed: R is case-sensitive, so `X` would be
# an error unless it had been defined separately from `x`.

# Combine two variables.
y <- 6
x <- x + y
print(x)
## [1] 12

# The same name can be re-bound to a value of another type.
x <- "some text"
print(x)
## [1] "some text"
# `x <- x + 1` is left out on purpose: arithmetic on a character value
# raises an error.

x <- 3.6
print(x)
## [1] 3.6

now we run code 4

# Arithmetic operators applied to two scalars. Each bare expression is
# auto-printed at top level.
x <- 5
y <- 16

x + y
## [1] 21
x - y
## [1] -11
x * y
## [1] 80
y / x
## [1] 3.2
y %/% x  # integer quotient
## [1] 3
y %% x   # remainder
## [1] 1
y^x
## [1] 1048576

# R is case-sensitive: `X` and `x` are two separate variables.
X <- 9
x <- 9
# Remainder of (x - 2) divided by 2.
y <- (x - 2) %% 2
y
## [1] 1

now we run code 5ar

# Arithmetic between a vector and a single number: the number is applied to
# every element. The vector is constant, so it is created once.
v <- c(2, 5.5, 6)

print(v + 2)
## [1] 4.0 7.5 8.0
print(v - 2)
## [1] 0.0 3.5 4.0
print(v * 2)
## [1]  4 11 12
print(v / 2)
## [1] 1.00 2.75 3.00

now we run code 5br

# Square root of a scalar.
x <- 25
sqrt(x)
## [1] 5

# Evaluate the linear expression y = b + a1 * x1 + a2 * x2.
b <- 12
a1 <- 3.5
a2 <- 7.8
x1 <- 1
x2 <- 5
y <- b + a1 * x1 + a2 * x2
y
## [1] 54.5

now we run code 6

# Adding vectors of different lengths: the shorter one is recycled.
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11)
# For the addition, v2 is treated as c(4, 11, 4, 11, 4, 11).
v3 <- v1 + v2
v3
## [1]  7 19  8 16  4 22

now we run code 7

# An integer sequence from 1 to 5.
s <- 1:5
s
## [1] 1 2 3 4 5

# A sequence from 5 to 9 in steps of 0.4.
print(seq(5, 9, by = 0.4))
##  [1] 5.0 5.4 5.8 6.2 6.6 7.0 7.4 7.8 8.2 8.6 9.0

# Mixing types in c() coerces every element to a common type (here character).
s <- c("apple", "red", 5, TRUE)
s
## [1] "apple" "red"   "5"     "TRUE"
class(s)
## [1] "character"

# A purely numeric vector stays numeric.
t <- c(5, 3, 5, 6)
t
## [1] 5 3 5 6
class(t)
## [1] "numeric"

# Select elements by position.
t <- c("Sun", "Mon", "Tue", "Wed", "Thurs", "Fri", "Sat")

u <- t[c(2, 3, 6)]
print(u)
## [1] "Mon" "Tue" "Fri"
# Exercise: what will `v <- t[c(-3)]` result in?

# Select elements with a logical mask (TRUE keeps the element).
v <- t[c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)]
print(v)
## [1] "Sun" "Fri"

# Negative positions drop the named elements.
x <- t[c(-2, -5)]
print(x)
## [1] "Sun" "Tue" "Wed" "Fri" "Sat"

# Indexing with 0s and 1s is NOT a mask: zeros select nothing, so only the
# final 1 counts, and it picks the first element ("Sun").
y <- t[c(0, 0, 0, 0, 0, 0, 1)]
print(y)
## [1] "Sun"

now we run code 8

# Build a list holding a character vector, a numeric matrix and a nested list.
list_data <- list(
  c("Jan", "Feb", "Mar"),
  matrix(c(3, 9, 5, 1, -2, 8), nrow = 2),
  list("green", 12.3)
)

# Label the three top-level elements.
names(list_data) <- c("1st Quarter", "A_Matrix", "A Inner list")

# Display the whole list; names that are not syntactic appear in backticks.
print(list_data)
## $`1st Quarter`
## [1] "Jan" "Feb" "Mar"
## 
## $A_Matrix
##      [,1] [,2] [,3]
## [1,]    3    5   -2
## [2,]    9    1    8
## 
## $`A Inner list`
## $`A Inner list`[[1]]
## [1] "green"
## 
## $`A Inner list`[[2]]
## [1] 12.3

# Append a fourth element; single-bracket indexing returns a one-element list.
list_data[4] <- "New element"
print(list_data[4])
## [[1]]
## [1] "New element"

# Assigning NULL removes that element again.
list_data[4] <- NULL

# The list is back to three elements, so position 4 no longer exists.
print(list_data[4])
## $<NA>
## NULL

# Overwrite the third element; its name is retained.
list_data[3] <- "updated element"
print(list_data[3])
## $`A Inner list`
## [1] "updated element"

# Two single-element lists, each wrapping an integer sequence.
list1 <- list(1:5)
print(list1)
## [[1]]
## [1] 1 2 3 4 5
list2 <- list(10:14)
print(list2)
## [[1]]
## [1] 10 11 12 13 14

# Flatten each list into a plain vector.
v1 <- unlist(list1)
v2 <- unlist(list2)
print(v1)
## [1] 1 2 3 4 5
print(v2)
## [1] 10 11 12 13 14

# The flattened vectors can be added element by element.
result <- v1 + v2
print(result)
## [1] 11 13 15 17 19

# A list may mix a vector, a number and even a function object.
list1 <- list(c(2, 5, 3), 21.3, sin)
print(list1)
## [[1]]
## [1] 2 5 3
## 
## [[2]]
## [1] 21.3
## 
## [[3]]
## function (x)  .Primitive("sin")

# Double brackets extract the element itself rather than a sub-list.
list1[[1]]
## [1] 2 5 3

# Here the third element is the value of sin(30), not the function.
list2 <- list(c(2, 5, 3), 21.3, sin(30))
print(list2)
## [[1]]
## [1] 2 5 3
## 
## [[2]]
## [1] 21.3
## 
## [[3]]
## [1] -0.9880316

# Chained double brackets: second entry of the first element.
list2[[1]][[2]]
## [1] 5

# A list of lists.
list3 <- list(list1, list2)
list3
## [[1]]
## [[1]][[1]]
## [1] 2 5 3
## 
## [[1]][[2]]
## [1] 21.3
## 
## [[1]][[3]]
## function (x)  .Primitive("sin")
## 
## 
## [[2]]
## [[2]][[1]]
## [1] 2 5 3
## 
## [[2]][[2]]
## [1] 21.3
## 
## [[2]][[3]]
## [1] -0.9880316
list3[[1]][[1]]
## [1] 2 5 3

# Compact overview of the nested structure, then its class.
str(list3)
## List of 2
##  $ :List of 3
##   ..$ : num [1:3] 2 5 3
##   ..$ : num 21.3
##   ..$ :function (x)  
##  $ :List of 3
##   ..$ : num [1:3] 2 5 3
##   ..$ : num 21.3
##   ..$ : num -0.988
class(list3)
## [1] "list"

Now we run code 9ar

# A 2 x 3 character matrix, filled row by row.
M <- matrix(
  c("a", "a", "b", "c", "b", "a"),
  nrow = 2, ncol = 3, byrow = TRUE
)
print(M)
##      [,1] [,2] [,3]
## [1,] "a"  "a"  "b" 
## [2,] "c"  "b"  "a"

# A 3 x 3 x 2 array; the two values are recycled to fill all 18 cells.
a <- array(c("green", "yellow"), dim = c(3, 3, 2))
print(a)
## , , 1
## 
##      [,1]     [,2]     [,3]    
## [1,] "green"  "yellow" "green" 
## [2,] "yellow" "green"  "yellow"
## [3,] "green"  "yellow" "green" 
## 
## , , 2
## 
##      [,1]     [,2]     [,3]    
## [1,] "yellow" "green"  "yellow"
## [2,] "green"  "yellow" "green" 
## [3,] "yellow" "green"  "yellow"

# 20 values fill a 4 x 5 array exactly (column by column).
b <- array(1:20, dim = c(4, 5))
print(b)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    5    9   13   17
## [2,]    2    6   10   14   18
## [3,]    3    7   11   15   19
## [4,]    4    8   12   16   20

# Only 10 cells are available, so just the first 10 values are used.
# NOTE: the variable `c` has the same name as base::c(); calls such as
# c(1, 2) still resolve to the function.
c <- array(1:20, dim = c(2, 5))
print(c)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10

# 35 cells are needed, so the 20 values are recycled from the start.
c <- array(1:20, dim = c(7, 5))
print(c)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    8   15    2    9
## [2,]    2    9   16    3   10
## [3,]    3   10   17    4   11
## [4,]    4   11   18    5   12
## [5,]    5   12   19    6   13
## [6,]    6   13   20    7   14
## [7,]    7   14    1    8   15

# Adding a scalar shifts every cell of the matrix.
d <- b + 10
d
##      [,1] [,2] [,3] [,4] [,5]
## [1,]   11   15   19   23   27
## [2,]   12   16   20   24   28
## [3,]   13   17   21   25   29
## [4,]   14   18   22   26   30

# e is 5 x 2, so the matrix product of b (4 x 5) and e is 4 x 2.
e <- array(1:10, dim = c(5, 2))
f <- b %*% e
f
##      [,1] [,2]
## [1,]  175  400
## [2,]  190  440
## [3,]  205  480
## [4,]  220  520

# Make some data: two plain vectors bound together as the columns of a matrix.
a <- c(1, 2, 3)
b <- c(2, 4, 6)
c <- cbind(a, b)
c
##      a b
## [1,] 1 2
## [2,] 2 4
## [3,] 3 6
str(c)
##  num [1:3, 1:2] 1 2 3 2 4 6
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "a" "b"

# `*` multiplies element by element, so the operand order does not matter.
x <- c(2, 2, 2)
a * b
## [1]  2  8 18
b * a
## [1]  2  8 18

# `%*%` is matrix multiplication: x (length 3) times the 3 x 2 matrix c
# gives a 1 x 2 result.
x %*% c
##       a  b
## [1,] 12 24

now we run code 9br

# Arrays can have any number of dimensions.
# A 3 x 3 x 2 character array; the two values are recycled to fill it.
a <- array(c("green", "yellow"), dim = c(3, 3, 2))
print(a)
## , , 1
## 
##      [,1]     [,2]     [,3]    
## [1,] "green"  "yellow" "green" 
## [2,] "yellow" "green"  "yellow"
## [3,] "green"  "yellow" "green" 
## 
## , , 2
## 
##      [,1]     [,2]     [,3]    
## [1,] "yellow" "green"  "yellow"
## [2,] "green"  "yellow" "green" 
## [3,] "yellow" "green"  "yellow"

# Two input vectors of different lengths (9 values in total).
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Labels for each of the three dimensions.
# NOTE: `row.names` has the same name as a base function; calls such as
# row.names(df) still resolve to the function.
column.names <- c("COL1", "COL2", "COL3")
row.names <- c("ROW1", "ROW2", "ROW3")
matrix.names <- c("Matrix1", "Matrix2")

# The 9 values are recycled, so both 3 x 3 slices come out identical.
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)

print(result)
## , , Matrix1
## 
##      COL1 COL2 COL3
## ROW1    5   10   13
## ROW2    9   11   14
## ROW3    3   12   15
## 
## , , Matrix2
## 
##      COL1 COL2 COL3
## ROW1    5   10   13
## ROW2    9   11   14
## ROW3    3   12   15

# Fill a matrix with the values 3 to 14, one row after another.
M <- matrix(3:14, nrow = 4, byrow = TRUE)
print(M)
##      [,1] [,2] [,3]
## [1,]    3    4    5
## [2,]    6    7    8
## [3,]    9   10   11
## [4,]   12   13   14

# Same values, filled one column after another.
N <- matrix(3:14, nrow = 4, byrow = FALSE)
print(N)
##      [,1] [,2] [,3]
## [1,]    3    7   11
## [2,]    4    8   12
## [3,]    5    9   13
## [4,]    6   10   14

# Row and column labels, attached through `dimnames`.
# NOTE: these variables share their names with the base functions
# rownames() and colnames(); function calls still resolve to the functions.
rownames <- c("row1", "row2", "row3", "row4")
colnames <- c("col1", "col2", "col3")
P <- matrix(
  3:14,
  nrow = 4, byrow = TRUE,
  dimnames = list(rownames, colnames)
)
print(P)
##      col1 col2 col3
## row1    3    4    5
## row2    6    7    8
## row3    9   10   11
## row4   12   13   14
# Exercise: what will `rownames(M) = rownames` do?

# Accessing matrix elements; P is shown once more for reference.
P
##      col1 col2 col3
## row1    3    4    5
## row2    6    7    8
## row3    9   10   11
## row4   12   13   14

# Element in the 1st row, 3rd column.
print(P[1, 3])
## [1] 5
# Element in the 4th row, 2nd column.
print(P[4, 2])
## [1] 13
# The whole 2nd row.
print(P[2, ])
## col1 col2 col3 
##    6    7    8
# The whole 3rd column.
print(P[, 3])
## row1 row2 row3 row4 
##    5    8   11   14
# A column can also be selected by its name.
P[, "col3"]
## row1 row2 row3 row4 
##    5    8   11   14

# Syntax: apply(array, margin, function)
#   array  - the array to work on
#   margin - 1 = over rows, 2 = over columns, c(1, 2) = both
# Same input vectors as above, this time without dimension names.
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    5   10   13
## [2,]    9   11   14
## [3,]    3   12   15
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    5   10   13
## [2,]    9   11   14
## [3,]    3   12   15

# Sum of each row across all the matrices.
result <- apply(new.array, 1, sum)
print(result)
## [1] 56 68 60

# Sum of each column across all the matrices.
result <- apply(new.array, 2, sum)
print(result)
## [1] 34 66 84

now we run code 10

# Data frames are created with the data.frame() function: one named
# argument per column, all columns of equal length.
BMI <- data.frame(
  gender = c("Male", "Male", "Female"),
  height = c(152, 171.5, 165),
  weight = c(81, 93, 78),
  Age = c(42, 38, 26)
)
print(BMI)
##   gender height weight Age
## 1   Male  152.0     81  42
## 2   Male  171.5     93  38
## 3 Female  165.0     78  26

# Employee table mixing integer, character, numeric and Date columns.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)
print(emp.data)
##   emp_id emp_name salary start_date
## 1      1     Rick 623.30 2012-01-01
## 2      2      Dan 515.20 2013-09-23
## 3      3 Michelle 611.00 2014-11-15
## 4      4     Ryan 729.00 2014-05-11
## 5      5     Gary 843.25 2015-03-27

# Structure of the data frame: one line per column with its type.
str(emp.data)
## 'data.frame':    5 obs. of  4 variables:
##  $ emp_id    : int  1 2 3 4 5
##  $ emp_name  : chr  "Rick" "Dan" "Michelle" "Ryan" ...
##  $ salary    : num  623 515 611 729 843
##  $ start_date: Date, format: "2012-01-01" "2013-09-23" ...

# Statistical summary of every column.
print(summary(emp.data))
##      emp_id    emp_name             salary        start_date        
##  Min.   :1   Length:5           Min.   :515.2   Min.   :2012-01-01  
##  1st Qu.:2   Class :character   1st Qu.:611.0   1st Qu.:2013-09-23  
##  Median :3   Mode  :character   Median :623.3   Median :2014-05-11  
##  Mean   :3                      Mean   :664.4   Mean   :2014-01-14  
##  3rd Qu.:4                      3rd Qu.:729.0   3rd Qu.:2014-11-15  
##  Max.   :5                      Max.   :843.2   Max.   :2015-03-27

# Extract specific columns by name. Because the arguments are unnamed
# expressions, the new column names are derived from them
# (emp.data.emp_name, emp.data.salary).
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)
##   emp.data.emp_name emp.data.salary
## 1              Rick          623.30
## 2               Dan          515.20
## 3          Michelle          611.00
## 4              Ryan          729.00
## 5              Gary          843.25
str(result)
## 'data.frame':    5 obs. of  2 variables:
##  $ emp.data.emp_name: chr  "Rick" "Dan" "Michelle" "Ryan" ...
##  $ emp.data.salary  : num  623 515 611 729 843

# Extract the first two rows (all columns) by row number.
result <- emp.data[1:2, ]
print(result)
##   emp_id emp_name salary start_date
## 1      1     Rick  623.3 2012-01-01
## 2      2      Dan  515.2 2013-09-23

# Add a column to a data frame: assign a vector of matching length.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)
##   emp_id emp_name salary start_date       dept
## 1      1     Rick 623.30 2012-01-01         IT
## 2      2      Dan 515.20 2013-09-23 Operations
## 3      3 Michelle 611.00 2014-11-15         IT
## 4      4     Ryan 729.00 2014-05-11         HR
## 5      5     Gary 843.25 2015-03-27    Finance

# First data frame for the row-binding example (includes `dept` up front).
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Second data frame with the same columns. The department is spelled
# "Finance" so it matches the value already used in the first data frame
# (it was previously misspelled, which produced a spurious extra department).
emp.newdata <- data.frame(
  emp_id = 6:8,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames: rbind() appends the rows of the second to the first.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)
##   emp_id emp_name salary start_date       dept
## 1      1     Rick 623.30 2012-01-01         IT
## 2      2      Dan 515.20 2013-09-23 Operations
## 3      3 Michelle 611.00 2014-11-15         IT
## 4      4     Ryan 729.00 2014-05-11         HR
## 5      5     Gary 843.25 2015-03-27    Finance
## 6      6    Rasmi 578.00 2013-05-21         IT
## 7      7   Pranab 722.50 2013-07-30 Operations
## 8      8    Tusar 632.80 2014-06-17    Finance

now we run code 11

# Factors store categorical data as a set of levels plus integer codes.

# A plain character vector is not a factor.
# NOTE: the variable `data` has the same name as utils::data(); function
# calls still resolve to the function.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)
##  [1] "East"  "West"  "East"  "North" "North" "East"  "West"  "West"  "West" 
## [10] "East"  "North"
print(is.factor(data))
## [1] FALSE

# factor() converts it; levels default to the sorted unique values.
factor_data <- factor(data)
print(factor_data)
##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East North West
print(is.factor(factor_data))
## [1] TRUE

# Shown again as the starting point for re-ordering the levels.
print(factor_data)
##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East North West

# Passing `levels` explicitly sets the order of the levels; the data
# themselves are unchanged.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)
##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East West North

# Factors inside a data frame.
# data.frame() keeps text columns as character here (see the `chr` columns in
# the str() output of the data-frame examples), so the column is converted
# with factor() explicitly.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# The factor column must be passed by name: an unnamed `factor(gender)`
# argument would produce a column called `factor.gender.`, and
# `input_data$gender` would then be NULL.
input_data <- data.frame(height, weight, gender = factor(gender))
print(input_data)
##   height weight gender
## 1    132     48   male
## 2    151     49   male
## 3    162     66 female
## 4    139     53 female
## 5    166     67   male
## 6    147     52 female
## 7    122     40   male
str(input_data)
## 'data.frame':    7 obs. of  3 variables:
##  $ height: num  132 151 162 139 166 147 122
##  $ weight: num  48 49 66 53 67 52 40
##  $ gender: Factor w/ 2 levels "female","male": 2 2 1 1 2 1 2

# Test whether the gender column is a factor.
print(is.factor(input_data$gender))
## [1] TRUE

# Print the gender column to see the levels.
print(input_data$gender)
## [1] male   male   female female male   female male
## Levels: female male