exx1

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Variables can be assigned values using the leftward (<-), rightward (->)
# and equals (=) operators.
# Their values can be printed with print() or cat(); cat() concatenates
# several items into one continuous line of output.

# Assignment using the equals operator.
var.1 = c(0,1,2,3)
# Assignment using the leftward operator.
var.2 <- c("learn","R")
# Assignment using the rightward operator. Mixing TRUE with a number coerces
# the logical to numeric, which is why var.3 prints as "1 1" below.
c(TRUE,1) -> var.3 
print(var.1)

## [1] 0 1 2 3

cat ("var.1 is ", var.1 ,"\n")

## var.1 is  0 1 2 3

cat ("var.2 is ", var.2 ,"\n")

## var.2 is  learn R

cat ("var.3 is ", var.3 ,"\n")

## var.3 is  1 1

# A variable has no fixed type: its class follows whatever value it holds.
# Here the same name holds a character string, then a double, then an integer.
var_x <- "Hello" 
cat("The class of var_x is ",class(var_x),"\n")

## The class of var_x is  character

var_x <- 34.5 
cat(" Now the class of var_x is ",class(var_x),"\n")

##  Now the class of var_x is  numeric

# The L suffix makes the literal an integer rather than a double.
var_x <- 27L 
cat(" Next the class of var_x becomes ",class(var_x),"\n")

##  Next the class of var_x becomes  integer

now we run code 2

# Element-wise arithmetic on two numeric vectors of equal length.
# Both operands are defined once and reused by every operator below.
v <- c(2, 5.5, 6)
t <- c(8, 3, 4)

# Addition
print(v + t)

## [1] 10.0  8.5 10.0

# Subtraction
print(v - t)

## [1] -6.0  2.5  2.0

# Multiplication
print(v * t)

## [1] 16.0 16.5 24.0

# Division
print(v / t)

## [1] 0.250000 1.833333 1.500000

# Modulo: the remainder of the first divided by the second
print(v %% t)

## [1] 2.0 2.5 2.0

# Integer division: the quotient of dividing the first by the second
print(v %/% t)

## [1] 0 1 1

# Exponentiation: the first raised to the power of the second
print(v^t)

## [1]  256.000  166.375 1296.000

Now run code 3

# Reassigning a variable: the same name can take a new value, including a
# value of a different type.
x <- 5
print(x)

## [1] 5

# The right-hand side is evaluated first, using the current value of x.
x <- x + 1
print(x)

## [1] 6

# Names are case-sensitive: `X` is not the same variable as `x`. Evaluating a
# bare `X` is left commented out because it would stop with an error if no
# `X` has been defined.
# X

y <- 6
x <- x + y
print(x)

## [1] 12

x <- "some text"
print(x)

## [1] "some text"

# x now holds text, so adding a number to it would raise an error; the
# statement is therefore left commented out.
# x <- x + 1

x <- 3.6
print(x)

## [1] 3.6

Now we run code 4

# Basic arithmetic operators applied to two numbers.
x <- 5
y <- 16
# Addition
x+y

## [1] 21

# Subtraction
x-y

## [1] -11

# Multiplication
x*y

## [1] 80

# Division
y/x

## [1] 3.2

# Integer division: the whole-number part of y divided by x
y%/%x

## [1] 3

# Modulo: the remainder of y divided by x
y%%x

## [1] 1

# Exponentiation: y raised to the power x
y^x

## [1] 1048576

# Names are case-sensitive, so `X` and `x` are two separate variables; only
# `x` is used in the expression below.
X <-9
x<-9
# Remainder of (x - 2) divided by 2, i.e. 7 %% 2
y<-(x-2)%%2
y

## [1] 1

now we run code 5ar

# Arithmetic between a vector and a single number: the number is combined
# with every element of the vector. The vector is defined once and reused.
v <- c(2, 5.5, 6)

# Add 2 to each element
print(v + 2)

## [1] 4.0 7.5 8.0

# Subtract 2 from each element
print(v - 2)

## [1] 0.0 3.5 4.0

# Multiply each element by 2
print(v * 2)

## [1]  4 11 12

# Divide each element by 2
print(v / 2)

## [1] 1.00 2.75 3.00

Now we run code 5br

# Square root of a single number.
x <- 25
sqrt(x)

## [1] 5

# Evaluate the linear expression y = b + a1 * x1 + a2 * x2
# from an intercept, two coefficients and two inputs.
b <- 12
a1 <- 3.5
a2 <- 7.8
x1 <- 1
x2 <- 5
y <- b + a1 * x1 + a2 * x2
y

## [1] 54.5

now we run code 6

# Vector recycling: when two vectors of different lengths are added, the
# shorter one is repeated until it matches the length of the longer one.
v1 <- c(3,8,4,5,0,11) 
v2 <- c(4,11) 
# For this addition v2 is used as c(4,11,4,11,4,11); v2 itself is unchanged.
v3<-v1+v2
v3

## [1]  7 19  8 16  4 22

Now we run code 7

# Create a vector holding the integer sequence 1 to 5.
s<-c(1:5)
s

## [1] 1 2 3 4 5

# Create vector with elements from 5 to 9 incrementing by 0.4. 
print(seq(5, 9, by = 0.4))

##  [1] 5.0 5.4 5.8 6.2 6.6 7.0 7.4 7.8 8.2 8.6 9.0

# A vector holds a single type: mixing strings, a number and a logical
# converts every element to character, as the output and class() show.
s <- c('apple','red',5,TRUE)
s

## [1] "apple" "red"   "5"     "TRUE"

class(s)

## [1] "character"

# A vector built only from numbers stays numeric.
t<-c(5, 3, 5, 6)
t

## [1] 5 3 5 6

class(t)

## [1] "numeric"

# Accessing vector elements using position. 
t <- c("Sun","Mon","Tue","Wed","Thurs","Fri","Sat") 

u <- t[c(2,3,6)] 
print(u)

## [1] "Mon" "Tue" "Fri"

# Question: what will `v <- t[c(-3)]` result in?
# (A negative position drops that element, so it keeps everything but "Tue".)

# Accessing vector elements using logical indexing. 
v <- t[c(TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)] 
print(v)

## [1] "Sun" "Fri"

# Accessing vector elements using negative indexing. 
x <- t[c(-2,-5)] 
print(x)

## [1] "Sun" "Tue" "Wed" "Fri" "Sat"

# Accessing vector elements with an index made of 0s and 1s.
# These are numeric positions, not a logical mask: each 0 selects nothing and
# the single 1 selects the first element, hence the result "Sun" (not "Sat").
y <- t[c(0,0,0,0,0,0,1)] 
print(y)

## [1] "Sun"

Now we run code 8

# Create a list containing a vector, a matrix and a list. 
list_data <- list(c("Jan","Feb","Mar"), matrix(c(3,9,5,1,-2,8), nrow = 2), list("green",12.3)) 

# Naming parts of a list
# Give names to the elements in the list. 
names(list_data) <- c("1st Quarter", "A_Matrix", "A Inner list") 
# Show the list. 
print(list_data)

## $`1st Quarter`
## [1] "Jan" "Feb" "Mar"
## 
## $A_Matrix
##      [,1] [,2] [,3]
## [1,]    3    5   -2
## [2,]    9    1    8
## 
## $`A Inner list`
## $`A Inner list`[[1]]
## [1] "green"
## 
## $`A Inner list`[[2]]
## [1] 12.3

# Note how the names are printed: "1st Quarter" and "A Inner list" contain
# spaces (and one starts with a digit), so they appear in backticks after `$`,
# while A_Matrix is printed as-is.

# Create a list containing a vector, a matrix and a list. 
list_data <- list(c("Jan","Feb","Mar"), matrix(c(3,9,5,1,-2,8), nrow = 2), list("green",12.3)) 
# Give names to the elements in the list. 
names(list_data) <- c("1st Quarter", "A_Matrix", "A Inner list") 
# Add element at the end of the list. 
list_data[4] <- "New element" 
print(list_data[4])

## [[1]]
## [1] "New element"

# Remove the last element. 
list_data[4] <- NULL 
# Print the 4th Element. 
# The list is back to three elements, so position 4 no longer exists and the
# result is a placeholder with an <NA> name and a NULL value.
print(list_data[4])

## $<NA>
## NULL

# Update the 3rd Element. 
# The inner list is replaced by a single string; the element keeps its name.
list_data[3] <- "updated element" 
print(list_data[3])

## $`A Inner list`
## [1] "updated element"

# Create lists. 
# Each list has a single element that holds an integer sequence.
list1 <- list(1:5) 
print(list1)

## [[1]]
## [1] 1 2 3 4 5

list2 <-list(10:14) 
print(list2)

## [[1]]
## [1] 10 11 12 13 14

# Convert the lists to vectors. 
# unlist() flattens each list into a plain vector so arithmetic can be applied.
v1 <- unlist(list1) 
v2 <- unlist(list2) 
print(v1)

## [1] 1 2 3 4 5

print(v2)

## [1] 10 11 12 13 14

# Now add the vectors 
# The addition is element-wise: 1+10, 2+11, ...
result <- v1+v2 
print(result)

## [1] 11 13 15 17 19

# Create a list. 
# The third element is the function `sin` itself (it is not called here), so
# the list stores a function object alongside the numeric values.
list1 <- list(c(2,5,3),21.3,sin) 
# Print the list. 
print(list1)

## [[1]]
## [1] 2 5 3
## 
## [[2]]
## [1] 21.3
## 
## [[3]]
## function (x)  .Primitive("sin")

# [[ ]] extracts the element itself: here the numeric vector in position 1.
list1[[1]]

## [1] 2 5 3

# Here sin(30) is evaluated when the list is built, so the third element is
# the resulting number rather than a function.
list2 <- list(c(2,5,3),21.3,sin(30))
print(list2)

## [[1]]
## [1] 2 5 3
## 
## [[2]]
## [1] 21.3
## 
## [[3]]
## [1] -0.9880316

# Chained extraction: the second value of the vector stored in position 1.
list2[[1]][[2]]

## [1] 5

# A list can contain other lists: list3 holds list1 and list2 as its elements.
list3<- list(list1, list2)
list3

## [[1]]
## [[1]][[1]]
## [1] 2 5 3
## 
## [[1]][[2]]
## [1] 21.3
## 
## [[1]][[3]]
## function (x)  .Primitive("sin")
## 
## 
## [[2]]
## [[2]][[1]]
## [1] 2 5 3
## 
## [[2]][[2]]
## [1] 21.3
## 
## [[2]][[3]]
## [1] -0.9880316

# First element of the first inner list.
list3[[1]][[1]]

## [1] 2 5 3

# str() gives a compact view of the nested structure.
str(list3)

## List of 2
##  $ :List of 3
##   ..$ : num [1:3] 2 5 3
##   ..$ : num 21.3
##   ..$ :function (x)  
##  $ :List of 3
##   ..$ : num [1:3] 2 5 3
##   ..$ : num 21.3
##   ..$ : num -0.988

class(list3)

## [1] "list"

Now we run code 9ar

# Create a matrix. 
# byrow = TRUE fills the 2 x 3 matrix one row at a time.
M = matrix( c('a','a','b','c','b','a'), nrow = 2, ncol = 3, byrow = TRUE) 
print(M)

##      [,1] [,2] [,3]
## [1,] "a"  "a"  "b" 
## [2,] "c"  "b"  "a"

# Create an array.
# Only two values are supplied for 3 x 3 x 2 = 18 cells, so they are repeated
# in turn until the array is full.
a <- array(c('green','yellow'),dim = c(3,3,2))
print(a)

## , , 1
## 
##      [,1]     [,2]     [,3]    
## [1,] "green"  "yellow" "green" 
## [2,] "yellow" "green"  "yellow"
## [3,] "green"  "yellow" "green" 
## 
## , , 2
## 
##      [,1]     [,2]     [,3]    
## [1,] "yellow" "green"  "yellow"
## [2,] "green"  "yellow" "green" 
## [3,] "yellow" "green"  "yellow"

# 20 values fill a 4 x 5 array exactly, column by column.
b <- array(c(1:20),dim = c(4,5))
print(b)

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    5    9   13   17
## [2,]    2    6   10   14   18
## [3,]    3    7   11   15   19
## [4,]    4    8   12   16   20

# A 2 x 5 array has only 10 cells, so only the first 10 of the 20 values are
# used. Note that this variable is named `c`, the same name as the c()
# function; the c(...) calls further down still work, as their output shows.
c <- array(c(1:20),dim = c(2,5))
print(c)

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10

# A 7 x 5 array has 35 cells, more than the 20 values supplied, so after 20
# the values start again from 1.
c <- array(c(1:20),dim = c(7,5))
print(c)

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    8   15    2    9
## [2,]    2    9   16    3   10
## [3,]    3   10   17    4   11
## [4,]    4   11   18    5   12
## [5,]    5   12   19    6   13
## [6,]    6   13   20    7   14
## [7,]    7   14    1    8   15

# d is b with 10 added to every element.
d<- b+10
d

##      [,1] [,2] [,3] [,4] [,5]
## [1,]   11   15   19   23   27
## [2,]   12   16   20   24   28
## [3,]   13   17   21   25   29
## [4,]   14   18   22   26   30

# e is a 5 x 2 array holding 1 to 10.
e <- array(c(1:10),dim = c(5,2))
# f is the matrix product of b (4 x 5) and e (5 x 2), giving a 4 x 2 result.
f <- b %*% e
f

##      [,1] [,2]
## [1,]  175  400
## [2,]  190  440
## [3,]  205  480
## [4,]  220  520

# Make some data
# a and b are reused here as plain numeric vectors, replacing the arrays above.
a = c(1,2,3)
b = c(2,4,6)
c = cbind(a,b) # a function to combine two columns
c

##      a b
## [1,] 1 2
## [2,] 2 4
## [3,] 3 6

str(c)

##  num [1:3, 1:2] 1 2 3 2 4 6
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "a" "b"

x = c(2,2,2)
# `*` multiplies element by element, so the order of the operands does not
# change the result.
a*b

## [1]  2  8 18

b*a

## [1]  2  8 18

# This works (matrix multiplication)
# x has 3 elements and c has 3 rows, so the product is a 1 x 2 matrix.
x%*%c

##       a  b
## [1,] 12 24

now we run code 9br

# Arrays can have any number of dimensions.
# Fill a 3 x 3 x 2 array from two alternating values.
a <- array(c("green", "yellow"), dim = c(3, 3, 2))
print(a)

## , , 1
## 
##      [,1]     [,2]     [,3]    
## [1,] "green"  "yellow" "green" 
## [2,] "yellow" "green"  "yellow"
## [3,] "green"  "yellow" "green" 
## 
## , , 2
## 
##      [,1]     [,2]     [,3]    
## [1,] "yellow" "green"  "yellow"
## [2,] "green"  "yellow" "green" 
## [3,] "yellow" "green"  "yellow"

# Two input vectors of different lengths.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Labels for the rows, the columns and the two matrices of the array.
row.names <- c("ROW1", "ROW2", "ROW3")
column.names <- c("COL1", "COL2", "COL3")
matrix.names <- c("Matrix1", "Matrix2")

# Build the labelled array. The two vectors supply 9 values for 18 cells, so
# the values are repeated and both matrices come out the same.
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)

print(result)

## , , Matrix1
## 
##      COL1 COL2 COL3
## ROW1    5   10   13
## ROW2    9   11   14
## ROW3    3   12   15
## 
## , , Matrix2
## 
##      COL1 COL2 COL3
## ROW1    5   10   13
## ROW2    9   11   14
## ROW3    3   12   15

# Elements are arranged sequentially by row. 
M <- matrix(c(3:14), nrow = 4, byrow = TRUE)  #values from 3 to 14
print(M)

##      [,1] [,2] [,3]
## [1,]    3    4    5
## [2,]    6    7    8
## [3,]    9   10   11
## [4,]   12   13   14

# Elements are arranged sequentially by column. 
N <- matrix(c(3:14), nrow = 4, byrow = FALSE) 
print(N)

##      [,1] [,2] [,3]
## [1,]    3    7   11
## [2,]    4    8   12
## [3,]    5    9   13
## [4,]    6   10   14

# Define the column and row names. 
# These two variables have the same names as the rownames() and colnames()
# functions; here they are plain character vectors.
rownames = c("row1", "row2", "row3", "row4") 
colnames = c("col1", "col2", "col3") 
P <- matrix(c(3:14), nrow = 4, byrow = TRUE, dimnames = list(rownames, colnames)) 
print(P)

##      col1 col2 col3
## row1    3    4    5
## row2    6    7    8
## row3    9   10   11
## row4   12   13   14

# Question: what will `rownames(M) = rownames` do?
# (It would label the four rows of M with the names stored in the variable
# `rownames`.)
  

# Define the column and row names. 
rownames = c("row1", "row2", "row3", "row4") 
colnames = c("col1", "col2", "col3") 
# Create the matrix. 
P <- matrix(c(3:14), nrow = 4, byrow = TRUE, dimnames = list(rownames, colnames)) 
# Show the whole matrix.
P

##      col1 col2 col3
## row1    3    4    5
## row2    6    7    8
## row3    9   10   11
## row4   12   13   14

# Access the element at 3rd column and 1st row. 
print(P[1,3])

## [1] 5

# Access the element at 2nd column and 4th row. 
print(P[4,2])

## [1] 13

# Access only the 2nd row. 
print(P[2,])

## col1 col2 col3 
##    6    7    8

# Access only the 3rd column. 
print(P[,3])

## row1 row2 row3 row4 
##    5    8   11   14

# Question: what will P[,"col3"] do?
# (A column can be selected by name as well as by position, so the result
# matches P[,3] above.)
P[,"col3"]

## row1 row2 row3 row4 
##    5    8   11   14

# Syntax: apply(X, MARGIN, FUN)
#   X      - the array to work on
#   MARGIN - 1 to work by rows, 2 to work by columns, c(1, 2) for both
#   FUN    - the function applied to each slice

# Two input vectors of different lengths, combined into a 3 x 3 x 2 array.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    5   10   13
## [2,]    9   11   14
## [3,]    3   12   15
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    5   10   13
## [2,]    9   11   14
## [3,]    3   12   15

# Sum of each row, taken across all the matrices.
result <- apply(new.array, 1, sum)
print(result)

## [1] 56 68 60

# Sum of each column, taken across all the matrices.
result <- apply(new.array, 2, sum)
print(result)

## [1] 34 66 84

now we run code 10

# Data frames are created with the data.frame() function:
# one named argument per column.
BMI <- data.frame(
  gender = c("Male", "Male", "Female"),
  height = c(152, 171.5, 165),
  weight = c(81, 93, 78),
  Age = c(42, 38, 26)
)
print(BMI)

##   gender height weight Age
## 1   Male  152.0     81  42
## 2   Male  171.5     93  38
## 3 Female  165.0     78  26

# Employee table with integer, character, numeric and Date columns.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)
print(emp.data)

##   emp_id emp_name salary start_date
## 1      1     Rick 623.30 2012-01-01
## 2      2      Dan 515.20 2013-09-23
## 3      3 Michelle 611.00 2014-11-15
## 4      4     Ryan 729.00 2014-05-11
## 5      5     Gary 843.25 2015-03-27

# Structure of the data frame: one line per column with its type.
str(emp.data)

## 'data.frame':    5 obs. of  4 variables:
##  $ emp_id    : int  1 2 3 4 5
##  $ emp_name  : chr  "Rick" "Dan" "Michelle" "Ryan" ...
##  $ salary    : num  623 515 611 729 843
##  $ start_date: Date, format: "2012-01-01" "2013-09-23" ...

# Statistical summary of every column.
print(summary(emp.data))

##      emp_id    emp_name             salary        start_date        
##  Min.   :1   Length:5           Min.   :515.2   Min.   :2012-01-01  
##  1st Qu.:2   Class :character   1st Qu.:611.0   1st Qu.:2013-09-23  
##  Median :3   Mode  :character   Median :623.3   Median :2014-05-11  
##  Mean   :3                      Mean   :664.4   Mean   :2014-01-14  
##  3rd Qu.:4                      3rd Qu.:729.0   3rd Qu.:2014-11-15  
##  Max.   :5                      Max.   :843.2   Max.   :2015-03-27

# Extract specific columns by name. Notice how the new column names are
# derived from the expressions passed in.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)

##   emp.data.emp_name emp.data.salary
## 1              Rick          623.30
## 2               Dan          515.20
## 3          Michelle          611.00
## 4              Ryan          729.00
## 5              Gary          843.25

str(result)

## 'data.frame':    5 obs. of  2 variables:
##  $ emp.data.emp_name: chr  "Rick" "Dan" "Michelle" "Ryan" ...
##  $ emp.data.salary  : num  623 515 611 729 843

# Extract the first two rows (all columns), using row numbers instead of names.
result <- emp.data[1:2, ]
print(result)

##   emp_id emp_name salary start_date
## 1      1     Rick  623.3 2012-01-01
## 2      2      Dan  515.2 2013-09-23

# Add a "dept" column to the data frame.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)

##   emp_id emp_name salary start_date       dept
## 1      1     Rick 623.30 2012-01-01         IT
## 2      2      Dan 515.20 2013-09-23 Operations
## 3      3 Michelle 611.00 2014-11-15         IT
## 4      4     Ryan 729.00 2014-05-11         HR
## 5      5     Gary 843.25 2015-03-27    Finance

# Create the first data frame.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Create the second data frame with the same columns.
# The department is spelled "Finance" so that it matches the first data frame
# and does not create a separate, misspelled category.
emp.newdata <- data.frame(
  emp_id = 6:8,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames: the rows of the second are added below the first.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)

##   emp_id emp_name salary start_date       dept
## 1      1     Rick 623.30 2012-01-01         IT
## 2      2      Dan 515.20 2013-09-23 Operations
## 3      3 Michelle 611.00 2014-11-15         IT
## 4      4     Ryan 729.00 2014-05-11         HR
## 5      5     Gary 843.25 2015-03-27    Finance
## 6      6    Rasmi 578.00 2013-05-21         IT
## 7      7   Pranab 722.50 2013-07-30 Operations
## 8      8    Tusar 632.80 2014-06-17    Finance

now we run code 11

# Factors
# Start from a plain character vector of region labels.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)

##  [1] "East"  "West"  "East"  "North" "North" "East"  "West"  "West"  "West" 
## [10] "East"  "North"

print(is.factor(data))

## [1] FALSE

# Convert the character vector into a factor.
factor_data <- factor(data)
print(factor_data)

##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East North West

print(is.factor(factor_data))

## [1] TRUE

# `data` and `factor_data` from above are reused as they are.
print(factor_data)

##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East North West

# Rebuild the factor with the levels in a chosen order.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

##  [1] East  West  East  North North East  West  West  West  East  North
## Levels: East West North

# Using a factor as a data frame column.
# A text column is stored as a factor when it is converted with factor()
# (older versions of R, before 4.0.0, did this conversion automatically).
# Create the vectors for data frame.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# Create the data frame.
# The factor column is named explicitly. Without `gender =` the column would
# be called `factor.gender.`, and `input_data$gender` below would be NULL.
input_data <- data.frame(height, weight, gender = factor(gender))
print(input_data)

##   height weight gender
## 1    132     48   male
## 2    151     49   male
## 3    162     66 female
## 4    139     53 female
## 5    166     67   male
## 6    147     52 female
## 7    122     40   male

str(input_data)

## 'data.frame':    7 obs. of  3 variables:
##  $ height: num  132 151 162 139 166 147 122
##  $ weight: num  48 49 66 53 67 52 40
##  $ gender: Factor w/ 2 levels "female","male": 2 2 1 1 2 1 2

# Test if the gender column is a factor.
print(is.factor(input_data$gender))

## [1] TRUE

# Print the gender column to see the levels.
print(input_data$gender)

## [1] male   male   female female male   female male  
## Levels: female male