# R Markdown - DATA FRAME
#DATA FRAME
# A data frame is a list of equal-length vectors; each column can hold a different type.
# Like a super-list: a cross between a matrix and a list.
# 1) How to define a data frame
# Five vectors of equal length, one per atomic type; each becomes one column.
r1 <- 1:6
r2 <- c("a", "b", "c", "d", "e", "f") # equivalently: letters[1:6]
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
r3 <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE)
r4 <- c(1+1i, 2+2i, 3+3i, 4+4i, 5+5i, 6+6i)
r5 <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
# Their types
print(class(r1))
## [1] "integer"
print(class(r2))
## [1] "character"
print(class(r3))
## [1] "logical"
print(class(r4))
## [1] "complex"
print(class(r5))
## [1] "numeric"
# Build the data frame from the existing vectors; the column names are taken
# from the variable names.
dfexample <- data.frame(r1, r2, r3, r4, r5)
# Or define the columns inline, naming each one explicitly.
dfexample2 <- data.frame(
  r1 = 1:6,
  r2 = c("a", "b", "c", "d", "e", "f"),
  r3 = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE),
  r4 = c(1+1i, 2+2i, 3+3i, 4+4i, 5+5i, 6+6i),
  r5 = c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
)
# 1a. Confirm the structure: str() reports the class, the number of
# observations and variables, and the type of every column.
str(dfexample)
## 'data.frame': 6 obs. of 5 variables:
## $ r1: int 1 2 3 4 5 6
## $ r2: chr "a" "b" "c" "d" ...
## $ r3: logi TRUE TRUE TRUE FALSE FALSE FALSE
## $ r4: cplx 1+1i 2+2i 3+3i ...
## $ r5: num 1.5 2.5 3.5 4.5 5.5 6.5
# The inline definition produces the same structure.
str(dfexample2)
## 'data.frame': 6 obs. of 5 variables:
## $ r1: int 1 2 3 4 5 6
## $ r2: chr "a" "b" "c" "d" ...
## $ r3: logi TRUE TRUE TRUE FALSE FALSE FALSE
## $ r4: cplx 1+1i 2+2i 3+3i ...
## $ r5: num 1.5 2.5 3.5 4.5 5.5 6.5
# 1b. Check column names, number of columns, row names and number of rows
names(dfexample) # column names
## [1] "r1" "r2" "r3" "r4" "r5"
ncol(dfexample) # number of columns
## [1] 5
length(dfexample) # number of columns, second method (length counts columns)
## [1] 5
row.names(dfexample) # row names (default: "1", "2", ...)
## [1] "1" "2" "3" "4" "5" "6"
nrow(dfexample) # number of rows
## [1] 6
# 1c. Change column and row names.
# Column names: generate "Col1" ... "Col5" instead of typing each label.
names(dfexample) <- paste0("Col", 1:5)
# Row names: likewise "Row1" ... "Row6".
row.names(dfexample) <- paste0("Row", 1:6)
dfexample
## Col1 Col2 Col3 Col4 Col5
## Row1 1 a TRUE 1+1i 1.5
## Row2 2 b TRUE 2+2i 2.5
## Row3 3 c TRUE 3+3i 3.5
## Row4 4 d FALSE 4+4i 4.5
## Row5 5 e FALSE 5+5i 5.5
## Row6 6 f FALSE 6+6i 6.5
# 1d. Number of rows and number of columns in a single call
dim(dfexample)
## [1] 6 5
# 1e. Show the first few observations/rows
head(dfexample, n = 2) # n defaults to 6 when it is omitted
## Col1 Col2 Col3 Col4 Col5
## Row1 1 a TRUE 1+1i 1.5
## Row2 2 b TRUE 2+2i 2.5
# 1f. Show the last few observations/rows
tail(dfexample, n = 2) # n defaults to 6 (the LAST six rows) when omitted
## Col1 Col2 Col3 Col4 Col5
## Row5 5 e FALSE 5+5i 5.5
## Row6 6 f FALSE 6+6i 6.5
# 2) How to print a data frame / a subset of it
# 2a. Show the full data frame
dfexample
## Col1 Col2 Col3 Col4 Col5
## Row1 1 a TRUE 1+1i 1.5
## Row2 2 b TRUE 2+2i 2.5
## Row3 3 c TRUE 3+3i 3.5
## Row4 4 d FALSE 4+4i 4.5
## Row5 5 e FALSE 5+5i 5.5
## Row6 6 f FALSE 6+6i 6.5
# 2b. Show the data frame without its last row
dfexample[1:5, ] # first method: keep rows 1 to 5
## Col1 Col2 Col3 Col4 Col5
## Row1 1 a TRUE 1+1i 1.5
## Row2 2 b TRUE 2+2i 2.5
## Row3 3 c TRUE 3+3i 3.5
## Row4 4 d FALSE 4+4i 4.5
## Row5 5 e FALSE 5+5i 5.5
dfexample[-6, ] # second method: a negative index drops row 6
## Col1 Col2 Col3 Col4 Col5
## Row1 1 a TRUE 1+1i 1.5
## Row2 2 b TRUE 2+2i 2.5
## Row3 3 c TRUE 3+3i 3.5
## Row4 4 d FALSE 4+4i 4.5
## Row5 5 e FALSE 5+5i 5.5
# 2c. Show the data frame without its last column
dfexample[, 1:4] # first method: keep columns 1 to 4
## Col1 Col2 Col3 Col4
## Row1 1 a TRUE 1+1i
## Row2 2 b TRUE 2+2i
## Row3 3 c TRUE 3+3i
## Row4 4 d FALSE 4+4i
## Row5 5 e FALSE 5+5i
## Row6 6 f FALSE 6+6i
dfexample[, -5] # second method: a negative index drops column 5
## Col1 Col2 Col3 Col4
## Row1 1 a TRUE 1+1i
## Row2 2 b TRUE 2+2i
## Row3 3 c TRUE 3+3i
## Row4 4 d FALSE 4+4i
## Row5 5 e FALSE 5+5i
## Row6 6 f FALSE 6+6i
# 2d. Show the data frame without its last row and last column
# Rows 1 to 5 and columns 1 to 4 are kept, so Row6 and Col5 are left out.
dfexample[1:5, 1:4]
## Col1 Col2 Col3 Col4
## Row1 1 a TRUE 1+1i
## Row2 2 b TRUE 2+2i
## Row3 3 c TRUE 3+3i
## Row4 4 d FALSE 4+4i
## Row5 5 e FALSE 5+5i
# 3) How to access different elements of a data frame, and what type comes back
# Double bracket with a position: extracts the column itself.
dfexample[[1]]
## [1] 1 2 3 4 5 6
typeof(dfexample[[1]])
## [1] "integer"
# Single bracket with a position: a one-column data frame (stored as a list).
dfexample[1]
## Col1
## Row1 1
## Row2 2
## Row3 3
## Row4 4
## Row5 5
## Row6 6
typeof(dfexample[1])
## [1] "list"
# Dollar sign with the column name: extracts the column itself.
dfexample$Col1
## [1] 1 2 3 4 5 6
typeof(dfexample$Col1)
## [1] "integer"
# Single bracket with a name: again a one-column data frame.
dfexample["Col1"]
## Col1
## Row1 1
## Row2 2
## Row3 3
## Row4 4
## Row5 5
## Row6 6
typeof(dfexample["Col1"])
## [1] "list"
# Double bracket with a name: extracts the column itself.
dfexample[["Col1"]]
## [1] 1 2 3 4 5 6
typeof(dfexample[["Col1"]])
## [1] "integer"
# Summary: [[ ]] and $ return the column as a vector of its own type.
# A single bracket with one index, df[i] or df["name"], returns a data frame
# with one variable (typeof "list").
# 3a. Get one column of the data frame
dfexample[3] # first method: one-column data frame
## Col3
## Row1 TRUE
## Row2 TRUE
## Row3 TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
dfexample$Col3 # second method: plain vector
## [1] TRUE TRUE TRUE FALSE FALSE FALSE
# 3b. Get a single element of a column
dfexample[[2]][2] # first method: extract column 2, then its 2nd element
## [1] "b"
dfexample$Col2[2] # second method: same, selecting the column by name
## [1] "b"
# 3c. Get one row of the data frame
dfexample[3, ]
## Col1 Col2 Col3 Col4 Col5
## Row3 3 c TRUE 3+3i 3.5
# 3d. Get a single element of a row (row 3, column 1)
dfexample[3, 1]
## [1] 3
# 3e. Return specific elements
# Rows 3 and 4 of columns 3 and 4
dfexample[3:4, 3:4]
## Col3 Col4
## Row3 TRUE 3+3i
## Row4 FALSE 4+4i
# 3f. Keeping the data.frame class when selecting a single column
dfexample[3] # first method: single-index [ ] keeps the data frame
## Col3
## Row1 TRUE
## Row2 TRUE
## Row3 TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
# Second method: [row, column] indexing drops a single column to a vector ...
class(dfexample[, 3])
## [1] "logical"
# ... unless drop = FALSE is given, which keeps the data.frame class.
class(dfexample[, 3, drop = FALSE])
## [1] "data.frame"
print(dfexample[, 3]) # dropped to a logical vector
## [1] TRUE TRUE TRUE FALSE FALSE FALSE
print(dfexample[, 3, drop = FALSE]) # still a data frame
## Col3
## Row1 TRUE
## Row2 TRUE
## Row3 TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
# 4) Modifying or adding a column or row
# 4a. Replace the values of the first column with 7 to 12
dfexample[1] <- 7:12
dfexample
## Col1 Col2 Col3 Col4 Col5
## Row1 7 a TRUE 1+1i 1.5
## Row2 8 b TRUE 2+2i 2.5
## Row3 9 c TRUE 3+3i 3.5
## Row4 10 d FALSE 4+4i 4.5
## Row5 11 e FALSE 5+5i 5.5
## Row6 12 f FALSE 6+6i 6.5
# 4b. Adding a new column (Col6)
# First method: cbind. The generic dispatches to the data frame method because
# dfexample is a data frame. The result is only printed here, not stored.
cbind(dfexample, Col6 = 1:6)
## Col1 Col2 Col3 Col4 Col5 Col6
## Row1 7 a TRUE 1+1i 1.5 1
## Row2 8 b TRUE 2+2i 2.5 2
## Row3 9 c TRUE 3+3i 3.5 3
## Row4 10 d FALSE 4+4i 4.5 4
## Row5 11 e FALSE 5+5i 5.5 5
## Row6 12 f FALSE 6+6i 6.5 6
# Second method: assign to a new column name with $
dfexample$Col6 <- 1:6
dfexample
## Col1 Col2 Col3 Col4 Col5 Col6
## Row1 7 a TRUE 1+1i 1.5 1
## Row2 8 b TRUE 2+2i 2.5 2
## Row3 9 c TRUE 3+3i 3.5 3
## Row4 10 d FALSE 4+4i 4.5 4
## Row5 11 e FALSE 5+5i 5.5 5
## Row6 12 f FALSE 6+6i 6.5 6
# Third method: assign an existing vector with [[ ]] and the column name
col6 <- 1:6
dfexample[["Col6"]] <- col6
dfexample
## Col1 Col2 Col3 Col4 Col5 Col6
## Row1 7 a TRUE 1+1i 1.5 1
## Row2 8 b TRUE 2+2i 2.5 2
## Row3 9 c TRUE 3+3i 3.5 3
## Row4 10 d FALSE 4+4i 4.5 4
## Row5 11 e FALSE 5+5i 5.5 5
## Row6 12 f FALSE 6+6i 6.5 6
# 4c. Adding a new row: rbind (Row7)
# Build the new row as a one-row data frame so every value keeps its own type
# and every column (including Col6) gets a value of its own.
# A row written as c(14, "g", TRUE, 7+7i, 7.5) would not work as intended:
# c() coerces all of its arguments to one common type (character here), and it
# supplies only five values for six columns.
row7 <- data.frame(
  Col1 = 14L, Col2 = "g", Col3 = TRUE, Col4 = 7+7i, Col5 = 7.5, Col6 = 7L,
  row.names = "Row7"
)
# The result is only printed, not stored, so dfexample keeps its six rows.
rbind(dfexample, row7)
## Col1 Col2 Col3 Col4 Col5 Col6
## Row1 7 a TRUE 1+1i 1.5 1
## Row2 8 b TRUE 2+2i 2.5 2
## Row3 9 c TRUE 3+3i 3.5 3
## Row4 10 d FALSE 4+4i 4.5 4
## Row5 11 e FALSE 5+5i 5.5 5
## Row6 12 f FALSE 6+6i 6.5 6
## Row7 14 g TRUE 7+7i 7.5 7
# 4d. Removing a component (column) from a data frame
# First method: assign NULL to the column by name.
# Demonstrated on a copy so that dfexample still has Col6 for the second
# method. Note that $name <- NULL only removes columns: using a row name such
# as Row7 there leaves the data frame unchanged.
dfcopy <- dfexample
dfcopy$Col6 <- NULL
dfcopy
## Col1 Col2 Col3 Col4 Col5
## Row1 7 a TRUE 1+1i 1.5
## Row2 8 b TRUE 2+2i 2.5
## Row3 9 c TRUE 3+3i 3.5
## Row4 10 d FALSE 4+4i 4.5
## Row5 11 e FALSE 5+5i 5.5
## Row6 12 f FALSE 6+6i 6.5
# Second method: drop the column by negative position and store the result.
dfexample <- dfexample[, -6]
dfexample
## Col1 Col2 Col3 Col4 Col5
## Row1 7 a TRUE 1+1i 1.5
## Row2 8 b TRUE 2+2i 2.5
## Row3 9 c TRUE 3+3i 3.5
## Row4 10 d FALSE 4+4i 4.5
## Row5 11 e FALSE 5+5i 5.5
## Row6 12 f FALSE 6+6i 6.5
# 5) Sorting
# order() returns the row positions that put a column into sorted order; it
# does not rearrange the data frame itself (index with dfexample[ranks, ] to
# do that).
# The column is extracted with [[ ]] so that order() receives a plain vector.
# dfexample[1] would pass a one-column data frame instead, and ordering a
# data frame object is not something order() is documented to support.
# 5a. Sort ascending
ranks <- order(dfexample[[1]])
ranks
## [1] 1 2 3 4 5 6
# 5b. Sort descending
dwranks <- order(dfexample[[1]], decreasing = TRUE)
dwranks
## [1] 6 5 4 3 2 1