# R Markdown - DATA FRAME

#DATA FRAME 

# A data frame is a list of equal-length vectors (its columns); each column
# can hold a different type. It combines the row/column layout of a matrix
# with the by-name column access of a list.

# 1) How to define a Data frame

# Five equal-length vectors, one per basic type, used as the columns below.
r1 <- 1:6
r2 <- c("a", "b", "c", "d", "e", "f")  # or can use letters[1:6]
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
r3 <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE)
r4 <- c(1+1i, 2+2i, 3+3i, 4+4i, 5+5i, 6+6i)
r5 <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)

  #Their types
  print(class(r1))
## [1] "integer"
  print(class(r2))
## [1] "character"
  print(class(r3))
## [1] "logical"
  print(class(r4))
## [1] "complex"
  print(class(r5))
## [1] "numeric"
# First way: pass existing vectors; the column names come from the variable
# names (r1 .. r5).
dfexample <- data.frame(r1, r2, r3, r4, r5)
# Second way: name and build each column inline.
dfexample2 <- data.frame(
  r1 = 1:6,
  r2 = c("a", "b", "c", "d", "e", "f"),
  r3 = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE),
  r4 = c(1+1i, 2+2i, 3+3i, 4+4i, 5+5i, 6+6i),
  r5 = c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
)

  # 1a. Confirm the type 

    # str() prints a compact summary: the class, the dimensions, and each
    # column's type followed by its first values.
    str(dfexample)
## 'data.frame':    6 obs. of  5 variables:
##  $ r1: int  1 2 3 4 5 6
##  $ r2: chr  "a" "b" "c" "d" ...
##  $ r3: logi  TRUE TRUE TRUE FALSE FALSE FALSE
##  $ r4: cplx  1+1i 2+2i 3+3i ...
##  $ r5: num  1.5 2.5 3.5 4.5 5.5 6.5
    # The second construction style yields the same structure.
    str(dfexample2)
## 'data.frame':    6 obs. of  5 variables:
##  $ r1: int  1 2 3 4 5 6
##  $ r2: chr  "a" "b" "c" "d" ...
##  $ r3: logi  TRUE TRUE TRUE FALSE FALSE FALSE
##  $ r4: cplx  1+1i 2+2i 3+3i ...
##  $ r5: num  1.5 2.5 3.5 4.5 5.5 6.5
  #1b. Check column names, number of columns, row names and number of rows
    
    names(dfexample) # Column names
## [1] "r1" "r2" "r3" "r4" "r5"
    ncol(dfexample)  # Number of columns
## [1] 5
    length(dfexample) # Number of columns, second method: length counts the columns
## [1] 5
    row.names(dfexample) # Row names (returned as character strings)
## [1] "1" "2" "3" "4" "5" "6"
    nrow(dfexample) # Number of rows
## [1] 6
  #1c. Change column and row names. 
  
    #Column names
    #First method
    # Generate the labels Col1 .. Col5 instead of typing each one out.
    names(dfexample) <- paste0("Col", 1:5)
    # Row names: generate Row1 .. Row6 the same way.
    row.names(dfexample) <- paste0("Row", 1:6)

    # Print the data frame to confirm both sets of labels changed.
    dfexample
##      Col1 Col2  Col3 Col4 Col5
## Row1    1    a  TRUE 1+1i  1.5
## Row2    2    b  TRUE 2+2i  2.5
## Row3    3    c  TRUE 3+3i  3.5
## Row4    4    d FALSE 4+4i  4.5
## Row5    5    e FALSE 5+5i  5.5
## Row6    6    f FALSE 6+6i  6.5
  #1d. Returns both number of rows and columns
  # dim() reports the row count first, then the column count.
  dim(dfexample)
## [1] 6 5
  #1e. Show the first few observations/rows
  head(dfexample, 2) # n defaults to 6 (the first six rows) when omitted
##      Col1 Col2 Col3 Col4 Col5
## Row1    1    a TRUE 1+1i  1.5
## Row2    2    b TRUE 2+2i  2.5
  #1f. Show the last few observations/rows
  tail(dfexample, 2) # n defaults to 6 (the last six rows) when omitted
##      Col1 Col2  Col3 Col4 Col5
## Row5    5    e FALSE 5+5i  5.5
## Row6    6    f FALSE 6+6i  6.5
#2) How to print Data frame / Subset 

  #2a. Show full Data frame

  # Typing the object's name at top level prints the whole data frame.
  dfexample
##      Col1 Col2  Col3 Col4 Col5
## Row1    1    a  TRUE 1+1i  1.5
## Row2    2    b  TRUE 2+2i  2.5
## Row3    3    c  TRUE 3+3i  3.5
## Row4    4    d FALSE 4+4i  4.5
## Row5    5    e FALSE 5+5i  5.5
## Row6    6    f FALSE 6+6i  6.5
  #2b. Show the data frame without its last row

  dfexample[1:5, ] # First method: keep rows 1 to 5
##      Col1 Col2  Col3 Col4 Col5
## Row1    1    a  TRUE 1+1i  1.5
## Row2    2    b  TRUE 2+2i  2.5
## Row3    3    c  TRUE 3+3i  3.5
## Row4    4    d FALSE 4+4i  4.5
## Row5    5    e FALSE 5+5i  5.5
  dfexample[-6, ]  # Second method: a negative index drops row 6
##      Col1 Col2  Col3 Col4 Col5
## Row1    1    a  TRUE 1+1i  1.5
## Row2    2    b  TRUE 2+2i  2.5
## Row3    3    c  TRUE 3+3i  3.5
## Row4    4    d FALSE 4+4i  4.5
## Row5    5    e FALSE 5+5i  5.5
  #2c. Show the data frame without its last column

  dfexample[, 1:4] # First method: keep columns 1 to 4
##      Col1 Col2  Col3 Col4
## Row1    1    a  TRUE 1+1i
## Row2    2    b  TRUE 2+2i
## Row3    3    c  TRUE 3+3i
## Row4    4    d FALSE 4+4i
## Row5    5    e FALSE 5+5i
## Row6    6    f FALSE 6+6i
  dfexample[, -5]  # Second method: a negative index drops column 5
##      Col1 Col2  Col3 Col4
## Row1    1    a  TRUE 1+1i
## Row2    2    b  TRUE 2+2i
## Row3    3    c  TRUE 3+3i
## Row4    4    d FALSE 4+4i
## Row5    5    e FALSE 5+5i
## Row6    6    f FALSE 6+6i
  #2d. Show the data frame without its last row and last column

  dfexample[1:5, 1:4]
##      Col1 Col2  Col3 Col4
## Row1    1    a  TRUE 1+1i
## Row2    2    b  TRUE 2+2i
## Row3    3    c  TRUE 3+3i
## Row4    4    d FALSE 4+4i
## Row5    5    e FALSE 5+5i
# 3) How to access different elements of a Data Frame and return type

# Double brackets with a position extract the column itself as a vector.
dfexample[[1]]
## [1] 1 2 3 4 5 6
typeof(dfexample[[1]])
## [1] "integer"
# Single brackets with a position keep the result as a one-column data frame;
# typeof() reports "list" for it.
dfexample[1]
##      Col1
## Row1    1
## Row2    2
## Row3    3
## Row4    4
## Row5    5
## Row6    6
typeof(dfexample[1])
## [1] "list"
# $ with the column name also extracts the column as a vector.
dfexample$Col1
## [1] 1 2 3 4 5 6
typeof(dfexample$Col1)
## [1] "integer"
# Single brackets with a name: again a one-column data frame.
dfexample["Col1"]
##      Col1
## Row1    1
## Row2    2
## Row3    3
## Row4    4
## Row5    5
## Row6    6
typeof(dfexample["Col1"])
## [1] "list"
# Double brackets with a name: the column vector, same as dfexample$Col1.
dfexample[["Col1"]]
## [1] 1 2 3 4 5 6
typeof(dfexample[["Col1"]])
## [1] "integer"
# Using list syntax [[ or $ will retain the elements' data type & return 
# a vector instead of data frame. 
  
# Using single brackets [ with one index will return the result as a data frame (with 1 variable). 
  
  # 3a. Get a column of element in the Data Frame
    
    dfexample[3] # First method: one-column data frame
##       Col3
## Row1  TRUE
## Row2  TRUE
## Row3  TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
    dfexample$Col3 # Second method: plain vector
## [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE
  # 3b. Get a single element of a column

    dfexample[[2]][2] # First method: extract column 2, then take its 2nd value
## [1] "b"
    dfexample$Col2[2] # Second method: same, selecting the column by name
## [1] "b"
  # 3c. Get a whole row

    dfexample[3, ]
##      Col1 Col2 Col3 Col4 Col5
## Row3    3    c TRUE 3+3i  3.5
  # 3d. Get a single element of a row

    dfexample[3, 1]
## [1] 3
  # 3e. Return specific elements

    # Rows 3 and 4 crossed with columns 3 and 4
    dfexample[c(3, 4), c(3, 4)]
##       Col3 Col4
## Row3  TRUE 3+3i
## Row4 FALSE 4+4i
  # 3f. Keeping the result a data frame

    dfexample[3] # First method: single-index [ stays a data frame
##       Col3
## Row1  TRUE
## Row2  TRUE
## Row3  TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
    class(dfexample[, 3]) # Second method: [row, col] drops to a logical vector
## [1] "logical"
    class(dfexample[, 3, drop = FALSE]) # drop = FALSE keeps the data frame
## [1] "data.frame"
    print(dfexample[, 3])               # Printed as a plain vector
## [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE
    print(dfexample[, 3, drop = FALSE]) # Printed as a one-column data frame
##       Col3
## Row1  TRUE
## Row2  TRUE
## Row3  TRUE
## Row4 FALSE
## Row5 FALSE
## Row6 FALSE
#4 Modifying or Adding column or row
    
    #4a. Modifying the elements of the first column
    # Overwrite column 1 in place with the integers 7 to 12.
    dfexample[1] <- 7:12
    dfexample
##      Col1 Col2  Col3 Col4 Col5
## Row1    7    a  TRUE 1+1i  1.5
## Row2    8    b  TRUE 2+2i  2.5
## Row3    9    c  TRUE 3+3i  3.5
## Row4   10    d FALSE 4+4i  4.5
## Row5   11    e FALSE 5+5i  5.5
## Row6   12    f FALSE 6+6i  6.5
    #4b. Adding a new column (Col6)

      # First method: cbind. The result is only printed here, so dfexample
      # itself is left unchanged.
      cbind(dfexample, Col6 = 1:6)
##      Col1 Col2  Col3 Col4 Col5 Col6
## Row1    7    a  TRUE 1+1i  1.5    1
## Row2    8    b  TRUE 2+2i  2.5    2
## Row3    9    c  TRUE 3+3i  3.5    3
## Row4   10    d FALSE 4+4i  4.5    4
## Row5   11    e FALSE 5+5i  5.5    5
## Row6   12    f FALSE 6+6i  6.5    6
      # Second method: assign to a new name with $; this does modify dfexample.
      dfexample$Col6 <- 1:6
      dfexample
##      Col1 Col2  Col3 Col4 Col5 Col6
## Row1    7    a  TRUE 1+1i  1.5    1
## Row2    8    b  TRUE 2+2i  2.5    2
## Row3    9    c  TRUE 3+3i  3.5    3
## Row4   10    d FALSE 4+4i  4.5    4
## Row5   11    e FALSE 5+5i  5.5    5
## Row6   12    f FALSE 6+6i  6.5    6
      # Third method: assign an existing vector with [[ ]] and the column name.
      col6 <- 1:6
      dfexample[["Col6"]] <- col6
      dfexample
##      Col1 Col2  Col3 Col4 Col5 Col6
## Row1    7    a  TRUE 1+1i  1.5    1
## Row2    8    b  TRUE 2+2i  2.5    2
## Row3    9    c  TRUE 3+3i  3.5    3
## Row4   10    d FALSE 4+4i  4.5    4
## Row5   11    e FALSE 5+5i  5.5    5
## Row6   12    f FALSE 6+6i  6.5    6
    #4c. Adding a new row: rbind (Row7)
    # Supply the new row as a one-row data frame so every value keeps its
    # column's type. Building it with c(14, "g", TRUE, 7+7i, 7.5) would coerce
    # all values to character, and with only five values for six columns the
    # last column would be filled by recycling.
    # The result is only printed; dfexample itself is not modified.
    rbind(
      dfexample,
      data.frame(Col1 = 14L, Col2 = "g", Col3 = TRUE, Col4 = 7+7i,
                 Col5 = 7.5, Col6 = 7L, row.names = "Row7")
    )
##      Col1 Col2  Col3 Col4 Col5 Col6
## Row1    7    a  TRUE 1+1i  1.5    1
## Row2    8    b  TRUE 2+2i  2.5    2
## Row3    9    c  TRUE 3+3i  3.5    3
## Row4   10    d FALSE 4+4i  4.5    4
## Row5   11    e FALSE 5+5i  5.5    5
## Row6   12    f FALSE 6+6i  6.5    6
## Row7   14    g  TRUE 7+7i  7.5    7
    #4d. Removing a component from data frame 
      #First method
      # Assigning NULL to a column removes it. `$` addresses columns only, so
      # a row name such as Row7 cannot be removed this way.
      # Shown on a copy so dfexample still has Col6 for the second method.
      dfcopy <- dfexample
      dfcopy$Col6 <- NULL
      dfcopy
##      Col1 Col2  Col3 Col4 Col5
## Row1    7    a  TRUE 1+1i  1.5
## Row2    8    b  TRUE 2+2i  2.5
## Row3    9    c  TRUE 3+3i  3.5
## Row4   10    d FALSE 4+4i  4.5
## Row5   11    e FALSE 5+5i  5.5
## Row6   12    f FALSE 6+6i  6.5
      #Second method: drop the column by negative index and reassign
      dfexample <- dfexample[, -6]
      dfexample
##      Col1 Col2  Col3 Col4 Col5
## Row1    7    a  TRUE 1+1i  1.5
## Row2    8    b  TRUE 2+2i  2.5
## Row3    9    c  TRUE 3+3i  3.5
## Row4   10    d FALSE 4+4i  4.5
## Row5   11    e FALSE 5+5i  5.5
## Row6   12    f FALSE 6+6i  6.5
#5) Sorting
    # Will show the index of the elements of the particular variables in order.
      
      #5a. Sort Ascending
      # order() returns the row positions that would sort the column.
      # Pass the column as a vector with [[ ]]: dfexample[1] is a one-column
      # data frame, which order() is not meant for and which recent R
      # versions warn about.
      ranks <- order(dfexample[[1]])
      ranks
## [1] 1 2 3 4 5 6
      # Use the positions to reorder the rows, e.g. dfexample[ranks, ].
      #5b. Sort Descending
      dwranks <- order(dfexample[[1]], decreasing = TRUE)
      dwranks
## [1] 6 5 4 3 2 1