Types of Data - Vectors: numeric, character, logical
# One example vector for each of the three basic atomic types.
a <- seq(1, 20, by = 2)               # numeric: odd numbers from 1 up to 19
b <- letters[1:10]                    # character: first ten lowercase letters
c <- rep(c(TRUE, FALSE), times = 5)   # logical: TRUE/FALSE alternating, 10 values
# Note: `c` here is a data object; calls such as c(TRUE, FALSE) still find
# the base function, because R skips non-function objects when calling.
a
## [1] 1 3 5 7 9 11 13 15 17 19
b
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
c
## [1] TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE
# class() reports the data type of an object.
class(a)
## [1] "numeric"
class(b)
## [1] "character"
class(c)
## [1] "logical"
Types of Data - matrix, data frame, list
# Hold the integers 1..15 as a 5 x 3 matrix, as a data frame, and inside a list.
d <- matrix(1:15, ncol = 3, nrow = 5, byrow = TRUE) # byrow = TRUE fills the matrix row by row
e <- as.data.frame(d) # convert the matrix into a data frame (columns are auto-named V1..V3)
f <- list(d, e) # a list can hold objects of different types side by side
d ; e ; f
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
## [5,] 13 14 15
## V1 V2 V3
## 1 1 2 3
## 2 4 5 6
## 3 7 8 9
## 4 10 11 12
## 5 13 14 15
## [[1]]
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
## [5,] 13 14 15
##
## [[2]]
## V1 V2 V3
## 1 1 2 3
## 2 4 5 6
## 3 7 8 9
## 4 10 11 12
## 5 13 14 15
# functions named as.<type>() (e.g. as.data.frame()) convert an existing object to that type
# functions without the as. prefix (e.g. matrix(), list()) build a new object from simpler pieces
class(d) ; class(e) ; class(f)
## [1] "matrix"
## [1] "data.frame"
## [1] "list"
# Note: on R >= 4.0.0 class(d) prints "matrix" "array", because matrices also inherit from "array".
Commonly used commands
t(d) # transpose: rows become columns (this call reproduces the output below)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 4 7 10 13
## [2,] 2 5 8 11 14
## [3,] 3 6 9 12 15
dim(d) # dimensions of the matrix: number of rows, then number of columns
## [1] 5 3
table(c) # count how often each distinct value occurs
## c
## FALSE TRUE
## 5 5
summary(d) # per-column summary statistics (min, quartiles, median, mean, max)
## V1 V2 V3
## Min. : 1 Min. : 2 Min. : 3
## 1st Qu.: 4 1st Qu.: 5 1st Qu.: 6
## Median : 7 Median : 8 Median : 9
## Mean : 7 Mean : 8 Mean : 9
## 3rd Qu.:10 3rd Qu.:11 3rd Qu.:12
## Max. :13 Max. :14 Max. :15
head(d) # first 6 rows at most; d has only 5 rows, so all of them are shown
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
## [5,] 13 14 15
tail(d) # last 6 rows at most; d has only 5 rows, so all of them are shown
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
## [5,] 13 14 15
cbind(a, b, c) # bind the vectors together as columns; mixed types are coerced to character
## a b c
## [1,] "1" "a" "TRUE"
## [2,] "3" "b" "FALSE"
## [3,] "5" "c" "TRUE"
## [4,] "7" "d" "FALSE"
## [5,] "9" "e" "TRUE"
## [6,] "11" "f" "FALSE"
## [7,] "13" "g" "TRUE"
## [8,] "15" "h" "FALSE"
## [9,] "17" "i" "TRUE"
## [10,] "19" "j" "FALSE"
rbind(a, b, c) # bind the vectors together as rows; mixed types are coerced to character
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## a "1" "3" "5" "7" "9" "11" "13" "15" "17"
## b "a" "b" "c" "d" "e" "f" "g" "h" "i"
## c "TRUE" "FALSE" "TRUE" "FALSE" "TRUE" "FALSE" "TRUE" "FALSE" "TRUE"
## [,10]
## a "19"
## b "j"
## c "FALSE"
# merge() joins two data sets; `by` names the key column used to match rows.
# The matrix d is converted to a data frame first, so its columns are V1..V3 as well.
merge(d, e, by = "V1")
## V1 V2.x V3.x V2.y V3.y
## 1 1 2 3 2 3
## 2 4 5 6 5 6
## 3 7 8 9 8 9
## 4 10 11 12 11 12
## 5 13 14 15 14 15
Data Manipulation
# Indexing is [row, column]; an empty position selects everything along that dimension.
d[1, 2] # the single value in the first row, second column
## [1] 2
d[, -2] # every column except the second (d itself is not modified)
## [,1] [,2]
## [1,] 1 3
## [2,] 4 6
## [3,] 7 9
## [4,] 10 12
## [5,] 13 15
colnames(e) # column names of the data frame
## [1] "V1" "V2" "V3"
rownames(e) # row names of the data frame
## [1] "1" "2" "3" "4" "5"
as.character(d[, 2]) # convert just the selected column to character
## [1] "2" "5" "8" "11" "14"
head(paste(d[, 1], "%", sep = ",")) # join each value with "%", using "," as the separator
## [1] "1,%" "4,%" "7,%" "10,%" "13,%"
head(round(d[, 1], 2)) # round to 2 decimal places (whole numbers are unchanged)
## [1] 1 4 7 10 13