Data Frame

# Build a small example data frame: five rows, four columns.
data <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Name  = c("A", "B", "C", "D", "E"),
  Age   = c(25, 30, 22, 28, 35),
  Score = c(85, 92, 78, 89, 81)
)

# Auto-print the data frame to inspect it.
data

Data Stats

data$Age
[1] 25 30 22 28 35
data$Score
[1] 85 92 78 89 81
mean(data$Age)
[1] 28
mean(data$Score)
[1] 85
summary(data)
       ID        Name                Age         Score   
 Min.   :1   Length:5           Min.   :22   Min.   :78  
 1st Qu.:2   Class :character   1st Qu.:25   1st Qu.:81  
 Median :3   Mode  :character   Median :28   Median :85  
 Mean   :3                      Mean   :28   Mean   :85  
 3rd Qu.:4                      3rd Qu.:30   3rd Qu.:89  
 Max.   :5                      Max.   :35   Max.   :92  
cor(data$Age, data$Score)
[1] 0.2480695
sd(data$Age)
[1] 4.949747
# Standard error of the mean age: divide the standard deviation by the
# square root of the actual number of observations instead of a literal 5,
# so the result stays correct if the number of rows changes.
sd(data$Age) / sqrt(length(data$Age))
[1] 2.213594

Data Select

data

Row selection

data[2:4 , ]

Col selection

# Columns 2 through 4, all rows.
data[ , 2:4]
# Rows 2 through 4 of columns 2 through 4.
data[2:4, 2:4]
# Same selection written out explicitly: c(2, 3, 4) picks the same
# positions as 2:4.
data[c(2, 3, 4),  c(2, 3, 4)]
# A single cell (row 2, column 4) comes back as a bare value.
data[2, 4]
[1] 92
# Entire second row, all columns.
data[2, ]
# Entire fourth column; a single column selected this way prints as a
# plain vector.
data[ , 4]
[1] 85 92 78 89 81
2:4
[1] 2 3 4
c(2, 3, 4)
[1] 2 3 4
# Non-adjacent rows 1, 3 and 5, all columns.
data[c(1, 3, 5), ]
# Columns 1 and 3, all rows.
data[ , c(1, 3)]

Conditional Selection

# Rows where score > 80
data[data$Score > 80, ]

# Rows where age >= 30
data[data$Age >= 30, ]

# Rows where age is exactly 30 (== tests equality)
data[data$Age == 30, ]

# Rows with age 22, 25 or 35.
# %in% tests membership in a set of values, replacing a chain of ==
# comparisons joined with |; it also never yields NA in the row mask.
data[data$Age %in% c(22, 25, 35), ]
NA

Col and row add

# Add a Dept column; cbind() names the new column after the vector.
Dept <- c("CS", "Bio", "Chem", "Phy", "Eco")
data <- cbind(data, Dept)
data

# Append a row. The new row is built as a one-row data frame so each value
# keeps its own type; a c(6, "F", ...) vector would be all character, and
# rbind() would then turn ID, Age and Score into character columns.
new_r <- data.frame(ID = 6, Name = "F", Age = 60, Score = 100, Dept = "Eng")
data <- rbind(data, new_r)
data

Add in the middle

# Insert a new row between the current rows 2 and 3.
# The row is a one-row data frame (not a c() vector, which would be all
# character) so this rbind() does not itself coerce numeric columns to
# character.
new_r <- data.frame(ID = 10, Name = "A", Age = 40, Score = 100, Dept = "Eng")

# Preview the two pieces; nrow(data) replaces the hard-coded last row index.
data[1:2, ]
data[3:nrow(data), ]

data <- rbind(data[1:2, ], new_r, data[3:nrow(data), ])
data
data
# One family name per row of data (seven rows at this point).
Family_name <- c("A", "X", "Y", "Z", "G", "V", "N")

# Place Family_name between the first two columns and the remaining three.
cbind(data[, c(1, 2)], Family_name, data[, c(3, 4, 5)])
cbind
function (..., deparse.level = 1) 
.Internal(cbind(deparse.level, ...))
<bytecode: 0x7f883e312da8>
<environment: namespace:base>

search in every column for a specific value

Real dataset

# Load the iris measurements from a CSV file (path is relative to the
# working directory), then preview and summarise them.
# NOTE(review): this assignment masks R's built-in `iris` dataset for the
# rest of the session.
iris <- read.csv(file = "iris.csv")
head(iris)
summary(iris)
  sepal.length    sepal.width     petal.length    petal.width      variety         
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   Length:150        
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   Class :character  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300   Mode  :character  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                     
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                     
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                     
str(iris)
'data.frame':   150 obs. of  5 variables:
 $ sepal.length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ sepal.width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ petal.length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ petal.width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ variety     : chr  "Setosa" "Setosa" "Setosa" "Setosa" ...




# Keep only the rows whose variety is "Setosa".
setosa <- iris[iris[["variety"]] == "Setosa", ]
head(setosa)
summary(setosa)
  sepal.length    sepal.width     petal.length    petal.width      variety         
 Min.   :4.300   Min.   :2.300   Min.   :1.000   Min.   :0.100   Length:50         
 1st Qu.:4.800   1st Qu.:3.200   1st Qu.:1.400   1st Qu.:0.200   Class :character  
 Median :5.000   Median :3.400   Median :1.500   Median :0.200   Mode  :character  
 Mean   :5.006   Mean   :3.428   Mean   :1.462   Mean   :0.246                     
 3rd Qu.:5.200   3rd Qu.:3.675   3rd Qu.:1.575   3rd Qu.:0.300                     
 Max.   :5.800   Max.   :4.400   Max.   :1.900   Max.   :0.600                     
# Setosa rows with sepal length above 5.843 (the mean sepal.length
# reported by summary(iris)).
setosa[setosa[["sepal.length"]] > 5.843, ]
# First rows of the full data set with sepal length above the same value.
head(iris[iris[["sepal.length"]] > 5.843, ])

library(ggplot2)

# Box plot of sepal length per variety, filled by variety.
ggplot(
  data = iris,
  mapping = aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_boxplot()

iris[ ,1 ]
  [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0
 [28] 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0 6.4 6.9 5.5
 [55] 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5
 [82] 5.5 5.8 6.0 5.4 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
[109] 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1
[136] 7.7 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
# Correlate the first column with every numeric column and print
# "1 <column index> <correlation>" per line.
# Numeric columns are selected by type instead of a hard-coded 1:4, so the
# loop never reaches a non-numeric column such as variety, where cor() errors.
numeric_cols <- which(vapply(iris, is.numeric, logical(1)))
for (i in numeric_cols) {
  cat(1, i, cor(iris[, 1], iris[, i]), "\n")
}
1 1 1 
1 2 -0.1175698 
1 3 0.8717538 
1 4 0.8179411 
# Column names of the iris data frame, printed for inspection.
col_names <- colnames(iris)
col_names
[1] "sepal.length" "sepal.width"  "petal.length" "petal.width"  "variety"     
# Print each column name on its own line.
# The loop variable is called col_name rather than c so that no variable
# sharing the name of base R's c() function is left in the workspace.
for (col_name in col_names) {
  print(col_name)
}
[1] "sepal.length"
[1] "sepal.width"
[1] "petal.length"
[1] "petal.width"
[1] "variety"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICB3b3JkX2RvY3VtZW50OiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKIyMgRGF0YSBGcmFtZQoKYGBge3J9CmRhdGEgPSBkYXRhLmZyYW1lKAogIElEID0gYygxLCAyLCAzLCA0LCA1KSwKICBOYW1lID0gYygiQSIsICJCIiwgIkMiLCAiRCIsICdFJyksCiAgQWdlID0gYygyNSwgMzAsIDIyLCAyOCwgMzUpLAogIFNjb3JlID0gYyg4NSwgOTIsIDc4LCA4OSwgODEpCikKCmRhdGEKYGBgCgojIyBEYXRhIFN0YXRzCgpgYGB7cn0KZGF0YSRBZ2UKZGF0YSRTY29yZQpgYGAKCmBgYHtyfQptZWFuKGRhdGEkQWdlKQptZWFuKGRhdGEkU2NvcmUpCmBgYAoKYGBge3J9CnN1bW1hcnkoZGF0YSkKYGBgCgpgYGB7cn0KY29yKGRhdGEkQWdlLCBkYXRhJFNjb3JlKQpgYGAKCmBgYHtyfQpzZChkYXRhJEFnZSkKYGBgCgpgYGB7cn0Kc2QoZGF0YSRBZ2UpIC8gc3FydCg1KQpgYGAKCiMjIERhdGEgU2VsZWN0CgpgYGB7cn0KZGF0YQpgYGAKCiMjIyBSb3cgc2VsZWN0aW9uCgpgYGB7cn0KZGF0YVsyOjQgLCBdCmBgYAoKIyMjIENvbCBzZWxlY3Rpb24KCmBgYHtyfQpkYXRhWyAsIDI6NF0KYGBgCgpgYGB7cn0KZGF0YVsyOjQsIDI6NF0KYGBgCgpgYGB7cn0KZGF0YVtjKDIsIDMsIDQpLCAgYygyLCAzLCA0KV0KYGBgCgpgYGB7cn0KZGF0YVsyLCA0XQpgYGAKCmBgYHtyfQpkYXRhWzIsIF0KYGBgCgpgYGB7cn0KZGF0YVsgLCA0XQpgYGAKCmBgYHtyfQoyOjQKYygyLCAzLCA0KQpgYGAKCmBgYHtyfQpkYXRhW2MoMSwgMywgNSksIF0KYGBgCgpgYGB7cn0KZGF0YVsgLCBjKDEsIDMpXQpgYGAKCiMjIENvbmRpdGlvbmFsIFNlbGVjdGlvbgoKYGBge3J9CiMgc2NvcmUgPiA4MApkYXRhWyBkYXRhJFNjb3JlID4gODAgLCBdCmBgYAoKYGBge3J9CiMgYWdlID49IDMwCgpkYXRhWyBkYXRhJEFnZSA+PSAzMCwgXQpgYGAKCmBgYHtyfQojIGFnZSA9IDMwCmRhdGFbIGRhdGEkQWdlID09IDMwICwgXQpgYGAKCmBgYHtyfQoKIyBzZWxlY3QgdGhlIHJvd3Mgd2l0aCBhZ2UgMjIsIDI1IG9yIDM1CgpkYXRhWyBkYXRhJEFnZSA9PSAyMiB8IGRhdGEkQWdlID09IDI1IHwgZGF0YSRBZ2UgPT0gMzUgLCBdCgpgYGAKCiMjIENvbCBhZGQKCmBgYHtyfQpEZXB0ID0gYygiQ1MiLCAiQmlvIiwgIkNoZW0iLCAiUGh5IiwgIkVjbyIpCmRhdGEgPSBjYmluZChkYXRhLCBEZXB0KQpkYXRhIApgYGAKCmBgYHtyfQpuZXdfciA9IGMoNiwgIkYiLCA2MCwgMTAwLCAiRW5nIikKZGF0YSA9IHJiaW5kKGRhdGEsIG5ld19yKQpkYXRhCmBgYAoKIyMgQWRkIGluIHRoZSBtaWRkbGUKCmBgYHtyfQpuZXdfciA9IGMoMTAsICJBIiwgNDAsIDEwMCwgIkVuZyIpCgpkYXRhWzE6MiAsIF0KZGF0YVszOjYsIF0KCmRhdGEgPSByYmluZChkYXRhWzE6MiAsIF0sIG5ld19yLCBkYXRhWzM6NiwgXSkKZGF0YQpgYGAKCmBgYHtyfQpkYXRhCmBgYAoKYGBge3J9CkZhbWlseV9uYW1lID0g
YygiQSIsICJYIiwgIlkiLCAiWiIsICJHIiwgIlYiLCAiTiIpCgpjYmluZChkYXRhWyAsIDE6Ml0sIEZhbWlseV9uYW1lLCBkYXRhWyAsIDM6NV0pCmBgYAoKYGBge3J9CmNiaW5kCmBgYAoKIyMgc2VhcmNoIGluIGV2ZXJ5IGNvbHVtbiBmb3IgYSBzcGVjaWZpYyB2YWx1ZQoKIyBSZWFsIGRhdGFzZXQKCmBgYHtyfQppcmlzID0gcmVhZC5jc3YoImlyaXMuY3N2IikKaGVhZChpcmlzKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGlyaXMpCmBgYAoKYGBge3J9CnN0cihpcmlzKQpgYGAKCmBgYHtyfQoKCgoKIyBzZWxlY3Qgcm93IGZvciBzZXRvc2EuCnNldG9zYSA9IGlyaXNbaXJpcyR2YXJpZXR5ID09ICJTZXRvc2EiICwgXQpoZWFkKHNldG9zYSkKYGBgCgpgYGB7cn0Kc3VtbWFyeShzZXRvc2EpCmBgYAoKYGBge3J9CiMgc2VwYWwgbGVuZ3RoID4gNS44NDMKc2V0b3NhW3NldG9zYSRzZXBhbC5sZW5ndGggPiA1Ljg0MywgXQpgYGAKCmBgYHtyfQpoZWFkKGlyaXNbaXJpcyRzZXBhbC5sZW5ndGggPiA1Ljg0MywgXSkKYGBgCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZ2dwbG90KGlyaXMsIGFlcyh4ID0gdmFyaWV0eSwgeSA9IHNlcGFsLmxlbmd0aCwgZmlsbCA9IHZhcmlldHkpKSArIAogIGdlb21fYm94cGxvdCgpCmBgYAoKCgpgYGB7cn0KaXJpc1sgLCAxIF0KYGBgCgpgYGB7cn0KZm9yIChpIGluIDE6NCkgewogIGNhdCgxLCBpLCAgY29yKGlyaXNbICwgMV0sIGlyaXNbLCBpXSksICJcbiIpCn0KYGBgCgpgYGB7cn0KY29sX25hbWVzID0gbmFtZXMoaXJpcykKY29sX25hbWVzCmBgYAoKYGBge3J9IApmb3IgKGMgaW4gY29sX25hbWVzKSB7CiAgcHJpbnQoYykKfQpgYGAKCg==