Data Frame
# Small example table: one row per person, with an id, a name, an age and a
# score.
data <- data.frame(
  ID = c(1, 2, 3, 4, 5),
  Name = c("A", "B", "C", "D", "E"),
  Age = c(25, 30, 22, 28, 35),
  Score = c(85, 92, 78, 89, 81)
)
data
Data Stats
data$Age
[1] 25 30 22 28 35
data$Score
[1] 85 92 78 89 81
mean(data$Age)
[1] 28
mean(data$Score)
[1] 85
summary(data)
ID Name Age Score
Min. :1 Length:5 Min. :22 Min. :78
1st Qu.:2 Class :character 1st Qu.:25 1st Qu.:81
Median :3 Mode :character Median :28 Median :85
Mean :3 Mean :28 Mean :85
3rd Qu.:4 3rd Qu.:30 3rd Qu.:89
Max. :5 Max. :35 Max. :92
cor(data$Age, data$Score)
[1] 0.2480695
sd(data$Age)
[1] 4.949747
# Standard error of the mean age: sample standard deviation divided by
# sqrt(n). n is taken from the data instead of being hard-coded, so the
# expression stays correct if rows are added or removed.
sd(data$Age) / sqrt(length(data$Age))
[1] 2.213594
Data Select
data
Row selection
data[2:4 , ]
Column selection
data[ , 2:4]
data[2:4, 2:4]
data[c(2, 3, 4), c(2, 3, 4)]
data[2, 4]
[1] 92
data[2, ]
data[ , 4]
[1] 85 92 78 89 81
2:4
[1] 2 3 4
c(2, 3, 4)
[1] 2 3 4
data[c(1, 3, 5), ]
data[ , c(1, 3)]
Conditional Selection
# Rows with a score strictly greater than 80.
data[data$Score > 80, ]
# Rows with an age of at least 30.
data[data$Age >= 30, ]
# Rows with an age of exactly 30.
data[data$Age == 30, ]
# Rows whose age is 22, 25 or 35. `%in%` tests membership in the whole set in
# one step and gives FALSE (never NA) for a missing age, so a missing value
# cannot produce an all-NA row in the result.
data[data$Age %in% c(22, 25, 35), ]
NA
Adding a column and a row
# Append a Dept column; the new column takes the name of the variable.
Dept <- c("CS", "Bio", "Chem", "Phy", "Eco")
data <- cbind(data, Dept)
data
# Append a sixth row. The row is built as a one-row data frame because c()
# would turn every value into a string, and rbind() would then convert the
# numeric columns (ID, Age, Score) to character.
new_r <- data.frame(ID = 6, Name = "F", Age = 60, Score = 100, Dept = "Eng")
data <- rbind(data, new_r)
data
Adding a row in the middle
# Insert a new row between the current rows 2 and 3.
# As a one-row data frame the new row keeps a proper type for each value; a
# c() vector would be all strings and would make rbind() convert the numeric
# columns to character.
new_r <- data.frame(ID = 10, Name = "A", Age = 40, Score = 100, Dept = "Eng")

# The two pieces the new row is spliced between. The tail runs to the last
# row, whatever the current row count is.
data[1:2, ]
data[3:nrow(data), ]

data <- rbind(data[1:2, ], new_r, data[3:nrow(data), ])
data
data
# One family name per row of `data` (seven rows at this point).
Family_name <- c("A", "X", "Y", "Z", "G", "V", "N")

# Show Family_name placed between columns 1-2 and columns 3-5. The result is
# only printed; `data` itself is not modified.
cbind(data[, 1:2], Family_name, data[, 3:5])
cbind
function (..., deparse.level = 1)
.Internal(cbind(deparse.level, ...))
<bytecode: 0x7f883e312da8>
<environment: namespace:base>
Search in every column for a specific value
Real dataset
# Read the iris measurements from iris.csv (resolved against the working
# directory) and preview the first rows.
# Note: this name masks the built-in datasets::iris for the rest of the
# session.
iris <- read.csv(file = "iris.csv")
head(iris)
summary(iris)
sepal.length sepal.width petal.length petal.width variety
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 Length:150
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 Class :character
Median :5.800 Median :3.000 Median :4.350 Median :1.300 Mode :character
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
str(iris)
'data.frame': 150 obs. of 5 variables:
$ sepal.length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ sepal.width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ petal.length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ petal.width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ variety : chr "Setosa" "Setosa" "Setosa" "Setosa" ...
# Keep only the rows whose variety is "Setosa".
setosa <- iris[iris$variety == "Setosa", ]
head(setosa)
summary(setosa)
sepal.length sepal.width petal.length petal.width variety
Min. :4.300 Min. :2.300 Min. :1.000 Min. :0.100 Length:50
1st Qu.:4.800 1st Qu.:3.200 1st Qu.:1.400 1st Qu.:0.200 Class :character
Median :5.000 Median :3.400 Median :1.500 Median :0.200 Mode :character
Mean :5.006 Mean :3.428 Mean :1.462 Mean :0.246
3rd Qu.:5.200 3rd Qu.:3.675 3rd Qu.:1.575 3rd Qu.:0.300
Max. :5.800 Max. :4.400 Max. :1.900 Max. :0.600
# Rows with an above-average sepal length. The threshold is the mean over the
# full iris table, computed here instead of being copied (rounded to 5.843)
# from the summary() output.
mean_sepal_length <- mean(iris$sepal.length)
setosa[setosa$sepal.length > mean_sepal_length, ]
head(iris[iris$sepal.length > mean_sepal_length, ])
library(ggplot2)

# Box plot of sepal length for each variety, with boxes filled by variety.
ggplot(
  data = iris,
  mapping = aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_boxplot()

iris[ ,1 ]
[1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0
[28] 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0 6.4 6.9 5.5
[55] 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5
[82] 5.5 5.8 6.0 5.4 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
[109] 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1
[136] 7.7 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
# Correlate the first column with every numeric column, printing one line per
# pair: "1 <column index> <correlation>". Numeric columns are detected rather
# than assumed to be columns 1 to 4, so a non-numeric column is never passed
# to cor().
numeric_cols <- which(vapply(iris, is.numeric, logical(1)))
for (i in numeric_cols) {
  cat(1, i, cor(iris[, 1], iris[, i]), "\n")
}
1 1 1
1 2 -0.1175698
1 3 0.8717538
1 4 0.8179411
col_names = names(iris)
col_names
[1] "sepal.length" "sepal.width" "petal.length" "petal.width" "variety"
# Print each column name on its own line.
for (col_name in col_names) {
  print(col_name)
}
[1] "sepal.length"
[1] "sepal.width"
[1] "petal.length"
[1] "petal.width"
[1] "variety"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICB3b3JkX2RvY3VtZW50OiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKIyMgRGF0YSBGcmFtZQoKYGBge3J9CmRhdGEgPSBkYXRhLmZyYW1lKAogIElEID0gYygxLCAyLCAzLCA0LCA1KSwKICBOYW1lID0gYygiQSIsICJCIiwgIkMiLCAiRCIsICdFJyksCiAgQWdlID0gYygyNSwgMzAsIDIyLCAyOCwgMzUpLAogIFNjb3JlID0gYyg4NSwgOTIsIDc4LCA4OSwgODEpCikKCmRhdGEKYGBgCgojIyBEYXRhIFN0YXRzCgpgYGB7cn0KZGF0YSRBZ2UKZGF0YSRTY29yZQpgYGAKCmBgYHtyfQptZWFuKGRhdGEkQWdlKQptZWFuKGRhdGEkU2NvcmUpCmBgYAoKYGBge3J9CnN1bW1hcnkoZGF0YSkKYGBgCgpgYGB7cn0KY29yKGRhdGEkQWdlLCBkYXRhJFNjb3JlKQpgYGAKCmBgYHtyfQpzZChkYXRhJEFnZSkKYGBgCgpgYGB7cn0Kc2QoZGF0YSRBZ2UpIC8gc3FydCg1KQpgYGAKCiMjIERhdGEgU2VsZWN0CgpgYGB7cn0KZGF0YQpgYGAKCiMjIyBSb3cgc2VsZWN0aW9uCgpgYGB7cn0KZGF0YVsyOjQgLCBdCmBgYAoKIyMjIENvbCBzZWxlY3Rpb24KCmBgYHtyfQpkYXRhWyAsIDI6NF0KYGBgCgpgYGB7cn0KZGF0YVsyOjQsIDI6NF0KYGBgCgpgYGB7cn0KZGF0YVtjKDIsIDMsIDQpLCAgYygyLCAzLCA0KV0KYGBgCgpgYGB7cn0KZGF0YVsyLCA0XQpgYGAKCmBgYHtyfQpkYXRhWzIsIF0KYGBgCgpgYGB7cn0KZGF0YVsgLCA0XQpgYGAKCmBgYHtyfQoyOjQKYygyLCAzLCA0KQpgYGAKCmBgYHtyfQpkYXRhW2MoMSwgMywgNSksIF0KYGBgCgpgYGB7cn0KZGF0YVsgLCBjKDEsIDMpXQpgYGAKCiMjIENvbmRpdGlvbmFsIFNlbGVjdGlvbgoKYGBge3J9CiMgc2NvcmUgPiA4MApkYXRhWyBkYXRhJFNjb3JlID4gODAgLCBdCmBgYAoKYGBge3J9CiMgYWdlID49IDMwCgpkYXRhWyBkYXRhJEFnZSA+PSAzMCwgXQpgYGAKCmBgYHtyfQojIGFnZSA9IDMwCmRhdGFbIGRhdGEkQWdlID09IDMwICwgXQpgYGAKCmBgYHtyfQoKIyBzZWxlY3QgdGhlIHJvd3Mgd2l0aCBhZ2UgMjIsIDI1IG9yIDM1CgpkYXRhWyBkYXRhJEFnZSA9PSAyMiB8IGRhdGEkQWdlID09IDI1IHwgZGF0YSRBZ2UgPT0gMzUgLCBdCgpgYGAKCiMjIENvbCBhZGQKCmBgYHtyfQpEZXB0ID0gYygiQ1MiLCAiQmlvIiwgIkNoZW0iLCAiUGh5IiwgIkVjbyIpCmRhdGEgPSBjYmluZChkYXRhLCBEZXB0KQpkYXRhIApgYGAKCmBgYHtyfQpuZXdfciA9IGMoNiwgIkYiLCA2MCwgMTAwLCAiRW5nIikKZGF0YSA9IHJiaW5kKGRhdGEsIG5ld19yKQpkYXRhCmBgYAoKIyMgQWRkIGluIHRoZSBtaWRkbGUKCmBgYHtyfQpuZXdfciA9IGMoMTAsICJBIiwgNDAsIDEwMCwgIkVuZyIpCgpkYXRhWzE6MiAsIF0KZGF0YVszOjYsIF0KCmRhdGEgPSByYmluZChkYXRhWzE6MiAsIF0sIG5ld19yLCBkYXRhWzM6NiwgXSkKZGF0YQpgYGAKCmBgYHtyfQpkYXRhCmBgYAoKYGBge3J9CkZhbWlseV9uYW1lID0g
YygiQSIsICJYIiwgIlkiLCAiWiIsICJHIiwgIlYiLCAiTiIpCgpjYmluZChkYXRhWyAsIDE6Ml0sIEZhbWlseV9uYW1lLCBkYXRhWyAsIDM6NV0pCmBgYAoKYGBge3J9CmNiaW5kCmBgYAoKIyMgc2VhcmNoIGluIGV2ZXJ5IGNvbHVtbiBmb3IgYSBzcGVjaWZpYyB2YWx1ZQoKIyBSZWFsIGRhdGFzZXQKCmBgYHtyfQppcmlzID0gcmVhZC5jc3YoImlyaXMuY3N2IikKaGVhZChpcmlzKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGlyaXMpCmBgYAoKYGBge3J9CnN0cihpcmlzKQpgYGAKCmBgYHtyfQoKCgoKIyBzZWxlY3Qgcm93IGZvciBzZXRvc2EuCnNldG9zYSA9IGlyaXNbaXJpcyR2YXJpZXR5ID09ICJTZXRvc2EiICwgXQpoZWFkKHNldG9zYSkKYGBgCgpgYGB7cn0Kc3VtbWFyeShzZXRvc2EpCmBgYAoKYGBge3J9CiMgc2VwYWwgbGVuZ3RoID4gNS44NDMKc2V0b3NhW3NldG9zYSRzZXBhbC5sZW5ndGggPiA1Ljg0MywgXQpgYGAKCmBgYHtyfQpoZWFkKGlyaXNbaXJpcyRzZXBhbC5sZW5ndGggPiA1Ljg0MywgXSkKYGBgCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZ2dwbG90KGlyaXMsIGFlcyh4ID0gdmFyaWV0eSwgeSA9IHNlcGFsLmxlbmd0aCwgZmlsbCA9IHZhcmlldHkpKSArIAogIGdlb21fYm94cGxvdCgpCmBgYAoKCgpgYGB7cn0KaXJpc1sgLCAxIF0KYGBgCgpgYGB7cn0KZm9yIChpIGluIDE6NCkgewogIGNhdCgxLCBpLCAgY29yKGlyaXNbICwgMV0sIGlyaXNbLCBpXSksICJcbiIpCn0KYGBgCgpgYGB7cn0KY29sX25hbWVzID0gbmFtZXMoaXJpcykKY29sX25hbWVzCmBgYAoKYGBge3J9IApmb3IgKGMgaW4gY29sX25hbWVzKSB7CiAgcHJpbnQoYykKfQpgYGAKCg==