Vector
# Create two numeric vectors of six elements each with c().
score = c(100, 90, 60, 70, 85, 90)
age =c(30, 35, 60, 50, 32, 25)
# Select a single element by position (positions start at 1).
score[3]
[1] 60
age[5]
[1] 32
# Asking for a position past the end of the vector returns NA, not an error.
age[10]
[1] NA
# length() reports how many elements each vector holds.
length(score)
[1] 6
length(age)
[1] 6
Append element
# Show the vector before it is modified.
age
[1] 30 35 60 50 32 25
# Append at the end: c() builds a new vector from the old one plus the new value.
age = c(age, 2000)
age
[1] 30 35 60 50 32 25 2000
score = c(score, 78)
score
[1] 100 90 60 70 85 90 78
# Prepend: place the new value before the existing elements.
age = c(40, age)
age
[1] 40 30 35 60 50 32 25 2000
# Insert in the middle: split the vector after position 4, then join the
# two halves around the new value 45.
age[1:4]
[1] 40 30 35 60
age[5:8]
[1] 50 32 25 2000
age = c(age[1:4], 45, age[5:8] )
age
[1] 40 30 35 60 45 50 32 25 2000
Select a vector segment
# A range of positions selects a contiguous slice of the vector.
age[3:5]
[1] 35 60 45
# score has only 7 elements, so positions 8 to 10 come back as NA.
score[4:10]
[1] 70 85 90 78 NA NA NA
Remove element
# A negative index drops that position; here the 8th element (25) is removed.
age = age[-8]
age
[1] 40 30 35 60 45 50 32 2000
Update
# Assigning to an indexed position overwrites that element: the 4th score
# changes from 70 to 80.
score
[1] 100 90 60 70 85 90 78
score[4] = 80
score
[1] 100 90 60 80 85 90 78
Conditional Selection
# A logical condition inside [ ] keeps only the elements for which it is TRUE.
age
[1] 40 30 35 60 45 50 32 2000
age[ age < 50 ]
[1] 40 30 35 45 32
age[ age <= 50 ]
[1] 40 30 35 45 50 32
age[ age > 50 ]
[1] 60 2000
age[ age >= 50 ]
[1] 60 50 2000
# Ages with 40 <= age <= 50, in two steps: filter on the lower bound first,
# then filter that intermediate result on the upper bound.
age_grater_40 = age[ age >= 40 ]
age_grater_40
[1] 40 60 45 50 2000
age_40_50 = age_grater_40[ age_grater_40 <= 50 ]
age_40_50
[1] 40 45 50
# Same selection in one step: & combines the two conditions element by element.
age_40_50_v2 = age[ age >= 40 & age <= 50]
age_40_50_v2
[1] 40 45 50
Condition
# Comparing a vector with a number yields a logical vector of the same
# length: one TRUE/FALSE per element.
result = score >= 80
result
[1] TRUE TRUE FALSE TRUE TRUE TRUE FALSE
Simple stats
# Basic descriptive statistics of score (7 values at this point).
min(score)
[1] 60
max(score)
[1] 100
mean(score)
[1] 83.28571
sum(score)
[1] 583
median(score)
[1] 85
# sd() and var() are the sample standard deviation and variance
# (var is the square of sd).
sd(score)
[1] 12.60574
var(score)
[1] 158.9048
# Called without further arguments, quantile() returns the
# 0%, 25%, 50%, 75% and 100% quantiles.
quantile(score)
0% 25% 50% 75% 100%
60 79 85 90 100
# NOTE: mode() is not the statistical mode (most frequent value); it reports
# the storage mode of the object, which is why the result is "numeric".
mode(score)
[1] "numeric"
Correlation
# Extend both vectors so that they end up with the same length (11 each);
# cor() requires x and y of equal length.
score = c(score, c(100, 80, 20, 20))
age = c(age, c(30, 70, 75))
score
[1] 100 90 60 80 85 90 78 100 80 20 20
age
[1] 40 30 35 60 45 50 32 2000 30 70 75
# Small example with four paired observations.
score2 = c(10, 50, 30, 40)
age2 = c(20, 25, 30, 35)
cor(score2, age2)
[1] 0.5291503
# Correlation is symmetric: swapping the arguments gives the same value.
# NOTE(review): age still contains the value 2000, which looks like an
# outlier and will influence this coefficient - confirm it is intended.
cor(score, age)
[1] 0.2961703
cor(age, score)
[1] 0.2961703
# Sign of the coefficient:
#   positive: the two variables tend to increase together
#   negative: one tends to decrease as the other increases
Data Frame
# Build a small student table: one row per student, one column per attribute.
# stringsAsFactors = FALSE is spelled out so that `name` stays a character
# column on every R version (the default only became FALSE in R 4.0.0).
data = data.frame(
  ID = c(1, 2, 3, 4, 5),
  name = c("A", "S", "D", "P", "C"),
  score = c(10, 50, 30, 40, 45),
  age = c(20, 25, 30, 35, 32),
  stringsAsFactors = FALSE
)
# Print the table to inspect it.
data
Select column
# $ extracts a single column of the data frame as a plain vector.
data$score
[1] 10 50 30 40 45
data$age
[1] 20 25 30 35 32
Selection: data[row, col]
# Leaving one side of the comma empty selects everything on that side.
# Row 2, all columns.
data[2, ]
# All rows of column 2; a single column comes back as a vector.
data[ ,2]
[1] "A" "S" "D" "P" "C"
# The single cell at row 2, column 2.
data[ 2, 2]
[1] "S"
# Several columns at once: pass a vector of column positions.
data[ , c(1, 3) ]
data[ , c(1, 3,4) ]
# Rows 2 and 4 restricted to columns 1, 3 and 4.
data[c(2, 4) , c(1,3,4) ]
data
# A logical condition in the row position keeps only the matching rows.
data[ data$age <= 30 , ]
Select students with score greater than or equal to 40
# Every column for the students whose score is at least 40.
data[data$score >= 40, ]
# The same students, showing only the ID, name and score columns
# (selected by name instead of by position).
data[data$score >= 40, c("ID", "name", "score")]
Select students with score >= 40 and age >= 30. Show only name and
score.
# Both conditions must hold for a row to be kept; the name and score
# columns are picked by name instead of by position.
data[data$age >= 30 & data$score >= 40, c("name", "score")]
Ordering dataset
# order() returns the row positions that sort the score column;
# indexing with them rearranges the rows. Lowest score first:
data[order(data[["score"]]), ]
# Highest score first:
data[order(data[["score"]], decreasing = TRUE), ]
Add new col
# One department per student, in row order.
dept = c("CS", "Bio", "Genetics", "Phy", "Sports")
# Rebuild the table with dept attached as the right-most column.
data = data.frame(data, dept = dept)
data
NA
Column reorder
# Move dept so that it sits right after name; the columns are listed by
# name in the desired order.
data = data[, c("ID", "name", "dept", "score", "age")]
data
delete a column
# Drop the last column (age) by keeping only the other four, listed by name.
data = data[, c("ID", "name", "dept", "score")]
data
data
# summary() describes each column: minimum, quartiles, median, mean and
# maximum for numeric columns; length, class and mode for character columns.
summary(data)
ID name dept score
Min. :1 Length:5 Length:5 Min. :10
1st Qu.:2 Class :character Class :character 1st Qu.:30
Median :3 Mode :character Mode :character Median :40
Mean :3 Mean :35
3rd Qu.:4 3rd Qu.:45
Max. :5 Max. :50
# str() shows the structure of the table: number of rows and columns, then
# the type and the first values of every column.
str(data)
'data.frame': 5 obs. of 4 variables:
$ ID : num 1 2 3 4 5
$ name : chr "A" "S" "D" "P" ...
$ dept : chr "CS" "Bio" "Genetics" "Phy" ...
$ score: num 10 50 30 40 45
IRIS Dataset
# Load the iris measurements from iris.csv; the relative path is resolved
# against the current working directory.
# NOTE: assigning to the name `iris` masks R's built-in `iris` dataset for
# the rest of the session.
iris = read.csv("iris.csv")
iris
# Structure of the loaded table: 150 rows, four numeric measurement columns
# and one character column (variety).
str(iris)
'data.frame': 150 obs. of 5 variables:
$ sepal.length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ sepal.width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ petal.length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ petal.width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ variety : chr "Setosa" "Setosa" "Setosa" "Setosa" ...
# Per-column summary statistics of the loaded table.
summary(iris)
sepal.length sepal.width petal.length petal.width variety
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 Length:150
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 Class :character
Median :5.800 Median :3.000 Median :4.350 Median :1.300 Mode :character
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Scatter plot
# Scatter plot of sepal width against sepal length, one colour per variety.
library(ggplot2)
ggplot(
  data = iris,
  mapping = aes(x = sepal.length, y = sepal.width, color = variety)
) +
  geom_point() +
  theme_minimal()

Boxplot
# One box per variety showing the distribution of sepal length;
# the boxes are filled by variety.
ggplot(
  data = iris,
  mapping = aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_boxplot()

Violin plot
# One violin per variety showing the distribution of sepal length;
# the violins are filled by variety.
ggplot(
  data = iris,
  mapping = aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_violin()

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAotLS0KCiMgVmVjdG9yCgpgYGB7cn0KIyBDcmVhdGUKc2NvcmUgPSBjKDEwMCwgOTAsIDYwLCA3MCwgODUsIDkwKQphZ2UgPWMoMzAsIDM1LCA2MCwgNTAsIDMyLCAyNSkKCiMgc2VsZWN0CnNjb3JlWzNdCmFnZVs1XQoKYWdlWzEwXQpgYGAKCmBgYHtyfQpsZW5ndGgoc2NvcmUpCmxlbmd0aChhZ2UpCmBgYAoKIyMjIEFwcGVuZCBlbGVtZW50CmBgYHtyfQphZ2UKYWdlID0gYyhhZ2UsIDIwMDApCmFnZQoKYGBgCgpgYGB7cn0KCgpzY29yZSA9IGMoc2NvcmUsIDc4KQpzY29yZQpgYGAKCgpgYGB7cn0KYWdlID0gYyg0MCwgYWdlKQphZ2UKYGBgCgpgYGB7cn0KYWdlWzE6NF0KYWdlWzU6OF0KCmFnZSA9IGMoYWdlWzE6NF0sIDQ1LCBhZ2VbNTo4XSApCmFnZQpgYGAKCgojIyMgU2VsZWN0IGEgdmVjdG9yIHNlZ21lbnQKYGBge3J9CmFnZVszOjVdCgpzY29yZVs0OjEwXQpgYGAKCiMjIyBSZW1vdmUgZWxlbWVudCAKYGBge3J9CmFnZSA9IGFnZVstOF0KYWdlCmBgYAoKIyMjIFVwZGF0ZQoKYGBge3J9CnNjb3JlCnNjb3JlWzRdID0gODAKc2NvcmUKYGBgCgojIyMgQ29uZGl0aW9uYWwgU2VsZWN0aW9uCmBgYHtyfQphZ2UKYWdlWyBhZ2UgPCA1MCBdCmFnZVsgYWdlIDw9IDUwIF0KCmFnZVsgYWdlID4gNTAgXQphZ2VbIGFnZSA+PSA1MCBdCmBgYAoKCmBgYHtyfQojIDQwIDw9IGFnZSA8PTUwCmFnZV9ncmF0ZXJfNDAgPSBhZ2VbIGFnZSA+PSA0MCBdCmFnZV9ncmF0ZXJfNDAKCgphZ2VfNDBfNTAgPSBhZ2VfZ3JhdGVyXzQwWyBhZ2VfZ3JhdGVyXzQwIDw9IDUwIF0KYWdlXzQwXzUwCmBgYAoKCmBgYHtyfQphZ2VfNDBfNTBfdjIgPSBhZ2VbIGFnZSA+PSA0MCAgJiBhZ2UgPD0gNTBdCmFnZV80MF81MF92MgpgYGAKCgojIyMgQ29uZGl0aW9uCgpgYGB7cn0KcmVzdWx0ID0gc2NvcmUgPj0gODAKcmVzdWx0CmBgYAoKCiMjIyBTaW1wbGUgc3RhdHMKYGBge3J9Cm1pbihzY29yZSkKbWF4KHNjb3JlKQptZWFuKHNjb3JlKQpzdW0oc2NvcmUpCm1lZGlhbihzY29yZSkKc2Qoc2NvcmUpCnZhcihzY29yZSkKcXVhbnRpbGUoc2NvcmUpCm1vZGUoc2NvcmUpCmBgYAoKCiMjIyBDb3JyZWxhdGlvbgpgYGB7cn0Kc2NvcmUgPSBjKHNjb3JlLCBjKDEwMCwgODAsIDIwLCAyMCkpCmBgYAoKYGBge3J9CmFnZSA9IGMoYWdlLCBjKDMwLCA3MCwgNzUpKQpgYGAKCgpgYGB7cn0Kc2NvcmUKYWdlCmBgYAoKYGBge3J9CnNjb3JlMiA9IGMoMTAsIDUwLCAzMCwgNDApCmFnZTIgPSBjKDIwLCAyNSwgMzAsIDM1KQpjb3Ioc2NvcmUyLCBhZ2UyKQoKYGBgCgoKYGBge3J9CmNvcihzY29yZSwgYWdlKQpjb3IoYWdlLCBzY29yZSkKIyArOiBwb3NpdGl2ZSBjb3JyZWxhdGlvbiAKIyAtOiBuZWcgY29ycmVsYXRpb24KCmBgYAoK
CgojIERhdGEgRnJhbWUKYGBge3J9CgpkYXRhID0gZGF0YS5mcmFtZSgKSUQgPSBjKDEsIDIsIDMsIDQsIDUpLApuYW1lID0gYygiQSIsICJTIiwgIkQiLCAiUCIsICJDIiksCnNjb3JlID0gYygxMCwgNTAsIDMwLCA0MCwgNDUpLAphZ2UgPSBjKDIwLCAyNSwgMzAsIDM1LCAzMikKKQoKZGF0YQpgYGAKCiMjIFNlbGVjdCBjb2x1bW4KYGBge3J9CmRhdGEkc2NvcmUKZGF0YSRhZ2UKYGBgCgojIyBTZWxlY3Rpb246IGRhdGFbcm93IDogY29sXQpgYGB7cn0KZGF0YVsyLCBdCmBgYAoKYGBge3J9CmRhdGFbICwyXQpgYGAKCgpgYGB7cn0KZGF0YVsgMiwgMl0KYGBgCgoKCmBgYHtyfQpkYXRhWyAsIGMoMSwgMykgXQpgYGAKCmBgYHtyfQpkYXRhWyAsIGMoMSwzLDQpIF0KYGBgCgoKYGBge3J9CmRhdGFbYygyLCA0KSAsIGMoMSwzLDQpIF0KYGBgCgoKYGBge3J9CmRhdGEKYGBgCgpgYGB7cn0KZGF0YVsgZGF0YSRhZ2UgPD0gMzAgLCBdCmBgYAoKIyMjIHNlbGVjdCBzdHVkZW50cyBzY29yZSBncmVhdGVyIHRoYW4gZXF1YWwgNDAKYGBge3J9CmRhdGFbIGRhdGEkc2NvcmUgPj0gNDAgLCBdCmBgYAoKCmBgYHtyfQpkYXRhWyBkYXRhJHNjb3JlID49IDQwICwgYygxLCAyLCAzKSBdCmBgYAoKIyMjIyBTZWxlY3Qgc3R1ZGVudHMgd2l0aCBzY29yZSBnZSA0MCBhbmQgYWdlIGdlIDMwLiBTaG93IG9ubHkgbmFtZSBhbmQgc2NvcmUuCmBgYHtyfQpkYXRhWyBkYXRhJHNjb3JlID49IDQwICAmIGRhdGEkYWdlID49IDMwLCBjKDIsIDMpIF0KYGBgCgojIyMgT3JkZXJpbmcgZGF0YXNldApgYGB7cn0KZGF0YVtvcmRlcihkYXRhJHNjb3JlKSAsIF0KYGBgCgpgYGB7cn0KZGF0YVtvcmRlcihkYXRhJHNjb3JlLCBkZWNyZWFzaW5nID0gVFJVRSkgLCBdCmBgYAoKIyMjIEFkZCBuZXcgY29sCmBgYHtyfQpkZXB0ID0gYygiQ1MiLCAiQmlvIiwgIkdlbmV0aWNzIiwgIlBoeSIsICJTcG9ydHMiKQoKZGF0YSA9IGNiaW5kKGRhdGEsIGRlcHQpICNyYmluZCBmb3IgYWRkaW5nIG5ldyByb3cKZGF0YQoKYGBgCiMjIyBDb2x1bW4gcmVvcmRlcgpgYGB7cn0KZGF0YSA9IGRhdGFbICwgYygxLCAyLCA1LCAzLCA0KV0KZGF0YQpgYGAKCgojIyMjIGRlbGV0ZSBhIGNvbHVtbgpgYGB7cn0KZGF0YSA9IGRhdGFbICwgYygxLCAyLCAzLCA0KV0KZGF0YQpgYGAKCgpgYGB7cn0KZGF0YQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGRhdGEpCmBgYAoKYGBge3J9CnN0cihkYXRhKQpgYGAKIyBJUklTIERhdGFzZXQKYGBge3J9CmlyaXMgPSByZWFkLmNzdigiaXJpcy5jc3YiKQppcmlzCmBgYAoKCmBgYHtyfQpzdHIoaXJpcykKYGBgCgpgYGB7cn0Kc3VtbWFyeShpcmlzKQpgYGAKCiMjIFNjYXR0ZXIgcGxvdApgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZ2dwbG90KGlyaXMsIGFlcyh4ID0gc2VwYWwubGVuZ3RoLCB5ID0gc2VwYWwud2lkdGgsIGNvbG9yID0gdmFyaWV0eSkpICsKICBnZW9tX3BvaW50KCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgojIyMgQm94cGxvdApgYGB7cn0KZ2dw
bG90KGlyaXMsIGFlcyh5ID0gc2VwYWwubGVuZ3RoLCB4ID0gdmFyaWV0eSwgZmlsbCA9IHZhcmlldHkpKSArCiAgZ2VvbV9ib3hwbG90KCkKYGBgCgojIyMgQm94cGxvdApgYGB7cn0KZ2dwbG90KGlyaXMsIGFlcyh5ID0gc2VwYWwubGVuZ3RoLCB4ID0gdmFyaWV0eSwgZmlsbCA9IHZhcmlldHkpKSArCiAgZ2VvbV92aW9saW4oKQpgYGAKCg==