Vector

# Create two numeric vectors with c(); each holds six values
score = c(100, 90, 60, 70, 85, 90)
age = c(30, 35, 60, 50, 32, 25)

# select
score[3]
[1] 60
age[5]
[1] 32
age[10]
[1] NA
length(score)
[1] 6
length(age)
[1] 6

Append element

age
[1] 30 35 60 50 32 25
age = c(age, 2000)
age
[1]   30   35   60   50   32   25 2000


score = c(score, 78)
score
[1] 100  90  60  70  85  90  78
age = c(40, age)
age
[1]   40   30   35   60   50   32   25 2000
age[1:4]
[1] 40 30 35 60
age[5:8]
[1]   50   32   25 2000
age = c(age[1:4], 45, age[5:8] )
age
[1]   40   30   35   60   45   50   32   25 2000

Select a vector segment

age[3:5]
[1] 35 60 45
score[4:10]
[1] 70 85 90 78 NA NA NA

Remove element

age = age[-8]
age
[1]   40   30   35   60   45   50   32 2000

Update

score
[1] 100  90  60  70  85  90  78
score[4] = 80
score
[1] 100  90  60  80  85  90  78

Conditional Selection

age
[1]   40   30   35   60   45   50   32 2000
age[ age < 50 ]
[1] 40 30 35 45 32
age[ age <= 50 ]
[1] 40 30 35 45 50 32
age[ age > 50 ]
[1]   60 2000
age[ age >= 50 ]
[1]   60   50 2000
# Select ages in the closed range: 40 <= age <= 50
age_grater_40 = age[ age >= 40 ]
age_grater_40
[1]   40   60   45   50 2000
age_40_50 = age_grater_40[ age_grater_40 <= 50 ]
age_40_50
[1] 40 45 50
age_40_50_v2 = age[ age >= 40  & age <= 50]
age_40_50_v2
[1] 40 45 50

Condition

result = score >= 80
result
[1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE

Simple stats

min(score)
[1] 60
max(score)
[1] 100
mean(score)
[1] 83.28571
sum(score)
[1] 583
median(score)
[1] 85
sd(score)
[1] 12.60574
var(score)
[1] 158.9048
quantile(score)
  0%  25%  50%  75% 100% 
  60   79   85   90  100 
# NOTE: mode() reports the storage mode of the object ("numeric" here),
# not the statistical mode (most frequent value) of the data.
mode(score)
[1] "numeric"

Correlation

score = c(score, c(100, 80, 20, 20))
age = c(age, c(30, 70, 75))
score
 [1] 100  90  60  80  85  90  78 100  80  20  20
age
 [1]   40   30   35   60   45   50   32 2000   30   70   75
score2 = c(10, 50, 30, 40)
age2 = c(20, 25, 30, 35)
cor(score2, age2)
[1] 0.5291503
cor(score, age)
[1] 0.2961703
cor(age, score)
[1] 0.2961703
# positive value: positive correlation
# negative value: negative correlation

Data Frame


# Build a five-row data frame with one column each for ID, name, score and age
data = data.frame(
  ID    = c(1, 2, 3, 4, 5),
  name  = c("A", "S", "D", "P", "C"),
  score = c(10, 50, 30, 40, 45),
  age   = c(20, 25, 30, 35, 32)
)

data

Select column

data$score
[1] 10 50 30 40 45
data$age
[1] 20 25 30 35 32

Selection: data[row, col]

data[2, ]
data[ ,2]
[1] "A" "S" "D" "P" "C"
data[ 2, 2]
[1] "S"
data[ , c(1, 3) ]
data[ , c(1, 3,4) ]
data[c(2, 4) , c(1,3,4) ]
data
data[ data$age <= 30 , ]

Select students with score greater than or equal to 40

data[ data$score >= 40 , ]
data[ data$score >= 40 , c(1, 2, 3) ]

Select students with score >= 40 and age >= 30. Show only name and score.

data[ data$score >= 40  & data$age >= 30, c(2, 3) ]

Ordering dataset

data[order(data$score) , ]
data[order(data$score, decreasing = TRUE) , ]

Add new col

# Five labels, one for each row of data (same order as the rows)
dept = c("CS", "Bio", "Genetics", "Phy", "Sports")

# cbind() appends dept to data as a new last column
data = cbind(data, dept)
data
NA

Column reorder

data = data[ , c(1, 2, 5, 3, 4)]
data

delete a column

# Keep only columns 1-4 by position; the fifth column (age, after the
# reorder above) is dropped because it is not listed.
data = data[ , c(1, 2, 3, 4)]
data
data
summary(data)
       ID        name               dept               score   
 Min.   :1   Length:5           Length:5           Min.   :10  
 1st Qu.:2   Class :character   Class :character   1st Qu.:30  
 Median :3   Mode  :character   Mode  :character   Median :40  
 Mean   :3                                         Mean   :35  
 3rd Qu.:4                                         3rd Qu.:45  
 Max.   :5                                         Max.   :50  
str(data)
'data.frame':   5 obs. of  4 variables:
 $ ID   : num  1 2 3 4 5
 $ name : chr  "A" "S" "D" "P" ...
 $ dept : chr  "CS" "Bio" "Genetics" "Phy" ...
 $ score: num  10 50 30 40 45

IRIS Dataset

# Load the dataset from "iris.csv"; the path is relative, so the file must be
# in the current working directory.
# NOTE(review): this assignment masks R's built-in `iris` dataset for the
# rest of the session — confirm that is intended.
iris = read.csv("iris.csv")
iris
str(iris)
'data.frame':   150 obs. of  5 variables:
 $ sepal.length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ sepal.width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ petal.length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ petal.width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ variety     : chr  "Setosa" "Setosa" "Setosa" "Setosa" ...
summary(iris)
  sepal.length    sepal.width     petal.length    petal.width      variety         
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   Length:150        
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   Class :character  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300   Mode  :character  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                     
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                     
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                     

Scatter plot

# ggplot2 provides ggplot(), aes(), the geom_* layers and theme_minimal()
library(ggplot2)

# Scatter plot: sepal.length on x, sepal.width on y, points colored by variety
ggplot(iris, aes(x = sepal.length, y = sepal.width, color = variety)) +
  geom_point() +
  theme_minimal()

Boxplot

ggplot(iris, aes(y = sepal.length, x = variety, fill = variety)) +
  geom_boxplot()

Violin plot

ggplot(iris, aes(y = sepal.length, x = variety, fill = variety)) +
  geom_violin()

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAotLS0KCiMgVmVjdG9yCgpgYGB7cn0KIyBDcmVhdGUKc2NvcmUgPSBjKDEwMCwgOTAsIDYwLCA3MCwgODUsIDkwKQphZ2UgPWMoMzAsIDM1LCA2MCwgNTAsIDMyLCAyNSkKCiMgc2VsZWN0CnNjb3JlWzNdCmFnZVs1XQoKYWdlWzEwXQpgYGAKCmBgYHtyfQpsZW5ndGgoc2NvcmUpCmxlbmd0aChhZ2UpCmBgYAoKIyMjIEFwcGVuZCBlbGVtZW50CmBgYHtyfQphZ2UKYWdlID0gYyhhZ2UsIDIwMDApCmFnZQoKYGBgCgpgYGB7cn0KCgpzY29yZSA9IGMoc2NvcmUsIDc4KQpzY29yZQpgYGAKCgpgYGB7cn0KYWdlID0gYyg0MCwgYWdlKQphZ2UKYGBgCgpgYGB7cn0KYWdlWzE6NF0KYWdlWzU6OF0KCmFnZSA9IGMoYWdlWzE6NF0sIDQ1LCBhZ2VbNTo4XSApCmFnZQpgYGAKCgojIyMgU2VsZWN0IGEgdmVjdG9yIHNlZ21lbnQKYGBge3J9CmFnZVszOjVdCgpzY29yZVs0OjEwXQpgYGAKCiMjIyBSZW1vdmUgZWxlbWVudCAKYGBge3J9CmFnZSA9IGFnZVstOF0KYWdlCmBgYAoKIyMjIFVwZGF0ZQoKYGBge3J9CnNjb3JlCnNjb3JlWzRdID0gODAKc2NvcmUKYGBgCgojIyMgQ29uZGl0aW9uYWwgU2VsZWN0aW9uCmBgYHtyfQphZ2UKYWdlWyBhZ2UgPCA1MCBdCmFnZVsgYWdlIDw9IDUwIF0KCmFnZVsgYWdlID4gNTAgXQphZ2VbIGFnZSA+PSA1MCBdCmBgYAoKCmBgYHtyfQojIDQwIDw9IGFnZSA8PTUwCmFnZV9ncmF0ZXJfNDAgPSBhZ2VbIGFnZSA+PSA0MCBdCmFnZV9ncmF0ZXJfNDAKCgphZ2VfNDBfNTAgPSBhZ2VfZ3JhdGVyXzQwWyBhZ2VfZ3JhdGVyXzQwIDw9IDUwIF0KYWdlXzQwXzUwCmBgYAoKCmBgYHtyfQphZ2VfNDBfNTBfdjIgPSBhZ2VbIGFnZSA+PSA0MCAgJiBhZ2UgPD0gNTBdCmFnZV80MF81MF92MgpgYGAKCgojIyMgQ29uZGl0aW9uCgpgYGB7cn0KcmVzdWx0ID0gc2NvcmUgPj0gODAKcmVzdWx0CmBgYAoKCiMjIyBTaW1wbGUgc3RhdHMKYGBge3J9Cm1pbihzY29yZSkKbWF4KHNjb3JlKQptZWFuKHNjb3JlKQpzdW0oc2NvcmUpCm1lZGlhbihzY29yZSkKc2Qoc2NvcmUpCnZhcihzY29yZSkKcXVhbnRpbGUoc2NvcmUpCm1vZGUoc2NvcmUpCmBgYAoKCiMjIyBDb3JyZWxhdGlvbgpgYGB7cn0Kc2NvcmUgPSBjKHNjb3JlLCBjKDEwMCwgODAsIDIwLCAyMCkpCmBgYAoKYGBge3J9CmFnZSA9IGMoYWdlLCBjKDMwLCA3MCwgNzUpKQpgYGAKCgpgYGB7cn0Kc2NvcmUKYWdlCmBgYAoKYGBge3J9CnNjb3JlMiA9IGMoMTAsIDUwLCAzMCwgNDApCmFnZTIgPSBjKDIwLCAyNSwgMzAsIDM1KQpjb3Ioc2NvcmUyLCBhZ2UyKQoKYGBgCgoKYGBge3J9CmNvcihzY29yZSwgYWdlKQpjb3IoYWdlLCBzY29yZSkKIyArOiBwb3NpdGl2ZSBjb3JyZWxhdGlvbiAKIyAtOiBuZWcgY29ycmVsYXRpb24KCmBgYAoK
CgojIERhdGEgRnJhbWUKYGBge3J9CgpkYXRhID0gZGF0YS5mcmFtZSgKSUQgPSBjKDEsIDIsIDMsIDQsIDUpLApuYW1lID0gYygiQSIsICJTIiwgIkQiLCAiUCIsICJDIiksCnNjb3JlID0gYygxMCwgNTAsIDMwLCA0MCwgNDUpLAphZ2UgPSBjKDIwLCAyNSwgMzAsIDM1LCAzMikKKQoKZGF0YQpgYGAKCiMjIFNlbGVjdCBjb2x1bW4KYGBge3J9CmRhdGEkc2NvcmUKZGF0YSRhZ2UKYGBgCgojIyBTZWxlY3Rpb246IGRhdGFbcm93IDogY29sXQpgYGB7cn0KZGF0YVsyLCBdCmBgYAoKYGBge3J9CmRhdGFbICwyXQpgYGAKCgpgYGB7cn0KZGF0YVsgMiwgMl0KYGBgCgoKCmBgYHtyfQpkYXRhWyAsIGMoMSwgMykgXQpgYGAKCmBgYHtyfQpkYXRhWyAsIGMoMSwzLDQpIF0KYGBgCgoKYGBge3J9CmRhdGFbYygyLCA0KSAsIGMoMSwzLDQpIF0KYGBgCgoKYGBge3J9CmRhdGEKYGBgCgpgYGB7cn0KZGF0YVsgZGF0YSRhZ2UgPD0gMzAgLCBdCmBgYAoKIyMjIHNlbGVjdCBzdHVkZW50cyBzY29yZSBncmVhdGVyIHRoYW4gZXF1YWwgNDAKYGBge3J9CmRhdGFbIGRhdGEkc2NvcmUgPj0gNDAgLCBdCmBgYAoKCmBgYHtyfQpkYXRhWyBkYXRhJHNjb3JlID49IDQwICwgYygxLCAyLCAzKSBdCmBgYAoKIyMjIyBTZWxlY3Qgc3R1ZGVudHMgd2l0aCBzY29yZSBnZSA0MCBhbmQgYWdlIGdlIDMwLiBTaG93IG9ubHkgbmFtZSBhbmQgc2NvcmUuCmBgYHtyfQpkYXRhWyBkYXRhJHNjb3JlID49IDQwICAmIGRhdGEkYWdlID49IDMwLCBjKDIsIDMpIF0KYGBgCgojIyMgT3JkZXJpbmcgZGF0YXNldApgYGB7cn0KZGF0YVtvcmRlcihkYXRhJHNjb3JlKSAsIF0KYGBgCgpgYGB7cn0KZGF0YVtvcmRlcihkYXRhJHNjb3JlLCBkZWNyZWFzaW5nID0gVFJVRSkgLCBdCmBgYAoKIyMjIEFkZCBuZXcgY29sCmBgYHtyfQpkZXB0ID0gYygiQ1MiLCAiQmlvIiwgIkdlbmV0aWNzIiwgIlBoeSIsICJTcG9ydHMiKQoKZGF0YSA9IGNiaW5kKGRhdGEsIGRlcHQpICNyYmluZCBmb3IgYWRkaW5nIG5ldyByb3cKZGF0YQoKYGBgCiMjIyBDb2x1bW4gcmVvcmRlcgpgYGB7cn0KZGF0YSA9IGRhdGFbICwgYygxLCAyLCA1LCAzLCA0KV0KZGF0YQpgYGAKCgojIyMjIGRlbGV0ZSBhIGNvbHVtbgpgYGB7cn0KZGF0YSA9IGRhdGFbICwgYygxLCAyLCAzLCA0KV0KZGF0YQpgYGAKCgpgYGB7cn0KZGF0YQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGRhdGEpCmBgYAoKYGBge3J9CnN0cihkYXRhKQpgYGAKIyBJUklTIERhdGFzZXQKYGBge3J9CmlyaXMgPSByZWFkLmNzdigiaXJpcy5jc3YiKQppcmlzCmBgYAoKCmBgYHtyfQpzdHIoaXJpcykKYGBgCgpgYGB7cn0Kc3VtbWFyeShpcmlzKQpgYGAKCiMjIFNjYXR0ZXIgcGxvdApgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZ2dwbG90KGlyaXMsIGFlcyh4ID0gc2VwYWwubGVuZ3RoLCB5ID0gc2VwYWwud2lkdGgsIGNvbG9yID0gdmFyaWV0eSkpICsKICBnZW9tX3BvaW50KCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgojIyMgQm94cGxvdApgYGB7cn0KZ2dw
bG90KGlyaXMsIGFlcyh5ID0gc2VwYWwubGVuZ3RoLCB4ID0gdmFyaWV0eSwgZmlsbCA9IHZhcmlldHkpKSArCiAgZ2VvbV9ib3hwbG90KCkKYGBgCgojIyMgQm94cGxvdApgYGB7cn0KZ2dwbG90KGlyaXMsIGFlcyh5ID0gc2VwYWwubGVuZ3RoLCB4ID0gdmFyaWV0eSwgZmlsbCA9IHZhcmlldHkpKSArCiAgZ2VvbV92aW9saW4oKQpgYGAKCg==