# Vectors ----
# Create two numeric vectors of equal length.
score <- c(100, 90, 60, 70, 85, 90)
age <- c(30, 35, 60, 50, 32, 25)

# Select single elements by their 1-based position.
score[3]
## [1] 60
age[5]
## [1] 32
# Indexing past the end of a vector yields NA instead of an error.
age[10]
## [1] NA
length(score)
## [1] 6
length(age)
## [1] 6

# Append a value to the end of each vector.
age <- c(age, 39)
age
## [1] 30 35 60 50 32 25 39
score <- c(score, 78)
score
## [1] 100 90 60 70 85 90 78

# Prepend a value to the front.
age <- c(40, age)
age
## [1] 40 30 35 60 50 32 25 39

# Insert 45 in the middle by gluing the two halves around it.
age[1:4]
## [1] 40 30 35 60
age[5:8]
## [1] 50 32 25 39
age <- c(age[1:4], 45, age[5:8])
age
## [1] 40 30 35 60 45 50 32 25 39

# Slice a range; positions beyond the vector's length come back as NA.
age[3:5]
## [1] 35 60 45
score[4:10]
## [1] 70 85 90 78 NA NA NA

### Remove an element
# A negative index drops that position (here the 8th value, 25).
age <- age[-8]
age
## [1] 40 30 35 60 45 50 32 39

### Update an element
score
## [1] 100 90 60 70 85 90 78
score[4] <- 80
score
## [1] 100 90 60 80 85 90 78

# Conditional selection ----
# A logical vector inside [ ] keeps only the elements where it is TRUE.
age_less_50 <- age[age < 50]
age_less_50
## [1] 40 30 35 45 32 39
age_less_equal_50 <- age[age <= 50]
age_less_equal_50
## [1] 40 30 35 45 50 32 39
age_more_equal_50 <- age[age >= 50]
age_more_equal_50
## [1] 60 50

# Ages in the closed range 40..50, first in two filtering steps ...
age_greater_40 <- age[age >= 40]
age_greater_40
## [1] 40 60 45 50
age_40_50 <- age_greater_40[age_greater_40 <= 50]
age_40_50
## [1] 40 45 50
# ... then in one step, combining both tests with the elementwise `&`.
age_40_50_v2 <- age[age >= 40 & age <= 50]
age_40_50_v2
## [1] 40 45 50

### Conditions
# A comparison on a vector returns one TRUE/FALSE per element.
result <- score >= 80
result
## [1] TRUE TRUE FALSE TRUE TRUE TRUE FALSE

# Summary statistics of the seven scores.
min(score)
## [1] 60
max(score)
## [1] 100
mean(score)
## [1] 83.28571
sum(score)
## [1] 583
median(score)
## [1] 85
sd(score)
## [1] 12.60574
var(score)
## [1] 158.9048
quantile(score)
## 0% 25% 50% 75% 100%
## 60 79 85 90 100

### Correlation
# Extend both vectors to 11 elements; cor() needs equal lengths.
score <- c(score, 100, 80, 20, 20)
score
## [1] 100 90 60 80 85 90 78 100 80 20 20
age <- c(age, 30, 70, 75)
age
## [1] 40 30 35 60 45 50 32 39 30 70 75
score
## [1] 100 90 60 80 85 90 78 100 80 20 20
age
## [1] 40 30 35 60 45 50 32 39 30 70 75
# The argument order does not change the coefficient.
cor(score, age)
## [1] -0.7311604
cor(age, score)
## [1] -0.7311604
# Data frames ----
# Build a five-row data frame; each named argument becomes one column.
data <- data.frame(
  ID = c(1, 2, 3, 4, 5),
  name = c("A", "B", "S", "D", "P"),
  score = c(100, 80, 20, 20, 30),
  age = c(30, 35, 60, 50, 60)
)
data

# Pull out a single column as a vector with `$`.
data$score
## [1] 100 80 20 20 30
data$age
## [1] 30 35 60 50 60

# Index with [rows, columns]; leaving a slot empty selects everything.
data[2, ]
data[, 2]
## [1] "A" "B" "S" "D" "P"
data[2, 2]
## [1] "B"
data[, c(3, 4)]
data[, c(1, 3, 4)]
data[c(2, 4), c(1, 3, 4)]

# Filter rows with a logical condition, optionally narrowing the columns.
data[data$age <= 50, ]
data[data$score >= 40, 1:3]
data[data$score >= 40 & data$age >= 30, c(2, 3)]

### Ordering the dataset
# order() returns the row permutation that sorts the given column.
data[order(data$age), ]
data[order(data$score, decreasing = TRUE), ]

# Add a new column with cbind() (use rbind() to add a new row instead).
dept <- c("CS", "Bio", "Genetices", "Phy", "Sports")
data <- cbind(data, dept)
data

# Per-column summaries and the structure of the data frame.
summary(data)
## ID name score age dept
## Min. :1 Length:5 Min. : 20 Min. :30 Length:5
## 1st Qu.:2 Class :character 1st Qu.: 20 1st Qu.:35 Class :character
## Median :3 Mode :character Median : 30 Median :50 Mode :character
## Mean :3 Mean : 50 Mean :47
## 3rd Qu.:4 3rd Qu.: 80 3rd Qu.:60
## Max. :5 Max. :100 Max. :60
str(data)
## 'data.frame': 5 obs. of 5 variables:
## $ ID : num 1 2 3 4 5
## $ name : chr "A" "B" "S" "D" ...
## $ score: num 100 80 20 20 30
## $ age : num 30 35 60 50 60
## $ dept : chr "CS" "Bio" "Genetices" "Phy" ...
# Iris dataset ----
# Read the measurements from a CSV file in the working directory.
# NOTE: assigning to `iris` masks R's built-in `iris` dataset; the recorded
# str() output below shows this file's column names (sepal.length, ...,
# variety).
iris <- read.csv("iris.csv")
iris

# Column types: four numeric measurements plus the character `variety`.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ sepal.length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ sepal.width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ petal.length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ petal.width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ variety : chr "Setosa" "Setosa" "Setosa" "Setosa" ...

# Five-number summary (plus mean) for every numeric column.
summary(iris)
## sepal.length sepal.width petal.length petal.width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## variety
## Length:150
## Class :character
## Mode :character
##
##
##
library(ggplot2)

### Scatter plot
# Sepal length against sepal width, one point per flower, coloured by variety.
ggplot(iris, aes(x = sepal.length, y = sepal.width, color = variety)) +
  geom_point()

### Boxplot
# Distribution of sepal length within each variety. The x aesthetic is the
# discrete `variety` column so every group gets its own slot on the axis;
# mapping a continuous column to x while grouping by variety makes
# position_dodge() warn about overlapping x intervals and draws the shapes
# on top of each other.
ggplot(
  iris,
  aes(x = variety, y = sepal.length, color = variety, fill = variety)
) +
  geom_boxplot()

### Violin plot
# Same comparison as the boxplot, showing the density shape per variety.
ggplot(
  iris,
  aes(x = variety, y = sepal.length, color = variety, fill = variety)
) +
  geom_violin()