Data frame
# Build a small example data frame: one row per student, with a character
# column (Name) and two numeric columns (Score, Age).
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)

# Compact structural overview: dimensions, column types, first values.
str(students)
## 'data.frame': 5 obs. of 3 variables:
## $ Name : chr "john" "grace" "kiptoo" "dan" ...
## $ Score: num 56 87 97 47 98
## $ Age : num 20 21 18 19 22

# View() opens a spreadsheet-style viewer window, so only call it in an
# interactive session; unguarded, it can abort a batch run (Rscript, knitting,
# headless machines) before the rest of the script executes.
if (interactive()) {
  View(students)
}

# Per-column summary: length/class for character columns, quartiles and mean
# for numeric ones.
summary(students)
## Name Score Age
## Length:5 Min. :47 Min. :18
## Class :character 1st Qu.:56 1st Qu.:19
## Mode :character Median :87 Median :20
## Mean :77 Mean :20
## 3rd Qu.:97 3rd Qu.:21
## Max. :98 Max. :22

# Print the full table.
print(students)
## Name Score Age
## 1 john 56 20
## 2 grace 87 21
## 3 kiptoo 97 18
## 4 dan 47 19
## 5 collo 98 22
Handling missing values
# Example data with gaps: two missing scores (rows 2 and 5) and one missing
# age (row 3).
studentss <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, NA, 97, 47, NA),
  Age = c(20, 21, NA, 19, 22)
)

# Logical matrix: TRUE wherever a value is missing.
is.na(studentss)
## Name Score Age
## [1,] FALSE FALSE FALSE
## [2,] FALSE TRUE FALSE
## [3,] FALSE FALSE TRUE
## [4,] FALSE FALSE FALSE
## [5,] FALSE TRUE FALSE

# Number of missing *cells* in the whole data frame. A student with two
# missing values would be counted twice here.
sum(is.na(studentss))
## [1] 3

# Number of *students* (rows) with at least one missing value.
sum(!complete.cases(studentss))
## [1] 3

# Drop every row that has at least one missing value.
na.omit(studentss)
## Name Score Age
## 1 john 56 20
## 4 dan 47 19

# Keep an untouched copy: the constant fill below overwrites the NAs in
# `studentss`, and the mean imputation further down needs the original gaps.
studentss_raw <- studentss

# Strategy 1: replace missing values with a fixed constant
# (50 for Score, 20 for Age; any other constant works the same way).
studentss$Score[is.na(studentss$Score)] <- 50
studentss$Age[is.na(studentss$Age)] <- 20
studentss
## Name Score Age
## 1 john 56 20
## 2 grace 50 21
## 3 kiptoo 97 20
## 4 dan 47 19
## 5 collo 50 22

# Strategy 2: replace missing scores with the mean of the observed scores.
# This must start from the raw data; applied to `studentss` after the constant
# fill it would find no NA left and silently do nothing. Age is left as-is.
studentss_mean <- studentss_raw
observed_score_mean <- mean(studentss_raw$Score, na.rm = TRUE)
studentss_mean$Score[is.na(studentss_mean$Score)] <- observed_score_mean
studentss_mean
## Name Score Age
## 1 john 56.00000 20
## 2 grace 66.66667 21
## 3 kiptoo 97.00000 NA
## 4 dan 47.00000 19
## 5 collo 66.66667 22
Filtering data
# Example table of five students.
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)

# Keep only the rows whose Age is strictly greater than 20; the original row
# names are preserved in the result.
filtered_data <- students[with(students, Age > 20), ]
filtered_data
## Name Score Age
## 2 grace 87 21
## 5 collo 98 22
Sorting data by score in descending order
# Example table of five students.
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)

# Reorder the rows from highest to lowest Score and print the result;
# `students` itself is not modified.
students[order(students$Score, decreasing = TRUE), ]
## Name Score Age
## 5 collo 98 22
## 3 kiptoo 97 18
## 2 grace 87 21
## 1 john 56 20
## 4 dan 47 19