Data frame

# Build a small data frame of student records: one row per student, with a
# character Name column and numeric Score and Age columns.
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)
# Compact structural overview: dimensions plus the type of every column.
str(students)
## 'data.frame':    5 obs. of  3 variables:
##  $ Name : chr  "john" "grace" "kiptoo" "dan" ...
##  $ Score: num  56 87 97 47 98
##  $ Age  : num  20 21 18 19 22
# View() opens a spreadsheet-style viewer and needs a GUI, so only call it in
# an interactive session; this keeps the script runnable in batch mode.
if (interactive()) {
  View(students)
}
# Per-column summary: five-number summary and mean for numeric columns,
# length/class/mode for the character column.
summary(students)
##      Name               Score         Age    
##  Length:5           Min.   :47   Min.   :18  
##  Class :character   1st Qu.:56   1st Qu.:19  
##  Mode  :character   Median :87   Median :20  
##                     Mean   :77   Mean   :20  
##                     3rd Qu.:97   3rd Qu.:21  
##                     Max.   :98   Max.   :22
# Print the full table.
print(students)
##     Name Score Age
## 1   john    56  20
## 2  grace    87  21
## 3 kiptoo    97  18
## 4    dan    47  19
## 5  collo    98  22

Handling missing values

# Same student table, but with missing values (NA) in Score and Age so the
# NA-handling tools below have something to work on.
studentss <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, NA, 97, 47, NA),
  Age = c(20, 21, NA, 19, 22)
)
is.na(studentss) # logical matrix: TRUE wherever a value is missing
##       Name Score   Age
## [1,] FALSE FALSE FALSE
## [2,] FALSE  TRUE FALSE
## [3,] FALSE FALSE  TRUE
## [4,] FALSE FALSE FALSE
## [5,] FALSE  TRUE FALSE
sum(is.na(studentss)) # total number of missing cells (not the number of rows)
## [1] 3
na.omit(studentss) # omit rows with any missing value (result is only printed)
##   Name Score Age
## 1 john    56  20
## 4  dan    47  19
# Keep a copy that still contains the NAs, so the mean imputation further down
# has missing scores left to fill once the constant fill has run.
studentss_mean <- studentss
# Strategy 1: constant imputation - replace missing values with fixed numbers.
studentss$Score[is.na(studentss$Score)] <- 50
#studentss$Score[is.na(studentss$Score)] <- 65
studentss$Age[is.na(studentss$Age)] <- 20
studentss
##     Name Score Age
## 1   john    56  20
## 2  grace    50  21
## 3 kiptoo    97  20
## 4    dan    47  19
## 5  collo    50  22
# Strategy 2: mean imputation - replace missing scores with the mean of the
# observed scores. na.rm = TRUE is required, otherwise the mean itself is NA.
# Only Score is imputed here, so the missing Age stays NA in this copy.
mean_score <- mean(studentss_mean$Score, na.rm = TRUE)
studentss_mean$Score[is.na(studentss_mean$Score)] <- mean_score
studentss_mean
##     Name    Score Age
## 1   john 56.00000  20
## 2  grace 66.66667  21
## 3 kiptoo 97.00000  NA
## 4    dan 47.00000  19
## 5  collo 66.66667  22

Filtering data

# Recreate the complete student table (no missing values).
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)
# Keep only the rows where Age is strictly greater than 20; all columns are
# retained and the original row names (2 and 5) are preserved.
filtered_data <- students[students[["Age"]] > 20, ]
filtered_data
##    Name Score Age
## 2 grace    87  21
## 5 collo    98  22

Sorting data by score in descending order

# Recreate the complete student table (no missing values).
students <- data.frame(
  Name = c("john", "grace", "kiptoo", "dan", "collo"),
  Score = c(56, 87, 97, 47, 98),
  Age = c(20, 21, 18, 19, 22)
)
# order() returns the row indices that sort Score from highest to lowest;
# indexing with them prints the reordered table without modifying `students`.
students[order(students$Score, decreasing = TRUE), ]
##     Name Score Age
## 5  collo    98  22
## 3 kiptoo    97  18
## 2  grace    87  21
## 1   john    56  20
## 4    dan    47  19