Data
# Example data set: 18 records, one value per record in each vector.
# Name and Age deliberately contain NA entries.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  NA, NA, "Ibraheem", "Fahd", "Majeda", NA,
  "Mohammed", "Remas", "Rteel", "Abdalrhman", NA, "Tala",
  "Remas", "Nadiah", NA, "Mhdi", "Lila", NA
)
Age <- c(
  30, NA, 29, NA, 27, 9,
  32, 9, NA, 29, 28, 9,
  9, 30, NA, 28, 42, NA
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)
# Combine the four vectors column-wise; column names match the vector names.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
Test for missing values
# Logical matrix with one cell per data value: TRUE where the value is NA
na_mask <- is.na(data)
na_mask
## ID Name Age Sex
## [1,] FALSE TRUE FALSE FALSE
## [2,] FALSE TRUE TRUE FALSE
## [3,] FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE TRUE FALSE
## [5,] FALSE FALSE FALSE FALSE
## [6,] FALSE TRUE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE TRUE FALSE
## [10,] FALSE FALSE FALSE FALSE
## [11,] FALSE TRUE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE
## [15,] FALSE TRUE TRUE FALSE
## [16,] FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE
## [18,] FALSE TRUE TRUE FALSE
# Total number of missing cells (TRUE counts as 1)
sum(na_mask)
## [1] 11
# Proportion of all cells that are missing
mean(na_mask)
## [1] 0.1527778
# TRUE for each row in which no column is NA
complete_rows <- complete.cases(data)
# Rows that contain at least one missing value
data[!complete_rows, ]
## ID Name Age Sex
## 1 16762 <NA> 30 M
## 2 16439 <NA> NA M
## 4 16790 Fahd NA M
## 6 16998 <NA> 9 F
## 9 16945 Rteel NA F
## 11 16224 <NA> 28 M
## 15 16111 <NA> NA M
## 18 16327 <NA> NA F
# Rows that contain no missing values
data[complete_rows, ]
## ID Name Age Sex
## 3 16211 Ibraheem 29 M
## 5 16443 Majeda 27 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 10 16111 Abdalrhman 29 M
## 12 16980 Tala 9 F
## 13 16779 Remas 9 F
## 14 16000 Nadiah 30 F
## 16 16224 Mhdi 28 M
## 17 16400 Lila 42 F
Data
# Rebuild the example data set from scratch (18 records; Name and Age
# contain NA entries) so the next step works on the untouched values.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  NA, NA, "Ibraheem", "Fahd", "Majeda", NA,
  "Mohammed", "Remas", "Rteel", "Abdalrhman", NA, "Tala",
  "Remas", "Nadiah", NA, "Mhdi", "Lila", NA
)
Age <- c(
  30, NA, 29, NA, 27, 9,
  32, 9, NA, 29, 28, 9,
  9, 30, NA, 28, 42, NA
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)
# Combine the four vectors column-wise; column names match the vector names.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
1) Replace the missing values with zero
# Overwrite every NA cell with 0. The mask covers the whole data frame,
# so the Name column is filled as well (it prints as 0 below).
data <- replace(data, is.na(data), 0)
data
## ID Name Age Sex
## 1 16762 0 30 M
## 2 16439 0 0 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 0 M
## 5 16443 Majeda 27 F
## 6 16998 0 9 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 9 16945 Rteel 0 F
## 10 16111 Abdalrhman 29 M
## 11 16224 0 28 M
## 12 16980 Tala 9 F
## 13 16779 Remas 9 F
## 14 16000 Nadiah 30 F
## 15 16111 0 0 M
## 16 16224 Mhdi 28 M
## 17 16400 Lila 42 F
## 18 16327 0 0 F
Data
# Rebuild the example data set from scratch (18 records; Name and Age
# contain NA entries) so the next step works on the untouched values.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  NA, NA, "Ibraheem", "Fahd", "Majeda", NA,
  "Mohammed", "Remas", "Rteel", "Abdalrhman", NA, "Tala",
  "Remas", "Nadiah", NA, "Mhdi", "Lila", NA
)
Age <- c(
  30, NA, 29, NA, 27, 9,
  32, 9, NA, 29, 28, 9,
  9, 30, NA, 28, 42, NA
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)
# Combine the four vectors column-wise; column names match the vector names.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
2) Replace the missing values with the column mean
# Fill each missing age with the mean of the observed (non-NA) ages.
# The mean is computed before any value is replaced.
data$Age <- replace(
  data$Age,
  is.na(data$Age),
  mean(data$Age, na.rm = TRUE)
)
data$Age
## [1] 30.00000 23.92308 29.00000 23.92308 27.00000 9.00000 32.00000 9.00000
## [9] 23.92308 29.00000 28.00000 9.00000 9.00000 30.00000 23.92308 28.00000
## [17] 42.00000 23.92308
Data
# Rebuild the example data set from scratch (18 records; Name and Age
# contain NA entries) so the next step works on the untouched values.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  NA, NA, "Ibraheem", "Fahd", "Majeda", NA,
  "Mohammed", "Remas", "Rteel", "Abdalrhman", NA, "Tala",
  "Remas", "Nadiah", NA, "Mhdi", "Lila", NA
)
Age <- c(
  30, NA, 29, NA, 27, 9,
  32, 9, NA, 29, 28, 9,
  9, 30, NA, 28, 42, NA
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)
# Combine the four vectors column-wise; column names match the vector names.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
3) Remove all rows with missing values
# Drop every row that has an NA in any column and print what remains.
# The result is not assigned, so `data` itself keeps all 18 rows.
# Original row numbers are kept as row names in the output below.
na.omit(data)
## ID Name Age Sex
## 3 16211 Ibraheem 29 M
## 5 16443 Majeda 27 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 10 16111 Abdalrhman 29 M
## 12 16980 Tala 9 F
## 13 16779 Remas 9 F
## 14 16000 Nadiah 30 F
## 16 16224 Mhdi 28 M
## 17 16400 Lila 42 F