1. Cơ chế khuyết dữ liệu
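Cơ chế khuyết dữ liệu gồm ba loại:
MCAR (Missing Completely At Random)
MAR (Missing At Random)
MNAR (Missing Not At Random)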
Thực hành xác định cơ chế gây khuyết
head(attenu)
Lệnh anyNA(dataset)
anyNA(attenu)
[1] TRUE
Lệnh is.na(dataset)
colSums(is.na(attenu))
  event     mag station    dist   accel
      0       0      16       0       0
# TRUE = 1
# FALSE = 0
TRUE + TRUE # 1 + 1
[1] 2
TRUE + 10 # 1 + 10
[1] 11
FALSE * 10 # 0 * 10
[1] 0
sum(rowSums(is.na(attenu)) > 0) # bao nhiêu quan sát bị khuyết
[1] 16
# case = obs
# bao nhiêu quan sát khuyết
sum(!complete.cases(attenu))
[1] 16
Package visdat
vis_dat(dataset)
vis_miss(dataset)
library(visdat)
vis_dat(attenu)

vis_miss(attenu)

Package VIM
aggr(dataset)
library(VIM)
aggr(attenu, numbers = TRUE)

aggr(sleep, numbers = TRUE)

LS0tCnRpdGxlOiAnQ2jGsMahbmcgMzogTWlzc2luZyBEYXRhJwpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAotLS0KCiMjIyAxLiBDxqEgY2jhur8ga2h1eeG6v3QgZOG7ryBsaeG7h3UKCkPGoSBjaOG6vyBraHV54bq/dCBk4buvIGxp4buHdQoKLSAgIE1DQVIKCi0gICBNQVIKCi0gICBNTkFSCgoqKlRo4buxYyBow6BuaCB4w6FjIMSR4buLbmggY8ahIGNo4bq/IGfDonkga2h1eeG6v3QqKgoKYGBge3J9CmhlYWQoYXR0ZW51KQpgYGAKCjxicj4KCjxicj4KCioqTOG7h25oIGBhbnlOQShkYXRhc2V0KWAqKgoKYGBge3J9CmFueU5BKGF0dGVudSkKYGBgCgo8YnI+Cgo8YnI+CgoqKkzhu4duaCBgaXMubmEoZGF0YXNldClgKioKCmBgYHtyfQpjb2xTdW1zKGlzLm5hKGF0dGVudSkpCmBgYAoKYGBge3J9CiMgVFJVRSA9IDEKIyBGQUxTRSA9IDAKVFJVRSArIFRSVUUgIyAxICsgMQpUUlVFICsgMTAgIyAxICsgMTAKRkFMU0UgKiAxMCAjIDAgKiAxMApgYGAKCmBgYHtyfQpzdW0ocm93U3Vtcyhpcy5uYShhdHRlbnUpKSA+IDApICMgYmFvIG5oacOqdSBxdWFuIHPDoXQgYuG7iyBraHV54bq/dApgYGAKCmBgYHtyfQojIGNhc2UgPSBvYnMKIyBiYW8gbmhpw6p1IHF1YW4gc8OhdCBraHV54bq/dApzdW0oIWNvbXBsZXRlLmNhc2VzKGF0dGVudSkpCmBgYAoKPGJyPgoKPGJyPgoKKipQYWNrYWdlIGB2aXNkYXRgKioKCi0gICBgdmlzX2RhdChkYXRhc2V0KWAKCi0gICBgdmlzX21pc3MoZGF0YXNldClgCgpgYGB7cn0KbGlicmFyeSh2aXNkYXQpCnZpc19kYXQoYXR0ZW51KQpgYGAKCmBgYHtyfQp2aXNfbWlzcyhhdHRlbnUpCmBgYAoKPGJyPgoKPGJyPgoKKipQYWNrYWdlKiogYFZJTWAKCi0gICBgYWdncmAKCmBgYHtyfQpsaWJyYXJ5KFZJTSkKYWdncihhdHRlbnUsIG51bWJlcnMgPSBUUlVFKQpgYGAKCmBgYHtyfQphZ2dyKHNsZWVwLCBudW1iZXJzID0gVFJVRSkKYGBgCgpgYGB7cn0KaGVscChzbGVlcCkgIyB4ZW0gbcO0IHThuqMgduG7gSBkYXRhCmBgYAo=