1. Cơ chế khuyết dữ liệu

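Cơ chế khuyết dữ liệu gồm ba loại: MCAR (Missing Completely At Random), MAR (Missing At Random) và MNAR (Missing Not At Random).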

Thực hành xác định cơ chế gây khuyết

# Preview the first rows of the attenu dataset before inspecting its missing values.
head(attenu)



Lệnh anyNA(dataset)

# Check whether attenu contains at least one missing value (NA) anywhere.
anyNA(attenu)
[1] TRUE



Lệnh is.na(dataset)

# Count the missing values (NA) in each column: is.na() marks every cell as
# TRUE/FALSE and colSums() adds up the TRUE values column by column.
colSums(is.na(attenu))
  event     mag station    dist   accel 
      0       0      16       0       0 
# Logical values are coerced to numbers in arithmetic, which is why summing
# the result of is.na() yields a count of missing values.
# TRUE = 1
# FALSE = 0
TRUE + TRUE # 1 + 1
[1] 2
TRUE + 10 # 1 + 10
[1] 11
FALSE * 10 # 0 * 10
[1] 0
# Count the rows that contain at least one NA: rowSums() gives the number of
# NAs per row, and "> 0" flags the rows that have any.
sum(rowSums(is.na(attenu)) > 0) # how many observations have missing values
[1] 16
# case = obs (a "case" is one observation, i.e. one row)
# how many observations are incomplete; same count as above, obtained by
# negating complete.cases()
sum(!complete.cases(attenu))
[1] 16



Package visdat: vis_dat(dataset) và vis_miss(dataset)

# visdat provides quick visual overviews of a data frame.
library(visdat)
# Overview plot of the whole dataset: column types and missing cells.
vis_dat(attenu)

# Plot focused on missingness only: which cells are NA in each column.
vis_miss(attenu)



Package VIM: hàm aggr

# VIM provides tools for visualising (and imputing) missing values.
library(VIM)
# Aggregation plot: amount of missing values per variable and per combination
# of variables; numbers = TRUE annotates the combinations with their
# proportions.
aggr(attenu, numbers = TRUE)

# Same plot for the sleep data.
# NOTE(review): presumably this is the sleep dataset shipped with VIM (which
# masks datasets::sleep once VIM is attached) - confirm with help(sleep).
aggr(sleep, numbers = TRUE)

LS0tCnRpdGxlOiAnQ2jGsMahbmcgMzogTWlzc2luZyBEYXRhJwpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAotLS0KCiMjIyAxLiBDxqEgY2jhur8ga2h1eeG6v3QgZOG7ryBsaeG7h3UKCkPGoSBjaOG6vyBraHV54bq/dCBk4buvIGxp4buHdQoKLSAgIE1DQVIKCi0gICBNQVIKCi0gICBNTkFSCgoqKlRo4buxYyBow6BuaCB4w6FjIMSR4buLbmggY8ahIGNo4bq/IGfDonkga2h1eeG6v3QqKgoKYGBge3J9CmhlYWQoYXR0ZW51KQpgYGAKCjxicj4KCjxicj4KCioqTOG7h25oIGBhbnlOQShkYXRhc2V0KWAqKgoKYGBge3J9CmFueU5BKGF0dGVudSkKYGBgCgo8YnI+Cgo8YnI+CgoqKkzhu4duaCBgaXMubmEoZGF0YXNldClgKioKCmBgYHtyfQpjb2xTdW1zKGlzLm5hKGF0dGVudSkpCmBgYAoKYGBge3J9CiMgVFJVRSA9IDEKIyBGQUxTRSA9IDAKVFJVRSArIFRSVUUgIyAxICsgMQpUUlVFICsgMTAgIyAxICsgMTAKRkFMU0UgKiAxMCAjIDAgKiAxMApgYGAKCmBgYHtyfQpzdW0ocm93U3Vtcyhpcy5uYShhdHRlbnUpKSA+IDApICMgYmFvIG5oacOqdSBxdWFuIHPDoXQgYuG7iyBraHV54bq/dApgYGAKCmBgYHtyfQojIGNhc2UgPSBvYnMKIyBiYW8gbmhpw6p1IHF1YW4gc8OhdCBraHV54bq/dApzdW0oIWNvbXBsZXRlLmNhc2VzKGF0dGVudSkpCmBgYAoKPGJyPgoKPGJyPgoKKipQYWNrYWdlIGB2aXNkYXRgKioKCi0gICBgdmlzX2RhdChkYXRhc2V0KWAKCi0gICBgdmlzX21pc3MoZGF0YXNldClgCgpgYGB7cn0KbGlicmFyeSh2aXNkYXQpCnZpc19kYXQoYXR0ZW51KQpgYGAKCmBgYHtyfQp2aXNfbWlzcyhhdHRlbnUpCmBgYAoKPGJyPgoKPGJyPgoKKipQYWNrYWdlKiogYFZJTWAKCi0gICBgYWdncmAKCmBgYHtyfQpsaWJyYXJ5KFZJTSkKYWdncihhdHRlbnUsIG51bWJlcnMgPSBUUlVFKQpgYGAKCmBgYHtyfQphZ2dyKHNsZWVwLCBudW1iZXJzID0gVFJVRSkKYGBgCgpgYGB7cn0KaGVscChzbGVlcCkgIyB4ZW0gbcO0IHThuqMgduG7gSBkYXRhCmBgYAo=