Đọc dữ liệu
# Absolute path to the hip-fracture practice dataset (machine-specific).
t <- "C:\\Users\\SONY\\Google Drive\\HOC TAP\\CH 22\\Phan tich du lieu\\Workshop GS Tuan\\Phan tich du lieu va ung dung\\Datasets for practice\\Hip fracture data.csv"
# Load the CSV; the empty string is the only NA marker (it replaces the
# default "NA" marker of read.csv).
dat <- read.csv(file = t, na.strings = "")
Chọn đối tượng có v2 < 2 đưa vào dataframe "hh":
Chọn biến liên quan trong dat1
# Keep only the eight columns listed below, in this order.
# NOTE(review): `dat1` is not created in the visible part of this file;
# confirm where it is defined before running this line.
hip <- dat1[, c(
  "hipfx", "gender", "age", "v2", "v3", "bmi", "v4", "v5"
)]
#imputation
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
Xem mô hình dữ liệu thiếu (pattern of missing data)
# Tabulate the missing-data patterns of `hip`: each row is one pattern
# (1 = observed, 0 = missing), with the number of rows showing it.
md.pattern(x = hip)
## hipfx gender age v2 v3 bmi v4 v5
## 2328 1 1 1 1 1 1 1 1 0
## 355 1 1 1 1 1 1 1 0 1
## 29 1 1 1 1 1 1 0 1 1
## 3 1 1 1 1 1 1 0 0 2
## 3 1 1 1 1 1 0 1 1 1
## 4 1 1 1 1 1 0 1 0 2
## 0 0 0 0 0 7 32 362 401
# Impute the missing values in `hip` with MICE (multivariate imputation by
# chained equations). The seed is fixed so the imputations are reproducible;
# printFlag = FALSE silences the per-iteration progress log.
# `FALSE` is spelled out because the alias `F` is an ordinary variable that
# can be reassigned.
ms.hip <- mice(hip, seed = 1234, printFlag = FALSE)
Hoàn thiện data
# Extract a completed data set from the imputation object. `action = 1` is
# the default and is spelled out here: only the first of the imputed data
# sets is returned.
ihip <- mice::complete(ms.hip, action = 1)
Kiểm tra dataset sau imputation
# Print the first six rows of the completed data set as a quick sanity check.
head(x = ihip, n = 6L)
## hipfx gender age v2 v3 bmi v4 v5
## 1 0 Male 73 0.88 1.079 32 1.458 44
## 2 0 Female 67 0.85 0.966 26 1.325 18
## 3 0 Male 68 0.84 1.013 26 1.494 36
## 4 0 Female 62 0.71 0.839 24 1.214 25
## 5 0 Male 61 0.60 0.811 24 1.144 44
## 6 0 Female 76 0.58 0.743 28 0.980 15