library(readr)
## Warning: package 'readr' was built under R version 3.6.1
# Read the analysis data set from CSV into `ma`.
ma <- read_csv(file = "~/RPy Analytics/Betty Tigere/ma.csv")
## Parsed with column specification:
## cols(
##   var_2 = col_double(),
##   var_3 = col_double(),
##   var_4 = col_double(),
##   var_5 = col_double()
## )
# View(ma)
# attach() places the columns of `ma` on the search path so they can be
# referenced by bare name; any later bare reference to a column relies on it.
# NOTE(review): attach() is generally discouraged -- prefer `ma$col` or
# `data = ma` in new code.
attach(what = ma)
### loading the required packages

library(mice)
## Warning: package 'mice' was built under R version 3.6.1
## Loading required package: lattice
## 
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(VIM)
## Warning: package 'VIM' was built under R version 3.6.1
## Loading required package: colorspace
## Warning: package 'colorspace' was built under R version 3.6.1
## Loading required package: grid
## Loading required package: data.table
## Warning: package 'data.table' was built under R version 3.6.1
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Tabulate (and plot) the missing-data patterns found in `ma`.
md.pattern(x = ma)

##    var_2 var_3 var_4 var_5  
## 34     1     1     1     1 0
## 2      1     0     0     0 3
##        0     2     2     2 6
# Pairwise response/missingness counts; the result is a list with the
# components rr, rm, mr and mm, kept in `p` for later use.
p <- md.pairs(data = ma)
print(p)
## $rr
##       var_2 var_3 var_4 var_5
## var_2    36    34    34    34
## var_3    34    34    34    34
## var_4    34    34    34    34
## var_5    34    34    34    34
## 
## $rm
##       var_2 var_3 var_4 var_5
## var_2     0     2     2     2
## var_3     0     0     0     0
## var_4     0     0     0     0
## var_5     0     0     0     0
## 
## $mr
##       var_2 var_3 var_4 var_5
## var_2     0     0     0     0
## var_3     2     0     0     0
## var_4     2     0     0     0
## var_5     2     0     0     0
## 
## $mm
##       var_2 var_3 var_4 var_5
## var_2     0     0     0     0
## var_3     0     2     2     2
## var_4     0     2     2     2
## var_5     0     2     2     2
# Structural overview of the md.pairs() result: one row per count matrix.
summary(p)
##    Length Class  Mode   
## rr 16     -none- numeric
## rm 16     -none- numeric
## mr 16     -none- numeric
## mm 16     -none- numeric
# Side-by-side boxplots of the first three columns of `ma`.
boxplot(ma[, 1:3])

### plot
# Take the column from `ma` explicitly so the call does not depend on
# attach(), and spell out `interactive` instead of relying on partial
# matching of the abbreviated `int` argument.
pbox(ma[["var_3"]], interactive = FALSE, cex = 0.7)

# Multiply impute the missing values in `ma`. `m` and `maxit` are written out
# at the values the original call used implicitly (5 imputations x 5
# iterations, as shown in the iteration log). The imputation is stochastic, so
# a fixed seed is supplied to make every downstream result reproducible.
# NOTE: output recorded from the earlier unseeded run may differ slightly.
imp <- mice(ma, m = 5, maxit = 5, seed = 20190801)
## 
##  iter imp variable
##   1   1  var_3  var_4  var_5
##   1   2  var_3  var_4  var_5
##   1   3  var_3  var_4  var_5
##   1   4  var_3  var_4  var_5
##   1   5  var_3  var_4  var_5
##   2   1  var_3  var_4  var_5
##   2   2  var_3  var_4  var_5
##   2   3  var_3  var_4  var_5
##   2   4  var_3  var_4  var_5
##   2   5  var_3  var_4  var_5
##   3   1  var_3  var_4  var_5
##   3   2  var_3  var_4  var_5
##   3   3  var_3  var_4  var_5
##   3   4  var_3  var_4  var_5
##   3   5  var_3  var_4  var_5
##   4   1  var_3  var_4  var_5
##   4   2  var_3  var_4  var_5
##   4   3  var_3  var_4  var_5
##   4   4  var_3  var_4  var_5
##   4   5  var_3  var_4  var_5
##   5   1  var_3  var_4  var_5
##   5   2  var_3  var_4  var_5
##   5   3  var_3  var_4  var_5
##   5   4  var_3  var_4  var_5
##   5   5  var_3  var_4  var_5
# Imputed values for var_3: one row per incomplete case, one column per
# imputation.
imp[["imp"]][["var_3"]]
##    1 2 3 4 5
## 3  0 0 1 0 0
## 35 0 0 1 0 0
# First rows of the first completed data set.
head(complete(imp, action = 1))
##   var_2 var_3 var_4 var_5
## 1     8     1     0    95
## 2    16     0     0    96
## 3     9     0     0    96
## 4     7     1     0    96
## 5     1     0     2    96
## 6     4     3     9    96
# First rows of the second completed data set.
head(complete(imp, action = 2))
##   var_2 var_3 var_4 var_5
## 1     8     1     0    95
## 2    16     0     0    96
## 3     9     0     0    99
## 4     7     1     0    96
## 5     1     0     2    96
## 6     4     3     9    96
# Complete-case analysis on the original data: rows with missing values are
# dropped by lm() (see "2 observations deleted due to missingness" below).
# The model is fitted once and reused for both the summary and the confidence
# intervals, so the two reports cannot drift apart.
cc_fit <- lm(var_2 ~ var_3 + var_4 + var_5, data = ma)
summary(cc_fit)
## 
## Call:
## lm(formula = var_2 ~ var_3 + var_4 + var_5, data = ma)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3532 -3.5811 -0.2992  4.5455  7.5348 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  19.3813    52.2977   0.371   0.7135  
## var_3         1.4608     0.8083   1.807   0.0808 .
## var_4        -0.7540     0.2802  -2.691   0.0115 *
## var_5        -0.1013     0.5383  -0.188   0.8520  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.097 on 30 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1951, Adjusted R-squared:  0.1146 
## F-statistic: 2.424 on 3 and 30 DF,  p-value: 0.08515
# 95% confidence intervals for the same complete-case fit.
confint(cc_fit)
##                   2.5 %      97.5 %
## (Intercept) -87.4249652 126.1874897
## var_3        -0.1899145   3.1115568
## var_4        -1.3261946  -0.1817197
## var_5        -1.2005469   0.9979596
## multiple imputation
# Fit the regression on each completed data set held in `imp`.
# `data` must NOT be passed to lm() here: with() evaluates the call inside
# every imputed data set, whereas `data = ma` forces each fit back onto the
# original incomplete data, so the imputations are silently ignored.
fit <- with(imp, lm(var_2 ~ var_3 + var_4 + var_5))
# Combine the per-imputation fits into pooled estimates (Rubin's rules).
pooled <- pool(fit)
pooled
summary(pooled)
# NOTE: the output previously recorded at this point was produced with
# `data = ma`. It showed b = 0, riv = 0 and lambda = 0 for every term, and
# estimates identical to the complete-case fit (intercept 19.3813,
# var_3 1.4608, var_4 -0.7540, var_5 -0.1013) -- i.e. no between-imputation
# variance at all. Re-run this section to regenerate the pooled results.