library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(readxl)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
sakernas_diy <- read_excel("sakernas_new.xlsx")
sus1 <- read_excel("susenas 2020.xlsx", sheet = "RT")
sus2 <- read_excel("susenas 2020.xlsx", sheet = "Indo")
susenas_diy = merge(x = sus1, y = sus2, all.y = TRUE)
sakernas_diy['KODE_KAB']=str_pad(sakernas_diy$KODE_KAB, width = 2, side = 'left', pad = '0')
sakernas_diy['NO_DSRT']=str_pad(sakernas_diy$NO_DSRT, width = 2, side = 'left', pad = '0')

susenas_diy['R102']=str_pad(susenas_diy$R102, width = 2, side = 'left', pad = '0')

sakernas_diy["psu"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$nks_ok, sep = "")
sakernas_diy["ssu"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$nks_ok, sakernas_diy$NO_DSRT, sep = "")
sakernas_diy["strata"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$KLASIFIKAS, sep = "")
sakernas_diy$B4_K3=as.factor(sakernas_diy$B4_K3)
sakernas_diy$B4_K6=as.factor(sakernas_diy$B4_K6)
sakernas_diy$B4_K9=as.factor(sakernas_diy$B4_K9)
sakernas_diy$B4_K10=as.factor(sakernas_diy$B4_K10)
sakernas_diy$B5_R1A=as.factor(sakernas_diy$B5_R1A)
sakernas_diy$B5_R4A=as.factor(sakernas_diy$B5_R4A)
sakernas_diy$B5_R4B=as.factor(sakernas_diy$B5_R4B)
sakernas_diy$B5_R4C=as.factor(sakernas_diy$B5_R4C)
sakernas_diy$B5_R4D=as.factor(sakernas_diy$B5_R4D)
sakernas_diy$B5_R4E=as.factor(sakernas_diy$B5_R4E)
sakernas_diy$B5_R5A1=as.factor(sakernas_diy$B5_R5A1)
sakernas_diy$B5_R5A2=as.factor(sakernas_diy$B5_R5A2)
sakernas_diy$B5_R5A3=as.factor(sakernas_diy$B5_R5A3)
sakernas_diy$B5_R5A4=as.factor(sakernas_diy$B5_R5A4)
sakernas_diy$B5_R5B=as.factor(sakernas_diy$B5_R5B)
sakernas_diy$B5_R20_KAT=as.factor(sakernas_diy$B5_R20_KAT)
sakernas_diy$B5_R24A=as.factor(sakernas_diy$B5_R24A)
sakernas_diy$B5_R12A=as.factor(sakernas_diy$B5_R12A)
sakernas_diy$B5_R12B=as.factor(sakernas_diy$B5_R12B)
sakernas_diy$B5_R17A=as.factor(sakernas_diy$B5_R17A)
sakernas_diy$KLASIFIKAS=as.factor(sakernas_diy$KLASIFIKAS)
sakernas_diy$KODE_KAB=as.factor(sakernas_diy$KODE_KAB)
susenas_diy$B2_R1=susenas_diy$R301
susenas_diy$B2_R2=susenas_diy$R303
susenas_diy$B4_K3=as.factor(susenas_diy$R403)
susenas_diy$B4_K6=as.factor(susenas_diy$R405)
susenas_diy$B4_K9=as.factor(susenas_diy$R612)
susenas_diy$B4_K8=susenas_diy$R407
susenas_diy$B4_K10=as.factor(susenas_diy$R404)
susenas_diy$B5_R1A=as.factor(susenas_diy$R615)
susenas_diy$B5_R4A=as.factor(susenas_diy$R1002)
susenas_diy$B5_R4B=as.factor(susenas_diy$R1003)
susenas_diy$B5_R4C=as.factor(susenas_diy$R1004)
susenas_diy$B5_R4D=as.factor(susenas_diy$R1005)
susenas_diy$B5_R4E=as.factor(susenas_diy$R1008)
susenas_diy$B5_R5A1=as.factor(susenas_diy$R702_A)
susenas_diy$B5_R5A2=as.factor(susenas_diy$R702_B)
susenas_diy$B5_R5A3=as.factor(susenas_diy$R702_C)
susenas_diy$B5_R5A4=as.factor(susenas_diy$R702_D)
susenas_diy$B5_R5B=as.factor(susenas_diy$R703)
susenas_diy$B5_R24A=as.factor(susenas_diy$R706)
susenas_diy$B5_R20_KAT=as.factor(susenas_diy$R705)
susenas_diy$KLASIFIKAS=as.factor(susenas_diy$R105)
susenas_diy$KODE_KAB=as.factor(susenas_diy$R102)
susenas_diy$STATUS.1=as.factor(susenas_diy$STATUS.1)
susenas_diy$STATUS=as.factor(susenas_diy$STATUS)
susenas_diy.15 <- subset(susenas_diy, R407>14)
df4=select(susenas_diy.15, B2_R1, B2_R2, B4_K3, B4_K6, B4_K8, B4_K9, B4_K10, B5_R4A, B5_R4B, B5_R4C, B5_R4D, B5_R4E, B5_R5A1, B5_R5A2, B5_R5A3, B5_R5A4, B5_R5B, B5_R1A, B5_R20_KAT, B5_R24A, KAPITA, RENUM, FWT, PSU, SSU, STRATA, KLASIFIKAS, KODE_KAB)
Train <- createDataPartition(sakernas_diy$B5_R12A, p=0.8, list=FALSE)
training <- sakernas_diy[Train, ]
testing <- sakernas_diy[-Train, ]

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = training)
fit.logit1 <- svyglm(B5_R12A~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
                     +B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
                     +B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A
                     +B5_R24A+KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit1)
## 
## Call:
## svyglm(formula = B5_R12A ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 + 
##     B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E + 
##     B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R1A + 
##     B5_R24A + KLASIFIKAS + KODE_KAB, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20, 
##     data = training)
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.76121    2.04593  -0.861  0.39000    
## B2_R1       -0.05890    0.27005  -0.218  0.82750    
## B2_R2        0.14572    0.30614   0.476  0.63443    
## B4_K32       0.65931    0.47711   1.382  0.16802    
## B4_K33      -0.12009    0.43204  -0.278  0.78123    
## B4_K34      -1.74989    0.73182  -2.391  0.01740 *  
## B4_K35      -0.25741    0.64932  -0.396  0.69207    
## B4_K36       0.81046    0.94765   0.855  0.39309    
## B4_K37      14.16036    0.76062  18.617  < 2e-16 ***
## B4_K38      14.96509    0.69794  21.442  < 2e-16 ***
## B4_K39       0.54512    0.65780   0.829  0.40792    
## B4_K62       0.58080    0.26079   2.227  0.02667 *  
## B4_K8        0.05421    0.01208   4.489 1.01e-05 ***
## B4_K92       4.30988    1.84539   2.335  0.02016 *  
## B4_K93       1.94041    1.53346   1.265  0.20669    
## B4_K102      0.96788    0.45492   2.128  0.03417 *  
## B4_K103      0.72343    0.81413   0.889  0.37492    
## B4_K104      1.59225    1.30262   1.222  0.22252    
## B5_R4A2     -1.32342    0.55473  -2.386  0.01765 *  
## B5_R4A3     14.67736    0.81712  17.962  < 2e-16 ***
## B5_R4B5     15.00443    0.73482  20.419  < 2e-16 ***
## B5_R4B6     12.20458    0.67042  18.204  < 2e-16 ***
## B5_R4C2     -0.95776    1.45131  -0.660  0.50979    
## B5_R4C3     15.05919    0.84692  17.781  < 2e-16 ***
## B5_R4D5     16.62732    1.47422  11.279  < 2e-16 ***
## B5_R4D6     14.48218    1.79206   8.081 1.49e-14 ***
## B5_R4E2     17.00123    0.64506  26.356  < 2e-16 ***
## B5_R4E3     15.88789    0.93537  16.986  < 2e-16 ***
## B5_R5A12    -0.82855    0.64399  -1.287  0.19921    
## B5_R5A24     2.37054    1.18767   1.996  0.04682 *  
## B5_R5A32     0.46766    0.28832   1.622  0.10583    
## B5_R5A44    -0.08069    0.26748  -0.302  0.76311    
## B5_R5B2      3.28924    1.12332   2.928  0.00367 ** 
## B5_R5B3     -0.48805    0.63896  -0.764  0.44556    
## B5_R5B4     -2.07943    0.49652  -4.188 3.68e-05 ***
## B5_R1A10    -1.69525    1.05747  -1.603  0.10994    
## B5_R1A11    -1.30100    1.05372  -1.235  0.21789    
## B5_R1A12    -2.08549    1.48940  -1.400  0.16246    
## B5_R1A13    -1.35112    1.22700  -1.101  0.27169    
## B5_R1A14    -2.37952    1.09564  -2.172  0.03064 *  
## B5_R1A15    -1.89294    1.57423  -1.202  0.23011    
## B5_R1A16    12.71225    1.40673   9.037  < 2e-16 ***
## B5_R1A2     13.47854    1.11525  12.086  < 2e-16 ***
## B5_R1A3     -3.72346    2.30212  -1.617  0.10682    
## B5_R1A4     -1.14354    1.07573  -1.063  0.28860    
## B5_R1A5     14.73832    1.06382  13.854  < 2e-16 ***
## B5_R1A6     10.98725    1.41788   7.749 1.36e-13 ***
## B5_R1A7     -0.92652    1.09526  -0.846  0.39825    
## B5_R1A8     -1.96409    1.48646  -1.321  0.18738    
## B5_R1A9     15.65890    1.36727  11.453  < 2e-16 ***
## B5_R24A1     0.27051    0.59221   0.457  0.64815    
## B5_R24A2    -0.18115    0.62784  -0.289  0.77314    
## B5_R24A3     2.10743    1.13834   1.851  0.06508 .  
## B5_R24A4     0.60381    0.56688   1.065  0.28765    
## B5_R24A5    -1.05064    0.62501  -1.681  0.09378 .  
## B5_R24A6    15.57728    0.50596  30.788  < 2e-16 ***
## KLASIFIKAS2  0.11932    0.44557   0.268  0.78904    
## KODE_KAB02   0.08689    0.44603   0.195  0.84568    
## KODE_KAB03   0.02141    0.35576   0.060  0.95204    
## KODE_KAB04   0.65047    0.52249   1.245  0.21410    
## KODE_KAB71   0.43449    0.52772   0.823  0.41096    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.504714)
## 
## Number of Fisher Scoring iterations: 19
predict = predict(fit.logit1, newdata = testing, type = "response")

predict <- factor(ifelse(predict > 0.5, "2","1"))
log_conf <- confusionMatrix(predict, testing$B5_R12A, positive = "2")
log_conf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1    2    0
##          2   24 2031
##                                           
##                Accuracy : 0.9883          
##                  95% CI : (0.9827, 0.9925)
##     No Information Rate : 0.9874          
##     P-Value [Acc > NIR] : 0.395           
##                                           
##                   Kappa : 0.1413          
##                                           
##  Mcnemar's Test P-Value : 2.668e-06       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.07692         
##          Pos Pred Value : 0.98832         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.98736         
##          Detection Rate : 0.98736         
##    Detection Prevalence : 0.99903         
##       Balanced Accuracy : 0.53846         
##                                           
##        'Positive' Class : 2               
## 
B5_R12A.p = predict(fit.logit1, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R12A.p'] <- factor(ifelse(B5_R12A.p > 0.5, "2","1"))

B5_R12A = predict(fit.logit1, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R12A'] <- factor(ifelse(B5_R12A > 0.5, "2","1"))
summary(susenas_diy.15$B5_R12A)
##    1    2 
##   25 9954
fit.logit2<-svyglm(B5_R12B~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
                   +B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
                   +B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R24A
                   +B5_R1A+KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit2)
## 
## Call:
## svyglm(formula = B5_R12B ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 + 
##     B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E + 
##     B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R24A + 
##     B5_R1A + KLASIFIKAS + KODE_KAB, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20, 
##     data = training)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.61706    2.75991   8.557 5.62e-16 ***
## B2_R1         0.10780    0.49978   0.216  0.82937    
## B2_R2        -0.71107    0.51430  -1.383  0.16779    
## B4_K32        4.03350    1.53734   2.624  0.00913 ** 
## B4_K33        5.06479    2.28008   2.221  0.02706 *  
## B4_K34       22.44017    2.16915  10.345  < 2e-16 ***
## B4_K35       23.33007    1.97503  11.813  < 2e-16 ***
## B4_K36       21.96174    1.28154  17.137  < 2e-16 ***
## B4_K37       20.41685    1.98507  10.285  < 2e-16 ***
## B4_K38       20.39804    1.93493  10.542  < 2e-16 ***
## B4_K39       20.56898    1.21851  16.880  < 2e-16 ***
## B4_K62       -2.02684    1.70118  -1.191  0.23440    
## B4_K8         0.06036    0.02362   2.555  0.01110 *  
## B4_K92        6.28137    2.94011   2.136  0.03343 *  
## B4_K93      -13.48945    1.92522  -7.007 1.55e-11 ***
## B4_K102      -0.74546    2.00813  -0.371  0.71073    
## B4_K103      18.51055    2.10125   8.809  < 2e-16 ***
## B4_K104      18.14459    1.08016  16.798  < 2e-16 ***
## B5_R4A2      -2.39607    0.84988  -2.819  0.00513 ** 
## B5_R4A3      -8.20647    1.40510  -5.840 1.33e-08 ***
## B5_R4B5      38.07952    6.24757   6.095 3.28e-09 ***
## B5_R4B6      -5.96078    2.07738  -2.869  0.00440 ** 
## B5_R4C2      41.67409    3.27290  12.733  < 2e-16 ***
## B5_R4C3      17.42447    1.32528  13.148  < 2e-16 ***
## B5_R4D5      -3.46937    1.47302  -2.355  0.01914 *  
## B5_R4D6      -2.57335    1.94993  -1.320  0.18791    
## B5_R4E2      29.13163    3.34037   8.721  < 2e-16 ***
## B5_R4E3      11.10221    1.50963   7.354 1.76e-12 ***
## B5_R5A12     -1.10735    1.25034  -0.886  0.37650    
## B5_R5A24      0.92988    0.72697   1.279  0.20183    
## B5_R5A32     -0.99072    1.19554  -0.829  0.40793    
## B5_R5A44      0.99454    0.75019   1.326  0.18592    
## B5_R5B2      19.96179    3.48141   5.734 2.35e-08 ***
## B5_R5B3      -2.76859    1.43201  -1.933  0.05411 .  
## B5_R5B4      -0.42064    2.06530  -0.204  0.83875    
## B5_R24A1      0.57893    1.50643   0.384  0.70102    
## B5_R24A2     -0.71083    1.01205  -0.702  0.48299    
## B5_R24A3     -0.25372    1.62395  -0.156  0.87595    
## B5_R24A4     -0.27250    0.91398  -0.298  0.76579    
## B5_R24A5     -1.74793    1.35461  -1.290  0.19790    
## B5_R24A6     17.40375    1.18794  14.650  < 2e-16 ***
## B5_R1A10     -1.71199    2.05027  -0.835  0.40436    
## B5_R1A11     -1.87760    2.16467  -0.867  0.38641    
## B5_R1A12     -3.93028    2.40404  -1.635  0.10310    
## B5_R1A13     -1.64399    2.44297  -0.673  0.50149    
## B5_R1A14     -1.60264    2.00507  -0.799  0.42474    
## B5_R1A15     -1.97938    2.65901  -0.744  0.45720    
## B5_R1A16     14.82713    2.79672   5.302 2.20e-07 ***
## B5_R1A2      15.27319    1.99970   7.638 2.83e-13 ***
## B5_R1A3     -14.89147    3.53679  -4.210 3.35e-05 ***
## B5_R1A4      -1.11869    1.97790  -0.566  0.57208    
## B5_R1A5      17.12726    1.87026   9.158  < 2e-16 ***
## B5_R1A6     -31.81063    3.76941  -8.439 1.28e-15 ***
## B5_R1A7      -2.19247    1.96899  -1.113  0.26636    
## B5_R1A8      15.91458    2.05770   7.734 1.51e-13 ***
## B5_R1A9      14.42132    2.51000   5.746 2.21e-08 ***
## KLASIFIKAS2  -1.26564    1.02595  -1.234  0.21828    
## KODE_KAB02   -1.89847    1.36099  -1.395  0.16405    
## KODE_KAB03   -0.20707    1.26224  -0.164  0.86980    
## KODE_KAB04   -2.04780    1.35651  -1.510  0.13217    
## KODE_KAB71   -1.77730    1.51499  -1.173  0.24165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.4575021)
## 
## Number of Fisher Scoring iterations: 23
predict = predict(fit.logit2, newdata = testing, type = "response")
summary(predict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6591  0.9995  1.0000  0.9981  1.0000  1.0000
predict <- factor(ifelse(predict > 0.5, "2","1"))
log_conf <- confusionMatrix(predict, testing$B5_R12B, positive = "2")
## Warning in confusionMatrix.default(predict, testing$B5_R12B, positive = "2"):
## Levels are not in the same order for reference and data. Refactoring data to
## match.
log_conf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1    0    0
##          2    5 2052
##                                           
##                Accuracy : 0.9976          
##                  95% CI : (0.9943, 0.9992)
##     No Information Rate : 0.9976          
##     P-Value [Acc > NIR] : 0.61596         
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : 0.07364         
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.9976          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.9976          
##          Detection Rate : 0.9976          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : 2               
## 
B5_R12B.p = predict(fit.logit1, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R12A.p'] <- factor(ifelse(B5_R12B.p > 0.5, "2","1"))

B5_R12B = predict(fit.logit2, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R12B'] <- factor(ifelse(B5_R12B > 0.5, "2","1"))
summary(susenas_diy.15$B5_R12B)
##    1    2 
##    7 9972
table(sakernas_diy$B5_R17A)
## 
##     0     1     2     3     4 
##   157     2     1     1 10129
fit.logit3<-svyglm(B5_R17A~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                   +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                   +B5_R5A3+B5_R5A4+B5_R5B+B5_R24A+B5_R12A+B5_R12B+B5_R1A
                   +KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit3)
## 
## Call:
## svyglm(formula = B5_R17A ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 + 
##     B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E + 
##     B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R24A + 
##     B5_R12A + B5_R12B + B5_R1A + KLASIFIKAS + KODE_KAB, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20, 
##     data = training)
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -79.176942   0.402925 -196.505  < 2e-16 ***
## B2_R1         0.012391   0.043415    0.285  0.77552    
## B2_R2        -0.019418   0.045906   -0.423  0.67260    
## B4_K32       -0.011002   0.041078   -0.268  0.78902    
## B4_K33        0.016504   0.060976    0.271  0.78683    
## B4_K34        0.007138   0.252903    0.028  0.97750    
## B4_K35        0.024476   0.075603    0.324  0.74636    
## B4_K36        0.026845   0.131872    0.204  0.83883    
## B4_K37        0.024141   0.086100    0.280  0.77937    
## B4_K38       -0.058360   0.292936   -0.199  0.84222    
## B4_K39        0.017902   0.096201    0.186  0.85250    
## B4_K62       -0.010467   0.036226   -0.289  0.77284    
## B4_K8        -0.002283   0.001847   -1.236  0.21744    
## B4_K92       -0.029644   0.190569   -0.156  0.87648    
## B4_K93        0.001495   0.102592    0.015  0.98839    
## B4_K102       0.034444   0.061631    0.559  0.57666    
## B4_K103       0.032246   0.095068    0.339  0.73470    
## B4_K104       0.034742   0.081626    0.426  0.67068    
## B5_R4A2      -0.001104   0.089630   -0.012  0.99018    
## B5_R4A3      -0.081437   0.220782   -0.369  0.71249    
## B5_R4B5       0.019294   0.086676    0.223  0.82400    
## B5_R4B6      -0.009308   0.198634   -0.047  0.96266    
## B5_R4C2       0.002436   0.094273    0.026  0.97940    
## B5_R4C3       0.009498   0.204399    0.046  0.96297    
## B5_R4D5       0.007949   0.150365    0.053  0.95787    
## B5_R4D6       0.047176   0.289354    0.163  0.87060    
## B5_R4E2       0.002082   0.128428    0.016  0.98708    
## B5_R4E3      -0.016762   0.219383   -0.076  0.93915    
## B5_R5A12     -0.029344   0.090688   -0.324  0.74648    
## B5_R5A24     -0.011451   0.204174   -0.056  0.95531    
## B5_R5A32     -0.003427   0.067260   -0.051  0.95939    
## B5_R5A44      0.001954   0.068395    0.029  0.97723    
## B5_R5B2      -0.030690   0.139819   -0.219  0.82641    
## B5_R5B3       0.003948   0.070736    0.056  0.95553    
## B5_R5B4       0.014035   0.095637    0.147  0.88342    
## B5_R24A1     -0.017075   0.083715   -0.204  0.83851    
## B5_R24A2     -0.029000   0.090671   -0.320  0.74931    
## B5_R24A3     -0.017835   0.108046   -0.165  0.86900    
## B5_R24A4     -0.019715   0.088801   -0.222  0.82446    
## B5_R24A5     -0.027876   0.103245   -0.270  0.78734    
## B5_R24A6     -0.045286   0.098530   -0.460  0.64612    
## B5_R12A2     53.144795   0.133283  398.736  < 2e-16 ***
## B5_R12B2     52.654263   0.237497  221.705  < 2e-16 ***
## B5_R1A10     -0.022644   0.082394   -0.275  0.78364    
## B5_R1A11     -0.025564   0.080152   -0.319  0.74999    
## B5_R1A12      0.047569   0.161913    0.294  0.76912    
## B5_R1A13     -0.021132   0.110411   -0.191  0.84835    
## B5_R1A14     -0.009574   0.096300   -0.099  0.92087    
## B5_R1A15     -0.022355   0.170377   -0.131  0.89570    
## B5_R1A16      0.088246   0.598700    0.147  0.88292    
## B5_R1A2      -0.037493   0.224274   -0.167  0.86734    
## B5_R1A3      -0.033683   0.701294   -0.048  0.96172    
## B5_R1A4      -0.020791   0.068618   -0.303  0.76210    
## B5_R1A5       0.016756   0.268412    0.062  0.95026    
## B5_R1A6      -0.067706   0.393475   -0.172  0.86350    
## B5_R1A7      -0.018847   0.070287   -0.268  0.78877    
## B5_R1A8       0.021782   0.195826    0.111  0.91151    
## B5_R1A9       0.095496   0.469830    0.203  0.83907    
## KLASIFIKAS2  -0.006526   0.071053   -0.092  0.92689    
## KODE_KAB02    0.173111   0.086869    1.993  0.04718 *  
## KODE_KAB03    0.073978   0.063519    1.165  0.24507    
## KODE_KAB04    0.248402   0.092721    2.679  0.00778 ** 
## KODE_KAB71    0.050412   0.108686    0.464  0.64310    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 2.001676e-13)
## 
## Number of Fisher Scoring iterations: 25
predict = predict(fit.logit3, newdata = testing, type = "response")
summary(predict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  1.0000  1.0000  0.9849  1.0000  1.0000
predict <- cut(predict, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)
log_conf <- confusionMatrix(predict, testing$B5_R17A)
log_conf 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1    2    3    4
##          0   31    0    0    0    0
##          1    0    0    0    0    0
##          2    0    0    0    0    0
##          3    0    0    0    0    0
##          4    0    1    0    0 2025
## 
## Overall Statistics
##                                      
##                Accuracy : 0.9995     
##                  95% CI : (0.9973, 1)
##     No Information Rate : 0.9844     
##     P-Value [Acc > NIR] : 3.3e-13    
##                                      
##                   Kappa : 0.9839     
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: 0  Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity           1.00000 0.0000000       NA       NA   1.0000
## Specificity           1.00000 1.0000000        1        1   0.9688
## Pos Pred Value        1.00000       NaN       NA       NA   0.9995
## Neg Pred Value        1.00000 0.9995139       NA       NA   1.0000
## Prevalence            0.01507 0.0004861        0        0   0.9844
## Detection Rate        0.01507 0.0000000        0        0   0.9844
## Detection Prevalence  0.01507 0.0000000        0        0   0.9849
## Balanced Accuracy     1.00000 0.5000000       NA       NA   0.9844
B5_R17A.p = predict(fit.logit3, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R17A.p'] <- cut(B5_R17A.p, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)

B5_R17A = predict(fit.logit3, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R17A'] <- cut(B5_R17A, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)
summary(susenas_diy.15$B5_R17A)
##    0    1    2    3    4 
##   32    0    0    0 9947
susenas_diy.kota = filter(susenas_diy.15, KLASIFIKAS == "1")
sakernas_diy.kota = filter(sakernas_diy, KLASIFIKAS == "1")

set.seed(123)
Train <- createDataPartition(susenas_diy.kota$STATUS, p=0.8, list=FALSE)
## Warning in createDataPartition(susenas_diy.kota$STATUS, p = 0.8, list = FALSE):
## Some classes have no records ( 3, 4 ) and these will be ignored
training <- susenas_diy.kota[Train, ]
testing <- susenas_diy.kota[-Train, ]

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = training)
fit.logit4<-svyglm(STATUS~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
                   +B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1
                   +B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R20_KAT
                   +KODE_KAB,design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit4)
## 
## Call:
## svyglm(formula = STATUS ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 + 
##     B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E + 
##     B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R20_KAT + 
##     KODE_KAB, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~PSU + SSU, strata = ~STRATA, weights = ~FWT, 
##     data = training)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.899265   1.064905   1.784 0.075864 .  
## B2_R1        -0.227538   0.166504  -1.367 0.173139    
## B2_R2        -0.018694   0.199968  -0.093 0.925602    
## B4_K32        0.007702   0.165446   0.047 0.962914    
## B4_K33       -0.690934   0.205141  -3.368 0.000892 ***
## B4_K34       -0.941591   0.911221  -1.033 0.302569    
## B4_K35       -0.345899   0.284524  -1.216 0.225381    
## B4_K36       -1.531898   0.531651  -2.881 0.004346 ** 
## B4_K37        0.038628   0.377799   0.102 0.918655    
## B4_K38       12.122338   0.646235  18.758  < 2e-16 ***
## B4_K39       -0.341152   0.369031  -0.924 0.356250    
## B4_K62       -0.107534   0.160530  -0.670 0.503634    
## B4_K8        -0.013776   0.007210  -1.911 0.057329 .  
## B4_K92        0.692920   0.448495   1.545 0.123766    
## B4_K93        0.983477   0.295763   3.325 0.001033 ** 
## B4_K102      -0.097596   0.218568  -0.447 0.655651    
## B4_K103       0.061445   0.362510   0.169 0.865559    
## B4_K104       0.500576   0.372984   1.342 0.180934    
## B5_R4A2       0.126092   0.281948   0.447 0.655152    
## B5_R4A3       0.868078   0.769201   1.129 0.260302    
## B5_R4B5      -0.329198   0.481682  -0.683 0.495042    
## B5_R4B6      -0.156853   0.728321  -0.215 0.829682    
## B5_R4C2      -0.388623   0.356896  -1.089 0.277375    
## B5_R4C3       0.100041   0.687805   0.145 0.884487    
## B5_R4D5      -0.344334   0.684534  -0.503 0.615447    
## B5_R4D6       0.317279   0.922500   0.344 0.731220    
## B5_R4E2       0.228485   0.751585   0.304 0.761408    
## B5_R4E3      -0.510138   0.569591  -0.896 0.371421    
## B5_R5A12      0.405164   0.387581   1.045 0.296986    
## B5_R5A24     -0.434233   0.929833  -0.467 0.640955    
## B5_R5A32     -0.470334   0.243490  -1.932 0.054672 .  
## B5_R5A44     -0.426136   0.203458  -2.094 0.037347 *  
## B5_R5B2       0.299422   0.919030   0.326 0.744880    
## B5_R5B3      -0.572267   0.226606  -2.525 0.012252 *  
## B5_R5B4      -0.005104   0.404623  -0.013 0.989947    
## B5_R20_KAT1  -0.528346   0.401648  -1.315 0.189711    
## B5_R20_KAT2  -0.738274   0.719214  -1.027 0.305767    
## B5_R20_KAT3  -0.067786   0.417976  -0.162 0.871313    
## B5_R20_KAT4  13.026193   0.699179  18.631  < 2e-16 ***
## B5_R20_KAT5  -1.121194   1.407063  -0.797 0.426396    
## B5_R20_KAT6  -0.127141   0.434090  -0.293 0.769877    
## B5_R20_KAT7   0.684682   0.438596   1.561 0.119924    
## B5_R20_KAT8   0.340434   0.544026   0.626 0.532107    
## B5_R20_KAT9   0.091039   0.402740   0.226 0.821370    
## B5_R20_KAT10 -0.135003   0.640887  -0.211 0.833353    
## B5_R20_KAT11  2.089733   0.970260   2.154 0.032329 *  
## B5_R20_KAT12 -0.066634   0.859060  -0.078 0.938243    
## B5_R20_KAT13  1.244841   0.877510   1.419 0.157410    
## B5_R20_KAT14  2.948656   0.793884   3.714 0.000258 ***
## B5_R20_KAT15  1.433187   0.543543   2.637 0.008959 ** 
## B5_R20_KAT16  3.016341   0.672583   4.485 1.17e-05 ***
## B5_R20_KAT17  0.024441   0.436027   0.056 0.955349    
## KODE_KAB02    1.255187   0.293079   4.283 2.74e-05 ***
## KODE_KAB03    0.907783   0.402088   2.258 0.024934 *  
## KODE_KAB04    2.324631   0.368227   6.313 1.46e-09 ***
## KODE_KAB71    2.037263   0.370945   5.492 1.08e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9645859)
## 
## Number of Fisher Scoring iterations: 13
predict = predict(fit.logit4, newdata = testing, type = "response")
summary(predict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1687  0.8220  0.9177  0.8616  0.9608  1.0000
predict <- cut(predict, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)
log_conf <- confusionMatrix(predict, testing$STATUS)
## Warning in confusionMatrix.default(predict, testing$STATUS): Levels are not in
## the same order for reference and data. Refactoring data to match.
log_conf 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2    3    4
##          1   25   24    0    0
##          2  150 1105    0    0
##          3    0    0    0    0
##          4    0    0    0    0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8666          
##                  95% CI : (0.8469, 0.8846)
##     No Information Rate : 0.8658          
##     P-Value [Acc > NIR] : 0.4877          
##                                           
##                   Kappa : 0.1748          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity           0.14286   0.9787       NA       NA
## Specificity           0.97874   0.1429        1        1
## Pos Pred Value        0.51020   0.8805       NA       NA
## Neg Pred Value        0.88048   0.5102       NA       NA
## Prevalence            0.13420   0.8658        0        0
## Detection Rate        0.01917   0.8474        0        0
## Detection Prevalence  0.03758   0.9624        0        0
## Balanced Accuracy     0.56080   0.5608       NA       NA
STATUS.p = predict(fit.logit4, newdata = susenas_diy.kota, type = "response")
susenas_diy.kota['STATUS.p'] <- cut(STATUS.p, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)

STATUS = predict(fit.logit4, newdata = sakernas_diy.kota, type = "response")
sakernas_diy.kota['STATUS'] <- cut(STATUS, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)
summary(sakernas_diy.kota$STATUS)
##    1    2 
##  194 6694
susenas_diy.desa = filter(susenas_diy.15, KLASIFIKAS == "2")
sakernas_diy.desa = filter(sakernas_diy, KLASIFIKAS == "2")

set.seed(123)
Train <- createDataPartition(susenas_diy.desa$STATUS, p=0.8, list=FALSE)
## Warning in createDataPartition(susenas_diy.desa$STATUS, p = 0.8, list = FALSE):
## Some classes have no records ( 1, 2 ) and these will be ignored
training <- susenas_diy.desa[Train, ]
testing <- susenas_diy.desa[-Train, ]

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = training)
fit.logit5<-svyglm(STATUS~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
                   +B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
                   +B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B
                   +B5_R20_KAT+KODE_KAB,design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit5)
## 
## Call:
## svyglm(formula = STATUS ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 + 
##     B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E + 
##     B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R20_KAT + 
##     KODE_KAB, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~PSU + SSU, strata = ~STRATA, weights = ~FWT, 
##     data = training)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   18.235539   1.459670  12.493  < 2e-16 ***
## B2_R1         -0.829438   0.204725  -4.051 0.000128 ***
## B2_R2          0.705743   0.224626   3.142 0.002449 ** 
## B4_K32        -0.247133   0.170117  -1.453 0.150706    
## B4_K33        -0.924694   0.299460  -3.088 0.002876 ** 
## B4_K34        -1.937210   1.205029  -1.608 0.112360    
## B4_K35        -0.324733   0.391377  -0.830 0.409478    
## B4_K36        -1.090371   0.975043  -1.118 0.267217    
## B4_K37         0.239706   0.337932   0.709 0.480442    
## B4_K38        14.364491   1.044112  13.758  < 2e-16 ***
## B4_K39         0.667790   0.706966   0.945 0.348074    
## B4_K62        -0.058166   0.164014  -0.355 0.723912    
## B4_K8         -0.026497   0.009006  -2.942 0.004401 ** 
## B4_K92         0.174222   0.834748   0.209 0.835270    
## B4_K93         0.255258   0.201141   1.269 0.208569    
## B4_K102        0.322248   0.279708   1.152 0.253149    
## B4_K103        0.119796   0.423994   0.283 0.778349    
## B4_K104        0.785502   0.428218   1.834 0.070792 .  
## B5_R4A2       -0.600929   0.384483  -1.563 0.122510    
## B5_R4A3       -0.264515   0.682005  -0.388 0.699288    
## B5_R4B5        0.343253   0.272278   1.261 0.211555    
## B5_R4B6        0.210564   0.541404   0.389 0.698499    
## B5_R4C2       -0.577295   0.376150  -1.535 0.129290    
## B5_R4C3       -0.066735   0.518128  -0.129 0.897881    
## B5_R4D5       -0.812484   0.442267  -1.837 0.070381 .  
## B5_R4D6       -0.124554   0.824338  -0.151 0.880329    
## B5_R4E2        0.237436   0.462851   0.513 0.609554    
## B5_R4E3       -0.588883   0.685199  -0.859 0.392993    
## B5_R5A12      -0.641985   0.616278  -1.042 0.301079    
## B5_R5A24     -14.326132   1.289851 -11.107  < 2e-16 ***
## B5_R5A32      -0.320691   0.221957  -1.445 0.152903    
## B5_R5A44      -0.127149   0.249065  -0.511 0.611283    
## B5_R5B2      -13.379123   1.099144 -12.172  < 2e-16 ***
## B5_R5B3        0.036579   0.289677   0.126 0.899872    
## B5_R5B4        0.289112   0.485626   0.595 0.553510    
## B5_R20_KAT1   -0.508441   0.582730  -0.873 0.385868    
## B5_R20_KAT2   -0.636529   1.204035  -0.529 0.598688    
## B5_R20_KAT3   -0.391955   0.638181  -0.614 0.541062    
## B5_R20_KAT4   14.514785   0.615101  23.597  < 2e-16 ***
## B5_R20_KAT5   14.032311   0.934119  15.022  < 2e-16 ***
## B5_R20_KAT6   -0.803711   0.605477  -1.327 0.188629    
## B5_R20_KAT7    0.261732   0.639898   0.409 0.683756    
## B5_R20_KAT8    1.720384   1.103570   1.559 0.123460    
## B5_R20_KAT9    0.189941   0.733301   0.259 0.796368    
## B5_R20_KAT10  15.010992   0.984279  15.251  < 2e-16 ***
## B5_R20_KAT11  14.579612   0.631125  23.101  < 2e-16 ***
## B5_R20_KAT13   0.612577   1.309319   0.468 0.641318    
## B5_R20_KAT14   0.645285   0.759990   0.849 0.398696    
## B5_R20_KAT15   2.066161   0.946061   2.184 0.032270 *  
## B5_R20_KAT16  -0.436580   0.892280  -0.489 0.626147    
## B5_R20_KAT17   0.294276   0.702418   0.419 0.676520    
## KODE_KAB02     3.962015   0.662200   5.983 8.07e-08 ***
## KODE_KAB03     0.108348   0.240660   0.450 0.653931    
## KODE_KAB04     1.914012   0.693851   2.759 0.007379 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.054858)
## 
## Number of Fisher Scoring iterations: 15
predict = predict(fit.logit5, newdata = testing, type = "response")
summary(predict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2295  0.8191  0.8788  0.8579  0.9299  1.0000
predict <- cut(predict, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)
log_conf <- confusionMatrix(predict, testing$STATUS)
## Warning in confusionMatrix.default(predict, testing$STATUS): Levels are not in
## the same order for reference and data. Refactoring data to match.
log_conf 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4
##          1   0   0   0   0
##          2   0   0   0   0
##          3   0   0   4   3
##          4   0   0  99 585
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8524         
##                  95% CI : (0.8237, 0.878)
##     No Information Rate : 0.8509         
##     P-Value [Acc > NIR] : 0.4837         
##                                          
##                   Kappa : 0.0548         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity                NA       NA 0.038835  0.99490
## Specificity                 1        1 0.994898  0.03883
## Pos Pred Value             NA       NA 0.571429  0.85526
## Neg Pred Value             NA       NA 0.855263  0.57143
## Prevalence                  0        0 0.149059  0.85094
## Detection Rate              0        0 0.005789  0.84660
## Detection Prevalence        0        0 0.010130  0.98987
## Balanced Accuracy          NA       NA 0.516866  0.51687
STATUS.p = predict(fit.logit5, newdata = susenas_diy.desa, type = "response")
susenas_diy.desa['STATUS.p'] <- cut(STATUS.p, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)

STATUS = predict(fit.logit5, newdata = sakernas_diy.desa, type = "response")
sakernas_diy.desa['STATUS'] <- cut(STATUS, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)
summary(sakernas_diy.desa$STATUS)
##    3    4 
##   44 3358
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 2000, savePredictions = TRUE)

mod_fit1 <- train(KAPITA~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A
                  +KLASIFIKAS+KODE_KAB,
                  data=susenas_diy.15, method="knn", 
                  trControl = ctrl, tuneLength = 10, weights=FWT)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 9979 samples
##   22 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (2000 fold, repeated 1 times) 
## Summary of sample sizes: 9974, 9973, 9974, 9974, 9975, 9974, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE       Rsquared   MAE     
##    5  1031754.1  0.4496178  751599.0
##    7  1012689.8  0.4520713  742475.5
##    9  1001588.1  0.4587419  737252.4
##   11   996931.9  0.4563559  736043.5
##   13   994161.6  0.4509211  735480.2
##   15   993517.0  0.4525171  736946.0
##   17   993472.1  0.4510652  738072.7
##   19   992480.8  0.4494446  738894.3
##   21   989948.8  0.4508724  737789.7
##   23   989888.7  0.4526561  738197.0
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 23.
susenas_diy.15['KAPITA.p'] = predict(mod_fit1, newdata = susenas_diy.15)

sakernas_diy['KAPITA'] = predict(mod_fit1, newdata = sakernas_diy)
summary(sakernas_diy$KAPITA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  508897  948432 1207362 1368531 1568291 5009934
summary(susenas_diy.15$KAPITA)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   158071   563358   905111  1386516  1603787 40199452
susenas_diy.156=filter(susenas_diy.15, B5_R24A %in% c("1", "5"))
sakernas_diy.156=filter(sakernas_diy, B5_R24A %in% c("1", "5"))
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)

mod_fit1 <- train(B5_R28A1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A
                  +KLASIFIKAS+KODE_KAB, data=sakernas_diy.156, 
                  method="knn", trControl = ctrl, tuneLength = 10, 
                  weights =w.adj2_20)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 2082 samples
##   22 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times) 
## Summary of sample sizes: 2076, 2078, 2076, 2076, 2077, 2077, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  7.443666  0.3760366  6.043422
##    7  7.320277  0.3870404  5.945774
##    9  7.295401  0.3908491  5.931135
##   11  7.290796  0.3967244  5.941500
##   13  7.294517  0.3911463  5.944746
##   15  7.332118  0.3831646  5.979311
##   17  7.342985  0.3883037  5.985700
##   19  7.349223  0.3890382  6.000590
##   21  7.371387  0.3929281  6.021109
##   23  7.350922  0.3969477  5.990244
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.
sakernas_diy.156['B5_R28A1.p'] = predict(mod_fit1, newdata = sakernas_diy.156)

susenas_diy.156['B5_R28A1'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28A1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8701 16.7953 18.9251 18.6188 20.7603 26.8719
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)

mod_fit1 <- train(B5_R28B1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A+KLASIFIKAS
                  +B5_R28A1+KODE_KAB, data=sakernas_diy.156, 
                  method="knn", trControl = ctrl, tuneLength = 10, 
                  weights =w.adj2_20)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 2082 samples
##   23 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times) 
## Summary of sample sizes: 2077, 2078, 2077, 2076, 2077, 2077, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  768473.8  0.4704571  585101.4
##    7  754054.5  0.4764575  577112.1
##    9  743955.9  0.4859606  570928.4
##   11  735271.0  0.4892157  564705.9
##   13  728998.8  0.4930031  560505.8
##   15  726070.6  0.4903515  560330.7
##   17  724023.1  0.4871913  558779.3
##   19  722610.2  0.4850262  557991.5
##   21  721563.7  0.4799188  557865.3
##   23  720596.1  0.4792577  557637.8
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 23.
sakernas_diy.156['B5_R28B1.p'] = predict(mod_fit1, newdata = sakernas_diy.156)

susenas_diy.156['B5_R28B1'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28B1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1280408 1485317 1476898 1703913 2323913
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)

mod_fit1 <- train(B5_R28B2~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
                  +B5_R24A+B5_R28A1+KODE_KAB, data=sakernas_diy.156, 
                  method="knn", trControl = ctrl, tuneLength = 10, 
                  weights =w.adj2_20)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 2082 samples
##   23 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times) 
## Summary of sample sizes: 2076, 2077, 2077, 2077, 2077, 2076, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  91934.54  0.6476582  52631.24
##    7  90538.27  0.6399362  52653.09
##    9  89992.41  0.6279917  52571.47
##   11  88385.26  0.6310466  51909.29
##   13  88449.67  0.6255395  52274.13
##   15  89328.68  0.6181131  53098.43
##   17  90732.74  0.6075716  54165.53
##   19  90349.79  0.6094265  54084.69
##   21  90463.69  0.6060394  54472.23
##   23  91340.57  0.6048124  55012.87
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.
sakernas_diy.156['B5_R28B2.p'] = predict(mod_fit1, newdata = sakernas_diy.156)

susenas_diy.156['B5_R28B2'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28B2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   12727   46364   87629   85568  903636
susenas_diy.buruh=filter(susenas_diy, B5_R24A %in% c("4"))
sakernas_diy.buruh=filter(sakernas_diy, B5_R24A %in% c("4"))
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 600, savePredictions = TRUE)

mod_fit1 <- train(B5_R28C1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
                  +B5_R24A+KODE_KAB,
                  data=sakernas_diy.buruh, method="knn", trControl = ctrl, 
                  tuneLength = 10, weights =w.adj2_20)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 2946 samples
##   22 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (600 fold, repeated 1 times) 
## Summary of sample sizes: 2941, 2941, 2941, 2941, 2942, 2942, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE     Rsquared   MAE    
##    5  1395727  0.4444137  1037988
##    7  1379620  0.4493956  1037267
##    9  1380755  0.4385240  1039507
##   11  1390968  0.4305453  1047196
##   13  1394145  0.4321954  1049331
##   15  1399060  0.4234003  1054775
##   17  1399670  0.4247791  1057048
##   19  1401296  0.4268511  1059138
##   21  1404161  0.4228207  1062757
##   23  1409258  0.4204803  1067165
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 7.
sakernas_diy.buruh['B5_R28C1.p'] = predict(mod_fit1, newdata = sakernas_diy.buruh)

susenas_diy.buruh['B5_R28C1'] = predict(mod_fit1, newdata = susenas_diy.buruh)
summary(susenas_diy.buruh$B5_R28C1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  917857 1712727 2122857 2374186 2764000 8455000
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 600, savePredictions = TRUE)

mod_fit1 <- train(B5_R28C2~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
                  +B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
                  +B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
                  +B5_R24A+KODE_KAB,
                  data=sakernas_diy.buruh, method="knn", trControl = ctrl, 
                  tuneLength = 10, weights =w.adj2_20)

print(mod_fit1)
## k-Nearest Neighbors 
## 
## 2946 samples
##   22 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (600 fold, repeated 1 times) 
## Summary of sample sizes: 2941, 2941, 2941, 2941, 2941, 2941, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE       Rsquared   MAE     
##    5  104853.35  0.3877252  68262.10
##    7  101066.54  0.3969221  66894.55
##    9   99354.68  0.4006963  66052.59
##   11   98401.48  0.4135759  65527.00
##   13   98107.51  0.4028811  65514.60
##   15   97404.81  0.4072118  65174.18
##   17   96579.75  0.4085570  64684.63
##   19   96162.67  0.4151930  64612.54
##   21   96367.14  0.4070182  64750.60
##   23   96331.84  0.4057632  64888.07
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 19.
sakernas_diy.buruh['B5_R28C2.p'] = predict(mod_fit1, newdata = sakernas_diy.buruh)

susenas_diy.buruh['B5_R28C2'] = predict(mod_fit1, newdata = susenas_diy.buruh)
summary(susenas_diy.buruh$B5_R28C2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3044   41118   58519   71078   81440  475000

#TUJUAN 3

  1. Variabel Susenas yang tidak dikumpulkan di sakernas

#KAPITA

options(survey.lonely.psu = "adjust")
des1 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy)
svymean(~KAPITA, des1)
##           mean    SE
## KAPITA 1397334 12685
options(survey.lonely.psu = "adjust")
des2 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~KAPITA.p, des2)
##             mean    SE
## KAPITA.p 1406226 14230
svymean(~KAPITA, des2)
##           mean    SE
## KAPITA 1476320 42204
mns = svymean(~KAPITA+KAPITA.p, des2)
vcov(mns)
##              KAPITA  KAPITA.p
## KAPITA   1781195124 372687722
## KAPITA.p  372687722 202497609
var = 1781195124
akar_var = sqrt(var)
akar_var
## [1] 42204.21
var_xA = 12685^2
var_xB = 14230^2
myu_xA = 1397334
myu_xB = 1406226

myu_B = 1476320
var_B = 42204^2

cov = 372687722

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5572144
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 1401271
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 1467201
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 1095246711
var_B
## [1] 1781177616

#STATUS PENDUDUK MISKIN DESA

options(survey.lonely.psu = "adjust")
des3 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.desa)
svymean(~factor(STATUS), des3)
##                     mean     SE
## factor(STATUS)3 0.012336 0.0027
## factor(STATUS)4 0.987664 0.0027
options(survey.lonely.psu = "adjust")
des4 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.desa)
svymean(~factor(STATUS.p), des4)
##                       mean     SE
## factor(STATUS.p)3 0.011843 0.0026
## factor(STATUS.p)4 0.988157 0.0026
svymean(~factor(STATUS), des4)
##                    mean     SE
## factor(STATUS)3 0.13589 0.0141
## factor(STATUS)4 0.86411 0.0141
mns1 = svymean(~STATUS+ STATUS.p, des4)
vcov(mns1)
##           STATUS1 STATUS2       STATUS3       STATUS4     STATUS.p3
## STATUS1         0       0  0.000000e+00  0.000000e+00  0.000000e+00
## STATUS2         0       0  0.000000e+00  0.000000e+00  0.000000e+00
## STATUS3         0       0  1.999794e-04 -1.999794e-04  8.817654e-06
## STATUS4         0       0 -1.999794e-04  1.999794e-04 -8.817654e-06
## STATUS.p3       0       0  8.817654e-06 -8.817654e-06  6.983914e-06
## STATUS.p4       0       0 -8.817654e-06  8.817654e-06 -6.983914e-06
##               STATUS.p4
## STATUS1    0.000000e+00
## STATUS2    0.000000e+00
## STATUS3   -8.817654e-06
## STATUS4    8.817654e-06
## STATUS.p3 -6.983914e-06
## STATUS.p4  6.983914e-06

#Kategori 3

var_xA = 0.0027^2
var_xB = 0.0026^2
myu_xA = 0.012336
myu_xB = 0.011843

myu_B = 0.13589
var_B = 0.0141^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4811388
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.0120802
cov = 8.817654e-06

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.1361994
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 0.0001873084
var_B
## [1] 0.00019881

#kategori 4

var_xA = 0.0027^2
var_xB = 0.0026^2
myu_xA = 0.987664
myu_xB = 0.988157

myu_B = 0.86411
var_B = 0.0141^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4811388
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.9879198
cov = 8.817654e-06

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.8638006
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 0.0001873084
var_B
## [1] 0.00019881

#STATUS PENDUDUK MISKIN KOTA

options(survey.lonely.psu = "adjust")
des5 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.kota)
svymean(~factor(STATUS), des5)
##                     mean     SE
## factor(STATUS)1 0.019086 0.0019
## factor(STATUS)2 0.980914 0.0019
options(survey.lonely.psu = "adjust")
des6 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.kota)
svymean(~factor(STATUS.p), des6)
##                       mean     SE
## factor(STATUS.p)1 0.018727 0.0022
## factor(STATUS.p)2 0.981273 0.0022
svymean(~factor(STATUS), des6)
##                    mean     SE
## factor(STATUS)1 0.10982 0.0094
## factor(STATUS)2 0.89018 0.0094
mns2 = svymean(~STATUS+ STATUS.p, des6)
vcov(mns2)
##                 STATUS1       STATUS2 STATUS3 STATUS4     STATUS.p1
## STATUS1    8.765963e-05 -8.765963e-05       0       0  5.500767e-06
## STATUS2   -8.765963e-05  8.765963e-05       0       0 -5.500767e-06
## STATUS3    0.000000e+00  0.000000e+00       0       0  0.000000e+00
## STATUS4    0.000000e+00  0.000000e+00       0       0  0.000000e+00
## STATUS.p1  5.500767e-06 -5.500767e-06       0       0  4.718018e-06
## STATUS.p2 -5.500767e-06  5.500767e-06       0       0 -4.718018e-06
##               STATUS.p2
## STATUS1   -5.500767e-06
## STATUS2    5.500767e-06
## STATUS3    0.000000e+00
## STATUS4    0.000000e+00
## STATUS.p1 -4.718018e-06
## STATUS.p2  4.718018e-06

#kategori 1

var_xA = 0.0019^2
var_xB = 0.0022^2
myu_xA = 0.019086
myu_xB = 0.018727

myu_B = 0.10982
var_B = 0.0094^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5727811
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.01893263
cov = 5.500767e-06

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.1100537
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 8.210826e-05
var_B
## [1] 8.836e-05

#kategori 2

var_xA = 0.0019^2
var_xB = 0.0022^2
myu_xA = 0.980914
myu_xB = 0.981273

myu_B = 0.89018
var_B = 0.0094^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5727811
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.9810674
cov = 5.500767e-06

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.8899463
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 8.210826e-05
var_B
## [1] 8.836e-05
  1. variabel sakernas yang tidak dikumpulkan di susenas

#B5_R17A

sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="1"] = "0"
sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="2"] = "0"
sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="3"] = "0"

sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="1"] = "0"
sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="2"] = "0"
sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="3"] = "0"

susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="1"] = "0"
susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="2"] = "0"
susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="3"] = "0"

table(sakernas_diy$B5_R17A)
## 
##     0     1     2     3     4 
##   161     0     0     0 10129
table(sakernas_diy$B5_R17A.p)
## 
##     0     1     2     3     4 
##   157     0     0     0 10133
table(susenas_diy.15$B5_R17A)
## 
##    0    1    2    3    4 
##   32    0    0    0 9947
options(survey.lonely.psu = "adjust")
des7 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~factor(B5_R17A), des7)
##                       mean    SE
## factor(B5_R17A)0 0.0027984 6e-04
## factor(B5_R17A)4 0.9972016 6e-04
options(survey.lonely.psu = "adjust")
des1 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy)
svymean(~factor(B5_R17A.p), des1)
##                        mean     SE
## factor(B5_R17A.p)0 0.016239 0.0016
## factor(B5_R17A.p)4 0.983761 0.0016
svymean(~factor(B5_R17A), des1)
##                      mean     SE
## factor(B5_R17A)0 0.016747 0.0016
## factor(B5_R17A)4 0.983253 0.0016
mns3 = svymean(~B5_R17A+B5_R17A.p, des1)
vcov(mns3)
##                 B5_R17A0 B5_R17A1 B5_R17A2 B5_R17A3      B5_R17A4    B5_R17A.p0
## B5_R17A0    2.721615e-06        0        0        0 -2.721615e-06  2.568321e-06
## B5_R17A1    0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A2    0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A3    0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A4   -2.721615e-06        0        0        0  2.721615e-06 -2.568321e-06
## B5_R17A.p0  2.568321e-06        0        0        0 -2.568321e-06  2.481277e-06
## B5_R17A.p1  0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A.p2  0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A.p3  0.000000e+00        0        0        0  0.000000e+00  0.000000e+00
## B5_R17A.p4 -2.568321e-06        0        0        0  2.568321e-06 -2.481277e-06
##            B5_R17A.p1 B5_R17A.p2 B5_R17A.p3    B5_R17A.p4
## B5_R17A0            0          0          0 -2.568321e-06
## B5_R17A1            0          0          0  0.000000e+00
## B5_R17A2            0          0          0  0.000000e+00
## B5_R17A3            0          0          0  0.000000e+00
## B5_R17A4            0          0          0  2.568321e-06
## B5_R17A.p0          0          0          0 -2.481277e-06
## B5_R17A.p1          0          0          0  0.000000e+00
## B5_R17A.p2          0          0          0  0.000000e+00
## B5_R17A.p3          0          0          0  0.000000e+00
## B5_R17A.p4          0          0          0  2.481277e-06

#kategori 0

var_xA = 7e-04^2
var_xB = 0.0016^2
myu_xA = 0.0043247
myu_xB = 0.016239

myu_B = 0.016747
var_B = 0.0016^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.8393443
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.006238801
cov = 2.568321e-06

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.006714296
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] -1.666905e-08
var_B
## [1] 2.56e-06

#B5_R28B1

options(survey.lonely.psu = "adjust")
des8 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.156)
svymean(~B5_R28B1, des8)
##             mean     SE
## B5_R28B1 1506428 9090.1
options(survey.lonely.psu = "adjust")
des9 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.156)
svymean(~B5_R28B1.p, des9)
##               mean    SE
## B5_R28B1.p 1247855 20338
svymean(~B5_R28B1, des9)
##             mean    SE
## B5_R28B1 1260581 33776
mns4 = svymean(~B5_R28B1+ B5_R28B1.p, des9)
vcov(mns4)
##              B5_R28B1 B5_R28B1.p
## B5_R28B1   1140844964  459001413
## B5_R28B1.p  459001413  413615124
var_xA = 9090.1^2
var_xB = 20338^2
myu_xA = 1506428
myu_xB = 1247855

myu_B = 1260581
var_B = 33776^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.8334961
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 1463375
cov = 459001413

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 1499739
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 631473749
var_B
## [1] 1140818176
options(survey.lonely.psu = "adjust")
des8 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.156)
svymean(~B5_R28B2, des8)
##           mean     SE
## B5_R28B2 94342 4513.7
options(survey.lonely.psu = "adjust")
des9 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.156)
svymean(~B5_R28B2.p, des9)
##             mean     SE
## B5_R28B2.p 73277 3681.1
svymean(~B5_R28B2, des9)
##           mean     SE
## B5_R28B2 73112 7406.8
mns5 = svymean(~B5_R28B2+B5_R28B2.p, des9)
vcov(mns5)
##            B5_R28B2 B5_R28B2.p
## B5_R28B2   54860156   17669381
## B5_R28B2.p 17669381   13550469

#B5_R28B2

var_xA = 4513.7^2
var_xB = 3681.1^2
myu_xA = 94342
myu_xB = 73277

myu_B = 73112
var_B = 7406.8^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.3994371
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 81691.14
cov = 17669381

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 84083.75
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 31820423
var_B
## [1] 54860686

#B5_R28C1

options(survey.lonely.psu = "adjust")
des10 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.buruh)
svymean(~B5_R28C1, des10)
##             mean    SE
## B5_R28C1 2351402 23681
options(survey.lonely.psu = "adjust")
des11 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.buruh)
svymean(~B5_R28C1.p, des11)
##               mean    SE
## B5_R28C1.p 2323908 20448
svymean(~B5_R28C1, des11)
##             mean    SE
## B5_R28C1 2288719 47277
mns6 = svymean(~B5_R28C1+B5_R28C1.p, des11)
vcov(mns6)
##              B5_R28C1 B5_R28C1.p
## B5_R28C1   2235103759  705673933
## B5_R28C1.p  705673933  418138926
var_xA = 23681^2
var_xB = 20448^2
myu_xA = 2351402
myu_xB = 2323908

myu_B = 2288719
var_B = 47277^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4271286
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 2335651
cov = 705673933

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 2308539
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 1044129219
var_B
## [1] 2235114729

#B5_R28C2

options(survey.lonely.psu = "adjust")
des10 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.buruh)
svymean(~B5_R28C2, des10)
##           mean     SE
## B5_R28C2 70033 1008.6
options(survey.lonely.psu = "adjust")
des11 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.buruh)
svymean(~B5_R28C2.p, des11)
##             mean     SE
## B5_R28C2.p 67532 921.58
svymean(~B5_R28C2, des11)
##           mean     SE
## B5_R28C2 71108 6621.3
mns7 = svymean(~B5_R28C2+B5_R28C2.p, des11)
vcov(mns7)
##            B5_R28C2 B5_R28C2.p
## B5_R28C2   43842182  2372823.3
## B5_R28C2.p  2372823   849316.1
var_xA = 1008.6^2
var_xB = 921.58^2
myu_xA = 70033
myu_xB = 67532

myu_B = 71108
var_B = 6621.3^2

(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4550076
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 68669.97
cov = 2372823.3

myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 74287.3
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 37212359
var_B
## [1] 43841614

#DATA PENCACAHAN

sakernas2020_diy <- read_excel("sak0220(edit B5_R17A).xlsx")
sus1 <- read_excel("susenas 2020.xlsx", sheet = "RT")
sus2 <- read_excel("susenas 2020.xlsx", sheet = "Indo")
susenas_diy = merge(x = sus1, y = sus2, all.y = TRUE)
susenas_diy.15 <- subset(susenas_diy, R407>14)
sakernas2020_diy['KODE_KAB']=str_pad(sakernas2020_diy$KODE_KAB, width = 2, side = 'left', pad = '0')
sakernas2020_diy['NO_DSRT']=str_pad(sakernas2020_diy$NO_DSRT, width = 2, side = 'left', pad = '0')
sakernas2020_diy["psu"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$nks_ok, sep = "")
sakernas2020_diy["ssu"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$nks_ok, sakernas2020_diy$NO_DSRT, sep = "")
sakernas2020_diy["strata"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$KLASIFIKAS, sep = "")

#Susenas

options(survey.lonely.psu = "adjust")
des12 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~KAPITA, des12)
##           mean    SE
## KAPITA 1476320 42204
svymean(~factor(STATUS), des12)
##                     mean     SE
## factor(STATUS)1 0.080216 0.0068
## factor(STATUS)2 0.650228 0.0095
## factor(STATUS)3 0.036630 0.0039
## factor(STATUS)4 0.232926 0.0071
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="1"] = "0"
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="2"] = "0"
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="3"] = "0"
table(sakernas2020_diy$B5_R17A)
## 
##    0    4 
##   97 2280

#Sakernas

options(survey.lonely.psu = "adjust")
des13 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas2020_diy)
svymean(~factor(B5_R17A), des13)
##                      mean    SE
## factor(B5_R17A)0 0.043875 0.005
## factor(B5_R17A)4 0.956125 0.005
sakernas_diy.156.1=filter(sakernas2020_diy, B5_R24A %in% c("1", "5", "6"))
sakernas_diy.buruh1=filter(sakernas2020_diy, B5_R24A %in% c("4"))
options(survey.lonely.psu = "adjust")
des14 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas_diy.156.1)
svymean(~B5_R28B1, des14)
##            mean    SE
## B5_R28B1 945229 69781
svymean(~B5_R28B2, des14)
##           mean    SE
## B5_R28B2 48622 17634
options(survey.lonely.psu = "adjust")
des15 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas_diy.buruh1)
svymean(~B5_R28C1, des15)
##             mean     SE
## B5_R28C1 2337952 104460
svymean(~B5_R28C2, des15)
##            mean    SE
## B5_R28C2 104503 24344