library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(readxl)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
sakernas_diy <- read_excel("sakernas_new.xlsx")
sus1 <- read_excel("susenas 2020.xlsx", sheet = "RT")
sus2 <- read_excel("susenas 2020.xlsx", sheet = "Indo")
susenas_diy = merge(x = sus1, y = sus2, all.y = TRUE)
sakernas_diy['KODE_KAB']=str_pad(sakernas_diy$KODE_KAB, width = 2, side = 'left', pad = '0')
sakernas_diy['NO_DSRT']=str_pad(sakernas_diy$NO_DSRT, width = 2, side = 'left', pad = '0')
susenas_diy['R102']=str_pad(susenas_diy$R102, width = 2, side = 'left', pad = '0')
sakernas_diy["psu"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$nks_ok, sep = "")
sakernas_diy["ssu"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$nks_ok, sakernas_diy$NO_DSRT, sep = "")
sakernas_diy["strata"]=paste(sakernas_diy$KODE_PROV, sakernas_diy$KODE_KAB, sakernas_diy$KLASIFIKAS, sep = "")
sakernas_diy$B4_K3=as.factor(sakernas_diy$B4_K3)
sakernas_diy$B4_K6=as.factor(sakernas_diy$B4_K6)
sakernas_diy$B4_K9=as.factor(sakernas_diy$B4_K9)
sakernas_diy$B4_K10=as.factor(sakernas_diy$B4_K10)
sakernas_diy$B5_R1A=as.factor(sakernas_diy$B5_R1A)
sakernas_diy$B5_R4A=as.factor(sakernas_diy$B5_R4A)
sakernas_diy$B5_R4B=as.factor(sakernas_diy$B5_R4B)
sakernas_diy$B5_R4C=as.factor(sakernas_diy$B5_R4C)
sakernas_diy$B5_R4D=as.factor(sakernas_diy$B5_R4D)
sakernas_diy$B5_R4E=as.factor(sakernas_diy$B5_R4E)
sakernas_diy$B5_R5A1=as.factor(sakernas_diy$B5_R5A1)
sakernas_diy$B5_R5A2=as.factor(sakernas_diy$B5_R5A2)
sakernas_diy$B5_R5A3=as.factor(sakernas_diy$B5_R5A3)
sakernas_diy$B5_R5A4=as.factor(sakernas_diy$B5_R5A4)
sakernas_diy$B5_R5B=as.factor(sakernas_diy$B5_R5B)
sakernas_diy$B5_R20_KAT=as.factor(sakernas_diy$B5_R20_KAT)
sakernas_diy$B5_R24A=as.factor(sakernas_diy$B5_R24A)
sakernas_diy$B5_R12A=as.factor(sakernas_diy$B5_R12A)
sakernas_diy$B5_R12B=as.factor(sakernas_diy$B5_R12B)
sakernas_diy$B5_R17A=as.factor(sakernas_diy$B5_R17A)
sakernas_diy$KLASIFIKAS=as.factor(sakernas_diy$KLASIFIKAS)
sakernas_diy$KODE_KAB=as.factor(sakernas_diy$KODE_KAB)
susenas_diy$B2_R1=susenas_diy$R301
susenas_diy$B2_R2=susenas_diy$R303
susenas_diy$B4_K3=as.factor(susenas_diy$R403)
susenas_diy$B4_K6=as.factor(susenas_diy$R405)
susenas_diy$B4_K9=as.factor(susenas_diy$R612)
susenas_diy$B4_K8=susenas_diy$R407
susenas_diy$B4_K10=as.factor(susenas_diy$R404)
susenas_diy$B5_R1A=as.factor(susenas_diy$R615)
susenas_diy$B5_R4A=as.factor(susenas_diy$R1002)
susenas_diy$B5_R4B=as.factor(susenas_diy$R1003)
susenas_diy$B5_R4C=as.factor(susenas_diy$R1004)
susenas_diy$B5_R4D=as.factor(susenas_diy$R1005)
susenas_diy$B5_R4E=as.factor(susenas_diy$R1008)
susenas_diy$B5_R5A1=as.factor(susenas_diy$R702_A)
susenas_diy$B5_R5A2=as.factor(susenas_diy$R702_B)
susenas_diy$B5_R5A3=as.factor(susenas_diy$R702_C)
susenas_diy$B5_R5A4=as.factor(susenas_diy$R702_D)
susenas_diy$B5_R5B=as.factor(susenas_diy$R703)
susenas_diy$B5_R24A=as.factor(susenas_diy$R706)
susenas_diy$B5_R20_KAT=as.factor(susenas_diy$R705)
susenas_diy$KLASIFIKAS=as.factor(susenas_diy$R105)
susenas_diy$KODE_KAB=as.factor(susenas_diy$R102)
susenas_diy$STATUS.1=as.factor(susenas_diy$STATUS.1)
susenas_diy$STATUS=as.factor(susenas_diy$STATUS)
susenas_diy.15 <- subset(susenas_diy, R407>14)
df4=select(susenas_diy.15, B2_R1, B2_R2, B4_K3, B4_K6, B4_K8, B4_K9, B4_K10, B5_R4A, B5_R4B, B5_R4C, B5_R4D, B5_R4E, B5_R5A1, B5_R5A2, B5_R5A3, B5_R5A4, B5_R5B, B5_R1A, B5_R20_KAT, B5_R24A, KAPITA, RENUM, FWT, PSU, SSU, STRATA, KLASIFIKAS, KODE_KAB)
Train <- createDataPartition(sakernas_diy$B5_R12A, p=0.8, list=FALSE)
training <- sakernas_diy[Train, ]
testing <- sakernas_diy[-Train, ]
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = training)
fit.logit1 <- svyglm(B5_R12A~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
+B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
+B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A
+B5_R24A+KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit1)
##
## Call:
## svyglm(formula = B5_R12A ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 +
## B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E +
## B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R1A +
## B5_R24A + KLASIFIKAS + KODE_KAB, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20,
## data = training)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.76121 2.04593 -0.861 0.39000
## B2_R1 -0.05890 0.27005 -0.218 0.82750
## B2_R2 0.14572 0.30614 0.476 0.63443
## B4_K32 0.65931 0.47711 1.382 0.16802
## B4_K33 -0.12009 0.43204 -0.278 0.78123
## B4_K34 -1.74989 0.73182 -2.391 0.01740 *
## B4_K35 -0.25741 0.64932 -0.396 0.69207
## B4_K36 0.81046 0.94765 0.855 0.39309
## B4_K37 14.16036 0.76062 18.617 < 2e-16 ***
## B4_K38 14.96509 0.69794 21.442 < 2e-16 ***
## B4_K39 0.54512 0.65780 0.829 0.40792
## B4_K62 0.58080 0.26079 2.227 0.02667 *
## B4_K8 0.05421 0.01208 4.489 1.01e-05 ***
## B4_K92 4.30988 1.84539 2.335 0.02016 *
## B4_K93 1.94041 1.53346 1.265 0.20669
## B4_K102 0.96788 0.45492 2.128 0.03417 *
## B4_K103 0.72343 0.81413 0.889 0.37492
## B4_K104 1.59225 1.30262 1.222 0.22252
## B5_R4A2 -1.32342 0.55473 -2.386 0.01765 *
## B5_R4A3 14.67736 0.81712 17.962 < 2e-16 ***
## B5_R4B5 15.00443 0.73482 20.419 < 2e-16 ***
## B5_R4B6 12.20458 0.67042 18.204 < 2e-16 ***
## B5_R4C2 -0.95776 1.45131 -0.660 0.50979
## B5_R4C3 15.05919 0.84692 17.781 < 2e-16 ***
## B5_R4D5 16.62732 1.47422 11.279 < 2e-16 ***
## B5_R4D6 14.48218 1.79206 8.081 1.49e-14 ***
## B5_R4E2 17.00123 0.64506 26.356 < 2e-16 ***
## B5_R4E3 15.88789 0.93537 16.986 < 2e-16 ***
## B5_R5A12 -0.82855 0.64399 -1.287 0.19921
## B5_R5A24 2.37054 1.18767 1.996 0.04682 *
## B5_R5A32 0.46766 0.28832 1.622 0.10583
## B5_R5A44 -0.08069 0.26748 -0.302 0.76311
## B5_R5B2 3.28924 1.12332 2.928 0.00367 **
## B5_R5B3 -0.48805 0.63896 -0.764 0.44556
## B5_R5B4 -2.07943 0.49652 -4.188 3.68e-05 ***
## B5_R1A10 -1.69525 1.05747 -1.603 0.10994
## B5_R1A11 -1.30100 1.05372 -1.235 0.21789
## B5_R1A12 -2.08549 1.48940 -1.400 0.16246
## B5_R1A13 -1.35112 1.22700 -1.101 0.27169
## B5_R1A14 -2.37952 1.09564 -2.172 0.03064 *
## B5_R1A15 -1.89294 1.57423 -1.202 0.23011
## B5_R1A16 12.71225 1.40673 9.037 < 2e-16 ***
## B5_R1A2 13.47854 1.11525 12.086 < 2e-16 ***
## B5_R1A3 -3.72346 2.30212 -1.617 0.10682
## B5_R1A4 -1.14354 1.07573 -1.063 0.28860
## B5_R1A5 14.73832 1.06382 13.854 < 2e-16 ***
## B5_R1A6 10.98725 1.41788 7.749 1.36e-13 ***
## B5_R1A7 -0.92652 1.09526 -0.846 0.39825
## B5_R1A8 -1.96409 1.48646 -1.321 0.18738
## B5_R1A9 15.65890 1.36727 11.453 < 2e-16 ***
## B5_R24A1 0.27051 0.59221 0.457 0.64815
## B5_R24A2 -0.18115 0.62784 -0.289 0.77314
## B5_R24A3 2.10743 1.13834 1.851 0.06508 .
## B5_R24A4 0.60381 0.56688 1.065 0.28765
## B5_R24A5 -1.05064 0.62501 -1.681 0.09378 .
## B5_R24A6 15.57728 0.50596 30.788 < 2e-16 ***
## KLASIFIKAS2 0.11932 0.44557 0.268 0.78904
## KODE_KAB02 0.08689 0.44603 0.195 0.84568
## KODE_KAB03 0.02141 0.35576 0.060 0.95204
## KODE_KAB04 0.65047 0.52249 1.245 0.21410
## KODE_KAB71 0.43449 0.52772 0.823 0.41096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.504714)
##
## Number of Fisher Scoring iterations: 19
predict = predict(fit.logit1, newdata = testing, type = "response")
predict <- factor(ifelse(predict > 0.5, "2","1"))
log_conf <- confusionMatrix(predict, testing$B5_R12A, positive = "2")
log_conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2
## 1 2 0
## 2 24 2031
##
## Accuracy : 0.9883
## 95% CI : (0.9827, 0.9925)
## No Information Rate : 0.9874
## P-Value [Acc > NIR] : 0.395
##
## Kappa : 0.1413
##
## Mcnemar's Test P-Value : 2.668e-06
##
## Sensitivity : 1.00000
## Specificity : 0.07692
## Pos Pred Value : 0.98832
## Neg Pred Value : 1.00000
## Prevalence : 0.98736
## Detection Rate : 0.98736
## Detection Prevalence : 0.99903
## Balanced Accuracy : 0.53846
##
## 'Positive' Class : 2
##
B5_R12A.p = predict(fit.logit1, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R12A.p'] <- factor(ifelse(B5_R12A.p > 0.5, "2","1"))
B5_R12A = predict(fit.logit1, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R12A'] <- factor(ifelse(B5_R12A > 0.5, "2","1"))
summary(susenas_diy.15$B5_R12A)
## 1 2
## 25 9954
fit.logit2<-svyglm(B5_R12B~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
+B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
+B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R24A
+B5_R1A+KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit2)
##
## Call:
## svyglm(formula = B5_R12B ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 +
## B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E +
## B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R24A +
## B5_R1A + KLASIFIKAS + KODE_KAB, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20,
## data = training)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.61706 2.75991 8.557 5.62e-16 ***
## B2_R1 0.10780 0.49978 0.216 0.82937
## B2_R2 -0.71107 0.51430 -1.383 0.16779
## B4_K32 4.03350 1.53734 2.624 0.00913 **
## B4_K33 5.06479 2.28008 2.221 0.02706 *
## B4_K34 22.44017 2.16915 10.345 < 2e-16 ***
## B4_K35 23.33007 1.97503 11.813 < 2e-16 ***
## B4_K36 21.96174 1.28154 17.137 < 2e-16 ***
## B4_K37 20.41685 1.98507 10.285 < 2e-16 ***
## B4_K38 20.39804 1.93493 10.542 < 2e-16 ***
## B4_K39 20.56898 1.21851 16.880 < 2e-16 ***
## B4_K62 -2.02684 1.70118 -1.191 0.23440
## B4_K8 0.06036 0.02362 2.555 0.01110 *
## B4_K92 6.28137 2.94011 2.136 0.03343 *
## B4_K93 -13.48945 1.92522 -7.007 1.55e-11 ***
## B4_K102 -0.74546 2.00813 -0.371 0.71073
## B4_K103 18.51055 2.10125 8.809 < 2e-16 ***
## B4_K104 18.14459 1.08016 16.798 < 2e-16 ***
## B5_R4A2 -2.39607 0.84988 -2.819 0.00513 **
## B5_R4A3 -8.20647 1.40510 -5.840 1.33e-08 ***
## B5_R4B5 38.07952 6.24757 6.095 3.28e-09 ***
## B5_R4B6 -5.96078 2.07738 -2.869 0.00440 **
## B5_R4C2 41.67409 3.27290 12.733 < 2e-16 ***
## B5_R4C3 17.42447 1.32528 13.148 < 2e-16 ***
## B5_R4D5 -3.46937 1.47302 -2.355 0.01914 *
## B5_R4D6 -2.57335 1.94993 -1.320 0.18791
## B5_R4E2 29.13163 3.34037 8.721 < 2e-16 ***
## B5_R4E3 11.10221 1.50963 7.354 1.76e-12 ***
## B5_R5A12 -1.10735 1.25034 -0.886 0.37650
## B5_R5A24 0.92988 0.72697 1.279 0.20183
## B5_R5A32 -0.99072 1.19554 -0.829 0.40793
## B5_R5A44 0.99454 0.75019 1.326 0.18592
## B5_R5B2 19.96179 3.48141 5.734 2.35e-08 ***
## B5_R5B3 -2.76859 1.43201 -1.933 0.05411 .
## B5_R5B4 -0.42064 2.06530 -0.204 0.83875
## B5_R24A1 0.57893 1.50643 0.384 0.70102
## B5_R24A2 -0.71083 1.01205 -0.702 0.48299
## B5_R24A3 -0.25372 1.62395 -0.156 0.87595
## B5_R24A4 -0.27250 0.91398 -0.298 0.76579
## B5_R24A5 -1.74793 1.35461 -1.290 0.19790
## B5_R24A6 17.40375 1.18794 14.650 < 2e-16 ***
## B5_R1A10 -1.71199 2.05027 -0.835 0.40436
## B5_R1A11 -1.87760 2.16467 -0.867 0.38641
## B5_R1A12 -3.93028 2.40404 -1.635 0.10310
## B5_R1A13 -1.64399 2.44297 -0.673 0.50149
## B5_R1A14 -1.60264 2.00507 -0.799 0.42474
## B5_R1A15 -1.97938 2.65901 -0.744 0.45720
## B5_R1A16 14.82713 2.79672 5.302 2.20e-07 ***
## B5_R1A2 15.27319 1.99970 7.638 2.83e-13 ***
## B5_R1A3 -14.89147 3.53679 -4.210 3.35e-05 ***
## B5_R1A4 -1.11869 1.97790 -0.566 0.57208
## B5_R1A5 17.12726 1.87026 9.158 < 2e-16 ***
## B5_R1A6 -31.81063 3.76941 -8.439 1.28e-15 ***
## B5_R1A7 -2.19247 1.96899 -1.113 0.26636
## B5_R1A8 15.91458 2.05770 7.734 1.51e-13 ***
## B5_R1A9 14.42132 2.51000 5.746 2.21e-08 ***
## KLASIFIKAS2 -1.26564 1.02595 -1.234 0.21828
## KODE_KAB02 -1.89847 1.36099 -1.395 0.16405
## KODE_KAB03 -0.20707 1.26224 -0.164 0.86980
## KODE_KAB04 -2.04780 1.35651 -1.510 0.13217
## KODE_KAB71 -1.77730 1.51499 -1.173 0.24165
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.4575021)
##
## Number of Fisher Scoring iterations: 23
predict = predict(fit.logit2, newdata = testing, type = "response")
summary(predict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6591 0.9995 1.0000 0.9981 1.0000 1.0000
predict <- factor(ifelse(predict > 0.5, "2","1"))
log_conf <- confusionMatrix(predict, testing$B5_R12B, positive = "2")
## Warning in confusionMatrix.default(predict, testing$B5_R12B, positive = "2"):
## Levels are not in the same order for reference and data. Refactoring data to
## match.
log_conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2
## 1 0 0
## 2 5 2052
##
## Accuracy : 0.9976
## 95% CI : (0.9943, 0.9992)
## No Information Rate : 0.9976
## P-Value [Acc > NIR] : 0.61596
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 0.07364
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.9976
## Neg Pred Value : NaN
## Prevalence : 0.9976
## Detection Rate : 0.9976
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 2
##
B5_R12B.p = predict(fit.logit1, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R12A.p'] <- factor(ifelse(B5_R12B.p > 0.5, "2","1"))
B5_R12B = predict(fit.logit2, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R12B'] <- factor(ifelse(B5_R12B > 0.5, "2","1"))
summary(susenas_diy.15$B5_R12B)
## 1 2
## 7 9972
table(sakernas_diy$B5_R17A)
##
## 0 1 2 3 4
## 157 2 1 1 10129
fit.logit3<-svyglm(B5_R17A~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R24A+B5_R12A+B5_R12B+B5_R1A
+KLASIFIKAS+KODE_KAB,design=des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.logit3)
##
## Call:
## svyglm(formula = B5_R17A ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 +
## B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E +
## B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R24A +
## B5_R12A + B5_R12B + B5_R1A + KLASIFIKAS + KODE_KAB, design = des,
## family = binomial)
##
## Survey design:
## svydesign(ids = ~psu + ssu, strata = ~strata, weights = ~w.adj2_20,
## data = training)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -79.176942 0.402925 -196.505 < 2e-16 ***
## B2_R1 0.012391 0.043415 0.285 0.77552
## B2_R2 -0.019418 0.045906 -0.423 0.67260
## B4_K32 -0.011002 0.041078 -0.268 0.78902
## B4_K33 0.016504 0.060976 0.271 0.78683
## B4_K34 0.007138 0.252903 0.028 0.97750
## B4_K35 0.024476 0.075603 0.324 0.74636
## B4_K36 0.026845 0.131872 0.204 0.83883
## B4_K37 0.024141 0.086100 0.280 0.77937
## B4_K38 -0.058360 0.292936 -0.199 0.84222
## B4_K39 0.017902 0.096201 0.186 0.85250
## B4_K62 -0.010467 0.036226 -0.289 0.77284
## B4_K8 -0.002283 0.001847 -1.236 0.21744
## B4_K92 -0.029644 0.190569 -0.156 0.87648
## B4_K93 0.001495 0.102592 0.015 0.98839
## B4_K102 0.034444 0.061631 0.559 0.57666
## B4_K103 0.032246 0.095068 0.339 0.73470
## B4_K104 0.034742 0.081626 0.426 0.67068
## B5_R4A2 -0.001104 0.089630 -0.012 0.99018
## B5_R4A3 -0.081437 0.220782 -0.369 0.71249
## B5_R4B5 0.019294 0.086676 0.223 0.82400
## B5_R4B6 -0.009308 0.198634 -0.047 0.96266
## B5_R4C2 0.002436 0.094273 0.026 0.97940
## B5_R4C3 0.009498 0.204399 0.046 0.96297
## B5_R4D5 0.007949 0.150365 0.053 0.95787
## B5_R4D6 0.047176 0.289354 0.163 0.87060
## B5_R4E2 0.002082 0.128428 0.016 0.98708
## B5_R4E3 -0.016762 0.219383 -0.076 0.93915
## B5_R5A12 -0.029344 0.090688 -0.324 0.74648
## B5_R5A24 -0.011451 0.204174 -0.056 0.95531
## B5_R5A32 -0.003427 0.067260 -0.051 0.95939
## B5_R5A44 0.001954 0.068395 0.029 0.97723
## B5_R5B2 -0.030690 0.139819 -0.219 0.82641
## B5_R5B3 0.003948 0.070736 0.056 0.95553
## B5_R5B4 0.014035 0.095637 0.147 0.88342
## B5_R24A1 -0.017075 0.083715 -0.204 0.83851
## B5_R24A2 -0.029000 0.090671 -0.320 0.74931
## B5_R24A3 -0.017835 0.108046 -0.165 0.86900
## B5_R24A4 -0.019715 0.088801 -0.222 0.82446
## B5_R24A5 -0.027876 0.103245 -0.270 0.78734
## B5_R24A6 -0.045286 0.098530 -0.460 0.64612
## B5_R12A2 53.144795 0.133283 398.736 < 2e-16 ***
## B5_R12B2 52.654263 0.237497 221.705 < 2e-16 ***
## B5_R1A10 -0.022644 0.082394 -0.275 0.78364
## B5_R1A11 -0.025564 0.080152 -0.319 0.74999
## B5_R1A12 0.047569 0.161913 0.294 0.76912
## B5_R1A13 -0.021132 0.110411 -0.191 0.84835
## B5_R1A14 -0.009574 0.096300 -0.099 0.92087
## B5_R1A15 -0.022355 0.170377 -0.131 0.89570
## B5_R1A16 0.088246 0.598700 0.147 0.88292
## B5_R1A2 -0.037493 0.224274 -0.167 0.86734
## B5_R1A3 -0.033683 0.701294 -0.048 0.96172
## B5_R1A4 -0.020791 0.068618 -0.303 0.76210
## B5_R1A5 0.016756 0.268412 0.062 0.95026
## B5_R1A6 -0.067706 0.393475 -0.172 0.86350
## B5_R1A7 -0.018847 0.070287 -0.268 0.78877
## B5_R1A8 0.021782 0.195826 0.111 0.91151
## B5_R1A9 0.095496 0.469830 0.203 0.83907
## KLASIFIKAS2 -0.006526 0.071053 -0.092 0.92689
## KODE_KAB02 0.173111 0.086869 1.993 0.04718 *
## KODE_KAB03 0.073978 0.063519 1.165 0.24507
## KODE_KAB04 0.248402 0.092721 2.679 0.00778 **
## KODE_KAB71 0.050412 0.108686 0.464 0.64310
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 2.001676e-13)
##
## Number of Fisher Scoring iterations: 25
predict = predict(fit.logit3, newdata = testing, type = "response")
summary(predict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 1.0000 1.0000 0.9849 1.0000 1.0000
predict <- cut(predict, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)
log_conf <- confusionMatrix(predict, testing$B5_R17A)
log_conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1 2 3 4
## 0 31 0 0 0 0
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 1 0 0 2025
##
## Overall Statistics
##
## Accuracy : 0.9995
## 95% CI : (0.9973, 1)
## No Information Rate : 0.9844
## P-Value [Acc > NIR] : 3.3e-13
##
## Kappa : 0.9839
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 0 Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 1.00000 0.0000000 NA NA 1.0000
## Specificity 1.00000 1.0000000 1 1 0.9688
## Pos Pred Value 1.00000 NaN NA NA 0.9995
## Neg Pred Value 1.00000 0.9995139 NA NA 1.0000
## Prevalence 0.01507 0.0004861 0 0 0.9844
## Detection Rate 0.01507 0.0000000 0 0 0.9844
## Detection Prevalence 0.01507 0.0000000 0 0 0.9849
## Balanced Accuracy 1.00000 0.5000000 NA NA 0.9844
B5_R17A.p = predict(fit.logit3, newdata = sakernas_diy, type = "response")
sakernas_diy['B5_R17A.p'] <- cut(B5_R17A.p, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)
B5_R17A = predict(fit.logit3, newdata = susenas_diy.15, type = "response")
susenas_diy.15['B5_R17A'] <- cut(B5_R17A, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = c(0, 1, 2, 3, 4), right = TRUE)
summary(susenas_diy.15$B5_R17A)
## 0 1 2 3 4
## 32 0 0 0 9947
susenas_diy.kota = filter(susenas_diy.15, KLASIFIKAS == "1")
sakernas_diy.kota = filter(sakernas_diy, KLASIFIKAS == "1")
set.seed(123)
Train <- createDataPartition(susenas_diy.kota$STATUS, p=0.8, list=FALSE)
## Warning in createDataPartition(susenas_diy.kota$STATUS, p = 0.8, list = FALSE):
## Some classes have no records ( 3, 4 ) and these will be ignored
training <- susenas_diy.kota[Train, ]
testing <- susenas_diy.kota[-Train, ]
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = training)
fit.logit4<-svyglm(STATUS~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
+B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1
+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B+B5_R20_KAT
+KODE_KAB,design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit4)
##
## Call:
## svyglm(formula = STATUS ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 +
## B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E +
## B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R20_KAT +
## KODE_KAB, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~PSU + SSU, strata = ~STRATA, weights = ~FWT,
## data = training)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.899265 1.064905 1.784 0.075864 .
## B2_R1 -0.227538 0.166504 -1.367 0.173139
## B2_R2 -0.018694 0.199968 -0.093 0.925602
## B4_K32 0.007702 0.165446 0.047 0.962914
## B4_K33 -0.690934 0.205141 -3.368 0.000892 ***
## B4_K34 -0.941591 0.911221 -1.033 0.302569
## B4_K35 -0.345899 0.284524 -1.216 0.225381
## B4_K36 -1.531898 0.531651 -2.881 0.004346 **
## B4_K37 0.038628 0.377799 0.102 0.918655
## B4_K38 12.122338 0.646235 18.758 < 2e-16 ***
## B4_K39 -0.341152 0.369031 -0.924 0.356250
## B4_K62 -0.107534 0.160530 -0.670 0.503634
## B4_K8 -0.013776 0.007210 -1.911 0.057329 .
## B4_K92 0.692920 0.448495 1.545 0.123766
## B4_K93 0.983477 0.295763 3.325 0.001033 **
## B4_K102 -0.097596 0.218568 -0.447 0.655651
## B4_K103 0.061445 0.362510 0.169 0.865559
## B4_K104 0.500576 0.372984 1.342 0.180934
## B5_R4A2 0.126092 0.281948 0.447 0.655152
## B5_R4A3 0.868078 0.769201 1.129 0.260302
## B5_R4B5 -0.329198 0.481682 -0.683 0.495042
## B5_R4B6 -0.156853 0.728321 -0.215 0.829682
## B5_R4C2 -0.388623 0.356896 -1.089 0.277375
## B5_R4C3 0.100041 0.687805 0.145 0.884487
## B5_R4D5 -0.344334 0.684534 -0.503 0.615447
## B5_R4D6 0.317279 0.922500 0.344 0.731220
## B5_R4E2 0.228485 0.751585 0.304 0.761408
## B5_R4E3 -0.510138 0.569591 -0.896 0.371421
## B5_R5A12 0.405164 0.387581 1.045 0.296986
## B5_R5A24 -0.434233 0.929833 -0.467 0.640955
## B5_R5A32 -0.470334 0.243490 -1.932 0.054672 .
## B5_R5A44 -0.426136 0.203458 -2.094 0.037347 *
## B5_R5B2 0.299422 0.919030 0.326 0.744880
## B5_R5B3 -0.572267 0.226606 -2.525 0.012252 *
## B5_R5B4 -0.005104 0.404623 -0.013 0.989947
## B5_R20_KAT1 -0.528346 0.401648 -1.315 0.189711
## B5_R20_KAT2 -0.738274 0.719214 -1.027 0.305767
## B5_R20_KAT3 -0.067786 0.417976 -0.162 0.871313
## B5_R20_KAT4 13.026193 0.699179 18.631 < 2e-16 ***
## B5_R20_KAT5 -1.121194 1.407063 -0.797 0.426396
## B5_R20_KAT6 -0.127141 0.434090 -0.293 0.769877
## B5_R20_KAT7 0.684682 0.438596 1.561 0.119924
## B5_R20_KAT8 0.340434 0.544026 0.626 0.532107
## B5_R20_KAT9 0.091039 0.402740 0.226 0.821370
## B5_R20_KAT10 -0.135003 0.640887 -0.211 0.833353
## B5_R20_KAT11 2.089733 0.970260 2.154 0.032329 *
## B5_R20_KAT12 -0.066634 0.859060 -0.078 0.938243
## B5_R20_KAT13 1.244841 0.877510 1.419 0.157410
## B5_R20_KAT14 2.948656 0.793884 3.714 0.000258 ***
## B5_R20_KAT15 1.433187 0.543543 2.637 0.008959 **
## B5_R20_KAT16 3.016341 0.672583 4.485 1.17e-05 ***
## B5_R20_KAT17 0.024441 0.436027 0.056 0.955349
## KODE_KAB02 1.255187 0.293079 4.283 2.74e-05 ***
## KODE_KAB03 0.907783 0.402088 2.258 0.024934 *
## KODE_KAB04 2.324631 0.368227 6.313 1.46e-09 ***
## KODE_KAB71 2.037263 0.370945 5.492 1.08e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9645859)
##
## Number of Fisher Scoring iterations: 13
predict = predict(fit.logit4, newdata = testing, type = "response")
summary(predict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1687 0.8220 0.9177 0.8616 0.9608 1.0000
predict <- cut(predict, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)
log_conf <- confusionMatrix(predict, testing$STATUS)
## Warning in confusionMatrix.default(predict, testing$STATUS): Levels are not in
## the same order for reference and data. Refactoring data to match.
log_conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 25 24 0 0
## 2 150 1105 0 0
## 3 0 0 0 0
## 4 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.8666
## 95% CI : (0.8469, 0.8846)
## No Information Rate : 0.8658
## P-Value [Acc > NIR] : 0.4877
##
## Kappa : 0.1748
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 0.14286 0.9787 NA NA
## Specificity 0.97874 0.1429 1 1
## Pos Pred Value 0.51020 0.8805 NA NA
## Neg Pred Value 0.88048 0.5102 NA NA
## Prevalence 0.13420 0.8658 0 0
## Detection Rate 0.01917 0.8474 0 0
## Detection Prevalence 0.03758 0.9624 0 0
## Balanced Accuracy 0.56080 0.5608 NA NA
STATUS.p = predict(fit.logit4, newdata = susenas_diy.kota, type = "response")
susenas_diy.kota['STATUS.p'] <- cut(STATUS.p, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)
STATUS = predict(fit.logit4, newdata = sakernas_diy.kota, type = "response")
sakernas_diy.kota['STATUS'] <- cut(STATUS, breaks = c(0, 0.5, 1), labels = c(1, 2), right = TRUE)
summary(sakernas_diy.kota$STATUS)
## 1 2
## 194 6694
susenas_diy.desa = filter(susenas_diy.15, KLASIFIKAS == "2")
sakernas_diy.desa = filter(sakernas_diy, KLASIFIKAS == "2")
set.seed(123)
Train <- createDataPartition(susenas_diy.desa$STATUS, p=0.8, list=FALSE)
## Warning in createDataPartition(susenas_diy.desa$STATUS, p = 0.8, list = FALSE):
## Some classes have no records ( 1, 2 ) and these will be ignored
training <- susenas_diy.desa[Train, ]
testing <- susenas_diy.desa[-Train, ]
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = training)
fit.logit5<-svyglm(STATUS~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9
+B4_K10+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E
+B5_R5A1+B5_R5A2+B5_R5A3+B5_R5A4+B5_R5B
+B5_R20_KAT+KODE_KAB,design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit5)
##
## Call:
## svyglm(formula = STATUS ~ B2_R1 + B2_R2 + B4_K3 + B4_K6 + B4_K8 +
## B4_K9 + B4_K10 + B5_R4A + B5_R4B + B5_R4C + B5_R4D + B5_R4E +
## B5_R5A1 + B5_R5A2 + B5_R5A3 + B5_R5A4 + B5_R5B + B5_R20_KAT +
## KODE_KAB, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~PSU + SSU, strata = ~STRATA, weights = ~FWT,
## data = training)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.235539 1.459670 12.493 < 2e-16 ***
## B2_R1 -0.829438 0.204725 -4.051 0.000128 ***
## B2_R2 0.705743 0.224626 3.142 0.002449 **
## B4_K32 -0.247133 0.170117 -1.453 0.150706
## B4_K33 -0.924694 0.299460 -3.088 0.002876 **
## B4_K34 -1.937210 1.205029 -1.608 0.112360
## B4_K35 -0.324733 0.391377 -0.830 0.409478
## B4_K36 -1.090371 0.975043 -1.118 0.267217
## B4_K37 0.239706 0.337932 0.709 0.480442
## B4_K38 14.364491 1.044112 13.758 < 2e-16 ***
## B4_K39 0.667790 0.706966 0.945 0.348074
## B4_K62 -0.058166 0.164014 -0.355 0.723912
## B4_K8 -0.026497 0.009006 -2.942 0.004401 **
## B4_K92 0.174222 0.834748 0.209 0.835270
## B4_K93 0.255258 0.201141 1.269 0.208569
## B4_K102 0.322248 0.279708 1.152 0.253149
## B4_K103 0.119796 0.423994 0.283 0.778349
## B4_K104 0.785502 0.428218 1.834 0.070792 .
## B5_R4A2 -0.600929 0.384483 -1.563 0.122510
## B5_R4A3 -0.264515 0.682005 -0.388 0.699288
## B5_R4B5 0.343253 0.272278 1.261 0.211555
## B5_R4B6 0.210564 0.541404 0.389 0.698499
## B5_R4C2 -0.577295 0.376150 -1.535 0.129290
## B5_R4C3 -0.066735 0.518128 -0.129 0.897881
## B5_R4D5 -0.812484 0.442267 -1.837 0.070381 .
## B5_R4D6 -0.124554 0.824338 -0.151 0.880329
## B5_R4E2 0.237436 0.462851 0.513 0.609554
## B5_R4E3 -0.588883 0.685199 -0.859 0.392993
## B5_R5A12 -0.641985 0.616278 -1.042 0.301079
## B5_R5A24 -14.326132 1.289851 -11.107 < 2e-16 ***
## B5_R5A32 -0.320691 0.221957 -1.445 0.152903
## B5_R5A44 -0.127149 0.249065 -0.511 0.611283
## B5_R5B2 -13.379123 1.099144 -12.172 < 2e-16 ***
## B5_R5B3 0.036579 0.289677 0.126 0.899872
## B5_R5B4 0.289112 0.485626 0.595 0.553510
## B5_R20_KAT1 -0.508441 0.582730 -0.873 0.385868
## B5_R20_KAT2 -0.636529 1.204035 -0.529 0.598688
## B5_R20_KAT3 -0.391955 0.638181 -0.614 0.541062
## B5_R20_KAT4 14.514785 0.615101 23.597 < 2e-16 ***
## B5_R20_KAT5 14.032311 0.934119 15.022 < 2e-16 ***
## B5_R20_KAT6 -0.803711 0.605477 -1.327 0.188629
## B5_R20_KAT7 0.261732 0.639898 0.409 0.683756
## B5_R20_KAT8 1.720384 1.103570 1.559 0.123460
## B5_R20_KAT9 0.189941 0.733301 0.259 0.796368
## B5_R20_KAT10 15.010992 0.984279 15.251 < 2e-16 ***
## B5_R20_KAT11 14.579612 0.631125 23.101 < 2e-16 ***
## B5_R20_KAT13 0.612577 1.309319 0.468 0.641318
## B5_R20_KAT14 0.645285 0.759990 0.849 0.398696
## B5_R20_KAT15 2.066161 0.946061 2.184 0.032270 *
## B5_R20_KAT16 -0.436580 0.892280 -0.489 0.626147
## B5_R20_KAT17 0.294276 0.702418 0.419 0.676520
## KODE_KAB02 3.962015 0.662200 5.983 8.07e-08 ***
## KODE_KAB03 0.108348 0.240660 0.450 0.653931
## KODE_KAB04 1.914012 0.693851 2.759 0.007379 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.054858)
##
## Number of Fisher Scoring iterations: 15
predict = predict(fit.logit5, newdata = testing, type = "response")
summary(predict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2295 0.8191 0.8788 0.8579 0.9299 1.0000
predict <- cut(predict, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)
log_conf <- confusionMatrix(predict, testing$STATUS)
## Warning in confusionMatrix.default(predict, testing$STATUS): Levels are not in
## the same order for reference and data. Refactoring data to match.
log_conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 4 3
## 4 0 0 99 585
##
## Overall Statistics
##
## Accuracy : 0.8524
## 95% CI : (0.8237, 0.878)
## No Information Rate : 0.8509
## P-Value [Acc > NIR] : 0.4837
##
## Kappa : 0.0548
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity NA NA 0.038835 0.99490
## Specificity 1 1 0.994898 0.03883
## Pos Pred Value NA NA 0.571429 0.85526
## Neg Pred Value NA NA 0.855263 0.57143
## Prevalence 0 0 0.149059 0.85094
## Detection Rate 0 0 0.005789 0.84660
## Detection Prevalence 0 0 0.010130 0.98987
## Balanced Accuracy NA NA 0.516866 0.51687
STATUS.p = predict(fit.logit5, newdata = susenas_diy.desa, type = "response")
susenas_diy.desa['STATUS.p'] <- cut(STATUS.p, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)
STATUS = predict(fit.logit5, newdata = sakernas_diy.desa, type = "response")
sakernas_diy.desa['STATUS'] <- cut(STATUS, breaks = c(0, 0.5, 1), labels = c(3, 4), right = TRUE)
summary(sakernas_diy.desa$STATUS)
## 3 4
## 44 3358
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 2000, savePredictions = TRUE)
mod_fit1 <- train(KAPITA~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A
+KLASIFIKAS+KODE_KAB,
data=susenas_diy.15, method="knn",
trControl = ctrl, tuneLength = 10, weights=FWT)
print(mod_fit1)
## k-Nearest Neighbors
##
## 9979 samples
## 22 predictor
##
## No pre-processing
## Resampling: Cross-Validated (2000 fold, repeated 1 times)
## Summary of sample sizes: 9974, 9973, 9974, 9974, 9975, 9974, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 1031754.1 0.4496178 751599.0
## 7 1012689.8 0.4520713 742475.5
## 9 1001588.1 0.4587419 737252.4
## 11 996931.9 0.4563559 736043.5
## 13 994161.6 0.4509211 735480.2
## 15 993517.0 0.4525171 736946.0
## 17 993472.1 0.4510652 738072.7
## 19 992480.8 0.4494446 738894.3
## 21 989948.8 0.4508724 737789.7
## 23 989888.7 0.4526561 738197.0
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 23.
susenas_diy.15['KAPITA.p'] = predict(mod_fit1, newdata = susenas_diy.15)
sakernas_diy['KAPITA'] = predict(mod_fit1, newdata = sakernas_diy)
summary(sakernas_diy$KAPITA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 508897 948432 1207362 1368531 1568291 5009934
summary(susenas_diy.15$KAPITA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 158071 563358 905111 1386516 1603787 40199452
susenas_diy.156=filter(susenas_diy.15, B5_R24A %in% c("1", "5"))
sakernas_diy.156=filter(sakernas_diy, B5_R24A %in% c("1", "5"))
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)
mod_fit1 <- train(B5_R28A1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A
+KLASIFIKAS+KODE_KAB, data=sakernas_diy.156,
method="knn", trControl = ctrl, tuneLength = 10,
weights =w.adj2_20)
print(mod_fit1)
## k-Nearest Neighbors
##
## 2082 samples
## 22 predictor
##
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times)
## Summary of sample sizes: 2076, 2078, 2076, 2076, 2077, 2077, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 7.443666 0.3760366 6.043422
## 7 7.320277 0.3870404 5.945774
## 9 7.295401 0.3908491 5.931135
## 11 7.290796 0.3967244 5.941500
## 13 7.294517 0.3911463 5.944746
## 15 7.332118 0.3831646 5.979311
## 17 7.342985 0.3883037 5.985700
## 19 7.349223 0.3890382 6.000590
## 21 7.371387 0.3929281 6.021109
## 23 7.350922 0.3969477 5.990244
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.
sakernas_diy.156['B5_R28A1.p'] = predict(mod_fit1, newdata = sakernas_diy.156)
susenas_diy.156['B5_R28A1'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28A1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8701 16.7953 18.9251 18.6188 20.7603 26.8719
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)
mod_fit1 <- train(B5_R28B1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+B5_R24A+KLASIFIKAS
+B5_R28A1+KODE_KAB, data=sakernas_diy.156,
method="knn", trControl = ctrl, tuneLength = 10,
weights =w.adj2_20)
print(mod_fit1)
## k-Nearest Neighbors
##
## 2082 samples
## 23 predictor
##
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times)
## Summary of sample sizes: 2077, 2078, 2077, 2076, 2077, 2077, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 768473.8 0.4704571 585101.4
## 7 754054.5 0.4764575 577112.1
## 9 743955.9 0.4859606 570928.4
## 11 735271.0 0.4892157 564705.9
## 13 728998.8 0.4930031 560505.8
## 15 726070.6 0.4903515 560330.7
## 17 724023.1 0.4871913 558779.3
## 19 722610.2 0.4850262 557991.5
## 21 721563.7 0.4799188 557865.3
## 23 720596.1 0.4792577 557637.8
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 23.
sakernas_diy.156['B5_R28B1.p'] = predict(mod_fit1, newdata = sakernas_diy.156)
susenas_diy.156['B5_R28B1'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28B1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1280408 1485317 1476898 1703913 2323913
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 400, savePredictions = TRUE)
mod_fit1 <- train(B5_R28B2~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
+B5_R24A+B5_R28A1+KODE_KAB, data=sakernas_diy.156,
method="knn", trControl = ctrl, tuneLength = 10,
weights =w.adj2_20)
print(mod_fit1)
## k-Nearest Neighbors
##
## 2082 samples
## 23 predictor
##
## No pre-processing
## Resampling: Cross-Validated (400 fold, repeated 1 times)
## Summary of sample sizes: 2076, 2077, 2077, 2077, 2077, 2076, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 91934.54 0.6476582 52631.24
## 7 90538.27 0.6399362 52653.09
## 9 89992.41 0.6279917 52571.47
## 11 88385.26 0.6310466 51909.29
## 13 88449.67 0.6255395 52274.13
## 15 89328.68 0.6181131 53098.43
## 17 90732.74 0.6075716 54165.53
## 19 90349.79 0.6094265 54084.69
## 21 90463.69 0.6060394 54472.23
## 23 91340.57 0.6048124 55012.87
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.
sakernas_diy.156['B5_R28B2.p'] = predict(mod_fit1, newdata = sakernas_diy.156)
susenas_diy.156['B5_R28B2'] = predict(mod_fit1, newdata = susenas_diy.156)
summary(susenas_diy.156$B5_R28B2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 12727 46364 87629 85568 903636
susenas_diy.buruh=filter(susenas_diy, B5_R24A %in% c("4"))
sakernas_diy.buruh=filter(sakernas_diy, B5_R24A %in% c("4"))
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 600, savePredictions = TRUE)
mod_fit1 <- train(B5_R28C1~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
+B5_R24A+KODE_KAB,
data=sakernas_diy.buruh, method="knn", trControl = ctrl,
tuneLength = 10, weights =w.adj2_20)
print(mod_fit1)
## k-Nearest Neighbors
##
## 2946 samples
## 22 predictor
##
## No pre-processing
## Resampling: Cross-Validated (600 fold, repeated 1 times)
## Summary of sample sizes: 2941, 2941, 2941, 2941, 2942, 2942, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 1395727 0.4444137 1037988
## 7 1379620 0.4493956 1037267
## 9 1380755 0.4385240 1039507
## 11 1390968 0.4305453 1047196
## 13 1394145 0.4321954 1049331
## 15 1399060 0.4234003 1054775
## 17 1399670 0.4247791 1057048
## 19 1401296 0.4268511 1059138
## 21 1404161 0.4228207 1062757
## 23 1409258 0.4204803 1067165
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 7.
sakernas_diy.buruh['B5_R28C1.p'] = predict(mod_fit1, newdata = sakernas_diy.buruh)
susenas_diy.buruh['B5_R28C1'] = predict(mod_fit1, newdata = susenas_diy.buruh)
summary(susenas_diy.buruh$B5_R28C1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 917857 1712727 2122857 2374186 2764000 8455000
set.seed(123)
ctrl <- trainControl(method = "repeatedcv", number = 600, savePredictions = TRUE)
mod_fit1 <- train(B5_R28C2~B2_R1+B2_R2+B4_K3+B4_K6+B4_K8+B4_K9+B4_K10
+B5_R4A+B5_R4B+B5_R4C+B5_R4D+B5_R4E+B5_R5A1+B5_R5A2
+B5_R5A3+B5_R5A4+B5_R5B+B5_R1A+B5_R20_KAT+KLASIFIKAS
+B5_R24A+KODE_KAB,
data=sakernas_diy.buruh, method="knn", trControl = ctrl,
tuneLength = 10, weights =w.adj2_20)
print(mod_fit1)
## k-Nearest Neighbors
##
## 2946 samples
## 22 predictor
##
## No pre-processing
## Resampling: Cross-Validated (600 fold, repeated 1 times)
## Summary of sample sizes: 2941, 2941, 2941, 2941, 2941, 2941, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 104853.35 0.3877252 68262.10
## 7 101066.54 0.3969221 66894.55
## 9 99354.68 0.4006963 66052.59
## 11 98401.48 0.4135759 65527.00
## 13 98107.51 0.4028811 65514.60
## 15 97404.81 0.4072118 65174.18
## 17 96579.75 0.4085570 64684.63
## 19 96162.67 0.4151930 64612.54
## 21 96367.14 0.4070182 64750.60
## 23 96331.84 0.4057632 64888.07
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 19.
sakernas_diy.buruh['B5_R28C2.p'] = predict(mod_fit1, newdata = sakernas_diy.buruh)
susenas_diy.buruh['B5_R28C2'] = predict(mod_fit1, newdata = susenas_diy.buruh)
summary(susenas_diy.buruh$B5_R28C2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3044 41118 58519 71078 81440 475000
#TUJUAN 3
#KAPITA
options(survey.lonely.psu = "adjust")
des1 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy)
svymean(~KAPITA, des1)
## mean SE
## KAPITA 1397334 12685
options(survey.lonely.psu = "adjust")
des2 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~KAPITA.p, des2)
## mean SE
## KAPITA.p 1406226 14230
svymean(~KAPITA, des2)
## mean SE
## KAPITA 1476320 42204
mns = svymean(~KAPITA+KAPITA.p, des2)
vcov(mns)
## KAPITA KAPITA.p
## KAPITA 1781195124 372687722
## KAPITA.p 372687722 202497609
var = 1781195124
akar_var = sqrt(var)
akar_var
## [1] 42204.21
var_xA = 12685^2
var_xB = 14230^2
myu_xA = 1397334
myu_xB = 1406226
myu_B = 1476320
var_B = 42204^2
cov = 372687722
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5572144
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 1401271
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 1467201
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 1095246711
var_B
## [1] 1781177616
#STATUS PENDUDUK MISKIN DESA
options(survey.lonely.psu = "adjust")
des3 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.desa)
svymean(~factor(STATUS), des3)
## mean SE
## factor(STATUS)3 0.012336 0.0027
## factor(STATUS)4 0.987664 0.0027
options(survey.lonely.psu = "adjust")
des4 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.desa)
svymean(~factor(STATUS.p), des4)
## mean SE
## factor(STATUS.p)3 0.011843 0.0026
## factor(STATUS.p)4 0.988157 0.0026
svymean(~factor(STATUS), des4)
## mean SE
## factor(STATUS)3 0.13589 0.0141
## factor(STATUS)4 0.86411 0.0141
mns1 = svymean(~STATUS+ STATUS.p, des4)
vcov(mns1)
## STATUS1 STATUS2 STATUS3 STATUS4 STATUS.p3
## STATUS1 0 0 0.000000e+00 0.000000e+00 0.000000e+00
## STATUS2 0 0 0.000000e+00 0.000000e+00 0.000000e+00
## STATUS3 0 0 1.999794e-04 -1.999794e-04 8.817654e-06
## STATUS4 0 0 -1.999794e-04 1.999794e-04 -8.817654e-06
## STATUS.p3 0 0 8.817654e-06 -8.817654e-06 6.983914e-06
## STATUS.p4 0 0 -8.817654e-06 8.817654e-06 -6.983914e-06
## STATUS.p4
## STATUS1 0.000000e+00
## STATUS2 0.000000e+00
## STATUS3 -8.817654e-06
## STATUS4 8.817654e-06
## STATUS.p3 -6.983914e-06
## STATUS.p4 6.983914e-06
#Kategori 3
var_xA = 0.0027^2
var_xB = 0.0026^2
myu_xA = 0.012336
myu_xB = 0.011843
myu_B = 0.13589
var_B = 0.0141^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4811388
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.0120802
cov = 8.817654e-06
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.1361994
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 0.0001873084
var_B
## [1] 0.00019881
#kategori 4
var_xA = 0.0027^2
var_xB = 0.0026^2
myu_xA = 0.987664
myu_xB = 0.988157
myu_B = 0.86411
var_B = 0.0141^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4811388
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.9879198
cov = 8.817654e-06
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.8638006
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 0.0001873084
var_B
## [1] 0.00019881
#STATUS PENDUDUK MISKIN KOTA
options(survey.lonely.psu = "adjust")
des5 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.kota)
svymean(~factor(STATUS), des5)
## mean SE
## factor(STATUS)1 0.019086 0.0019
## factor(STATUS)2 0.980914 0.0019
options(survey.lonely.psu = "adjust")
des6 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.kota)
svymean(~factor(STATUS.p), des6)
## mean SE
## factor(STATUS.p)1 0.018727 0.0022
## factor(STATUS.p)2 0.981273 0.0022
svymean(~factor(STATUS), des6)
## mean SE
## factor(STATUS)1 0.10982 0.0094
## factor(STATUS)2 0.89018 0.0094
mns2 = svymean(~STATUS+ STATUS.p, des6)
vcov(mns2)
## STATUS1 STATUS2 STATUS3 STATUS4 STATUS.p1
## STATUS1 8.765963e-05 -8.765963e-05 0 0 5.500767e-06
## STATUS2 -8.765963e-05 8.765963e-05 0 0 -5.500767e-06
## STATUS3 0.000000e+00 0.000000e+00 0 0 0.000000e+00
## STATUS4 0.000000e+00 0.000000e+00 0 0 0.000000e+00
## STATUS.p1 5.500767e-06 -5.500767e-06 0 0 4.718018e-06
## STATUS.p2 -5.500767e-06 5.500767e-06 0 0 -4.718018e-06
## STATUS.p2
## STATUS1 -5.500767e-06
## STATUS2 5.500767e-06
## STATUS3 0.000000e+00
## STATUS4 0.000000e+00
## STATUS.p1 -4.718018e-06
## STATUS.p2 4.718018e-06
#kategori 1
var_xA = 0.0019^2
var_xB = 0.0022^2
myu_xA = 0.019086
myu_xB = 0.018727
myu_B = 0.10982
var_B = 0.0094^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5727811
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.01893263
cov = 5.500767e-06
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.1100537
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 8.210826e-05
var_B
## [1] 8.836e-05
#kategori 2
var_xA = 0.0019^2
var_xB = 0.0022^2
myu_xA = 0.980914
myu_xB = 0.981273
myu_B = 0.89018
var_B = 0.0094^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.5727811
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.9810674
cov = 5.500767e-06
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.8899463
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 8.210826e-05
var_B
## [1] 8.836e-05
#B5_R17A
sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="1"] = "0"
sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="2"] = "0"
sakernas_diy$B5_R17A[sakernas_diy$B5_R17A=="3"] = "0"
sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="1"] = "0"
sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="2"] = "0"
sakernas_diy$B5_R17A.p[sakernas_diy$B5_R17A.p=="3"] = "0"
susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="1"] = "0"
susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="2"] = "0"
susenas_diy.15$B5_R17A[susenas_diy.15$B5_R17A=="3"] = "0"
table(sakernas_diy$B5_R17A)
##
## 0 1 2 3 4
## 161 0 0 0 10129
table(sakernas_diy$B5_R17A.p)
##
## 0 1 2 3 4
## 157 0 0 0 10133
table(susenas_diy.15$B5_R17A)
##
## 0 1 2 3 4
## 32 0 0 0 9947
options(survey.lonely.psu = "adjust")
des7 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~factor(B5_R17A), des7)
## mean SE
## factor(B5_R17A)0 0.0027984 6e-04
## factor(B5_R17A)4 0.9972016 6e-04
options(survey.lonely.psu = "adjust")
des1 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy)
svymean(~factor(B5_R17A.p), des1)
## mean SE
## factor(B5_R17A.p)0 0.016239 0.0016
## factor(B5_R17A.p)4 0.983761 0.0016
svymean(~factor(B5_R17A), des1)
## mean SE
## factor(B5_R17A)0 0.016747 0.0016
## factor(B5_R17A)4 0.983253 0.0016
mns3 = svymean(~B5_R17A+B5_R17A.p, des1)
vcov(mns3)
## B5_R17A0 B5_R17A1 B5_R17A2 B5_R17A3 B5_R17A4 B5_R17A.p0
## B5_R17A0 2.721615e-06 0 0 0 -2.721615e-06 2.568321e-06
## B5_R17A1 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A2 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A3 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A4 -2.721615e-06 0 0 0 2.721615e-06 -2.568321e-06
## B5_R17A.p0 2.568321e-06 0 0 0 -2.568321e-06 2.481277e-06
## B5_R17A.p1 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A.p2 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A.p3 0.000000e+00 0 0 0 0.000000e+00 0.000000e+00
## B5_R17A.p4 -2.568321e-06 0 0 0 2.568321e-06 -2.481277e-06
## B5_R17A.p1 B5_R17A.p2 B5_R17A.p3 B5_R17A.p4
## B5_R17A0 0 0 0 -2.568321e-06
## B5_R17A1 0 0 0 0.000000e+00
## B5_R17A2 0 0 0 0.000000e+00
## B5_R17A3 0 0 0 0.000000e+00
## B5_R17A4 0 0 0 2.568321e-06
## B5_R17A.p0 0 0 0 -2.481277e-06
## B5_R17A.p1 0 0 0 0.000000e+00
## B5_R17A.p2 0 0 0 0.000000e+00
## B5_R17A.p3 0 0 0 0.000000e+00
## B5_R17A.p4 0 0 0 2.481277e-06
#kategori 0
var_xA = 7e-04^2
var_xB = 0.0016^2
myu_xA = 0.0043247
myu_xB = 0.016239
myu_B = 0.016747
var_B = 0.0016^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.8393443
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 0.006238801
cov = 2.568321e-06
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 0.006714296
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] -1.666905e-08
var_B
## [1] 2.56e-06
#B5_R28B1
options(survey.lonely.psu = "adjust")
des8 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.156)
svymean(~B5_R28B1, des8)
## mean SE
## B5_R28B1 1506428 9090.1
options(survey.lonely.psu = "adjust")
des9 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.156)
svymean(~B5_R28B1.p, des9)
## mean SE
## B5_R28B1.p 1247855 20338
svymean(~B5_R28B1, des9)
## mean SE
## B5_R28B1 1260581 33776
mns4 = svymean(~B5_R28B1+ B5_R28B1.p, des9)
vcov(mns4)
## B5_R28B1 B5_R28B1.p
## B5_R28B1 1140844964 459001413
## B5_R28B1.p 459001413 413615124
var_xA = 9090.1^2
var_xB = 20338^2
myu_xA = 1506428
myu_xB = 1247855
myu_B = 1260581
var_B = 33776^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.8334961
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 1463375
cov = 459001413
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 1499739
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 631473749
var_B
## [1] 1140818176
options(survey.lonely.psu = "adjust")
des8 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.156)
svymean(~B5_R28B2, des8)
## mean SE
## B5_R28B2 94342 4513.7
options(survey.lonely.psu = "adjust")
des9 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.156)
svymean(~B5_R28B2.p, des9)
## mean SE
## B5_R28B2.p 73277 3681.1
svymean(~B5_R28B2, des9)
## mean SE
## B5_R28B2 73112 7406.8
mns5 = svymean(~B5_R28B2+B5_R28B2.p, des9)
vcov(mns5)
## B5_R28B2 B5_R28B2.p
## B5_R28B2 54860156 17669381
## B5_R28B2.p 17669381 13550469
#B5_R28B2
var_xA = 4513.7^2
var_xB = 3681.1^2
myu_xA = 94342
myu_xB = 73277
myu_B = 73112
var_B = 7406.8^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.3994371
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 81691.14
cov = 17669381
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 84083.75
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 31820423
var_B
## [1] 54860686
#B5_R28C1
options(survey.lonely.psu = "adjust")
des10 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.buruh)
svymean(~B5_R28C1, des10)
## mean SE
## B5_R28C1 2351402 23681
options(survey.lonely.psu = "adjust")
des11 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.buruh)
svymean(~B5_R28C1.p, des11)
## mean SE
## B5_R28C1.p 2323908 20448
svymean(~B5_R28C1, des11)
## mean SE
## B5_R28C1 2288719 47277
mns6 = svymean(~B5_R28C1+B5_R28C1.p, des11)
vcov(mns6)
## B5_R28C1 B5_R28C1.p
## B5_R28C1 2235103759 705673933
## B5_R28C1.p 705673933 418138926
var_xA = 23681^2
var_xB = 20448^2
myu_xA = 2351402
myu_xB = 2323908
myu_B = 2288719
var_B = 47277^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4271286
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 2335651
cov = 705673933
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 2308539
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 1044129219
var_B
## [1] 2235114729
#B5_R28C2
options(survey.lonely.psu = "adjust")
des10 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.buruh)
svymean(~B5_R28C2, des10)
## mean SE
## B5_R28C2 70033 1008.6
options(survey.lonely.psu = "adjust")
des11 = svydesign(ids=~psu+ssu, strata=~strata, weights=~w.adj2_20, data = sakernas_diy.buruh)
svymean(~B5_R28C2.p, des11)
## mean SE
## B5_R28C2.p 67532 921.58
svymean(~B5_R28C2, des11)
## mean SE
## B5_R28C2 71108 6621.3
mns7 = svymean(~B5_R28C2+B5_R28C2.p, des11)
vcov(mns7)
## B5_R28C2 B5_R28C2.p
## B5_R28C2 43842182 2372823.3
## B5_R28C2.p 2372823 849316.1
var_xA = 1008.6^2
var_xB = 921.58^2
myu_xA = 70033
myu_xB = 67532
myu_B = 71108
var_B = 6621.3^2
(alpha = var_xB / (var_xA+var_xB))
## [1] 0.4550076
(myu_x = (alpha*myu_xA) + ((1-alpha)*myu_xB))
## [1] 68669.97
cov = 2372823.3
myu_gls = myu_B + ((cov/var_xB)*(myu_x-myu_xB))
myu_gls
## [1] 74287.3
var.myu_gls = var_B - (cov^2/var_xB)
var.myu_gls
## [1] 37212359
var_B
## [1] 43841614
#DATA PENCACAHAN
sakernas2020_diy <- read_excel("sak0220(edit B5_R17A).xlsx")
sus1 <- read_excel("susenas 2020.xlsx", sheet = "RT")
sus2 <- read_excel("susenas 2020.xlsx", sheet = "Indo")
susenas_diy = merge(x = sus1, y = sus2, all.y = TRUE)
susenas_diy.15 <- subset(susenas_diy, R407>14)
sakernas2020_diy['KODE_KAB']=str_pad(sakernas2020_diy$KODE_KAB, width = 2, side = 'left', pad = '0')
sakernas2020_diy['NO_DSRT']=str_pad(sakernas2020_diy$NO_DSRT, width = 2, side = 'left', pad = '0')
sakernas2020_diy["psu"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$nks_ok, sep = "")
sakernas2020_diy["ssu"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$nks_ok, sakernas2020_diy$NO_DSRT, sep = "")
sakernas2020_diy["strata"]=paste(sakernas2020_diy$KODE_PROV, sakernas2020_diy$KODE_KAB, sakernas2020_diy$KLASIFIKAS, sep = "")
#Susenas
options(survey.lonely.psu = "adjust")
des12 = svydesign(ids=~PSU+SSU, strata=~STRATA, weights=~FWT, data = susenas_diy.15)
svymean(~KAPITA, des12)
## mean SE
## KAPITA 1476320 42204
svymean(~factor(STATUS), des12)
## mean SE
## factor(STATUS)1 0.080216 0.0068
## factor(STATUS)2 0.650228 0.0095
## factor(STATUS)3 0.036630 0.0039
## factor(STATUS)4 0.232926 0.0071
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="1"] = "0"
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="2"] = "0"
sakernas2020_diy$B5_R17A[sakernas2020_diy$B5_R17A=="3"] = "0"
table(sakernas2020_diy$B5_R17A)
##
## 0 4
## 97 2280
#Sakernas
options(survey.lonely.psu = "adjust")
des13 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas2020_diy)
svymean(~factor(B5_R17A), des13)
## mean SE
## factor(B5_R17A)0 0.043875 0.005
## factor(B5_R17A)4 0.956125 0.005
sakernas_diy.156.1=filter(sakernas2020_diy, B5_R24A %in% c("1", "5", "6"))
sakernas_diy.buruh1=filter(sakernas2020_diy, B5_R24A %in% c("4"))
options(survey.lonely.psu = "adjust")
des14 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas_diy.156.1)
svymean(~B5_R28B1, des14)
## mean SE
## B5_R28B1 945229 69781
svymean(~B5_R28B2, des14)
## mean SE
## B5_R28B2 48622 17634
options(survey.lonely.psu = "adjust")
des15 = svydesign(ids=~psu+ssu, strata=~strata, weights=~FINAL_WEIG, data = sakernas_diy.buruh1)
svymean(~B5_R28C1, des15)
## mean SE
## B5_R28C1 2337952 104460
svymean(~B5_R28C2, des15)
## mean SE
## B5_R28C2 104503 24344