Link to this document: http://rpubs.com/anshulkumar/WHAgov1
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 1.000 1.000 0.934 1.000 1.000 27
## db2$`_3_1_a_functional`
## n missing distinct Info Sum Mean Gmd
## 303 27 2 0.185 283 0.934 0.1237
| Functionality | Frequency | Total | |
|---|---|---|---|
| Count | NA | ||
| 0 |
20 6.1 % |
0 0 % |
20 6.1 % |
| 1 |
283 85.8 % |
0 0 % |
283 85.8 % |
| NA |
27 8.2 % |
0 0 % |
27 8.2 % |
| Total |
330 100 % |
0 100 % |
330 100 % |
# Cross-tabulate water source type (rows) against functionality (columns),
# with row percentages, an NA category and the summary statistics line.
sjt.xtab(
  var.row = db$Type,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Water Source Type", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
| Water Source Type | Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| Borehole handpump |
5 3.5 % |
133 92.4 % |
6 4.2 % |
144 100 % |
| Borehole mech pump |
0 0 % |
22 100 % |
0 0 % |
22 100 % |
| Dug well open |
3 6.5 % |
43 93.5 % |
0 0 % |
46 100 % |
| Dug well handpump |
13 18.3 % |
56 78.9 % |
2 2.8 % |
71 100 % |
| Spring protected |
3 7.5 % |
35 87.5 % |
2 5 % |
40 100 % |
| Spring unprotected |
0 0 % |
10 100 % |
0 0 % |
10 100 % |
| Rain harvest tank |
0 0 % |
1 5.3 % |
18 94.7 % |
19 100 % |
| NA |
0 0 % |
0 0 % |
2 100 % |
2 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=18.738 · df=6 · Cramer’s V=0.240 · Fisher’s p=0.009 |
# Cross-tabulate "enough funds for repair" (rows) against functionality
# (columns), with row percentages, an NA category and the summary line.
sjt.xtab(
  var.row = db$`_4_18_enoughfundsrepair`,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Enough Funds Repair", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
| Enough Funds Repair | Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| 0 |
16 7.9 % |
186 92.1 % |
0 0 % |
202 100 % |
| 1 |
6 5.6 % |
100 92.6 % |
2 1.9 % |
108 100 % |
| NA |
2 4.5 % |
14 31.8 % |
28 63.6 % |
44 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=0.249 · df=1 · φ=0.042 · Fisher’s p=0.642 |
# Cross-tabulate the fee collection system indicator (rows) against
# functionality (columns), with row percentages, NAs and the summary line.
sjt.xtab(
  var.row = db$`_4_4_feecollectionsystem`,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Fee collection system", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
|
Fee collection system |
Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| 0 |
15 8.8 % |
153 90 % |
2 1.2 % |
170 100 % |
| 1 |
7 4.7 % |
143 95.3 % |
0 0 % |
150 100 % |
| NA |
2 5.9 % |
4 11.8 % |
28 82.4 % |
34 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=1.622 · df=1 · φ=0.084 · p=0.203 |
# Cross-tabulate the caretaker indicator (rows) against functionality
# (columns), with row percentages, an NA category and the summary line.
sjt.xtab(
  var.row = db$`_2_2_a_Caretaker`,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Caretaker", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
| Caretaker | Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| 0 |
17 9.3 % |
164 89.6 % |
2 1.1 % |
183 100 % |
| 1 |
7 5 % |
133 95 % |
0 0 % |
140 100 % |
| NA |
0 0 % |
3 9.7 % |
28 90.3 % |
31 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=1.612 · df=1 · φ=0.083 · p=0.204 |
# Cross-tabulate the O&M plan indicator (rows) against functionality
# (columns), with row percentages, an NA category and the summary line.
sjt.xtab(
  var.row = db$`_4_17_planOandM`,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Plan O and M", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
| Plan O and M | Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| 0 |
14 12.1 % |
102 87.9 % |
0 0 % |
116 100 % |
| 1 |
9 4.6 % |
184 94.8 % |
1 0.5 % |
194 100 % |
| NA |
1 2.3 % |
14 31.8 % |
29 65.9 % |
44 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=4.743 · df=1 · φ=0.137 · Fisher’s p=0.024 |
# Cross-tabulate the repair-person indicator (rows) against functionality
# (columns), with row percentages, an NA category and the summary line.
sjt.xtab(
  var.row = db$`_4_19_personrepair`,
  var.col = db$`_3_1_a_functional`,
  var.labels = c("Person repair", "Functionality"),
  title = " ",
  show.na = TRUE,
  show.row.prc = TRUE,
  show.summary = TRUE
)
| Person repair | Functionality | Total | ||
|---|---|---|---|---|
| 0 | 1 | NA | ||
| 0 |
18 8.6 % |
190 90.9 % |
1 0.5 % |
209 100 % |
| 1 |
5 4.7 % |
101 94.4 % |
1 0.9 % |
107 100 % |
| NA |
1 2.6 % |
9 23.7 % |
28 73.7 % |
38 100 % |
| Total |
24 6.8 % |
300 84.7 % |
30 8.5 % |
354 100 % |
χ2=1.076 · df=1 · φ=0.071 · Fisher’s p=0.256 |
# logistic _3_1_a_functional watersourcetype2 _2_2_a_Caretaker _2_3_inspectorvisits _3_1_d_primaryuse _4_4_feecollectionsystem _4_17_planOandM _4_18_enoughfundsrepair _4_19_personrepair _4_20_persontrained _5_0_latrineaccess _5_1_a_facilitator
# db$reportedfeesdividedbyallusers
# Logistic regression of functionality on source type, caretaker, repair
# funds and repair person. The fit is stored as logit1 and then rendered
# as a regression table.
logit1 <- glm(
  `_3_1_a_functional` ~ Type +
    `_2_2_a_Caretaker` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair`,
  family = "binomial",
  data = db
)
tab_model(logit1)
| 3 1 a functional | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 20.80 | 5.97 – 72.40 | <0.001 |
| Borehole mech pump | 3568289.34 | 0.00 – Inf | 0.991 |
| Dug well open | 0.63 | 0.12 – 3.35 | 0.587 |
| Dug well handpump | 0.14 | 0.04 – 0.48 | 0.002 |
| Spring protected | 0.42 | 0.08 – 2.16 | 0.301 |
| Spring unprotected | 4907073.21 | 0.00 – Inf | 0.994 |
| Rain harvest tank | 4201877.71 | 0.00 – Inf | 0.998 |
_2_2_a_Caretaker
|
1.71 | 0.62 – 4.70 | 0.297 |
_4_18_enoughfundsrepair
|
0.99 | 0.34 – 2.89 | 0.991 |
_4_19_personrepair
|
1.33 | 0.43 – 4.12 | 0.619 |
| Observations | 298 | ||
| Cox & Snell’s R2 / Nagelkerke’s R2 | 0.065 / 0.160 | ||
# did not work:
# tab_model(logit1 <- glm(`_3_1_a_functional` ~ Type + `_2_2_a_Caretaker` + as.factor(`_3_1_d_primaryuse`) + as.factor(`_2_1_responsible`) +`_4_18_enoughfundsrepair` +`_4_19_personrepair`, data = db, family = "binomial"))
# summary(logit1)
Variables _3_1_d_primaryuse and _2_1_responsible were omitted from this regression because they caused errors.
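This model yields the warning message we see below (fitted probabilities numerically 0 or 1), so its very large odds ratios and unbounded confidence intervals should not be interpreted.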
# Larger logistic model of functionality with the full set of management
# predictors; the fit is rendered directly and not stored.
tab_model(
  glm(
    `_3_1_a_functional` ~ Type +
      `_2_2_a_Caretaker` +
      `_2_3_inspectorvisits` +
      as.factor(`_3_1_d_primaryuse`) +
      `_4_4_feecollectionsystem` +
      `_4_17_planOandM` +
      `_4_18_enoughfundsrepair` +
      `_4_19_personrepair` +
      `_4_20_persontrained` +
      `_5_0_latrineaccess` +
      `_5_1_a_facilitator`,
    family = "binomial",
    data = db
  )
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
| 3 1 a functional | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 70416446552692.53 | 0.00 – Inf | 0.996 |
| Borehole mech pump | 217947894.36 | 0.00 – Inf | 0.999 |
| Dug well open | 94912991.50 | 0.00 – Inf | 0.999 |
| Dug well handpump | 0.06 | 0.00 – 4.26 | 0.199 |
| Spring protected | 1039086226.23 | 0.00 – Inf | 0.999 |
| Spring unprotected | 108550026.68 | 0.00 – Inf | 0.999 |
| Rain harvest tank | 3.16 | 0.00 – Inf | 1.000 |
_2_2_a_Caretaker
|
0.07 | 0.00 – 9.74 | 0.287 |
_2_3_inspectorvisits
|
1.35 | 0.18 – 10.33 | 0.772 |
as factor(3 1 d<br>primaryuse)1
|
4.12 | 0.00 – Inf | 1.000 |
as factor(3 1 d<br>primaryuse)2
|
0.03 | 0.00 – 3.45 | 0.143 |
as factor(3 1 d<br>primaryuse)3
|
96018860.80 | 0.00 – Inf | 1.000 |
as factor(3 1 d<br>primaryuse)4
|
0.22 | 0.00 – 12.69 | 0.467 |
_4_4_feecollectionsystem
|
15.27 | 0.16 – 1429.15 | 0.239 |
_4_17_planOandM
|
0.00 | 0.00 – Inf | 0.998 |
_4_18_enoughfundsrepair
|
0.50 | 0.01 – 18.10 | 0.705 |
_4_19_personrepair
|
3.78 | 0.12 – 119.10 | 0.450 |
_4_20_persontrained
|
75573443.09 | 0.00 – Inf | 0.999 |
_5_0_latrineaccess
|
0.95 | 0.75 – 1.20 | 0.666 |
_5_1_a_facilitator
|
0.08 | 0.00 – 3.73 | 0.196 |
| Observations | 223 | ||
| Cox & Snell’s R2 / Nagelkerke’s R2 | 0.064 / 0.482 | ||
Cross-table of water source type and functionality:
# Two-way counts of source type by functionality, including NA levels;
# dnn supplies the same dimension headers the bare column names produced.
table(
  db$Type,
  db$`_3_1_a_functional`,
  dnn = c("Type", "_3_1_a_functional"),
  useNA = "always"
)
## _3_1_a_functional
## Type 0 1 <NA>
## Borehole handpump 5 133 6
## Borehole mech pump 0 22 0
## Dug well open 3 43 0
## Dug well handpump 13 56 2
## Spring protected 3 35 2
## Spring unprotected 0 10 0
## Rain harvest tank 0 1 18
## <NA> 0 0 2
Cross-table of fee collection system and functionality:
# Two-way counts of fee collection system by functionality, including NA
# levels; dnn supplies the same headers the bare column names produced.
table(
  db$`_4_4_feecollectionsystem`,
  db$`_3_1_a_functional`,
  dnn = c("_4_4_feecollectionsystem", "_3_1_a_functional"),
  useNA = "always"
)
## _3_1_a_functional
## _4_4_feecollectionsystem 0 1 <NA>
## 0 15 153 2
## 1 7 143 0
## <NA> 2 4 28
Length of breakdown:
| Length | Frequency | Total | |
|---|---|---|---|
| Count | NA | ||
| 0 | 1 | 0 | 1 |
| 0.5 | 6 | 0 | 6 |
| 0.75 | 3 | 0 | 3 |
| 1 | 37 | 0 | 37 |
| 2 | 29 | 0 | 29 |
| 3 | 3 | 0 | 3 |
| 3.5 | 1 | 0 | 1 |
| 4 | 11 | 0 | 11 |
| 5 | 2 | 0 | 2 |
| 7 | 15 | 0 | 15 |
| 10.5 | 1 | 0 | 1 |
| 11 | 1 | 0 | 1 |
| 12 | 1 | 0 | 1 |
| 14 | 9 | 0 | 9 |
| 15 | 2 | 0 | 2 |
| 16 | 2 | 0 | 2 |
| 20 | 2 | 0 | 2 |
| 21 | 2 | 0 | 2 |
| 28 | 1 | 0 | 1 |
| 30 | 10 | 0 | 10 |
| 31 | 1 | 0 | 1 |
| 38 | 1 | 0 | 1 |
| 45 | 2 | 0 | 2 |
| 60 | 6 | 0 | 6 |
| 61 | 1 | 0 | 1 |
| 65 | 1 | 0 | 1 |
| 84 | 1 | 0 | 1 |
| 90 | 3 | 0 | 3 |
| 100 | 1 | 0 | 1 |
| 120 | 6 | 0 | 6 |
| 150 | 2 | 0 | 2 |
| 180 | 1 | 0 | 1 |
| 360 | 3 | 0 | 3 |
| 365 | 2 | 0 | 2 |
| 420 | 1 | 0 | 1 |
| 1080 | 2 | 0 | 2 |
| 1440 | 1 | 0 | 1 |
| 1460 | 2 | 0 | 2 |
| 1800 | 1 | 0 | 1 |
| 3240 | 1 | 0 | 1 |
| 3600 | 2 | 0 | 2 |
| 6570 | 1 | 0 | 1 |
| NA | 173 | 0 | 173 |
| Total | 354 | 0 | 354 |
## [1] 14.00 7.00 30.00 2.00 NA 1.00 30.00 1.00
## [9] NA 14.00 NA 28.00 1.00 100.00 1.00 2.00
## [17] 1.00 30.00 84.00 2.00 0.50 NA NA NA
## [25] 30.00 16.00 90.00 1.00 2.00 NA 11.00 NA
## [33] 4.00 20.00 14.00 1.00 4.00 0.50 0.50 7.00
## [41] NA 14.00 NA NA 4.00 2.00 NA NA
## [49] 30.00 1.00 NA 60.00 150.00 2.00 1080.00 1.00
## [57] NA 7.00 1.00 10.50 2.00 360.00 14.00 1.00
## [65] 2.00 NA NA 21.00 2.00 1.00 21.00 16.00
## [73] 3.00 4.00 30.00 365.00 4.00 2.00 NA NA
## [81] NA NA NA NA NA NA 4.00 NA
## [89] 2.00 NA NA NA NA 4.00 4.00 150.00
## [97] NA NA 60.00 NA NA 15.00 NA 1.00
## [105] 2.00 1.00 NA NA 7.00 NA NA NA
## [113] NA NA NA NA NA NA NA 2.00
## [121] 7.00 NA NA NA NA NA 0.50 NA
## [129] 2.00 2.00 NA NA 30.00 NA 5.00 NA
## [137] 0.00 NA 1.00 NA 2.00 3.00 NA 3.00
## [145] 1.00 1.00 NA NA NA 2.00 30.00 2.00
## [153] NA NA NA 2.00 NA 3.50 365.00 NA
## [161] NA 7.00 61.00 1.00 NA 14.00 1460.00 7.00
## [169] 38.00 NA NA 4.00 NA NA 30.00 120.00
## [177] NA 4.00 NA 1800.00 3240.00 NA 1.00 2.00
## [185] NA NA NA 2.00 NA NA 120.00 420.00
## [193] NA 1.00 NA NA NA NA NA 2.00
## [201] NA NA NA NA 2.00 1440.00 NA NA
## [209] 180.00 NA NA 60.00 NA NA NA NA
## [217] NA 1460.00 NA 360.00 NA NA NA NA
## [225] 3600.00 NA NA NA 60.00 3600.00 NA NA
## [233] NA NA NA NA NA NA 45.00 NA
## [241] 14.00 NA 12.00 NA NA 1080.00 NA 90.00
## [249] 120.00 NA 120.00 NA NA NA NA 30.00
## [257] 7.00 NA NA 14.00 NA NA NA 2.00
## [265] NA 4.00 1.00 1.00 90.00 120.00 0.75 1.00
## [273] 1.00 360.00 NA 7.00 45.00 NA 14.00 1.00
## [281] NA 7.00 7.00 1.00 1.00 20.00 NA NA
## [289] NA 1.00 0.75 65.00 60.00 6570.00 NA 7.00
## [297] NA NA 1.00 NA 15.00 7.00 1.00 1.00
## [305] 2.00 2.00 1.00 1.00 NA 1.00 5.00 NA
## [313] 120.00 1.00 31.00 0.50 1.00 7.00 2.00 7.00
## [321] 0.75 NA NA NA NA NA NA NA
## [329] NA 2.00 NA NA NA NA NA NA
## [337] NA NA 1.00 60.00 0.50 NA NA NA
## [345] NA NA NA NA NA NA NA NA
## [353] NA 2.00
Log transformed:
# Copy the raw breakdown-length column into a working variable and
# tabulate it (NA counts included).
db[["lengthbreakdown2"]] <- db[["_3_3_a_lengthbreakdown"]]
table(db[["lengthbreakdown2"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 1 0
## 0.5 6 0
## 0.75 3 0
## 1 37 0
## 2 29 0
## 3 3 0
## 3.5 1 0
## 4 11 0
## 5 2 0
## 7 15 0
## 10.5 1 0
## 11 1 0
## 12 1 0
## 14 9 0
## 15 2 0
## 16 2 0
## 20 2 0
## 21 2 0
## 28 1 0
## 30 10 0
## 31 1 0
## 38 1 0
## 45 2 0
## 60 6 0
## 61 1 0
## 65 1 0
## 84 1 0
## 90 3 0
## 100 1 0
## 120 6 0
## 150 2 0
## 180 1 0
## 360 3 0
## 365 2 0
## 420 1 0
## 1080 2 0
## 1440 1 0
## 1460 2 0
## 1800 1 0
## 3240 1 0
## 3600 2 0
## 6570 1 0
## <NA> 173 0
# Recode a breakdown length of 0 to NA, then tabulate the result to
# confirm the 0 category has moved into the NA count.
db[["lengthbreakdown3"]] <- recode(db[["lengthbreakdown2"]], "0=NA")
table(db[["lengthbreakdown3"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0.5 6 0
## 0.75 3 0
## 1 37 0
## 2 29 0
## 3 3 0
## 3.5 1 0
## 4 11 0
## 5 2 0
## 7 15 0
## 10.5 1 0
## 11 1 0
## 12 1 0
## 14 9 0
## 15 2 0
## 16 2 0
## 20 2 0
## 21 2 0
## 28 1 0
## 30 10 0
## 31 1 0
## 38 1 0
## 45 2 0
## 60 6 0
## 61 1 0
## 65 1 0
## 84 1 0
## 90 3 0
## 100 1 0
## 120 6 0
## 150 2 0
## 180 1 0
## 360 3 0
## 365 2 0
## 420 1 0
## 1080 2 0
## 1440 1 0
## 1460 2 0
## 1800 1 0
## 3240 1 0
## 3600 2 0
## 6570 1 0
## <NA> 174 0
# Natural log of the zero-free breakdown length, followed by a histogram
# of the transformed values.
db[["logLenBrk"]] <- log(db[["lengthbreakdown3"]])
sjp.frq(db$logLenBrk, type = "hist")
# tab_model(ols0 <- lm(`_3_3_a_lengthbreakdown` ~ watersourcetype2 +`_2_2_a_Caretaker` +`_2_3_inspectorvisits` +`_3_1_d_primaryuse` +`_4_4_feecollectionsystem` +`_4_17_planOandM` +`_4_18_enoughfundsrepair` +`_4_19_personrepair` +`_4_20_persontrained` +`_5_0_latrineaccess` +`_5_1_a_facilitator` + `_4_9_actualmonthlyfees` + reportedfeesdividedbyallusers, data = db))
# OLS of the untransformed breakdown length on source type and the
# management predictors; stored as ols0 and then rendered as a table.
ols0 <- lm(
  `_3_3_a_lengthbreakdown` ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained`,
  data = db
)
tab_model(ols0)
| 3 3 a lengthbreakdown | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 175.74 | -417.70 – 769.17 | 0.563 |
| Borehole mech pump | -51.98 | -530.26 – 426.30 | 0.832 |
| Dug well open | -33.10 | -753.66 – 687.46 | 0.928 |
| Dug well handpump | -18.58 | -280.39 – 243.24 | 0.890 |
| Rain harvest tank | -267.91 | -1528.12 – 992.29 | 0.678 |
_2_2_a_Caretaker
|
-133.83 | -399.76 – 132.09 | 0.326 |
as factor(3 1 d<br>primaryuse)1
|
-181.09 | -1456.87 – 1094.69 | 0.781 |
as factor(3 1 d<br>primaryuse)2
|
47.12 | -255.68 – 349.93 | 0.761 |
as factor(3 1 d<br>primaryuse)4
|
69.13 | -213.33 – 351.59 | 0.632 |
as factor(2 1<br>responsible)2
|
27.48 | -511.75 – 566.72 | 0.921 |
as factor(2 1<br>responsible)4
|
-146.56 | -1468.82 – 1175.69 | 0.828 |
as factor(2 1<br>responsible)5
|
-31.52 | -1375.55 – 1312.51 | 0.963 |
as factor(2 1<br>responsible)6
|
97.12 | -455.57 – 649.82 | 0.731 |
as factor(2 1<br>responsible)7
|
53.94 | -1325.62 – 1433.49 | 0.939 |
as factor(2 1<br>responsible)8
|
-42.13 | -757.38 – 673.12 | 0.908 |
as factor(2 1<br>responsible)9
|
66.91 | -469.53 – 603.35 | 0.807 |
as factor(2 1<br>responsible)10
|
-49.69 | -1062.06 – 962.68 | 0.924 |
_4_4_feecollectionsystem
|
-122.44 | -393.44 – 148.56 | 0.378 |
_4_17_planOandM
|
139.32 | -120.86 – 399.50 | 0.296 |
_4_18_enoughfundsrepair
|
30.33 | -229.11 – 289.77 | 0.819 |
_4_19_personrepair
|
-194.83 | -451.42 – 61.76 | 0.139 |
_4_20_persontrained
|
-16.69 | -346.31 – 312.94 | 0.921 |
| Observations | 145 | ||
| R2 / adjusted R2 | 0.066 / -0.093 | ||
# tab_model(ols1 <- lm(logLenBrk ~ watersourcetype2 +`_2_2_a_Caretaker` +`_2_3_inspectorvisits` +`_3_1_d_primaryuse` +`_4_4_feecollectionsystem` +`_4_17_planOandM` +`_4_18_enoughfundsrepair` +`_4_19_personrepair` +`_4_20_persontrained` +`_5_0_latrineaccess` +`_5_1_a_facilitator` + `_4_9_actualmonthlyfees` + reportedfeesdividedbyallusers, data = db))
# OLS of the log breakdown length on the same predictors as ols0;
# stored as ols1 and then rendered as a table.
ols1 <- lm(
  logLenBrk ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained`,
  data = db
)
tab_model(ols1)
| log Len Brk | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 2.31 | 0.56 – 4.05 | 0.011 |
| Borehole mech pump | 0.57 | -0.85 – 1.98 | 0.435 |
| Dug well open | 1.52 | -0.61 – 3.64 | 0.164 |
| Dug well handpump | 0.20 | -0.58 – 0.99 | 0.611 |
| Rain harvest tank | -1.02 | -4.73 – 2.70 | 0.593 |
_2_2_a_Caretaker
|
-0.30 | -1.09 – 0.48 | 0.448 |
as factor(3 1 d<br>primaryuse)1
|
-2.24 | -6.03 – 1.54 | 0.248 |
as factor(3 1 d<br>primaryuse)2
|
-0.22 | -1.11 – 0.68 | 0.637 |
as factor(3 1 d<br>primaryuse)4
|
0.69 | -0.15 – 1.52 | 0.109 |
as factor(2 1<br>responsible)2
|
-0.16 | -1.74 – 1.43 | 0.846 |
as factor(2 1<br>responsible)4
|
-0.62 | -4.51 – 3.28 | 0.757 |
as factor(2 1<br>responsible)5
|
-0.68 | -4.64 – 3.27 | 0.735 |
as factor(2 1<br>responsible)6
|
-0.04 | -1.66 – 1.59 | 0.966 |
as factor(2 1<br>responsible)7
|
-1.75 | -5.81 – 2.32 | 0.401 |
as factor(2 1<br>responsible)8
|
0.14 | -1.97 – 2.24 | 0.899 |
as factor(2 1<br>responsible)9
|
-1.15 | -2.73 – 0.42 | 0.155 |
as factor(2 1<br>responsible)10
|
-1.58 | -4.56 – 1.40 | 0.301 |
_4_4_feecollectionsystem
|
-0.42 | -1.23 – 0.38 | 0.305 |
_4_17_planOandM
|
0.87 | 0.10 – 1.64 | 0.029 |
_4_18_enoughfundsrepair
|
0.10 | -0.66 – 0.87 | 0.790 |
_4_19_personrepair
|
-1.28 | -2.05 – -0.50 | 0.002 |
_4_20_persontrained
|
0.18 | -0.84 – 1.19 | 0.734 |
| Observations | 144 | ||
| R2 / adjusted R2 | 0.253 / 0.124 | ||
# The fit is better and more observations are kept when
# `_4_9_actualmonthlyfees` and reportedfeesdividedbyallusers are left out.

# Mark every row TRUE, then switch off the rows that lm() dropped for
# missing values (as reported by na.action).
db$used <- replace(rep(TRUE, nrow(db)), na.action(ols1), FALSE)
# Outcome restricted to the rows ols1 actually used; NA elsewhere.
db$yols1 <- ifelse(db$used, db$logLenBrk, NA)
describe(db$yols1)
## db$yols1
## n missing distinct Info Mean Gmd .05 .10
## 144 210 32 0.981 1.778 2.06 -0.2445 0.0000
## .25 .50 .75 .90 .95
## 0.0000 1.3863 2.8284 4.3348 5.7548
##
## lowest : -0.6931472 -0.2876821 0.0000000 0.6931472 1.0986123
## highest: 5.8998974 6.0402547 7.2861917 7.4955419 8.7902691
# Fitted values and two flavours of residuals from ols1.
ols1yhat <- fitted.values(ols1)
ols1rr <- residuals(ols1, type = "response")
ols1rp <- residuals(ols1, type = "pearson")
# describe(db$yols1)
# head(db[c("used","logLenBrk", "yols1")], n=50)
# with(db, table(used, useNA = 'always'))
# Observed outcome for the rows used by ols1, with the NA entries removed.
yols1vec <- db[["yols1"]]
yols1a <- na.omit(yols1vec)
# Draw the six ols1 diagnostic plots on one 2 x 3 page (filled column by
# column). The previous graphics settings are saved and restored afterwards
# so that later plots, such as the residual histogram, are not drawn into
# the same 2 x 3 grid.
old_par <- par(mfcol = c(2, 3))
# with(db, {
# plot(ols1yhat, ols1rr, main = "Fitted vs Residuals")
# qqnorm(ols1rr)
# plot(ols1yhat, ols1rp, main = "Fitted vs Pearson Residuals")
# qqnorm(ols1rp)
# plot(TTC, ols1rp, main = "Actual vs Pearson Residuals")
# plot(TTC, ols1yhat, main = "Actual vs Fitted")
# })
{
  # NOTE(review): for an unweighted lm the response and Pearson residuals
  # coincide, so the Pearson panels repeat the first column -- confirm
  # whether a different residual type was intended.
  plot(ols1yhat, ols1rr, main = "Fitted vs Residuals")
  qqnorm(ols1rr)
  plot(ols1yhat, ols1rp, main = "Fitted vs Pearson Residuals")
  qqnorm(ols1rp)
  plot(yols1a, ols1rp, main = "Actual vs Pearson Residuals")
  plot(yols1a, ols1yhat, main = "Actual vs Fitted")
}
par(old_par)
# Histogram of the ols1 response residuals in 15 suggested bins.
hist(
  ols1rr,
  breaks = 15,
  col = "skyblue",
  border = "black",
  las = 1,
  # xlim = c(0, 125),
  xlab = "Residuals",
  main = "Actual Residuals"
)
# https://www.princeton.edu/~otorres/Regression101R.pdf # OLS diagnostics in r
# https://www.statmethods.net/stats/rdiagnostics.html # OLS diagnostics
# https://stats.stackexchange.com/questions/117052/replicating-statas-robust-option-in-r # robust standard errors
# https://data.princeton.edu/wws509/r/robust # robust
# Residual plots for ols1; the printed table below reports the
# accompanying test statistics, including the Tukey test.
residualPlots(model = ols1)
## Test stat Pr(>|Test stat|)
## Type
## as.factor(`_3_1_d_primaryuse`)
## as.factor(`_2_1_responsible`)
## Tukey test 1.885 0.05943 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# correlation of actual and fitted
cor(x = yols1a, y = ols1yhat)
## [1] 0.5029579
# correlation of predicted values and residuals
cor(x = ols1yhat, y = ols1rr)
## [1] -6.592429e-17
This model adds _4_9_actualmonthlyfees and reportedfeesdividedbyallusers, which reduces the sample size.
# Same log-length model with the two fee variables added. The fit is stored
# under the name ols1 again, so it replaces the earlier ols1 object from
# this point on.
ols1 <- lm(
  logLenBrk ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained` +
    reportedfeesdividedbyallusers +
    `_4_9_actualmonthlyfees`,
  data = db
)
tab_model(ols1)
| log Len Brk | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.94 | -2.91 – 4.80 | 0.633 |
| Borehole mech pump | 0.03 | -4.08 – 4.14 | 0.989 |
| Dug well open | 2.60 | -0.81 – 6.02 | 0.141 |
| Dug well handpump | -0.15 | -1.13 – 0.82 | 0.759 |
_2_2_a_Caretaker
|
-0.38 | -1.63 – 0.87 | 0.557 |
as factor(3 1 d<br>primaryuse)2
|
-0.61 | -1.68 – 0.47 | 0.273 |
as factor(3 1 d<br>primaryuse)4
|
0.09 | -0.83 – 1.01 | 0.849 |
as factor(2 1<br>responsible)2
|
-0.50 | -2.19 – 1.19 | 0.563 |
as factor(2 1<br>responsible)6
|
-0.94 | -2.86 – 0.98 | 0.343 |
as factor(2 1<br>responsible)8
|
-1.35 | -4.41 – 1.70 | 0.389 |
as factor(2 1<br>responsible)9
|
-1.43 | -3.23 – 0.38 | 0.126 |
as factor(2 1<br>responsible)10
|
-3.56 | -7.26 – 0.14 | 0.064 |
_4_4_feecollectionsystem
|
1.63 | -1.52 – 4.79 | 0.313 |
_4_17_planOandM
|
0.52 | -0.40 – 1.44 | 0.271 |
_4_18_enoughfundsrepair
|
-0.22 | -1.15 – 0.72 | 0.653 |
_4_19_personrepair
|
-0.47 | -1.40 – 0.47 | 0.331 |
_4_20_persontrained
|
0.12 | -1.34 – 1.59 | 0.872 |
| reportedfeesdividedbyallusers | 0.00 | -0.00 – 0.00 | 0.442 |
_4_9_actualmonthlyfees
|
-0.00 | -0.00 – 0.00 | 0.365 |
| Observations | 80 | ||
| R2 / adjusted R2 | 0.250 / 0.029 | ||
Coming soon
# cor(db[, c('_2_2_a_Caretaker','_2_3_inspectorvisits','_3_1_d_primaryuse','_4_4_feecollectionsystem','_4_17_planOandM','_4_18_enoughfundsrepair','_4_19_personrepair','_4_20_persontrained','_5_0_latrineaccess','_5_1_a_facilitator')])
# Frequency of each caretaker value, NA included.
table(db[["_2_2_a_Caretaker"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 183 0
## 1 140 0
## <NA> 31 0
# Frequency of each inspector-visits value, NA included.
table(db[["_2_3_inspectorvisits"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 222 0
## 0.5 4 0
## 1 48 0
## 2 13 0
## 3 10 0
## 4 5 0
## 6 1 0
## 9 2 0
## 12 1 0
## 120 1 0
## <NA> 47 0
# Frequency of each primary-use code, NA included.
table(db[["_3_1_d_primaryuse"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 171 0
## 1 1 0
## 2 44 0
## 3 2 0
## 4 86 0
## <NA> 50 0
# Frequency of each fee-collection-system value, NA included.
table(db[["_4_4_feecollectionsystem"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 170 0
## 1 150 0
## <NA> 34 0
# Frequency of each O&M plan value, NA included.
table(db[["_4_17_planOandM"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 116 0
## 1 194 0
## <NA> 44 0
# Frequency of each "enough funds for repair" value, NA included.
table(db[["_4_18_enoughfundsrepair"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 202 0
## 1 108 0
## <NA> 44 0
# Frequency of each repair-person value, NA included.
table(db[["_4_19_personrepair"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 209 0
## 1 107 0
## <NA> 38 0
# Frequency of each person-trained value, NA included.
table(db[["_4_20_persontrained"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 279 0
## 1 41 0
## <NA> 34 0
# Frequency of each latrine-access value, NA included.
table(db[["_5_0_latrineaccess"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 2 1 0
## 5 1 0
## 30 1 0
## 50 3 0
## 55 1 0
## 70 8 0
## 75 7 0
## 77 1 0
## 77.5 1 0
## 80 22 0
## 82 1 0
## 85 8 0
## 87 1 0
## 90 26 0
## 92 2 0
## 93 3 0
## 95 20 0
## 96 1 0
## 97 4 0
## 97.5 2 0
## 98 19 0
## 99 16 0
## 99.5 2 0
## 100 166 0
## <NA> 37 0
# Frequency of each facilitator value, NA included.
table(db[["_5_1_a_facilitator"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 181 0
## 1 107 0
## <NA> 66 0
# Frequency of each actual-monthly-fees value, NA included.
table(db[["_4_9_actualmonthlyfees"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 1 0
## 2700 1 0
## 5000 1 0
## 6000 1 0
## 10000 3 0
## 13000 1 0
## 15000 7 0
## 16000 2 0
## 18000 1 0
## 19000 1 0
## 20000 8 0
## 21000 1 0
## 22500 1 0
## 24000 2 0
## 25000 2 0
## 26000 1 0
## 27000 1 0
## 30000 6 0
## 35000 2 0
## 36000 1 0
## 40000 7 0
## 45000 7 0
## 46000 2 0
## 47500 1 0
## 48000 2 0
## 49000 1 0
## 50000 8 0
## 55000 2 0
## 60000 8 0
## 62500 1 0
## 65000 1 0
## 68000 1 0
## 70000 3 0
## 75000 3 0
## 80000 3 0
## 84000 1 0
## 85000 1 0
## 90000 2 0
## 96500 1 0
## 1e+05 5 0
## 106000 1 0
## 140000 1 0
## 141000 1 0
## 150000 2 0
## 160000 1 0
## 170000 1 0
## 192500 1 0
## 2e+05 2 0
## 220000 1 0
## 250000 1 0
## 280000 1 0
## 7e+05 1 0
## 9e+05 1 0
## 2e+06 1 0
## <NA> 234 0
# Frequency of each reported-fees-per-user value, NA included.
table(db[["reportedfeesdividedbyallusers"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 1 0
## 30 1 0
## 40 1 0
## 51.2820510864258 1 0
## 60 1 0
## 82.8729248046875 1 0
## 100 2 0
## 112.5 1 0
## 125 1 0
## 140 1 0
## 153.846160888672 1 0
## 160 1 0
## 171.428573608398 1 0
## 194.444442749023 1 0
## 200 3 0
## 225 3 0
## 230.769226074219 1 0
## 250 3 0
## 300 3 0
## 320 1 0
## 327.868865966797 1 0
## 343.137268066406 1 0
## 350 1 0
## 385.714294433594 1 0
## 400 1 0
## 448.837219238281 1 0
## 450 1 0
## 466.666656494141 1 0
## 500 4 0
## 516.129028320312 1 0
## 571.428588867188 1 0
## 575 1 0
## 588.235290527344 1 0
## 600 1 0
## 617.9775390625 1 0
## 625 2 0
## 642.857116699219 1 0
## 714.285705566406 1 0
## 722.222229003906 1 0
## 735.294128417969 1 0
## 800 2 0
## 818.965515136719 1 0
## 833.333312988281 2 0
## 838.709655761719 1 0
## 900 2 0
## 933.333312988281 1 0
## 941.176452636719 1 0
## 1000 8 0
## 1007.14288330078 1 0
## 1080 1 0
## 1088.88891601562 1 0
## 1111.11108398438 1 0
## 1145.83337402344 1 0
## 1200 1 0
## 1203.125 1 0
## 1250 4 0
## 1357.14282226562 1 0
## 1360 1 0
## 1371.42858886719 1 0
## 1400 1 0
## 1428.57141113281 1 0
## 1523.8095703125 1 0
## 1700 1 0
## 1714.28576660156 1 0
## 1750 1 0
## 1766.66662597656 1 0
## 1840 1 0
## 1875 1 0
## 2000 2 0
## 2125 1 0
## 2250 1 0
## 2400 1 0
## 2500 2 0
## 2666.66674804688 2 0
## 3000 1 0
## 3750 1 0
## 4000 1 0
## 4400 1 0
## 7000 1 0
## 10000 2 0
## <NA> 244 0
# Frequency of each responsible-party code, NA included.
table(db[["_2_1_responsible"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 1 12 0
## 2 70 0
## 3 8 0
## 4 1 0
## 5 5 0
## 6 119 0
## 7 2 0
## 8 63 0
## 9 70 0
## 10 4 0
## <NA> 0 0
# Frequency of each savings-storage code, NA included.
table(db[["__4_15_savingsstorage"]], db[["count"]], useNA = "always")
##
## Count <NA>
## 0 42 0
## 1 4 0
## 3 2 0
## 4 37 0
## 5 56 0
## 6 9 0
## <NA> 204 0