Link to this document: http://rpubs.com/anshulkumar/WHAgov1

1 Issues to work on

2 Dependent Variable: Functionality

2.1 Descriptive statistics

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   1.000   1.000   0.934   1.000   1.000      27
## db2$`_3_1_a_functional` 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      303       27        2    0.185      283    0.934   0.1237

Functionality
Functionality Frequency Total
Count NA
0 20
6.1 %
0
0 %
20
6.1 %
1 283
85.8 %
0
0 %
283
85.8 %
NA 27
8.2 %
0
0 %
27
8.2 %
Total 330
100 %
0
100 %
330
100 %

2.1.1 Functionality and Water Source Type

# Cross-tabulate water source type (rows) against functionality (columns),
# showing row percentages, an explicit NA category and the summary footer.
sjt.xtab(
  db$Type,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Water Source Type", "Functionality"),
  show.summary = TRUE
)
Water Source Type Functionality Total
0 1 NA
Borehole handpump 5
3.5 %
133
92.4 %
6
4.2 %
144
100 %
Borehole mech pump 0
0 %
22
100 %
0
0 %
22
100 %
Dug well open 3
6.5 %
43
93.5 %
0
0 %
46
100 %
Dug well handpump 13
18.3 %
56
78.9 %
2
2.8 %
71
100 %
Spring protected 3
7.5 %
35
87.5 %
2
5 %
40
100 %
Spring unprotected 0
0 %
10
100 %
0
0 %
10
100 %
Rain harvest tank 0
0 %
1
5.3 %
18
94.7 %
19
100 %
NA 0
0 %
0
0 %
2
100 %
2
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=18.738 · df=6 · Cramer’s V=0.240 · Fisher’s p=0.009

2.1.2 Functionality and not-enough-funds

# Cross-tabulate `_4_18_enoughfundsrepair` (rows) against functionality
# (columns), with row percentages, an NA category and the summary footer.
sjt.xtab(
  db$`_4_18_enoughfundsrepair`,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Enough Funds Repair", "Functionality"),
  show.summary = TRUE
)
Enough Funds Repair Functionality Total
0 1 NA
0 16
7.9 %
186
92.1 %
0
0 %
202
100 %
1 6
5.6 %
100
92.6 %
2
1.9 %
108
100 %
NA 2
4.5 %
14
31.8 %
28
63.6 %
44
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=0.249 · df=1 · φ=0.042 · Fisher’s p=0.642

2.1.3 Functionality and fee collection system

# Cross-tabulate `_4_4_feecollectionsystem` (rows) against functionality
# (columns), with row percentages, an NA category and the summary footer.
sjt.xtab(
  db$`_4_4_feecollectionsystem`,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Fee collection system", "Functionality"),
  show.summary = TRUE
)
Fee collection
system
Functionality Total
0 1 NA
0 15
8.8 %
153
90 %
2
1.2 %
170
100 %
1 7
4.7 %
143
95.3 %
0
0 %
150
100 %
NA 2
5.9 %
4
11.8 %
28
82.4 %
34
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=1.622 · df=1 · φ=0.084 · p=0.203

2.1.4 Functionality and caretaker

# Cross-tabulate `_2_2_a_Caretaker` (rows) against functionality (columns),
# with row percentages, an NA category and the summary footer.
sjt.xtab(
  db$`_2_2_a_Caretaker`,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Caretaker", "Functionality"),
  show.summary = TRUE
)
Caretaker Functionality Total
0 1 NA
0 17
9.3 %
164
89.6 %
2
1.1 %
183
100 %
1 7
5 %
133
95 %
0
0 %
140
100 %
NA 0
0 %
3
9.7 %
28
90.3 %
31
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=1.612 · df=1 · φ=0.083 · p=0.204

2.1.5 Functionality and Plan O and M (_4_17_planOandM)

# Cross-tabulate `_4_17_planOandM` (rows) against functionality (columns),
# with row percentages, an NA category and the summary footer.
sjt.xtab(
  db$`_4_17_planOandM`,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Plan O and M", "Functionality"),
  show.summary = TRUE
)
Plan O and M Functionality Total
0 1 NA
0 14
12.1 %
102
87.9 %
0
0 %
116
100 %
1 9
4.6 %
184
94.8 %
1
0.5 %
194
100 %
NA 1
2.3 %
14
31.8 %
29
65.9 %
44
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=4.743 · df=1 · φ=0.137 · Fisher’s p=0.024

2.1.6 Functionality and Person Repair

# Cross-tabulate `_4_19_personrepair` (rows) against functionality (columns),
# with row percentages, an NA category and the summary footer.
sjt.xtab(
  db$`_4_19_personrepair`,
  db$`_3_1_a_functional`,
  title = " ",
  show.row.prc = TRUE,
  show.na = TRUE,
  var.labels = c("Person repair", "Functionality"),
  show.summary = TRUE
)
Person repair Functionality Total
0 1 NA
0 18
8.6 %
190
90.9 %
1
0.5 %
209
100 %
1 5
4.7 %
101
94.4 %
1
0.9 %
107
100 %
NA 1
2.6 %
9
23.7 %
28
73.7 %
38
100 %
Total 24
6.8 %
300
84.7 %
30
8.5 %
354
100 %
χ2=1.076 · df=1 · φ=0.071 · Fisher’s p=0.256

2.2 Regressions

2.2.1 Logistic with selected variables (DV=functional)

# Reference specification carried over from an earlier draft (kept verbatim;
# it looks like a Stata-style `logistic` command -- TODO confirm):
# logistic _3_1_a_functional watersourcetype2 _2_2_a_Caretaker _2_3_inspectorvisits _3_1_d_primaryuse _4_4_feecollectionsystem _4_17_planOandM _4_18_enoughfundsrepair _4_19_personrepair _4_20_persontrained _5_0_latrineaccess _5_1_a_facilitator
# db$reportedfeesdividedbyallusers

# Logistic regression of functionality on water source type, caretaker,
# enough-funds-for-repair and person-repair. The model is stored as `logit1`
# and then rendered as an odds-ratio table.
logit1 <- glm(
  `_3_1_a_functional` ~ Type + `_2_2_a_Caretaker` +
    `_4_18_enoughfundsrepair` + `_4_19_personrepair`,
  data = db,
  family = "binomial"
)
tab_model(logit1)
  3 1 a functional
Predictors Odds Ratios CI p
(Intercept) 20.80 5.97 – 72.40 <0.001
Borehole mech pump 3568289.34 0.00 – Inf 0.991
Dug well open 0.63 0.12 – 3.35 0.587
Dug well handpump 0.14 0.04 – 0.48 0.002
Spring protected 0.42 0.08 – 2.16 0.301
Spring unprotected 4907073.21 0.00 – Inf 0.994
Rain harvest tank 4201877.71 0.00 – Inf 0.998
_2_2_a_Caretaker 1.71 0.62 – 4.70 0.297
_4_18_enoughfundsrepair 0.99 0.34 – 2.89 0.991
_4_19_personrepair 1.33 0.43 – 4.12 0.619
Observations 298
Cox & Snell’s R2 / Nagelkerke’s R2 0.065 / 0.160
# did not work: 
# tab_model(logit1 <- glm(`_3_1_a_functional` ~ Type + `_2_2_a_Caretaker` + as.factor(`_3_1_d_primaryuse`) + as.factor(`_2_1_responsible`) +`_4_18_enoughfundsrepair`  +`_4_19_personrepair`, data = db, family = "binomial"))
# summary(logit1)

Variables _3_1_d_primaryuse and _2_1_responsible were omitted from this regression because they caused errors.

2.2.2 Logistic with all variables (DV=functional)

This model yields the warning message shown below.

# Logistic regression of functionality on the full set of candidate
# predictors, rendered directly as an odds-ratio table (the model object is
# not stored). In the rendered output this fit triggers the glm.fit warning
# "fitted probabilities numerically 0 or 1 occurred".
tab_model(
  glm(
    `_3_1_a_functional` ~ Type +
      `_2_2_a_Caretaker` +
      `_2_3_inspectorvisits` +
      as.factor(`_3_1_d_primaryuse`) +
      `_4_4_feecollectionsystem` +
      `_4_17_planOandM` +
      `_4_18_enoughfundsrepair` +
      `_4_19_personrepair` +
      `_4_20_persontrained` +
      `_5_0_latrineaccess` +
      `_5_1_a_facilitator`,
    data = db,
    family = "binomial"
  )
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
  3 1 a functional
Predictors Odds Ratios CI p
(Intercept) 70416446552692.53 0.00 – Inf 0.996
Borehole mech pump 217947894.36 0.00 – Inf 0.999
Dug well open 94912991.50 0.00 – Inf 0.999
Dug well handpump 0.06 0.00 – 4.26 0.199
Spring protected 1039086226.23 0.00 – Inf 0.999
Spring unprotected 108550026.68 0.00 – Inf 0.999
Rain harvest tank 3.16 0.00 – Inf 1.000
_2_2_a_Caretaker 0.07 0.00 – 9.74 0.287
_2_3_inspectorvisits 1.35 0.18 – 10.33 0.772
as factor(3 1 d<br>primaryuse)1 4.12 0.00 – Inf 1.000
as factor(3 1 d<br>primaryuse)2 0.03 0.00 – 3.45 0.143
as factor(3 1 d<br>primaryuse)3 96018860.80 0.00 – Inf 1.000
as factor(3 1 d<br>primaryuse)4 0.22 0.00 – 12.69 0.467
_4_4_feecollectionsystem 15.27 0.16 – 1429.15 0.239
_4_17_planOandM 0.00 0.00 – Inf 0.998
_4_18_enoughfundsrepair 0.50 0.01 – 18.10 0.705
_4_19_personrepair 3.78 0.12 – 119.10 0.450
_4_20_persontrained 75573443.09 0.00 – Inf 0.999
_5_0_latrineaccess 0.95 0.75 – 1.20 0.666
_5_1_a_facilitator 0.08 0.00 – 3.73 0.196
Observations 223
Cox & Snell’s R2 / Nagelkerke’s R2 0.064 / 0.482

Cross-table of water source type and functionality:

# Raw counts of water source type by functionality; NA is always shown as
# its own row/column.
with(
  db,
  table(Type, `_3_1_a_functional`, useNA = "always")
)
##                     _3_1_a_functional
## Type                   0   1 <NA>
##   Borehole handpump    5 133    6
##   Borehole mech pump   0  22    0
##   Dug well open        3  43    0
##   Dug well handpump   13  56    2
##   Spring protected     3  35    2
##   Spring unprotected   0  10    0
##   Rain harvest tank    0   1   18
##   <NA>                 0   0    2

Cross-table of fee collection system and functionality:

# Raw counts of fee collection system by functionality; NA is always shown
# as its own row/column.
with(
  db,
  table(`_4_4_feecollectionsystem`, `_3_1_a_functional`, useNA = "always")
)
##                         _3_1_a_functional
## _4_4_feecollectionsystem   0   1 <NA>
##                     0     15 153    2
##                     1      7 143    0
##                     <NA>   2   4   28

3 Dependent Variable: 3.3.a - length of last breakdown

3.1 Descriptive statistics

Length of breakdown:

Length of last breakdown
Length Frequency Total
Count NA
0 1 0 1
0.5 6 0 6
0.75 3 0 3
1 37 0 37
2 29 0 29
3 3 0 3
3.5 1 0 1
4 11 0 11
5 2 0 2
7 15 0 15
10.5 1 0 1
11 1 0 1
12 1 0 1
14 9 0 9
15 2 0 2
16 2 0 2
20 2 0 2
21 2 0 2
28 1 0 1
30 10 0 10
31 1 0 1
38 1 0 1
45 2 0 2
60 6 0 6
61 1 0 1
65 1 0 1
84 1 0 1
90 3 0 3
100 1 0 1
120 6 0 6
150 2 0 2
180 1 0 1
360 3 0 3
365 2 0 2
420 1 0 1
1080 2 0 2
1440 1 0 1
1460 2 0 2
1800 1 0 1
3240 1 0 1
3600 2 0 2
6570 1 0 1
NA 173 0 173
Total 354 0 354

##   [1]   14.00    7.00   30.00    2.00      NA    1.00   30.00    1.00
##   [9]      NA   14.00      NA   28.00    1.00  100.00    1.00    2.00
##  [17]    1.00   30.00   84.00    2.00    0.50      NA      NA      NA
##  [25]   30.00   16.00   90.00    1.00    2.00      NA   11.00      NA
##  [33]    4.00   20.00   14.00    1.00    4.00    0.50    0.50    7.00
##  [41]      NA   14.00      NA      NA    4.00    2.00      NA      NA
##  [49]   30.00    1.00      NA   60.00  150.00    2.00 1080.00    1.00
##  [57]      NA    7.00    1.00   10.50    2.00  360.00   14.00    1.00
##  [65]    2.00      NA      NA   21.00    2.00    1.00   21.00   16.00
##  [73]    3.00    4.00   30.00  365.00    4.00    2.00      NA      NA
##  [81]      NA      NA      NA      NA      NA      NA    4.00      NA
##  [89]    2.00      NA      NA      NA      NA    4.00    4.00  150.00
##  [97]      NA      NA   60.00      NA      NA   15.00      NA    1.00
## [105]    2.00    1.00      NA      NA    7.00      NA      NA      NA
## [113]      NA      NA      NA      NA      NA      NA      NA    2.00
## [121]    7.00      NA      NA      NA      NA      NA    0.50      NA
## [129]    2.00    2.00      NA      NA   30.00      NA    5.00      NA
## [137]    0.00      NA    1.00      NA    2.00    3.00      NA    3.00
## [145]    1.00    1.00      NA      NA      NA    2.00   30.00    2.00
## [153]      NA      NA      NA    2.00      NA    3.50  365.00      NA
## [161]      NA    7.00   61.00    1.00      NA   14.00 1460.00    7.00
## [169]   38.00      NA      NA    4.00      NA      NA   30.00  120.00
## [177]      NA    4.00      NA 1800.00 3240.00      NA    1.00    2.00
## [185]      NA      NA      NA    2.00      NA      NA  120.00  420.00
## [193]      NA    1.00      NA      NA      NA      NA      NA    2.00
## [201]      NA      NA      NA      NA    2.00 1440.00      NA      NA
## [209]  180.00      NA      NA   60.00      NA      NA      NA      NA
## [217]      NA 1460.00      NA  360.00      NA      NA      NA      NA
## [225] 3600.00      NA      NA      NA   60.00 3600.00      NA      NA
## [233]      NA      NA      NA      NA      NA      NA   45.00      NA
## [241]   14.00      NA   12.00      NA      NA 1080.00      NA   90.00
## [249]  120.00      NA  120.00      NA      NA      NA      NA   30.00
## [257]    7.00      NA      NA   14.00      NA      NA      NA    2.00
## [265]      NA    4.00    1.00    1.00   90.00  120.00    0.75    1.00
## [273]    1.00  360.00      NA    7.00   45.00      NA   14.00    1.00
## [281]      NA    7.00    7.00    1.00    1.00   20.00      NA      NA
## [289]      NA    1.00    0.75   65.00   60.00 6570.00      NA    7.00
## [297]      NA      NA    1.00      NA   15.00    7.00    1.00    1.00
## [305]    2.00    2.00    1.00    1.00      NA    1.00    5.00      NA
## [313]  120.00    1.00   31.00    0.50    1.00    7.00    2.00    7.00
## [321]    0.75      NA      NA      NA      NA      NA      NA      NA
## [329]      NA    2.00      NA      NA      NA      NA      NA      NA
## [337]      NA      NA    1.00   60.00    0.50      NA      NA      NA
## [345]      NA      NA      NA      NA      NA      NA      NA      NA
## [353]      NA    2.00

Log transformed:

# Work on a copy of the raw breakdown-length column so the original column
# stays untouched, then list the frequency of each value (NA included).
db[["lengthbreakdown2"]] <- db[["_3_3_a_lengthbreakdown"]]
table(
  db$lengthbreakdown2,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0        1    0
##   0.5      6    0
##   0.75     3    0
##   1       37    0
##   2       29    0
##   3        3    0
##   3.5      1    0
##   4       11    0
##   5        2    0
##   7       15    0
##   10.5     1    0
##   11       1    0
##   12       1    0
##   14       9    0
##   15       2    0
##   16       2    0
##   20       2    0
##   21       2    0
##   28       1    0
##   30      10    0
##   31       1    0
##   38       1    0
##   45       2    0
##   60       6    0
##   61       1    0
##   65       1    0
##   84       1    0
##   90       3    0
##   100      1    0
##   120      6    0
##   150      2    0
##   180      1    0
##   360      3    0
##   365      2    0
##   420      1    0
##   1080     2    0
##   1440     1    0
##   1460     2    0
##   1800     1    0
##   3240     1    0
##   3600     2    0
##   6570     1    0
##   <NA>   173    0
# Turn zero-length breakdowns into NA so that taking log() of this column
# never yields -Inf.
#
# This uses base R instead of an unqualified recode(x, "0=NA"): that call
# relies on car::recode, but dplyr also exports recode() with a different
# interface, so the result depended on which package was attached last.
# which() ignores existing NAs, so they are left as NA.
db$lengthbreakdown3 <- replace(
  db$lengthbreakdown2,
  which(db$lengthbreakdown2 == 0),
  NA
)
table(db$lengthbreakdown3, db$count, useNA = "always")
##       
##        Count <NA>
##   0.5      6    0
##   0.75     3    0
##   1       37    0
##   2       29    0
##   3        3    0
##   3.5      1    0
##   4       11    0
##   5        2    0
##   7       15    0
##   10.5     1    0
##   11       1    0
##   12       1    0
##   14       9    0
##   15       2    0
##   16       2    0
##   20       2    0
##   21       2    0
##   28       1    0
##   30      10    0
##   31       1    0
##   38       1    0
##   45       2    0
##   60       6    0
##   61       1    0
##   65       1    0
##   84       1    0
##   90       3    0
##   100      1    0
##   120      6    0
##   150      2    0
##   180      1    0
##   360      3    0
##   365      2    0
##   420      1    0
##   1080     2    0
##   1440     1    0
##   1460     2    0
##   1800     1    0
##   3240     1    0
##   3600     2    0
##   6570     1    0
##   <NA>   174    0
# Natural log of the breakdown length. Zeros were recoded to NA in
# lengthbreakdown3, so no -Inf values are produced here.
db$logLenBrk <- log(db$lengthbreakdown3)

# Histogram of the log-transformed breakdown length.
sjp.frq(db$logLenBrk, type = "hist")

3.2 Regressions

3.2.1 OLS, without log transformation (DV = length of most recent breakdown)

# Earlier, wider specification (kept for reference, not run):
# tab_model(ols0 <- lm(`_3_3_a_lengthbreakdown` ~ watersourcetype2 +`_2_2_a_Caretaker` +`_2_3_inspectorvisits` +`_3_1_d_primaryuse` +`_4_4_feecollectionsystem` +`_4_17_planOandM` +`_4_18_enoughfundsrepair` +`_4_19_personrepair` +`_4_20_persontrained` +`_5_0_latrineaccess` +`_5_1_a_facilitator` + `_4_9_actualmonthlyfees` + reportedfeesdividedbyallusers, data = db))

# OLS on the untransformed breakdown length. The model is stored as `ols0`
# and then rendered as a coefficient table.
ols0 <- lm(
  `_3_3_a_lengthbreakdown` ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained`,
  data = db
)
tab_model(ols0)
  3 3 a lengthbreakdown
Predictors Estimates CI p
(Intercept) 175.74 -417.70 – 769.17 0.563
Borehole mech pump -51.98 -530.26 – 426.30 0.832
Dug well open -33.10 -753.66 – 687.46 0.928
Dug well handpump -18.58 -280.39 – 243.24 0.890
Rain harvest tank -267.91 -1528.12 – 992.29 0.678
_2_2_a_Caretaker -133.83 -399.76 – 132.09 0.326
as factor(3 1 d<br>primaryuse)1 -181.09 -1456.87 – 1094.69 0.781
as factor(3 1 d<br>primaryuse)2 47.12 -255.68 – 349.93 0.761
as factor(3 1 d<br>primaryuse)4 69.13 -213.33 – 351.59 0.632
as factor(2 1<br>responsible)2 27.48 -511.75 – 566.72 0.921
as factor(2 1<br>responsible)4 -146.56 -1468.82 – 1175.69 0.828
as factor(2 1<br>responsible)5 -31.52 -1375.55 – 1312.51 0.963
as factor(2 1<br>responsible)6 97.12 -455.57 – 649.82 0.731
as factor(2 1<br>responsible)7 53.94 -1325.62 – 1433.49 0.939
as factor(2 1<br>responsible)8 -42.13 -757.38 – 673.12 0.908
as factor(2 1<br>responsible)9 66.91 -469.53 – 603.35 0.807
as factor(2 1<br>responsible)10 -49.69 -1062.06 – 962.68 0.924
_4_4_feecollectionsystem -122.44 -393.44 – 148.56 0.378
_4_17_planOandM 139.32 -120.86 – 399.50 0.296
_4_18_enoughfundsrepair 30.33 -229.11 – 289.77 0.819
_4_19_personrepair -194.83 -451.42 – 61.76 0.139
_4_20_persontrained -16.69 -346.31 – 312.94 0.921
Observations 145
R2 / adjusted R2 0.066 / -0.093

3.2.2 OLS, with log transformation (DV = log of length of most recent breakdown)

3.2.2.1 Model results

# Earlier, wider specification (kept for reference, not run):
# tab_model(ols1 <- lm(logLenBrk ~ watersourcetype2 +`_2_2_a_Caretaker` +`_2_3_inspectorvisits` +`_3_1_d_primaryuse` +`_4_4_feecollectionsystem` +`_4_17_planOandM` +`_4_18_enoughfundsrepair` +`_4_19_personrepair` +`_4_20_persontrained` +`_5_0_latrineaccess` +`_5_1_a_facilitator` + `_4_9_actualmonthlyfees` + reportedfeesdividedbyallusers, data = db))

# OLS on the log of breakdown length. The model is stored as `ols1` and then
# rendered as a coefficient table.
ols1 <- lm(
  logLenBrk ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained`,
  data = db
)
tab_model(ols1)
  log Len Brk
Predictors Estimates CI p
(Intercept) 2.31 0.56 – 4.05 0.011
Borehole mech pump 0.57 -0.85 – 1.98 0.435
Dug well open 1.52 -0.61 – 3.64 0.164
Dug well handpump 0.20 -0.58 – 0.99 0.611
Rain harvest tank -1.02 -4.73 – 2.70 0.593
_2_2_a_Caretaker -0.30 -1.09 – 0.48 0.448
as factor(3 1 d<br>primaryuse)1 -2.24 -6.03 – 1.54 0.248
as factor(3 1 d<br>primaryuse)2 -0.22 -1.11 – 0.68 0.637
as factor(3 1 d<br>primaryuse)4 0.69 -0.15 – 1.52 0.109
as factor(2 1<br>responsible)2 -0.16 -1.74 – 1.43 0.846
as factor(2 1<br>responsible)4 -0.62 -4.51 – 3.28 0.757
as factor(2 1<br>responsible)5 -0.68 -4.64 – 3.27 0.735
as factor(2 1<br>responsible)6 -0.04 -1.66 – 1.59 0.966
as factor(2 1<br>responsible)7 -1.75 -5.81 – 2.32 0.401
as factor(2 1<br>responsible)8 0.14 -1.97 – 2.24 0.899
as factor(2 1<br>responsible)9 -1.15 -2.73 – 0.42 0.155
as factor(2 1<br>responsible)10 -1.58 -4.56 – 1.40 0.301
_4_4_feecollectionsystem -0.42 -1.23 – 0.38 0.305
_4_17_planOandM 0.87 0.10 – 1.64 0.029
_4_18_enoughfundsrepair 0.10 -0.66 – 0.87 0.790
_4_19_personrepair -1.28 -2.05 – -0.50 0.002
_4_20_persontrained 0.18 -0.84 – 1.19 0.734
Observations 144
R2 / adjusted R2 0.253 / 0.124
# The fit improves and more observations are retained when `_4_9_actualmonthlyfees` and reportedfeesdividedbyallusers are left out

3.2.2.2 Post-estimation tests

# Flag the rows of db that entered ols1: start with every row marked as used,
# then clear the flag for the rows lm() dropped because of missing values.
dropped_rows <- na.action(ols1)
db$used <- rep(TRUE, nrow(db))
db$used[dropped_rows] <- FALSE

# Outcome restricted to the estimation sample (NA for rows not used).
db$yols1 <- ifelse(db$used, db$logLenBrk, NA)
describe(db$yols1)
## db$yols1 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      144      210       32    0.981    1.778     2.06  -0.2445   0.0000 
##      .25      .50      .75      .90      .95 
##   0.0000   1.3863   2.8284   4.3348   5.7548 
## 
## lowest : -0.6931472 -0.2876821  0.0000000  0.6931472  1.0986123
## highest:  5.8998974  6.0402547  7.2861917  7.4955419  8.7902691
# Observed outcome for the rows ols1 actually used (NA rows removed), so it
# lines up element-for-element with the fitted values and residuals below.
yols1vec <- db$yols1
yols1a <- na.omit(yols1vec)

# Optional sanity checks on the estimation-sample flag:
# head(db[c("used","logLenBrk", "yols1")], n=50)
# with(db, table(used, useNA = 'always'))

# Fitted values and residuals from ols1. ols1 is fitted without weights, and
# for an unweighted lm the "pearson" residuals equal the "response"
# residuals, so the two residual plots below show the same values.
ols1yhat <- fitted(ols1)
ols1rr <- resid(ols1, type = "response")
ols1rp <- resid(ols1, type = "pearson")

# Draw the six diagnostic plots on a 2 x 3 grid (filled column by column).
# The previous graphics settings are saved and restored afterwards so that
# the histogram below is drawn full-size rather than in one grid cell.
old_par <- par(mfcol = c(2, 3))

plot(ols1yhat, ols1rr, main = "Fitted vs Residuals")
qqnorm(ols1rr)
plot(ols1yhat, ols1rp, main = "Fitted vs Pearson Residuals")
qqnorm(ols1rp)
plot(yols1a, ols1rp, main = "Actual vs Pearson Residuals")
plot(yols1a, ols1yhat, main = "Actual vs Fitted")

par(old_par)

# Distribution of the raw residuals.
hist(
  ols1rr,
  main = "Actual Residuals",
  xlab = "Residuals",
  border = "black",
  col = "skyblue",
  # xlim=c(0,125),
  las = 1,
  breaks = 15
)

# https://www.princeton.edu/~otorres/Regression101R.pdf # OLS diagnostics in r
# https://www.statmethods.net/stats/rdiagnostics.html # OLS diagnostics
# https://stats.stackexchange.com/questions/117052/replicating-statas-robust-option-in-r # robust standard errors
# https://data.princeton.edu/wws509/r/robust # robust 
residualPlots(ols1)

##                                Test stat Pr(>|Test stat|)  
## Type                                                       
## as.factor(`_3_1_d_primaryuse`)                             
## as.factor(`_2_1_responsible`)                              
## Tukey test                         1.885          0.05943 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Correlation between the observed outcome and the fitted values.
cor(x = yols1a, y = ols1yhat)
## [1] 0.5029579
# Correlation between the fitted values and the residuals.
cor(x = ols1yhat, y = ols1rr)
## [1] -6.592429e-17

3.2.3 OLS, with log transformation (DV = log of length of most recent breakdown), added variables

This model adds _4_9_actualmonthlyfees and reportedfeesdividedbyallusers, which reduces the sample size.

3.2.3.1 Model results - added variables

# OLS on the log of breakdown length with two extra fee variables
# (reportedfeesdividedbyallusers and `_4_9_actualmonthlyfees`).
# NOTE(review): this reuses the name `ols1`, so it overwrites the model fitted
# without the fee variables; anything that should describe that earlier model
# has to run before this statement.
ols1 <- lm(
  logLenBrk ~ Type +
    `_2_2_a_Caretaker` +
    as.factor(`_3_1_d_primaryuse`) +
    as.factor(`_2_1_responsible`) +
    `_4_4_feecollectionsystem` +
    `_4_17_planOandM` +
    `_4_18_enoughfundsrepair` +
    `_4_19_personrepair` +
    `_4_20_persontrained` +
    reportedfeesdividedbyallusers +
    `_4_9_actualmonthlyfees`,
  data = db
)
tab_model(ols1)
  log Len Brk
Predictors Estimates CI p
(Intercept) 0.94 -2.91 – 4.80 0.633
Borehole mech pump 0.03 -4.08 – 4.14 0.989
Dug well open 2.60 -0.81 – 6.02 0.141
Dug well handpump -0.15 -1.13 – 0.82 0.759
_2_2_a_Caretaker -0.38 -1.63 – 0.87 0.557
as factor(3 1 d<br>primaryuse)2 -0.61 -1.68 – 0.47 0.273
as factor(3 1 d<br>primaryuse)4 0.09 -0.83 – 1.01 0.849
as factor(2 1<br>responsible)2 -0.50 -2.19 – 1.19 0.563
as factor(2 1<br>responsible)6 -0.94 -2.86 – 0.98 0.343
as factor(2 1<br>responsible)8 -1.35 -4.41 – 1.70 0.389
as factor(2 1<br>responsible)9 -1.43 -3.23 – 0.38 0.126
as factor(2 1<br>responsible)10 -3.56 -7.26 – 0.14 0.064
_4_4_feecollectionsystem 1.63 -1.52 – 4.79 0.313
_4_17_planOandM 0.52 -0.40 – 1.44 0.271
_4_18_enoughfundsrepair -0.22 -1.15 – 0.72 0.653
_4_19_personrepair -0.47 -1.40 – 0.47 0.331
_4_20_persontrained 0.12 -1.34 – 1.59 0.872
reportedfeesdividedbyallusers 0.00 -0.00 – 0.00 0.442
_4_9_actualmonthlyfees -0.00 -0.00 – 0.00 0.365
Observations 80
R2 / adjusted R2 0.250 / 0.029

4 Dependent Variable: 3.4.a - broken down in last year

Coming soon

5 Independent variables descriptive statistics

# cor(db[, c('_2_2_a_Caretaker','_2_3_inspectorvisits','_3_1_d_primaryuse','_4_4_feecollectionsystem','_4_17_planOandM','_4_18_enoughfundsrepair','_4_19_personrepair','_4_20_persontrained','_5_0_latrineaccess','_5_1_a_facilitator')])

# Frequency of each `_2_2_a_Caretaker` value, with NA always listed.
table(
  db$`_2_2_a_Caretaker`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      183    0
##   1      140    0
##   <NA>    31    0
# Frequency of each `_2_3_inspectorvisits` value, with NA always listed.
table(
  db$`_2_3_inspectorvisits`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      222    0
##   0.5      4    0
##   1       48    0
##   2       13    0
##   3       10    0
##   4        5    0
##   6        1    0
##   9        2    0
##   12       1    0
##   120      1    0
##   <NA>    47    0
# Frequency of each `_3_1_d_primaryuse` value, with NA always listed.
table(
  db$`_3_1_d_primaryuse`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      171    0
##   1        1    0
##   2       44    0
##   3        2    0
##   4       86    0
##   <NA>    50    0
# Frequency of each `_4_4_feecollectionsystem` value, with NA always listed.
table(
  db$`_4_4_feecollectionsystem`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      170    0
##   1      150    0
##   <NA>    34    0
# Frequency of each `_4_17_planOandM` value, with NA always listed.
table(
  db$`_4_17_planOandM`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      116    0
##   1      194    0
##   <NA>    44    0
# Frequency of each `_4_18_enoughfundsrepair` value, with NA always listed.
table(
  db$`_4_18_enoughfundsrepair`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      202    0
##   1      108    0
##   <NA>    44    0
# Frequency of each `_4_19_personrepair` value, with NA always listed.
table(
  db$`_4_19_personrepair`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      209    0
##   1      107    0
##   <NA>    38    0
# Frequency of each `_4_20_persontrained` value, with NA always listed.
table(
  db$`_4_20_persontrained`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      279    0
##   1       41    0
##   <NA>    34    0
# Frequency of each `_5_0_latrineaccess` value, with NA always listed.
table(
  db$`_5_0_latrineaccess`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   2        1    0
##   5        1    0
##   30       1    0
##   50       3    0
##   55       1    0
##   70       8    0
##   75       7    0
##   77       1    0
##   77.5     1    0
##   80      22    0
##   82       1    0
##   85       8    0
##   87       1    0
##   90      26    0
##   92       2    0
##   93       3    0
##   95      20    0
##   96       1    0
##   97       4    0
##   97.5     2    0
##   98      19    0
##   99      16    0
##   99.5     2    0
##   100    166    0
##   <NA>    37    0
# Frequency of each `_5_1_a_facilitator` value, with NA always listed.
table(
  db$`_5_1_a_facilitator`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0      181    0
##   1      107    0
##   <NA>    66    0
# Frequency of each `_4_9_actualmonthlyfees` value, with NA always listed.
table(
  db$`_4_9_actualmonthlyfees`,
  db$count,
  useNA = "always"
)
##         
##          Count <NA>
##   0          1    0
##   2700       1    0
##   5000       1    0
##   6000       1    0
##   10000      3    0
##   13000      1    0
##   15000      7    0
##   16000      2    0
##   18000      1    0
##   19000      1    0
##   20000      8    0
##   21000      1    0
##   22500      1    0
##   24000      2    0
##   25000      2    0
##   26000      1    0
##   27000      1    0
##   30000      6    0
##   35000      2    0
##   36000      1    0
##   40000      7    0
##   45000      7    0
##   46000      2    0
##   47500      1    0
##   48000      2    0
##   49000      1    0
##   50000      8    0
##   55000      2    0
##   60000      8    0
##   62500      1    0
##   65000      1    0
##   68000      1    0
##   70000      3    0
##   75000      3    0
##   80000      3    0
##   84000      1    0
##   85000      1    0
##   90000      2    0
##   96500      1    0
##   1e+05      5    0
##   106000     1    0
##   140000     1    0
##   141000     1    0
##   150000     2    0
##   160000     1    0
##   170000     1    0
##   192500     1    0
##   2e+05      2    0
##   220000     1    0
##   250000     1    0
##   280000     1    0
##   7e+05      1    0
##   9e+05      1    0
##   2e+06      1    0
##   <NA>     234    0
# Frequency of each reportedfeesdividedbyallusers value, with NA always
# listed.
table(
  db$reportedfeesdividedbyallusers,
  db$count,
  useNA = "always"
)
##                   
##                    Count <NA>
##   0                    1    0
##   30                   1    0
##   40                   1    0
##   51.2820510864258     1    0
##   60                   1    0
##   82.8729248046875     1    0
##   100                  2    0
##   112.5                1    0
##   125                  1    0
##   140                  1    0
##   153.846160888672     1    0
##   160                  1    0
##   171.428573608398     1    0
##   194.444442749023     1    0
##   200                  3    0
##   225                  3    0
##   230.769226074219     1    0
##   250                  3    0
##   300                  3    0
##   320                  1    0
##   327.868865966797     1    0
##   343.137268066406     1    0
##   350                  1    0
##   385.714294433594     1    0
##   400                  1    0
##   448.837219238281     1    0
##   450                  1    0
##   466.666656494141     1    0
##   500                  4    0
##   516.129028320312     1    0
##   571.428588867188     1    0
##   575                  1    0
##   588.235290527344     1    0
##   600                  1    0
##   617.9775390625       1    0
##   625                  2    0
##   642.857116699219     1    0
##   714.285705566406     1    0
##   722.222229003906     1    0
##   735.294128417969     1    0
##   800                  2    0
##   818.965515136719     1    0
##   833.333312988281     2    0
##   838.709655761719     1    0
##   900                  2    0
##   933.333312988281     1    0
##   941.176452636719     1    0
##   1000                 8    0
##   1007.14288330078     1    0
##   1080                 1    0
##   1088.88891601562     1    0
##   1111.11108398438     1    0
##   1145.83337402344     1    0
##   1200                 1    0
##   1203.125             1    0
##   1250                 4    0
##   1357.14282226562     1    0
##   1360                 1    0
##   1371.42858886719     1    0
##   1400                 1    0
##   1428.57141113281     1    0
##   1523.8095703125      1    0
##   1700                 1    0
##   1714.28576660156     1    0
##   1750                 1    0
##   1766.66662597656     1    0
##   1840                 1    0
##   1875                 1    0
##   2000                 2    0
##   2125                 1    0
##   2250                 1    0
##   2400                 1    0
##   2500                 2    0
##   2666.66674804688     2    0
##   3000                 1    0
##   3750                 1    0
##   4000                 1    0
##   4400                 1    0
##   7000                 1    0
##   10000                2    0
##   <NA>               244    0
# Frequency of each `_2_1_responsible` value, with NA always listed.
table(
  db$`_2_1_responsible`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   1       12    0
##   2       70    0
##   3        8    0
##   4        1    0
##   5        5    0
##   6      119    0
##   7        2    0
##   8       63    0
##   9       70    0
##   10       4    0
##   <NA>     0    0
# Frequency of each `__4_15_savingsstorage` value, with NA always listed.
table(
  db$`__4_15_savingsstorage`,
  db$count,
  useNA = "always"
)
##       
##        Count <NA>
##   0       42    0
##   1        4    0
##   3        2    0
##   4       37    0
##   5       56    0
##   6        9    0
##   <NA>   204    0