An informal analysis conducted on 19 Feb 2024 of whether demographics vary by call attempt using Bihar V18 round.
library(tidyverse)
library(ggplot2)
library(lubridate)
library(janitor)
library(readr)
library(table1)
library(gtsummary)
library(nnet)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
options(scipen = 999)
d1 <- read_csv("Nomination Survey V18_WIDE.csv")
d1 %>% tabyl(call_status)
call_status n percent
1 3001 0.13406299
2 1188 0.05307125
3 2393 0.10690194
4 15004 0.67027027
5 799 0.03569354
1 = picked up 2 = asked to call back later 3 = invalid/ wrong number 4 = rang, no answer 5 = DND, do not disturb
d1 %>% group_by(sno) %>% summarise(count= n())
d1 %>% group_by(sno, call_status) %>% summarise(count= n())
`summarise()` has grouped output by 'sno'. You can override using the `.groups` argument.
d1 <- d1 %>% group_by(sno) %>% mutate(count= n()) %>% ungroup()
d1 <- d1 %>% group_by(sno) %>% mutate(attempts = sum(call_status == 4, na.rm = TRUE)+ 1) %>% ungroup()
d1 %>% tabyl(attempts, count)
attempts 1 2 3 4 5 6 7 8 9
1 1176 794 1263 240 40 0 0 0 0
2 23 1168 603 356 250 36 0 0 0
3 0 4 1011 424 290 36 0 0 0
4 0 0 3 916 955 192 14 8 0
5 0 0 0 0 2480 426 21 0 0
6 0 0 0 0 7400 1230 126 24 0
7 0 0 0 0 0 666 98 16 0
8 0 0 0 0 0 0 70 0 9
9 0 0 0 0 0 0 0 8 9
d2 <- d1 %>% filter(consent == 1)
#glimpse(d2)
1
[1] 1
table1(~ age + factor(gender) + factor(caste) + factor(education) + factor(occupation) + factor(is_vhnd_RI) + factor(nominate) | factor(count), data = d1)
| 1 (N=1199) |
2 (N=1966) |
3 (N=2880) |
4 (N=1936) |
5 (N=11415) |
6 (N=2586) |
7 (N=329) |
8 (N=56) |
9 (N=18) |
Overall (N=22385) |
|
|---|---|---|---|---|---|---|---|---|---|---|
| age | ||||||||||
| Mean (SD) | 31.1 (9.67) | 31.6 (10.1) | 30.8 (9.72) | 31.7 (9.63) | 30.2 (8.33) | 31.9 (10.1) | 27.6 (5.77) | 35.0 (NA) | NA (NA) | 31.2 (9.70) |
| Median [Min, Max] | 28.0 [18.0, 73.0] | 29.0 [18.0, 75.0] | 28.0 [18.0, 70.0] | 30.0 [18.0, 63.0] | 28.0 [18.0, 69.0] | 30.0 [18.0, 70.0] | 25.0 [19.0, 35.0] | 35.0 [35.0, 35.0] | NA [NA, NA] | 29.0 [18.0, 75.0] |
| Missing | 291 (24.3%) | 1199 (61.0%) | 2380 (82.6%) | 1624 (83.9%) | 11221 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19631 (87.7%) |
| factor(gender) | ||||||||||
| 1 | 624 (52.0%) | 545 (27.7%) | 361 (12.5%) | 208 (10.7%) | 122 (1.1%) | 49 (1.9%) | 6 (1.8%) | 1 (1.8%) | 0 (0%) | 1916 (8.6%) |
| 2 | 280 (23.4%) | 219 (11.1%) | 135 (4.7%) | 101 (5.2%) | 71 (0.6%) | 14 (0.5%) | 3 (0.9%) | 0 (0%) | 0 (0%) | 823 (3.7%) |
| Missing | 295 (24.6%) | 1202 (61.1%) | 2384 (82.8%) | 1627 (84.0%) | 11222 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19646 (87.8%) |
| factor(caste) | ||||||||||
| -9999 | 4 (0.3%) | 1 (0.1%) | 2 (0.1%) | 0 (0%) | 0 (0%) | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 8 (0.0%) |
| -99 | 0 (0%) | 5 (0.3%) | 2 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 7 (0.0%) |
| 1 | 199 (16.6%) | 152 (7.7%) | 114 (4.0%) | 54 (2.8%) | 42 (0.4%) | 19 (0.7%) | 1 (0.3%) | 0 (0%) | 0 (0%) | 581 (2.6%) |
| 2 | 568 (47.4%) | 484 (24.6%) | 306 (10.6%) | 206 (10.6%) | 113 (1.0%) | 36 (1.4%) | 8 (2.4%) | 0 (0%) | 0 (0%) | 1721 (7.7%) |
| 3 | 133 (11.1%) | 122 (6.2%) | 72 (2.5%) | 50 (2.6%) | 38 (0.3%) | 7 (0.3%) | 0 (0%) | 1 (1.8%) | 0 (0%) | 423 (1.9%) |
| Missing | 295 (24.6%) | 1202 (61.1%) | 2384 (82.8%) | 1626 (84.0%) | 11222 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19645 (87.8%) |
| factor(education) | ||||||||||
| -99 | 0 (0%) | 1 (0.1%) | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 2 (0.0%) |
| 0 | 83 (6.9%) | 58 (3.0%) | 42 (1.5%) | 20 (1.0%) | 17 (0.1%) | 8 (0.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 228 (1.0%) |
| 1 | 129 (10.8%) | 109 (5.5%) | 62 (2.2%) | 38 (2.0%) | 29 (0.3%) | 3 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 370 (1.7%) |
| 2 | 349 (29.1%) | 286 (14.5%) | 201 (7.0%) | 121 (6.3%) | 74 (0.6%) | 25 (1.0%) | 4 (1.2%) | 0 (0%) | 0 (0%) | 1060 (4.7%) |
| 3 | 342 (28.5%) | 308 (15.7%) | 190 (6.6%) | 131 (6.8%) | 73 (0.6%) | 27 (1.0%) | 5 (1.5%) | 1 (1.8%) | 0 (0%) | 1077 (4.8%) |
| Missing | 296 (24.7%) | 1204 (61.2%) | 2384 (82.8%) | 1626 (84.0%) | 11222 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19648 (87.8%) |
| factor(occupation) | ||||||||||
| -9999 | 2 (0.2%) | 0 (0%) | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (0.0%) |
| -99 | 0 (0%) | 2 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 2 (0.0%) |
| 1 | 125 (10.4%) | 88 (4.5%) | 61 (2.1%) | 32 (1.7%) | 32 (0.3%) | 10 (0.4%) | 3 (0.9%) | 1 (1.8%) | 0 (0%) | 352 (1.6%) |
| 2 | 145 (12.1%) | 116 (5.9%) | 72 (2.5%) | 48 (2.5%) | 38 (0.3%) | 12 (0.5%) | 0 (0%) | 0 (0%) | 0 (0%) | 431 (1.9%) |
| 3 | 24 (2.0%) | 16 (0.8%) | 11 (0.4%) | 9 (0.5%) | 3 (0.0%) | 3 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 66 (0.3%) |
| 4 | 15 (1.3%) | 11 (0.6%) | 9 (0.3%) | 2 (0.1%) | 0 (0%) | 0 (0%) | 1 (0.3%) | 0 (0%) | 0 (0%) | 38 (0.2%) |
| 5 | 97 (8.1%) | 75 (3.8%) | 57 (2.0%) | 29 (1.5%) | 20 (0.2%) | 5 (0.2%) | 0 (0%) | 0 (0%) | 0 (0%) | 283 (1.3%) |
| 6 | 175 (14.6%) | 140 (7.1%) | 72 (2.5%) | 47 (2.4%) | 37 (0.3%) | 10 (0.4%) | 0 (0%) | 0 (0%) | 0 (0%) | 481 (2.1%) |
| 7 | 312 (26.0%) | 295 (15.0%) | 192 (6.7%) | 117 (6.0%) | 55 (0.5%) | 22 (0.9%) | 5 (1.5%) | 0 (0%) | 0 (0%) | 998 (4.5%) |
| 8 | 3 (0.3%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (0.0%) |
| 9 | 5 (0.4%) | 19 (1.0%) | 21 (0.7%) | 25 (1.3%) | 7 (0.1%) | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 78 (0.3%) |
| Missing | 296 (24.7%) | 1204 (61.2%) | 2384 (82.8%) | 1627 (84.0%) | 11223 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19650 (87.8%) |
| factor(is_vhnd_RI) | ||||||||||
| -9999 | 262 (21.9%) | 210 (10.7%) | 127 (4.4%) | 73 (3.8%) | 42 (0.4%) | 27 (1.0%) | 2 (0.6%) | 0 (0%) | 0 (0%) | 743 (3.3%) |
| 0 | 351 (29.3%) | 319 (16.2%) | 230 (8.0%) | 139 (7.2%) | 90 (0.8%) | 19 (0.7%) | 4 (1.2%) | 1 (1.8%) | 0 (0%) | 1153 (5.2%) |
| 1 | 291 (24.3%) | 233 (11.9%) | 140 (4.9%) | 98 (5.1%) | 62 (0.5%) | 16 (0.6%) | 3 (0.9%) | 0 (0%) | 0 (0%) | 843 (3.8%) |
| Missing | 295 (24.6%) | 1204 (61.2%) | 2383 (82.7%) | 1626 (84.0%) | 11221 (98.3%) | 2524 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19646 (87.8%) |
| factor(nominate) | ||||||||||
| -9999 | 3 (0.3%) | 1 (0.1%) | 2 (0.1%) | 2 (0.1%) | 0 (0%) | 1 (0.0%) | 0 (0%) | 0 (0%) | 0 (0%) | 9 (0.0%) |
| 0 | 435 (36.3%) | 375 (19.1%) | 242 (8.4%) | 171 (8.8%) | 97 (0.8%) | 26 (1.0%) | 5 (1.5%) | 0 (0%) | 0 (0%) | 1351 (6.0%) |
| 1 | 470 (39.2%) | 391 (19.9%) | 256 (8.9%) | 139 (7.2%) | 97 (0.8%) | 36 (1.4%) | 4 (1.2%) | 1 (1.8%) | 0 (0%) | 1394 (6.2%) |
| Missing | 291 (24.3%) | 1199 (61.0%) | 2380 (82.6%) | 1624 (83.9%) | 11221 (98.3%) | 2523 (97.6%) | 320 (97.3%) | 55 (98.2%) | 18 (100%) | 19631 (87.7%) |
NA
d3 <- d1 %>% filter(consent == 1 & count <7) %>%
select(age, gender, caste, education, occupation, is_vhnd_RI, nominate, count, attempts)
d3 <- d3 %>% mutate_at(vars(age, gender, caste, education, occupation, is_vhnd_RI, nominate), ~ replace(., . < 0, NA))
d3 %>%
tbl_summary(by = count,
type = list(occupation ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p()
There was an error in 'add_p()/add_difference()' for variable 'occupation', p-value omitted:
Error in stats::fisher.test(c(1, 2, 6, 7, 7, 5, 2, 7, 7, 7, 7, 7, 2, 5, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
There was an error in 'add_p()/add_difference()' for variable 'attempts', p-value omitted:
Error in stats::fisher.test(c(1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
| Characteristic | 1, N = 9091 | 2, N = 7741 | 3, N = 5051 | 4, N = 3141 | 5, N = 1941 | 6, N = 631 | p-value2 |
|---|---|---|---|---|---|---|---|
| age | 31, (10) | 32, (10) | 31, (10) | 32, (10) | 30, (8) | 32, (10) | 0.3 |
| gender | 0.076 | ||||||
| 1 | 69% | 71% | 73% | 67% | 63% | 78% | |
| 2 | 31% | 29% | 27% | 33% | 37% | 22% | |
| caste | 0.3 | ||||||
| 1 | 22% | 20% | 23% | 17% | 22% | 31% | |
| 2 | 63% | 64% | 62% | 66% | 59% | 58% | |
| 3 | 15% | 16% | 15% | 16% | 20% | 11% | |
| education | 0.7 | ||||||
| 0 | 9.2% | 7.6% | 8.5% | 6.5% | 8.8% | 13% | |
| 1 | 14% | 14% | 13% | 12% | 15% | 4.8% | |
| 2 | 39% | 38% | 41% | 39% | 38% | 40% | |
| 3 | 38% | 40% | 38% | 42% | 38% | 43% | |
| occupation | |||||||
| 1 | 14% | 12% | 12% | 10% | 17% | 16% | |
| 2 | 16% | 15% | 15% | 16% | 20% | 19% | |
| 3 | 2.7% | 2.1% | 2.2% | 2.9% | 1.6% | 4.8% | |
| 4 | 1.7% | 1.4% | 1.8% | 0.6% | 0% | 0% | |
| 5 | 11% | 9.9% | 12% | 9.4% | 10% | 7.9% | |
| 6 | 19% | 18% | 15% | 15% | 19% | 16% | |
| 7 | 35% | 39% | 39% | 38% | 29% | 35% | |
| 8 | 0.3% | 0% | 0% | 0% | 0% | 0% | |
| 9 | 0.6% | 2.5% | 4.2% | 8.1% | 3.6% | 1.6% | |
| is_vhnd_RI | 45% | 42% | 38% | 41% | 41% | 46% | 0.3 |
| nominate | 52% | 51% | 51% | 45% | 50% | 58% | 0.3 |
| attempts | |||||||
| 1 | 100% | 41% | 12% | 1.3% | 0% | 0% | |
| 2 | 0% | 59% | 34% | 13% | 1.5% | 4.8% | |
| 3 | 0% | 0% | 54% | 29% | 12% | 7.9% | |
| 4 | 0% | 0% | 0% | 57% | 36% | 25% | |
| 5 | 0% | 0% | 0% | 0% | 50% | 27% | |
| 6 | 0% | 0% | 0% | 0% | 0% | 35% | |
| 1 Mean, (SD); % | |||||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||||
d3 %>%
tbl_summary(by = attempts,
type = list(occupation ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p()
There was an error in 'add_p()/add_difference()' for variable 'caste', p-value omitted:
Error in stats::fisher.test(c(1, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 2, 2, : FEXACT error 6. LDKEY=605 is too small for this problem,
(ii := key2[itp=997] = 399847912, ldstp=18150)
Try increasing the size of the workspace and possibly 'mult'
There was an error in 'add_p()/add_difference()' for variable 'education', p-value omitted:
Error in stats::fisher.test(c(0, 2, 1, 2, 2, 3, 2, 3, 0, 1, 1, 2, 0, 2, : FEXACT error 501.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
There was an error in 'add_p()/add_difference()' for variable 'occupation', p-value omitted:
Error in stats::fisher.test(c(1, 2, 6, 7, 7, 5, 2, 7, 7, 7, 7, 7, 2, 5, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
Warning for variable 'is_vhnd_RI':
simpleWarning in stats::chisq.test(x = c(NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, NA, : Chi-squared approximation may be incorrect
There was an error in 'add_p()/add_difference()' for variable 'count', p-value omitted:
Error in stats::fisher.test(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
| Characteristic | 1, N = 1,2941 | 2, N = 6711 | 3, N = 3941 | 4, N = 2641 | 5, N = 1141 | 6, N = 221 | p-value2 |
|---|---|---|---|---|---|---|---|
| age | 32, (10) | 31, (10) | 31, (9) | 30, (9) | 31, (9) | 30, (10) | 0.4 |
| gender | 0.3 | ||||||
| 1 | 72% | 70% | 68% | 68% | 63% | 77% | |
| 2 | 28% | 30% | 32% | 32% | 37% | 23% | |
| caste | |||||||
| 1 | 21% | 21% | 22% | 20% | 25% | 38% | |
| 2 | 63% | 65% | 61% | 67% | 54% | 48% | |
| 3 | 16% | 14% | 17% | 13% | 20% | 14% | |
| education | |||||||
| 0 | 8.5% | 8.5% | 7.4% | 7.7% | 11% | 9.1% | |
| 1 | 13% | 17% | 11% | 13% | 14% | 0% | |
| 2 | 39% | 37% | 44% | 37% | 35% | 45% | |
| 3 | 40% | 38% | 37% | 43% | 40% | 45% | |
| occupation | |||||||
| 1 | 13% | 13% | 13% | 14% | 11% | 18% | |
| 2 | 16% | 13% | 17% | 16% | 24% | 23% | |
| 3 | 2.5% | 3.0% | 1.5% | 1.9% | 2.6% | 0% | |
| 4 | 1.6% | 1.2% | 1.8% | 0.4% | 0% | 0% | |
| 5 | 11% | 9.7% | 10% | 11% | 6.1% | 4.5% | |
| 6 | 19% | 17% | 16% | 14% | 24% | 23% | |
| 7 | 37% | 40% | 35% | 35% | 27% | 32% | |
| 8 | 0.2% | 0% | 0% | 0% | 0% | 0% | |
| 9 | 0.5% | 3.5% | 6.2% | 7.0% | 5.3% | 0% | |
| is_vhnd_RI | 45% | 40% | 39% | 43% | 36% | 64% | 0.2 |
| nominate | 53% | 50% | 47% | 46% | 50% | 57% | 0.10 |
| count | |||||||
| 1 | 70% | 0% | 0% | 0% | 0% | 0% | |
| 2 | 25% | 68% | 0% | 0% | 0% | 0% | |
| 3 | 4.6% | 26% | 69% | 0% | 0% | 0% | |
| 4 | 0.3% | 6.0% | 23% | 67% | 0% | 0% | |
| 5 | 0% | 0.4% | 6.1% | 27% | 85% | 0% | |
| 6 | 0% | 0.4% | 1.3% | 6.1% | 15% | 100% | |
| 1 Mean, (SD); % | |||||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||||
d3 %>% ggplot(aes(x=age, fill = factor(count))) + geom_density(alpha = 0.3) + theme_bw()
d3 %>% ggplot(aes(x=age, fill = factor(attempts))) + geom_density(alpha = 0.3) + theme_bw()
d3 %>% ggplot(aes(x= factor(count), fill= factor(occupation))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(count), fill= factor(caste))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(count), fill= factor(gender))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(attempts), fill= factor(occupation))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(attempts), fill= factor(caste))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(attempts), fill= factor(gender))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d3 %>% ggplot(aes(x= factor(attempts), fill= factor(nominate))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
#geom_text(stat = "count", aes(label = paste0(round(100 * ..count../sum(..count..)), "%")),
#position = position_fill(vjust = 0.5), color = "white")
glm( nominate ~ attempts, family = binomial(), data = d3) %>% tbl_regression(exponentiate = TRUE)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.93 | 0.87, 0.99 | 0.022 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
glm( nominate ~ attempts, data = d3) %>% tbl_regression(exponentiate = TRUE)
| Characteristic | exp(Beta) | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.98 | 0.97, 1.00 | 0.022 |
| 1 CI = Confidence Interval | |||
glm( is_vhnd_RI ~ attempts, family = binomial(), data = d3) %>% tbl_regression(exponentiate = TRUE)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.95 | 0.89, 1.03 | 0.2 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
multinom(caste ~ attempts, data = d3) %>% tbl_regression(exponentiate =TRUE )
# weights: 9 (4 variable)
initial value 2982.732364
iter 10 value 2469.258613
final value 2469.258453
converged
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| 2 | |||
| attempts | 0.96 | 0.89, 1.04 | 0.3 |
| 3 | |||
| attempts | 0.98 | 0.88, 1.08 | 0.7 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
multinom(education ~ attempts, data = d3) %>% tbl_regression(exponentiate =TRUE )
# weights: 12 (6 variable)
initial value 3777.652134
iter 10 value 3305.330625
final value 3305.330523
converged
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| 1 | |||
| attempts | 0.97 | 0.85, 1.11 | 0.7 |
| 2 | |||
| attempts | 1.01 | 0.90, 1.13 | 0.9 |
| 3 | |||
| attempts | 1.01 | 0.90, 1.14 | 0.8 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
multinom(occupation ~ attempts, data = d3) %>% tbl_regression(exponentiate =TRUE )
# weights: 27 (16 variable)
initial value 5976.450850
iter 10 value 4850.548500
iter 20 value 4667.603492
iter 30 value 4661.792595
iter 40 value 4661.286125
iter 50 value 4661.259834
iter 60 value 4661.161012
final value 4661.160377
converged
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| 2 | |||
| attempts | 1.04 | 0.92, 1.16 | 0.5 |
| 3 | |||
| attempts | 0.89 | 0.71, 1.12 | 0.3 |
| 4 | |||
| attempts | 0.74 | 0.53, 1.03 | 0.075 |
| 5 | |||
| attempts | 0.91 | 0.80, 1.04 | 0.2 |
| 6 | |||
| attempts | 0.96 | 0.86, 1.07 | 0.5 |
| 7 | |||
| attempts | 0.95 | 0.86, 1.04 | 0.3 |
| 8 | |||
| attempts | 0.00 | 0.00, 0.00 | <0.001 |
| 9 | |||
| attempts | 1.56 | 1.31, 1.85 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
multinom(gender ~ attempts, data = d3) %>% tbl_regression(exponentiate =TRUE )
# weights: 3 (2 variable)
initial value 1891.598656
final value 1666.193203
converged
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| 1 | |||
| attempts | 1.07 | 1.00, 1.14 | 0.047 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
NA
NA
1 “Not in work force/no occupation” 2 “Agricultural / Non-agricultural labor” 3 “Government service/ Elected Official” 4 “Private Doctor, Lawyer, Accountant” 5 “Own business” 6 “Services/household and domestic” 7 “Manual - skilled and unskilled” 8 “Other” 9 “Homemaker” Looks like for occupation. An increase in odds for someone to be a homemaker. And to be “other”
1 “Male” 2 “Female” 888 “other” -99 “Refused to answer”.
Slightly more likely to be women the more call attempts.
Slightly less likely to give nominations based on more call attempts.
RUF_Survey_2021_WIDE (2)
b1 <- read_csv("RUF_Survey_2021_WIDE (2).csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)Rows: 2248 Columns: 181── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (45): SubmissionDate, starttime, endtime, deviceid, childuid_biharhealthregis...
dbl (129): subscriberid, simid, devicephonenum, duration, sno, contact_number, cal...
lgl (7): sms_vaccine, days_since_sms, response_exotel, not_verify_respondent_oth...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
b1 %>% tabyl(sno, call_status)
b1 %>% tabyl(call_status)
label define call_status 1 "Picked Up" 2 "Ringing but no answer" 3 "Cut after a few beeps" 4 "Number unreachable/switched off (Audio Voice Recording)" 5 "Invalid number (AVR)" 6 "Phone not in use (AVR)" 7 "Wrong number (AVR)" 8 "No incoming call facility on this number (AVR)" 9 "Respondent asked to call later" 10 "Do not disturb (DND)" 888 "other [specify]"
b1 %>% group_by(sno) %>% summarise(count= n())
b1 %>% group_by(sno, call_status) %>% summarise(count= n())
`summarise()` has grouped output by 'sno'. You can override using the `.groups` argument.
b1 <- b1 %>% group_by(sno) %>% mutate(count= n()) %>% ungroup()
b1 <- b1 %>% group_by(sno) %>% mutate(attempts = sum(call_status == 4, na.rm = TRUE)+ 1) %>% ungroup()
b1 %>% tabyl(attempts, count)
attempts 1 2 3 4 5 6 7
1 354 296 180 480 50 6 7
2 15 64 87 140 25 12 0
3 0 0 57 84 15 0 0
4 0 0 30 84 25 6 0
5 0 0 0 196 5 0 0
6 0 0 0 0 0 12 0
7 0 0 0 0 0 18 0
b2 <- b1 %>% filter(consent == 1)
b3 <- b2 %>% filter(count <7) %>%
select(is_mother, get_mother, relmother, same_hh, yourphone, literacy_resp, sms_rec, count, attempts)
b3 <- b3 %>% mutate_at(vars(is_mother, get_mother, relmother, same_hh, yourphone, literacy_resp, sms_rec, count, attempts), ~ replace(., . < 0, NA))
b3 %>%
tbl_summary(by = count,
#type = list(occupation ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p()
There was an error in 'add_p()/add_difference()' for variable 'yourphone', p-value omitted:
Error in stats::fisher.test(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, : 'x' and 'y' must have at least 2 levels
There was an error in 'add_p()/add_difference()' for variable 'attempts', p-value omitted:
Error in stats::fisher.test(c(1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, : FEXACT error 7(location). LDSTP=18630 is too small for this problem,
(pastp=20.355, ipn_0:=ipoin[itp=161]=18629, stp[ipn_0]=22.3897).
Increase workspace or consider using 'simulate.p.value=TRUE'
| Characteristic | 1, N = 1241 | 2, N = 501 | 3, N = 291 | 4, N = 181 | 5, N = 61 | 6, N = 11 | p-value2 |
|---|---|---|---|---|---|---|---|
| is_mother | 59% | 62% | 66% | 83% | 50% | 0% | 0.3 |
| get_mother | 98% | 100% | 100% | 100% | 67% | 100% | 0.2 |
| relmother | >0.9 | ||||||
| 4 | 100% | NA% | NA% | NA% | 0% | NA% | |
| 5 | 0% | NA% | NA% | NA% | 100% | NA% | |
| same_hh | 100% | NA% | NA% | NA% | 0% | NA% | >0.9 |
| yourphone | 0% | NA% | NA% | NA% | 0% | NA% | |
| literacy_resp | 0.8 | ||||||
| 0 | 25% | 30% | 34% | 33% | 17% | 100% | |
| 1 | 73% | 68% | 66% | 67% | 83% | 0% | |
| 2 | 2.4% | 2.0% | 0% | 0% | 0% | 0% | |
| sms_rec | 0.6 | ||||||
| 0 | 17% | 26% | 33% | 29% | 40% | NA% | |
| 1 | 63% | 52% | 58% | 43% | 40% | NA% | |
| 2 | 19% | 22% | 8.3% | 14% | 20% | NA% | |
| 888 | 1.6% | 0% | 0% | 14% | 0% | NA% | |
| attempts | |||||||
| 1 | 100% | 80% | 59% | 78% | 83% | 0% | |
| 2 | 0% | 20% | 28% | 11% | 0% | 100% | |
| 3 | 0% | 0% | 14% | 5.6% | 0% | 0% | |
| 4 | 0% | 0% | 0% | 5.6% | 17% | 0% | |
| 1 % | |||||||
| 2 Fisher’s exact test | |||||||
d3 %>%
tbl_summary(by = attempts,
type = list(occupation ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p()
There was an error in 'add_p()/add_difference()' for variable 'caste', p-value omitted:
Error in stats::fisher.test(c(1, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 2, 2, : FEXACT error 6. LDKEY=605 is too small for this problem,
(ii := key2[itp=997] = 399847912, ldstp=18150)
Try increasing the size of the workspace and possibly 'mult'
There was an error in 'add_p()/add_difference()' for variable 'education', p-value omitted:
Error in stats::fisher.test(c(0, 2, 1, 2, 2, 3, 2, 3, 0, 1, 1, 2, 0, 2, : FEXACT error 501.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
There was an error in 'add_p()/add_difference()' for variable 'occupation', p-value omitted:
Error in stats::fisher.test(c(1, 2, 6, 7, 7, 5, 2, 7, 7, 7, 7, 7, 2, 5, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
Warning for variable 'is_vhnd_RI':
simpleWarning in stats::chisq.test(x = c(NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, NA, : Chi-squared approximation may be incorrect
There was an error in 'add_p()/add_difference()' for variable 'count', p-value omitted:
Error in stats::fisher.test(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, : FEXACT error 5.
The hash table key cannot be computed because the largest key
is larger than the largest representable int.
The algorithm cannot proceed.
Reduce the workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
| Characteristic | 1, N = 1,2941 | 2, N = 6711 | 3, N = 3941 | 4, N = 2641 | 5, N = 1141 | 6, N = 221 | p-value2 |
|---|---|---|---|---|---|---|---|
| age | 32, (10) | 31, (10) | 31, (9) | 30, (9) | 31, (9) | 30, (10) | 0.4 |
| gender | 0.3 | ||||||
| 1 | 72% | 70% | 68% | 68% | 63% | 77% | |
| 2 | 28% | 30% | 32% | 32% | 37% | 23% | |
| caste | |||||||
| 1 | 21% | 21% | 22% | 20% | 25% | 38% | |
| 2 | 63% | 65% | 61% | 67% | 54% | 48% | |
| 3 | 16% | 14% | 17% | 13% | 20% | 14% | |
| education | |||||||
| 0 | 8.5% | 8.5% | 7.4% | 7.7% | 11% | 9.1% | |
| 1 | 13% | 17% | 11% | 13% | 14% | 0% | |
| 2 | 39% | 37% | 44% | 37% | 35% | 45% | |
| 3 | 40% | 38% | 37% | 43% | 40% | 45% | |
| occupation | |||||||
| 1 | 13% | 13% | 13% | 14% | 11% | 18% | |
| 2 | 16% | 13% | 17% | 16% | 24% | 23% | |
| 3 | 2.5% | 3.0% | 1.5% | 1.9% | 2.6% | 0% | |
| 4 | 1.6% | 1.2% | 1.8% | 0.4% | 0% | 0% | |
| 5 | 11% | 9.7% | 10% | 11% | 6.1% | 4.5% | |
| 6 | 19% | 17% | 16% | 14% | 24% | 23% | |
| 7 | 37% | 40% | 35% | 35% | 27% | 32% | |
| 8 | 0.2% | 0% | 0% | 0% | 0% | 0% | |
| 9 | 0.5% | 3.5% | 6.2% | 7.0% | 5.3% | 0% | |
| is_vhnd_RI | 45% | 40% | 39% | 43% | 36% | 64% | 0.2 |
| nominate | 53% | 50% | 47% | 46% | 50% | 57% | 0.10 |
| count | |||||||
| 1 | 70% | 0% | 0% | 0% | 0% | 0% | |
| 2 | 25% | 68% | 0% | 0% | 0% | 0% | |
| 3 | 4.6% | 26% | 69% | 0% | 0% | 0% | |
| 4 | 0.3% | 6.0% | 23% | 67% | 0% | 0% | |
| 5 | 0% | 0.4% | 6.1% | 27% | 85% | 0% | |
| 6 | 0% | 0.4% | 1.3% | 6.1% | 15% | 100% | |
| 1 Mean, (SD); % | |||||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||||
d3 %>% ggplot(aes(x=age, fill = factor(count))) + geom_density(alpha = 0.3) + theme_bw()
d3 %>% ggplot(aes(x=age, fill = factor(attempts))) + geom_density(alpha = 0.3) + theme_bw()
d3 %>% ggplot(aes(x= factor(count), fill= factor(occupation))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
m1 <- read_csv("Maha Child RUF Survey Round 2_WIDE.csv")
Rows: 2052 Columns: 117── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (35): SubmissionDate, starttime, endtime, deviceid, contact_name, mother_name,...
dbl (78): subscriberid, simid, devicephonenum, duration, sno, child_rchid, contact...
lgl (4): sms_vaccine, days_since_sms, village_enrollee, response_exotel
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
m1 %>% tabyl(call_status)
call_status n percent
1 821 0.40009747
2 35 0.01705653
3 173 0.08430799
4 902 0.43957115
5 121 0.05896686
1 “Picked Up” 2 “Respondent asked to call later” 3 “Invalid number / Wrong number” 4 “Number unreachable/switched off/ Ringing” 5 “Do not disturb (DND)”
m1 %>% group_by(sno) %>% summarise(count= n())
m1 %>% group_by(sno, call_status) %>% summarise(count= n())
`summarise()` has grouped output by 'sno'. You can override using the `.groups` argument.
m1 <- m1 %>% group_by(sno) %>% mutate(count= n()) %>% ungroup()
m1 <- m1 %>% group_by(sno) %>% mutate(attempts = sum(call_status == 4, na.rm = TRUE)+ 1) %>% ungroup()
m1 %>% tabyl(attempts, count)
attempts 1 2 3 4 5 6
1 897 90 9 0 0 0
2 173 158 18 4 0 0
3 0 130 90 4 0 0
4 0 0 42 92 0 0
5 0 0 0 120 20 0
6 0 0 0 0 175 6
7 0 0 0 0 0 24
m2 <- m1 %>% filter(consent == 1)
m3 <- m2 %>% filter(count <5) %>%
select(is_mother, whosephone, child_sex, verify_dob, vaccinated, age, education, caste, isuseful, counterfact, literacy_resp, sms_rec, count, attempts)
m3 <- m3 %>% mutate_at(vars(is_mother, whosephone, child_sex, verify_dob, vaccinated, age, education, caste, isuseful, counterfact, literacy_resp, sms_rec, count, attempts), ~ replace(., . < 0, NA)) %>%
mutate_at(vars(is_mother, whosephone, child_sex, verify_dob, vaccinated, age, education, caste, isuseful, counterfact, literacy_resp, sms_rec, count, attempts), ~ replace(., . > 100, NA))
m3 %>%
tbl_summary(by = count,
type = list(whosephone ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p()
There was an error in 'add_p()/add_difference()' for variable 'whosephone', p-value omitted:
Error in stats::fisher.test(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, : FEXACT[f3xact()] error: hash key 3e+09 > INT_MAX, kyy=549, it[i (= nco = 5)]= -2114443664.
Rather set 'simulate.p.value=TRUE'
There was an error in 'add_p()/add_difference()' for variable 'education', p-value omitted:
Error in stats::fisher.test(c(1, 3, 2, 3, 2, 3, 2, 2, 3, 1, 2, 2, 2, 2, : FEXACT error 6. LDKEY=618 is too small for this problem,
(ii := key2[itp=672] = 57907270, ldstp=18540)
Try increasing the size of the workspace and possibly 'mult'
There was an error in 'add_p()/add_difference()' for variable 'attempts', p-value omitted:
Error in stats::fisher.test(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, : FEXACT error 6 (f5xact). LDKEY=618 is too small for this problem: kval=10739405.
Try increasing the size of the workspace.
| Characteristic | 1, N = 5511 | 2, N = 951 | 3, N = 271 | 4, N = 181 | p-value2 |
|---|---|---|---|---|---|
| is_mother | 32% | 41% | 44% | 56% | 0.035 |
| whosephone | |||||
| 0 | 47% | 36% | 50% | 39% | |
| 1 | 48% | 54% | 38% | 61% | |
| 2 | 0.5% | 3.2% | 0% | 0% | |
| 3 | 2.4% | 1.1% | 0% | 0% | |
| 4 | 1.1% | 2.1% | 7.7% | 0% | |
| 5 | 0.4% | 1.1% | 0% | 0% | |
| 6 | 0.2% | 0% | 0% | 0% | |
| 7 | 0.4% | 1.1% | 0% | 0% | |
| 8 | 0.2% | 1.1% | 3.8% | 0% | |
| 9 | 0% | 1.1% | 0% | 0% | |
| child_sex | 0.8 | ||||
| 1 | 52% | 56% | 54% | 44% | |
| 2 | 48% | 44% | 46% | 56% | |
| verify_dob | 84% | 82% | 89% | 78% | 0.7 |
| vaccinated | 93% | 96% | 96% | 100% | 0.8 |
| age | 27.7, (6.1) | 27.5, (6.6) | 26.6, (6.2) | 26.5, (4.6) | 0.4 |
| education | |||||
| 0 | 1.8% | 2.3% | 4.0% | 0% | |
| 1 | 8.6% | 9.3% | 4.0% | 11% | |
| 2 | 38% | 43% | 36% | 22% | |
| 3 | 52% | 45% | 56% | 67% | |
| caste | 0.15 | ||||
| 1 | 29% | 33% | 39% | 44% | |
| 2 | 24% | 25% | 39% | 11% | |
| 3 | 47% | 42% | 22% | 44% | |
| isuseful | 0.027 | ||||
| 1 | 56% | 54% | 38% | 50% | |
| 2 | 42% | 41% | 25% | 50% | |
| 3 | 1.1% | 2.7% | 25% | 0% | |
| 4 | 1.1% | 2.7% | 13% | 0% | |
| counterfact | 0.2 | ||||
| 1 | 53% | 70% | 63% | 50% | |
| 2 | 34% | 16% | 25% | 30% | |
| 3 | 12% | 14% | 0% | 20% | |
| 4 | 1.1% | 0% | 13% | 0% | |
| literacy_resp | 60% | 60% | 50% | 50% | >0.9 |
| sms_rec | 48% | 35% | 26% | 56% | 0.014 |
| attempts | |||||
| 1 | 100% | 47% | 15% | 0% | |
| 2 | 0% | 53% | 26% | 5.6% | |
| 3 | 0% | 0% | 59% | 11% | |
| 4 | 0% | 0% | 0% | 83% | |
| 1 %; Mean, (SD) | |||||
| 2 Pearson’s Chi-squared test; Fisher’s exact test; Kruskal-Wallis rank sum test | |||||
label define sms_rec 1 "Yes" 0 "No"
caste 1 "Scheduled Caste/Scheduled Tribe /Nomadic Tribe(SC/ST/NT)" 2 "Backward Caste (OBC/BC/MBC)" 3 "Forward Caste (FC/OC)"
isuseful 1 "Very helpful" 2 "helpful" 3 "not helpful" 4 "Not helpful at all"
label values isuseful isuseful
counterfact 1 "Yes definitely" 2 "Yes probably" 3 "No probably not" 4 "No definitely not"
m3 %>% ggplot(aes(x=age, fill = factor(count))) + geom_density(alpha = 0.3) + theme_bw()
m3 %>% ggplot(aes(x= factor(count), fill= factor(isuseful))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
m3 %>% ggplot(aes(x= factor(count), fill= factor(sms_rec))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
m3 %>% ggplot(aes(x= factor(count), fill= factor(education))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
m3 %>% ggplot(aes(x= factor(count), fill= factor(is_mother))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
m3 %>% ggplot(aes(x= factor(count), fill= factor(whosephone))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
glm( is_mother ~ attempts, family = binomial(), data = m3) %>% tbl_regression(exponentiate = TRUE)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 1.32 | 1.02, 1.71 | 0.034 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
glm( is_mother ~ attempts, data = m3) %>% tbl_regression()
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.07 | 0.01, 0.13 | 0.031 |
| 1 CI = Confidence Interval | |||
NA
NA
glm( counterfact ~ attempts, data = m3) %>% tbl_regression()
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| attempts | -0.01 | -0.15, 0.12 | 0.8 |
| 1 CI = Confidence Interval | |||
glm( isuseful ~ attempts, data = m3) %>% tbl_regression()
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.08 | -0.03, 0.19 | 0.14 |
| 1 CI = Confidence Interval | |||
NA
NA
NA
glm( sms_rec ~ attempts, family = binomial(), data = m3) %>% tbl_regression(exponentiate = TRUE)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| attempts | 0.93 | 0.72, 1.20 | 0.6 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
multinom(caste ~ attempts, data = m3) %>% tbl_regression(exponentiate =TRUE )
# weights: 9 (4 variable)
initial value 700.914640
final value 676.945299
converged
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| 2 | |||
| attempts | 0.75 | 0.52, 1.06 | 0.10 |
| 3 | |||
| attempts | 0.72 | 0.54, 0.97 | 0.033 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
d1 %>% group_by(consent, count) %>% summarise(time = sum(duration))
`summarise()` has grouped output by 'consent'. You can override using the `.groups` argument.
d1 %>% group_by(sno) %>% mutate(count= n()) %>% ungroup()
NA
| Sum of time | Column Labels | |||||||||
| Row Labels | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Grand Total |
| Group1 | 2% | 4% | 5% | 4% | 23% | 6% | 1% | 0% | 0% | 45% |
| 1 | 20% | 15% | 10% | 6% | 3% | 1% | 0% | 0% | 0% | 55% |
| Grand Total | 22% | 19% | 15% | 10% | 27% | 7% | 1% | 0% | 0% | 100% |
| Sum of time | Column Labels | |||||||||
| Row Labels | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Grand Total |
| Group1 | 7% | 19% | 36% | 41% | 87% | 83% | 84% | 84% | 100% | 45% |
| 1 | 93% | 81% | 64% | 59% | 13% | 17% | 16% | 16% | 0% | 55% |
| Grand Total | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% |
| Sum of time | Column Labels | |||||||||
| Row Labels | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Grand Total |
| Group1 | 4% | 8% | 12% | 9% | 53% | 13% | 2% | 0% | 0% | 100% |
| 1 | 37% | 27% | 17% | 10% | 6% | 2% | 0% | 0% | 0% | 100% |
| Grand Total | 22% | 19% | 15% | 10% | 27% | 7% | 1% | 0% | 0% | 100% |
m1 <- m1 %>% group_by(sno) %>% mutate(attempts = sum(call_status == 4, na.rm = TRUE)+ 1) %>% ungroup()
m1 %>% tabyl(attempts, count)
attempts 1 2 3 4 5 6
1 897 90 9 0 0 0
2 173 158 18 4 0 0
3 0 130 90 4 0 0
4 0 0 42 92 0 0
5 0 0 0 120 20 0
6 0 0 0 0 175 6
7 0 0 0 0 0 24
d1 <- d1 %>% mutate(starttime = mdy_hms(starttime))
d1 <- d1 %>% group_by(sno) %>% mutate(minstart = min(starttime)) %>% ungroup()
d1 %>% mutate(firstcall = minstart==starttime) %>% filter(firstcall == TRUE) %>% tabyl(call_status)
call_status n percent
1 1026 0.16041276
2 419 0.06550969
3 706 0.11038149
4 3506 0.54815510
5 739 0.11554096
d1$starttime %>% hour() %>% tabyl()
. n percent
7 9 0.0004020549
8 136 0.0060754970
9 3462 0.1546571365
10 2835 0.1266473085
11 2587 0.1155684610
12 3361 0.1501451865
13 730 0.0326111235
14 2824 0.1261559080
15 2628 0.1174000447
16 3360 0.1501005137
17 235 0.0104981014
18 104 0.0046459683
19 79 0.0035291490
20 23 0.0010274738
21 12 0.0005360733
d1 <- d1 %>% mutate(
hour = hour(starttime),
minute = minute(starttime)
)
d1 <- d1 %>% mutate(
time_number = round(hour + minute/60,2))
tabyl(d1$minute)
d1$minute n percent
0 236 0.01054277
1 266 0.01188296
2 230 0.01027474
3 264 0.01179361
4 240 0.01072147
5 250 0.01116819
6 296 0.01322314
7 325 0.01451865
8 340 0.01518874
9 357 0.01594818
10 378 0.01688631
11 406 0.01813715
12 390 0.01742238
13 404 0.01804780
14 393 0.01755640
15 417 0.01862855
16 417 0.01862855
17 426 0.01903060
18 412 0.01840518
19 414 0.01849453
20 429 0.01916462
21 429 0.01916462
22 417 0.01862855
23 408 0.01822649
24 430 0.01920929
25 432 0.01929864
26 422 0.01885191
27 403 0.01800313
28 426 0.01903060
29 410 0.01831584
30 389 0.01737771
31 413 0.01844985
32 409 0.01827116
33 368 0.01643958
34 413 0.01844985
35 388 0.01733304
36 400 0.01786911
37 395 0.01764574
38 419 0.01871789
39 421 0.01880724
40 362 0.01617154
41 405 0.01809247
42 426 0.01903060
43 422 0.01885191
44 410 0.01831584
45 402 0.01795845
46 379 0.01693098
47 407 0.01818182
48 412 0.01840518
49 399 0.01782444
50 373 0.01666294
51 359 0.01603753
52 374 0.01670762
53 362 0.01617154
54 347 0.01550145
55 325 0.01451865
56 316 0.01411660
57 280 0.01250838
58 282 0.01259772
59 261 0.01165959
library(ggridges)
Warning: package ‘ggridges’ was built under R version 4.2.2
d1 %>% filter(hour > 7 & hour <18) %>% ggplot(aes(x= time_number, y= fo_name, fill=fo_name)) + geom_density_ridges(alpha = 0.3, adjust= 0.5) + theme_bw()
Warning: Ignoring unknown parameters: `adjust`
d1 %>% filter(hour > 7 & hour <18) %>% ggplot(aes(x= time_number, fill= fo_name)) + geom_density(alpha = 0.3, adjust= 0.3) + theme_bw() +facet_wrap(vars(fo_name), ncol=3) + theme(legend.position = "none")
d4 <- d1 %>% filter(consent == 1) %>% filter(hour > 7 & hour <20) %>% select(hour, age, caste, gender, occupation, education ) %>% mutate_at(vars(hour, age, caste, gender, occupation, education), ~ replace(., . < 0, NA))
d4 %>% tabyl(hour)
hour n percent
8 27 0.009789703
9 439 0.159173314
10 390 0.141406817
11 339 0.122915156
12 362 0.131254532
13 97 0.035170413
14 381 0.138143582
15 346 0.125453227
16 318 0.115300943
17 25 0.009064540
18 15 0.005438724
19 19 0.006889050
d4 %>% ggplot(aes(x=age, fill = factor(hour))) + geom_density(alpha = 0.3) + theme_bw()
d4 %>% ggplot(aes(x= factor(hour), fill= factor(gender))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d4 %>% ggplot(aes(x= factor(hour), fill= factor(caste))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d4 %>% ggplot(aes(x= factor(hour), fill= factor(occupation))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
d4 %>% ggplot(aes(x= factor(hour), fill= factor(education))) + geom_bar(position= "fill") + theme_bw() + coord_flip()
NA
NA
NA
NA
NA
fisher_sim_p <- function(data, variable, by, ...) {
result <- list()
result$p <- stats::fisher.test(x = data %>% pull({{variable}}), y = data %>% pull({{by}}), simulate.p.value = T)$p.value
result$test <- "Fisher's test with simulated p-value"
result
}
d4 %>%
tbl_summary(by = hour,
# type = list(whosephone ~ "categorical"),
missing = "no",
statistic = list(
all_categorical() ~ "{p}%",
all_continuous() ~ "{mean}, ({sd})"
)) %>%
add_p(test = list(
c("caste", "education", "gender", "occupation") ~ "fisher_sim_p" # can also use helper functions instead of character vector of columns - e.g., all_categorical() ~ ...
)
)
| Characteristic | 8, N = 271 | 9, N = 4391 | 10, N = 3901 | 11, N = 3391 | 12, N = 3621 | 13, N = 971 | 14, N = 3811 | 15, N = 3461 | 16, N = 3181 | 17, N = 251 | 18, N = 151 | 19, N = 191 | p-value2 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| age | 30, (9) | 32, (10) | 32, (10) | 31, (10) | 31, (10) | 31, (9) | 31, (9) | 31, (10) | 30, (9) | 30, (7) | 33, (10) | 31, (9) | >0.9 |
| caste | 0.5 | ||||||||||||
| 1 | 15% | 25% | 18% | 23% | 18% | 27% | 22% | 20% | 23% | 13% | 27% | 26% | |
| 2 | 73% | 61% | 68% | 59% | 66% | 63% | 61% | 65% | 62% | 63% | 53% | 63% | |
| 3 | 12% | 14% | 14% | 18% | 17% | 10% | 17% | 15% | 16% | 25% | 20% | 11% | |
| gender | 0.14 | ||||||||||||
| 1 | 81% | 70% | 68% | 69% | 69% | 80% | 71% | 65% | 71% | 76% | 80% | 89% | |
| 2 | 19% | 30% | 32% | 31% | 31% | 20% | 29% | 35% | 29% | 24% | 20% | 11% | |
| occupation | 0.068 | ||||||||||||
| 1 | 11% | 15% | 13% | 14% | 11% | 9.4% | 13% | 12% | 14% | 8.0% | 13% | 11% | |
| 2 | 3.7% | 17% | 17% | 18% | 16% | 16% | 12% | 15% | 17% | 12% | 20% | 26% | |
| 3 | 7.4% | 5.0% | 1.6% | 0.9% | 2.0% | 1.0% | 2.1% | 2.9% | 1.6% | 8.0% | 0% | 0% | |
| 4 | 0% | 0.9% | 1.0% | 1.2% | 1.1% | 2.1% | 3.2% | 0.9% | 1.6% | 0% | 0% | 0% | |
| 5 | 19% | 7.3% | 10% | 12% | 11% | 7.3% | 12% | 10% | 9.4% | 8.0% | 20% | 5.3% | |
| 6 | 11% | 15% | 16% | 21% | 17% | 9.4% | 19% | 21% | 18% | 16% | 0% | 11% | |
| 7 | 44% | 36% | 38% | 32% | 37% | 51% | 35% | 35% | 35% | 48% | 47% | 47% | |
| 8 | 0% | 0% | 0.5% | 0.3% | 0% | 0% | 0% | 0% | 0% | 0% | 0% | 0% | |
| 9 | 3.7% | 3.4% | 3.4% | 0.6% | 3.6% | 4.2% | 2.7% | 2.9% | 3.2% | 0% | 0% | 0% | |
| education | 0.4 | ||||||||||||
| 0 | 11% | 9.4% | 6.5% | 9.0% | 8.7% | 7.3% | 7.5% | 11% | 6.4% | 8.0% | 13% | 5.3% | |
| 1 | 11% | 12% | 15% | 14% | 15% | 16% | 11% | 15% | 13% | 0% | 20% | 16% | |
| 2 | 26% | 40% | 42% | 36% | 35% | 45% | 38% | 39% | 38% | 56% | 33% | 47% | |
| 3 | 52% | 38% | 37% | 41% | 42% | 32% | 43% | 35% | 43% | 36% | 33% | 32% | |
| 1 Mean, (SD); % | |||||||||||||
| 2 Kruskal-Wallis rank sum test; Fisher’s test with simulated p-value | |||||||||||||
NA
NA
caste 1 “Scheduled Caste/Scheduled Tribe /Nomadic Tribe(SC/ST/NT)” 2 “Backward Caste (OBC/BC/MBC)” 3 “Forward Caste (FC/OC)”
1 “Not in work force/no occupation” 2 “Agricultural / Non-agricultural labor” 3 “Government service/ Elected Official” 4 “Private Doctor, Lawyer, Accountant” 5 “Own business” 6 “Services/household and domestic” 7 “Manual - skilled and unskilled” 8 “Other” 9 “Homemaker” Looks like for occupation. An increase in odds for someone to be a homemaker. And to be “other”
1 “Male” 2 “Female” 888 “other” -99 “Refused to answer”.