Compare Groups

compareGroups (without considering weight)

setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2025/00_NewPapers/Survey/PewSurvey/W152/Arka/V3")
library(readxl)
library(ggplot2)
library(dplyr)
library(compareGroups)
library(vtree)
library(DT)

df= read.csv("ATP W152 Data_cleanedFin_AC.csv")
df1= df[, c(1:13, 15)]
res1 <- compareGroups(DrCell ~ ., df1, ref = 1, max.ylev = 50,
                      max.xlev = 50,  chisq.test.perm=TRUE, chisq.test.B=10000)
res2= createTable(res1, show.ratio = TRUE)
res2
## 
## --------Summary descriptives table by 'DrCell'---------
## 
## _________________________________________________________________________________________________ 
##                                   Major problem Minor problem No Response Not a problem p.overall 
##                                      N=4253         N=996        N=13         N=148               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Area:                                                                                     0.114   
##     Metropolitan                  3746 (88.1%)   855 (85.8%)  12 (92.3%)   123 (83.1%)            
##     No Response                     1 (0.02%)     2 (0.20%)    0 (0.00%)    0 (0.00%)             
##     Non-metropolitan               506 (11.9%)   139 (14.0%)   1 (7.69%)   25 (16.9%)             
## Regn:                                                                                     0.064   
##     Midwest                        857 (20.2%)   198 (19.9%)   3 (23.1%)   31 (20.9%)             
##     No Response                     1 (0.02%)     2 (0.20%)    0 (0.00%)    0 (0.00%)             
##     Northeast                      703 (16.5%)   193 (19.4%)   5 (38.5%)   34 (23.0%)             
##     South                         1663 (39.1%)   355 (35.6%)   4 (30.8%)   54 (36.5%)             
##     West                          1029 (24.2%)   248 (24.9%)   1 (7.69%)   29 (19.6%)             
## Land:                                                                                     0.101   
##     Refused                        44 (1.03%)    10 (1.00%)    0 (0.00%)    3 (2.03%)             
##     Rural                          948 (22.3%)   237 (23.8%)   3 (23.1%)   47 (31.8%)             
##     Suburban                      2224 (52.3%)   527 (52.9%)   5 (38.5%)   60 (40.5%)             
##     Urban                         1037 (24.4%)   222 (22.3%)   5 (38.5%)   38 (25.7%)             
## Age:                                                                                      0.001   
##     18-29                          575 (13.5%)   152 (15.3%)   4 (30.8%)   32 (21.6%)             
##     30-49                         1491 (35.1%)   330 (33.1%)   3 (23.1%)   59 (39.9%)             
##     50-64                         1103 (25.9%)   208 (20.9%)   3 (23.1%)   28 (18.9%)             
##     65+                           1062 (25.0%)   303 (30.4%)   2 (15.4%)   28 (18.9%)             
##     Refused                        22 (0.52%)     3 (0.30%)    1 (7.69%)    1 (0.68%)             
## Gender:                                                                                  <0.001   
##     A man                         2089 (49.1%)   527 (52.9%)   5 (38.5%)   68 (45.9%)             
##     A woman                       2127 (50.0%)   457 (45.9%)   6 (46.2%)   77 (52.0%)             
##     In some other way              22 (0.52%)     7 (0.70%)    0 (0.00%)    2 (1.35%)             
##     Refused                        15 (0.35%)     5 (0.50%)    2 (15.4%)    1 (0.68%)             
## Edu:                                                                                      0.001   
##     College graduate+             1805 (42.4%)   458 (46.0%)   5 (38.5%)   41 (27.7%)             
##     H.S. graduate or less         1116 (26.2%)   271 (27.2%)   5 (38.5%)   66 (44.6%)             
##     Refused                         9 (0.21%)     3 (0.30%)    1 (7.69%)    0 (0.00%)             
##     Some College                  1323 (31.1%)   264 (26.5%)   2 (15.4%)   41 (27.7%)             
## Race:                                                                                     0.124   
##     Cuban                          45 (1.06%)     5 (0.50%)    0 (0.00%)    2 (1.35%)             
##     Dominican                      19 (0.45%)     7 (0.70%)    0 (0.00%)    3 (2.03%)             
##     Mexican                        363 (8.54%)   56 (5.62%)    3 (23.1%)   13 (8.78%)             
##     No Response                   3570 (83.9%)   877 (88.1%)  10 (76.9%)   115 (77.7%)            
##     Other Central American         36 (0.85%)     5 (0.50%)    0 (0.00%)    1 (0.68%)             
##     Other country                  18 (0.42%)     4 (0.40%)    0 (0.00%)    0 (0.00%)             
##     Other South American           74 (1.74%)    13 (1.31%)    0 (0.00%)    3 (2.03%)             
##     Puerto Rican                   62 (1.46%)    10 (1.00%)    0 (0.00%)    7 (4.73%)             
##     Refused                        12 (0.28%)     4 (0.40%)    0 (0.00%)    1 (0.68%)             
##     Salvadoran                     20 (0.47%)     4 (0.40%)    0 (0.00%)    1 (0.68%)             
##     Spanish                        34 (0.80%)    11 (1.10%)    0 (0.00%)    2 (1.35%)             
## Born:                                                                                     0.215   
##     Foreign                        699 (16.4%)   156 (15.7%)   4 (30.8%)   23 (15.5%)             
##     Refused                        23 (0.54%)     8 (0.80%)    0 (0.00%)    3 (2.03%)             
##     U.S.                          3531 (83.0%)   832 (83.5%)   9 (69.2%)   122 (82.4%)            
## Marr:                                                                                    <0.001   
##     Divorced                       376 (8.84%)   102 (10.2%)   1 (7.69%)   15 (10.1%)             
##     Living with a partner          394 (9.26%)   83 (8.33%)    0 (0.00%)   21 (14.2%)             
##     Married                       2365 (55.6%)   513 (51.5%)   6 (46.2%)   52 (35.1%)             
##     Never been married             837 (19.7%)   219 (22.0%)   4 (30.8%)   50 (33.8%)             
##     Refused                        19 (0.45%)     7 (0.70%)    2 (15.4%)    1 (0.68%)             
##     Separated                      70 (1.65%)    20 (2.01%)    0 (0.00%)    2 (1.35%)             
##     Widowed                        192 (4.51%)   52 (5.22%)    0 (0.00%)    7 (4.73%)             
## Politics:                                                                                 0.027   
##     No Response                   2419 (56.9%)   550 (55.2%)   9 (69.2%)   67 (45.3%)             
##     Refused                        156 (3.67%)   32 (3.21%)    2 (15.4%)    7 (4.73%)             
##     The Democratic Party           885 (20.8%)   216 (21.7%)   1 (7.69%)   46 (31.1%)             
##     The Republican Party           793 (18.6%)   198 (19.9%)   1 (7.69%)   28 (18.9%)             
## Income:                                                                                  <0.001   
##     $100,000 or more              1399 (32.9%)   320 (32.1%)   3 (23.1%)   19 (12.8%)             
##     $30,000 to less than $40,000   340 (7.99%)   79 (7.93%)    0 (0.00%)   17 (11.5%)             
##     $40,000 to less than $50,000   329 (7.74%)   69 (6.93%)    2 (15.4%)   16 (10.8%)             
##     $50,000 to less than $60,000   308 (7.24%)   59 (5.92%)    2 (15.4%)    9 (6.08%)             
##     $60,000 to less than $70,000   292 (6.87%)   63 (6.33%)    0 (0.00%)   14 (9.46%)             
##     $70,000 to less than $80,000   283 (6.65%)   75 (7.53%)    0 (0.00%)   12 (8.11%)             
##     $80,000 to less than $90,000   251 (5.90%)   44 (4.42%)    0 (0.00%)    9 (6.08%)             
##     $90,000 to less than $100,000  247 (5.81%)   77 (7.73%)    1 (7.69%)    2 (1.35%)             
##     Less than $30,000              597 (14.0%)   165 (16.6%)   1 (7.69%)   46 (31.1%)             
##     Refused                        207 (4.87%)   45 (4.52%)    4 (30.8%)    4 (2.70%)             
## Ideology:                                                                                <0.001   
##     Conservative                  1039 (24.4%)   245 (24.6%)   3 (23.1%)   27 (18.2%)             
##     Liberal                        766 (18.0%)   196 (19.7%)   1 (7.69%)   19 (12.8%)             
##     Moderate                      1696 (39.9%)   358 (35.9%)   3 (23.1%)   70 (47.3%)             
##     Refused                        84 (1.98%)    17 (1.71%)    4 (30.8%)    3 (2.03%)             
##     Very conservative              351 (8.25%)   95 (9.54%)    2 (15.4%)   16 (10.8%)             
##     Very liberal                   317 (7.45%)   85 (8.53%)    0 (0.00%)   13 (8.78%)             
## IntUse:                                                                                  <0.001   
##     About once a day               153 (3.60%)   53 (5.32%)    0 (0.00%)   21 (14.2%)             
##     Almost constantly             2106 (49.5%)   458 (46.0%)   6 (46.2%)   59 (39.9%)             
##     Do not use the internet        61 (1.43%)    23 (2.31%)    0 (0.00%)    0 (0.00%)             
##     Less often                     68 (1.60%)    15 (1.51%)    3 (23.1%)    6 (4.05%)             
##     Refused                        17 (0.40%)     4 (0.40%)    1 (7.69%)    1 (0.68%)             
##     Several times a day           1759 (41.4%)   419 (42.1%)   3 (23.1%)   55 (37.2%)             
##     Several times a week           89 (2.09%)    24 (2.41%)    0 (0.00%)    6 (4.05%)             
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

Alternative (considering weight)

library(survey)

vars <- c("Area","Regn","Land","Age","Gender","Edu","Race","Born","Marr",
          "Politics","Income","Ideology","IntUse","DrCell","Wght")
df1 <- df[, vars]

# treat as categorical (edit if any should be numeric)
cat_vars <- setdiff(vars, "Wght")
df1[cat_vars] <- lapply(df1[cat_vars], function(x) factor(x, exclude = NULL))

# survey design (ids=~1 is OK if you only have weights)
des <- svydesign(ids = ~1, weights = ~Wght, data = df1)

# ---- helper: weighted % by group + survey-weighted chi-square p-value ----
wtab_cat <- function(var, by, design) {
  f <- as.formula(paste0("~", var, "+", by))
  tab <- svytable(f, design)                   # weighted counts
  colp <- prop.table(tab, margin = 2) * 100    # % within DrCell groups

  # design-based Rao-Scott test
  p <- tryCatch(svychisq(as.formula(paste0("~", var, "+", by)),
                        design, statistic = "F")$p.value,
                error = function(e) NA_real_)

  # return long data.frame
  out <- as.data.frame.matrix(round(colp, 2))
  out$Level <- rownames(out)
  rownames(out) <- NULL
  out$Variable <- var
  out$p_value <- p
  out[, c("Variable","Level", setdiff(names(out), c("Variable","Level")))]
}

# build table for all predictors (exclude DrCell and Wght)
preds <- setdiff(names(df1), c("DrCell","Wght"))
tbl_list <- lapply(preds, wtab_cat, by = "DrCell", design = des)
tbl_weighted <- do.call(rbind, tbl_list)

tbl_weighted
##    Variable                         Level Major problem Minor problem
## 1      Area                  Metropolitan         86.91         83.55
## 2      Area                   No Response          0.03          0.19
## 3      Area              Non-metropolitan         13.06         16.26
## 4      Regn                       Midwest         20.60         20.10
## 5      Regn                   No Response          0.03          0.19
## 6      Regn                     Northeast         16.74         20.19
## 7      Regn                         South         39.00         35.23
## 8      Regn                          West         23.63         24.29
## 9      Land                       Refused          1.13          0.96
## 10     Land                         Rural         24.70         26.76
## 11     Land                      Suburban         49.96         51.24
## 12     Land                         Urban         24.21         21.05
## 13      Age                         18-29         18.89         21.72
## 14      Age                         30-49         33.65         30.93
## 15      Age                         50-64         25.40         21.04
## 16      Age                           65+         21.60         26.11
## 17      Age                       Refused          0.46          0.19
## 18   Gender                         A man         47.34         52.42
## 19   Gender                       A woman         51.61         45.55
## 20   Gender             In some other way          0.56          1.26
## 21   Gender                       Refused          0.49          0.77
## 22      Edu             College graduate+         34.09         36.51
## 23      Edu         H.S. graduate or less         35.13         35.84
## 24      Edu                       Refused          0.38          0.57
## 25      Edu                  Some College         30.40         27.08
## 26     Race                         Cuban          0.69          0.38
## 27     Race                     Dominican          0.37          0.76
## 28     Race                       Mexican          9.58          6.04
## 29     Race                   No Response         83.24         86.69
## 30     Race        Other Central American          1.07          0.45
## 31     Race                 Other country          0.41          0.33
## 32     Race          Other South American          1.43          0.99
## 33     Race                  Puerto Rican          1.67          1.59
## 34     Race                       Refused          0.32          0.70
## 35     Race                    Salvadoran          0.61          0.67
## 36     Race                       Spanish          0.62          1.41
## 37     Born                       Foreign         15.61         11.89
## 38     Born                       Refused          0.87          0.58
## 39     Born                         U.S.          83.53         87.53
## 40     Marr                      Divorced          9.01          9.54
## 41     Marr         Living with a partner         10.15          9.00
## 42     Marr                       Married         51.34         46.05
## 43     Marr            Never been married         23.05         25.78
## 44     Marr                       Refused          0.44          0.83
## 45     Marr                     Separated          1.36          2.66
## 46     Marr                       Widowed          4.65          6.14
## 47 Politics                   No Response         53.82         52.51
## 48 Politics                       Refused          6.75          5.83
## 49 Politics          The Democratic Party         19.65         22.50
## 50 Politics          The Republican Party         19.79         19.16
## 51   Income              $100,000 or more         27.25         27.18
## 52   Income  $30,000 to less than $40,000          8.99          8.81
## 53   Income  $40,000 to less than $50,000          8.88          9.08
## 54   Income  $50,000 to less than $60,000          7.26          5.44
## 55   Income  $60,000 to less than $70,000          6.93          6.75
## 56   Income  $70,000 to less than $80,000          6.58          7.69
## 57   Income  $80,000 to less than $90,000          5.99          4.16
## 58   Income $90,000 to less than $100,000          5.10          6.80
## 59   Income             Less than $30,000         17.60         19.68
## 60   Income                       Refused          5.41          4.41
## 61 Ideology                  Conservative         24.57         24.92
## 62 Ideology                       Liberal         16.24         18.41
## 63 Ideology                      Moderate         40.62         36.01
## 64 Ideology                       Refused          3.02          3.25
## 65 Ideology             Very conservative          8.84          9.29
## 66 Ideology                  Very liberal          6.70          8.12
## 67   IntUse              About once a day          4.85          7.74
## 68   IntUse             Almost constantly         42.68         39.00
## 69   IntUse       Do not use the internet          2.64          3.46
## 70   IntUse                    Less often          2.88          2.51
## 71   IntUse                       Refused          0.42          0.41
## 72   IntUse           Several times a day         43.66         42.49
## 73   IntUse          Several times a week          2.86          4.38
##    No Response Not a problem      p_value
## 1        94.34         85.54 1.733991e-01
## 2         0.00          0.00 1.733991e-01
## 3         5.66         14.46 1.733991e-01
## 4        35.49         23.10 1.276890e-01
## 5         0.00          0.00 1.276890e-01
## 6        32.21         21.73 1.276890e-01
## 7        30.41         38.76 1.276890e-01
## 8         1.89         16.41 1.276890e-01
## 9         0.00          1.85 1.363321e-01
## 10       25.63         34.87 1.363321e-01
## 11       34.24         36.94 1.363321e-01
## 12       40.13         26.34 1.363321e-01
## 13       33.07         30.87 1.511489e-05
## 14       29.47         36.00 1.511489e-05
## 15        9.55         18.57 1.511489e-05
## 16       19.84         14.47 1.511489e-05
## 17        8.07          0.09 1.511489e-05
## 18       24.92         45.50 3.964284e-09
## 19       55.11         51.15 3.964284e-09
## 20        0.00          1.22 3.964284e-09
## 21       19.97          2.13 3.964284e-09
## 22       28.14         17.00 4.930719e-06
## 23       56.45         60.22 4.930719e-06
## 24        5.95          0.00 4.930719e-06
## 25        9.46         22.78 4.930719e-06
## 26        0.00          0.54 5.690414e-03
## 27        0.00          3.64 5.690414e-03
## 28       29.32          9.20 5.690414e-03
## 29       70.68         75.11 5.690414e-03
## 30        0.00          0.18 5.690414e-03
## 31        0.00          0.00 5.690414e-03
## 32        0.00          4.77 5.690414e-03
## 33        0.00          2.93 5.690414e-03
## 34        0.00          0.42 5.690414e-03
## 35        0.00          2.13 5.690414e-03
## 36        0.00          1.07 5.690414e-03
## 37       36.36         16.04 6.250988e-03
## 38        0.00          4.35 6.250988e-03
## 39       63.64         79.61 6.250988e-03
## 40        5.66          7.78 1.078551e-11
## 41        0.00         12.52 1.078551e-11
## 42       51.00         30.83 1.078551e-11
## 43       25.26         39.72 1.078551e-11
## 44       18.08          0.09 1.078551e-11
## 45        0.00          2.32 1.078551e-11
## 46        0.00          6.74 1.078551e-11
## 47       62.76         41.08 3.078118e-02
## 48       19.84          8.40 3.078118e-02
## 49       11.90         33.45 3.078118e-02
## 50        5.51         17.07 3.078118e-02
## 51        4.89          9.78 1.426458e-05
## 52        0.00         11.32 1.426458e-05
## 53       17.13         14.44 1.426458e-05
## 54       30.28          6.43 1.426458e-05
## 55        0.00         10.07 1.426458e-05
## 56        0.00          8.65 1.426458e-05
## 57        0.00          5.97 1.426458e-05
## 58        8.07          0.42 1.426458e-05
## 59        9.71         28.95 1.426458e-05
## 60       29.92          3.98 1.426458e-05
## 61       20.60         21.71 1.197302e-03
## 62        1.90         12.50 1.197302e-03
## 63       25.25         42.16 1.197302e-03
## 64       29.24          1.72 1.197302e-03
## 65       23.00         10.50 1.197302e-03
## 66        0.00         11.41 1.197302e-03
## 67        0.00         18.32 4.556412e-14
## 68       28.25         32.02 4.556412e-14
## 69        0.00          0.00 4.556412e-14
## 70       39.99          5.82 4.556412e-14
## 71        8.07          1.18 4.556412e-14
## 72       23.70         38.22 4.556412e-14
## 73        0.00          4.43 4.556412e-14
datatable(tbl_weighted)
# write.csv(tbl_weighted, "Table1_weighted.csv", row.names = FALSE)

Single Var

single_var <- df %>%
  group_by(Age) %>%
  summarise(
    Count = n(),
    Weighted = sum(Wght),
    .groups = "drop"
  ) %>%
  mutate(
    Unweighted_Percent = 100 * Count / sum(Count),
    Weighted_Percent   = 100 * Weighted / sum(Weighted)
  )

single_var
## # A tibble: 5 × 5
##   Age     Count Weighted Unweighted_Percent Weighted_Percent
##   <chr>   <int>    <dbl>              <dbl>            <dbl>
## 1 18-29     763   1074.              14.1             19.8  
## 2 30-49    1883   1797.              34.8             33.2  
## 3 50-64    1342   1316.              24.8             24.3  
## 4 65+      1395   1201.              25.8             22.2  
## 5 Refused    27     23.0              0.499            0.426

Two Var

two_var <- df %>%
  group_by(Regn, Land) %>%
  summarise(
    Count = n(),
    Weighted = sum(Wght),
    .groups = "drop"
  ) %>%
  group_by(Regn) %>%   # <-- cluster by first column
  mutate(
    Unweighted_Percent = 100 * Count / sum(Count),
    Weighted_Percent   = 100 * Weighted / sum(Weighted)
  ) %>%
  ungroup()
two_var
## # A tibble: 18 × 6
##    Regn        Land     Count Weighted Unweighted_Percent Weighted_Percent
##    <chr>       <chr>    <int>    <dbl>              <dbl>            <dbl>
##  1 Midwest     Refused     15    19.9               1.38             1.79 
##  2 Midwest     Rural      313   341.               28.7             30.5  
##  3 Midwest     Suburban   532   532.               48.9             47.7  
##  4 Midwest     Urban      229   223.               21.0             20.0  
##  5 No Response Rural        2     2.11             66.7             66.3  
##  6 No Response Suburban     1     1.07             33.3             33.7  
##  7 Northeast   Refused      9     7.88              0.963            0.828
##  8 Northeast   Rural      186   221.               19.9             23.2  
##  9 Northeast   Suburban   496   469.               53.0             49.3  
## 10 Northeast   Urban      244   254.               26.1             26.7  
## 11 South       Refused     22    18.7               1.06             0.905
## 12 South       Rural      532   596.               25.6             28.8  
## 13 South       Suburban  1075  1020.               51.8             49.3  
## 14 South       Urban      447   436.               21.5             21.1  
## 15 West        Refused     11    14.0               0.842            1.10 
## 16 West        Rural      202   215.               15.5             17.0  
## 17 West        Suburban   712   668.               54.5             52.7  
## 18 West        Urban      382   371.               29.2             29.3