library(readr)
library(ggplot2)
library(MASS)
library(kableExtra)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library('dplyr')      # for data manipulation
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('tidyr')      # for reshaping data
library('ggplot2')    # plotting data
library('scales')     # for scale_y_continuous(label = percent)
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## The following object is masked from 'package:readr':
## 
##     col_factor
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  1.4.2     ✔ stringr 1.3.1
## ✔ purrr   0.2.5     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()         masks ggplot2::%+%()
## ✖ scales::alpha()      masks psych::alpha(), ggplot2::alpha()
## ✖ scales::col_factor() masks readr::col_factor()
## ✖ purrr::discard()     masks scales::discard()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::select()      masks MASS::select()
library(forcats)
Dissertation_Dataset <- read_csv("Dissertation_Dataset_2.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CASE = col_character(),
##   SPEAKER_OR = col_character(),
##   SENATE_COM = col_character(),
##   HOUSE_COM = col_character(),
##   GROUP_TYPE = col_character(),
##   HEALTH_CJ = col_character(),
##   FED_STATE = col_character()
## )
## See spec(...) for full column specifications.
Dissertation_Dataset$FED_STATE <- as.factor(Dissertation_Dataset$FED_STATE)
Dissertation_Dataset$GROUP_TYPE <- as.factor(Dissertation_Dataset$GROUP_TYPE)
Dissertation_Dataset$HEALTH_CJ <- as.factor(Dissertation_Dataset$HEALTH_CJ)
Dissertation_Dataset$SENATE <- as.factor(Dissertation_Dataset$SENATE)
str(Dissertation_Dataset)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 150 obs. of  50 variables:
##  $ CASE            : chr  "Case #125 - 77 - 2015 WL 1384258" "Case #65 - 097 - 2015 WL 5999232" "Case #67 - 099 - 2015 WL 1886240" "Case #47 - 069 - 2016 WL 223739" ...
##  $ YEAR            : num  2015 2015 2015 2016 2015 ...
##  $ SPEAKER_OR      : chr  "Alcohol and Drug Abuse Institute  University of Washington" "Allegheny Health Network" "American Academy of Addiction Psychiatry" "American Academy of Pain Management" ...
##  $ SENATE_COM      : chr  NA "Finance" NA "Special Aging" ...
##  $ HOUSE_COM       : chr  "Energy and Commerce" NA "Energy and Commerce" NA ...
##  $ SENATE          : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 2 2 2 ...
##  $ GROUP_TYPE      : Factor w/ 27 levels "Child Welfare",..: 5 18 25 18 18 25 20 27 23 23 ...
##  $ HEALTH_CJ       : Factor w/ 3 levels "Health","Law Enforcement",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ FED_STATE       : Factor w/ 5 levels "Federal","Indian Affairs",..: 5 4 4 4 4 4 5 2 5 5 ...
##  $ ps_OPR          : num  0 0 0 0 0 0 0 0 0 115 ...
##  $ ps_OPRS_H       : num  0 0 0 0 46 0 54 0 11 166 ...
##  $ ps_prescrip     : num  0 245 0 0 0 0 0 0 0 0 ...
##  $ ps_Prescribers  : num  0 0 0 0 0 0 0 0 0 26 ...
##  $ ps_Prescrib_Ed  : num  0 63 0 0 0 0 0 0 0 0 ...
##  $ ps_vital        : num  0 62 0 0 0 0 0 0 0 0 ...
##  $ ps_Manuf        : num  0 61 0 0 0 0 0 0 0 0 ...
##  $ ps_FDA          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ps_Left         : num  0 0 0 0 0 0 56 0 0 0 ...
##  $ ps_BadApp       : num  0 150 0 336 0 38 107 23 45 0 ...
##  $ ps_Foreign      : num  0 0 0 0 0 ...
##  $ pd_Disease      : num  29 0 0 0 270 199 0 0 0 0 ...
##  $ pd_Quality      : num  0 0 0 0 0 116 0 0 0 0 ...
##  $ pd_PSE          : num  0 0 0 0 0 0 0 56 0 0 ...
##  $ pd_CJ           : num  0 0 0 0 0 0 0 0 15 0 ...
##  $ pd_MAT          : num  38 0 0 0 243 581 0 0 0 0 ...
##  $ pd_Access       : num  0 0 0 0 58 27 0 28 0 0 ...
##  $ ds_PSE          : num  0 0 413 0 0 119 0 0 0 0 ...
##  $ ds_Prevent      : num  222 0 0 0 216 44 111 90 207 29 ...
##  $ ds_Stigma       : num  0 0 0 0 231 0 0 0 0 0 ...
##  $ ds_MAT          : num  22 0 447 0 457 965 0 0 0 0 ...
##  $ ds_Access       : num  0 0 550 43 689 82 0 0 0 0 ...
##  $ ds_Quality      : num  0 0 50 0 17 205 0 0 0 0 ...
##  $ ds_ODR          : num  162 0 0 0 554 0 0 0 175 31 ...
##  $ ds_Samari       : num  0 0 0 0 244 0 0 0 0 37 ...
##  $ ds_Coordinate   : num  0 0 0 0 0 0 31 0 181 0 ...
##  $ ds_Divert2PH    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ss_Take         : num  17 0 0 0 0 0 0 0 31 0 ...
##  $ ss_PDMP         : num  33 136 0 980 519 0 0 0 0 34 ...
##  $ ss_Reg          : num  0 0 0 0 40 0 0 0 0 0 ...
##  $ ss_New          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ss_Guide        : num  120 195 0 0 0 0 0 0 0 0 ...
##  $ ss_Cautious     : num  26 0 0 0 72 33 0 0 0 0 ...
##  $ ss_Prescrib_Ed  : num  12 0 0 929 488 22 0 0 0 0 ...
##  $ ss_Crim_Enforce : num  0 0 0 0 0 0 588 377 0 76 ...
##  $ ss_Penalt       : num  0 19 0 0 0 0 0 0 0 0 ...
##  $ Problem_Demand  : num  67 0 0 0 571 923 0 84 15 0 ...
##  $ Problem_Supply  : num  0 581 0 336 46 ...
##  $ Solutions_Demand: num  406 0 1460 43 2408 ...
##  $ Solutions_Supply: num  208 350 0 1909 1119 ...
##  $ TOTAL WORDS     : num  870 1386 973 2859 3381 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CASE = col_character(),
##   ..   YEAR = col_double(),
##   ..   SPEAKER_OR = col_character(),
##   ..   SENATE_COM = col_character(),
##   ..   HOUSE_COM = col_character(),
##   ..   SENATE = col_double(),
##   ..   GROUP_TYPE = col_character(),
##   ..   HEALTH_CJ = col_character(),
##   ..   FED_STATE = col_character(),
##   ..   ps_OPR = col_double(),
##   ..   ps_OPRS_H = col_double(),
##   ..   ps_prescrip = col_double(),
##   ..   ps_Prescribers = col_double(),
##   ..   ps_Prescrib_Ed = col_double(),
##   ..   ps_vital = col_double(),
##   ..   ps_Manuf = col_double(),
##   ..   ps_FDA = col_double(),
##   ..   ps_Left = col_double(),
##   ..   ps_BadApp = col_double(),
##   ..   ps_Foreign = col_double(),
##   ..   pd_Disease = col_double(),
##   ..   pd_Quality = col_double(),
##   ..   pd_PSE = col_double(),
##   ..   pd_CJ = col_double(),
##   ..   pd_MAT = col_double(),
##   ..   pd_Access = col_double(),
##   ..   ds_PSE = col_double(),
##   ..   ds_Prevent = col_double(),
##   ..   ds_Stigma = col_double(),
##   ..   ds_MAT = col_double(),
##   ..   ds_Access = col_double(),
##   ..   ds_Quality = col_double(),
##   ..   ds_ODR = col_double(),
##   ..   ds_Samari = col_double(),
##   ..   ds_Coordinate = col_double(),
##   ..   ds_Divert2PH = col_double(),
##   ..   ss_Take = col_double(),
##   ..   ss_PDMP = col_double(),
##   ..   ss_Reg = col_double(),
##   ..   ss_New = col_double(),
##   ..   ss_Guide = col_double(),
##   ..   ss_Cautious = col_double(),
##   ..   ss_Prescrib_Ed = col_double(),
##   ..   ss_Crim_Enforce = col_double(),
##   ..   ss_Penalt = col_double(),
##   ..   Problem_Demand = col_double(),
##   ..   Problem_Supply = col_double(),
##   ..   Solutions_Demand = col_double(),
##   ..   Solutions_Supply = col_double(),
##   ..   `TOTAL WORDS` = col_double()
##   .. )

Variable Summary

text_tbl <- data.frame(
  Variables = c("HEALTH_CJ","FED_STATE","ps_OPR","ps_OPRS_H","ps_prescrip","ps_Prescribers",
      "ps_Prescrib_Ed","ps_vital","ps_Manuf","ps_FDA","ps_Left","ps_BadApp","ps_Foreign","pd_Disease",
      "pd_Quality","pd_PSE","pd_CJ","pd_MAT","pd_Access","ds_PSE","ds_Prevent","ds_Stigma","ds_MAT",
      "ds_Access","ds_Quality","ds_ODR","ds_Samari","ds_Coordinate","ds_Divert2PH","ss_Take","ss_PDMP",
      "ss_Reg","ss_New","ss_Guide","ss_Cautious","ss_Prescrib_Ed","ss_Crim_Enforce","ss_Penalt",
      "Problem_Supply","Problem_Demand","Solutions_Demand","Solutions_Supply","TOTAL WORDS"),
  Description = c(
      "Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other",
      "Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities",
      "Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions",
      "WCPD blaming opioid prescriptions for heroin use",
      "WCPD generally blaming overprescription of opioids",
      "WCPD blaming prescribers for overprescribing",
      "WCPD blaming lack of prescriber education",
      "WCPD blaming 5th vital sign of pain",
      "WCPD blaming drug manufacturers",
      "WCPD blaming FDA",
      "WCPD blaming diversion of left-over prescriptions",
      "WCPD blaming bad apples for increasing drug supply",
      "WCPD blaming foreign actors for increasing drug supply",
      "WCPD acknowleding addiction as a disease",
      "WCPD blaming poor quality of addiction treatment",
      "WCPD blaming psychological, sociological, environmental or economical (PSEE) factors",
      "WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem",
      "WCPD blaming poor access to Medication Assisted Treatment (MAT)",
      "WCPD blaming poor access to Treatment Generally",
      "Word count of solutions (WCS) addressing PSEE factors",
      "WCS addressing demand side prevention (excluse preventioon of supply tactics)",
      "WCS addressing stigma",
      "WCS increasing access to MAT",
      "WCS increasing access to Treatment Generally",
      "WCS increasing quality of Treatment",
      "WCS increasing access to Overdose Reversal Medications",
      "WCS passage or strengthening of Good Samaritan Laws",
      "WCS coordinating between CJ and Health actors",
      "WCS that are alterntives to incarceration, like drug courts or treatment",
      "WCS involving drug take back programs",
      "WCS regarding Prescription Drug Monitoring Programs (PDMP)",
      "WCS involving rescheduling, adding black box labels, or regulation of opioids",
      "WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids",
      "WCS promoting prescriber guidelines",
      "WCS calling for more cautious prescribing practices generally",
      "WCS of prescriber or distributor education",
      "WCS commitment to criminal enforcement",
      "WCS increasing or creating new criminal penalities",
      "Total WC of all subcategories that define the problem as an issue of drug supply",
      "Total WC of all subcategories that define the problem as the demand for drugs",
      "Total WC of all subcategories that propose soltions aimed at decreasing the demand",
      "Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs",
      "Total WC of all words in each case"
  )
)

kable(text_tbl, booktabs = T)%>% 
  kable_styling(font_size=10,latex_options = c("striped","scale_down")) %>%
  group_rows("Interest Group Type", 1, 2) %>%
  group_rows("Problem Definition - Supply", 3, 13) %>%
  group_rows("Problem Definition - Demand", 14, 19) %>%
  group_rows("Solutions - Demand", 20, 28) %>%
  group_rows("Solutions - Supply", 29, 37) %>%
  group_rows("Theme Definitions", 38, 42) %>%
  group_rows("Other", 43, 43) %>%
  column_spec(1, width = "10em") %>%
  column_spec(2, width = "40em") %>%
  row_spec(0, bold=T, color = "white", background = "black")
Variables Description
Interest Group Type
HEALTH_CJ Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other
FED_STATE Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities
Problem Definition - Supply
ps_OPR Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions
ps_OPRS_H WCPD blaming opioid prescriptions for heroin use
ps_prescrip WCPD generally blaming overprescription of opioids
ps_Prescribers WCPD blaming prescribers for overprescribing
ps_Prescrib_Ed WCPD blaming lack of prescriber education
ps_vital WCPD blaming 5th vital sign of pain
ps_Manuf WCPD blaming drug manufacturers
ps_FDA WCPD blaming FDA
ps_Left WCPD blaming diversion of left-over prescriptions
ps_BadApp WCPD blaming bad apples for increasing drug supply
ps_Foreign WCPD blaming foreign actors for increasing drug supply
Problem Definition - Demand
pd_Disease WCPD acknowleding addiction as a disease
pd_Quality WCPD blaming poor quality of addiction treatment
pd_PSE WCPD blaming psychological, sociological, environmental or economical (PSEE) factors
pd_CJ WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem
pd_MAT WCPD blaming poor access to Medication Assisted Treatment (MAT)
pd_Access WCPD blaming poor access to Treatment Generally
Solutions - Demand
ds_PSE Word count of solutions (WCS) addressing PSEE factors
ds_Prevent WCS addressing demand side prevention (excluse preventioon of supply tactics)
ds_Stigma WCS addressing stigma
ds_MAT WCS increasing access to MAT
ds_Access WCS increasing access to Treatment Generally
ds_Quality WCS increasing quality of Treatment
ds_ODR WCS increasing access to Overdose Reversal Medications
ds_Samari WCS passage or strengthening of Good Samaritan Laws
ds_Coordinate WCS coordinating between CJ and Health actors
Solutions - Supply
ds_Divert2PH WCS that are alterntives to incarceration, like drug courts or treatment
ss_Take WCS involving drug take back programs
ss_PDMP WCS regarding Prescription Drug Monitoring Programs (PDMP)
ss_Reg WCS involving rescheduling, adding black box labels, or regulation of opioids
ss_New WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids
ss_Guide WCS promoting prescriber guidelines
ss_Cautious WCS calling for more cautious prescribing practices generally
ss_Prescrib_Ed WCS of prescriber or distributor education
ss_Crim_Enforce WCS commitment to criminal enforcement
Theme Definitions
ss_Penalt WCS increasing or creating new criminal penalities
Problem_Supply Total WC of all subcategories that define the problem as an issue of drug supply
Problem_Demand Total WC of all subcategories that define the problem as the demand for drugs
Solutions_Demand Total WC of all subcategories that propose soltions aimed at decreasing the demand
Solutions_Supply Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs
Other
TOTAL WORDS Total WC of all words in each case

Supply vs. Demand Descriptive Statistics

sum_PD <- sum(Dissertation_Dataset$Problem_Demand)
sum_PS <- sum(Dissertation_Dataset$Problem_Supply)
sum_SD <- sum(Dissertation_Dataset$Solutions_Demand)
sum_SS <- sum(Dissertation_Dataset$Solutions_Supply)
total_words_coded = sum_PD +sum_PS + sum_SD +sum_SS
sum_PD
## [1] 25602
sum_PS
## [1] 56087
sum_SD
## [1] 104790
sum_SS
## [1] 95780
sum_total_words <- sum(Dissertation_Dataset$`TOTAL WORDS`)
sum_total_words
## [1] 376426
total_words_coded
## [1] 282259
total_words_coded/sum_total_words
## [1] 0.7498393

All of the hearing testimony amounted to 376426. 246096 of those words, or 65%, of those words were coded.

The least time was spent discussing demand-side causes of the problem (wc = 23921). Supply side causes were discussed at 2x the rate of demand side causes (47620 v. 23921). However, the amount of time spend discussing demand-side and supply-side problems was far out-shadowed by the time spent discussing the proposed solutions to the problem. Despite the preference for discussing supply side soluions, the time spent discussing supply side solutions was close to that spent supporting demand-side solutions (87640 v. 86915).

sum_PD + sum_PS
## [1] 81689
sum_SD + sum_SS
## [1] 200570

Overeall, more time was spent discussing the solution than the problem. Nearly double the time was spent discussing solutions vs. problems (174555 v. 71541).

If we are interested in changing these to percentages or proportions…

sum_PD/total_words_coded
## [1] 0.09070393
sum_PS/total_words_coded
## [1] 0.1987076
sum_SD/total_words_coded
## [1] 0.3712548
sum_SS/total_words_coded
## [1] 0.3393337
(sum_PD + sum_PS)/total_words_coded
## [1] 0.2894115
(sum_SD + sum_SS)/total_words_coded
## [1] 0.7105885
(sum_PS + sum_SS)/total_words_coded
## [1] 0.5380413

Percentages of Total Discourse Each Category Represents

Creating a Loop to give me the total for each variable in the dataset.

library(tidyverse)
Count_Data <- Dissertation_Dataset[10:50] #create dataset of counts only
output <- vector("double", ncol(Count_Data))  
for (i in seq_along(Count_Data)) {
  output[[i]] <- (sum(Count_Data[[i]]))}
output
##  [1]   6527   9755   2399   3580    736   2548   2039    912   1127  11283
## [11]  15181   5665   1539   2850   6002   3365   6181   4535  24567   2555
## [21]  20040  21109   5361  14959    852   4783   6029   4267  22551   1485
## [31]   3456   4727   4143  15671  38870    610  25602  56087 104790  95780
## [41] 376426
cd <- ((output/total_words_coded)*100)
var <-names(Count_Data)
kable(cbind(var, cd), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down")) %>%
    row_spec(0, bold=T, color = "white", background = "black") %>%
    column_spec(1, bold=T)
% of coded discourse
var cd
ps_OPR 2.31241519313822
ps_OPRS_H 3.45604568853429
ps_prescrip 0.849928611665173
ps_Prescribers 1.26833865350618
ps_Prescrib_Ed 0.260753421502946
ps_vital 0.902717008137916
ps_Manuf 0.722386177234384
ps_FDA 0.323107500557998
ps_Left 0.399278676676386
ps_BadApp 3.99739246578497
ps_Foreign 5.37839360303834
pd_Disease 2.0070219195845
pd_Quality 0.545243907191622
pd_PSE 1.00971093924374
pd_CJ 2.12641580959332
pd_MAT 1.19216747738779
pd_Access 2.18983274226863
ds_PSE 1.60668038928785
ds_Prevent 8.70370829628108
ds_Stigma 0.905196999918515
ds_MAT 7.09986218331391
ds_Access 7.47859235666533
ds_Quality 1.8993194193985
ds_ODR 5.29974243513936
ds_Samari 0.301850428152867
ds_Coordinate 1.69454295522906
ds_Divert2PH 2.13598149217563
ss_Take 1.51173213254493
ss_PDMP 7.98947066346866
ss_Reg 0.526112542027004
ss_New 1.22440737053557
ss_Guide 1.67470302098427
ss_Cautious 1.46780084957433
ss_Prescrib_Ed 5.55199302768025
ss_Crim_Enforce 13.7710400731243
ss_Penalt 0.21611356945217
Problem_Demand 9.07039279526959
Problem_Supply 19.8707569997768
Solutions_Demand 37.1254769555621
Solutions_Supply 33.9333732493915
TOTAL WORDS 133.361912286234

Group Types

levels(Dissertation_Dataset$GROUP_TYPE)
##  [1] "Child Welfare"            "Coalition"               
##  [3] "Distributors"             "Drug Court"              
##  [5] "Expert"                   "Family"                  
##  [7] "Federal Health Agency"    "Federal LE Agency"       
##  [9] "Governor"                 "Hospital System"         
## [11] "Housing Provider"         "Local Coalition"         
## [13] "Local Government"         "Local Health Agency"     
## [15] "Local Law Enforcement"    "Military"                
## [17] "Millenial Marketing"      "Prescribers"             
## [19] "Public Interest"          "Regional Law Enforcement"
## [21] "State Civil Enforcement"  "State Health Agency"     
## [23] "State Law Enforcement"    "State Legislator"        
## [25] "SUD Providers"            "Tribal Government"       
## [27] "Tribal Law Enforcement"
table(Dissertation_Dataset$GROUP_TYPE)
## 
##            Child Welfare                Coalition             Distributors 
##                        1                        1                        1 
##               Drug Court                   Expert                   Family 
##                        1                       10                        6 
##    Federal Health Agency        Federal LE Agency                 Governor 
##                       30                       18                        2 
##          Hospital System         Housing Provider          Local Coalition 
##                        5                        1                        1 
##         Local Government      Local Health Agency    Local Law Enforcement 
##                        2                        3                       10 
##                 Military      Millenial Marketing              Prescribers 
##                        2                        1                        4 
##          Public Interest Regional Law Enforcement  State Civil Enforcement 
##                        8                        3                        1 
##      State Health Agency    State Law Enforcement         State Legislator 
##                        8                       11                        4 
##            SUD Providers        Tribal Government   Tribal Law Enforcement 
##                       13                        2                        1
table(Dissertation_Dataset$GROUP_TYPE, Dissertation_Dataset$HEALTH_CJ)
##                           
##                            Health Law Enforcement Other
##   Child Welfare                 0               0     1
##   Coalition                     1               0     0
##   Distributors                  1               0     0
##   Drug Court                    0               1     0
##   Expert                       10               0     0
##   Family                        0               0     6
##   Federal Health Agency        30               0     0
##   Federal LE Agency             0              18     0
##   Governor                      0               0     2
##   Hospital System               5               0     0
##   Housing Provider              0               0     1
##   Local Coalition               1               0     0
##   Local Government              0               0     2
##   Local Health Agency           3               0     0
##   Local Law Enforcement         0              10     0
##   Military                      0               2     0
##   Millenial Marketing           0               0     1
##   Prescribers                   4               0     0
##   Public Interest               3               0     5
##   Regional Law Enforcement      0               3     0
##   State Civil Enforcement       1               0     0
##   State Health Agency           8               0     0
##   State Law Enforcement         0              11     0
##   State Legislator              0               0     4
##   SUD Providers                13               0     0
##   Tribal Government             0               0     2
##   Tribal Law Enforcement        0               1     0
library(tidyverse)

Count_Data <- Dissertation_Dataset[10:50] #create dataset of counts only

##cd_grp <- tapply(Dissertation_Dataset$'Total Words', Dissertation_Dataset$GROUP_TYPE,FUN=mean) ##doesnt work
###tapply(total_words_coded, Dissertation_Dataset$GROUP_TYPE,FUN=sum) ###DIDNT WORK

TWC supply v. demand by group

sum_pd_gp <- aggregate(Dissertation_Dataset$Problem_Demand, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum) ##YAY this works~
sum_ps_gp <- aggregate(Dissertation_Dataset$Problem_Supply, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)
sum_ds_gp<- aggregate(Dissertation_Dataset$Solutions_Demand, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)
sum_ss_gp <- aggregate(Dissertation_Dataset$Solutions_Supply, by = list(Dissertation_Dataset$GROUP_TYPE),                                                          FUN = sum)
cd_group <- (sum_pd_gp$x+sum_ps_gp$x+sum_ds_gp$x+sum_ss_gp$x) #total coded words per group, had to select the 2nd column titled x because the first column was the group names and it couldnt add up the group names. 
grp_names <- levels(Dissertation_Dataset$GROUP_TYPE) #extracting group names for table

library(kableExtra)
kable(cbind(grp_names, sum_pd_gp$x, sum_ps_gp$x, sum_ds_gp$x, sum_ss_gp$x, cd_group), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))
% of coded discourse
grp_names cd_group
Child Welfare 86 0 513 0 599
Coalition 207 35 940 724 1906
Distributors 0 119 0 0 119
Drug Court 1384 171 393 36 1984
Expert 2216 1008 3736 3279 10239
Family 563 609 4177 476 5825
Federal Health Agency 3027 11610 34187 25332 74156
Federal LE Agency 225 18532 7631 31188 57576
Governor 22 124 1122 444 1712
Hospital System 689 329 1844 1073 3935
Housing Provider 310 0 374 0 684
Local Coalition 57 0 640 936 1633
Local Government 95 220 263 289 867
Local Health Agency 2129 178 8937 1445 12689
Local Law Enforcement 334 5691 4035 5045 15105
Military 0 951 0 3157 4108
Millenial Marketing 0 0 2718 0 2718
Prescribers 719 3761 2719 3913 11112
Public Interest 4355 614 6728 1775 13472
Regional Law Enforcement 0 3118 1043 1886 6047
State Civil Enforcement 0 900 114 16 1030
State Health Agency 1042 1325 8376 3390 14133
State Law Enforcement 1736 2642 4460 7776 16614
State Legislator 720 382 998 922 3022
SUD Providers 5129 3281 7548 1756 17714
Tribal Government 473 464 1204 545 2686
Tribal Law Enforcement 84 23 90 377 574

I spot checked and it seems that all the columns add up, becuase before they were not adding up. Now, I need to calculate the percentage of the groups coded word count that was represented by each category.

percA <- ((sum_pd_gp$x/cd_group)*100) 
percB <- ((sum_ps_gp$x/cd_group)*100)
percC <- ((sum_ds_gp$x/cd_group)*100)
percD <- ((sum_ss_gp$x/cd_group)*100)
library(kableExtra)
kable(cbind(grp_names, percA,percB,percC, percD), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))
% of coded discourse
grp_names percA percB percC percD
Child Welfare 14.3572621035058 0 85.6427378964942 0
Coalition 10.8604407135362 1.83630640083945 49.3179433368311 37.9853095487933
Distributors 0 100 0 0
Drug Court 69.758064516129 8.61895161290323 19.8084677419355 1.81451612903226
Expert 21.6427385486864 9.84471139759742 36.4879382752222 32.024611778494
Family 9.66523605150215 10.4549356223176 71.7081545064378 8.17167381974249
Federal Health Agency 4.08193537947031 15.6561842602082 46.1014617832677 34.1604185770538
Federal LE Agency 0.390787828261776 32.1870223704321 13.2537862998472 54.1684035014589
Governor 1.28504672897196 7.24299065420561 65.5373831775701 25.9345794392523
Hospital System 17.5095298602287 8.36086404066074 46.861499364676 27.2681067344346
Housing Provider 45.3216374269006 0 54.6783625730994 0
Local Coalition 3.49050826699326 0 39.1916717697489 57.3178199632578
Local Government 10.957324106113 25.3748558246828 30.3344867358708 33.3333333333333
Local Health Agency 16.7783119237135 1.40278981795256 70.4310820395618 11.3878162187722
Local Law Enforcement 2.21118834822906 37.6762661370407 26.7130089374379 33.3995365772923
Military 0 23.1499513145083 0 76.8500486854917
Millenial Marketing 0 0 100 0
Prescribers 6.47048236141109 33.8462922966163 24.4690424766019 35.2141828653708
Public Interest 32.3263064133017 4.55760095011876 49.9406175771972 13.1754750593824
Regional Law Enforcement 0 51.5627583925914 17.2482222589714 31.1890193484372
State Civil Enforcement 0 87.378640776699 11.0679611650485 1.55339805825243
State Health Agency 7.37281539658954 9.37522111370551 59.2655487157716 23.9864147739333
State Law Enforcement 10.4490188997231 15.9022511135187 26.8448296617311 46.8039003250271
State Legislator 23.8252812706817 12.6406353408339 33.0244870946393 30.5095962938451
SUD Providers 28.9544992661172 18.5220729366603 42.6103646833013 9.91306311392119
Tribal Government 17.6098287416232 17.2747580044676 44.825018615041 20.2903946388682
Tribal Law Enforcement 14.6341463414634 4.00696864111498 15.6794425087108 65.6794425087108

Now that we are done looking at the breakdown of group type for the themes, we can look at the individual narratives by group type.

wc_cat <-(aggregate(Count_Data, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)) #total number of words for each category by group type
wc_cat2 <-wc_cat %>% select(2:42) #select numeric columns
perc_grp <-((wc_cat2/cd_group)*100) #calc proportion then change to perc
kable(cbind(grp_names, perc_grp), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))
% of coded discourse
grp_names ps_OPR ps_OPRS_H ps_prescrip ps_Prescribers ps_Prescrib_Ed ps_vital ps_Manuf ps_FDA ps_Left ps_BadApp ps_Foreign pd_Disease pd_Quality pd_PSE pd_CJ pd_MAT pd_Access ds_PSE ds_Prevent ds_Stigma ds_MAT ds_Access ds_Quality ds_ODR ds_Samari ds_Coordinate ds_Divert2PH ss_Take ss_PDMP ss_Reg ss_New ss_Guide ss_Cautious ss_Prescrib_Ed ss_Crim_Enforce ss_Penalt Problem_Demand Problem_Supply Solutions_Demand Solutions_Supply TOTAL WORDS
Child Welfare 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 4.507512 0.0000000 0.0000000 0.0000000 0.0000000 9.8497496 3.5058431 0.0000000 0.0000000 0.0000000 44.240401 0.0000000 0.0000000 0.0000000 0.0000000 37.8964942 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 14.3572621 0.000000 85.64274 0.000000 180.30050
Coalition 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.8363064 0.000000 6.925498 0.7345226 0.0000000 0.0000000 0.0000000 3.2004197 3.3578174 0.0000000 0.0000000 6.4533054 11.122770 7.8174187 10.5456453 10.0209864 0.0000000 0.0000000 7.1353620 14.690451 0.0000000 0.0000000 0.0000000 0.0000000 16.1594963 0.0000000 0.0000000 10.8604407 1.836306 49.31794 37.985309 120.77650
Distributors 0.0000000 100.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 100.000000 0.00000 0.000000 2894.95798
Drug Court 4.6875000 3.931452 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 69.7580645 0.0000000 0.0000000 0.0000000 0.0000000 1.4616935 11.9959677 1.9657258 4.385081 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 1.8145161 0.0000000 0.0000000 0.0000000 69.7580645 8.618952 19.80847 1.814516 112.65121
Expert 2.1388808 1.054790 0.3320637 1.5821858 1.9142494 0.0000000 0.1367321 0.8594589 0.4492626 1.3770876 0.000000 9.444282 2.7248755 0.2246313 4.1019631 4.1605626 0.9864245 0.0000000 11.5440961 0.1953316 15.8218576 4.453560 1.2501221 3.2229710 0.0000000 0.0000000 0.0000000 1.5528860 19.601524 0.0000000 0.0683661 3.4769020 4.9125891 2.3830452 0.0292997 0.0000000 21.6427385 9.844711 36.48794 32.024612 204.96142
Family 2.2317597 2.008584 0.0000000 2.6437768 0.0000000 3.1244635 0.0000000 0.0000000 0.1201717 0.3261803 0.000000 5.493562 0.0000000 0.0000000 1.5107296 0.0000000 2.6609442 7.8798283 49.1845494 1.8712446 0.0000000 5.785408 2.8154506 3.5193133 0.0000000 0.6523605 0.0000000 0.0000000 2.472103 0.0000000 0.0000000 3.7081545 0.0000000 1.1845494 0.8068670 0.0000000 9.6652361 10.454936 71.70815 8.171674 215.75966
Federal Health Agency 5.1539997 2.520363 1.7571066 1.9593829 0.2966719 0.0000000 0.6850423 0.0000000 0.0000000 3.2836183 0.000000 1.011381 0.1065322 0.0000000 0.0000000 0.8657425 2.0982793 0.1860942 7.8914720 0.0647284 16.0135390 9.430120 2.1630077 8.0195803 0.0000000 0.5825557 1.7503641 0.5124332 13.181671 0.9277739 4.6186418 2.7509574 3.6962619 6.8409839 1.6316953 0.0000000 4.0819354 15.656184 46.10146 34.160419 133.49965
Federal LE Agency 2.1015701 10.007642 0.0885786 0.3056829 0.0000000 0.0000000 0.0000000 0.0000000 0.7086286 5.7385022 13.236418 0.041684 0.0000000 0.0000000 0.2674726 0.0000000 0.0816312 0.3577880 6.8778658 0.0000000 0.0000000 3.022092 0.2379464 0.3925247 0.0000000 2.2127275 0.1528415 3.9443518 4.779769 0.4394192 0.0000000 0.0000000 0.0000000 10.7423232 34.0176462 0.2448937 0.3907878 32.187022 13.25379 54.168403 100.21884
Governor 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 7.242991 1.285047 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 17.9906542 1.2850467 10.2219626 24.591121 0.0000000 3.9719626 0.0000000 0.0000000 7.4766355 3.9719626 3.971963 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 17.9906542 0.0000000 1.2850467 7.242991 65.53738 25.934579 105.43224
Hospital System 0.0000000 1.143583 2.0584498 0.0000000 0.0000000 3.1003812 0.0000000 0.0000000 0.0000000 2.0584498 0.000000 4.320203 0.0000000 0.0000000 1.8805591 4.2439644 7.0648030 0.0000000 0.8132147 8.0559085 4.5743329 27.598475 5.8195680 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 13.036849 0.0000000 0.0000000 9.5806861 3.1766201 1.4739517 0.0000000 0.0000000 17.5095299 8.360864 46.86150 27.268107 184.75222
Housing Provider 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 13.5964912 0.0000000 6.7251462 25.0000000 35.9649123 0.0000000 6.7251462 3.3625731 4.970760 3.6549708 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 45.3216374 0.000000 54.67836 0.000000 157.01754
Local Coalition 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 3.4905083 0.0000000 11.8799755 0.0000000 4.5927740 4.715248 0.5511329 12.4311084 0.0000000 5.0214329 0.0000000 18.8609920 23.270055 0.0000000 0.0000000 0.0000000 0.0000000 15.1867728 0.0000000 0.0000000 3.4905083 0.000000 39.19167 57.317820 203.79669
Local Government 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 16.4936563 8.881199 8.765859 0.0000000 0.0000000 0.0000000 0.0000000 2.1914648 0.0000000 12.9181084 0.0000000 0.0000000 5.420992 0.0000000 5.8823529 0.0000000 6.1130334 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 33.3333333 0.0000000 10.9573241 25.374856 30.33449 33.333333 169.66551
Local Health Agency 0.0000000 0.000000 0.2679486 0.9614627 0.1733785 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 5.469304 0.0000000 0.7802033 4.6496966 1.6707384 4.2083695 7.3055402 6.8878556 3.6882339 7.7389865 19.221373 1.8362361 18.2283868 0.4570888 0.4728505 4.5945307 1.5604067 1.694381 1.1900071 0.1891402 3.9246592 0.2994720 2.5297502 0.0000000 0.0000000 16.7783119 1.402790 70.43108 11.387816 94.67255
Local Law Enforcement 1.2446210 2.118504 0.0000000 0.9069844 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 13.5518040 19.854353 0.000000 0.0000000 0.0000000 2.0721615 0.0000000 0.1390268 0.9467064 3.6809004 0.0000000 0.1986097 4.614366 0.0000000 5.0910295 0.0000000 6.4614366 5.7199603 0.8407812 2.846739 0.0000000 0.0000000 0.0000000 0.0000000 1.3902681 26.9182390 1.4035088 2.2111883 37.676266 26.71301 33.399537 120.51639
Military 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 23.149951 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 76.8500487 0.0000000 0.0000000 23.149951 0.00000 76.850049 279.99026
Millenial Marketing 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 100.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 100.00000 0.000000 103.67918
Prescribers 0.0000000 1.619870 3.4557235 3.0507559 0.5669546 13.5439165 1.4758819 0.0000000 4.5536357 5.2915767 0.287977 2.690785 0.0000000 0.0000000 1.0709143 2.1868251 0.5219582 0.0000000 2.1598272 2.0788337 5.7865371 6.803456 0.1529878 5.2915767 2.1958243 0.0000000 0.0000000 0.0000000 14.713823 2.2858171 0.0000000 4.0586753 0.8999280 13.0849532 0.0000000 0.1709863 6.4704824 33.846292 24.46904 35.214183 115.40677
Public Interest 1.4697150 0.601247 0.1633017 0.2078385 0.8461995 0.0000000 0.0000000 0.0000000 0.0000000 1.2692993 0.000000 1.573634 4.6763658 0.2597981 21.5261283 2.9765439 1.3138361 4.3349169 15.9516033 0.7274347 1.3954869 3.830166 11.2455463 11.0302850 0.3117577 0.0000000 1.1134204 1.2173397 5.433492 0.0000000 0.0000000 2.3307601 0.0296912 2.7835511 1.3806413 0.0000000 32.3263064 4.557601 49.94062 13.175475 123.37441
Regional Law Enforcement 0.0000000 2.017529 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.9260790 4.6303952 43.988755 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 5.3249545 0.0000000 0.0000000 1.868695 0.0000000 1.2568216 0.0000000 8.7977510 0.0000000 0.0000000 1.984455 0.0000000 0.0000000 0.0000000 0.5787994 0.7110964 26.1617331 1.7529353 0.0000000 51.562758 17.24822 31.189019 122.57318
State Civil Enforcement 0.0000000 0.000000 2.1359223 0.0000000 0.0000000 0.0000000 85.2427184 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 3.4951456 0.0000000 3.3009709 2.427185 0.0000000 1.8446602 0.0000000 0.0000000 0.0000000 1.5533981 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 87.378641 11.06796 1.553398 151.45631
State Health Agency 0.4669921 0.077832 0.9552112 0.4033114 0.0000000 4.1746268 0.2759499 0.0000000 0.5377485 2.4835491 0.000000 2.299583 0.0000000 1.4222034 0.1273615 1.1816316 2.3420364 5.1227623 14.3140168 2.4623222 7.2525295 10.542701 4.2029293 10.8894078 0.0000000 1.8184391 2.6604401 2.8939362 10.875257 0.7075639 0.0000000 2.6745914 1.6344725 4.1038704 1.0967240 0.0000000 7.3728154 9.375221 59.26555 23.986415 131.44414
State Law Enforcement 0.6921873 2.455760 0.4574455 2.9794150 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 5.3388708 3.978572 1.203804 1.3422415 1.2579752 5.7662213 0.4815216 0.3972553 0.3551222 3.1298905 0.9028530 1.6793066 2.377513 0.4092934 3.9484772 1.1195377 4.3577706 8.5650656 0.1865896 1.011195 0.0000000 0.0000000 0.3972553 0.0000000 0.0421331 44.6310341 0.5356928 10.4490189 15.902251 26.84483 46.803900 148.36885
State Legislator 5.2945069 1.257445 1.4890801 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 4.5996029 0.000000 2.779616 0.0000000 0.0000000 2.2170748 9.3977498 9.4308405 2.8457975 0.0000000 0.4301787 12.0119126 3.706155 0.0000000 2.7134348 0.7941760 5.0959629 5.4268696 0.0000000 30.509596 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 23.8252813 12.640635 33.02449 30.509596 185.27465
SUD Providers 1.6822852 2.602461 1.1967935 1.6822852 0.6830755 0.8411426 2.3314892 4.6516879 0.0000000 2.8282714 0.022581 7.694479 0.8750141 2.5968161 1.5637349 3.9347409 12.2897144 4.5218471 2.6193971 2.5234278 13.4695721 13.452636 2.7605284 1.1290505 0.6040420 0.8637236 0.6661398 0.0000000 4.668624 0.2201648 0.0000000 0.0000000 1.8629333 2.5968161 0.3217794 0.2427459 28.9544993 18.522073 42.61036 9.913063 121.42373
Tribal Government 1.0424423 1.340283 0.0000000 5.9195830 0.0000000 0.0000000 0.8562919 0.0000000 1.0424423 5.1377513 1.935964 0.000000 5.9195830 10.7967238 0.8935220 0.0000000 0.0000000 2.8667163 1.5264334 0.0000000 0.0000000 15.934475 0.0000000 0.0000000 0.0000000 1.7870439 22.7103500 0.0000000 1.712584 0.0000000 0.0000000 1.1541325 0.0000000 1.3402829 16.0833954 0.0000000 17.6098287 17.274758 44.82502 20.290395 169.06180
Tribal Law Enforcement 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 4.0069686 0.000000 0.000000 0.0000000 9.7560976 0.0000000 0.0000000 4.8780488 0.0000000 15.6794425 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 65.6794425 0.0000000 14.6341463 4.006969 15.67944 65.679442 754.70383

Health vs. CJ

Supply vs. Demand

sum_pd_hc <- aggregate(Dissertation_Dataset$Problem_Demand, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ps_hc <- aggregate(Dissertation_Dataset$Problem_Supply, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ds_hc<- aggregate(Dissertation_Dataset$Solutions_Demand, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ss_hc <- aggregate(Dissertation_Dataset$Solutions_Supply, by = list(Dissertation_Dataset$HEALTH_CJ),                                                          FUN = sum)
cd_hc <- (sum_pd_hc$x+sum_ps_hc$x+sum_ds_hc$x+sum_ss_hc$x) #total coded words per group, had to select the 2nd column titled x because the first column was the group names and it couldnt add up the group names. 

perc1 <- ((sum_pd_hc$x/cd_hc)*100)
perc2 <- ((sum_ps_hc$x/cd_hc)*100)
perc3 <- ((sum_ds_hc$x/cd_hc)*100)
perc4 <- ((sum_ss_hc$x/cd_hc)*100)

grp_names_hc <- levels(Dissertation_Dataset$HEALTH_CJ) #extracting group names for table

library(kableExtra)
kable(cbind(grp_names_hc, perc1, perc2, perc3, perc4), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))
% of coded discourse
grp_names_hc perc1 perc2 perc3 perc4
Health 10.5886264513563 14.9115658676194 46.5261977560613 27.973609924963
Law Enforcement 3.68892635871696 30.5152537055917 17.3045251352835 48.4912948004078
Other 20.8376768428891 7.76619508562919 58.7155621742368 12.680565897245
wc_cat_hc <-(aggregate(Count_Data, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)) #total number of words for each category by Health_CJ
wc_cat2 <-wc_cat_hc %>% select(2:42) #select numeric columns
perc_hc <-((wc_cat2/cd_hc)*100) #calc proportion then change to perc
kable(cbind(grp_names_hc, perc_hc), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))
% of coded discourse
grp_names_hc ps_OPR ps_OPRS_H ps_prescrip ps_Prescribers ps_Prescrib_Ed ps_vital ps_Manuf ps_FDA ps_Left ps_BadApp ps_Foreign pd_Disease pd_Quality pd_PSE pd_CJ pd_MAT pd_Access ds_PSE ds_Prevent ds_Stigma ds_MAT ds_Access ds_Quality ds_ODR ds_Samari ds_Coordinate ds_Divert2PH ss_Take ss_PDMP ss_Reg ss_New ss_Guide ss_Cautious ss_Prescrib_Ed ss_Crim_Enforce ss_Penalt Problem_Demand Problem_Supply Solutions_Demand Solutions_Supply TOTAL WORDS
Health 2.978010 1.820837 1.4518453 1.6030927 0.4798195 1.5424634 1.3142883 0.594559 0.4094112 2.693770 0.0234694 3.1846719 0.7542815 0.5332777 0.9765892 1.6650260 3.4747801 1.7732461 7.169912 1.2888631 12.3677400 10.438031 3.2146606 7.692759 0.3911572 0.6414979 1.548331 1.1056711 11.460255 0.8031762 2.253066 2.8215475 2.6520461 5.9084301 0.9289984 0.0404196 10.588627 14.911566 46.52620 27.97361 135.8795
Law Enforcement 1.574386 6.558309 0.1245000 0.7920947 0.0000000 0.0000000 0.0000000 0.000000 0.4548663 6.412242 14.5988550 0.2195906 0.2186103 1.6165399 1.3969493 0.0784252 0.1588111 0.3999686 5.369187 0.3803623 0.3411497 2.972316 0.2009646 1.693005 0.1823386 3.4369853 2.328249 2.3811858 3.401694 0.2480198 0.000000 0.0647008 0.0696024 6.3181319 35.4707474 0.5372128 3.688926 30.515254 17.30453 48.49129 123.5560
Other 1.314222 1.012658 0.1675354 1.1653016 0.0000000 0.6775875 0.0856292 0.000000 0.1303053 2.271035 0.9419211 2.0699926 0.5919583 1.4259121 11.4631422 2.7215190 2.5651526 5.2382725 30.126582 0.7073716 2.6842889 7.691735 0.8376768 5.331348 0.2457185 1.0908414 4.761728 0.5286672 5.591958 0.0000000 0.000000 1.2397617 0.0148920 0.6068503 4.6984363 0.0000000 20.837677 7.766195 58.71556 12.68057 156.2249

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

library(sandwich)
library(msm)
attach(Dissertation_Dataset)
mpd <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd)
## 
## Call:
## glm(formula = Problem_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.551988   0.007847 -325.24   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03   <2e-16 ***
## HEALTH_CJOther            0.537455   0.015500   34.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7
#robust standard errors
cov.mpd <- vcovHC(mpd, type="HC0")
std.err <- sqrt(diag(cov.mpd))
r.est <- cbind(Estimate= coef(mpd), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd)/std.err), lower.tail=FALSE),
LL = coef(mpd) - 1.96 * std.err,
UL = coef(mpd) + 1.96 * std.err)

#chisquare
with(mpd, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE))) 
##      res.deviance  df p
## [1,]     55391.49 147 0
#IRR
spd <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd), cov.mpd)

## exponentiate old estimates dropping the p values
rexp.est <- exp(r.est[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est[, "Robust SE"] <- spd

rexp.est
##                            Estimate  Robust SE        LL        UL
## (Intercept)              0.07792656 0.01356128 0.0554053 0.1096023
## HEALTH_CJLaw Enforcement 0.38313395 0.19599836 0.1405713 1.0442501
## HEALTH_CJOther           1.71164462 0.75342510 0.7223175 4.0560106

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

library(sandwich)
library(msm)
mpd <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd)
## 
## Call:
## glm(formula = Problem_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.551988   0.007847 -325.24   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03   <2e-16 ***
## HEALTH_CJOther            0.537455   0.015500   34.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7
#robust standard errors
cov.mpd <- vcovHC(mpd, type="HC0")
std.err <- sqrt(diag(cov.mpd))
r.est <- cbind(Estimate= coef(mpd), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd)/std.err), lower.tail=FALSE),
LL = coef(mpd) - 1.96 * std.err,
UL = coef(mpd) + 1.96 * std.err)

#chisquare
with(mpd, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE))) 
##      res.deviance  df p
## [1,]     55391.49 147 0
#IRR
spd <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd), cov.mpd)

## exponentiate old estimates dropping the p values
rexp.est <- exp(r.est[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est[, "Robust SE"] <- spd

rexp.est
##                            Estimate  Robust SE        LL        UL
## (Intercept)              0.07792656 0.01356128 0.0554053 0.1096023
## HEALTH_CJLaw Enforcement 0.38313395 0.19599836 0.1405713 1.0442501
## HEALTH_CJOther           1.71164462 0.75342510 0.7223175 4.0560106

Is there overdispersion?

with(Dissertation_Dataset, tapply(Problem_Demand, HEALTH_CJ, function(x) {
    sprintf("M (SD) = %1.2f (%1.2f)", mean(x), sd(x))
}))
##                     Health            Law Enforcement 
## "M (SD) = 203.03 (316.87)"  "M (SD) = 81.80 (260.19)" 
##                      Other 
## "M (SD) = 233.21 (503.91)"

Since the variance at each level of Health_CJ are higher than the mean it suggests overdispersion. Therefore, a negative binomial analysis may better suit the data.

Negative Binomial

??Not sure if I should use the log of the offset for the negative binomial model as well…

library(MASS)
summary(m1 <- glm.nb(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))))
## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.1482280511, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.55893  -1.41633  -0.45537  -0.00702   1.84518  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -2.4600     0.2905  -8.468
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.7417     0.4810  -1.542
## Dissertation_Dataset$HEALTH_CJOther             0.2567     0.6048   0.424
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.123    
## Dissertation_Dataset$HEALTH_CJOther              0.671    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1482) family taken to be 1)
## 
##     Null deviance: 154.15  on 149  degrees of freedom
## Residual deviance: 151.24  on 147  degrees of freedom
## AIC: 1411.2
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.1482 
##           Std. Err.:  0.0182 
## 
##  2 x log-likelihood:  -1403.1660

The dispesion parameter is less than 1 indicating that dispersion is not an issue and the poisson model is the better fit.

Set others as the reference level

Dissertation_Dataset$HEALTH_CJ <- relevel(Dissertation_Dataset$HEALTH_CJ, ref="Health") #set others as reference level
msdothers <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), data=Dissertation_Dataset, family=poisson)
(coefmsdo <-cbind(Estimate = coef(msdothers)))
##                            Estimate
## (Intercept)              -2.5519884
## HEALTH_CJLaw Enforcement -0.9593706
## HEALTH_CJOther            0.5374547
exp(coefmsdo)
##                            Estimate
## (Intercept)              0.07792656
## HEALTH_CJLaw Enforcement 0.38313395
## HEALTH_CJOther           1.71164462

Health_CJ actors in Problem_Supply

library(sandwich)
library(msm)
mpd3 <- glm(Problem_Supply~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd3)
## 
## Call:
## glm(formula = Problem_Supply ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -43.785  -21.290  -10.348    8.053   66.663  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.209632   0.006612 -334.18   <2e-16 ***
## HEALTH_CJLaw Enforcement  0.811164   0.008709   93.14   <2e-16 ***
## HEALTH_CJOther           -0.791884   0.022872  -34.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 88724  on 149  degrees of freedom
## Residual deviance: 75832  on 147  degrees of freedom
## AIC: 76581
## 
## Number of Fisher Scoring iterations: 6
#robust standard errors
cov.mpd3 <- vcovHC(mpd3, type="HC0")
std.err3 <- sqrt(diag(cov.mpd3))
r.est3 <- cbind(Estimate= coef(mpd3), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd3)/std.err), lower.tail=FALSE),
LL = coef(mpd3) - 1.96 * std.err,
UL = coef(mpd3) + 1.96 * std.err)

#chisquare
with(mpd3, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE))) 
##      res.deviance  df p
## [1,]     75832.09 147 0
#IRR
spd3 <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd3), cov.mpd3)

## exponentiate old estimates dropping the p values
rexp.est3 <- exp(r.est3[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est3[, "Robust SE"] <- spd3

rexp.est3
##                           Estimate  Robust SE         LL        UL
## (Intercept)              0.1097411 0.02091593 0.07802521 0.1543488
## HEALTH_CJLaw Enforcement 2.2505259 0.53974332 0.82571486 6.1339173
## HEALTH_CJOther           0.4529904 0.13103653 0.19116285 1.0734318

Health_CJ actors on Solutions Demand

msd2 <- glm(Solutions_Demand~HEALTH_CJ +offset(log(`TOTAL WORDS`)), family=poisson, data=Dissertation_Dataset)
summary(msd2)
## 
## Call:
## glm(formula = Solutions_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson, data = Dissertation_Dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -67.903  -20.371   -5.575   13.398   54.310  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.071753   0.003743 -286.31   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.893973   0.008406 -106.35   <2e-16 ***
## HEALTH_CJOther            0.093162   0.008799   10.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 99574  on 149  degrees of freedom
## Residual deviance: 84811  on 147  degrees of freedom
## AIC: 85876
## 
## Number of Fisher Scoring iterations: 5
(coefmsd2 <-cbind(Estimate = coef(msd2)))
##                             Estimate
## (Intercept)              -1.07175328
## HEALTH_CJLaw Enforcement -0.89397314
## HEALTH_CJOther            0.09316165
exp(coefmsd2)
##                           Estimate
## (Intercept)              0.3424077
## HEALTH_CJLaw Enforcement 0.4090274
## HEALTH_CJOther           1.0976391
summary(msd6 <- glm.nb(Solutions_Demand~HEALTH_CJ +offset(log(`TOTAL WORDS`))))
## 
## Call:
## glm.nb(formula = Solutions_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     init.theta = 0.542540841, link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0014  -0.8093  -0.2305   0.3149   1.5528  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -1.1242     0.1519  -7.403 1.33e-13 ***
## HEALTH_CJLaw Enforcement  -0.5659     0.2514  -2.251   0.0244 *  
## HEALTH_CJOther             0.0218     0.3162   0.069   0.9450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.5425) family taken to be 1)
## 
##     Null deviance: 190.42  on 149  degrees of freedom
## Residual deviance: 185.22  on 147  degrees of freedom
## AIC: 2184.7
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.5425 
##           Std. Err.:  0.0578 
## 
##  2 x log-likelihood:  -2176.6570

Dispersion parameter is less than 1 so we don’t need to use a negative binomial model because overdispersion is not an issue.

Health_CJ actors on Solutions_Supply

msd3 <- glm(Solutions_Supply~HEALTH_CJ +offset(log(`TOTAL WORDS`)), family=poisson, data=Dissertation_Dataset)
summary(msd3)
## 
## Call:
## glm(formula = Solutions_Supply ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson, data = Dissertation_Dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -52.652  -18.997   -4.503    8.681   46.606  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.580507   0.004828 -327.39   <2e-16 ***
## HEALTH_CJLaw Enforcement  0.645197   0.006597   97.80   <2e-16 ***
## HEALTH_CJOther           -0.930719   0.017802  -52.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 82877  on 149  degrees of freedom
## Residual deviance: 65967  on 147  degrees of freedom
## AIC: 66949
## 
## Number of Fisher Scoring iterations: 6
(coefmsd3 <-cbind(Estimate = coef(msd3)))
##                            Estimate
## (Intercept)              -1.5805073
## HEALTH_CJLaw Enforcement  0.6451971
## HEALTH_CJOther           -0.9307186
exp(coefmsd3)
##                           Estimate
## (Intercept)              0.2058706
## HEALTH_CJLaw Enforcement 1.9063627
## HEALTH_CJOther           0.3942703