library(readr)
library(ggplot2)
library(MASS)
library(kableExtra)
library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library('dplyr')      # for data manipulation

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library('tidyr')      # for reshaping data
library('ggplot2')    # plotting data
library('scales')     # for scale_y_continuous(label = percent)

## 
## Attaching package: 'scales'

## The following objects are masked from 'package:psych':
## 
##     alpha, rescale

## The following object is masked from 'package:readr':
## 
##     col_factor

library(tidyverse)

## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ tibble  1.4.2     ✔ stringr 1.3.1
## ✔ purrr   0.2.5     ✔ forcats 0.3.0

## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()         masks ggplot2::%+%()
## ✖ scales::alpha()      masks psych::alpha(), ggplot2::alpha()
## ✖ scales::col_factor() masks readr::col_factor()
## ✖ purrr::discard()     masks scales::discard()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::select()      masks MASS::select()

library(forcats)

Dissertation_Dataset <- read_csv("Dissertation_Dataset_2.csv")

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CASE = col_character(),
##   SPEAKER_OR = col_character(),
##   SENATE_COM = col_character(),
##   HOUSE_COM = col_character(),
##   GROUP_TYPE = col_character(),
##   HEALTH_CJ = col_character(),
##   FED_STATE = col_character()
## )

## See spec(...) for full column specifications.

Dissertation_Dataset$FED_STATE <- as.factor(Dissertation_Dataset$FED_STATE)
Dissertation_Dataset$GROUP_TYPE <- as.factor(Dissertation_Dataset$GROUP_TYPE)
Dissertation_Dataset$HEALTH_CJ <- as.factor(Dissertation_Dataset$HEALTH_CJ)
Dissertation_Dataset$SENATE <- as.factor(Dissertation_Dataset$SENATE)
str(Dissertation_Dataset)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 150 obs. of  50 variables:
##  $ CASE            : chr  "Case #125 - 77 - 2015 WL 1384258" "Case #65 - 097 - 2015 WL 5999232" "Case #67 - 099 - 2015 WL 1886240" "Case #47 - 069 - 2016 WL 223739" ...
##  $ YEAR            : num  2015 2015 2015 2016 2015 ...
##  $ SPEAKER_OR      : chr  "Alcohol and Drug Abuse Institute  University of Washington" "Allegheny Health Network" "American Academy of Addiction Psychiatry" "American Academy of Pain Management" ...
##  $ SENATE_COM      : chr  NA "Finance" NA "Special Aging" ...
##  $ HOUSE_COM       : chr  "Energy and Commerce" NA "Energy and Commerce" NA ...
##  $ SENATE          : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 2 2 2 ...
##  $ GROUP_TYPE      : Factor w/ 27 levels "Child Welfare",..: 5 18 25 18 18 25 20 27 23 23 ...
##  $ HEALTH_CJ       : Factor w/ 3 levels "Health","Law Enforcement",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ FED_STATE       : Factor w/ 5 levels "Federal","Indian Affairs",..: 5 4 4 4 4 4 5 2 5 5 ...
##  $ ps_OPR          : num  0 0 0 0 0 0 0 0 0 115 ...
##  $ ps_OPRS_H       : num  0 0 0 0 46 0 54 0 11 166 ...
##  $ ps_prescrip     : num  0 245 0 0 0 0 0 0 0 0 ...
##  $ ps_Prescribers  : num  0 0 0 0 0 0 0 0 0 26 ...
##  $ ps_Prescrib_Ed  : num  0 63 0 0 0 0 0 0 0 0 ...
##  $ ps_vital        : num  0 62 0 0 0 0 0 0 0 0 ...
##  $ ps_Manuf        : num  0 61 0 0 0 0 0 0 0 0 ...
##  $ ps_FDA          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ps_Left         : num  0 0 0 0 0 0 56 0 0 0 ...
##  $ ps_BadApp       : num  0 150 0 336 0 38 107 23 45 0 ...
##  $ ps_Foreign      : num  0 0 0 0 0 ...
##  $ pd_Disease      : num  29 0 0 0 270 199 0 0 0 0 ...
##  $ pd_Quality      : num  0 0 0 0 0 116 0 0 0 0 ...
##  $ pd_PSE          : num  0 0 0 0 0 0 0 56 0 0 ...
##  $ pd_CJ           : num  0 0 0 0 0 0 0 0 15 0 ...
##  $ pd_MAT          : num  38 0 0 0 243 581 0 0 0 0 ...
##  $ pd_Access       : num  0 0 0 0 58 27 0 28 0 0 ...
##  $ ds_PSE          : num  0 0 413 0 0 119 0 0 0 0 ...
##  $ ds_Prevent      : num  222 0 0 0 216 44 111 90 207 29 ...
##  $ ds_Stigma       : num  0 0 0 0 231 0 0 0 0 0 ...
##  $ ds_MAT          : num  22 0 447 0 457 965 0 0 0 0 ...
##  $ ds_Access       : num  0 0 550 43 689 82 0 0 0 0 ...
##  $ ds_Quality      : num  0 0 50 0 17 205 0 0 0 0 ...
##  $ ds_ODR          : num  162 0 0 0 554 0 0 0 175 31 ...
##  $ ds_Samari       : num  0 0 0 0 244 0 0 0 0 37 ...
##  $ ds_Coordinate   : num  0 0 0 0 0 0 31 0 181 0 ...
##  $ ds_Divert2PH    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ss_Take         : num  17 0 0 0 0 0 0 0 31 0 ...
##  $ ss_PDMP         : num  33 136 0 980 519 0 0 0 0 34 ...
##  $ ss_Reg          : num  0 0 0 0 40 0 0 0 0 0 ...
##  $ ss_New          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ss_Guide        : num  120 195 0 0 0 0 0 0 0 0 ...
##  $ ss_Cautious     : num  26 0 0 0 72 33 0 0 0 0 ...
##  $ ss_Prescrib_Ed  : num  12 0 0 929 488 22 0 0 0 0 ...
##  $ ss_Crim_Enforce : num  0 0 0 0 0 0 588 377 0 76 ...
##  $ ss_Penalt       : num  0 19 0 0 0 0 0 0 0 0 ...
##  $ Problem_Demand  : num  67 0 0 0 571 923 0 84 15 0 ...
##  $ Problem_Supply  : num  0 581 0 336 46 ...
##  $ Solutions_Demand: num  406 0 1460 43 2408 ...
##  $ Solutions_Supply: num  208 350 0 1909 1119 ...
##  $ TOTAL WORDS     : num  870 1386 973 2859 3381 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CASE = col_character(),
##   ..   YEAR = col_double(),
##   ..   SPEAKER_OR = col_character(),
##   ..   SENATE_COM = col_character(),
##   ..   HOUSE_COM = col_character(),
##   ..   SENATE = col_double(),
##   ..   GROUP_TYPE = col_character(),
##   ..   HEALTH_CJ = col_character(),
##   ..   FED_STATE = col_character(),
##   ..   ps_OPR = col_double(),
##   ..   ps_OPRS_H = col_double(),
##   ..   ps_prescrip = col_double(),
##   ..   ps_Prescribers = col_double(),
##   ..   ps_Prescrib_Ed = col_double(),
##   ..   ps_vital = col_double(),
##   ..   ps_Manuf = col_double(),
##   ..   ps_FDA = col_double(),
##   ..   ps_Left = col_double(),
##   ..   ps_BadApp = col_double(),
##   ..   ps_Foreign = col_double(),
##   ..   pd_Disease = col_double(),
##   ..   pd_Quality = col_double(),
##   ..   pd_PSE = col_double(),
##   ..   pd_CJ = col_double(),
##   ..   pd_MAT = col_double(),
##   ..   pd_Access = col_double(),
##   ..   ds_PSE = col_double(),
##   ..   ds_Prevent = col_double(),
##   ..   ds_Stigma = col_double(),
##   ..   ds_MAT = col_double(),
##   ..   ds_Access = col_double(),
##   ..   ds_Quality = col_double(),
##   ..   ds_ODR = col_double(),
##   ..   ds_Samari = col_double(),
##   ..   ds_Coordinate = col_double(),
##   ..   ds_Divert2PH = col_double(),
##   ..   ss_Take = col_double(),
##   ..   ss_PDMP = col_double(),
##   ..   ss_Reg = col_double(),
##   ..   ss_New = col_double(),
##   ..   ss_Guide = col_double(),
##   ..   ss_Cautious = col_double(),
##   ..   ss_Prescrib_Ed = col_double(),
##   ..   ss_Crim_Enforce = col_double(),
##   ..   ss_Penalt = col_double(),
##   ..   Problem_Demand = col_double(),
##   ..   Problem_Supply = col_double(),
##   ..   Solutions_Demand = col_double(),
##   ..   Solutions_Supply = col_double(),
##   ..   `TOTAL WORDS` = col_double()
##   .. )

Variable Summary

text_tbl <- data.frame(
  Variables = c("HEALTH_CJ","FED_STATE","ps_OPR","ps_OPRS_H","ps_prescrip","ps_Prescribers",
      "ps_Prescrib_Ed","ps_vital","ps_Manuf","ps_FDA","ps_Left","ps_BadApp","ps_Foreign","pd_Disease",
      "pd_Quality","pd_PSE","pd_CJ","pd_MAT","pd_Access","ds_PSE","ds_Prevent","ds_Stigma","ds_MAT",
      "ds_Access","ds_Quality","ds_ODR","ds_Samari","ds_Coordinate","ds_Divert2PH","ss_Take","ss_PDMP",
      "ss_Reg","ss_New","ss_Guide","ss_Cautious","ss_Prescrib_Ed","ss_Crim_Enforce","ss_Penalt",
      "Problem_Supply","Problem_Demand","Solutions_Demand","Solutions_Supply","TOTAL WORDS"),
  Description = c(
      "Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other",
      "Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities",
      "Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions",
      "WCPD blaming opioid prescriptions for heroin use",
      "WCPD generally blaming overprescription of opioids",
      "WCPD blaming prescribers for overprescribing",
      "WCPD blaming lack of prescriber education",
      "WCPD blaming 5th vital sign of pain",
      "WCPD blaming drug manufacturers",
      "WCPD blaming FDA",
      "WCPD blaming diversion of left-over prescriptions",
      "WCPD blaming bad apples for increasing drug supply",
      "WCPD blaming foreign actors for increasing drug supply",
      "WCPD acknowleding addiction as a disease",
      "WCPD blaming poor quality of addiction treatment",
      "WCPD blaming psychological, sociological, environmental or economical (PSEE) factors",
      "WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem",
      "WCPD blaming poor access to Medication Assisted Treatment (MAT)",
      "WCPD blaming poor access to Treatment Generally",
      "Word count of solutions (WCS) addressing PSEE factors",
      "WCS addressing demand side prevention (excluse preventioon of supply tactics)",
      "WCS addressing stigma",
      "WCS increasing access to MAT",
      "WCS increasing access to Treatment Generally",
      "WCS increasing quality of Treatment",
      "WCS increasing access to Overdose Reversal Medications",
      "WCS passage or strengthening of Good Samaritan Laws",
      "WCS coordinating between CJ and Health actors",
      "WCS that are alterntives to incarceration, like drug courts or treatment",
      "WCS involving drug take back programs",
      "WCS regarding Prescription Drug Monitoring Programs (PDMP)",
      "WCS involving rescheduling, adding black box labels, or regulation of opioids",
      "WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids",
      "WCS promoting prescriber guidelines",
      "WCS calling for more cautious prescribing practices generally",
      "WCS of prescriber or distributor education",
      "WCS commitment to criminal enforcement",
      "WCS increasing or creating new criminal penalities",
      "Total WC of all subcategories that define the problem as an issue of drug supply",
      "Total WC of all subcategories that define the problem as the demand for drugs",
      "Total WC of all subcategories that propose soltions aimed at decreasing the demand",
      "Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs",
      "Total WC of all words in each case"
  )
)

kable(text_tbl, booktabs = T)%>% 
  kable_styling(font_size=10,latex_options = c("striped","scale_down")) %>%
  group_rows("Interest Group Type", 1, 2) %>%
  group_rows("Problem Definition - Supply", 3, 13) %>%
  group_rows("Problem Definition - Demand", 14, 19) %>%
  group_rows("Solutions - Demand", 20, 28) %>%
  group_rows("Solutions - Supply", 29, 37) %>%
  group_rows("Theme Definitions", 38, 42) %>%
  group_rows("Other", 43, 43) %>%
  column_spec(1, width = "10em") %>%
  column_spec(2, width = "40em") %>%
  row_spec(0, bold=T, color = "white", background = "black")

Variables	Description
Interest Group Type
HEALTH_CJ	Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other
FED_STATE	Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities
Problem Definition - Supply
ps_OPR	Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions
ps_OPRS_H	WCPD blaming opioid prescriptions for heroin use
ps_prescrip	WCPD generally blaming overprescription of opioids
ps_Prescribers	WCPD blaming prescribers for overprescribing
ps_Prescrib_Ed	WCPD blaming lack of prescriber education
ps_vital	WCPD blaming 5th vital sign of pain
ps_Manuf	WCPD blaming drug manufacturers
ps_FDA	WCPD blaming FDA
ps_Left	WCPD blaming diversion of left-over prescriptions
ps_BadApp	WCPD blaming bad apples for increasing drug supply
ps_Foreign	WCPD blaming foreign actors for increasing drug supply
Problem Definition - Demand
pd_Disease	WCPD acknowleding addiction as a disease
pd_Quality	WCPD blaming poor quality of addiction treatment
pd_PSE	WCPD blaming psychological, sociological, environmental or economical (PSEE) factors
pd_CJ	WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem
pd_MAT	WCPD blaming poor access to Medication Assisted Treatment (MAT)
pd_Access	WCPD blaming poor access to Treatment Generally
Solutions - Demand
ds_PSE	Word count of solutions (WCS) addressing PSEE factors
ds_Prevent	WCS addressing demand side prevention (excluse preventioon of supply tactics)
ds_Stigma	WCS addressing stigma
ds_MAT	WCS increasing access to MAT
ds_Access	WCS increasing access to Treatment Generally
ds_Quality	WCS increasing quality of Treatment
ds_ODR	WCS increasing access to Overdose Reversal Medications
ds_Samari	WCS passage or strengthening of Good Samaritan Laws
ds_Coordinate	WCS coordinating between CJ and Health actors
Solutions - Supply
ds_Divert2PH	WCS that are alterntives to incarceration, like drug courts or treatment
ss_Take	WCS involving drug take back programs
ss_PDMP	WCS regarding Prescription Drug Monitoring Programs (PDMP)
ss_Reg	WCS involving rescheduling, adding black box labels, or regulation of opioids
ss_New	WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids
ss_Guide	WCS promoting prescriber guidelines
ss_Cautious	WCS calling for more cautious prescribing practices generally
ss_Prescrib_Ed	WCS of prescriber or distributor education
ss_Crim_Enforce	WCS commitment to criminal enforcement
Theme Definitions
ss_Penalt	WCS increasing or creating new criminal penalities
Problem_Supply	Total WC of all subcategories that define the problem as an issue of drug supply
Problem_Demand	Total WC of all subcategories that define the problem as the demand for drugs
Solutions_Demand	Total WC of all subcategories that propose soltions aimed at decreasing the demand
Solutions_Supply	Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs
Other
TOTAL WORDS	Total WC of all words in each case

Supply vs. Demand Descriptive Statistics

sum_PD <- sum(Dissertation_Dataset$Problem_Demand)
sum_PS <- sum(Dissertation_Dataset$Problem_Supply)
sum_SD <- sum(Dissertation_Dataset$Solutions_Demand)
sum_SS <- sum(Dissertation_Dataset$Solutions_Supply)
total_words_coded = sum_PD +sum_PS + sum_SD +sum_SS
sum_PD

## [1] 25602

sum_PS

## [1] 56087

sum_SD

## [1] 104790

sum_SS

## [1] 95780

sum_total_words <- sum(Dissertation_Dataset$`TOTAL WORDS`)
sum_total_words

## [1] 376426

total_words_coded

## [1] 282259

total_words_coded/sum_total_words

## [1] 0.7498393

All of the hearing testimony amounted to 376426. 246096 of those words, or 65%, of those words were coded.

The least time was spent discussing demand-side causes of the problem (wc = 23921). Supply side causes were discussed at 2x the rate of demand side causes (47620 v. 23921). However, the amount of time spend discussing demand-side and supply-side problems was far out-shadowed by the time spent discussing the proposed solutions to the problem. Despite the preference for discussing supply side soluions, the time spent discussing supply side solutions was close to that spent supporting demand-side solutions (87640 v. 86915).

sum_PD + sum_PS

## [1] 81689

sum_SD + sum_SS

## [1] 200570

Overeall, more time was spent discussing the solution than the problem. Nearly double the time was spent discussing solutions vs. problems (174555 v. 71541).

If we are interested in changing these to percentages or proportions…

sum_PD/total_words_coded

## [1] 0.09070393

sum_PS/total_words_coded

## [1] 0.1987076

sum_SD/total_words_coded

## [1] 0.3712548

sum_SS/total_words_coded

## [1] 0.3393337

(sum_PD + sum_PS)/total_words_coded

## [1] 0.2894115

(sum_SD + sum_SS)/total_words_coded

## [1] 0.7105885

(sum_PS + sum_SS)/total_words_coded

## [1] 0.5380413

Percentages of Total Discourse Each Category Represents

Creating a Loop to give me the total for each variable in the dataset.

library(tidyverse)
Count_Data <- Dissertation_Dataset[10:50] #create dataset of counts only
output <- vector("double", ncol(Count_Data))  
for (i in seq_along(Count_Data)) {
  output[[i]] <- (sum(Count_Data[[i]]))}
output

##  [1]   6527   9755   2399   3580    736   2548   2039    912   1127  11283
## [11]  15181   5665   1539   2850   6002   3365   6181   4535  24567   2555
## [21]  20040  21109   5361  14959    852   4783   6029   4267  22551   1485
## [31]   3456   4727   4143  15671  38870    610  25602  56087 104790  95780
## [41] 376426

cd <- ((output/total_words_coded)*100)

var <-names(Count_Data)

kable(cbind(var, cd), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down")) %>%
    row_spec(0, bold=T, color = "white", background = "black") %>%
    column_spec(1, bold=T)

% of coded discourse
var	cd
ps_OPR	2.31241519313822
ps_OPRS_H	3.45604568853429
ps_prescrip	0.849928611665173
ps_Prescribers	1.26833865350618
ps_Prescrib_Ed	0.260753421502946
ps_vital	0.902717008137916
ps_Manuf	0.722386177234384
ps_FDA	0.323107500557998
ps_Left	0.399278676676386
ps_BadApp	3.99739246578497
ps_Foreign	5.37839360303834
pd_Disease	2.0070219195845
pd_Quality	0.545243907191622
pd_PSE	1.00971093924374
pd_CJ	2.12641580959332
pd_MAT	1.19216747738779
pd_Access	2.18983274226863
ds_PSE	1.60668038928785
ds_Prevent	8.70370829628108
ds_Stigma	0.905196999918515
ds_MAT	7.09986218331391
ds_Access	7.47859235666533
ds_Quality	1.8993194193985
ds_ODR	5.29974243513936
ds_Samari	0.301850428152867
ds_Coordinate	1.69454295522906
ds_Divert2PH	2.13598149217563
ss_Take	1.51173213254493
ss_PDMP	7.98947066346866
ss_Reg	0.526112542027004
ss_New	1.22440737053557
ss_Guide	1.67470302098427
ss_Cautious	1.46780084957433
ss_Prescrib_Ed	5.55199302768025
ss_Crim_Enforce	13.7710400731243
ss_Penalt	0.21611356945217
Problem_Demand	9.07039279526959
Problem_Supply	19.8707569997768
Solutions_Demand	37.1254769555621
Solutions_Supply	33.9333732493915
TOTAL WORDS	133.361912286234

Group Types

levels(Dissertation_Dataset$GROUP_TYPE)

##  [1] "Child Welfare"            "Coalition"               
##  [3] "Distributors"             "Drug Court"              
##  [5] "Expert"                   "Family"                  
##  [7] "Federal Health Agency"    "Federal LE Agency"       
##  [9] "Governor"                 "Hospital System"         
## [11] "Housing Provider"         "Local Coalition"         
## [13] "Local Government"         "Local Health Agency"     
## [15] "Local Law Enforcement"    "Military"                
## [17] "Millenial Marketing"      "Prescribers"             
## [19] "Public Interest"          "Regional Law Enforcement"
## [21] "State Civil Enforcement"  "State Health Agency"     
## [23] "State Law Enforcement"    "State Legislator"        
## [25] "SUD Providers"            "Tribal Government"       
## [27] "Tribal Law Enforcement"

table(Dissertation_Dataset$GROUP_TYPE)

## 
##            Child Welfare                Coalition             Distributors 
##                        1                        1                        1 
##               Drug Court                   Expert                   Family 
##                        1                       10                        6 
##    Federal Health Agency        Federal LE Agency                 Governor 
##                       30                       18                        2 
##          Hospital System         Housing Provider          Local Coalition 
##                        5                        1                        1 
##         Local Government      Local Health Agency    Local Law Enforcement 
##                        2                        3                       10 
##                 Military      Millenial Marketing              Prescribers 
##                        2                        1                        4 
##          Public Interest Regional Law Enforcement  State Civil Enforcement 
##                        8                        3                        1 
##      State Health Agency    State Law Enforcement         State Legislator 
##                        8                       11                        4 
##            SUD Providers        Tribal Government   Tribal Law Enforcement 
##                       13                        2                        1

table(Dissertation_Dataset$GROUP_TYPE, Dissertation_Dataset$HEALTH_CJ)

##                           
##                            Health Law Enforcement Other
##   Child Welfare                 0               0     1
##   Coalition                     1               0     0
##   Distributors                  1               0     0
##   Drug Court                    0               1     0
##   Expert                       10               0     0
##   Family                        0               0     6
##   Federal Health Agency        30               0     0
##   Federal LE Agency             0              18     0
##   Governor                      0               0     2
##   Hospital System               5               0     0
##   Housing Provider              0               0     1
##   Local Coalition               1               0     0
##   Local Government              0               0     2
##   Local Health Agency           3               0     0
##   Local Law Enforcement         0              10     0
##   Military                      0               2     0
##   Millenial Marketing           0               0     1
##   Prescribers                   4               0     0
##   Public Interest               3               0     5
##   Regional Law Enforcement      0               3     0
##   State Civil Enforcement       1               0     0
##   State Health Agency           8               0     0
##   State Law Enforcement         0              11     0
##   State Legislator              0               0     4
##   SUD Providers                13               0     0
##   Tribal Government             0               0     2
##   Tribal Law Enforcement        0               1     0

library(tidyverse)

Count_Data <- Dissertation_Dataset[10:50] #create dataset of counts only

##cd_grp <- tapply(Dissertation_Dataset$'Total Words', Dissertation_Dataset$GROUP_TYPE,FUN=mean) ##doesnt work

###tapply(total_words_coded, Dissertation_Dataset$GROUP_TYPE,FUN=sum) ###DIDNT WORK

TWC supply v. demand by group

sum_pd_gp <- aggregate(Dissertation_Dataset$Problem_Demand, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum) ##YAY this works~
sum_ps_gp <- aggregate(Dissertation_Dataset$Problem_Supply, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)
sum_ds_gp<- aggregate(Dissertation_Dataset$Solutions_Demand, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)
sum_ss_gp <- aggregate(Dissertation_Dataset$Solutions_Supply, by = list(Dissertation_Dataset$GROUP_TYPE),                                                          FUN = sum)
cd_group <- (sum_pd_gp$x+sum_ps_gp$x+sum_ds_gp$x+sum_ss_gp$x) #total coded words per group, had to select the 2nd column titled x because the first column was the group names and it couldnt add up the group names. 
grp_names <- levels(Dissertation_Dataset$GROUP_TYPE) #extracting group names for table

library(kableExtra)
kable(cbind(grp_names, sum_pd_gp$x, sum_ps_gp$x, sum_ds_gp$x, sum_ss_gp$x, cd_group), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))

% of coded discourse
grp_names					cd_group
Child Welfare	86	0	513	0	599
Coalition	207	35	940	724	1906
Distributors	0	119	0	0	119
Drug Court	1384	171	393	36	1984
Expert	2216	1008	3736	3279	10239
Family	563	609	4177	476	5825
Federal Health Agency	3027	11610	34187	25332	74156
Federal LE Agency	225	18532	7631	31188	57576
Governor	22	124	1122	444	1712
Hospital System	689	329	1844	1073	3935
Housing Provider	310	0	374	0	684
Local Coalition	57	0	640	936	1633
Local Government	95	220	263	289	867
Local Health Agency	2129	178	8937	1445	12689
Local Law Enforcement	334	5691	4035	5045	15105
Military	0	951	0	3157	4108
Millenial Marketing	0	0	2718	0	2718
Prescribers	719	3761	2719	3913	11112
Public Interest	4355	614	6728	1775	13472
Regional Law Enforcement	0	3118	1043	1886	6047
State Civil Enforcement	0	900	114	16	1030
State Health Agency	1042	1325	8376	3390	14133
State Law Enforcement	1736	2642	4460	7776	16614
State Legislator	720	382	998	922	3022
SUD Providers	5129	3281	7548	1756	17714
Tribal Government	473	464	1204	545	2686
Tribal Law Enforcement	84	23	90	377	574

I spot checked and it seems that all the columns add up, becuase before they were not adding up. Now, I need to calculate the percentage of the groups coded word count that was represented by each category.

percA <- ((sum_pd_gp$x/cd_group)*100) 
percB <- ((sum_ps_gp$x/cd_group)*100)
percC <- ((sum_ds_gp$x/cd_group)*100)
percD <- ((sum_ss_gp$x/cd_group)*100)

library(kableExtra)
kable(cbind(grp_names, percA,percB,percC, percD), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))

% of coded discourse
grp_names	percA	percB	percC	percD
Child Welfare	14.3572621035058	0	85.6427378964942	0
Coalition	10.8604407135362	1.83630640083945	49.3179433368311	37.9853095487933
Distributors	0	100	0	0
Drug Court	69.758064516129	8.61895161290323	19.8084677419355	1.81451612903226
Expert	21.6427385486864	9.84471139759742	36.4879382752222	32.024611778494
Family	9.66523605150215	10.4549356223176	71.7081545064378	8.17167381974249
Federal Health Agency	4.08193537947031	15.6561842602082	46.1014617832677	34.1604185770538
Federal LE Agency	0.390787828261776	32.1870223704321	13.2537862998472	54.1684035014589
Governor	1.28504672897196	7.24299065420561	65.5373831775701	25.9345794392523
Hospital System	17.5095298602287	8.36086404066074	46.861499364676	27.2681067344346
Housing Provider	45.3216374269006	0	54.6783625730994	0
Local Coalition	3.49050826699326	0	39.1916717697489	57.3178199632578
Local Government	10.957324106113	25.3748558246828	30.3344867358708	33.3333333333333
Local Health Agency	16.7783119237135	1.40278981795256	70.4310820395618	11.3878162187722
Local Law Enforcement	2.21118834822906	37.6762661370407	26.7130089374379	33.3995365772923
Military	0	23.1499513145083	0	76.8500486854917
Millenial Marketing	0	0	100	0
Prescribers	6.47048236141109	33.8462922966163	24.4690424766019	35.2141828653708
Public Interest	32.3263064133017	4.55760095011876	49.9406175771972	13.1754750593824
Regional Law Enforcement	0	51.5627583925914	17.2482222589714	31.1890193484372
State Civil Enforcement	0	87.378640776699	11.0679611650485	1.55339805825243
State Health Agency	7.37281539658954	9.37522111370551	59.2655487157716	23.9864147739333
State Law Enforcement	10.4490188997231	15.9022511135187	26.8448296617311	46.8039003250271
State Legislator	23.8252812706817	12.6406353408339	33.0244870946393	30.5095962938451
SUD Providers	28.9544992661172	18.5220729366603	42.6103646833013	9.91306311392119
Tribal Government	17.6098287416232	17.2747580044676	44.825018615041	20.2903946388682
Tribal Law Enforcement	14.6341463414634	4.00696864111498	15.6794425087108	65.6794425087108

Now that we are done looking at the breakdown of group type for the themes, we can look at the individual narratives by group type.

wc_cat <-(aggregate(Count_Data, by = list(Dissertation_Dataset$GROUP_TYPE), 
                                                         FUN = sum)) #total number of words for each category by group type
wc_cat2 <-wc_cat %>% select(2:42) #select numeric columns
perc_grp <-((wc_cat2/cd_group)*100) #calc proportion then change to perc
kable(cbind(grp_names, perc_grp), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))

% of coded discourse
grp_names	ps_OPR	ps_OPRS_H	ps_prescrip	ps_Prescribers	ps_Prescrib_Ed	ps_vital	ps_Manuf	ps_FDA	ps_Left	ps_BadApp	ps_Foreign	pd_Disease	pd_Quality	pd_PSE	pd_CJ	pd_MAT	pd_Access	ds_PSE	ds_Prevent	ds_Stigma	ds_MAT	ds_Access	ds_Quality	ds_ODR	ds_Samari	ds_Coordinate	ds_Divert2PH	ss_Take	ss_PDMP	ss_Reg	ss_New	ss_Guide	ss_Cautious	ss_Prescrib_Ed	ss_Crim_Enforce	ss_Penalt	Problem_Demand	Problem_Supply	Solutions_Demand	Solutions_Supply	TOTAL WORDS
Child Welfare	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	4.507512	0.0000000	0.0000000	0.0000000	0.0000000	9.8497496	3.5058431	0.0000000	0.0000000	0.0000000	44.240401	0.0000000	0.0000000	0.0000000	0.0000000	37.8964942	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	14.3572621	0.000000	85.64274	0.000000	180.30050
Coalition	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	1.8363064	0.000000	6.925498	0.7345226	0.0000000	0.0000000	0.0000000	3.2004197	3.3578174	0.0000000	0.0000000	6.4533054	11.122770	7.8174187	10.5456453	10.0209864	0.0000000	0.0000000	7.1353620	14.690451	0.0000000	0.0000000	0.0000000	0.0000000	16.1594963	0.0000000	0.0000000	10.8604407	1.836306	49.31794	37.985309	120.77650
Distributors	0.0000000	100.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	100.000000	0.00000	0.000000	2894.95798
Drug Court	4.6875000	3.931452	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	69.7580645	0.0000000	0.0000000	0.0000000	0.0000000	1.4616935	11.9959677	1.9657258	4.385081	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	1.8145161	0.0000000	0.0000000	0.0000000	69.7580645	8.618952	19.80847	1.814516	112.65121
Expert	2.1388808	1.054790	0.3320637	1.5821858	1.9142494	0.0000000	0.1367321	0.8594589	0.4492626	1.3770876	0.000000	9.444282	2.7248755	0.2246313	4.1019631	4.1605626	0.9864245	0.0000000	11.5440961	0.1953316	15.8218576	4.453560	1.2501221	3.2229710	0.0000000	0.0000000	0.0000000	1.5528860	19.601524	0.0000000	0.0683661	3.4769020	4.9125891	2.3830452	0.0292997	0.0000000	21.6427385	9.844711	36.48794	32.024612	204.96142
Family	2.2317597	2.008584	0.0000000	2.6437768	0.0000000	3.1244635	0.0000000	0.0000000	0.1201717	0.3261803	0.000000	5.493562	0.0000000	0.0000000	1.5107296	0.0000000	2.6609442	7.8798283	49.1845494	1.8712446	0.0000000	5.785408	2.8154506	3.5193133	0.0000000	0.6523605	0.0000000	0.0000000	2.472103	0.0000000	0.0000000	3.7081545	0.0000000	1.1845494	0.8068670	0.0000000	9.6652361	10.454936	71.70815	8.171674	215.75966
Federal Health Agency	5.1539997	2.520363	1.7571066	1.9593829	0.2966719	0.0000000	0.6850423	0.0000000	0.0000000	3.2836183	0.000000	1.011381	0.1065322	0.0000000	0.0000000	0.8657425	2.0982793	0.1860942	7.8914720	0.0647284	16.0135390	9.430120	2.1630077	8.0195803	0.0000000	0.5825557	1.7503641	0.5124332	13.181671	0.9277739	4.6186418	2.7509574	3.6962619	6.8409839	1.6316953	0.0000000	4.0819354	15.656184	46.10146	34.160419	133.49965
Federal LE Agency	2.1015701	10.007642	0.0885786	0.3056829	0.0000000	0.0000000	0.0000000	0.0000000	0.7086286	5.7385022	13.236418	0.041684	0.0000000	0.0000000	0.2674726	0.0000000	0.0816312	0.3577880	6.8778658	0.0000000	0.0000000	3.022092	0.2379464	0.3925247	0.0000000	2.2127275	0.1528415	3.9443518	4.779769	0.4394192	0.0000000	0.0000000	0.0000000	10.7423232	34.0176462	0.2448937	0.3907878	32.187022	13.25379	54.168403	100.21884
Governor	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	7.242991	1.285047	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	17.9906542	1.2850467	10.2219626	24.591121	0.0000000	3.9719626	0.0000000	0.0000000	7.4766355	3.9719626	3.971963	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	17.9906542	0.0000000	1.2850467	7.242991	65.53738	25.934579	105.43224
Hospital System	0.0000000	1.143583	2.0584498	0.0000000	0.0000000	3.1003812	0.0000000	0.0000000	0.0000000	2.0584498	0.000000	4.320203	0.0000000	0.0000000	1.8805591	4.2439644	7.0648030	0.0000000	0.8132147	8.0559085	4.5743329	27.598475	5.8195680	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	13.036849	0.0000000	0.0000000	9.5806861	3.1766201	1.4739517	0.0000000	0.0000000	17.5095299	8.360864	46.86150	27.268107	184.75222
Housing Provider	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	13.5964912	0.0000000	6.7251462	25.0000000	35.9649123	0.0000000	6.7251462	3.3625731	4.970760	3.6549708	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	45.3216374	0.000000	54.67836	0.000000	157.01754
Local Coalition	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	3.4905083	0.0000000	11.8799755	0.0000000	4.5927740	4.715248	0.5511329	12.4311084	0.0000000	5.0214329	0.0000000	18.8609920	23.270055	0.0000000	0.0000000	0.0000000	0.0000000	15.1867728	0.0000000	0.0000000	3.4905083	0.000000	39.19167	57.317820	203.79669
Local Government	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	16.4936563	8.881199	8.765859	0.0000000	0.0000000	0.0000000	0.0000000	2.1914648	0.0000000	12.9181084	0.0000000	0.0000000	5.420992	0.0000000	5.8823529	0.0000000	6.1130334	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	33.3333333	0.0000000	10.9573241	25.374856	30.33449	33.333333	169.66551
Local Health Agency	0.0000000	0.000000	0.2679486	0.9614627	0.1733785	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	5.469304	0.0000000	0.7802033	4.6496966	1.6707384	4.2083695	7.3055402	6.8878556	3.6882339	7.7389865	19.221373	1.8362361	18.2283868	0.4570888	0.4728505	4.5945307	1.5604067	1.694381	1.1900071	0.1891402	3.9246592	0.2994720	2.5297502	0.0000000	0.0000000	16.7783119	1.402790	70.43108	11.387816	94.67255
Local Law Enforcement	1.2446210	2.118504	0.0000000	0.9069844	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	13.5518040	19.854353	0.000000	0.0000000	0.0000000	2.0721615	0.0000000	0.1390268	0.9467064	3.6809004	0.0000000	0.1986097	4.614366	0.0000000	5.0910295	0.0000000	6.4614366	5.7199603	0.8407812	2.846739	0.0000000	0.0000000	0.0000000	0.0000000	1.3902681	26.9182390	1.4035088	2.2111883	37.676266	26.71301	33.399537	120.51639
Military	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	23.149951	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	76.8500487	0.0000000	0.0000000	23.149951	0.00000	76.850049	279.99026
Millenial Marketing	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	100.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	100.00000	0.000000	103.67918
Prescribers	0.0000000	1.619870	3.4557235	3.0507559	0.5669546	13.5439165	1.4758819	0.0000000	4.5536357	5.2915767	0.287977	2.690785	0.0000000	0.0000000	1.0709143	2.1868251	0.5219582	0.0000000	2.1598272	2.0788337	5.7865371	6.803456	0.1529878	5.2915767	2.1958243	0.0000000	0.0000000	0.0000000	14.713823	2.2858171	0.0000000	4.0586753	0.8999280	13.0849532	0.0000000	0.1709863	6.4704824	33.846292	24.46904	35.214183	115.40677
Public Interest	1.4697150	0.601247	0.1633017	0.2078385	0.8461995	0.0000000	0.0000000	0.0000000	0.0000000	1.2692993	0.000000	1.573634	4.6763658	0.2597981	21.5261283	2.9765439	1.3138361	4.3349169	15.9516033	0.7274347	1.3954869	3.830166	11.2455463	11.0302850	0.3117577	0.0000000	1.1134204	1.2173397	5.433492	0.0000000	0.0000000	2.3307601	0.0296912	2.7835511	1.3806413	0.0000000	32.3263064	4.557601	49.94062	13.175475	123.37441
Regional Law Enforcement	0.0000000	2.017529	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.9260790	4.6303952	43.988755	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	5.3249545	0.0000000	0.0000000	1.868695	0.0000000	1.2568216	0.0000000	8.7977510	0.0000000	0.0000000	1.984455	0.0000000	0.0000000	0.0000000	0.5787994	0.7110964	26.1617331	1.7529353	0.0000000	51.562758	17.24822	31.189019	122.57318
State Civil Enforcement	0.0000000	0.000000	2.1359223	0.0000000	0.0000000	0.0000000	85.2427184	0.0000000	0.0000000	0.0000000	0.000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	3.4951456	0.0000000	3.3009709	2.427185	0.0000000	1.8446602	0.0000000	0.0000000	0.0000000	1.5533981	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	87.378641	11.06796	1.553398	151.45631
State Health Agency	0.4669921	0.077832	0.9552112	0.4033114	0.0000000	4.1746268	0.2759499	0.0000000	0.5377485	2.4835491	0.000000	2.299583	0.0000000	1.4222034	0.1273615	1.1816316	2.3420364	5.1227623	14.3140168	2.4623222	7.2525295	10.542701	4.2029293	10.8894078	0.0000000	1.8184391	2.6604401	2.8939362	10.875257	0.7075639	0.0000000	2.6745914	1.6344725	4.1038704	1.0967240	0.0000000	7.3728154	9.375221	59.26555	23.986415	131.44414
State Law Enforcement	0.6921873	2.455760	0.4574455	2.9794150	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	5.3388708	3.978572	1.203804	1.3422415	1.2579752	5.7662213	0.4815216	0.3972553	0.3551222	3.1298905	0.9028530	1.6793066	2.377513	0.4092934	3.9484772	1.1195377	4.3577706	8.5650656	0.1865896	1.011195	0.0000000	0.0000000	0.3972553	0.0000000	0.0421331	44.6310341	0.5356928	10.4490189	15.902251	26.84483	46.803900	148.36885
State Legislator	5.2945069	1.257445	1.4890801	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	4.5996029	0.000000	2.779616	0.0000000	0.0000000	2.2170748	9.3977498	9.4308405	2.8457975	0.0000000	0.4301787	12.0119126	3.706155	0.0000000	2.7134348	0.7941760	5.0959629	5.4268696	0.0000000	30.509596	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	23.8252813	12.640635	33.02449	30.509596	185.27465
SUD Providers	1.6822852	2.602461	1.1967935	1.6822852	0.6830755	0.8411426	2.3314892	4.6516879	0.0000000	2.8282714	0.022581	7.694479	0.8750141	2.5968161	1.5637349	3.9347409	12.2897144	4.5218471	2.6193971	2.5234278	13.4695721	13.452636	2.7605284	1.1290505	0.6040420	0.8637236	0.6661398	0.0000000	4.668624	0.2201648	0.0000000	0.0000000	1.8629333	2.5968161	0.3217794	0.2427459	28.9544993	18.522073	42.61036	9.913063	121.42373
Tribal Government	1.0424423	1.340283	0.0000000	5.9195830	0.0000000	0.0000000	0.8562919	0.0000000	1.0424423	5.1377513	1.935964	0.000000	5.9195830	10.7967238	0.8935220	0.0000000	0.0000000	2.8667163	1.5264334	0.0000000	0.0000000	15.934475	0.0000000	0.0000000	0.0000000	1.7870439	22.7103500	0.0000000	1.712584	0.0000000	0.0000000	1.1541325	0.0000000	1.3402829	16.0833954	0.0000000	17.6098287	17.274758	44.82502	20.290395	169.06180
Tribal Law Enforcement	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	4.0069686	0.000000	0.000000	0.0000000	9.7560976	0.0000000	0.0000000	4.8780488	0.0000000	15.6794425	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	65.6794425	0.0000000	14.6341463	4.006969	15.67944	65.679442	754.70383

Health vs. CJ

Supply vs. Demand

sum_pd_hc <- aggregate(Dissertation_Dataset$Problem_Demand, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ps_hc <- aggregate(Dissertation_Dataset$Problem_Supply, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ds_hc<- aggregate(Dissertation_Dataset$Solutions_Demand, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)
sum_ss_hc <- aggregate(Dissertation_Dataset$Solutions_Supply, by = list(Dissertation_Dataset$HEALTH_CJ),                                                          FUN = sum)
cd_hc <- (sum_pd_hc$x+sum_ps_hc$x+sum_ds_hc$x+sum_ss_hc$x) #total coded words per group, had to select the 2nd column titled x because the first column was the group names and it couldnt add up the group names. 

perc1 <- ((sum_pd_hc$x/cd_hc)*100)
perc2 <- ((sum_ps_hc$x/cd_hc)*100)
perc3 <- ((sum_ds_hc$x/cd_hc)*100)
perc4 <- ((sum_ss_hc$x/cd_hc)*100)

grp_names_hc <- levels(Dissertation_Dataset$HEALTH_CJ) #extracting group names for table

library(kableExtra)
kable(cbind(grp_names_hc, perc1, perc2, perc3, perc4), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))

% of coded discourse
grp_names_hc	perc1	perc2	perc3	perc4
Health	10.5886264513563	14.9115658676194	46.5261977560613	27.973609924963
Law Enforcement	3.68892635871696	30.5152537055917	17.3045251352835	48.4912948004078
Other	20.8376768428891	7.76619508562919	58.7155621742368	12.680565897245

wc_cat_hc <-(aggregate(Count_Data, by = list(Dissertation_Dataset$HEALTH_CJ), 
                                                         FUN = sum)) #total number of words for each category by Health_CJ
wc_cat2 <-wc_cat_hc %>% select(2:42) #select numeric columns
perc_hc <-((wc_cat2/cd_hc)*100) #calc proportion then change to perc
kable(cbind(grp_names_hc, perc_hc), caption = "% of coded discourse", booktabs = T) %>%
    kable_styling(font_size=12,latex_options = c("striped","scale_down"))

% of coded discourse
grp_names_hc	ps_OPR	ps_OPRS_H	ps_prescrip	ps_Prescribers	ps_Prescrib_Ed	ps_vital	ps_Manuf	ps_FDA	ps_Left	ps_BadApp	ps_Foreign	pd_Disease	pd_Quality	pd_PSE	pd_CJ	pd_MAT	pd_Access	ds_PSE	ds_Prevent	ds_Stigma	ds_MAT	ds_Access	ds_Quality	ds_ODR	ds_Samari	ds_Coordinate	ds_Divert2PH	ss_Take	ss_PDMP	ss_Reg	ss_New	ss_Guide	ss_Cautious	ss_Prescrib_Ed	ss_Crim_Enforce	ss_Penalt	Problem_Demand	Problem_Supply	Solutions_Demand	Solutions_Supply	TOTAL WORDS
Health	2.978010	1.820837	1.4518453	1.6030927	0.4798195	1.5424634	1.3142883	0.594559	0.4094112	2.693770	0.0234694	3.1846719	0.7542815	0.5332777	0.9765892	1.6650260	3.4747801	1.7732461	7.169912	1.2888631	12.3677400	10.438031	3.2146606	7.692759	0.3911572	0.6414979	1.548331	1.1056711	11.460255	0.8031762	2.253066	2.8215475	2.6520461	5.9084301	0.9289984	0.0404196	10.588627	14.911566	46.52620	27.97361	135.8795
Law Enforcement	1.574386	6.558309	0.1245000	0.7920947	0.0000000	0.0000000	0.0000000	0.000000	0.4548663	6.412242	14.5988550	0.2195906	0.2186103	1.6165399	1.3969493	0.0784252	0.1588111	0.3999686	5.369187	0.3803623	0.3411497	2.972316	0.2009646	1.693005	0.1823386	3.4369853	2.328249	2.3811858	3.401694	0.2480198	0.000000	0.0647008	0.0696024	6.3181319	35.4707474	0.5372128	3.688926	30.515254	17.30453	48.49129	123.5560
Other	1.314222	1.012658	0.1675354	1.1653016	0.0000000	0.6775875	0.0856292	0.000000	0.1303053	2.271035	0.9419211	2.0699926	0.5919583	1.4259121	11.4631422	2.7215190	2.5651526	5.2382725	30.126582	0.7073716	2.6842889	7.691735	0.8376768	5.331348	0.2457185	1.0908414	4.761728	0.5286672	5.591958	0.0000000	0.000000	1.2397617	0.0148920	0.6068503	4.6984363	0.0000000	20.837677	7.766195	58.71556	12.68057	156.2249

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

library(sandwich)
library(msm)

attach(Dissertation_Dataset)
mpd <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd)

## 
## Call:
## glm(formula = Problem_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.551988   0.007847 -325.24   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03   <2e-16 ***
## HEALTH_CJOther            0.537455   0.015500   34.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7

#robust standard errors
cov.mpd <- vcovHC(mpd, type="HC0")
std.err <- sqrt(diag(cov.mpd))
r.est <- cbind(Estimate= coef(mpd), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd)/std.err), lower.tail=FALSE),
LL = coef(mpd) - 1.96 * std.err,
UL = coef(mpd) + 1.96 * std.err)

#chisquare
with(mpd, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE)))

##      res.deviance  df p
## [1,]     55391.49 147 0

#IRR
spd <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd), cov.mpd)

## exponentiate old estimates dropping the p values
rexp.est <- exp(r.est[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est[, "Robust SE"] <- spd

rexp.est

##                            Estimate  Robust SE        LL        UL
## (Intercept)              0.07792656 0.01356128 0.0554053 0.1096023
## HEALTH_CJLaw Enforcement 0.38313395 0.19599836 0.1405713 1.0442501
## HEALTH_CJOther           1.71164462 0.75342510 0.7223175 4.0560106

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

library(sandwich)
library(msm)

mpd <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd)

## 
## Call:
## glm(formula = Problem_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.551988   0.007847 -325.24   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03   <2e-16 ***
## HEALTH_CJOther            0.537455   0.015500   34.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7

#robust standard errors
cov.mpd <- vcovHC(mpd, type="HC0")
std.err <- sqrt(diag(cov.mpd))
r.est <- cbind(Estimate= coef(mpd), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd)/std.err), lower.tail=FALSE),
LL = coef(mpd) - 1.96 * std.err,
UL = coef(mpd) + 1.96 * std.err)

#chisquare
with(mpd, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE)))

##      res.deviance  df p
## [1,]     55391.49 147 0

#IRR
spd <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd), cov.mpd)

## exponentiate old estimates dropping the p values
rexp.est <- exp(r.est[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est[, "Robust SE"] <- spd

rexp.est

##                            Estimate  Robust SE        LL        UL
## (Intercept)              0.07792656 0.01356128 0.0554053 0.1096023
## HEALTH_CJLaw Enforcement 0.38313395 0.19599836 0.1405713 1.0442501
## HEALTH_CJOther           1.71164462 0.75342510 0.7223175 4.0560106

Is there overdispersion?

with(Dissertation_Dataset, tapply(Problem_Demand, HEALTH_CJ, function(x) {
    sprintf("M (SD) = %1.2f (%1.2f)", mean(x), sd(x))
}))

##                     Health            Law Enforcement 
## "M (SD) = 203.03 (316.87)"  "M (SD) = 81.80 (260.19)" 
##                      Other 
## "M (SD) = 233.21 (503.91)"

Since the variance at each level of Health_CJ are higher than the mean it suggests overdispersion. Therefore, a negative binomial analysis may better suit the data.

Negative Binomial

??Not sure if I should use the log of the offset for the negative binomial model as well…

library(MASS)
summary(m1 <- glm.nb(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))))

## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.1482280511, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.55893  -1.41633  -0.45537  -0.00702   1.84518  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -2.4600     0.2905  -8.468
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.7417     0.4810  -1.542
## Dissertation_Dataset$HEALTH_CJOther             0.2567     0.6048   0.424
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.123    
## Dissertation_Dataset$HEALTH_CJOther              0.671    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1482) family taken to be 1)
## 
##     Null deviance: 154.15  on 149  degrees of freedom
## Residual deviance: 151.24  on 147  degrees of freedom
## AIC: 1411.2
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.1482 
##           Std. Err.:  0.0182 
## 
##  2 x log-likelihood:  -1403.1660

The dispesion parameter is less than 1 indicating that dispersion is not an issue and the poisson model is the better fit.

Set others as the reference level

Dissertation_Dataset$HEALTH_CJ <- relevel(Dissertation_Dataset$HEALTH_CJ, ref="Health") #set others as reference level
msdothers <- glm(Problem_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), data=Dissertation_Dataset, family=poisson)
(coefmsdo <-cbind(Estimate = coef(msdothers)))

##                            Estimate
## (Intercept)              -2.5519884
## HEALTH_CJLaw Enforcement -0.9593706
## HEALTH_CJOther            0.5374547

exp(coefmsdo)

##                            Estimate
## (Intercept)              0.07792656
## HEALTH_CJLaw Enforcement 0.38313395
## HEALTH_CJOther           1.71164462

Health_CJ actors in Problem_Supply

library(sandwich)
library(msm)

mpd3 <- glm(Problem_Supply~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
summary(mpd3)

## 
## Call:
## glm(formula = Problem_Supply ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -43.785  -21.290  -10.348    8.053   66.663  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.209632   0.006612 -334.18   <2e-16 ***
## HEALTH_CJLaw Enforcement  0.811164   0.008709   93.14   <2e-16 ***
## HEALTH_CJOther           -0.791884   0.022872  -34.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 88724  on 149  degrees of freedom
## Residual deviance: 75832  on 147  degrees of freedom
## AIC: 76581
## 
## Number of Fisher Scoring iterations: 6

#robust standard errors
cov.mpd3 <- vcovHC(mpd3, type="HC0")
std.err3 <- sqrt(diag(cov.mpd3))
r.est3 <- cbind(Estimate= coef(mpd3), "Robust SE" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(mpd3)/std.err), lower.tail=FALSE),
LL = coef(mpd3) - 1.96 * std.err,
UL = coef(mpd3) + 1.96 * std.err)

#chisquare
with(mpd3, cbind(res.deviance = deviance, df = df.residual,
  p = pchisq(deviance, df.residual, lower.tail=FALSE)))

##      res.deviance  df p
## [1,]     75832.09 147 0

#IRR
spd3 <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3)), 
                                                coef(mpd3), cov.mpd3)

## exponentiate old estimates dropping the p values
rexp.est3 <- exp(r.est3[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est3[, "Robust SE"] <- spd3

rexp.est3

##                           Estimate  Robust SE         LL        UL
## (Intercept)              0.1097411 0.02091593 0.07802521 0.1543488
## HEALTH_CJLaw Enforcement 2.2505259 0.53974332 0.82571486 6.1339173
## HEALTH_CJOther           0.4529904 0.13103653 0.19116285 1.0734318

Health_CJ actors on Solutions Demand

msd2 <- glm(Solutions_Demand~HEALTH_CJ +offset(log(`TOTAL WORDS`)), family=poisson, data=Dissertation_Dataset)
summary(msd2)

## 
## Call:
## glm(formula = Solutions_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson, data = Dissertation_Dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -67.903  -20.371   -5.575   13.398   54.310  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.071753   0.003743 -286.31   <2e-16 ***
## HEALTH_CJLaw Enforcement -0.893973   0.008406 -106.35   <2e-16 ***
## HEALTH_CJOther            0.093162   0.008799   10.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 99574  on 149  degrees of freedom
## Residual deviance: 84811  on 147  degrees of freedom
## AIC: 85876
## 
## Number of Fisher Scoring iterations: 5

(coefmsd2 <-cbind(Estimate = coef(msd2)))

##                             Estimate
## (Intercept)              -1.07175328
## HEALTH_CJLaw Enforcement -0.89397314
## HEALTH_CJOther            0.09316165

exp(coefmsd2)

##                           Estimate
## (Intercept)              0.3424077
## HEALTH_CJLaw Enforcement 0.4090274
## HEALTH_CJOther           1.0976391

summary(msd6 <- glm.nb(Solutions_Demand~HEALTH_CJ +offset(log(`TOTAL WORDS`))))

## 
## Call:
## glm.nb(formula = Solutions_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     init.theta = 0.542540841, link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0014  -0.8093  -0.2305   0.3149   1.5528  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -1.1242     0.1519  -7.403 1.33e-13 ***
## HEALTH_CJLaw Enforcement  -0.5659     0.2514  -2.251   0.0244 *  
## HEALTH_CJOther             0.0218     0.3162   0.069   0.9450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.5425) family taken to be 1)
## 
##     Null deviance: 190.42  on 149  degrees of freedom
## Residual deviance: 185.22  on 147  degrees of freedom
## AIC: 2184.7
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.5425 
##           Std. Err.:  0.0578 
## 
##  2 x log-likelihood:  -2176.6570

Dispersion parameter is less than 1 so we don’t need to use a negative binomial model because overdispersion is not an issue.

Health_CJ actors on Solutions_Supply

msd3 <- glm(Solutions_Supply~HEALTH_CJ +offset(log(`TOTAL WORDS`)), family=poisson, data=Dissertation_Dataset)
summary(msd3)

## 
## Call:
## glm(formula = Solutions_Supply ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)), 
##     family = poisson, data = Dissertation_Dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -52.652  -18.997   -4.503    8.681   46.606  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.580507   0.004828 -327.39   <2e-16 ***
## HEALTH_CJLaw Enforcement  0.645197   0.006597   97.80   <2e-16 ***
## HEALTH_CJOther           -0.930719   0.017802  -52.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 82877  on 149  degrees of freedom
## Residual deviance: 65967  on 147  degrees of freedom
## AIC: 66949
## 
## Number of Fisher Scoring iterations: 6

(coefmsd3 <-cbind(Estimate = coef(msd3)))

##                            Estimate
## (Intercept)              -1.5805073
## HEALTH_CJLaw Enforcement  0.6451971
## HEALTH_CJOther           -0.9307186

exp(coefmsd3)

##                           Estimate
## (Intercept)              0.2058706
## HEALTH_CJLaw Enforcement 1.9063627
## HEALTH_CJOther           0.3942703

El-Sabawi Dissertation Website

Taleed El-Sabawi

1/2/2019

Variable Summary

Supply vs. Demand Descriptive Statistics

Percentages of Total Discourse Each Category Represents

Group Types

TWC supply v. demand by group

Health vs. CJ

Supply vs. Demand

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

Is there overdispersion?

Negative Binomial

Set others as the reference level

Health_CJ actors in Problem_Supply

Health_CJ actors on Solutions Demand

Health_CJ actors on Solutions_Supply