Import Dataset

library(readr)
library(expss)
# Read the coded testimony data, parsing the three categorical columns as
# factors with explicitly declared level sets.
Dissertation_Dataset <- read_csv(
  "Dissertation_Dataset.csv",
  col_types = cols(
    FED_STATE = col_factor(
      levels = c("Indian Affairs", "Federal", "Local", "State", "Private", "Regional")
    ),
    HEALTH_CJ = col_factor(
      levels = c("Law Enforcement", "Health", "Generalist", "Other")
    ),
    SENATE = col_factor(levels = c("0", "1", "2"))
  )
)

# Attach human-readable variable labels (expss) to the two grouping factors.
Dissertation_Dataset <- apply_labels(
  Dissertation_Dataset,
  HEALTH_CJ = "Health vs. Criminal Justice Groups",
  FED_STATE = "Private, Public, & Level of Gov't"
)

Summary of Variables

The following is a summary of descriptive statistics for the variables of interest. Please see the chart below it for a definition of each variable.

library(kableExtra)
library(psych)
options(knitr.table.format = "html")
# Descriptive statistics (psych::describe) for dataset columns 7 through 49,
# rendered as a styled table with a white-on-black header row and a bold
# first column.
Count_Data <- Dissertation_Dataset[7:49]
sum_table <- describe(Count_Data)

kable(
  cbind(sum_table),
  caption = "Summary Stats Count Variables",
  booktabs = TRUE
) %>%
  kable_styling(font_size = 12, latex_options = c("striped", "scale_down")) %>%
  row_spec(0, bold = TRUE, color = "white", background = "black") %>%
  column_spec(1, bold = TRUE)

Summary Stats Count Variables
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
HEALTH_CJ*	1	144	1.972222	0.8924269	2.0	1.844828	0.0000	1	4	3	0.9916517	0.4678727	0.0743689
FED_STATE*	2	144	3.395833	1.2858496	3.0	3.396552	1.4826	1	6	5	0.0042446	-1.4272499	0.1071541
ps_OPR	3	144	34.076389	110.6041312	0.0	6.939655	0.0000	0	705	705	4.5262394	21.9248207	9.2170109
ps_OPRS_H	4	144	57.243056	140.0133483	0.0	18.655172	0.0000	0	648	648	2.9813662	8.2867048	11.6677790
ps_prescrip	5	144	12.826389	44.3629862	0.0	1.215517	0.0000	0	276	276	4.4182595	20.7450086	3.6969155
ps_Prescribers	6	144	23.597222	63.6338213	0.0	7.000000	0.0000	0	389	389	3.5454209	13.6041732	5.3028184
ps_Prescrib_Ed	7	144	4.472222	21.1600443	0.0	0.000000	0.0000	0	169	169	5.6656580	34.4148461	1.7633370
ps_vital	8	144	17.694444	128.0398363	0.0	0.000000	0.0000	0	1443	1443	9.9286633	104.8480298	10.6699864
ps_Manuf	9	144	12.395833	78.6083697	0.0	0.000000	0.0000	0	878	878	9.5397241	99.7276241	6.5506975
ps_FDA	10	144	6.333333	57.4216401	0.0	0.000000	0.0000	0	667	667	10.7209997	119.0504687	4.7851367
ps_Left	11	144	7.826389	47.7619871	0.0	0.000000	0.0000	0	506	506	8.6277872	82.2819784	3.9801656
ps_BadApp	12	144	72.131944	174.1233887	0.0	27.120690	0.0000	0	1203	1203	3.6030090	15.2237202	14.5102824
ps_Foreign	13	144	96.937500	290.9585129	0.0	14.698276	0.0000	0	1951	1951	3.7315216	15.4344554	24.2465427
pd_Disease	14	144	38.340278	98.7730437	0.0	14.594828	0.0000	0	851	851	4.8186381	31.5997937	8.2310870
pd_Quality	15	144	10.687500	61.4633291	0.0	0.000000	0.0000	0	630	630	7.9383697	71.4752903	5.1219441
pd_PSE	16	144	19.791667	122.7181588	0.0	0.000000	0.0000	0	1384	1384	9.7738479	103.6113885	10.2265132
pd_CJ	17	144	41.680556	217.9217044	0.0	4.853448	0.0000	0	2444	2444	9.5353772	100.0991385	18.1601420
pd_MAT	18	144	22.118056	69.9126825	0.0	4.275862	0.0000	0	581	581	4.8128317	29.3526491	5.8260569
pd_Access	19	144	38.770833	141.5748940	0.0	9.051724	0.0000	0	1464	1464	7.5913989	69.7376950	11.7979078
ds_PSE	20	144	31.493056	89.2544094	0.0	7.241379	0.0000	0	518	518	3.5578856	13.1018563	7.4378675
ds_Prevent	21	144	154.145833	361.9478185	36.0	78.120690	53.3736	0	2718	2718	5.1494407	31.1674924	30.1623182
ds_Stigma	22	144	17.743056	65.6199853	0.0	1.172414	0.0000	0	459	459	4.6235712	22.7786603	5.4683321
ds_MAT	23	144	122.805556	263.4593765	0.0	54.008621	0.0000	0	1320	1320	2.7309331	7.5306121	21.9549480
ds_Access	24	144	134.881944	206.5048636	54.5	89.931034	80.8017	0	1236	1236	2.6928503	9.1255366	17.2087386
ds_Quality	25	144	35.326389	138.9626820	0.0	8.931035	0.0000	0	1479	1479	8.2980834	79.6697730	11.5802235
ds_ODR	26	144	98.326389	190.6399539	0.0	53.879310	0.0000	0	1222	1222	3.1220471	12.3221127	15.8866628
ds_Samari	27	144	5.916667	29.2910402	0.0	0.000000	0.0000	0	244	244	6.3417633	42.4056401	2.4409200
ds_Coordinate	28	144	31.159722	73.6317299	0.0	13.905172	0.0000	0	501	501	4.0834581	21.0974342	6.1359775
ds_Divert2PH	29	144	41.868056	124.1610868	0.0	8.672414	0.0000	0	926	926	4.2100316	21.0434041	10.3467572
ss_Take	30	144	24.784722	65.0401880	0.0	7.008621	0.0000	0	308	308	3.0990896	9.2591265	5.4200157
ss_PDMP	31	144	144.812500	263.5796603	1.5	85.637931	2.2239	0	1438	1438	2.6789505	8.1230675	21.9649717
ss_Reg	32	144	10.312500	51.4106556	0.0	0.000000	0.0000	0	488	488	6.7109009	52.6919868	4.2842213
ss_New	33	144	16.416667	89.3887166	0.0	0.000000	0.0000	0	611	611	5.5551223	30.1890068	7.4490597
ss_Guide	34	144	32.229167	81.2566269	0.0	8.741379	0.0000	0	454	454	2.9105477	8.3120832	6.7713856
ss_Cautious	35	144	28.770833	108.7894431	0.0	3.956897	0.0000	0	952	952	5.9681218	41.0786119	9.0657869
ss_Prescrib_Ed	36	144	85.965278	182.0856024	0.0	39.413793	0.0000	0	929	929	2.9086055	8.7521426	15.1738002
ss_Crim_Enforce	37	144	249.472222	613.4331219	0.0	100.431034	0.0000	0	4113	4113	4.1663114	20.6558597	51.1194268
ss_Penalt	38	144	4.236111	17.0006113	0.0	0.000000	0.0000	0	106	106	4.2486691	18.0373984	1.4167176
Problem_Supply	39	144	295.930556	461.8098730	80.5	194.198276	119.3493	0	2367	2367	1.9541657	3.5299150	38.4841561
Problem_Demand	40	144	159.715278	329.0143293	30.0	80.172414	44.4780	0	2444	2444	3.7703214	18.1663251	27.4178608
Solutions_Demand	41	144	557.534722	713.6461574	301.0	409.879310	365.4609	0	3885	3885	2.1642430	5.0281759	59.4705131
Solutions_Supply	42	144	548.722222	724.0372768	332.5	403.913793	433.6605	0	4113	4113	2.3823470	7.1560209	60.3364397
TOTAL WORDS	43	144	2444.604167	1379.3977059	2235.0	2285.965517	1238.7123	561	8342	7781	1.2483781	1.9935355	114.9498088

# Variable dictionary: one row per analysed variable, in the same order as the
# summary-statistics table, pairing each variable name with a plain-language
# description. Abbreviations used in the descriptions:
#   WCPD = word count of problem definition, WCS = word count of solutions,
#   PSEE = psychological, sociological, environmental or economical,
#   CJ = criminal justice, MAT = Medication Assisted Treatment.
# The row order matters: the table rendering groups rows by position.
text_tbl <- data.frame(
  Variables = c("HEALTH_CJ","FED_STATE","ps_OPR","ps_OPRS_H","ps_prescrip","ps_Prescribers",
      "ps_Prescrib_Ed","ps_vital","ps_Manuf","ps_FDA","ps_Left","ps_BadApp","ps_Foreign","pd_Disease",
      "pd_Quality","pd_PSE","pd_CJ","pd_MAT","pd_Access","ds_PSE","ds_Prevent","ds_Stigma","ds_MAT",
      "ds_Access","ds_Quality","ds_ODR","ds_Samari","ds_Coordinate","ds_Divert2PH","ss_Take","ss_PDMP",
      "ss_Reg","ss_New","ss_Guide","ss_Cautious","ss_Prescrib_Ed","ss_Crim_Enforce","ss_Penalt",
      "Problem_Supply","Problem_Demand","Solutions_Demand","Solutions_Supply","TOTAL WORDS"),
  Description = c(
      # Interest group type (rows 1-2)
      "Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other",
      "Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities",
      # Problem definition - supply (rows 3-13)
      "Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions",
      "WCPD blaming opioid prescriptions for heroin use",
      "WCPD generally blaming overprescription of opioids",
      "WCPD blaming prescribers for overprescribing",
      "WCPD blaming lack of prescriber education",
      "WCPD blaming 5th vital sign of pain",
      "WCPD blaming drug manufacturers",
      "WCPD blaming FDA",
      "WCPD blaming diversion of left-over prescriptions",
      "WCPD blaming bad apples for increasing drug supply",
      "WCPD blaming foreign actors for increasing drug supply",
      # Problem definition - demand (rows 14-19)
      "WCPD acknowledging addiction as a disease",
      "WCPD blaming poor quality of addiction treatment",
      "WCPD blaming psychological, sociological, environmental or economical (PSEE) factors",
      "WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem",
      "WCPD blaming poor access to Medication Assisted Treatment (MAT)",
      "WCPD blaming poor access to Treatment Generally",
      # Solutions - demand (rows 20-29)
      "Word count of solutions (WCS) addressing PSEE factors",
      "WCS addressing demand side prevention (excludes prevention of supply tactics)",
      "WCS addressing stigma",
      "WCS increasing access to MAT",
      "WCS increasing access to Treatment Generally",
      "WCS increasing quality of Treatment",
      "WCS increasing access to Overdose Reversal Medications",
      "WCS passage or strengthening of Good Samaritan Laws",
      "WCS coordinating between CJ and Health actors",
      "WCS that are alternatives to incarceration, like drug courts or treatment",
      # Solutions - supply (rows 30-38)
      "WCS involving drug take back programs",
      "WCS regarding Prescription Drug Monitoring Programs (PDMP)",
      "WCS involving rescheduling, adding black box labels, or regulation of opioids",
      "WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids",
      "WCS promoting prescriber guidelines",
      "WCS calling for more cautious prescribing practices generally",
      "WCS of prescriber or distributor education",
      "WCS commitment to criminal enforcement",
      "WCS increasing or creating new criminal penalties",
      # Theme totals (rows 39-42) and overall total (row 43)
      "Total WC of all subcategories that define the problem as an issue of drug supply",
      "Total WC of all subcategories that define the problem as the demand for drugs",
      "Total WC of all subcategories that propose solutions aimed at decreasing the demand",
      "Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs",
      "Total WC of all words in each case"
  )
)

# Render the variable dictionary with rows grouped by theme. The start/end
# indices are row positions in `text_tbl` and must track the variable-name
# prefixes:
#   rows  1-2  grouping factors      rows 20-29 ds_* (solutions, demand side)
#   rows  3-13 ps_* (problem supply) rows 30-38 ss_* (solutions, supply side)
#   rows 14-19 pd_* (problem demand) rows 39-42 theme totals, row 43 total words
# Previously the demand-side solutions group stopped at row 28 and the
# supply-side solutions group was mislabelled "Problem Definition - Supply",
# so ds_Divert2PH and ss_Penalt appeared under the wrong headings.
kable(text_tbl, booktabs = TRUE) %>%
  kable_styling(font_size = 10, latex_options = c("striped", "scale_down")) %>%
  group_rows("Interest Group Type", 1, 2) %>%
  group_rows("Problem Definition - Supply", 3, 13) %>%
  group_rows("Problem Definition - Demand", 14, 19) %>%
  group_rows("Solutions - Demand", 20, 29) %>%
  group_rows("Solutions - Supply", 30, 38) %>%
  group_rows("Theme Definitions", 39, 42) %>%
  group_rows("Other", 43, 43) %>%
  column_spec(2, width = "40em") %>%
  row_spec(0, bold = TRUE, color = "white", background = "black")

Variables	Description
Interest Group Type
HEALTH_CJ	Categorical Variable, which groups speaker orgs based on either health focus, criminal justice (CJ) focus, or other
FED_STATE	Categorical Variable, which groups speaker orgs based on either federal gov agency, state agency, local agency, regional group of agencies, or private entities
Problem Definition - Supply
ps_OPR	Word count of problem definition (WCPD) blaming the characteristics of opioid prescriptions
ps_OPRS_H	WCPD blaming opioid prescriptions for heroin use
ps_prescrip	WCPD generally blaming overprescription of opioids
ps_Prescribers	WCPD blaming prescribers for overprescribing
ps_Prescrib_Ed	WCPD blaming lack of prescriber education
ps_vital	WCPD blaming 5th vital sign of pain
ps_Manuf	WCPD blaming drug manufacturers
ps_FDA	WCPD blaming FDA
ps_Left	WCPD blaming diversion of left-over prescriptions
ps_BadApp	WCPD blaming bad apples for increasing drug supply
ps_Foreign	WCPD blaming foreign actors for increasing drug supply
Problem Definition - Demand
pd_Disease	WCPD acknowleding addiction as a disease
pd_Quality	WCPD blaming poor quality of addiction treatment
pd_PSE	WCPD blaming psychological, sociological, environmental or economical (PSEE) factors
pd_CJ	WCPD blaming the mischaracterization of the problem as a CJ problem rather than a health problem
pd_MAT	WCPD blaming poor access to Medication Assisted Treatment (MAT)
pd_Access	WCPD blaming poor access to Treatment Generally
Solutions - Demand
ds_PSE	Word count of solutions (WCS) addressing PSEE factors
ds_Prevent	WCS addressing demand side prevention (excluse preventioon of supply tactics)
ds_Stigma	WCS addressing stigma
ds_MAT	WCS increasing access to MAT
ds_Access	WCS increasing access to Treatment Generally
ds_Quality	WCS increasing quality of Treatment
ds_ODR	WCS increasing access to Overdose Reversal Medications
ds_Samari	WCS passage or strengthening of Good Samaritan Laws
ds_Coordinate	WCS coordinating between CJ and Health actors
Problem Definition - Supply
ds_Divert2PH	WCS that are alterntives to incarceration, like drug courts or treatment
ss_Take	WCS involving drug take back programs
ss_PDMP	WCS regarding Prescription Drug Monitoring Programs (PDMP)
ss_Reg	WCS involving rescheduling, adding black box labels, or regulation of opioids
ss_New	WCS funding new drugs to address pain, with the intent of decreasing the prescribing of opioids
ss_Guide	WCS promoting prescriber guidelines
ss_Cautious	WCS calling for more cautious prescribing practices generally
ss_Prescrib_Ed	WCS of prescriber or distributor education
ss_Crim_Enforce	WCS commitment to criminal enforcement
Theme Definitions
ss_Penalt	WCS increasing or creating new criminal penalities
Problem_Supply	Total WC of all subcategories that define the problem as an issue of drug supply
Problem_Demand	Total WC of all subcategories that define the problem as the demand for drugs
Solutions_Demand	Total WC of all subcategories that propose soltions aimed at decreasing the demand
Solutions_Supply	Total WC of all subcategories that propose solutions aimed at decreasing the supply of drugs
Other
TOTAL WORDS	Total WC of all words in each case

The chart below shows the breakdown of the number of cases (testimonies) for categories of health vs. criminal justice organizations.

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## The following object is masked from 'package:expss':
## 
##     vars

library(tidyverse)

## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ purrr   0.2.5     ✔ forcats 0.3.0

## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()     masks psych::%+%()
## ✖ ggplot2::alpha()   masks psych::alpha()
## ✖ dplyr::between()   masks expss::between()
## ✖ dplyr::compute()   masks expss::compute()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks expss::first()
## ✖ stringr::fixed()   masks expss::fixed()
## ✖ purrr::keep()      masks expss::keep()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks expss::last()
## ✖ purrr::modify()    masks expss::modify()
## ✖ purrr::modify_if() masks expss::modify_if()
## ✖ dplyr::na_if()     masks expss::na_if()
## ✖ tidyr::nest()      masks expss::nest()
## ✖ dplyr::recode()    masks expss::recode()
## ✖ stringr::regex()   masks expss::regex()
## ✖ purrr::transpose() masks expss::transpose()
## ✖ dplyr::vars()      masks ggplot2::vars(), expss::vars()

library(forcats)
# Bar chart of the number of cases per HEALTH_CJ category, with each bar's
# count printed just above it.
q <- ggplot(Dissertation_Dataset, aes(x = HEALTH_CJ)) +
  geom_bar(fill = "green", width = 0.8) +
  geom_text(aes(label = ..count..), stat = "count", vjust = -0.75)

# Display the chart with the categories in a fixed left-to-right order.
q + scale_x_discrete(
  limits = c("Generalist", "Other", "Law Enforcement", "Health")
)

The amount of testimony offered by health actors was almost double that of law enforcement actors (78 cases vs. 43). Comparatively, both law enforcement and health groups far outweighed the amount of testimony offered by generalists (n=7) or other advocacy groups (n=16).

Aside from categorizing groups as law enforcement or health groups, the groups could be categorized based on public or private groups.

# Bar chart of the number of cases per FED_STATE category, with each bar's
# count printed just above it.
d <- ggplot(Dissertation_Dataset, aes(x = FED_STATE)) +
  geom_bar(fill = "steelblue") +
  geom_text(aes(label = ..count..), stat = "count", vjust = -0.75)

# Display the chart with the categories in a fixed left-to-right order.
d + scale_x_discrete(
  limits = c("Regional", "Indian Affairs", "Local", "State", "Private", "Federal")
)

The amount of testimony provided by public actors greatly exceeded that offered by private actors (105 v. 39), with state, local and regional group testimony exceeding federal government testimony, but not by a great deal (54 v. 47).

If we combine the two ways of categorizing the groups testifying, Health v. Criminal Justice and Public vs. Private, we get the following: Federal Law Enforcement testimony = 29; Private Health group testimony = 27; Federal Health agency testimony = 17; State Law Enforcement testimony = 14 + Local Law Enforcement testimony = 13 + Regional Law Enforcement testimony = 1 (Total = 28); State Health Agency testimony = 11 + Local Health Agency testimony = 7 (Total = 18).

# Stacked bar chart: cases per FED_STATE category, with each bar split by
# HEALTH_CJ category.
z <- ggplot(Dissertation_Dataset, aes(x = FED_STATE, fill = HEALTH_CJ)) +
  geom_bar()

# Display the chart with the categories in a fixed left-to-right order.
z + scale_x_discrete(
  limits = c("Regional", "Indian Affairs", "Local", "State", "Private", "Federal")
)

Another way to display the information would be to look at the composition of groups as a proportion of each. See below.

# Same breakdown as a proportion: position = "fill" scales every bar to full
# height so the HEALTH_CJ shares can be compared across FED_STATE categories.
ggplot(Dissertation_Dataset, aes(x = FED_STATE, fill = HEALTH_CJ)) +
  geom_bar(position = "fill")

Which house received the most testimony?

The Senate received almost double the amount of testimony as the House (n=89 vs. n=49). There were also 6 testimonies that were given to both the House and the Senate. This would be good to mention when describing the dataset. Coding: 0 = House, 1 = Senate, 2 = Both.

# Frequency table of cases by the SENATE factor (levels "0", "1", "2"),
# printed to the console.
senate_tab <- table(Dissertation_Dataset[["SENATE"]])
senate_tab

## 
##  0  1  2 
## 49 89  6

# Render the chamber counts as a one-column table; cbind() turns the frequency
# table into a column matrix whose column header is "senate_tab".
kable(
  cbind(senate_tab),
  caption = "Count of Senate Testimony",
  booktabs = TRUE
) %>%
  kable_styling(font_size = 12)

Count of Senate Testimony
	senate_tab
0	49
1	89
2	6

It seems from the graph below that the proportion of law enforcement to health testimony varied by the House and the Senate. Since each body hears a separate set of testimony, it may be useful in the future to disaggregate and analyze separately the House vs. Senate testimony. Although, in this case, both the House and Senate voted near unanimously for CARA, so the different composition of testimony may not have been influential. Then again, CARA was introduced in the Senate and the House had produced a flurry of opioid bills at the same time. I believe they were later combined in reconciliation. Therefore, future analysis could compare the House vs. Senate testimony and the proposals that were supported by each house.

# Side-by-side bars: number of cases per SENATE level, one bar per HEALTH_CJ
# category within each level.
ggplot(Dissertation_Dataset, aes(x = SENATE, fill = HEALTH_CJ)) +
  geom_bar(position = "dodge")

# Proportion of FED_STATE that were HEALTH_CJ

# Proportional stacked bars: within each FED_STATE category, the share of
# cases belonging to each HEALTH_CJ category.
ggplot(Dissertation_Dataset, aes(x = FED_STATE, fill = HEALTH_CJ)) +
  geom_bar(position = "fill")

# Findings of Overall Saliency of Themes: Supply vs. Demand; Problem vs. Solutions

# Problem_Demand word counts by HEALTH_CJ category, with dodged bars coloured
# by FED_STATE. geom_col() plots the y values as given (no counting).
ggplot(
  Dissertation_Dataset,
  aes(x = HEALTH_CJ, y = Problem_Demand, fill = FED_STATE)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of Words Describing the Problem as Demand",
    subtitle = "Comparing Types of Actors Problem Definition",
    caption = "Measured in word counts",
    x = "Health vs. Criminal Justice",
    y = "Word Count",
    fill = "Level"
  )

# Stacked Problem_Demand word counts per FED_STATE category.
p <- ggplot(Dissertation_Dataset, aes(x = FED_STATE, y = Problem_Demand)) +
  geom_bar(stat = "identity", fill = "Blue")

# NOTE(review): "Private" is not among the limits, so rows with that level are
# dropped from the plot (ggplot2 warns about removed rows) -- confirm that
# restricting this chart to the other categories is intentional.
p + scale_x_discrete(
  limits = c("Regional", "Indian Affairs", "Federal", "State", "Local")
)

## Warning: Removed 39 rows containing missing values (position_stack).

# Scatter plot of supply-side problem word counts against supply-side
# solution word counts, one hollow circle (shape 1) per case.
ggplot(Dissertation_Dataset, aes(x = Problem_Supply, y = Solutions_Supply)) +
  geom_point(shape = 1)

    #geom_smooth()            # Add a loess smoothed fit curve with confidence region
    #geom_smooth(method=lm, se=FALSE) #Add linear regression line,Don't add shaded confidence region

# Scatter plot of demand-side problem word counts against demand-side
# solution word counts, one hollow circle (shape 1) per case.
ggplot(Dissertation_Dataset, aes(x = Problem_Demand, y = Solutions_Demand)) +
  geom_point(shape = 1)

    #geom_smooth()            # Add a loess smoothed fit curve with confidence region
    #geom_smooth(method=lm, se=FALSE) #Add linear regression line,Don't add shaded confidence region

```

Dissertation

Taleed El-Sabawi

12/9/2018

Import Dataset

Summary of Variables

Which house received the most testimony?