#getwd()
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

#PROJECT 2: I plan to look at the proposition that “the emergence of democratic political systems has depended largely on nations having low rates of infectious disease”

DATA

Dataset was originally obtained from the organization “Global Infectious Diseases and Epidemiology Network” (GIDEON) and the book “Democratization: A Comparative Analysis of 170 Countries”

# Read the dataset into a tibble.
# The CSV location defaults to the path used when this analysis was written.
# To run on another machine, set the GIDEON_CSV environment variable to the
# file's location instead of editing the script.
gideon_csv <- Sys.getenv(
  "GIDEON_CSV",
  unset = "C:/Users/libcl/OneDrive/Documents/disease_democ.csv"
)
gideon <- read_csv(gideon_csv)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   income_group = col_character(),
##   democ_score = col_double(),
##   infect_rate = col_double()
## )
# Inspect the structure of the tibble: column names, types and leading values
gideon %>%
  str()
## tibble [168 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ country     : chr [1:168] "Bahrain" "Bahamas, The" "Qatar" "Latvia" ...
##  $ income_group: chr [1:168] "High income: non-OECD" "High income: non-OECD" "High income: non-OECD" "High income: non-OECD" ...
##  $ democ_score : num [1:168] 45.6 48.4 50.4 52.8 46 64 65.8 70.6 57.6 40.6 ...
##  $ infect_rate : num [1:168] 23 24 24 25 26 26 26 26 27 28 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   country = col_character(),
##   ..   income_group = col_character(),
##   ..   democ_score = col_double(),
##   ..   infect_rate = col_double()
##   .. )
# Per-column summary (quartiles and mean for the numeric columns)
gideon %>%
  summary()
##    country          income_group        democ_score     infect_rate   
##  Length:168         Length:168         Min.   :15.80   Min.   :23.00  
##  Class :character   Class :character   1st Qu.:28.40   1st Qu.:27.00  
##  Mode  :character   Mode  :character   Median :38.40   Median :32.00  
##                                        Mean   :42.78   Mean   :33.33  
##                                        3rd Qu.:52.65   3rd Qu.:39.00  
##                                        Max.   :86.60   Max.   :48.00

VARIABLES: The two numeric variables of interest are democ_score and infect_rate for each country (country being a character variable)

Look at the distributions of the two numeric variables

# Boxplot of democracy scores across all countries.
# The outline colour is a fixed setting, so it is passed outside aes().
# Inside aes() the string "red" is treated as a data value, which adds a
# legend and draws with the default palette colour instead of red.
gideon %>%
  ggplot() +
  geom_boxplot(aes(x = democ_score), color = "red") +
  xlab("Democracy Scores") +
  ggtitle("Distribution of Democracy Scores - All Countries")

# Boxplot of infection rates across all countries.
# The outline colour is a fixed setting, so it is passed outside aes().
# Inside aes() the string "red" is treated as a data value, which adds a
# legend and draws with the default palette colour instead of red.
gideon %>%
  ggplot() +
  geom_boxplot(aes(x = infect_rate), color = "red") +
  xlab("Infection Scores") +
  ggtitle("Distribution of Infection Rate - All Countries")

Look at the histograms of the two numeric variables

# Histogram of democracy scores across all countries.
# democ_score spans roughly 16 to 87, so a bin width of 25 collapses the data
# into about three bars and hides the shape of the distribution. A width of 5
# matches the infection-rate histogram and gives enough bins to judge shape.
gideon %>%
  ggplot() +
  geom_histogram(aes(x = democ_score), binwidth = 5) +
  xlab("Democracy Scores") +
  ggtitle("Democracy Scores - All Countries")

# Histogram of infection rates across all countries, in bins 5 units wide
ggplot(gideon, aes(x = infect_rate)) +
  geom_histogram(binwidth = 5) +
  labs(
    x = "Infection Scores",
    title = "Infection Rate - All Countries"
  )

### Show moderate negative correlation between the two variables

# Correlation between the two numeric variables, computed over all countries
correlate_all <- with(gideon, cor(democ_score, infect_rate))
correlate_all
## [1] -0.6664911
# On its face, a coefficient of about -0.67 indicates a moderate negative
# correlation when looking at all countries as a whole.
# Scatterplot of democracy score against infection rate with a fitted
# least-squares line. ggplot2 ignores dplyr groups, so no group_by() is needed.
gideon %>%
  ggplot(aes(x = infect_rate, y = democ_score)) +
  # alpha is a fixed setting, so it goes outside aes() (avoids a stray legend)
  geom_point(alpha = 0.5) +
  # x and y are inherited from ggplot(), so they are not repeated here
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Plotted Variables for Each Country") +
  xlab("Infection Rate") +
  ylab("Democracy Score")
## `geom_smooth()` using formula 'y ~ x'

Continue the analysis of these two variables using a regression model because, so far, it appears to fit. Produce a visualization to see how well the model fits using residuals and using color by country for these two variables. Also, see if there are other outside factors using the “income” character class variable.

# Calculate residuals for infection and democracy and add them as new columns.
# A residual is the observed value minus the value fitted by a model, so each
# column is taken from a simple linear regression on the other variable.
# (Subtracting the correlation coefficient, as done previously, only shifted
# every raw value by the same constant.)
# The data are not grouped by country here: the regressions have to be fitted
# across all rows, not within one-row groups.
# na.exclude keeps the residual vectors the same length as the data.
# NOTE: the head() printout recorded below this chunk came from the earlier
# calculation; re-run the chunk to refresh it.
resid_gideon <- gideon %>%
  mutate(
    resid_infect = residuals(
      lm(infect_rate ~ democ_score, na.action = na.exclude)
    ),
    resid_democ = residuals(
      lm(democ_score ~ infect_rate, na.action = na.exclude)
    )
  )
head(resid_gideon)
## # A tibble: 6 x 6
## # Groups:   country [6]
##   country     income_group      democ_score infect_rate resid_infect resid_democ
##   <chr>       <chr>                   <dbl>       <dbl>        <dbl>       <dbl>
## 1 Bahrain     High income: non~        45.6          23         23.7        46.3
## 2 Bahamas, T~ High income: non~        48.4          24         24.7        49.1
## 3 Qatar       High income: non~        50.4          24         24.7        51.1
## 4 Latvia      High income: non~        52.8          25         25.7        53.5
## 5 Barbados    High income: non~        46            26         26.7        46.7
## 6 Singapore   High income: non~        64            26         26.7        64.7

Name linear model to obtain coefficients

# Fit a simple linear regression of democ_score on infect_rate using all rows
# of gideon, then print the model summary (residual quantiles, coefficient
# table, R-squared and F-statistic).
linearMod <- lm(democ_score ~ infect_rate, data = gideon)  # build linear regression model on full data
summary(linearMod)
## 
## Call:
## lm(formula = democ_score ~ infect_rate, data = gideon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.838  -9.689  -1.512   7.775  31.763 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 104.4458     5.4627   19.12   <2e-16 ***
## infect_rate  -1.8503     0.1606  -11.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.08 on 166 degrees of freedom
## Multiple R-squared:  0.4442, Adjusted R-squared:  0.4409 
## F-statistic: 132.7 on 1 and 166 DF,  p-value: < 2.2e-16

Using model democ_score = (-1.85) * infect_rate + 104.45

# Attach linearMod's fitted values and residuals to the per-country rows.
# Assignment is positional, so this assumes lm() dropped no rows for missing
# values (summary(gideon) reports no NAs).
resid_gideon$predicted <- predict(linearMod)   # Save the predicted values
resid_gideon$residuals <- residuals(linearMod) # Save the residual values

# Residual plot with democracy score as the dependent variable:
#   - light grey line: the fitted regression line
#   - open circles:    predicted score for each country
#   - segments:        distance between observed and predicted score
#   - filled points:   observed score, larger and redder as |residual| grows
# ggplot2 ignores dplyr groups, so the data are piped in without group_by().
resid_plot <- resid_gideon %>%
  ggplot(aes(x = infect_rate, y = democ_score)) +
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +
  geom_segment(aes(xend = infect_rate, yend = predicted)) +
  geom_point(aes(color = abs(residuals), size = abs(residuals))) +
  scale_color_continuous(low = "black", high = "red") +
  # "none" replaces the deprecated FALSE for hiding the colour/size legends
  guides(color = "none", size = "none") +
  geom_point(aes(y = predicted), shape = 1) +
  xlab("Infection Rate") +
  ylab("Democracy Score") +
  ggtitle("Residual Plot") +
  theme_wsj()
resid_plot
## `geom_smooth()` using formula 'y ~ x'

The residual plot seems to show the model predicts well for countries with high infection rates, but not as well for countries with low infection rates. This suggests that there are other factors to consider in countries with low infection rates; R^2 indicates not great fit; now look at discrete variable of income

# Bar chart counting the countries in each income group; the x-axis labels
# are rotated 90 degrees.
ggplot(gideon, aes(x = income_group)) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

#Use plotly to view income groups to give bar chart in more depth

# Same regression scatterplot as before, but with each point coloured by its
# income group to see whether income has any visible effect.
plot1 <- ggplot(gideon, aes(x = infect_rate, y = democ_score)) +
  geom_point(aes(color = income_group)) +
  geom_smooth(method = "lm", color = "red") +
  labs(
    title = "Compare Variables for Each Country",
    x = "Infection Rate",
    y = "Democracy Score"
  ) +
  theme_fivethirtyeight()
# Interactive scatterplot of democracy score against infection rate, coloured
# by income group, with country details shown on hover.
# `title` is a layout attribute rather than a scatter-trace attribute, so it
# is set through layout(). Passing it to plot_ly() raised the "'scatter'
# objects don't have these attributes: 'title'" warnings and drew no title.
# type and mode are given explicitly so plotly does not have to guess them.
# NOTE: the warnings recorded below this chunk came from the earlier call;
# re-run the chunk to refresh the output.
gideon1 <- plot_ly(
  data = gideon,
  x = ~infect_rate,
  y = ~democ_score,
  type = "scatter",
  mode = "markers",
  hoverinfo = "text",
  text = ~paste("Country:", country, "   DemScore:", democ_score, "   InfRate:", infect_rate),
  color = ~income_group,
  size = 50
) %>%
  layout(title = "Compare Variables by Income Group")
gideon1
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

High income : OECD seems to go with high democracy score/low infection rate while low income is vice-versa.

#ESSAY

   Democracy and Infectious Disease

I looked at the proposition that “the emergence of democratic political systems has depended largely on nations having low rates of infectious disease,” which is put forth in the book “Democratization: A Comparative Analysis of 170 Countries”, using data from the “Global Infectious Diseases and Epidemiology Network” (GIDEON). I was unable to obtain access to the book itself, but a journal review states that the author argues that the emergence of democracy is an evolutionary process dependent on the “distribution of intellectual and economic resources” (Christiansen, 2004). An alternative view is that the opposite occurs, i.e. that equitable distribution of resources is dependent on democracy being in place, as argued by observers of the AIDS epidemic (Justesen, 2012). I will examine the former proposition, although in the end the model works either way on this dataset.

In the data, each country listed has two main numeric variables: democ_score, a number between 1 and 100 scoring each country in the dataset, with higher scores tending toward more democratic and lower scores toward authoritarian, and infect_rate, an infection rate for each of these countries. Additionally, there is a discrete variable income_group that classifies each country into one of five income groups. The Organisation for Economic Co-operation and Development (OECD) in this group refers to an international organisation, to which the United States belongs, working to “improve lives through better policies” (OECD.org). I chose this dataset because we are currently experiencing severe stress from the Covid-19 infection in our own democracy that may be impacting our political outlook, and I am concerned that we are at risk in maintaining our system. First, I looked at the distributions of the two numeric variables. Looking at the boxplots and histograms, they appear to be approximately normal with no outliers. Next, I did a scatterplot with infection rates on the x-axis and democracy score on the y-axis. There appears to be a negative correlation between these two. I fitted a regression line and then examined the residuals. The regression model appears to predict better for higher rates of infection than for lower rates. In effect, at lower rates of infection, where democracy score is higher, the residuals have a larger spread from the predicted value of the model’s regression line, whereas higher rates of infection and lower democracy scores have a smaller spread or a better fit to the regression model. Because of this, I chose to go on to visualize whether the other variable, income group, impacted the democracy/infection regression model. I examined the income group variable with a bar graph, and then I went on to produce another interactive scatterplot with 5 separate colors for the income groups.
It appears that high-income OECD countries have the lowest infection/highest democracy scores while low-income countries have the highest infection/lowest democracy scores. This is not necessarily a surprise, but I was somewhat surprised that one simple change to my first graph allowed this to pop up so clearly visually without having to do a lot of other calculation. My conclusion is that while it appears infection rate is negatively correlated with democracy, there needs to be consideration of the fact that not having resources in the first place may be a more important stumbling block for countries to achieve democracy: you cannot distribute what you don’t have. Any further study along these variables needs to note the dependency of both democratic scoring and infection rate upon the resources or income available to each country. With respect to our current situation in the United States, we have plenty of resources, but authoritarian ideology is withholding aid in a pandemic particularly to areas that are perceived as disloyal to the party in control; we are in a very dangerous place. This data does not make me feel better.

Works Cited

Christiansen, L. S. (2004). Reviewed Work(s): Democratization: A Comparative Analysis of 170 Countries by Tatu Vanhanen. Journal of Peace Research, 649.

Justesen, M. K. (2012). Democracy, dictatorship, and disease: Political regimes and HIV/AIDS. European Journal of Political Economy, 373-389.

OECD.org and gideononline.com