#getwd()
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#PROJECT 2: I plan to look at the proposition that “the emergence of democratic political systems has depended largely on nations having low rates of infectious disease”
# Load the disease/democracy CSV from disk; read_csv() returns a tibble
gideon <- "C:/Users/libcl/OneDrive/Documents/disease_democ.csv" %>%
  read_csv()
##
## -- Column specification --------------------------------------------------------
## cols(
## country = col_character(),
## income_group = col_character(),
## democ_score = col_double(),
## infect_rate = col_double()
## )
# Inspect the structure of the dataset (column names and column types)
gideon %>%
  str()
## tibble [168 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ country : chr [1:168] "Bahrain" "Bahamas, The" "Qatar" "Latvia" ...
## $ income_group: chr [1:168] "High income: non-OECD" "High income: non-OECD" "High income: non-OECD" "High income: non-OECD" ...
## $ democ_score : num [1:168] 45.6 48.4 50.4 52.8 46 64 65.8 70.6 57.6 40.6 ...
## $ infect_rate : num [1:168] 23 24 24 25 26 26 26 26 27 28 ...
## - attr(*, "spec")=
## .. cols(
## .. country = col_character(),
## .. income_group = col_character(),
## .. democ_score = col_double(),
## .. infect_rate = col_double()
## .. )
# Per-column summary statistics for the dataset
gideon %>%
  summary()
## country income_group democ_score infect_rate
## Length:168 Length:168 Min. :15.80 Min. :23.00
## Class :character Class :character 1st Qu.:28.40 1st Qu.:27.00
## Mode :character Mode :character Median :38.40 Median :32.00
## Mean :42.78 Mean :33.33
## 3rd Qu.:52.65 3rd Qu.:39.00
## Max. :86.60 Max. :48.00
# Boxplot of democracy scores across all countries.
# `color` is set outside aes() so the outline is actually drawn in red;
# inside aes() the string "red" is treated as a data value, which yields the
# default palette colour plus a meaningless legend.
gideon %>%
  ggplot() +
  geom_boxplot(aes(x = democ_score), color = "red") +
  xlab("Democracy Scores") +
  ggtitle("Distribution of Democracy Scores - All Countries")
# Boxplot of infection rates across all countries.
# `color` is set outside aes() so the outline is actually drawn in red;
# inside aes() the string "red" is treated as a data value, which yields the
# default palette colour plus a meaningless legend.
# The axis label names the plotted variable (infect_rate), matching the title.
gideon %>%
  ggplot() +
  geom_boxplot(aes(x = infect_rate), color = "red") +
  xlab("Infection Rate") +
  ggtitle("Distribution of Infection Rate - All Countries")
# Histogram of democracy scores across all countries.
# democ_score spans roughly 16-87 (see summary above), so a bin width of 25
# collapses the data into three or four bars and hides the distribution's
# shape. A width of 10 gives about eight bins for the 168 observations.
gideon %>%
  ggplot() +
  geom_histogram(aes(x = democ_score), binwidth = 10) +
  xlab("Democracy Scores") +
  ggtitle("Democracy Scores - All Countries")
# Histogram of infection rates across all countries (bins of width 5).
# The axis label names the plotted variable (infect_rate), matching the title.
gideon %>%
  ggplot() +
  geom_histogram(aes(x = infect_rate), binwidth = 5) +
  xlab("Infection Rate") +
  ggtitle("Infection Rate - All Countries")
### Show the moderate negative correlation of the two variables
# Correlation between democracy score and infection rate over all countries
correlate_all <- with(gideon, cor(democ_score, infect_rate))
print(correlate_all)
## [1] -0.6664911
#On its face, there appears to be a moderate negative correlation (r is about -0.67) when looking at all countries as a whole.
# Scatterplot of democracy score against infection rate, one point per
# country, with a fitted linear regression line in red.
# - No group_by(): grouping has no effect on how ggplot draws these layers.
# - alpha is set outside aes() so the points are drawn semi-transparent;
#   inside aes() the constant 0.5 is treated as data and adds a legend.
# - geom_smooth() inherits x and y from the ggplot() call.
gideon %>%
  ggplot(aes(x = infect_rate, y = democ_score)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Plotted Variables for Each Country") +
  xlab("Infection Rate") +
  ylab("Democracy Score")
## `geom_smooth()` using formula 'y ~ x'
# Calculate residuals for infection and democracy and add them to a new tibble.
# A residual is the observed value minus the value fitted by a regression
# model. The earlier version subtracted the correlation coefficient from each
# observation, which only shifts the raw values by a constant (about 0.67) and
# is not a residual.
#   resid_democ  : residuals of democ_score regressed on infect_rate
#   resid_infect : residuals of infect_rate regressed on democ_score
# na.exclude keeps the residual vectors the same length as the data even if a
# row has a missing value.
# The result is left ungrouped; every country is a single row, so grouping by
# country added nothing.
# NOTE: console output pasted below this chunk predates this fix; re-knit the
# document to refresh it.
democ_on_infect <- lm(democ_score ~ infect_rate, data = gideon, na.action = na.exclude)
infect_on_democ <- lm(infect_rate ~ democ_score, data = gideon, na.action = na.exclude)
resid_gideon <- gideon %>%
  mutate(
    resid_infect = residuals(infect_on_democ),
    resid_democ = residuals(democ_on_infect)
  )
head(resid_gideon)
## # A tibble: 6 x 6
## # Groups: country [6]
## country income_group democ_score infect_rate resid_infect resid_democ
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Bahrain High income: non~ 45.6 23 23.7 46.3
## 2 Bahamas, T~ High income: non~ 48.4 24 24.7 49.1
## 3 Qatar High income: non~ 50.4 24 24.7 51.1
## 4 Latvia High income: non~ 52.8 25 25.7 53.5
## 5 Barbados High income: non~ 46 26 26.7 46.7
## 6 Singapore High income: non~ 64 26 26.7 64.7
# Fit a simple linear regression of democracy score on infection rate using
# the full dataset, then print the model's summary statistics
linearMod <- lm(formula = democ_score ~ infect_rate, data = gideon)
print(summary(linearMod))
##
## Call:
## lm(formula = democ_score ~ infect_rate, data = gideon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.838 -9.689 -1.512 7.775 31.763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104.4458 5.4627 19.12 <2e-16 ***
## infect_rate -1.8503 0.1606 -11.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.08 on 166 degrees of freedom
## Multiple R-squared: 0.4442, Adjusted R-squared: 0.4409
## F-statistic: 132.7 on 1 and 166 DF, p-value: < 2.2e-16
# Attach the model's predicted values and residuals to resid_gideon as columns
resid_gideon[["predicted"]] <- predict(linearMod)
resid_gideon[["residuals"]] <- residuals(linearMod)
# Plot residuals with democracy score as the dependent variable.
# Each country's observed point is joined to its predicted value (hollow
# circle on the regression line) by a vertical segment; larger residuals are
# drawn larger and redder.
# - ungroup() replaces the earlier group_by(country), which had no effect on
#   the plot; the `predicted` and `residuals` columns are used directly
#   instead of being copied under new names.
# - guides(... = "none") replaces the deprecated `FALSE` form.
resid_plot <- resid_gideon %>%
  ungroup() %>%
  ggplot(aes(x = infect_rate, y = democ_score)) +
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") + # regression line in light grey
  geom_segment(aes(xend = infect_rate, yend = predicted)) + # observed -> predicted
  geom_point(aes(color = abs(residuals), size = abs(residuals))) + # size also mapped
  scale_color_continuous(low = "black", high = "red") +
  guides(color = "none", size = "none") + # hide both legends
  geom_point(aes(y = predicted), shape = 1) + # predicted values as hollow circles
  xlab("Infection Rate") +
  ylab("Democracy Score") +
  ggtitle("Residual Plot") +
  theme_wsj()
resid_plot
## `geom_smooth()` using formula 'y ~ x'
# Bar chart counting the countries in each income group; x-axis labels are
# rotated 90 degrees so the long group names do not overlap
ggplot(gideon, aes(x = income_group)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Use plotly to view income groups, giving the bar chart more depth.
# Same scatterplot and regression line as the first scatterplot, but with the
# points coloured by income group to see whether it has any effect.
# The x/y mapping is declared once in ggplot() and inherited by both layers.
plot1 <- ggplot(gideon, aes(x = infect_rate, y = democ_score)) +
  geom_point(aes(color = income_group)) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Compare Variables for Each Country") +
  xlab("Infection Rate") +
  ylab("Democracy Score") +
  theme_fivethirtyeight()
# Interactive scatterplot of democracy score against infection rate, coloured
# by income group, with country details shown on hover.
# - The plot title is a layout attribute, not a trace attribute. Passing
#   `title` to plot_ly() produced the "'scatter' objects don't have these
#   attributes: 'title'" warnings and no title, so it is set with layout().
# - type and mode are given explicitly so plotly does not have to guess them.
# NOTE: console output pasted below this chunk predates this fix; re-knit the
# document to refresh it.
gideon1 <- plot_ly(
  data = gideon,
  x = ~infect_rate,
  y = ~democ_score,
  type = "scatter",
  mode = "markers",
  color = ~income_group,
  size = 50,
  hoverinfo = "text",
  text = ~paste("Country:", country, " DemScore:", democ_score, " InfRate:", infect_rate)
) %>%
  plotly::layout(title = "Compare Variables by Income Group")
gideon1
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'scatter' objects don't have these attributes: 'title'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
#ESSAY
Democracy and Infectious Disease
I looked at the proposition that “the emergence of democratic political systems has depended largely on nations having low rates of infectious disease” which is put forth in the book “Democratization: A Comparative Analysis of 170 Countries”, using data from “Global Infectious Diseases and Epidemiology Network” (GIDEON). I was unable to obtain access to the book itself, but a journal review states that the author argues that the emergence of democracy is an evolutionary process dependent on “distribution of intellectual and economic resources” (Christiansen, 2004). An alternative view is that the opposite occurs, i.e. that equitable distribution of resources is dependent on democracy being in place, as argued by observers of the AIDS epidemic (Justesen, 2012). I will examine the former proposition, although in the end the model works either way on this dataset.
In the data, each country listed has two main numeric variables: democ_score, which is a number between 1 and 100 scoring each country in the dataset, with higher scores trending toward more democratic and lower scores toward authoritarian, and infect_rate, which is an infection rate for each of these countries. Additionally, there is a discrete variable income_group that classifies each country into one of five income groups. The Organisation for Economic Co-operation and Development (OECD) named in these groups is an international organisation working to “improve lives through better policies” (OECD.org), to which the United States belongs. I chose this dataset because we are currently experiencing severe stress from the Covid-19 infection in our own democracy that may be impacting our political outlook, and I am concerned that we are at risk in maintaining our system. First, I looked at the distribution of the two numeric variables. Looking at the boxplots and histograms, both appear to be approximately normal with no outliers. Next, I did a scatterplot with infection rate on the x-axis and democracy score on the y-axis. There appears to be a negative correlation between these two. I fitted a regression line and then examined the residuals. The regression model appears to predict better for higher rates of infection than for lower rates. In effect, at lower rates of infection, where democracy score is higher, the residuals have a larger spread from the predicted value of the model’s regression line, whereas higher rates of infection and lower democracy scores have a smaller spread or a better fit to the regression model. Because of this, I chose to go on to visualize whether the other variable, income group, impacted the democracy/infection regression model. I examined the income group variable with a bar graph, and then I went on to produce another interactive scatterplot with 5 separate colors for the income groups.
It appears that high-income OECD countries have the lowest infection/highest democracy scores while low-income countries have the highest infection/lowest democracy scores. This is not necessarily a surprise, but I was somewhat surprised that one simple change to my first graph allowed this to pop up so clearly visually without having to do a lot of other calculation. My conclusion is that while it appears infection rate is negatively correlated with democracy, there needs to be consideration of the fact that not having resources in the first place may be a more important stumbling block for countries to achieve democracy: you cannot distribute what you don’t have. Any further study along these variables needs to note the dependency of both democratic scoring and infection rate upon the resources or income available to each country. With respect to our current situation in the United States, we have plenty of resources, but authoritarian ideology is withholding aid in a pandemic, particularly to areas that are perceived as disloyal to the party in control; we are in a very dangerous place. This data does not make me feel better.
Works Cited
Christiansen, L. S. (2004). Reviewed Work(s): Democratization: A Comparative Analysis of 170 Countries by Tatu Vanhanen. Journal of Peace Research, 649.
Justesen, M. K. (2012). Democracy, dictatorship, and disease: Political regimes and HIV/AIDS. European Journal of Political Economy, 373-389.
OECD.org and gideononline.com