Install Packages

#install.packages("dplyr")
#install.packages("ggplot2")
#install.packages("tidyverse")
#install.packages("plotly")

Load libraries

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages -------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.0.1     v purrr   0.3.0
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.3.1     v forcats 0.3.0
## -- Conflicts ----------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Read files

# Load the country-level disease/democracy data set.
# stringsAsFactors = TRUE is spelled out because the read.csv() default became
# FALSE in R 4.0.0; the per-level counts that summary() prints for `country`
# and `income_group` further down depend on these columns being factors.
disease_democ <- read.csv(
  "c:/Users/dwilliams/documents/montgomery/data 110/data/disease_democ.csv",
  stringsAsFactors = TRUE
)

# Preview the first rows to confirm the expected columns were read.
head(disease_democ)
##        country          income_group democ_score infect_rate
## 1      Bahrain High income: non-OECD        45.6          23
## 2 Bahamas, The High income: non-OECD        48.4          24
## 3        Qatar High income: non-OECD        50.4          24
## 4       Latvia High income: non-OECD        52.8          25
## 5     Barbados High income: non-OECD        46.0          26
## 6    Singapore High income: non-OECD        64.0          26

Filter into income groups

# Build one data frame per value of `income_group`.
low_income <- disease_democ %>%
  filter(income_group == "Low income")

lower_middle_income <- disease_democ %>%
  filter(income_group == "Lower middle income")

upper_middle_income <- disease_democ %>%
  filter(income_group == "Upper middle income")

high_nonoecd_income <- disease_democ %>%
  filter(income_group == "High income: non-OECD")

high_oecd_income <- disease_democ %>%
  filter(income_group == "High income: OECD")

Rename columns

# Give the two numeric columns display-friendly names in the full data set and
# in every income-group subset: democ_score -> Democracy,
# infect_rate -> Infections.
disease_democ <- disease_democ %>%
  rename(Democracy = democ_score, Infections = infect_rate)

low_income <- low_income %>%
  rename(Democracy = democ_score, Infections = infect_rate)

lower_middle_income <- lower_middle_income %>%
  rename(Democracy = democ_score, Infections = infect_rate)

upper_middle_income <- upper_middle_income %>%
  rename(Democracy = democ_score, Infections = infect_rate)

high_nonoecd_income <- high_nonoecd_income %>%
  rename(Democracy = democ_score, Infections = infect_rate)

high_oecd_income <- high_oecd_income %>%
  rename(Democracy = democ_score, Infections = infect_rate)

View groups

# View() opens a data viewer window, which is only useful in an interactive
# session. The guard keeps non-interactive runs (Rscript, knitting) from
# trying to open viewers.
if (interactive()) {
  View(disease_democ)

  View(low_income)

  View(lower_middle_income)

  View(upper_middle_income)

  View(high_oecd_income)

  View(high_nonoecd_income)
}

Get summary statistics and standard deviation

# Low income group: summary() reports level counts for the factor columns and
# min / quartiles / mean / max for the numeric columns

summary(low_income)
##          country                  income_group   Democracy    
##  Afghanistan : 1   High income: non-OECD: 0    Min.   :15.80  
##  Bangladesh  : 1   High income: OECD    : 0    1st Qu.:21.55  
##  Benin       : 1   Low income           :40    Median :24.60  
##  Burkina Faso: 1   Lower middle income  : 0    Mean   :24.70  
##  Burundi     : 1   Upper middle income  : 0    3rd Qu.:28.10  
##  Cambodia    : 1                               Max.   :33.00  
##  (Other)     :34                                              
##    Infections   
##  Min.   :27.00  
##  1st Qu.:35.50  
##  Median :40.00  
##  Mean   :38.95  
##  3rd Qu.:43.00  
##  Max.   :48.00  
## 
# Standard deviation of each numeric column for the low income group
sd(low_income[["Infections"]])
## [1] 5.710764
sd(low_income[["Democracy"]])
## [1] 4.79615
# Lower middle income group: summary() reports level counts for the factor
# columns and min / quartiles / mean / max for the numeric columns

summary(lower_middle_income)
##            country                  income_group   Democracy   
##  Angola        : 1   High income: non-OECD: 0    Min.   :22.2  
##  Armenia       : 1   High income: OECD    : 0    1st Qu.:28.4  
##  Belize        : 1   Low income           : 0    Median :32.4  
##  Bhutan        : 1   Lower middle income  :45    Mean   :34.1  
##  Bolivia       : 1   Upper middle income  : 0    3rd Qu.:39.0  
##  Côte d'Ivoire: 1                               Max.   :54.4  
##  (Other)       :39                                             
##    Infections   
##  Min.   :26.00  
##  1st Qu.:31.00  
##  Median :35.00  
##  Mean   :35.27  
##  3rd Qu.:38.00  
##  Max.   :47.00  
## 
# Standard deviation of each numeric column for the lower middle income group
sd(lower_middle_income[["Infections"]])
## [1] 5.638343
sd(lower_middle_income[["Democracy"]])
## [1] 8.005864
# Upper middle income group: summary() reports level counts for the factor
# columns and min / quartiles / mean / max for the numeric columns

summary(upper_middle_income)
##                    country                  income_group   Democracy    
##  Albania               : 1   High income: non-OECD: 0    Min.   :31.20  
##  Algeria               : 1   High income: OECD    : 0    1st Qu.:37.40  
##  Argentina             : 1   Low income           : 0    Median :41.70  
##  Azerbaijan            : 1   Lower middle income  : 0    Mean   :42.99  
##  Belarus               : 1   Upper middle income  :36    3rd Qu.:47.55  
##  Bosnia and Herzegovina: 1                               Max.   :57.60  
##  (Other)               :30                                              
##    Infections   
##  Min.   :24.00  
##  1st Qu.:27.00  
##  Median :32.50  
##  Mean   :32.86  
##  3rd Qu.:37.25  
##  Max.   :45.00  
## 
# Standard deviation of each numeric column for the upper middle income group
sd(upper_middle_income[["Infections"]])
## [1] 6.010243
sd(upper_middle_income[["Democracy"]])
## [1] 6.7728
# High income (OECD) group: summary() reports level counts for the factor
# columns and min / quartiles / mean / max for the numeric columns

summary(high_oecd_income)
##            country                  income_group   Democracy   
##  Australia     : 1   High income: non-OECD: 0    Min.   :54.0  
##  Austria       : 1   High income: OECD    :31    1st Qu.:69.8  
##  Belgium       : 1   Low income           : 0    Median :77.8  
##  Canada        : 1   Lower middle income  : 0    Mean   :74.2  
##  Czech Republic: 1   Upper middle income  : 0    3rd Qu.:81.6  
##  Denmark       : 1                               Max.   :86.6  
##  (Other)       :25                                             
##    Infections   
##  Min.   :23.00  
##  1st Qu.:25.00  
##  Median :26.00  
##  Mean   :26.55  
##  3rd Qu.:28.00  
##  Max.   :32.00  
## 
# Standard deviation of each numeric column for the high income (OECD) group
sd(high_oecd_income[["Infections"]])
## [1] 2.218689
sd(high_oecd_income[["Democracy"]])
## [1] 10.03939
# High income (non-OECD) group: summary() reports level counts for the factor
# columns and min / quartiles / mean / max for the numeric columns

summary(high_nonoecd_income)
##               country                  income_group   Democracy    
##  Bahamas, The     : 1   High income: non-OECD:16    Min.   :28.40  
##  Bahrain          : 1   High income: OECD    : 0    1st Qu.:44.35  
##  Barbados         : 1   Low income           : 0    Median :49.00  
##  Croatia          : 1   Lower middle income  : 0    Mean   :51.06  
##  Cyprus           : 1   Upper middle income  : 0    3rd Qu.:59.20  
##  Equatorial Guinea: 1                               Max.   :77.60  
##  (Other)          :10                                              
##    Infections   
##  Min.   :23.00  
##  1st Qu.:25.75  
##  Median :26.50  
##  Mean   :28.00  
##  3rd Qu.:28.25  
##  Max.   :37.00  
## 
# Standard deviation of each numeric column for the high income (non-OECD)
# group
sd(high_nonoecd_income[["Infections"]])
## [1] 4.305036
sd(high_nonoecd_income[["Democracy"]])
## [1] 13.3492

Create Histograms

# Histogram of infection rate for all income levels, filled by income group

# binwidth is set explicitly instead of relying on the default of 30 bins
# (which makes stat_bin() emit a "pick better value" message). A width of 1
# gives one bar per unit of infection rate; the values shown by head() are
# whole numbers -- confirm this holds for the full data set.
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(disease_democ, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "All Incomes vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number")

# Histogram of Low Income

# "Dark2" is a valid ColorBrewer qualitative palette; an unknown palette name
# only triggers a warning and a fallback palette.
# binwidth is set explicitly instead of relying on the default of 30 bins; a
# width of 1 gives one bar per unit of infection rate (values appear to be
# whole numbers -- confirm for the full data set).
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(low_income, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "Low Income vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number") +
  scale_fill_brewer(palette = "Dark2")

# Histogram of Lower Middle Income

# binwidth is set explicitly instead of relying on the default of 30 bins; a
# width of 1 gives one bar per unit of infection rate (values appear to be
# whole numbers -- confirm for the full data set).
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(lower_middle_income, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "Low Middle Income vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number") +
  scale_fill_brewer(palette = "Set3")

# Histogram of Upper Middle Income

# "Accent" is a valid ColorBrewer qualitative palette; an unknown palette name
# only triggers a warning and a fallback palette.
# binwidth is set explicitly instead of relying on the default of 30 bins; a
# width of 1 gives one bar per unit of infection rate (values appear to be
# whole numbers -- confirm for the full data set).
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(upper_middle_income, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "Upper Middle Income vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number") +
  scale_fill_brewer(palette = "Accent")

# Histogram of High Income: non-OECD Income

# binwidth is set explicitly instead of relying on the default of 30 bins; a
# width of 1 gives one bar per unit of infection rate (values appear to be
# whole numbers -- confirm for the full data set).
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(high_nonoecd_income, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "High non-OECD Income vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number") +
  scale_fill_brewer(palette = "Set1")

# Histogram of High Income: OECD Income

# binwidth is set explicitly instead of relying on the default of 30 bins; a
# width of 1 gives one bar per unit of infection rate (values appear to be
# whole numbers -- confirm for the full data set).
# The title comes from labs(); a separate ggtitle() would only be overridden.
ggplot(high_oecd_income, aes(Infections)) +
  geom_histogram(aes(fill = income_group, color = income_group),
                 binwidth = 1) +
  theme_light() +
  labs(title = "High OECD Income vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Number") +
  scale_fill_brewer(palette = "Set2")

Create Scatter plot

# Scatter plot of democracy vs Infections Rate

# Democracy score against infection rate, one point per row, coloured by
# income group.
# Every layer is joined with `+`: if the chain is broken before labs(), R
# evaluates labs() as a separate statement (printing the label list) and the
# labels are never attached to the plot.
# The y label names the variable actually mapped to y (Democracy).
p <- ggplot(data = disease_democ, aes(x = Infections, y = Democracy)) +
  geom_point(aes(color = income_group)) +
  theme_light() +
  labs(title = "Democracy vs Infection Rate", subtitle = "Income",
       caption = "Infections", x = "Infections", y = "Democracy")

# Convert the static ggplot into an interactive plotly widget and display it.
p <- ggplotly(p)

p
# Scatter plot of income vs Infections Rate

# Infection rate for each income group, one point per row, coloured by income
# group.
# Every layer is joined with `+`: if the chain is broken before labs(), the
# labels and the theme() tweaks form a detached expression and are never
# applied to the plot.
# The x axis title, tick labels and tick marks are blanked; the colour legend
# already identifies each income group.
q <- ggplot(data = disease_democ,
            mapping = aes(x = income_group, y = Infections)) +
  geom_point(aes(color = income_group)) +
  theme_light() +
  labs(title = "Income", subtitle = "Income",
       caption = "Income", x = "Income", y = "Infections") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# Convert the static ggplot into an interactive plotly widget and display it.
q <- ggplotly(q)

q

Create the Linear regression model

# Simple linear regression of infection rate on democracy score
fit1 <- lm(formula = Infections ~ Democracy, data = disease_democ)

# Correlation between the two variables
with(disease_democ, cor(Infections, Democracy))
## [1] -0.6664911
# Coefficients, residual summary and fit statistics for the model
summary(fit1)
## 
## Call:
## lm(formula = Infections ~ Democracy, data = disease_democ)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.6506 -3.7633  0.2188  3.6332 10.4621 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.59815    0.97374   44.77   <2e-16 ***
## Democracy   -0.24008    0.02084  -11.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.071 on 166 degrees of freedom
## Multiple R-squared:  0.4442, Adjusted R-squared:  0.4409 
## F-statistic: 132.7 on 1 and 166 DF,  p-value: < 2.2e-16