library(readxl)
# Load the "Data" sheet of the project workbook.
STAT_705_DATA <- read_excel(
  "C:/Users/nyoun/Downloads/STAT 705 DATA.xlsx",
  sheet = "Data"
)

# Open the data viewer only when someone is at the console. When the script is
# knitted or run with Rscript there is nobody to look at the viewer, so the
# call is skipped instead of being attempted.
if (interactive()) {
  View(STAT_705_DATA)
}

# The following six vectors are shorthand for specific columns and are used in
# some of the models and plots.

# Median age (all rows).
Age <- STAT_705_DATA$`Median age`

# Obese adults per 100 (all rows).
ObeseRate <- STAT_705_DATA$`Obesity rate.  (obese adults per 100)`

# Population density, restricted to rows whose density is below 8000.
# which() also drops rows where the density is NA.
PopDens <- STAT_705_DATA$`Population Density (Square mile)`[
  which(STAT_705_DATA$`Population Density (Square mile)` < 8000)
]

# Cases per capita for the same rows as PopDens, so the two vectors stay
# aligned element for element.
CasesCap <- STAT_705_DATA$`Cases Per Capita`[
  which(STAT_705_DATA$`Population Density (Square mile)` < 8000)
]

# Cases per capita (all rows, no density filter).
CasesCap2 <- STAT_705_DATA$`Cases Per Capita`

# Values of the `Deaths per Thousand` column (all rows).
DeathsCapita <- STAT_705_DATA$`Deaths per Thousand`



# Simple linear regression of the death rate on median age.
# Rationale: median age is unlikely to significantly affect the number of
# cases, but among people who are infected it is generally accepted that the
# elderly are much more likely to die.
# (Lines starting with "## " are output recorded from a previous run.)
AgeDeath <- lm(
  STAT_705_DATA$`Deaths per Thousand` ~ STAT_705_DATA$`Median age`
)
summary(AgeDeath)
## 
## Call:
## lm(formula = STAT_705_DATA$`Deaths per Thousand` ~ STAT_705_DATA$`Median age`)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0019571 -0.0006003  0.0001449  0.0005902  0.0013824 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)
## (Intercept)                2.157e-03  1.841e-03   1.172    0.247
## STAT_705_DATA$`Median age` 1.773e-05  4.786e-05   0.370    0.713
## 
## Residual standard error: 0.000819 on 49 degrees of freedom
## Multiple R-squared:  0.002792,   Adjusted R-squared:  -0.01756 
## F-statistic: 0.1372 on 1 and 49 DF,  p-value: 0.7127
# Confidence intervals for the coefficients (default 95% level).
confint(AgeDeath)
##                                    2.5 %       97.5 %
## (Intercept)                -1.542143e-03 0.0058562116
## STAT_705_DATA$`Median age` -7.844459e-05 0.0001138958
# Simple linear regression of the death rate on per capita GDP.
# Rationale: richer people tend to have superior health outcomes in every
# country, because they tend to be physically healthier and are more able to
# take time off from work when ill. In America this discrepancy is exacerbated
# by the privatized nature of the healthcare system.
# (Lines starting with "## " are output recorded from a previous run.)
GDPDeaths <- lm(
  STAT_705_DATA$`Deaths per Thousand` ~
    STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`
)
summary(GDPDeaths)
## 
## Call:
## lm(formula = STAT_705_DATA$`Deaths per Thousand` ~ STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020023 -0.0005187  0.0002885  0.0005612  0.0011173 
## 
## Coefficients:
##                                                         Estimate Std. Error
## (Intercept)                                            3.487e-03  3.169e-04
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` -1.123e-05  5.142e-06
##                                                       t value Pr(>|t|)    
## (Intercept)                                            11.002 7.69e-15 ***
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`  -2.183   0.0339 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007829 on 49 degrees of freedom
## Multiple R-squared:  0.08862,    Adjusted R-squared:  0.07002 
## F-statistic: 4.765 on 1 and 49 DF,  p-value: 0.03387
# Confidence intervals for the coefficients (default 95% level).
confint(GDPDeaths)
##                                                               2.5 %
## (Intercept)                                            2.849713e-03
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` -2.155937e-05
##                                                              97.5 %
## (Intercept)                                            4.123385e-03
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` -8.909592e-07
# Regression of the death rate on GDP per capita, median age, and their
# interaction. Rationale: it is a generally accepted fact that older people
# tend to be wealthier and higher paid, so the two predictors may interact.
# In an R formula `a * b` already expands to `a + b + a:b`, so the two
# explicit main-effect terms are redundant but harmless; the fit has the four
# coefficients shown in the recorded output.
# (Lines starting with "## " are output recorded from a previous run.)
GDPAGEDeaths <- lm(
  STAT_705_DATA$`Deaths per Thousand` ~
    STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` +
    STAT_705_DATA$`Median age` +
    STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` *
      STAT_705_DATA$`Median age`
)
summary(GDPAGEDeaths)
## 
## Call:
## lm(formula = STAT_705_DATA$`Deaths per Thousand` ~ STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` + 
##     STAT_705_DATA$`Median age` + STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)` * 
##     STAT_705_DATA$`Median age`)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020038 -0.0005229  0.0002738  0.0005254  0.0010257 
## 
## Coefficients:
##                                                                                    Estimate
## (Intercept)                                                                       1.521e-03
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`                             3.834e-05
## STAT_705_DATA$`Median age`                                                        5.903e-05
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`:STAT_705_DATA$`Median age` -1.437e-06
##                                                                                  Std. Error
## (Intercept)                                                                       5.197e-03
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`                             8.938e-05
## STAT_705_DATA$`Median age`                                                        1.453e-04
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`:STAT_705_DATA$`Median age`  2.552e-06
##                                                                                  t value
## (Intercept)                                                                        0.293
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`                              0.429
## STAT_705_DATA$`Median age`                                                         0.406
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`:STAT_705_DATA$`Median age`  -0.563
##                                                                                  Pr(>|t|)
## (Intercept)                                                                         0.771
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`                               0.670
## STAT_705_DATA$`Median age`                                                          0.686
## STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`:STAT_705_DATA$`Median age`    0.576
## 
## Residual standard error: 0.0007956 on 47 degrees of freedom
## Multiple R-squared:  0.09726,    Adjusted R-squared:  0.03964 
## F-statistic: 1.688 on 3 and 47 DF,  p-value: 0.1824
# Scatter plot of the death rate against GDP per capita, with the fitted line
# from the GDPDeaths model drawn on top.
plot(
  STAT_705_DATA$`GDP Per Capita (Thousands of Dollars)`,
  STAT_705_DATA$`Deaths per Thousand`,
  xlab = 'GDP per capita(thousands of dollars)',
  ylab = 'Deaths per Thousand'
)
abline(GDPDeaths)

# Histogram of the death rate, used as an easy illustration of how the death
# rate differs between states. The author also reads it as showing that death
# rates from COVID-19 are generally lower than most people believe, though it
# says nothing about rates of lasting complications from the disease.
hist(
  STAT_705_DATA$`Deaths per Thousand`,
  breaks = 50,
  main = "Deaths Per Thousand people",
  xlab = "Death Rate",
  ylab = "# of states"
)

# Simple linear regression of cases per capita on population density, fit on
# every row of the data (no density filter).
# Rationale: it is generally accepted that more densely populated areas tend
# towards faster spread of disease; this checks to what degree that holds for
# America.
# (Lines starting with "## " are output recorded from a previous run.)
DensCasesLM <- lm(
  STAT_705_DATA$`Cases Per Capita` ~
    STAT_705_DATA$`Population Density (Square mile)`
)
summary(DensCasesLM)
## 
## Call:
## lm(formula = STAT_705_DATA$`Cases Per Capita` ~ STAT_705_DATA$`Population Density (Square mile)`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21064 -0.02422 -0.00096  0.02359  0.50982 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                       2.461e-01  1.304e-02  18.879
## STAT_705_DATA$`Population Density (Square mile)` -4.204e-06  9.270e-06  -0.454
##                                                  Pr(>|t|)    
## (Intercept)                                        <2e-16 ***
## STAT_705_DATA$`Population Density (Square mile)`    0.652    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08993 on 49 degrees of freedom
## Multiple R-squared:  0.00418,    Adjusted R-squared:  -0.01614 
## F-statistic: 0.2057 on 1 and 49 DF,  p-value: 0.6522
# Confidence intervals for the coefficients (default 95% level).
confint(DensCasesLM)
##                                                         2.5 %       97.5 %
## (Intercept)                                       2.19914e-01 2.723088e-01
## STAT_705_DATA$`Population Density (Square mile)` -2.28334e-05 1.442473e-05
# Scatter plot of cases per capita against population density, with the
# DensCasesLM regression line drawn on top.
# NOTE(review): PopDens and CasesCap contain only rows with density below
# 8000, while DensCasesLM was fit on every row, so the line reflects
# observations that are not shown in this scatter.
plot(
  PopDens,
  CasesCap,
  xlab = 'Population Density',
  ylab = 'Cases Per Capita'
)
abline(DensCasesLM)

# Scatter plot of the death rate against median age, with the AgeDeath
# regression line drawn on top, to show the effect of median age on mortality.
plot(
  Age,
  DeathsCapita,
  xlab = "Median Age",
  ylab = "Deaths per Thousand People"
)
abline(AgeDeath)

# This model and the accompanying plot illustrate the effect of median age on
# disease virality, measured here as cases per capita (all rows).
# (Lines starting with "## " are output recorded from a previous run.)

AgeCap2 <- lm(CasesCap2 ~ Age)
summary(AgeCap2)
## 
## Call:
## lm(formula = CasesCap2 ~ Age)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21599 -0.03084 -0.00134  0.02211  0.51110 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.363816   0.201833   1.803   0.0776 .
## Age         -0.003106   0.005247  -0.592   0.5566  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0898 on 49 degrees of freedom
## Multiple R-squared:  0.007099,   Adjusted R-squared:  -0.01316 
## F-statistic: 0.3504 on 1 and 49 DF,  p-value: 0.5566
# Scatter of the same two vectors with the fitted line on top.
plot(
  Age,
  CasesCap2,
  xlab = 'Median Age',
  ylab = 'Cases Per Capita'
)
abline(AgeCap2)

# Simple linear regression of the death rate on the obesity rate, measuring
# the effect of differing obesity rates on death rates. The scatter plot at
# the end of this section illustrates the fit.
# (Lines starting with "## " are output recorded from a previous run.)
ObeseDeath <- lm(
  STAT_705_DATA$`Deaths per Thousand` ~ ObeseRate
)
summary(ObeseDeath)
## 
## Call:
## lm(formula = STAT_705_DATA$`Deaths per Thousand` ~ ObeseRate)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.187e-03 -4.505e-04  4.675e-05  4.535e-04  1.357e-03 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.149e-04  7.217e-04  -1.129    0.264    
## ObeseRate    1.126e-04  2.207e-05   5.103 5.43e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0006627 on 49 degrees of freedom
## Multiple R-squared:  0.347,  Adjusted R-squared:  0.3337 
## F-statistic: 26.04 on 1 and 49 DF,  p-value: 5.426e-06
# Confidence intervals for the coefficients (default 95% level).
confint(ObeseDeath)
##                     2.5 %       97.5 %
## (Intercept) -2.265258e-03 0.0006354155
## ObeseRate    6.827174e-05 0.0001569697
# Scatter of the death rate against the obesity rate with the fitted line.
plot(
  ObeseRate,
  STAT_705_DATA$`Deaths per Thousand`,
  xlab = "Obesity per hundred",
  ylab = "Death Rate."
)
abline(ObeseDeath)