library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(readxl)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
dataset<-read.csv("state_data_combo.csv")
summary(dataset$Unemployment.rate.2019)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   3.300   4.100   4.192   5.100   6.900
summary(dataset$Poverty.Rate....)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.90   11.15   13.70   13.65   15.75   20.80
summary(dataset$Percent.with.no.home.computer..2018.)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.60    9.90   11.50   11.56   12.75   18.50
summary(dataset$Percent.with.no.home.Internet..2018.)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.10   16.80   19.10   19.72   21.35   31.50
summary(dataset$Percent.with.home.Broadband)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   68.00   78.05   80.00   79.78   82.65   86.50
summary(dataset$Population.for.whom.broadband.available..2019....)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   43.90   67.30   77.50   76.88   85.90   99.00
summary(dataset$Library.Visits.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.600   1.770   2.290   2.330   3.025   3.820
summary(dataset$Total.Circulation.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.640   4.365   5.460   6.038   7.535  12.170
summary(dataset$Registered.Users.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2200  0.4100  0.5000  0.4875  0.5600  0.7500

##Economic Linear Models##

linear_model1<-lm(dataset$Library.Visits.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model2<-lm(dataset$Total.Circulation.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model3<-lm(dataset$Registered.Users.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
summary(linear_model1)
## 
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.09175 -0.56386 -0.03499  0.47953  1.76540 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.60202    0.48268   7.463 1.44e-09 ***
## dataset$Unemployment.rate.2019 -0.02690    0.09923  -0.271   0.7875    
## dataset$Poverty.Rate....       -0.08488    0.03926  -2.162   0.0356 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6887 on 48 degrees of freedom
## Multiple R-squared:  0.1348, Adjusted R-squared:  0.09873 
## F-statistic: 3.739 on 2 and 48 DF,  p-value: 0.03097
summary(linear_model2)
## 
## Call:
## lm(formula = dataset$Total.Circulation.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1236 -1.6941 -0.4231  1.0820  6.3615 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    10.12931    1.55977   6.494 4.39e-08 ***
## dataset$Unemployment.rate.2019  0.02216    0.32065   0.069   0.9452    
## dataset$Poverty.Rate....       -0.30639    0.12686  -2.415   0.0196 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.225 on 48 degrees of freedom
## Multiple R-squared:  0.141,  Adjusted R-squared:  0.1052 
## F-statistic: 3.941 on 2 and 48 DF,  p-value: 0.02602
summary(linear_model3)
## 
## Call:
## lm(formula = dataset$Registered.Users.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.253055 -0.080136  0.002839  0.065831  0.227498 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.414842   0.075941   5.463 1.64e-06 ***
## dataset$Unemployment.rate.2019 -0.005461   0.015612  -0.350    0.728    
## dataset$Poverty.Rate....        0.006994   0.006176   1.132    0.263    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1083 on 48 degrees of freedom
## Multiple R-squared:  0.02775,    Adjusted R-squared:  -0.01276 
## F-statistic: 0.6851 on 2 and 48 DF,  p-value: 0.5089

##Linearity tests:##

plot(linear_model1,which=1)

plot(linear_model2,which=1)

plot(linear_model3,which=1)

raintest(linear_model1)
## 
##  Rainbow test
## 
## data:  linear_model1
## Rain = 2.5368, df1 = 26, df2 = 22, p-value = 0.01491
raintest(linear_model2)
## 
##  Rainbow test
## 
## data:  linear_model2
## Rain = 0.83434, df1 = 26, df2 = 22, p-value = 0.6736
raintest(linear_model3)
## 
##  Rainbow test
## 
## data:  linear_model3
## Rain = 1.0316, df1 = 26, df2 = 22, p-value = 0.4746

##Independence of errors test:##

durbinWatsonTest(linear_model1)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.09076969      2.111369    0.74
##  Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model2)
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1577056      2.309699   0.262
##  Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model3)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.03319849      2.044397    0.89
##  Alternative hypothesis: rho != 0

##Homoscedastciity tests:##

plot(linear_model1,which=3)

plot(linear_model2,which=3)

plot(linear_model3,which=3)

bptest(linear_model1)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model1
## BP = 0.37311, df = 2, p-value = 0.8298
bptest(linear_model2)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model2
## BP = 0.26688, df = 2, p-value = 0.8751
bptest(linear_model3)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model3
## BP = 1.8898, df = 2, p-value = 0.3887

##Normality of residuals tests:##

plot(linear_model1,which=2)

plot(linear_model2,which=2)

plot(linear_model3,which=2)

shapiro.test(linear_model1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model1$residuals
## W = 0.96601, p-value = 0.1503
shapiro.test(linear_model2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model2$residuals
## W = 0.92975, p-value = 0.004871
shapiro.test(linear_model3$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model3$residuals
## W = 0.99196, p-value = 0.9794

##Test for non-multicolinearity:##

cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319

##Computer Use Multiple Regression Model##

multiple_model1<-lm(dataset$Library.Visits.Per.Capita~dataset$Percent.with.no.home.computer..2018.+dataset$Percent.with.no.home.Internet..2018.+dataset$Percent.with.home.Broadband+dataset$Population.for.whom.broadband.available..2019....,data=dataset)
summary(multiple_model1)
## 
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Percent.with.no.home.computer..2018. + 
##     dataset$Percent.with.no.home.Internet..2018. + dataset$Percent.with.home.Broadband + 
##     dataset$Population.for.whom.broadband.available..2019...., 
##     data = dataset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5732 -0.4351 -0.0867  0.4856  1.2813 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                               199.323949  62.223582
## dataset$Percent.with.no.home.computer..2018.               -0.136093   0.102884
## dataset$Percent.with.no.home.Internet..2018.               -1.943373   0.622389
## dataset$Percent.with.home.Broadband                        -1.986395   0.626655
## dataset$Population.for.whom.broadband.available..2019....   0.017767   0.008621
##                                                           t value Pr(>|t|)   
## (Intercept)                                                 3.203  0.00247 **
## dataset$Percent.with.no.home.computer..2018.               -1.323  0.19245   
## dataset$Percent.with.no.home.Internet..2018.               -3.122  0.00310 **
## dataset$Percent.with.home.Broadband                        -3.170  0.00271 **
## dataset$Population.for.whom.broadband.available..2019....   2.061  0.04500 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6417 on 46 degrees of freedom
## Multiple R-squared:  0.2801, Adjusted R-squared:  0.2174 
## F-statistic: 4.473 on 4 and 46 DF,  p-value: 0.003886