library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(readxl)
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
dataset<-read.csv("state_data_combo.csv")
summary(dataset$Unemployment.rate.2019)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.200 3.300 4.100 4.192 5.100 6.900
summary(dataset$Poverty.Rate....)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.90 11.15 13.70 13.65 15.75 20.80
summary(dataset$Percent.with.no.home.computer..2018.)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.60 9.90 11.50 11.56 12.75 18.50
summary(dataset$Percent.with.no.home.Internet..2018.)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.10 16.80 19.10 19.72 21.35 31.50
summary(dataset$Percent.with.home.Broadband)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 68.00 78.05 80.00 79.78 82.65 86.50
summary(dataset$Population.for.whom.broadband.available..2019....)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 43.90 67.30 77.50 76.88 85.90 99.00
summary(dataset$Library.Visits.Per.Capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.600 1.770 2.290 2.330 3.025 3.820
summary(dataset$Total.Circulation.Per.Capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.640 4.365 5.460 6.038 7.535 12.170
summary(dataset$Registered.Users.Per.Capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2200 0.4100 0.5000 0.4875 0.5600 0.7500
##Economic Linear Models##
linear_model1<-lm(dataset$Library.Visits.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model2<-lm(dataset$Total.Circulation.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model3<-lm(dataset$Registered.Users.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
summary(linear_model1)
##
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Unemployment.rate.2019 +
## dataset$Poverty.Rate...., data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09175 -0.56386 -0.03499 0.47953 1.76540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.60202 0.48268 7.463 1.44e-09 ***
## dataset$Unemployment.rate.2019 -0.02690 0.09923 -0.271 0.7875
## dataset$Poverty.Rate.... -0.08488 0.03926 -2.162 0.0356 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6887 on 48 degrees of freedom
## Multiple R-squared: 0.1348, Adjusted R-squared: 0.09873
## F-statistic: 3.739 on 2 and 48 DF, p-value: 0.03097
summary(linear_model2)
##
## Call:
## lm(formula = dataset$Total.Circulation.Per.Capita ~ dataset$Unemployment.rate.2019 +
## dataset$Poverty.Rate...., data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1236 -1.6941 -0.4231 1.0820 6.3615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.12931 1.55977 6.494 4.39e-08 ***
## dataset$Unemployment.rate.2019 0.02216 0.32065 0.069 0.9452
## dataset$Poverty.Rate.... -0.30639 0.12686 -2.415 0.0196 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.225 on 48 degrees of freedom
## Multiple R-squared: 0.141, Adjusted R-squared: 0.1052
## F-statistic: 3.941 on 2 and 48 DF, p-value: 0.02602
summary(linear_model3)
##
## Call:
## lm(formula = dataset$Registered.Users.Per.Capita ~ dataset$Unemployment.rate.2019 +
## dataset$Poverty.Rate...., data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.253055 -0.080136 0.002839 0.065831 0.227498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.414842 0.075941 5.463 1.64e-06 ***
## dataset$Unemployment.rate.2019 -0.005461 0.015612 -0.350 0.728
## dataset$Poverty.Rate.... 0.006994 0.006176 1.132 0.263
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1083 on 48 degrees of freedom
## Multiple R-squared: 0.02775, Adjusted R-squared: -0.01276
## F-statistic: 0.6851 on 2 and 48 DF, p-value: 0.5089
##Linearity tests:##
plot(linear_model1,which=1)
plot(linear_model2,which=1)
plot(linear_model3,which=1)
raintest(linear_model1)
##
## Rainbow test
##
## data: linear_model1
## Rain = 2.5368, df1 = 26, df2 = 22, p-value = 0.01491
raintest(linear_model2)
##
## Rainbow test
##
## data: linear_model2
## Rain = 0.83434, df1 = 26, df2 = 22, p-value = 0.6736
raintest(linear_model3)
##
## Rainbow test
##
## data: linear_model3
## Rain = 1.0316, df1 = 26, df2 = 22, p-value = 0.4746
##Independence of errors test:##
durbinWatsonTest(linear_model1)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.09076969 2.111369 0.74
## Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model2)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.1577056 2.309699 0.262
## Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model3)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.03319849 2.044397 0.89
## Alternative hypothesis: rho != 0
##Homoscedastciity tests:##
plot(linear_model1,which=3)
plot(linear_model2,which=3)
plot(linear_model3,which=3)
bptest(linear_model1)
##
## studentized Breusch-Pagan test
##
## data: linear_model1
## BP = 0.37311, df = 2, p-value = 0.8298
bptest(linear_model2)
##
## studentized Breusch-Pagan test
##
## data: linear_model2
## BP = 0.26688, df = 2, p-value = 0.8751
bptest(linear_model3)
##
## studentized Breusch-Pagan test
##
## data: linear_model3
## BP = 1.8898, df = 2, p-value = 0.3887
##Normality of residuals tests:##
plot(linear_model1,which=2)
plot(linear_model2,which=2)
plot(linear_model3,which=2)
shapiro.test(linear_model1$residuals)
##
## Shapiro-Wilk normality test
##
## data: linear_model1$residuals
## W = 0.96601, p-value = 0.1503
shapiro.test(linear_model2$residuals)
##
## Shapiro-Wilk normality test
##
## data: linear_model2$residuals
## W = 0.92975, p-value = 0.004871
shapiro.test(linear_model3$residuals)
##
## Shapiro-Wilk normality test
##
## data: linear_model3$residuals
## W = 0.99196, p-value = 0.9794
##Test for non-multicolinearity:##
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
##Computer Use Multiple Regression Model##
multiple_model1<-lm(dataset$Library.Visits.Per.Capita~dataset$Percent.with.no.home.computer..2018.+dataset$Percent.with.no.home.Internet..2018.+dataset$Percent.with.home.Broadband+dataset$Population.for.whom.broadband.available..2019....,data=dataset)
summary(multiple_model1)
##
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Percent.with.no.home.computer..2018. +
## dataset$Percent.with.no.home.Internet..2018. + dataset$Percent.with.home.Broadband +
## dataset$Population.for.whom.broadband.available..2019....,
## data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5732 -0.4351 -0.0867 0.4856 1.2813
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 199.323949 62.223582
## dataset$Percent.with.no.home.computer..2018. -0.136093 0.102884
## dataset$Percent.with.no.home.Internet..2018. -1.943373 0.622389
## dataset$Percent.with.home.Broadband -1.986395 0.626655
## dataset$Population.for.whom.broadband.available..2019.... 0.017767 0.008621
## t value Pr(>|t|)
## (Intercept) 3.203 0.00247 **
## dataset$Percent.with.no.home.computer..2018. -1.323 0.19245
## dataset$Percent.with.no.home.Internet..2018. -3.122 0.00310 **
## dataset$Percent.with.home.Broadband -3.170 0.00271 **
## dataset$Population.for.whom.broadband.available..2019.... 2.061 0.04500 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6417 on 46 degrees of freedom
## Multiple R-squared: 0.2801, Adjusted R-squared: 0.2174
## F-statistic: 4.473 on 4 and 46 DF, p-value: 0.003886