library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(readxl)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
dataset<-read.csv("state_data_combo.csv")
summary(dataset$Unemployment.rate.2019)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   3.300   4.100   4.192   5.100   6.900
summary(dataset$Poverty.Rate....)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.90   11.15   13.70   13.65   15.75   20.80
summary(dataset$Percent.with.no.home.computer..2018.)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.60    9.90   11.50   11.56   12.75   18.50
summary(dataset$Percent.with.no.home.Internet..2018.)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.10   16.80   19.10   19.72   21.35   31.50
summary(dataset$Percent.with.home.Broadband)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   68.00   78.05   80.00   79.78   82.65   86.50
summary(dataset$Population.for.whom.broadband.available..2019....)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   43.90   67.30   77.50   76.88   85.90   99.00
summary(dataset$Library.Visits.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.600   1.770   2.290   2.330   3.025   3.820
summary(dataset$Total.Circulation.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.640   4.365   5.460   6.038   7.535  12.170
summary(dataset$Registered.Users.Per.Capita)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2200  0.4100  0.5000  0.4875  0.5600  0.7500

##Economic Linear Models##

linear_model1<-lm(dataset$Library.Visits.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model2<-lm(dataset$Total.Circulation.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
linear_model3<-lm(dataset$Registered.Users.Per.Capita~dataset$Unemployment.rate.2019+dataset$Poverty.Rate....,data=dataset)
summary(linear_model1)
## 
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.09175 -0.56386 -0.03499  0.47953  1.76540 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.60202    0.48268   7.463 1.44e-09 ***
## dataset$Unemployment.rate.2019 -0.02690    0.09923  -0.271   0.7875    
## dataset$Poverty.Rate....       -0.08488    0.03926  -2.162   0.0356 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6887 on 48 degrees of freedom
## Multiple R-squared:  0.1348, Adjusted R-squared:  0.09873 
## F-statistic: 3.739 on 2 and 48 DF,  p-value: 0.03097
summary(linear_model2)
## 
## Call:
## lm(formula = dataset$Total.Circulation.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1236 -1.6941 -0.4231  1.0820  6.3615 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    10.12931    1.55977   6.494 4.39e-08 ***
## dataset$Unemployment.rate.2019  0.02216    0.32065   0.069   0.9452    
## dataset$Poverty.Rate....       -0.30639    0.12686  -2.415   0.0196 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.225 on 48 degrees of freedom
## Multiple R-squared:  0.141,  Adjusted R-squared:  0.1052 
## F-statistic: 3.941 on 2 and 48 DF,  p-value: 0.02602
summary(linear_model3)
## 
## Call:
## lm(formula = dataset$Registered.Users.Per.Capita ~ dataset$Unemployment.rate.2019 + 
##     dataset$Poverty.Rate...., data = dataset)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.253055 -0.080136  0.002839  0.065831  0.227498 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.414842   0.075941   5.463 1.64e-06 ***
## dataset$Unemployment.rate.2019 -0.005461   0.015612  -0.350    0.728    
## dataset$Poverty.Rate....        0.006994   0.006176   1.132    0.263    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1083 on 48 degrees of freedom
## Multiple R-squared:  0.02775,    Adjusted R-squared:  -0.01276 
## F-statistic: 0.6851 on 2 and 48 DF,  p-value: 0.5089
stargazer(linear_model1,linear_model2,linear_model3,type="html")
Dependent variable:
Library.Visits.Per.Capita Total.Circulation.Per.Capita Registered.Users.Per.Capita
(1) (2) (3)
Unemployment.rate.2019 -0.027 0.022 -0.005
(0.099) (0.321) (0.016)
Poverty.Rate…. -0.085** -0.306** 0.007
(0.039) (0.127) (0.006)
Constant 3.602*** 10.129*** 0.415***
(0.483) (1.560) (0.076)
Observations 51 51 51
R2 0.135 0.141 0.028
Adjusted R2 0.099 0.105 -0.013
Residual Std. Error (df = 48) 0.689 2.225 0.108
F Statistic (df = 2; 48) 3.739** 3.941** 0.685
Note: p<0.1; p<0.05; p<0.01

##Linearity tests:##

plot(linear_model1,which=1)

plot(linear_model2,which=1)

plot(linear_model3,which=1)

raintest(linear_model1)
## 
##  Rainbow test
## 
## data:  linear_model1
## Rain = 2.5368, df1 = 26, df2 = 22, p-value = 0.01491
raintest(linear_model2)
## 
##  Rainbow test
## 
## data:  linear_model2
## Rain = 0.83434, df1 = 26, df2 = 22, p-value = 0.6736
raintest(linear_model3)
## 
##  Rainbow test
## 
## data:  linear_model3
## Rain = 1.0316, df1 = 26, df2 = 22, p-value = 0.4746

##Independence of errors test:##

durbinWatsonTest(linear_model1)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.09076969      2.111369   0.664
##  Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model2)
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1577056      2.309699    0.29
##  Alternative hypothesis: rho != 0
durbinWatsonTest(linear_model3)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.03319849      2.044397    0.91
##  Alternative hypothesis: rho != 0

##Homoscedastciity tests:##

plot(linear_model1,which=3)

plot(linear_model2,which=3)

plot(linear_model3,which=3)

bptest(linear_model1)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model1
## BP = 0.37311, df = 2, p-value = 0.8298
bptest(linear_model2)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model2
## BP = 0.26688, df = 2, p-value = 0.8751
bptest(linear_model3)
## 
##  studentized Breusch-Pagan test
## 
## data:  linear_model3
## BP = 1.8898, df = 2, p-value = 0.3887

##Normality of residuals tests:##

plot(linear_model1,which=2)

plot(linear_model2,which=2)

plot(linear_model3,which=2)

shapiro.test(linear_model1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model1$residuals
## W = 0.96601, p-value = 0.1503
shapiro.test(linear_model2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model2$residuals
## W = 0.92975, p-value = 0.004871
shapiro.test(linear_model3$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  linear_model3$residuals
## W = 0.99196, p-value = 0.9794

##Test for non-multicolinearity:##

cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319
cor(dataset$Library.Visits.Per.Capita,dataset$Poverty.Rate....)
## [1] -0.365319

##Computer Use Multiple Regression Model##

internet_model<-lm(dataset$Library.Visits.Per.Capita~dataset$Percent.with.no.home.Internet..2018.)
summary(internet_model)
## 
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Percent.with.no.home.Internet..2018.)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.91152 -0.54634 -0.00078  0.49885  1.44897 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                   3.37684    0.47783   7.067
## dataset$Percent.with.no.home.Internet..2018. -0.05309    0.02372  -2.238
##                                              Pr(>|t|)    
## (Intercept)                                  5.22e-09 ***
## dataset$Percent.with.no.home.Internet..2018.   0.0298 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.698 on 49 degrees of freedom
## Multiple R-squared:  0.09272,    Adjusted R-squared:  0.0742 
## F-statistic: 5.007 on 1 and 49 DF,  p-value: 0.02982
computer_model<-lm(dataset$Library.Visits.Per.Capita~dataset$Percent.with.no.home.computer..2018.)
summary(computer_model)
## 
## Call:
## lm(formula = dataset$Library.Visits.Per.Capita ~ dataset$Percent.with.no.home.computer..2018.)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.84501 -0.56869  0.05763  0.55671  1.34970 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                   3.18064    0.41054   7.748
## dataset$Percent.with.no.home.computer..2018. -0.07356    0.03448  -2.133
##                                              Pr(>|t|)    
## (Intercept)                                  4.66e-10 ***
## dataset$Percent.with.no.home.computer..2018.   0.0379 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7009 on 49 degrees of freedom
## Multiple R-squared:  0.08499,    Adjusted R-squared:  0.06632 
## F-statistic: 4.551 on 1 and 49 DF,  p-value: 0.03792
plot(computer_model,which=1)

stargazer(computer_model,internet_model,type="html")
Dependent variable:
Library.Visits.Per.Capita
(1) (2)
Percent.with.no.home.computer..2018. -0.074**
(0.034)
Percent.with.no.home.Internet..2018. -0.053**
(0.024)
Constant 3.181*** 3.377***
(0.411) (0.478)
Observations 51 51
R2 0.085 0.093
Adjusted R2 0.066 0.074
Residual Std. Error (df = 49) 0.701 0.698
F Statistic (df = 1; 49) 4.551** 5.007**
Note: p<0.1; p<0.05; p<0.01