This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Install only the packages that are not yet available on this machine.
# Calling install.packages() unconditionally on every run tries to reinstall
# packages that are already loaded in the session, which is what produced the
# "Updating loaded packages" errors. The list covers every package that is
# attached with library() below.
required_pkgs <- c("readr", "ggplot2", "dplyr", "broom", "ggpubr", "estimatr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(readr)
library(ggplot2)
library(dplyr)
library(broom)
library(ggpubr)
library(estimatr)
# Load the data set from CSV; replace the path with your own data file.
data <- readr::read_csv(file = "caschools-1993.csv")
Rows: 137 Columns: 17
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): school, county, grades
dbl (14): X, district, students, teachers, calworks, lunch, computer, expenditure, income, english, read, math, te...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact overview of every column: name, type and the first few values.
dplyr::glimpse(x = data)
Rows: 137
Columns: 17
$ X <dbl> 96, 26, 263, 420, 312, 286, 192, 1, 319, 223, 380, 309, 212, 174, 165, 367, 310, 117, 75, 250, 1…
$ district <dbl> 72017, 72272, 68973, 72751, 68361, 71035, 70649, 75119, 70938, 68635, 70896, 70714, 67348, 72363…
$ school <chr> "Oak Valley Union Elementary", "Woodlake Union Elementary", "Millbrae Elementary", "Wheatland El…
$ county <chr> "Tulare", "Tulare", "San Mateo", "Yuba", "San Diego", "Sonoma", "Sonoma", "Alameda", "Sonoma", "…
$ grades <chr> "KK-08", "KK-08", "KK-08", "KK-08", "KK-08", "KK-06", "KK-06", "KK-08", "KK-08", "KK-08", "KK-06…
$ students <dbl> 354, 1588, 2325, 1778, 8432, 1252, 287, 195, 1425, 332, 2707, 573, 3669, 501, 8787, 139, 721, 62…
$ teachers <dbl> 17.80, 85.00, 115.30, 93.40, 360.50, 65.11, 16.17, 10.90, 71.83, 18.80, 141.02, 29.60, 201.97, 3…
$ calworks <dbl> 10.2778, 22.4072, 2.8817, 6.9235, 6.3923, 9.4512, 3.7931, 0.5102, 5.0926, 9.0361, 5.1396, 6.9930…
$ lunch <dbl> 59.7222, 85.1472, 15.6559, 47.5712, 22.9839, 43.2165, 31.0345, 2.0408, 19.8302, 34.0361, 19.2308…
$ computer <dbl> 28, 198, 302, 313, 0, 113, 65, 67, 65, 73, 300, 48, 500, 163, 1058, 40, 82, 1338, 107, 129, 36, …
$ expenditure <dbl> 4663.811, 5415.153, 4890.987, 5993.393, 5397.689, 5284.164, 5973.925, 6384.911, 4999.469, 4963.0…
$ income <dbl> 10.098000, 8.523000, 21.110500, 12.502000, 14.097667, 12.584250, 17.156000, 22.690001, 17.507999…
$ english <dbl> 9.8870058, 40.4911842, 22.7096786, 5.0056243, 4.9691653, 20.7667732, 14.9825783, 0.0000000, 4.91…
$ read <dbl> 636.7, 625.0, 662.0, 660.5, 663.7, 659.4, 655.1, 691.6, 669.8, 653.2, 678.7, 671.0, 652.7, 652.6…
$ math <dbl> 639.3, 621.2, 660.9, 651.0, 669.5, 668.3, 649.5, 690.0, 664.6, 657.6, 680.8, 661.3, 656.5, 647.1…
$ testscore <dbl> 638.00, 623.10, 661.45, 655.75, 666.60, 663.85, 652.30, 690.80, 667.20, 655.40, 679.75, 666.15, …
$ str <dbl> 19.88764, 18.68235, 20.16479, 19.03640, 23.38974, 19.22900, 17.74892, 17.88991, 19.83851, 17.659…
# Shift every student-teacher ratio up by 2 and summarise the result.
# This creates a standalone vector in the workspace; the `str` column
# inside `data` is left untouched.
str <- data[["str"]] + 2
summary(str)
Min. 1st Qu. Median Mean 3rd Qu. Max.
17.14 20.58 21.84 21.80 22.87 27.79
# ------- homoskedastic (classical) standard errors ---------
# OLS regression of the test score on the student-teacher ratio. summary()
# reports lm's default standard errors. The stray empty trailing argument in
# the original lm() call has been removed.
lm1 <- lm(testscore ~ str, data = data)
summary(lm1)
Call:
lm(formula = testscore ~ str, data = data)
Residuals:
Min 1Q Median 3Q Max
-47.240 -14.457 1.004 11.934 41.826
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 684.9736 16.2337 42.194 <2e-16 ***
str -1.5889 0.8163 -1.946 0.0537 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 18.23 on 135 degrees of freedom
Multiple R-squared: 0.0273, Adjusted R-squared: 0.02009
F-statistic: 3.789 on 1 and 135 DF, p-value: 0.05367
# 90% confidence intervals for the intercept and slope of lm1.
confint(object = lm1, level = 0.9)
5 % 95 %
(Intercept) 658.087015 711.8602121
str -2.940836 -0.2369331
# ------------ heteroskedasticity-robust standard errors ------------
# Same regression as lm1, fitted with lm_robust() and HC1 standard errors.
lm_hetero <- lm_robust(
  formula = testscore ~ str,
  data = data,
  se_type = "HC1"
)
summary(lm_hetero)
Call:
lm_robust(formula = testscore ~ str, data = data, se_type = "HC1")
Standard error type: HC1
Coefficients:
Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
(Intercept) 684.974 16.7223 40.962 5.132e-78 651.902 718.04509 135
str -1.589 0.8302 -1.914 5.776e-02 -3.231 0.05301 135
Multiple R-squared: 0.0273 , Adjusted R-squared: 0.02009
F-statistic: 3.663 on 1 and 135 DF, p-value: 0.05776
# 90% confidence intervals based on the HC1 robust standard errors.
confint(object = lm_hetero, level = 0.9)
5 % 95 %
(Intercept) 657.277892 712.6693348
str -2.963885 -0.2138834
# ---------- after multiplying both variables by 10 ----------
# The rescaled copies live in the workspace (they are not columns of `data`),
# so the formula picks them up from there. Note that this overwrites
# `lm_hetero` with the rescaled fit.
test <- 10 * data[["testscore"]]
strn <- 10 * data[["str"]]
lm_hetero <- lm_robust(formula = test ~ strn, data = data, se_type = "HC1")
summary(lm_hetero)
Call:
lm_robust(formula = test ~ strn, data = data, se_type = "HC1")
Standard error type: HC1
Coefficients:
Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
(Intercept) 6849.736 167.2226 40.962 5.132e-78 6519.021 7.180e+03 135
strn -1.589 0.8302 -1.914 5.776e-02 -3.231 5.301e-02 135
Multiple R-squared: 0.0273 , Adjusted R-squared: 0.02009
F-statistic: 3.663 on 1 and 135 DF, p-value: 0.05776
# 90% confidence intervals for the model currently stored in lm_hetero.
confint(object = lm_hetero, level = 0.9)
5 % 95 %
(Intercept) 6572.778925 7126.6933481
strn -2.963885 -0.2138834
# Refit the robust model on the original (unscaled) columns of `data`,
# replacing whatever was previously stored in `lm_hetero`.
lm_hetero <- lm_robust(
  formula = testscore ~ str,
  data = data,
  se_type = "HC1"
)
summary(lm_hetero)
Call:
lm_robust(formula = testscore ~ str, data = data, se_type = "HC1")
Standard error type: HC1
Coefficients:
Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
(Intercept) 684.974 16.7223 40.962 5.132e-78 651.902 718.04509 135
str -1.589 0.8302 -1.914 5.776e-02 -3.231 0.05301 135
Multiple R-squared: 0.0273 , Adjusted R-squared: 0.02009
F-statistic: 3.663 on 1 and 135 DF, p-value: 0.05776
# 90% confidence intervals for the coefficients of lm_hetero.
confint(object = lm_hetero, level = 0.9)
5 % 95 %
(Intercept) 657.277892 712.6693348
str -2.963885 -0.2138834