(https://rpubs.com/schen0181/1248575)
## # A tibble: 55 × 13
## name pid6 enroll tfedrev tstrev tlocrev totalexp ppcstot tpayoth tcurssch
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 BARBOU… 1834… 2144 7559 16584 5872 28021 11885 48 1224
## 2 BERKEL… 1879… 19722 48407 140127 86699 264253 12704 2187 13128
## 3 BOONE … 1150… 3177 8194 26858 14564 48642 14663 45 2815
## 4 BRAXTO… 1834… 1747 5479 12748 6404 24417 13153 523 1310
## 5 BROOKE… 1364… 2582 6791 17114 21352 41908 15642 90 2258
## 6 CABELL… 1150… 11667 42518 88337 66699 183621 14538 1018 8944
## 7 CALHOU… 1150… 861 3254 9953 3190 15154 16085 44 738
## 8 CLAY C… 1150… 1669 6157 17655 2791 25963 13825 21 1262
## 9 DODDRI… 1834… 1082 3455 3999 31752 38493 23563 244 1294
## 10 FAYETT… 2028… 5594 15293 51759 23477 83373 13777 28 4192
## # ℹ 45 more rows
## # ℹ 3 more variables: tcapout <dbl>, debtout <dbl>, county <chr>
## # A tibble: 2 × 2
## county_name total_expenditures_gen_fund
## <chr> <dbl>
## 1 Barbour 19487610.
## 2 Berkeley 211946860.
## # A tibble: 59 × 8
## dist total_exp_state total_exp_fed avg_state_exp_per_pup avg_fed_exp_per_pup
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 002 13711911. 4292155. 6836. 1919.
## 2 004 139224217. 24921087. 7445. 1509.
## 3 006 23242775. 6768952. 7675. 2362.
## 4 008 12388162. 5512281. 7686. 3850.
## 5 010 22211516. 2647410. 9528. 1283.
## 6 012 81316443. 23375114. 7397. 2288.
## 7 014 6694016. 2936742. 8097. 3975.
## 8 016 11833748. 3580995. 8004. 2607.
## 9 018 12107075. 1900587. 10716. 1833.
## 10 020 37477328 8382223. 7271. 1693.
## # ℹ 49 more rows
## # ℹ 3 more variables: avg_sh_state_per_pup <dbl>, avg_sh_fedper_pup <dbl>,
## # av_gr_tot <dbl>
## # A tibble: 55 × 19
## county science_proficiency proficiency pid6 enroll tfedrev tstrev tlocrev
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Barbour 26.0 26.0 1834… 2144 7559 16584 5872
## 2 Berkeley 28.6 28.6 1879… 19722 48407 140127 86699
## 3 Boone 19.6 19.6 1150… 3177 8194 26858 14564
## 4 Braxton 22.6 22.6 1834… 1747 5479 12748 6404
## 5 Brooke 21.1 21.1 1364… 2582 6791 17114 21352
## 6 Cabell 30.8 30.8 1150… 11667 42518 88337 66699
## 7 Calhoun 27.8 27.8 1150… 861 3254 9953 3190
## 8 Clay 23.3 23.3 1150… 1669 6157 17655 2791
## 9 Doddridge 31.3 31.3 1834… 1082 3455 3999 31752
## 10 Fayette 17.4 17.4 2028… 5594 15293 51759 23477
## # ℹ 45 more rows
## # ℹ 11 more variables: totalexp <dbl>, ppcstot <dbl>, tpayoth <dbl>,
## # tcurssch <dbl>, tcapout <dbl>, debtout <dbl>, fips <chr>, unemployed <dbl>,
## # people_unemployed <dbl>, edu_percent <dbl>, income_family <dbl>
##
## Call:
## lm(formula = proficiency ~ totalexp + income_family + edu_percent,
## data = t_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7003 -2.9758 -0.1353 3.1444 8.6829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.411e+01 6.881e+00 2.050 0.04550 *
## totalexp 5.898e-06 9.371e-06 0.629 0.53189
## income_family 2.102e-04 7.097e-05 2.962 0.00463 **
## edu_percent -2.781e-01 2.130e-01 -1.306 0.19756
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.426 on 51 degrees of freedom
## Multiple R-squared: 0.4533, Adjusted R-squared: 0.4211
## F-statistic: 14.09 on 3 and 51 DF, p-value: 8.178e-07
# ggplot(t_combined, aes(x = proficiency, y = totalexp)+
# geom_point(size = 5) +
# labs(title = ""))
# set.seed(123)
# k_means_result <- kmeans(t_combined %>% select(edu_percent, proficiency, totalexp), centers = 3)
#
# t_combined_2 <- t_combined %>%
# mutate(cluster = factor(k_means_result$cluster))
#
# ggplot(t_combined_2, aes(x = proficiency, y = totalexp) +
# geom_point(size = 5) +
# labs(title = ""))
# t_pca <- select(is_combined, where(is.numeric))
#
# pca_result <- prcomp(t_pca, rank = 5, scale = TRUE, center = TRUE)
# summary(pca_result)
#
# t_pca_3 <- as_tibble(pca_result$x)
#
# ggplot(t_pca_3, aes(x = PC1, y = PC2))+
# geom_point(size = 5)+
# ggtitle("PCA proficiency")
library(neuralnet)
library(tm)
library(e1071)
# sample <- sample(c(1,0), size = nrow(t_combined), replace = TRUE, prob = c(0.7,0.3))
#
# t_train <- t[sample == 1, ]
# t_test <- t[samplpe == 0, ]
#
# set.seed(1)
#
# n <- neuralnet(proficiency ~.,
# data = t_train,
# hidden = 1,
# linear.output = FALSE)
#
# plot(n)
#
#
# #proficiency is placeholder
# vector_predicted <- predict(n, t_train)
# vector_predicted <- round(vector_predicted, digits = 0)
#
# accuracy <- mean(vector_predicted == t_train$proficiency)
# print(accuracy)
# table(vector_predicted, t_train$proficiency)
#
#
# # Measure accuracy on test data
# vector_predicted <- predict(n, t_test)
# vector_predicted <- round(vector_predicted, digits = 0)
#
# accuracy <- mean(vector_predicted == t_test$proficiency)
# print(accuracy)
# table(vector_predicted, t_test$proficiency)
-Proficiency data by grade and school district, https://zoomwv.k12.wv.us/Dashboard/dashboard/7310
-Balanced scorecard(exp per pupil), https://wveis.k12.wv.us/essa/dashboard.html
-Snapshot, https://wvde.us/wp-content/uploads/2023/11/29196-Education-Snapshot-Infographic-v1.pdf
-WV Checkbook 2023