Transform your raw data into actionable insights. Let my expertise in R and advanced data analysis techniques unlock the power of your information. Get a personalized consultation and see how I can streamline your projects, saving you time and driving better decision-making. Contact me today at info@data03.online or visit to schedule a call.
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
df <- read_excel("appqe_project_sample - full sample (758 schools) (1).xlsx", sheet = "data")
df1<- read_excel("appqe_ts_project_01.xlsx", sheet = "Data_for_R")
df %>% select(SCHNAME,EXP) %>% slice_max(EXP, n=10) %>%
ggplot() +
aes(x = SCHNAME, y = EXP) +
geom_jitter(size = 2.5) +coord_flip()+
labs(x = "SChool Name", y = "School Expenditure (all categories) (£)",
title = "Top 10 Schools which have School Expenditure (all categories) (£)") +
theme_minimal()
df %>% select(LANAME,EXP) %>% slice_max(EXP, n=10) %>%
ggplot() +
aes(x = LANAME, y = EXP) +
geom_jitter(size = 2.5) +coord_flip()+
labs(x = "School Local Authority", y = "School Expenditure (all categories) (£)",
title = "School Local Authority and School Expenditure (all categories) (£)") +
theme_minimal()
df %>% select(PUP,EXP) %>% slice_max(EXP, n=10) %>%
ggplot() +
aes(x = PUP, y = EXP) +
geom_jitter(size = 2.5) +coord_flip()+
labs(x = "Number of full-time equivalent (FTE) pupils", y = "School Expenditure (all categories) (£)",
title = "Top 10 Schools which have School Expenditure (all categories) (£)") +
theme_minimal()
dfa<-df %>% mutate_if(is.character, as.factor)
dfa<-dfa %>% mutate_if(is.factor, as.numeric)
dfa<-dfa[,c(1:5,7:17)]
names(dfa)
## [1] "URN" "SCHNAME" "LANAME" "REGION" "GENDER" "PUP"
## [7] "PUPKS4" "PSEN" "PFSM" "PENGFL" "RATPUPTEA" "TEA"
## [13] "WTEA" "LAPAY" "INC" "EXP"
df[,-c(1:2)] %>% select_if(is.numeric) %>% cor()
## Warning in cor(.): the standard deviation is zero
## ISSECONDARY PUP PUPKS4 PSEN PFSM
## ISSECONDARY 1 NA NA NA NA
## PUP NA 1.00000000 0.908998835 -0.199737291 -0.209895288
## PUPKS4 NA 0.90899883 1.000000000 -0.186147831 -0.219145566
## PSEN NA -0.19973729 -0.186147831 1.000000000 0.127022230
## PFSM NA -0.20989529 -0.219145566 0.127022230 1.000000000
## PENGFL NA -0.11281591 -0.033198195 0.004207267 -0.413128000
## RATPUPTEA NA 0.23777854 0.251319991 -0.229417543 -0.457892911
## TEA NA 0.93585034 0.844444843 -0.138285183 -0.044284864
## WTEA NA 0.07594602 0.010477485 0.069657694 0.278308209
## LAPAY NA 0.07672167 -0.015353949 0.159009303 0.004959045
## INC NA 0.87788312 0.773820248 -0.070580598 0.108913589
## EXP NA 0.87012906 0.769540158 -0.071429337 0.115469819
## SUR NA 0.12370778 0.084329230 0.004295183 -0.057381079
## INCP NA -0.28834705 -0.317539472 0.308918242 0.651183938
## EXPP NA -0.33948662 -0.356708820 0.301686223 0.644418638
## SURP NA 0.26430275 0.220605668 -0.028334167 -0.094566809
## EXPLRESP NA -0.11519799 -0.148621319 0.065256029 0.256861752
## SHEXPLRES NA 0.03272356 -0.004996756 -0.080578617 -0.046789409
## SHSELFINC NA -0.06362002 -0.073605820 -0.033727798 -0.223649727
## ATT8SCR NA 0.25639186 0.188560470 -0.211737372 -0.488737750
## DMIXED NA 0.03780895 0.119298972 0.195864996 0.037055993
## ISPRIMARY NA 0.07015000 -0.013444197 0.006219301 0.031684634
## ISPOST16 NA 0.38263630 0.148701097 -0.115682813 -0.143628004
## DLONDON NA 0.14356997 0.021416785 0.079091347 0.240103578
## PENGFL RATPUPTEA TEA WTEA LAPAY
## ISSECONDARY NA NA NA NA NA
## PUP -0.112815913 0.237778536 0.93585034 0.07594602 0.076721673
## PUPKS4 -0.033198195 0.251319991 0.84444484 0.01047749 -0.015353949
## PSEN 0.004207267 -0.229417543 -0.13828518 0.06965769 0.159009303
## PFSM -0.413128000 -0.457892911 -0.04428486 0.27830821 0.004959045
## PENGFL 1.000000000 0.234224850 -0.22746267 -0.38910845 -0.297650153
## RATPUPTEA 0.234224850 1.000000000 -0.05715985 -0.13553855 -0.109366995
## TEA -0.227462668 -0.057159847 1.00000000 0.13869837 0.118909623
## WTEA -0.389108450 -0.135538546 0.13869837 1.00000000 0.390137024
## LAPAY -0.297650153 -0.109366995 0.11890962 0.39013702 1.000000000
## INC -0.360279807 -0.009185962 0.91710707 0.29510731 0.200406729
## EXP -0.357180971 -0.029300245 0.91718310 0.30283579 0.201402517
## SUR -0.049962396 0.194069071 0.05013803 -0.05839251 0.001483989
## INCP -0.445576403 -0.578003616 -0.08185206 0.38794186 0.262366284
## EXPP -0.409871962 -0.608513263 -0.12418021 0.38188816 0.250223401
## SURP -0.062972440 0.233968860 0.18929649 -0.04802485 0.000590896
## EXPLRESP -0.191962836 -0.212327610 -0.01414068 0.12999400 0.213929283
## SHEXPLRES -0.010338257 0.080287354 0.02925961 -0.05412768 0.116553277
## SHSELFINC 0.158421701 0.016484109 -0.07854199 -0.09331210 0.069427330
## ATT8SCR -0.003451036 0.290077241 0.17995763 0.08223282 0.193674353
## DMIXED 0.228235817 0.025061377 0.01336602 -0.26578788 -0.226069198
## ISPRIMARY -0.131918720 0.010130937 0.09560809 -0.02059292 0.034811273
## ISPOST16 -0.104690710 0.021396453 0.37884589 0.15354080 0.237742438
## DLONDON -0.587860217 -0.224193472 0.24360249 0.68114557 0.604708145
## INC EXP SUR INCP EXPP
## ISSECONDARY NA NA NA NA NA
## PUP 0.877883124 0.870129062 0.123707777 -0.28834705 -0.33948662
## PUPKS4 0.773820248 0.769540158 0.084329230 -0.31753947 -0.35670882
## PSEN -0.070580598 -0.071429337 0.004295183 0.30891824 0.30168622
## PFSM 0.108913589 0.115469819 -0.057381079 0.65118394 0.64441864
## PENGFL -0.360279807 -0.357180971 -0.049962396 -0.44557640 -0.40987196
## RATPUPTEA -0.009185962 -0.029300245 0.194069071 -0.57800362 -0.60851326
## TEA 0.917107070 0.917183096 0.050138028 -0.08185206 -0.12418021
## WTEA 0.295107310 0.302835786 -0.058392513 0.38794186 0.38188816
## LAPAY 0.200406729 0.201402517 0.001483989 0.26236628 0.25022340
## INC 1.000000000 0.994703766 0.106705617 0.16438067 0.09894500
## EXP 0.994703766 1.000000000 0.003943953 0.16759720 0.12426752
## SUR 0.106705617 0.003943953 1.000000000 -0.02199705 -0.23947264
## INCP 0.164380674 0.167597196 -0.021997047 1.00000000 0.97005878
## EXPP 0.098945001 0.124267518 -0.239472638 0.97005878 1.00000000
## SURP 0.237969934 0.146537572 0.897685699 -0.06488870 -0.30530426
## EXPLRESP 0.082627880 0.094384752 -0.109148415 0.42805186 0.44002443
## SHEXPLRES 0.043872683 0.043205191 0.008890779 0.01603924 0.01152645
## SHSELFINC -0.012349852 -0.016275953 0.037294678 0.17193026 0.15332676
## ATT8SCR 0.129274492 0.122397362 0.073698055 -0.26856637 -0.28274342
## DMIXED -0.004886337 -0.006876974 0.018985681 -0.08490885 -0.07790683
## ISPRIMARY 0.110445453 0.117300276 -0.060184601 0.08123808 0.08326769
## ISPOST16 0.359006349 0.359763854 0.012586847 -0.05259296 -0.06325333
## DLONDON 0.405510730 0.412987793 -0.049836166 0.47643816 0.46119932
## SURP EXPLRESP SHEXPLRES SHSELFINC ATT8SCR
## ISSECONDARY NA NA NA NA NA
## PUP 0.264302753 -0.11519799 0.032723555 -0.063620017 0.256391856
## PUPKS4 0.220605668 -0.14862132 -0.004996756 -0.073605820 0.188560470
## PSEN -0.028334167 0.06525603 -0.080578617 -0.033727798 -0.211737372
## PFSM -0.094566809 0.25686175 -0.046789409 -0.223649727 -0.488737750
## PENGFL -0.062972440 -0.19196284 -0.010338257 0.158421701 -0.003451036
## RATPUPTEA 0.233968860 -0.21232761 0.080287354 0.016484109 0.290077241
## TEA 0.189296486 -0.01414068 0.029259614 -0.078541986 0.179957630
## WTEA -0.048024850 0.12999400 -0.054127682 -0.093312103 0.082232816
## LAPAY 0.000590896 0.21392928 0.116553277 0.069427330 0.193674353
## INC 0.237969934 0.08262788 0.043872683 -0.012349852 0.129274492
## EXP 0.146537572 0.09438475 0.043205191 -0.016275953 0.122397362
## SUR 0.897685699 -0.10914842 0.008890779 0.037294678 0.073698055
## INCP -0.064888701 0.42805186 0.016039244 0.171930261 -0.268566367
## EXPP -0.305304265 0.44002443 0.011526453 0.153326762 -0.282743420
## SURP 1.000000000 -0.12962731 0.015527989 0.044129728 0.108716129
## EXPLRESP -0.129627308 1.00000000 0.885182310 0.212848759 0.051382514
## SHEXPLRES 0.015527989 0.88518231 1.000000000 0.223417803 0.229833798
## SHSELFINC 0.044129728 0.21284876 0.223417803 1.000000000 0.253777666
## ATT8SCR 0.108716129 0.05138251 0.229833798 0.253777666 1.000000000
## DMIXED -0.012814331 -0.12186697 -0.117832221 -0.078611485 -0.376890662
## ISPRIMARY -0.023604581 0.04493689 0.004707491 0.004932698 -0.055890334
## ISPOST16 0.053683478 0.02243417 0.062976680 0.036506637 0.171361011
## DLONDON -0.026914765 0.23452209 0.030633226 -0.086854831 0.179553485
## DMIXED ISPRIMARY ISPOST16 DLONDON
## ISSECONDARY NA NA NA NA
## PUP 0.037808951 0.070150004 0.382636297 0.14356997
## PUPKS4 0.119298972 -0.013444197 0.148701097 0.02141679
## PSEN 0.195864996 0.006219301 -0.115682813 0.07909135
## PFSM 0.037055993 0.031684634 -0.143628004 0.24010358
## PENGFL 0.228235817 -0.131918720 -0.104690710 -0.58786022
## RATPUPTEA 0.025061377 0.010130937 0.021396453 -0.22419347
## TEA 0.013366021 0.095608089 0.378845891 0.24360249
## WTEA -0.265787876 -0.020592920 0.153540803 0.68114557
## LAPAY -0.226069198 0.034811273 0.237742438 0.60470814
## INC -0.004886337 0.110445453 0.359006349 0.40551073
## EXP -0.006876974 0.117300276 0.359763854 0.41298779
## SUR 0.018985681 -0.060184601 0.012586847 -0.04983617
## INCP -0.084908852 0.081238081 -0.052592961 0.47643816
## EXPP -0.077906835 0.083267694 -0.063253329 0.46119932
## SURP -0.012814331 -0.023604581 0.053683478 -0.02691477
## EXPLRESP -0.121866969 0.044936892 0.022434169 0.23452209
## SHEXPLRES -0.117832221 0.004707491 0.062976680 0.03063323
## SHSELFINC -0.078611485 0.004932698 0.036506637 -0.08685483
## ATT8SCR -0.376890662 -0.055890334 0.171361011 0.17955349
## DMIXED 1.000000000 0.063125751 -0.161297231 -0.31725656
## ISPRIMARY 0.063125751 1.000000000 0.007671837 0.06390684
## ISPOST16 -0.161297231 0.007671837 1.000000000 0.21541885
## DLONDON -0.317256560 0.063906836 0.215418848 1.00000000
mts <- ts(df1$`Total Gross Expenditure`, start =1999, end = 2016 )
plot(mts, xlab ="Yearly Data", ylab ="Total Gross Expenditure",
main ="Total Gross Expenditure 1999 to 2016",
col.main ="darkgreen")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
x<-ts(cbind("Total Gross Expenditure"=df1$`Total Gross Expenditure`,
"Total Funding"=df1$`Total Funding`,
"GDP deflator"=df1$`GDP deflator`),
start =1999, end = 2016)
plot(x)
forecast(x, 5)
## Total Gross Expenditure
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2017 5973.917 5323.477 6624.358 4979.1547 6968.680
## 2018 5667.305 4340.230 6994.379 3637.7190 7696.890
## 2019 5422.014 3382.513 7461.516 2302.8665 8541.162
## 2020 5225.782 2468.125 7983.439 1008.3097 9443.255
## 2021 5068.796 1600.827 8536.766 -235.0049 10372.598
##
## Total Funding
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2017 5476.537 4878.514 6074.559 4561.9402 6391.133
## 2018 5135.766 3997.224 6274.308 3394.5164 6877.016
## 2019 4863.150 3170.900 6555.400 2275.0774 7451.223
## 2020 4645.057 2403.751 6886.363 1217.2756 8072.838
## 2021 4470.582 1693.571 7247.594 223.5095 8717.655
##
## GDP deflator
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2017 101.6899 101.0087 102.3711 100.6481 102.7318
## 2018 103.3809 102.4186 104.3432 101.9092 104.8526
## 2019 105.0719 103.8938 106.2501 103.2701 106.8737
## 2020 106.7630 105.4028 108.1231 104.6827 108.8432
## 2021 108.4540 106.9333 109.9746 106.1284 110.7796
df<-df %>% mutate_if(is.character, as.factor)
model<-lm(EXP~., df[,-c(1:5)])
summary(model)
##
## Call:
## lm(formula = EXP ~ ., data = df[, -c(1:5)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.788e-07 -6.630e-10 -6.000e-12 8.980e-10 2.580e-08
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.318e-08 9.517e-09 1.385e+00 0.16649
## ISSECONDARY NA NA NA NA
## PUP 6.685e-12 4.989e-12 1.340e+00 0.18074
## PUPKS4 7.620e-12 1.351e-11 5.640e-01 0.57277
## PSEN 6.172e-10 2.035e-10 3.033e+00 0.00251 **
## PFSM -8.245e-11 4.389e-11 -1.879e+00 0.06069 .
## PENGFL -2.191e-11 1.570e-11 -1.396e+00 0.16319
## RATPUPTEA 1.496e-11 2.537e-10 5.900e-02 0.95298
## TEA 3.649e-11 6.219e-11 5.870e-01 0.55756
## WTEA -2.579e-13 1.200e-13 -2.150e+00 0.03189 *
## LAPAY -1.084e-11 5.168e-12 -2.098e+00 0.03626 *
## INC 1.000e+00 6.808e-16 1.469e+15 < 2e-16 ***
## SUR -1.000e+00 2.702e-15 -3.701e+14 < 2e-16 ***
## INCP 1.146e-12 2.287e-12 5.010e-01 0.61629
## EXPP -1.861e-12 2.209e-12 -8.420e-01 0.39980
## SURP NA NA NA NA
## EXPLRESP 2.862e-11 1.052e-11 2.719e+00 0.00670 **
## SHEXPLRES -1.699e-09 6.895e-10 -2.464e+00 0.01395 *
## SHSELFINC 4.673e-11 7.639e-11 6.120e-01 0.54086
## ATT8SCR 1.677e-11 5.269e-11 3.180e-01 0.75033
## DMIXED -8.771e-10 9.289e-10 -9.440e-01 0.34536
## ISPRIMARY -2.189e-10 1.649e-09 -1.330e-01 0.89443
## ISPOST16 -5.665e-10 6.736e-10 -8.410e-01 0.40065
## DLONDON 2.018e-09 1.299e-09 1.553e+00 0.12089
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.923e-09 on 736 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 3.559e+30 on 21 and 736 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model)