Need Help?

Transform your raw data into actionable insights. Let my expertise in R and advanced data analysis techniques unlock the power of your information. Get a personalized consultation and see how I can streamline your projects, saving you time and driving better decision-making. Contact me today at or visit to schedule a call.

Load the required Packages

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Load the dataset

df <- read_excel("appqe_project_sample - full sample (758 schools) (1).xlsx", sheet = "data")
df1<- read_excel("appqe_ts_project_01.xlsx", sheet = "Data_for_R")

1. What is the extent of regional inequalities in expenditure by secondary schools?

df %>% select(SCHNAME,EXP) %>% slice_max(EXP, n=10) %>% 
  ggplot() +
  aes(x = SCHNAME, y = EXP) +
  geom_jitter(size = 2.5) +coord_flip()+
  labs(x = "SChool Name", y = "School Expenditure (all categories) (£)",
       title = "Top 10 Schools which have School Expenditure (all categories) (£)") +
  theme_minimal()

df %>% select(LANAME,EXP) %>% slice_max(EXP, n=10) %>% 
  ggplot() +
  aes(x = LANAME, y = EXP) +
  geom_jitter(size = 2.5) +coord_flip()+
  labs(x = "School Local Authority", y = "School Expenditure (all categories) (£)",
       title = "School Local Authority and School Expenditure (all categories) (£)") +
  theme_minimal()

df %>% select(PUP,EXP) %>% slice_max(EXP, n=10) %>% 
  ggplot() +
  aes(x = PUP, y = EXP) +
  geom_jitter(size = 2.5) +coord_flip()+
  labs(x = "Number of full-time equivalent (FTE) pupils", y = "School Expenditure (all categories) (£)",
       title = "Top 10 Schools which have School Expenditure (all categories) (£)") +
  theme_minimal()

Association between Expenditure and other variables.

dfa<-df %>% mutate_if(is.character, as.factor)
dfa<-dfa %>% mutate_if(is.factor, as.numeric)
dfa<-dfa[,c(1:5,7:17)]
names(dfa)
##  [1] "URN"       "SCHNAME"   "LANAME"    "REGION"    "GENDER"    "PUP"      
##  [7] "PUPKS4"    "PSEN"      "PFSM"      "PENGFL"    "RATPUPTEA" "TEA"      
## [13] "WTEA"      "LAPAY"     "INC"       "EXP"
df[,-c(1:2)] %>% select_if(is.numeric) %>% cor()
## Warning in cor(.): the standard deviation is zero
##             ISSECONDARY         PUP       PUPKS4         PSEN         PFSM
## ISSECONDARY           1          NA           NA           NA           NA
## PUP                  NA  1.00000000  0.908998835 -0.199737291 -0.209895288
## PUPKS4               NA  0.90899883  1.000000000 -0.186147831 -0.219145566
## PSEN                 NA -0.19973729 -0.186147831  1.000000000  0.127022230
## PFSM                 NA -0.20989529 -0.219145566  0.127022230  1.000000000
## PENGFL               NA -0.11281591 -0.033198195  0.004207267 -0.413128000
## RATPUPTEA            NA  0.23777854  0.251319991 -0.229417543 -0.457892911
## TEA                  NA  0.93585034  0.844444843 -0.138285183 -0.044284864
## WTEA                 NA  0.07594602  0.010477485  0.069657694  0.278308209
## LAPAY                NA  0.07672167 -0.015353949  0.159009303  0.004959045
## INC                  NA  0.87788312  0.773820248 -0.070580598  0.108913589
## EXP                  NA  0.87012906  0.769540158 -0.071429337  0.115469819
## SUR                  NA  0.12370778  0.084329230  0.004295183 -0.057381079
## INCP                 NA -0.28834705 -0.317539472  0.308918242  0.651183938
## EXPP                 NA -0.33948662 -0.356708820  0.301686223  0.644418638
## SURP                 NA  0.26430275  0.220605668 -0.028334167 -0.094566809
## EXPLRESP             NA -0.11519799 -0.148621319  0.065256029  0.256861752
## SHEXPLRES            NA  0.03272356 -0.004996756 -0.080578617 -0.046789409
## SHSELFINC            NA -0.06362002 -0.073605820 -0.033727798 -0.223649727
## ATT8SCR              NA  0.25639186  0.188560470 -0.211737372 -0.488737750
## DMIXED               NA  0.03780895  0.119298972  0.195864996  0.037055993
## ISPRIMARY            NA  0.07015000 -0.013444197  0.006219301  0.031684634
## ISPOST16             NA  0.38263630  0.148701097 -0.115682813 -0.143628004
## DLONDON              NA  0.14356997  0.021416785  0.079091347  0.240103578
##                   PENGFL    RATPUPTEA         TEA        WTEA        LAPAY
## ISSECONDARY           NA           NA          NA          NA           NA
## PUP         -0.112815913  0.237778536  0.93585034  0.07594602  0.076721673
## PUPKS4      -0.033198195  0.251319991  0.84444484  0.01047749 -0.015353949
## PSEN         0.004207267 -0.229417543 -0.13828518  0.06965769  0.159009303
## PFSM        -0.413128000 -0.457892911 -0.04428486  0.27830821  0.004959045
## PENGFL       1.000000000  0.234224850 -0.22746267 -0.38910845 -0.297650153
## RATPUPTEA    0.234224850  1.000000000 -0.05715985 -0.13553855 -0.109366995
## TEA         -0.227462668 -0.057159847  1.00000000  0.13869837  0.118909623
## WTEA        -0.389108450 -0.135538546  0.13869837  1.00000000  0.390137024
## LAPAY       -0.297650153 -0.109366995  0.11890962  0.39013702  1.000000000
## INC         -0.360279807 -0.009185962  0.91710707  0.29510731  0.200406729
## EXP         -0.357180971 -0.029300245  0.91718310  0.30283579  0.201402517
## SUR         -0.049962396  0.194069071  0.05013803 -0.05839251  0.001483989
## INCP        -0.445576403 -0.578003616 -0.08185206  0.38794186  0.262366284
## EXPP        -0.409871962 -0.608513263 -0.12418021  0.38188816  0.250223401
## SURP        -0.062972440  0.233968860  0.18929649 -0.04802485  0.000590896
## EXPLRESP    -0.191962836 -0.212327610 -0.01414068  0.12999400  0.213929283
## SHEXPLRES   -0.010338257  0.080287354  0.02925961 -0.05412768  0.116553277
## SHSELFINC    0.158421701  0.016484109 -0.07854199 -0.09331210  0.069427330
## ATT8SCR     -0.003451036  0.290077241  0.17995763  0.08223282  0.193674353
## DMIXED       0.228235817  0.025061377  0.01336602 -0.26578788 -0.226069198
## ISPRIMARY   -0.131918720  0.010130937  0.09560809 -0.02059292  0.034811273
## ISPOST16    -0.104690710  0.021396453  0.37884589  0.15354080  0.237742438
## DLONDON     -0.587860217 -0.224193472  0.24360249  0.68114557  0.604708145
##                      INC          EXP          SUR        INCP        EXPP
## ISSECONDARY           NA           NA           NA          NA          NA
## PUP          0.877883124  0.870129062  0.123707777 -0.28834705 -0.33948662
## PUPKS4       0.773820248  0.769540158  0.084329230 -0.31753947 -0.35670882
## PSEN        -0.070580598 -0.071429337  0.004295183  0.30891824  0.30168622
## PFSM         0.108913589  0.115469819 -0.057381079  0.65118394  0.64441864
## PENGFL      -0.360279807 -0.357180971 -0.049962396 -0.44557640 -0.40987196
## RATPUPTEA   -0.009185962 -0.029300245  0.194069071 -0.57800362 -0.60851326
## TEA          0.917107070  0.917183096  0.050138028 -0.08185206 -0.12418021
## WTEA         0.295107310  0.302835786 -0.058392513  0.38794186  0.38188816
## LAPAY        0.200406729  0.201402517  0.001483989  0.26236628  0.25022340
## INC          1.000000000  0.994703766  0.106705617  0.16438067  0.09894500
## EXP          0.994703766  1.000000000  0.003943953  0.16759720  0.12426752
## SUR          0.106705617  0.003943953  1.000000000 -0.02199705 -0.23947264
## INCP         0.164380674  0.167597196 -0.021997047  1.00000000  0.97005878
## EXPP         0.098945001  0.124267518 -0.239472638  0.97005878  1.00000000
## SURP         0.237969934  0.146537572  0.897685699 -0.06488870 -0.30530426
## EXPLRESP     0.082627880  0.094384752 -0.109148415  0.42805186  0.44002443
## SHEXPLRES    0.043872683  0.043205191  0.008890779  0.01603924  0.01152645
## SHSELFINC   -0.012349852 -0.016275953  0.037294678  0.17193026  0.15332676
## ATT8SCR      0.129274492  0.122397362  0.073698055 -0.26856637 -0.28274342
## DMIXED      -0.004886337 -0.006876974  0.018985681 -0.08490885 -0.07790683
## ISPRIMARY    0.110445453  0.117300276 -0.060184601  0.08123808  0.08326769
## ISPOST16     0.359006349  0.359763854  0.012586847 -0.05259296 -0.06325333
## DLONDON      0.405510730  0.412987793 -0.049836166  0.47643816  0.46119932
##                     SURP    EXPLRESP    SHEXPLRES    SHSELFINC      ATT8SCR
## ISSECONDARY           NA          NA           NA           NA           NA
## PUP          0.264302753 -0.11519799  0.032723555 -0.063620017  0.256391856
## PUPKS4       0.220605668 -0.14862132 -0.004996756 -0.073605820  0.188560470
## PSEN        -0.028334167  0.06525603 -0.080578617 -0.033727798 -0.211737372
## PFSM        -0.094566809  0.25686175 -0.046789409 -0.223649727 -0.488737750
## PENGFL      -0.062972440 -0.19196284 -0.010338257  0.158421701 -0.003451036
## RATPUPTEA    0.233968860 -0.21232761  0.080287354  0.016484109  0.290077241
## TEA          0.189296486 -0.01414068  0.029259614 -0.078541986  0.179957630
## WTEA        -0.048024850  0.12999400 -0.054127682 -0.093312103  0.082232816
## LAPAY        0.000590896  0.21392928  0.116553277  0.069427330  0.193674353
## INC          0.237969934  0.08262788  0.043872683 -0.012349852  0.129274492
## EXP          0.146537572  0.09438475  0.043205191 -0.016275953  0.122397362
## SUR          0.897685699 -0.10914842  0.008890779  0.037294678  0.073698055
## INCP        -0.064888701  0.42805186  0.016039244  0.171930261 -0.268566367
## EXPP        -0.305304265  0.44002443  0.011526453  0.153326762 -0.282743420
## SURP         1.000000000 -0.12962731  0.015527989  0.044129728  0.108716129
## EXPLRESP    -0.129627308  1.00000000  0.885182310  0.212848759  0.051382514
## SHEXPLRES    0.015527989  0.88518231  1.000000000  0.223417803  0.229833798
## SHSELFINC    0.044129728  0.21284876  0.223417803  1.000000000  0.253777666
## ATT8SCR      0.108716129  0.05138251  0.229833798  0.253777666  1.000000000
## DMIXED      -0.012814331 -0.12186697 -0.117832221 -0.078611485 -0.376890662
## ISPRIMARY   -0.023604581  0.04493689  0.004707491  0.004932698 -0.055890334
## ISPOST16     0.053683478  0.02243417  0.062976680  0.036506637  0.171361011
## DLONDON     -0.026914765  0.23452209  0.030633226 -0.086854831  0.179553485
##                   DMIXED    ISPRIMARY     ISPOST16     DLONDON
## ISSECONDARY           NA           NA           NA          NA
## PUP          0.037808951  0.070150004  0.382636297  0.14356997
## PUPKS4       0.119298972 -0.013444197  0.148701097  0.02141679
## PSEN         0.195864996  0.006219301 -0.115682813  0.07909135
## PFSM         0.037055993  0.031684634 -0.143628004  0.24010358
## PENGFL       0.228235817 -0.131918720 -0.104690710 -0.58786022
## RATPUPTEA    0.025061377  0.010130937  0.021396453 -0.22419347
## TEA          0.013366021  0.095608089  0.378845891  0.24360249
## WTEA        -0.265787876 -0.020592920  0.153540803  0.68114557
## LAPAY       -0.226069198  0.034811273  0.237742438  0.60470814
## INC         -0.004886337  0.110445453  0.359006349  0.40551073
## EXP         -0.006876974  0.117300276  0.359763854  0.41298779
## SUR          0.018985681 -0.060184601  0.012586847 -0.04983617
## INCP        -0.084908852  0.081238081 -0.052592961  0.47643816
## EXPP        -0.077906835  0.083267694 -0.063253329  0.46119932
## SURP        -0.012814331 -0.023604581  0.053683478 -0.02691477
## EXPLRESP    -0.121866969  0.044936892  0.022434169  0.23452209
## SHEXPLRES   -0.117832221  0.004707491  0.062976680  0.03063323
## SHSELFINC   -0.078611485  0.004932698  0.036506637 -0.08685483
## ATT8SCR     -0.376890662 -0.055890334  0.171361011  0.17955349
## DMIXED       1.000000000  0.063125751 -0.161297231 -0.31725656
## ISPRIMARY    0.063125751  1.000000000  0.007671837  0.06390684
## ISPOST16    -0.161297231  0.007671837  1.000000000  0.21541885
## DLONDON     -0.317256560  0.063906836  0.215418848  1.00000000

2. What has been the recent evolution of total gross expenditure by secondary schools?

mts <- ts(df1$`Total Gross Expenditure`, start =1999, end = 2016 )
plot(mts, xlab ="Yearly Data", ylab ="Total Gross Expenditure",
     main ="Total Gross Expenditure 1999 to 2016",
     col.main ="darkgreen")

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
x<-ts(cbind("Total Gross Expenditure"=df1$`Total Gross Expenditure`, 
            "Total Funding"=df1$`Total Funding`,
            "GDP deflator"=df1$`GDP deflator`),
      start =1999, end = 2016)

plot(x)

forecast(x, 5)
## Total Gross Expenditure
##      Point Forecast    Lo 80    Hi 80     Lo 95     Hi 95
## 2017       5973.917 5323.477 6624.358 4979.1547  6968.680
## 2018       5667.305 4340.230 6994.379 3637.7190  7696.890
## 2019       5422.014 3382.513 7461.516 2302.8665  8541.162
## 2020       5225.782 2468.125 7983.439 1008.3097  9443.255
## 2021       5068.796 1600.827 8536.766 -235.0049 10372.598
## 
## Total Funding
##      Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 2017       5476.537 4878.514 6074.559 4561.9402 6391.133
## 2018       5135.766 3997.224 6274.308 3394.5164 6877.016
## 2019       4863.150 3170.900 6555.400 2275.0774 7451.223
## 2020       4645.057 2403.751 6886.363 1217.2756 8072.838
## 2021       4470.582 1693.571 7247.594  223.5095 8717.655
## 
## GDP deflator
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2017       101.6899 101.0087 102.3711 100.6481 102.7318
## 2018       103.3809 102.4186 104.3432 101.9092 104.8526
## 2019       105.0719 103.8938 106.2501 103.2701 106.8737
## 2020       106.7630 105.4028 108.1231 104.6827 108.8432
## 2021       108.4540 106.9333 109.9746 106.1284 110.7796

3. What are the determinants of secondary school expenditure?

df<-df %>% mutate_if(is.character, as.factor)
model<-lm(EXP~., df[,-c(1:5)])
summary(model)
## 
## Call:
## lm(formula = EXP ~ ., data = df[, -c(1:5)])
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.788e-07 -6.630e-10 -6.000e-12  8.980e-10  2.580e-08 
## 
## Coefficients: (2 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  1.318e-08  9.517e-09  1.385e+00  0.16649    
## ISSECONDARY         NA         NA         NA       NA    
## PUP          6.685e-12  4.989e-12  1.340e+00  0.18074    
## PUPKS4       7.620e-12  1.351e-11  5.640e-01  0.57277    
## PSEN         6.172e-10  2.035e-10  3.033e+00  0.00251 ** 
## PFSM        -8.245e-11  4.389e-11 -1.879e+00  0.06069 .  
## PENGFL      -2.191e-11  1.570e-11 -1.396e+00  0.16319    
## RATPUPTEA    1.496e-11  2.537e-10  5.900e-02  0.95298    
## TEA          3.649e-11  6.219e-11  5.870e-01  0.55756    
## WTEA        -2.579e-13  1.200e-13 -2.150e+00  0.03189 *  
## LAPAY       -1.084e-11  5.168e-12 -2.098e+00  0.03626 *  
## INC          1.000e+00  6.808e-16  1.469e+15  < 2e-16 ***
## SUR         -1.000e+00  2.702e-15 -3.701e+14  < 2e-16 ***
## INCP         1.146e-12  2.287e-12  5.010e-01  0.61629    
## EXPP        -1.861e-12  2.209e-12 -8.420e-01  0.39980    
## SURP                NA         NA         NA       NA    
## EXPLRESP     2.862e-11  1.052e-11  2.719e+00  0.00670 ** 
## SHEXPLRES   -1.699e-09  6.895e-10 -2.464e+00  0.01395 *  
## SHSELFINC    4.673e-11  7.639e-11  6.120e-01  0.54086    
## ATT8SCR      1.677e-11  5.269e-11  3.180e-01  0.75033    
## DMIXED      -8.771e-10  9.289e-10 -9.440e-01  0.34536    
## ISPRIMARY   -2.189e-10  1.649e-09 -1.330e-01  0.89443    
## ISPOST16    -5.665e-10  6.736e-10 -8.410e-01  0.40065    
## DLONDON      2.018e-09  1.299e-09  1.553e+00  0.12089    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.923e-09 on 736 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 3.559e+30 on 21 and 736 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model)