# declare libraries to use
library(readr)
library(tidyverse)
library(htmlwidgets)
library(ggplot2)
library(leaflet)
library(htmlwidgets)
library(DT)
library(plotly)
# read DepEd School 2015 data for knowledge Exchange Conference
# note the use of read_csv instead of read.csv (10x faster)
df <-  read_csv("g://Downloads//deped_school_location_with_enrolment_2014_2015.csv", na = c("", NA))
Parsed with column specification:
cols(
  the_geom = col_character(),
  offering = col_character(),
  region = col_character(),
  province = col_character(),
  municipality = col_character(),
  division = col_character(),
  district = col_character(),
  enrollment_sy_2014_2015 = col_integer(),
  school_name = col_character(),
  mooe_in_php_for_fy_2015 = col_integer(),
  latitude = col_double(),
  longitude = col_double(),
  school_id = col_integer(),
  cartodb_id = col_integer(),
  created_at = col_datetime(format = ""),
  updated_at = col_datetime(format = "")
)

Descriptive Statistics

summary(df)
   the_geom           offering            region            province        
 Length:46624       Length:46624       Length:46624       Length:46624      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
 municipality         division           district         enrollment_sy_2014_2015
 Length:46624       Length:46624       Length:46624       Min.   :    0.0        
 Class :character   Class :character   Class :character   1st Qu.:  142.0        
 Mode  :character   Mode  :character   Mode  :character   Median :  249.0        
                                                          Mean   :  451.3        
                                                          3rd Qu.:  464.0        
                                                          Max.   :12613.0        
                                                                                 
 school_name        mooe_in_php_for_fy_2015    latitude        longitude    
 Length:46624       Min.   :       0        Min.   : 4.706   Min.   :117.0  
 Class :character   1st Qu.:  116000        1st Qu.: 9.003   1st Qu.:121.1  
 Mode  :character   Median :  161000        Median :11.552   Median :123.0  
                    Mean   :  304660        Mean   :11.884   Mean   :122.9  
                    3rd Qu.:  292000        3rd Qu.:14.601   3rd Qu.:124.5  
                    Max.   :11218000        Max.   :20.787   Max.   :126.6  
                    NA's   :2548            NA's   :6111     NA's   :6111   
   school_id        cartodb_id      created_at                 
 Min.   :100001   Min.   :    1   Min.   :2015-04-16 08:49:16  
 1st Qu.:111777   1st Qu.:11657   1st Qu.:2015-04-16 08:49:16  
 Median :123553   Median :23313   Median :2015-04-16 08:49:16  
 Mean   :154430   Mean   :23313   Mean   :2015-04-16 08:49:16  
 3rd Qu.:135651   3rd Qu.:34968   3rd Qu.:2015-04-16 08:49:16  
 Max.   :356059   Max.   :46624   Max.   :2015-04-16 08:49:16  
                                                               
   updated_at                 
 Min.   :2015-04-16 08:49:16  
 1st Qu.:2015-04-16 08:49:16  
 Median :2015-04-16 08:49:16  
 Mean   :2015-04-16 08:49:16  
 3rd Qu.:2015-04-16 08:49:16  
 Max.   :2015-04-16 08:49:16  
                              
df2 <- df %>% 
  group_by(region , division, offering) %>% 
  summarise(TotalStudents = sum(enrollment_sy_2014_2015) ,  
          TotalBudget = sum(mooe_in_php_for_fy_2015) , n = n())
df2

View the Table:

head(df2,10)

Plot on a map

# filter data without lat and long
df3 <-  df %>% filter(!is.na(latitude))
# assign plotting arguments
# addTiles command adds in OpenStreetmap by default
pal <- colorQuantile("YlOrRd", NULL, n = 2)
leaflet(width = 900, height = 600) %>%
  addCircleMarkers(data = df3, lat = ~as.numeric(latitude), lng = ~as.numeric(longitude),
                   label = ~paste(df3$school_name,
                            df3$municipality,
                            df3$enrollment_sy_2014_2015,        
                            df3$mooe_in_php_for_fy_2015, 
                            sep=", "), 
                   color = ~pal(df3$mooe_in_php_for_fy_2015))  %>%
                   setView(120.228817,16.033739,  zoom = 10) %>%
                   addTiles() %>%
                   addMarkers(120.2287961,16.033760, popup = "Sison Auditorium") 

Create a Histogram

# Compute the ratio of MOE to students and create a new field
df$ratio <-  df$mooe_in_php_for_fy_2015 / df$enrollment_sy_2014_2015

Visualizing the Data

ggplot(data = df) +
  geom_bar(mapping = aes(x = df$region, fill = df$offering)) +
  xlab("Regions") +
  theme(axis.text = element_text(size = 6))

ggplot(data = df) +
  geom_bar(mapping = aes(x = df$enrollment_sy_2014_2015, color = offering, na.rm = TRUE))
Ignoring unknown aesthetics: na.rm

ggplot(data = df) +
  geom_point(mapping = aes(x = df$enrollment_sy_2014_2015, df$mooe_in_php_for_fy_2015, color = offering))+
  xlab("Number of Students Enrolled") +
  ylab("Budget Allocated")

# display only a region 
df1 <- df %>% filter(df$region == "CAR")
ggplot(data = df1) +
  geom_boxplot(mapping = aes(df1$province, df1$mooe_in_php_for_fy_2015, color = offering))+
  xlab("Provinces") +
  ylab("Budget")

ggplot(data = df) +
  geom_histogram(mapping = aes(x = df$enrollment_sy_2014_2015), binwidth = 500) +
  xlab("Enrollment figures")

LS0tDQp0aXRsZTogIktub3dsZWRnZSBFeGNoYW5nZSBDb25mZXJlbmNlIDIwMTgiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCiMgZGVjbGFyZSBsaWJyYXJpZXMgdG8gdXNlDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGh0bWx3aWRnZXRzKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShsZWFmbGV0KQ0KbGlicmFyeShodG1sd2lkZ2V0cykNCmxpYnJhcnkoRFQpDQpsaWJyYXJ5KHBsb3RseSkNCg0KIyByZWFkIERlcEVkIFNjaG9vbCAyMDE1IGRhdGEgZm9yIGtub3dsZWRnZSBFeGNoYW5nZSBDb25mZXJlbmNlDQojIG5vdGUgdGhlIHVzZSBvZiByZWFkX2NzdiBpbnN0ZWFkIG9mIHJlYWQuY3N2ICgxMHggZmFzdGVyKQ0KDQpkZiA8LSAgcmVhZF9jc3YoImc6Ly9Eb3dubG9hZHMvL2RlcGVkX3NjaG9vbF9sb2NhdGlvbl93aXRoX2Vucm9sbWVudF8yMDE0XzIwMTUuY3N2IiwgbmEgPSBjKCIiLCBOQSkpDQoNCmBgYA0KRGVzY3JpcHRpdmUgU3RhdGlzdGljcyAgDQoNCmBgYHtyfQ0Kc3VtbWFyeShkZikNCmBgYA0KDQpgYGB7cn0NCmRmMiA8LSBkZiAlPiUgDQogIGdyb3VwX2J5KHJlZ2lvbiAsIGRpdmlzaW9uLCBvZmZlcmluZykgJT4lIA0KICBzdW1tYXJpc2UoVG90YWxTdHVkZW50cyA9IHN1bShlbnJvbGxtZW50X3N5XzIwMTRfMjAxNSkgLCAgDQogICAgICAgICAgVG90YWxCdWRnZXQgPSBzdW0obW9vZV9pbl9waHBfZm9yX2Z5XzIwMTUpICwgbiA9IG4oKSkNCg0KZGYyDQpgYGANCg0KDQpWaWV3IHRoZSBUYWJsZToNCmBgYHtyfQ0KaGVhZChkZjIsMTApDQpgYGANCg0KUGxvdCBvbiBhIG1hcA0KDQpgYGB7cn0NCiMgZmlsdGVyIGRhdGEgd2l0aG91dCBsYXQgYW5kIGxvbmcNCmRmMyA8LSAgZGYgJT4lIGZpbHRlcighaXMubmEobGF0aXR1ZGUpKQ0KDQojIGFzc2lnbiBwbG90dGluZyBhcmd1bWVudHMNCiMgYWRkVGlsZXMgY29tbWFuZCBhZGRzIGluIE9wZW5TdHJlZXRtYXAgYnkgZGVmYXVsdA0KcGFsIDwtIGNvbG9yUXVhbnRpbGUoIllsT3JSZCIsIE5VTEwsIG4gPSAyKQ0KbGVhZmxldCh3aWR0aCA9IDkwMCwgaGVpZ2h0ID0gNjAwKSAlPiUNCiAgYWRkQ2lyY2xlTWFya2VycyhkYXRhID0gZGYzLCBsYXQgPSB+YXMubnVtZXJpYyhsYXRpdHVkZSksIGxuZyA9IH5hcy5udW1lcmljKGxvbmdpdHVkZSksDQogICAgICAgICAgICAgICAgICAgbGFiZWwgPSB+cGFzdGUoZGYzJHNjaG9vbF9uYW1lLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRmMyRtdW5pY2lwYWxpdHksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgZGYzJGVucm9sbG1lbnRfc3lfMjAxNF8yMDE1LCAgICAgICAgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgZGYzJG1vb2VfaW5fcGhwX2Zvcl9meV8yMDE1LCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBzZXA9IiwgIiksIA0KICAgICAgICAgICAgICAgICAgIGNvbG9yID0gfnBhbChkZjMkbW9vZV9pbl9waHBfZm9yX2Z5XzIwMTUpKSAgJT4lDQogICAgICAgICAgICAgICAgICAgc2V0VmlldygxMjAuMjI4ODE3LDE2LjAzMzczOSwgIHpvb20gPSAxMCkgJT4lDQogICAgICAgICAgICAgICAgICAgYWRkVGlsZXMoKSAlPiUNCiAgICAgICAgICAgICAgICAgICBhZGRNYXJrZXJzKDEyMC4yMjg3OTYxLDE2LjAzMzc2MCwgcG9wdXAgPSAiU2lzb24gQXVkaXRvcml1bSIpIA0KDQoNCg0KYGBgDQoNCkNyZWF0ZSBhIEhpc3RvZ3JhbQ0KDQpgYGB7cn0NCiMgQ29tcHV0ZSB0aGUgcmF0aW8gb2YgTU9FIHRvIHN0dWRlbnRzIGFuZCBjcmVhdGUgYSBuZXcgZmllbGQNCmRmJHJhdGlvIDwtICBkZiRtb29lX2luX3BocF9mb3JfZnlfMjAxNSAvIGRmJGVucm9sbG1lbnRfc3lfMjAxNF8yMDE1DQpgYGANCg0KVmlzdWFsaXppbmcgdGhlIERhdGENCg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IGRmKSArDQogIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IGRmJHJlZ2lvbiwgZmlsbCA9IGRmJG9mZmVyaW5nKSkgKw0KICB4bGFiKCJSZWdpb25zIikgKw0KICB0aGVtZShheGlzLnRleHQgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDYpKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBkZikgKw0KICBnZW9tX2JhcihtYXBwaW5nID0gYWVzKHggPSBkZiRlbnJvbGxtZW50X3N5XzIwMTRfMjAxNSwgY29sb3IgPSBvZmZlcmluZywgbmEucm0gPSBUUlVFKSkNCmBgYA0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0gZGYpICsNCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSBkZiRlbnJvbGxtZW50X3N5XzIwMTRfMjAxNSwgZGYkbW9vZV9pbl9waHBfZm9yX2Z5XzIwMTUsIGNvbG9yID0gb2ZmZXJpbmcpKSsNCiAgeGxhYigiTnVtYmVyIG9mIFN0dWRlbnRzIEVucm9sbGVkIikgKw0KICB5bGFiKCJCdWRnZXQgQWxsb2NhdGVkIikNCmBgYA0KDQpgYGB7cn0NCiMgZGlzcGxheSBvbmx5IGEgcmVnaW9uIA0KZGYxIDwtIGRmICU+JSBmaWx0ZXIoZGYkcmVnaW9uID09ICJDQVIiKQ0KZ2dwbG90KGRhdGEgPSBkZjEpICsNCiAgZ2VvbV9ib3hwbG90KG1hcHBpbmcgPSBhZXMoZGYxJHByb3ZpbmNlLCBkZjEkbW9vZV9pbl9waHBfZm9yX2Z5XzIwMTUsIGNvbG9yID0gb2ZmZXJpbmcpKSsNCiAgeGxhYigiUHJvdmluY2VzIikgKw0KICB5bGFiKCJCdWRnZXQiKQ0KYGBgDQpgYGB7cn0NCmdncGxvdChkYXRhID0gZGYpICsNCiAgZ2VvbV9oaXN0b2dyYW0obWFwcGluZyA9IGFlcyh4ID0gZGYkZW5yb2xsbWVudF9zeV8yMDE0XzIwMTUpLCBiaW53aWR0aCA9IDUwMCkgKw0KICB4bGFiKCJFbnJvbGxtZW50IGZpZ3VyZXMiKQ0KYGBgDQoNCg0K