# declare libraries to use
library(readr)
library(tidyverse)
library(htmlwidgets)
library(ggplot2)
library(leaflet)
library(htmlwidgets)
library(DT)
library(plotly)
# Read the DepEd school location and enrolment data (SY 2014-2015) used for
# the Knowledge Exchange Conference.
# read_csv() from readr is used instead of base read.csv() for speed.
# Empty fields and the literal string "NA" are parsed as missing values.
df <- read_csv(
  "g://Downloads//deped_school_location_with_enrolment_2014_2015.csv",
  na = c("", "NA")
)
Parsed with column specification:
cols(
the_geom = col_character(),
offering = col_character(),
region = col_character(),
province = col_character(),
municipality = col_character(),
division = col_character(),
district = col_character(),
enrollment_sy_2014_2015 = col_integer(),
school_name = col_character(),
mooe_in_php_for_fy_2015 = col_integer(),
latitude = col_double(),
longitude = col_double(),
school_id = col_integer(),
cartodb_id = col_integer(),
created_at = col_datetime(format = ""),
updated_at = col_datetime(format = "")
)
Descriptive Statistics
# Print per-column summary statistics for the raw school data.
df %>%
  summary()
the_geom offering region province
Length:46624 Length:46624 Length:46624 Length:46624
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
municipality division district enrollment_sy_2014_2015
Length:46624 Length:46624 Length:46624 Min. : 0.0
Class :character Class :character Class :character 1st Qu.: 142.0
Mode :character Mode :character Mode :character Median : 249.0
Mean : 451.3
3rd Qu.: 464.0
Max. :12613.0
school_name mooe_in_php_for_fy_2015 latitude longitude
Length:46624 Min. : 0 Min. : 4.706 Min. :117.0
Class :character 1st Qu.: 116000 1st Qu.: 9.003 1st Qu.:121.1
Mode :character Median : 161000 Median :11.552 Median :123.0
Mean : 304660 Mean :11.884 Mean :122.9
3rd Qu.: 292000 3rd Qu.:14.601 3rd Qu.:124.5
Max. :11218000 Max. :20.787 Max. :126.6
NA's :2548 NA's :6111 NA's :6111
school_id cartodb_id created_at
Min. :100001 Min. : 1 Min. :2015-04-16 08:49:16
1st Qu.:111777 1st Qu.:11657 1st Qu.:2015-04-16 08:49:16
Median :123553 Median :23313 Median :2015-04-16 08:49:16
Mean :154430 Mean :23313 Mean :2015-04-16 08:49:16
3rd Qu.:135651 3rd Qu.:34968 3rd Qu.:2015-04-16 08:49:16
Max. :356059 Max. :46624 Max. :2015-04-16 08:49:16
updated_at
Min. :2015-04-16 08:49:16
1st Qu.:2015-04-16 08:49:16
Median :2015-04-16 08:49:16
Mean :2015-04-16 08:49:16
3rd Qu.:2015-04-16 08:49:16
Max. :2015-04-16 08:49:16
# Aggregate schools by region, division and offering:
#   TotalStudents - total SY 2014-2015 enrolment in the group
#   TotalBudget   - total FY 2015 MOOE (PHP) in the group
#   n             - number of schools in the group
# na.rm = TRUE stops a single school with a missing value from turning the
# whole group total into NA. The budget is summed as double because integer
# sums that exceed the integer range come back as NA. ungroup() returns a
# plain, ungrouped table.
df2 <- df %>%
  group_by(region, division, offering) %>%
  summarise(
    TotalStudents = sum(enrollment_sy_2014_2015, na.rm = TRUE),
    TotalBudget = sum(as.numeric(mooe_in_php_for_fy_2015), na.rm = TRUE),
    n = n()
  ) %>%
  ungroup()
df2
View the Table:
# Show the first ten rows of the aggregated table.
df2 %>%
  head(n = 10)
Plot on a map
# Keep only schools that have both coordinates; a marker cannot be placed
# without a latitude and a longitude.
df3 <- df %>%
  filter(!is.na(latitude), !is.na(longitude))

# Two-bin quantile palette. With a NULL domain the breaks are computed from
# the values handed to pal() when the map is built.
pal <- colorQuantile("YlOrRd", NULL, n = 2)

# One circle marker per school, coloured by FY 2015 MOOE, with a hover label
# of "school, municipality, enrolment, MOOE". Formula arguments are evaluated
# against `data`, so columns are referenced by bare name.
# addTiles() adds the default OpenStreetMap tile layer.
leaflet(width = 900, height = 600) %>%
  addCircleMarkers(
    data = df3,
    lat = ~latitude,
    lng = ~longitude,
    label = ~paste(
      school_name,
      municipality,
      enrollment_sy_2014_2015,
      mooe_in_php_for_fy_2015,
      sep = ", "
    ),
    color = ~pal(mooe_in_php_for_fy_2015)
  ) %>%
  setView(lng = 120.228817, lat = 16.033739, zoom = 10) %>%
  addTiles() %>%
  addMarkers(lng = 120.2287961, lat = 16.033760, popup = "Sison Auditorium")
Create a Histogram
# Compute the MOOE-per-student ratio and store it in a new column.
# Schools reporting zero enrolment get NA instead of the Inf/NaN that a
# division by zero would produce.
df <- df %>%
  mutate(
    ratio = mooe_in_php_for_fy_2015 / na_if(enrollment_sy_2014_2015, 0)
  )
Visualizing the Data
# Number of schools per region, with bars split by school offering.
# Columns are mapped by bare name inside aes(); ggplot looks them up in `data`.
ggplot(data = df) +
  geom_bar(mapping = aes(x = region, fill = offering)) +
  xlab("Regions") +
  theme(axis.text = element_text(size = 6))
# Count of schools at each enrolment value, outlined by offering.
# na.rm is a layer argument, not an aesthetic, so it goes outside aes().
ggplot(data = df) +
  geom_bar(
    mapping = aes(x = enrollment_sy_2014_2015, color = offering),
    na.rm = TRUE
  )
Ignoring unknown aesthetics: na.rm
# FY 2015 MOOE against enrolment, one point per school, coloured by offering.
ggplot(data = df) +
  geom_point(
    mapping = aes(
      x = enrollment_sy_2014_2015,
      y = mooe_in_php_for_fy_2015,
      color = offering
    )
  ) +
  xlab("Number of Students Enrolled") +
  ylab("Budget Allocated")
# Restrict to a single region (CAR) and compare school budgets by province.
df1 <- df %>%
  filter(region == "CAR")

# Box plot of FY 2015 MOOE per province, coloured by offering.
ggplot(data = df1) +
  geom_boxplot(
    mapping = aes(x = province, y = mooe_in_php_for_fy_2015, color = offering)
  ) +
  xlab("Provinces") +
  ylab("Budget")
# Distribution of school enrolment, in bins 500 students wide.
ggplot(data = df) +
  geom_histogram(mapping = aes(x = enrollment_sy_2014_2015), binwidth = 500) +
  xlab("Enrollment figures")