knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, results = "markup")

# Install packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for importing excel files

Import data

# excel file
data <- read_excel("../00_data/myData NH Public Schools.xlsx")
data
## # A tibble: 492 × 10
##    CITY          STATE COUNTY AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT
##    <chr>         <chr> <chr>  <chr> <chr>                       <dbl>      <dbl>
##  1 ALTON         NH    BELKN… R     High                          571        526
##  2 ALTON         NH    BELKN… R     Primary                       582        538
##  3 BELMONT       NH    BELKN… R     High                          484        451
##  4 BELMONT       NH    BELKN… R     Middle                        440        406
##  5 BELMONT       NH    BELKN… R     Primary                       456        428
##  6 CTR. BARNSTE… NH    BELKN… R     Primary                       562        512
##  7 GILFORD       NH    BELKN… R     Middle                        369        337
##  8 GILFORD       NH    BELKN… R     Primary                       405        376
##  9 GILFORD       NH    BELKN… R     High                          588        540
## 10 GILMANTON IW  NH    BELKN… R     Primary                       439        407
## # ℹ 482 more rows
## # ℹ 3 more variables: START_GRADE <chr>, END_GRADE <chr>, NOT_ENROLLED <dbl>

Apply the following dplyr verbs to your data

Filter rows

filter(data, AREA == "R")
## # A tibble: 210 × 10
##    CITY          STATE COUNTY AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT
##    <chr>         <chr> <chr>  <chr> <chr>                       <dbl>      <dbl>
##  1 ALTON         NH    BELKN… R     High                          571        526
##  2 ALTON         NH    BELKN… R     Primary                       582        538
##  3 BELMONT       NH    BELKN… R     High                          484        451
##  4 BELMONT       NH    BELKN… R     Middle                        440        406
##  5 BELMONT       NH    BELKN… R     Primary                       456        428
##  6 CTR. BARNSTE… NH    BELKN… R     Primary                       562        512
##  7 GILFORD       NH    BELKN… R     Middle                        369        337
##  8 GILFORD       NH    BELKN… R     Primary                       405        376
##  9 GILFORD       NH    BELKN… R     High                          588        540
## 10 GILMANTON IW  NH    BELKN… R     Primary                       439        407
## # ℹ 200 more rows
## # ℹ 3 more variables: START_GRADE <chr>, END_GRADE <chr>, NOT_ENROLLED <dbl>

Arrange rows

arrange(data, desc(ENROLLMENT))
## # A tibble: 492 × 10
##    CITY        STATE COUNTY   AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT
##    <chr>       <chr> <chr>    <chr> <chr>                       <dbl>      <dbl>
##  1 DERRY       NH    ROCKING… U     High                         3344       3113
##  2 MANCHESTER  NH    HILLSBO… U     High                         2371       2235
##  3 MANCHESTER  NH    HILLSBO… U     High                         2140       2012
##  4 NASHUA      NH    HILLSBO… U     High                         2087       1953
##  5 NASHUA      NH    HILLSBO… U     High                         1917       1798
##  6 CONCORD     NH    MERRIMA… U     High                         1889       1779
##  7 LONDONDERRY NH    ROCKING… U     High                         1828       1701
##  8 EXETER      NH    ROCKING… U     High                         1815       1697
##  9 KEENE       NH    CHESHIR… R     High                         1690       1558
## 10 SALEM       NH    ROCKING… U     High                         1614       1498
## # ℹ 482 more rows
## # ℹ 3 more variables: START_GRADE <chr>, END_GRADE <chr>, NOT_ENROLLED <dbl>

Select columns

select(data, AREA:ENROLLMENT)
## # A tibble: 492 × 4
##    AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT
##    <chr> <chr>                       <dbl>      <dbl>
##  1 R     High                          571        526
##  2 R     Primary                       582        538
##  3 R     High                          484        451
##  4 R     Middle                        440        406
##  5 R     Primary                       456        428
##  6 R     Primary                       562        512
##  7 R     Middle                        369        337
##  8 R     Primary                       405        376
##  9 R     High                          588        540
## 10 R     Primary                       439        407
## # ℹ 482 more rows

Add columns

mutate(data, loss = LEVEL_AGE_POPULATION - ENROLLMENT) %>%
    
    # Select Area, School Level, Level Age Population, and Enrollment
    select(AREA:ENROLLMENT, loss)
## # A tibble: 492 × 5
##    AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT  loss
##    <chr> <chr>                       <dbl>      <dbl> <dbl>
##  1 R     High                          571        526    45
##  2 R     Primary                       582        538    44
##  3 R     High                          484        451    33
##  4 R     Middle                        440        406    34
##  5 R     Primary                       456        428    28
##  6 R     Primary                       562        512    50
##  7 R     Middle                        369        337    32
##  8 R     Primary                       405        376    29
##  9 R     High                          588        540    48
## 10 R     Primary                       439        407    32
## # ℹ 482 more rows

Summarize by groups

data %>%
    
    # Group by school level
    group_by(SCHOOL_LEVEL) %>%
    
    # Calculate enrollment by school level
    summarise(students = mean(ENROLLMENT))
## # A tibble: 5 × 2
##   SCHOOL_LEVEL students
##   <chr>           <dbl>
## 1 High             701.
## 2 Middle           438.
## 3 Other             12 
## 4 Primary          298.
## 5 Unknown           -2
data %>%
    
    # Group by school level
    group_by(SCHOOL_LEVEL) %>%
    
    # Calculate by school level
    summarise(students = mean(ENROLLMENT)) %>%
    
    ungroup()
## # A tibble: 5 × 2
##   SCHOOL_LEVEL students
##   <chr>           <dbl>
## 1 High             701.
## 2 Middle           438.
## 3 Other             12 
## 4 Primary          298.
## 5 Unknown           -2