# Document-wide chunk defaults: show the source code, hide messages and
# warnings, and render results as regular marked-up output.
knitr::opts_chunk$set(
  echo = TRUE,
  results = "markup",
  message = FALSE,
  warning = FALSE
)

# Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for importing excel files

Import data

# Read the Excel workbook (path is relative to this document's folder) into a
# tibble, then print it to preview the first rows and column types.
data <- readxl::read_excel(
  path = "../00_data/myData NH Public Schools.xlsx"
)
data
## # A tibble: 492 × 10
##    CITY          STATE COUNTY AREA  SCHOOL_LEVEL LEVEL_AGE_POPULATION ENROLLMENT
##    <chr>         <chr> <chr>  <chr> <chr>                       <dbl>      <dbl>
##  1 ALTON         NH    BELKN… R     High                          571        526
##  2 ALTON         NH    BELKN… R     Primary                       582        538
##  3 BELMONT       NH    BELKN… R     High                          484        451
##  4 BELMONT       NH    BELKN… R     Middle                        440        406
##  5 BELMONT       NH    BELKN… R     Primary                       456        428
##  6 CTR. BARNSTE… NH    BELKN… R     Primary                       562        512
##  7 GILFORD       NH    BELKN… R     Middle                        369        337
##  8 GILFORD       NH    BELKN… R     Primary                       405        376
##  9 GILFORD       NH    BELKN… R     High                          588        540
## 10 GILMANTON IW  NH    BELKN… R     Primary                       439        407
## # ℹ 482 more rows
## # ℹ 3 more variables: START_GRADE <chr>, END_GRADE <chr>, NOT_ENROLLED <dbl>

State one question

Do rural areas have more school-aged youth than urban areas?

Plot data

# Plot the school-aged population by area type so the figure addresses the
# stated question (rural vs. urban school-aged youth) directly.
# NOTE(review): assumes LEVEL_AGE_POPULATION is the count of school-aged youth
# for each row and that AREA distinguishes rural from urban areas — confirm
# against the data dictionary.
ggplot(
  data = data,
  mapping = aes(x = AREA, y = LEVEL_AGE_POPULATION, color = AREA)
) +
  # Boxes summarise each area's distribution; outliers are hidden here because
  # the jittered points below already draw every row.
  geom_boxplot(outlier.shape = NA) +
  # Jitter horizontally only, so the plotted population values stay exact
  # while overlapping points are spread apart.
  geom_jitter(alpha = 0.3, width = 0.2, height = 0) +
  labs(x = "Area", y = "School-aged population", color = "Area")

Interpret

Evidence shows that there is a higher number of school-aged youth in urban areas than in rural areas.