Prepare

This script starts with the New_Mexico_SGP_LONG_Data_2024.RData object generated with the SGP package.

#clear the global environment
rm(list = ls())

Load the necessary packages

#load data object
load("Data/New_Mexico_SGP_LONG_Data_2024.RData")

data <- get("New_Mexico_SGP_LONG_Data_2024")

It then runs a sequence of functions to group and summarize by the subgroups required:

Reporting Requirements

Each file that reports on student subgroups should include the following groups:

  • 1.All

  • 2.Female

  • 3.Male

  • 4.Hispanic

  • 5.White

  • 6.Black

  • 7.Asian

  • 8.Native

  • 9.Multirace

  • 10.FRL

  • 11.NotFRL

  • 12.DirecrCert

  • 13.NotDirectCert

  • 14.SwD

  • 15.NotSwD

  • 16.EL

  • 17.NotEL

  • 18.Migrant

  • 19.Homeless

  • 20.Military

  • 21.Foster

  • 22.AnyNative

  • 23.AnyBlack

All indicators for which data is available must include:

  • Statewide numerators, denominators, and rates.

  • District-level numerators, denominators, and rates.

  • School-level numerators, denominators, and rates.

Content areas include: - ELA - MATHEMATICS

Start by examining the grouping variables.

View all variables

names(data)
##  [1] "VALID_CASE"                                                   
##  [2] "CONTENT_AREA"                                                 
##  [3] "YEAR"                                                         
##  [4] "ID"                                                           
##  [5] "GRADE_ENROLLED"                                               
##  [6] "SCALE_SCORE"                                                  
##  [7] "ACHIEVEMENT_LEVEL"                                            
##  [8] "GRADE"                                                        
##  [9] "SCHOOL_NUMBER"                                                
## [10] "AccountabilityModel"                                          
## [11] "ExclAccountabiltyReason"                                      
## [12] "Test"                                                         
## [13] "Hispanic.4"                                                   
## [14] "White.5"                                                      
## [15] "Black.6"                                                      
## [16] "Asian.7"                                                      
## [17] "Native.8"                                                     
## [18] "Multirace.9"                                                  
## [19] "FRL.10"                                                       
## [20] "NotFRL.11"                                                    
## [21] "DirectCert.12"                                                
## [22] "NotDirectCert.13"                                             
## [23] "SwD.14"                                                       
## [24] "NotSwD.15"                                                    
## [25] "EL.16"                                                        
## [26] "NotEL.17"                                                     
## [27] "Migrant.18"                                                   
## [28] "Homeless.19"                                                  
## [29] "Military.20"                                                  
## [30] "Any_Native.22"                                                
## [31] "Any_Black.23"                                                 
## [32] "Foster.21"                                                    
## [33] "GENDER"                                                       
## [34] "SGP"                                                          
## [35] "SCALE_SCORE_PRIOR"                                            
## [36] "SCALE_SCORE_PRIOR_STANDARDIZED"                               
## [37] "SGP_LEVEL"                                                    
## [38] "SGP_NORM_GROUP"                                               
## [39] "SGP_TARGET_3_YEAR_CURRENT"                                    
## [40] "SGP_TARGET_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET"                
## [41] "SGP_PROJECTION_NOTE"                                          
## [42] "CATCH_UP_KEEP_UP_STATUS_INITIAL_CURRENT"                      
## [43] "SGP_PROJECTION_GROUP"                                         
## [44] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT"                    
## [45] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET"
## [46] "MOVE_UP_STAY_UP_STATUS_INITIAL_CURRENT"                       
## [47] "SGP_TARGET_3_YEAR"                                            
## [48] "SGP_TARGET_3_YEAR_NUM_YEARS_TO_TARGET"                        
## [49] "ACHIEVEMENT_LEVEL_PRIOR"                                      
## [50] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR"                            
## [51] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_NUM_YEARS_TO_TARGET"        
## [52] "CATCH_UP_KEEP_UP_STATUS_3_YEAR"                               
## [53] "MOVE_UP_STAY_UP_STATUS_3_YEAR"

Count NAs. This list will point to demographic variables with missing data to fill in.

# Calculate the sum of NAs for each variable and filter out variables with no NAs
na_summary <- sapply(data, function(x) sum(is.na(x)))

# Filter to keep only variables where NA count is greater than 0
na_summary <- na_summary[na_summary > 0]

# Display the resulting filtered dataframe
print(na_summary)
##                                                 SCHOOL_NUMBER 
##                                                         13471 
##                                                    Hispanic.4 
##                                                         21519 
##                                                       White.5 
##                                                          1711 
##                                                       Black.6 
##                                                          1711 
##                                                       Asian.7 
##                                                          1711 
##                                                      Native.8 
##                                                          1711 
##                                                   Multirace.9 
##                                                          1711 
##                                                        FRL.10 
##                                                         33567 
##                                                     NotFRL.11 
##                                                         33567 
##                                                 DirectCert.12 
##                                                          1711 
##                                              NotDirectCert.13 
##                                                          1711 
##                                                        SwD.14 
##                                                          1711 
##                                                     NotSwD.15 
##                                                          1711 
##                                                         EL.16 
##                                                         68022 
##                                                      NotEL.17 
##                                                         68022 
##                                                    Migrant.18 
##                                                          1711 
##                                                   Homeless.19 
##                                                          1711 
##                                                   Military.20 
##                                                          1711 
##                                                 Any_Native.22 
##                                                        263527 
##                                                  Any_Black.23 
##                                                        263527 
##                                                     Foster.21 
##                                                        263527 
##                                                        GENDER 
##                                                          1943 
##                                                           SGP 
##                                                         57442 
##                                             SCALE_SCORE_PRIOR 
##                                                         57442 
##                                SCALE_SCORE_PRIOR_STANDARDIZED 
##                                                         57442 
##                                                     SGP_LEVEL 
##                                                         57442 
##                                                SGP_NORM_GROUP 
##                                                         57442 
##                                     SGP_TARGET_3_YEAR_CURRENT 
##                                                         45346 
##                 SGP_TARGET_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET 
##                                                         45346 
##                                           SGP_PROJECTION_NOTE 
##                                                        216721 
##                       CATCH_UP_KEEP_UP_STATUS_INITIAL_CURRENT 
##                                                         45346 
##                                          SGP_PROJECTION_GROUP 
##                                                          3284 
##                     SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT 
##                                                        193024 
## SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET 
##                                                        193024 
##                        MOVE_UP_STAY_UP_STATUS_INITIAL_CURRENT 
##                                                        193024 
##                                             SGP_TARGET_3_YEAR 
##                                                         57442 
##                         SGP_TARGET_3_YEAR_NUM_YEARS_TO_TARGET 
##                                                         57442 
##                                       ACHIEVEMENT_LEVEL_PRIOR 
##                                                         57442 
##                             SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR 
##                                                        196011 
##         SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_NUM_YEARS_TO_TARGET 
##                                                        196011 
##                                CATCH_UP_KEEP_UP_STATUS_3_YEAR 
##                                                         57442 
##                                 MOVE_UP_STAY_UP_STATUS_3_YEAR 
##                                                        196011

Merge final AVE demographics

Replace the following with values matching ID and StID from AVE:

                                               Hispanic.4 
                                                    18050 
                                                   FRL.10 
                                                    28766 
                                                NotFRL.11 
                                                    28766 
                                                    EL.16 
                                                    64108 
                                                 NotEL.17 
                                                    64108 
                                            Any_Native.22 
                                                   250056 
                                             Any_Black.23 
                                                   250056 
                                                Foster.21 
                                                   250056 
                                                   GENDER 
                                                       71 
                                                       
ave24 <- read_csv("SY 2023-24, AVE, All Demographic and Attendance Records, V20240918 DW.csv")
## Rows: 346573 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): SchoolYear, District, School, NonAccountableReason, StudentLastNam...
## dbl (41): DistCode, SchNumb, StID, Grade, SnapsPresent2, SnapsPresent3, Snap...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Update demographic vars in data with matching values from ave24 
data$Hispanic.4 <- ave24$Hispanic.4[match(data$ID, ave24$StID)]
table(data$Hispanic.4, useNA="always")
## 
##      0      1   <NA> 
##  92408 149554  21565
data$FRL.10 <- ave24$FRL.10[match(data$ID, ave24$StID)]
table(data$FRL.10, useNA="always")
## 
##      0      1   <NA> 
##  21907 208053  33567
data$EL.16 <- ave24$EL.16[match(data$ID, ave24$StID)]
table(data$EL.16, useNA="always")
## 
##      0      1   <NA> 
## 142483  53197  67847
data$Any_Native.22 <- ave24$AnyNative.22[match(data$ID, ave24$StID)]
table(data$Any_Native.22, useNA="always")
## 
##      0      1   <NA> 
## 226824  34998   1705
data$Any_Black.23 <- ave24$AnyBlack.23[match(data$ID, ave24$StID)]
table(data$Any_Black.23, useNA="always")
## 
##      0      1   <NA> 
## 249258  12564   1705
data$Female.2 <- ave24$Female.2[match(data$ID, ave24$StID)]
table(data$Female.2, useNA="always")
## 
##      0      1   <NA> 
## 133126 128410   1991
data$Male.3 <- ave24$Male.3[match(data$ID, ave24$StID)]
table(data$Male.3, useNA="always")
## 
##      0      1   <NA> 
## 128410 133126   1991

Add a district and statewide code

#drop records with missing SCHOOL_NUMBER
data <- data[!is.na(data$SCHOOL_NUMBER),]

#add a district code
data$DISTCODE <- floor(data$SCHOOL_NUMBER/1000)

#add a statewide code
data$STATEWIDE <- 999999

#move these columns in front of SCHOOL_NUMBER
data <- data %>%
  relocate(STATEWIDE, DISTCODE, .after = "GRADE")

Transform GENDER into Female & Male dichotomous variables

data$Female.2 <- ifelse(data$GENDER == "Female", 1, 0)
data$Male.3 <- ifelse(data$GENDER == "Male", 1, 0)

data <- data %>%
  relocate(Female.2, Male.3, .after = Test)

#examine var counts
table(data$GENDER, data$Female.2, useNA = "always")
##         
##               0      1   <NA>
##   Female      0 122594      0
##   Male   127391      0      0
##   <NA>        0      0     71
table(data$GENDER, data$AccountabilityModel, useNA = "always")
##         
##               1   <NA>
##   Female 122594      0
##   Male   127391      0
##   <NA>       71      0

Aggregate by level, by group, by content area

#Create a list of grouping variables to use at all levels

grouping_vars <- names(data[, 15:36])

#a function to call to summarize at level

summarize_data <- function(group_var, level_var) {
  summary_df <- data %>%
    group_by(.data[[group_var]], .data[[level_var]], CONTENT_AREA) %>%  # Group by both group_var and level_var
    summarize(
      median_SGP = median(SGP, na.rm = TRUE),
      count = n(),
      .groups = 'drop'
    ) %>%
    mutate(grouping_value = .data[[group_var]],  # Store the primary grouping value
           level_value = .data[[level_var]],     # Store the level variable value
           group = group_var,                   # Store the grouping variable name
           level = level_var,                  # Store the level variable name
           content = CONTENT_AREA) %>%        # Store the content area
    select(level_value, grouping_value, group, content, count, median_SGP)  

  return(summary_df)
}

# Name of level variable is passed as level_var
statewide_growth <- do.call(
  rbind, 
  lapply(grouping_vars, function(var) summarize_data(var, "STATEWIDE")))

district_growth <- do.call(
  rbind, 
  lapply(grouping_vars, function(var) summarize_data(var, "DISTCODE")))

school_growth <- do.call(
  rbind, 
  lapply(grouping_vars, function(var) summarize_data(var, "SCHOOL_NUMBER")))


#rbind all three levels
growth <- rbind(statewide_growth, district_growth, school_growth)

growth <- growth %>%
  arrange(level_value)

growth$group <- ifelse(!is.na(growth$grouping_value), 
                       ifelse(growth$grouping_value == 0, 
                              paste0(growth$group, " Not"),
                              growth$group),
                       paste0(growth$group, " NA")
)

head(growth)
## # A tibble: 6 × 6
##   level_value grouping_value group        content     count median_SGP
##         <dbl> <chr>          <chr>        <chr>       <int>      <dbl>
## 1           1 0              Female.2 Not ELA         15073         46
## 2           1 0              Female.2 Not MATHEMATICS 15080         49
## 3           1 1              Female.2     ELA         14285         50
## 4           1 1              Female.2     MATHEMATICS 14297         49
## 5           1 <NA>           Female.2 NA  ELA            13         52
## 6           1 <NA>           Female.2 NA  MATHEMATICS    13         49
tail(growth)
## # A tibble: 6 × 6
##   level_value grouping_value group            content      count median_SGP
##         <dbl> <chr>          <chr>            <chr>        <int>      <dbl>
## 1      999999 0              Any_Black.23 Not ELA         119047         50
## 2      999999 0              Any_Black.23 Not MATHEMATICS 119038         50
## 3      999999 1              Any_Black.23     ELA           5980         49
## 4      999999 1              Any_Black.23     MATHEMATICS   5991         49
## 5      999999 <NA>           Foster.21 NA     ELA         125027         50
## 6      999999 <NA>           Foster.21 NA     MATHEMATICS 125029         50

Write as .csv

write.csv(growth, "AMD SY 2023-24, SGP All Entities, 20240924 SH.csv", row.names = FALSE)