This script starts with the New_Mexico_SGP_LONG_Data_2024.RData object generated with the SGP package.
#clear the global environment
rm(list = ls())
#load data object
load("Data/New_Mexico_SGP_LONG_Data_2024.RData")
data <- get("New_Mexico_SGP_LONG_Data_2024")
It then runs a sequence of functions to group and summarize by the subgroups required:
1.All
2.Female
3.Male
4.Hispanic
5.White
6.Black
7.Asian
8.Native
9.Multirace
10.FRL
11.NotFRL
12.DirecrCert
13.NotDirectCert
14.SwD
15.NotSwD
16.EL
17.NotEL
18.Migrant
19.Homeless
20.Military
21.Foster
22.AnyNative
23.AnyBlack
Statewide numerators, denominators, and rates.
District-level numerators, denominators, and rates.
School-level numerators, denominators, and rates.
View all variables
names(data)
## [1] "VALID_CASE"
## [2] "CONTENT_AREA"
## [3] "YEAR"
## [4] "ID"
## [5] "GRADE_ENROLLED"
## [6] "SCALE_SCORE"
## [7] "ACHIEVEMENT_LEVEL"
## [8] "GRADE"
## [9] "SCHOOL_NUMBER"
## [10] "AccountabilityModel"
## [11] "ExclAccountabiltyReason"
## [12] "Test"
## [13] "Hispanic.4"
## [14] "White.5"
## [15] "Black.6"
## [16] "Asian.7"
## [17] "Native.8"
## [18] "Multirace.9"
## [19] "FRL.10"
## [20] "NotFRL.11"
## [21] "DirectCert.12"
## [22] "NotDirectCert.13"
## [23] "SwD.14"
## [24] "NotSwD.15"
## [25] "EL.16"
## [26] "NotEL.17"
## [27] "Migrant.18"
## [28] "Homeless.19"
## [29] "Military.20"
## [30] "Any_Native.22"
## [31] "Any_Black.23"
## [32] "Foster.21"
## [33] "GENDER"
## [34] "SGP"
## [35] "SCALE_SCORE_PRIOR"
## [36] "SCALE_SCORE_PRIOR_STANDARDIZED"
## [37] "SGP_LEVEL"
## [38] "SGP_NORM_GROUP"
## [39] "SGP_TARGET_3_YEAR_CURRENT"
## [40] "SGP_TARGET_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET"
## [41] "SGP_PROJECTION_NOTE"
## [42] "CATCH_UP_KEEP_UP_STATUS_INITIAL_CURRENT"
## [43] "SGP_PROJECTION_GROUP"
## [44] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT"
## [45] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET"
## [46] "MOVE_UP_STAY_UP_STATUS_INITIAL_CURRENT"
## [47] "SGP_TARGET_3_YEAR"
## [48] "SGP_TARGET_3_YEAR_NUM_YEARS_TO_TARGET"
## [49] "ACHIEVEMENT_LEVEL_PRIOR"
## [50] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR"
## [51] "SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_NUM_YEARS_TO_TARGET"
## [52] "CATCH_UP_KEEP_UP_STATUS_3_YEAR"
## [53] "MOVE_UP_STAY_UP_STATUS_3_YEAR"
Count NAs. This list will point to demographic variables with missing data to fill in.
# Calculate the sum of NAs for each variable and filter out variables with no NAs
na_summary <- sapply(data, function(x) sum(is.na(x)))
# Filter to keep only variables where NA count is greater than 0
na_summary <- na_summary[na_summary > 0]
# Display the resulting filtered dataframe
print(na_summary)
## SCHOOL_NUMBER
## 13471
## Hispanic.4
## 21519
## White.5
## 1711
## Black.6
## 1711
## Asian.7
## 1711
## Native.8
## 1711
## Multirace.9
## 1711
## FRL.10
## 33567
## NotFRL.11
## 33567
## DirectCert.12
## 1711
## NotDirectCert.13
## 1711
## SwD.14
## 1711
## NotSwD.15
## 1711
## EL.16
## 68022
## NotEL.17
## 68022
## Migrant.18
## 1711
## Homeless.19
## 1711
## Military.20
## 1711
## Any_Native.22
## 263527
## Any_Black.23
## 263527
## Foster.21
## 263527
## GENDER
## 1943
## SGP
## 57442
## SCALE_SCORE_PRIOR
## 57442
## SCALE_SCORE_PRIOR_STANDARDIZED
## 57442
## SGP_LEVEL
## 57442
## SGP_NORM_GROUP
## 57442
## SGP_TARGET_3_YEAR_CURRENT
## 45346
## SGP_TARGET_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET
## 45346
## SGP_PROJECTION_NOTE
## 216721
## CATCH_UP_KEEP_UP_STATUS_INITIAL_CURRENT
## 45346
## SGP_PROJECTION_GROUP
## 3284
## SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT
## 193024
## SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_CURRENT_NUM_YEARS_TO_TARGET
## 193024
## MOVE_UP_STAY_UP_STATUS_INITIAL_CURRENT
## 193024
## SGP_TARGET_3_YEAR
## 57442
## SGP_TARGET_3_YEAR_NUM_YEARS_TO_TARGET
## 57442
## ACHIEVEMENT_LEVEL_PRIOR
## 57442
## SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR
## 196011
## SGP_TARGET_MOVE_UP_STAY_UP_3_YEAR_NUM_YEARS_TO_TARGET
## 196011
## CATCH_UP_KEEP_UP_STATUS_3_YEAR
## 57442
## MOVE_UP_STAY_UP_STATUS_3_YEAR
## 196011
Replace the following with values matching ID and StID from AVE:
Hispanic.4
18050
FRL.10
28766
NotFRL.11
28766
EL.16
64108
NotEL.17
64108
Any_Native.22
250056
Any_Black.23
250056
Foster.21
250056
GENDER
71
ave24 <- read_csv("SY 2023-24, AVE, All Demographic and Attendance Records, V20240918 DW.csv")
## Rows: 346573 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): SchoolYear, District, School, NonAccountableReason, StudentLastNam...
## dbl (41): DistCode, SchNumb, StID, Grade, SnapsPresent2, SnapsPresent3, Snap...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Update demographic vars in data with matching values from ave24
data$Hispanic.4 <- ave24$Hispanic.4[match(data$ID, ave24$StID)]
table(data$Hispanic.4, useNA="always")
##
## 0 1 <NA>
## 92408 149554 21565
data$FRL.10 <- ave24$FRL.10[match(data$ID, ave24$StID)]
table(data$FRL.10, useNA="always")
##
## 0 1 <NA>
## 21907 208053 33567
data$EL.16 <- ave24$EL.16[match(data$ID, ave24$StID)]
table(data$EL.16, useNA="always")
##
## 0 1 <NA>
## 142483 53197 67847
data$Any_Native.22 <- ave24$AnyNative.22[match(data$ID, ave24$StID)]
table(data$Any_Native.22, useNA="always")
##
## 0 1 <NA>
## 226824 34998 1705
data$Any_Black.23 <- ave24$AnyBlack.23[match(data$ID, ave24$StID)]
table(data$Any_Black.23, useNA="always")
##
## 0 1 <NA>
## 249258 12564 1705
data$Female.2 <- ave24$Female.2[match(data$ID, ave24$StID)]
table(data$Female.2, useNA="always")
##
## 0 1 <NA>
## 133126 128410 1991
data$Male.3 <- ave24$Male.3[match(data$ID, ave24$StID)]
table(data$Male.3, useNA="always")
##
## 0 1 <NA>
## 128410 133126 1991
#drop records with missing SCHOOL_NUMBER
data <- data[!is.na(data$SCHOOL_NUMBER),]
#add a district code
data$DISTCODE <- floor(data$SCHOOL_NUMBER/1000)
#add a statewide code
data$STATEWIDE <- 999999
#move these columns in front of SCHOOL_NUMBER
data <- data %>%
relocate(STATEWIDE, DISTCODE, .after = "GRADE")
data$Female.2 <- ifelse(data$GENDER == "Female", 1, 0)
data$Male.3 <- ifelse(data$GENDER == "Male", 1, 0)
data <- data %>%
relocate(Female.2, Male.3, .after = Test)
#examine var counts
table(data$GENDER, data$Female.2, useNA = "always")
##
## 0 1 <NA>
## Female 0 122594 0
## Male 127391 0 0
## <NA> 0 0 71
table(data$GENDER, data$AccountabilityModel, useNA = "always")
##
## 1 <NA>
## Female 122594 0
## Male 127391 0
## <NA> 71 0
#Create a list of grouping variables to use at all levels
grouping_vars <- names(data[, 15:36])
#a function to call to summarize at level
summarize_data <- function(group_var, level_var) {
summary_df <- data %>%
group_by(.data[[group_var]], .data[[level_var]], CONTENT_AREA) %>% # Group by both group_var and level_var
summarize(
median_SGP = median(SGP, na.rm = TRUE),
count = n(),
.groups = 'drop'
) %>%
mutate(grouping_value = .data[[group_var]], # Store the primary grouping value
level_value = .data[[level_var]], # Store the level variable value
group = group_var, # Store the grouping variable name
level = level_var, # Store the level variable name
content = CONTENT_AREA) %>% # Store the content area
select(level_value, grouping_value, group, content, count, median_SGP)
return(summary_df)
}
# Name of level variable is passed as level_var
statewide_growth <- do.call(
rbind,
lapply(grouping_vars, function(var) summarize_data(var, "STATEWIDE")))
district_growth <- do.call(
rbind,
lapply(grouping_vars, function(var) summarize_data(var, "DISTCODE")))
school_growth <- do.call(
rbind,
lapply(grouping_vars, function(var) summarize_data(var, "SCHOOL_NUMBER")))
#rbind all three levels
growth <- rbind(statewide_growth, district_growth, school_growth)
growth <- growth %>%
arrange(level_value)
growth$group <- ifelse(!is.na(growth$grouping_value),
ifelse(growth$grouping_value == 0,
paste0(growth$group, " Not"),
growth$group),
paste0(growth$group, " NA")
)
head(growth)
## # A tibble: 6 × 6
## level_value grouping_value group content count median_SGP
## <dbl> <chr> <chr> <chr> <int> <dbl>
## 1 1 0 Female.2 Not ELA 15073 46
## 2 1 0 Female.2 Not MATHEMATICS 15080 49
## 3 1 1 Female.2 ELA 14285 50
## 4 1 1 Female.2 MATHEMATICS 14297 49
## 5 1 <NA> Female.2 NA ELA 13 52
## 6 1 <NA> Female.2 NA MATHEMATICS 13 49
tail(growth)
## # A tibble: 6 × 6
## level_value grouping_value group content count median_SGP
## <dbl> <chr> <chr> <chr> <int> <dbl>
## 1 999999 0 Any_Black.23 Not ELA 119047 50
## 2 999999 0 Any_Black.23 Not MATHEMATICS 119038 50
## 3 999999 1 Any_Black.23 ELA 5980 49
## 4 999999 1 Any_Black.23 MATHEMATICS 5991 49
## 5 999999 <NA> Foster.21 NA ELA 125027 50
## 6 999999 <NA> Foster.21 NA MATHEMATICS 125029 50
Write as .csv
write.csv(growth, "AMD SY 2023-24, SGP All Entities, 20240924 SH.csv", row.names = FALSE)