library(readxl)
# Load the district data from the Excel workbook in the working directory.
district <- read_excel("district.xls")

# Descriptive statistics for every numeric column of the loaded data.
# stat.desc() must be handed the data itself: given the file name it only
# sees a character string and reports NA for every statistic.
pastecs::stat.desc(
  district[vapply(district, is.numeric, logical(1))]
)
library(tidyr)
library(dplyr)
# Complete-case subset: drop_na() with no column arguments discards every
# row that has a missing value in any column.
clean_district_data <- drop_na(district)
print(clean_district_data)
## # A tibble: 323 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 PALESTINE ISD 001907 001 AND… 07 B 6 3360 25.1
## 2 HUDSON ISD 003902 003 ANG… 07 A 5 2799 7.2
## 3 LUFKIN ISD 003903 003 ANG… 07 B 17 7318 28.7
## 4 HUNTINGTON ISD 003904 003 ANG… 07 B 5 1612 2.4
## 5 ARANSAS COUNTY … 004901 004 ARA… 02 B 4 3005 1.3
## 6 PLEASANTON ISD 007905 007 ATA… 20 B 6 3374 0.7
## 7 BANDERA ISD 010902 010 BAN… 20 Not Rat… 4 2301 0.2
## 8 BASTROP ISD 011901 011 BAS… 13 C 14 11947 3
## 9 ELGIN ISD 011902 011 BAS… 13 C 8 4985 9
## 10 SMITHVILLE ISD 011904 011 BAS… 13 B 4 1780 6.9
## # ℹ 313 more rows
## # ℹ 129 more variables: DPETHISP <dbl>, DPETWHIP <dbl>, DPETINDP <dbl>,
## # DPETASIP <dbl>, DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>,
## # DPETLEPP <dbl>, DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>,
## # DPETGIFP <dbl>, DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>,
## # DAGC5X20R <dbl>, DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>,
## # DDA00A001S22R <dbl>, DDA00A001222R <dbl>, DDA00A001322R <dbl>, …
# Descriptive statistics for the DPSTBLFP column of the cleaned data.
pastecs::stat.desc(clean_district_data[["DPSTBLFP"]])
## nbr.val nbr.null nbr.na min max range
## 323.0000000 20.0000000 0.0000000 0.0000000 80.2000000 80.2000000
## sum median mean SE.mean CI.mean.0.95 var
## 2580.4000000 3.9000000 7.9888545 0.6178283 1.2154898 123.2929189
## std.dev coef.var
## 11.1037345 1.3899032
# This variable measures the percentage of African American teachers within
# a given district. I will use it as my independent variable to see whether
# it has a positive effect on African American students’ STAAR test scores.
# Histogram of the untransformed DPSTBLFP values.
hist(clean_district_data$DPSTBLFP)

# Two-column table pairing each DPSTBLFP value with its square root.
clean_district_data_sqrt <- clean_district_data |>
  select(DPSTBLFP) |>
  mutate(DPSTBLFP_SQRT = sqrt(DPSTBLFP))
head(clean_district_data_sqrt)
## # A tibble: 6 × 2
## DPSTBLFP DPSTBLFP_SQRT
## <dbl> <dbl>
## 1 9.6 3.10
## 2 0.5 0.707
## 3 12.1 3.48
## 4 0 0
## 5 1.4 1.18
## 6 1.2 1.10

# Histogram of the square-root-transformed values.
hist(clean_district_data_sqrt$DPSTBLFP_SQRT)