HW 5

library(readxl)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)

# Read the district-level data set from the Excel workbook.
district <- readxl::read_excel("district.xls")

# Keep only the numeric columns, then drop incomplete rows.
# NOTE(review): drop_na() with no column arguments removes a row when ANY of
# the selected numeric columns is NA, so the sample may shrink more than the
# later analyses require -- confirm this is intended.
numeric_clean_district_data <- district |>
  dplyr::select(where(is.numeric)) |>
  tidyr::drop_na()

# Preview the first six rows of the cleaned data.
numeric_clean_district_data |>
  head()

## # A tibble: 6 × 128
##   DZCAMPUS DPETALLC DPETBLAP DPETHISP DPETWHIP DPETINDP DPETASIP DPETPCIP
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1        6     3360     25.1     42.9     27.3      0.2      0.7      0.1
## 2        5     2799      7.2     27.9     60.6      0.3      1        0.1
## 3       17     7318     28.7     43.1     24        0.1      1.1      0.1
## 4        5     1612      2.4      6.6     87        0.3      0.1      0.2
## 5        4     3005      1.3     44.1     49.6      0.3      2        0.1
## 6        6     3374      0.7     69.6     27.6      0.4      0.5      0.1
## # ℹ 120 more variables: DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## #   DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## #   DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## #   DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## #   DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## #   DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>,
## #   DDA00AM01222R <dbl>, DDA00AM01322R <dbl>, DDA00AC01S22R <dbl>, …

# Correlation (cor() default method) between DPSTBLFP and DPSTKIDR.
with(numeric_clean_district_data, cor(DPSTBLFP, DPSTKIDR))

## [1] 0.1782736

# Scatterplot matrix of the three variables of interest.
pairs(
  ~ DPSTBLFP + DPETBLAP + DPSTKIDR,
  data = numeric_clean_district_data
)

# Kendall rank-correlation test between DPSTBLFP and DPETBLAP.
# The `$` expressions are passed directly because cor.test() echoes them on
# the "data:" line of its printed result.
cor.test(
  numeric_clean_district_data$DPSTBLFP,
  numeric_clean_district_data$DPETBLAP,
  method = "kendall"
)

## 
##  Kendall's rank correlation tau
## 
## data:  numeric_clean_district_data$DPSTBLFP and numeric_clean_district_data$DPETBLAP
## z = 18.04, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.6778578

Kendall’s tau of 0.6778578 indicates a strong positive relationship between DPSTBLFP (percent of African American teachers) and DPETBLAP (percent of African American students). The p-value is very small (less than 2.2e-16), so the correlation is statistically significant. I chose Kendall’s method because it is rank-based and therefore works as a general-purpose choice for irregular or atypical data (for example, a small sample size or data that are not normally distributed).

HW 5

Alexis Garay

2025-10-22