library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build the path to the workbook explicitly instead of calling setwd():
# changing the working directory mutates session-wide state just to resolve
# one file name. The same file is read as before.
data_dir <- file.path(
  "C:/Users/Administrator/Desktop/Graduate School",
  "Applied Quant Methods",
  "My Class Stuff",
  "Week 3 Class"
)
district <- read_excel(file.path(data_dir, "district.xls"))
Question 2
# Keep only the DISTNAME, DPETSPEP and DPFPASPEP columns of `district`.
three_variable_district_data <- select(
  district,
  DISTNAME, DPETSPEP, DPFPASPEP
)
Question 3
# Keep only the two numeric columns, then print their five-number summaries,
# means, and NA counts.
two_variable_district_data <- select(district, DPETSPEP, DPFPASPEP)
summary(two_variable_district_data)
## DPETSPEP DPFPASPEP
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 9.90 1st Qu.: 5.800
## Median :12.10 Median : 8.900
## Mean :12.27 Mean : 9.711
## 3rd Qu.:14.20 3rd Qu.:12.500
## Max. :51.70 Max. :49.000
## NA's :5
Question 4: The variable DPFPASPEP has 5 missing values.
Question 5: 1,202 observations are left after dropping the rows that contain NAs.
# Drop every row that has an NA in either column. The result is computed once
# and stored (it feeds the plot and the correlation below), then printed so the
# remaining row count is visible.
compare_two <- two_variable_district_data |>
  drop_na()
compare_two
## # A tibble: 1,202 × 2
## DPETSPEP DPFPASPEP
## <dbl> <dbl>
## 1 14.6 28.9
## 2 12.1 8.8
## 3 13.1 8.4
## 4 10.5 10.1
## 5 13.5 6.1
## 6 14.5 9.4
## 7 14.7 9.9
## 8 10.4 10.9
## 9 11.6 9.2
## 10 11.9 10.3
## # ℹ 1,192 more rows
Question 6
# Scatterplot of DPETSPEP (x axis) against DPFPASPEP (y axis), using the
# NA-free data in compare_two.
ggplot(data = compare_two, mapping = aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point()
Question 7
# Correlation between the two columns (cor() defaults to Pearson).
with(compare_two, cor(DPETSPEP, DPFPASPEP))
## [1] 0.3700234
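Question 8: There is a weak-to-moderate positive correlation (r ≈ 0.37) between DPFPASPEP (money spent on special education) and DPETSPEP (percent special education). Districts with a higher percentage of special education students tend to spend more on special education, though the relationship is not strong. Note that no significance test was run here, so this describes the strength of the association rather than its statistical significance.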