# Echo the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)

# Read the CASchools CSV from the URL below into a data frame.
# read.csv() is used instead of read.table(sep = ",") because its defaults are
# meant for comma-separated files: only the double quote delimits fields and
# "#" is not a comment character, so an apostrophe or hash inside a text field
# cannot silently corrupt the parse. header = TRUE and sep = "," are its
# defaults and therefore no longer need to be spelled out.
theURL <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/CASchools.csv"
ca_test_scores <- read.csv(file = theURL)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Print a per-column summary of the full data set.
summary(object = ca_test_scores)
## X district school county
## Min. : 1.0 Min. :61382 Length:420 Length:420
## 1st Qu.:105.8 1st Qu.:64308 Class :character Class :character
## Median :210.5 Median :67760 Mode :character Mode :character
## Mean :210.5 Mean :67473
## 3rd Qu.:315.2 3rd Qu.:70419
## Max. :420.0 Max. :75440
## grades students teachers calworks
## Length:420 Min. : 81.0 Min. : 4.85 Min. : 0.000
## Class :character 1st Qu.: 379.0 1st Qu.: 19.66 1st Qu.: 4.395
## Mode :character Median : 950.5 Median : 48.56 Median :10.520
## Mean : 2628.8 Mean : 129.07 Mean :13.246
## 3rd Qu.: 3008.0 3rd Qu.: 146.35 3rd Qu.:18.981
## Max. :27176.0 Max. :1429.00 Max. :78.994
## lunch computer expenditure income
## Min. : 0.00 Min. : 0.0 Min. :3926 Min. : 5.335
## 1st Qu.: 23.28 1st Qu.: 46.0 1st Qu.:4906 1st Qu.:10.639
## Median : 41.75 Median : 117.5 Median :5215 Median :13.728
## Mean : 44.71 Mean : 303.4 Mean :5312 Mean :15.317
## 3rd Qu.: 66.86 3rd Qu.: 375.2 3rd Qu.:5601 3rd Qu.:17.629
## Max. :100.00 Max. :3324.0 Max. :7712 Max. :55.328
## english read math
## Min. : 0.000 Min. :604.5 Min. :605.4
## 1st Qu.: 1.941 1st Qu.:640.4 1st Qu.:639.4
## Median : 8.778 Median :655.8 Median :652.5
## Mean :15.768 Mean :655.0 Mean :653.3
## 3rd Qu.:22.970 3rd Qu.:668.7 3rd Qu.:665.9
## Max. :85.540 Max. :704.0 Max. :709.5
# Mean and median of the income column across all rows.
mean(ca_test_scores[["income"]])
## [1] 15.31659
median(ca_test_scores[["income"]])
## [1] 13.7278
# Mean and median of the read column across all rows.
mean(ca_test_scores[["read"]])
## [1] 654.9705
median(ca_test_scores[["read"]])
## [1] 655.75
# Mean and median of the math column across all rows.
mean(ca_test_scores[["math"]])
## [1] 653.3426
median(ca_test_scores[["math"]])
## [1] 652.45
# Build the subset of rows whose income is at or above the median income,
# keeping only the school, county, reading-score and math-score columns under
# presentation-friendly names.
#
# The cutoff is computed from the data instead of being typed in from the
# rounded console output (13.7278), so it is exact and stays correct if the
# underlying data changes. na.rm = TRUE keeps the cutoff defined even if income
# ever contains missing values; filter() drops rows with NA income either way.
top_half_income <- ca_test_scores %>%
  filter(income >= median(income, na.rm = TRUE)) %>%
  # select() can rename while selecting (new_name = old_name).
  select(
    School = school,
    County = county,
    Reading_Scores = read,
    Math_Scores = math
  )
summary(top_half_income)
## School County Reading_Scores Math_Scores
## Length:210 Length:210 Min. :624.4 Min. :625.3
## Class :character Class :character 1st Qu.:655.1 1st Qu.:651.1
## Mode :character Mode :character Median :666.2 Median :663.0
## Mean :665.8 Mean :662.9
## 3rd Qu.:676.1 3rd Qu.:673.0
## Max. :704.0 Max. :709.5
# Mean and median reading score within the top-half-income subset.
mean(top_half_income[["Reading_Scores"]])
## [1] 665.7657
median(top_half_income[["Reading_Scores"]])
## [1] 666.2
# Mean and median math score within the top-half-income subset.
mean(top_half_income[["Math_Scores"]])
## [1] 662.9329
median(top_half_income[["Math_Scores"]])
## [1] 662.95
# Abbreviate three county names in the subset; every other value in the
# County column is left as it is.
top_half_income$County <- replace(
  top_half_income$County,
  top_half_income$County == "Los Angeles",
  "LA"
)
top_half_income$County <- replace(
  top_half_income$County,
  top_half_income$County == "San Bernardino",
  "SB"
)
top_half_income$County <- replace(
  top_half_income$County,
  top_half_income$County == "Santa Clara",
  "SC"
)

# Show the first 47 rows of the full data set and the first 20 rows of the
# top-half-income subset.
head(x = ca_test_scores, n = 47)
head(x = top_half_income, n = 20)