Load csv
library(data.table)
# Read the homework data set from its local CSV file into a data.table.
CASchools <- fread(
  input = "C:\\Users\\NBMF48\\Desktop\\SPS\\Bridge\\R\\Homework\\Homework2\\CASchools.csv"
)
Question 1
## Per-column descriptive statistics for the full data set
summary(object = CASchools)
## V1 district school county
## Min. : 1.0 Min. :61382 Length:420 Length:420
## 1st Qu.:105.8 1st Qu.:64308 Class :character Class :character
## Median :210.5 Median :67761 Mode :character Mode :character
## Mean :210.5 Mean :67473
## 3rd Qu.:315.2 3rd Qu.:70419
## Max. :420.0 Max. :75440
## grades students teachers calworks
## Length:420 Min. : 81.0 Min. : 4.85 Min. : 0.000
## Class :character 1st Qu.: 379.0 1st Qu.: 19.66 1st Qu.: 4.395
## Mode :character Median : 950.5 Median : 48.56 Median :10.520
## Mean : 2628.8 Mean : 129.07 Mean :13.246
## 3rd Qu.: 3008.0 3rd Qu.: 146.35 3rd Qu.:18.981
## Max. :27176.0 Max. :1429.00 Max. :78.994
## lunch computer expenditure income
## Min. : 0.00 Min. : 0.0 Min. :3926 Min. : 5.335
## 1st Qu.: 23.28 1st Qu.: 46.0 1st Qu.:4906 1st Qu.:10.639
## Median : 41.75 Median : 117.5 Median :5215 Median :13.728
## Mean : 44.71 Mean : 303.4 Mean :5312 Mean :15.317
## 3rd Qu.: 66.86 3rd Qu.: 375.2 3rd Qu.:5601 3rd Qu.:17.629
## Max. :100.00 Max. :3324.0 Max. :7712 Max. :55.328
## english read math
## Min. : 0.000 Min. :604.5 Min. :605.4
## 1st Qu.: 1.941 1st Qu.:640.4 1st Qu.:639.4
## Median : 8.778 Median :655.8 Median :652.5
## Mean :15.768 Mean :655.0 Mean :653.3
## 3rd Qu.:22.970 3rd Qu.:668.7 3rd Qu.:665.9
## Max. :85.540 Max. :704.0 Max. :709.5
## mean and median of english and math across all schools
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# One-row summary over all rows: mean and median of the `english` and
# `math` columns, with missing values dropped before aggregating.
CASchools %>%
  summarise(
    EnglishAvg = mean(english, na.rm = TRUE),
    EnglishMedian = median(english, na.rm = TRUE),
    MathAVg = mean(math, na.rm = TRUE),
    MathMedian = median(math, na.rm = TRUE)
  )
## EnglishAvg EnglishMedian MathAVg MathMedian
## 1 15.76816 8.777634 653.3426 652.45
Question 2
# Keep only rows with more than 500 students, then retain the `school`,
# `english`, `read` and `math` columns.
CASchool_grades <- CASchools %>%
  filter(students > 500) %>%
  select(school, english, read, math)

# Preview the first rows of the subset.
head(CASchool_grades)
## school english read math
## 1: Thermalito Union Elementary 30.00000 636.3 650.9
## 2: Palermo Union Elementary 13.85768 641.8 639.9
## 3: Vineland Elementary 46.95946 605.5 612.5
## 4: Del Paso Heights Elementary 40.27592 611.9 613.4
## 5: West Fresno Elementary 54.60993 616.6 616.0
## 6: Woodville Elementary 80.12326 611.9 621.0
Question 3
# Give the four columns of the subset more descriptive names
# (new name = old name), keeping their order.
CASchool_grades <- CASchool_grades %>%
  select(
    Schools = school,
    EnglishGrades = english,
    ReadGrades = read,
    MathGrades = math
  )

# Print the renamed table.
CASchool_grades
## Schools EnglishGrades ReadGrades MathGrades
## 1: Thermalito Union Elementary 30.000002 636.3 650.9
## 2: Palermo Union Elementary 13.857677 641.8 639.9
## 3: Vineland Elementary 46.959461 605.5 612.5
## 4: Del Paso Heights Elementary 40.275921 611.9 613.4
## 5: West Fresno Elementary 54.609932 616.6 616.0
## ---
## 276: Portola Valley Elementary 1.164483 698.3 699.9
## 277: Saratoga Union Elementary 2.050406 698.9 701.7
## 278: Las Lomitas Elementary 5.995935 700.9 707.7
## 279: Los Altos Elementary 4.726101 704.0 709.5
## 280: Wheatland Elementary 5.005624 660.5 651.0
Question 4
# Per-column descriptive statistics for the renamed subset.
summary(object = CASchool_grades)
## Schools EnglishGrades ReadGrades MathGrades
## Length:280 Min. : 0.00 Min. :605.5 Min. :612.5
## Class :character 1st Qu.: 2.47 1st Qu.:639.4 1st Qu.:639.6
## Mode :character Median :10.65 Median :656.6 Median :653.6
## Mean :18.19 Mean :655.1 Mean :654.1
## 3rd Qu.:30.40 3rd Qu.:668.1 3rd Qu.:666.0
## Max. :80.42 Max. :704.0 Max. :709.5
# One-row summary of the subset: mean and median of `EnglishGrades` and
# `MathGrades`, with missing values dropped before aggregating.
CASchool_grades %>%
  summarise(
    EnglishGradeshAvg = mean(EnglishGrades, na.rm = TRUE),
    EnglishGradesMedian = median(EnglishGrades, na.rm = TRUE),
    MathGradesAVg = mean(MathGrades, na.rm = TRUE),
    MathGradesMedian = median(MathGrades, na.rm = TRUE)
  )
## EnglishGradeshAvg EnglishGradesMedian MathGradesAVg MathGradesMedian
## 1 18.18838 10.64659 654.065 653.6
The values differ from those in Question 1, which I expected because this
table contains only the 280 schools with more than 500 students, not all 420.
Question 5
# Recode the grade-span codes into readable labels.
CASchools$grades[CASchools$grades == "KK-08"] <- "Grade 8"
CASchools$grades[CASchools$grades == "KK-06"] <- "Grade 6"

# Bucket enrollment into "High Volume" (> 500 students) and "Low Volume"
# (<= 500 students).
# Both labels are derived from the numeric counts in a single step. Writing
# the "High Volume" label into the column first converts it to character,
# after which `<= 500` is a string comparison against "500" and counts such
# as "81" or "95" are never labelled "Low Volume".
# The is.numeric() guard keeps a re-run on already-labelled data from
# relabelling every row through that same string comparison.
if (is.numeric(CASchools$students)) {
  CASchools$students <- ifelse(
    CASchools$students > 500,
    "High Volume",
    "Low Volume"
  )
}

# Print the recoded table.
CASchools
## V1 district school county grades
## 1: 1 75119 Sunol Glen Unified Alameda Grade 8
## 2: 2 61499 Manzanita Elementary Butte Grade 8
## 3: 3 61549 Thermalito Union Elementary Butte Grade 8
## 4: 4 61457 Golden Feather Union Elementary Butte Grade 8
## 5: 5 61523 Palermo Union Elementary Butte Grade 8
## ---
## 416: 416 68957 Las Lomitas Elementary San Mateo Grade 8
## 417: 417 69518 Los Altos Elementary Santa Clara Grade 8
## 418: 418 72611 Somis Union Elementary Ventura Grade 8
## 419: 419 72744 Plumas Elementary Yuba Grade 8
## 420: 420 72751 Wheatland Elementary Yuba Grade 8
## students teachers calworks lunch computer expenditure income
## 1: Low Volume 10.90 0.5102 2.0408 67 6384.911 22.690001
## 2: Low Volume 11.15 15.4167 47.9167 101 5099.381 9.824000
## 3: High Volume 82.90 55.0323 76.3226 169 5501.955 8.978000
## 4: Low Volume 14.00 36.4754 77.0492 85 7101.831 8.978000
## 5: High Volume 71.50 33.1086 78.4270 171 5235.988 9.080333
## ---
## 416: High Volume 59.73 0.1016 3.5569 195 7290.339 28.716999
## 417: High Volume 208.48 1.0741 1.5038 721 5741.463 41.734108
## 418: Low Volume 20.15 3.5635 37.1938 45 4402.832 23.733000
## 419: Low Volume 5.00 11.8812 59.4059 14 4776.336 9.952000
## 420: High Volume 93.40 6.9235 47.5712 313 5993.393 12.502000
## english read math
## 1: 0.000000 691.6 690.0
## 2: 4.583333 660.5 661.9
## 3: 30.000002 636.3 650.9
## 4: 0.000000 651.9 643.5
## 5: 13.857677 641.8 639.9
## ---
## 416: 5.995935 700.9 707.7
## 417: 4.726101 704.0 709.5
## 418: 24.263039 648.3 641.7
## 419: 2.970297 667.9 676.5
## 420: 5.005624 660.5 651.0
Question 6
# Show the first 10 rows of the table.
head(x = CASchools, n = 10)
## V1 district school county grades students
## 1: 1 75119 Sunol Glen Unified Alameda Grade 8 Low Volume
## 2: 2 61499 Manzanita Elementary Butte Grade 8 Low Volume
## 3: 3 61549 Thermalito Union Elementary Butte Grade 8 High Volume
## 4: 4 61457 Golden Feather Union Elementary Butte Grade 8 Low Volume
## 5: 5 61523 Palermo Union Elementary Butte Grade 8 High Volume
## 6: 6 62042 Burrel Union Elementary Fresno Grade 8 Low Volume
## 7: 7 68536 Holt Union Elementary San Joaquin Grade 8 Low Volume
## 8: 8 63834 Vineland Elementary Kern Grade 8 High Volume
## 9: 9 62331 Orange Center Elementary Fresno Grade 8 Low Volume
## 10: 10 67306 Del Paso Heights Elementary Sacramento Grade 6 High Volume
## teachers calworks lunch computer expenditure income english read
## 1: 10.90 0.5102 2.0408 67 6384.911 22.690001 0.000000 691.6
## 2: 11.15 15.4167 47.9167 101 5099.381 9.824000 4.583333 660.5
## 3: 82.90 55.0323 76.3226 169 5501.955 8.978000 30.000002 636.3
## 4: 14.00 36.4754 77.0492 85 7101.831 8.978000 0.000000 651.9
## 5: 71.50 33.1086 78.4270 171 5235.988 9.080333 13.857677 641.8
## 6: 6.40 12.3188 86.9565 25 5580.147 10.415000 12.408759 605.7
## 7: 10.00 12.9032 94.6237 28 5253.331 6.577000 68.717949 604.5
## 8: 42.50 18.8063 100.0000 66 4565.746 8.174000 46.959461 605.5
## 9: 19.00 32.1900 93.1398 35 5355.548 7.385000 30.079157 608.9
## 10: 108.00 78.9942 87.3164 0 5036.211 11.613333 40.275921 611.9
## math
## 1: 690.0
## 2: 661.9
## 3: 650.9
## 4: 643.5
## 5: 639.9
## 6: 605.4
## 7: 609.0
## 8: 612.5
## 9: 616.1
## 10: 613.4
Question 7
# Read the CSV from its raw GitHub URL: comma-separated, with the first
# line used as column names.
github_url <- "https://raw.githubusercontent.com/folushoa/Data-Science/CASchoolsCSV/CASchools.csv"
github_CASchools <- fread(
  input = github_url,
  sep = ",",
  header = TRUE
)

# Print the table read from the URL.
github_CASchools
## V1 district school county grades students
## 1: 1 75119 Sunol Glen Unified Alameda KK-08 195
## 2: 2 61499 Manzanita Elementary Butte KK-08 240
## 3: 3 61549 Thermalito Union Elementary Butte KK-08 1550
## 4: 4 61457 Golden Feather Union Elementary Butte KK-08 243
## 5: 5 61523 Palermo Union Elementary Butte KK-08 1335
## ---
## 416: 416 68957 Las Lomitas Elementary San Mateo KK-08 984
## 417: 417 69518 Los Altos Elementary Santa Clara KK-08 3724
## 418: 418 72611 Somis Union Elementary Ventura KK-08 441
## 419: 419 72744 Plumas Elementary Yuba KK-08 101
## 420: 420 72751 Wheatland Elementary Yuba KK-08 1778
## teachers calworks lunch computer expenditure income english read
## 1: 10.90 0.5102 2.0408 67 6384.911 22.690001 0.000000 691.6
## 2: 11.15 15.4167 47.9167 101 5099.381 9.824000 4.583333 660.5
## 3: 82.90 55.0323 76.3226 169 5501.955 8.978000 30.000002 636.3
## 4: 14.00 36.4754 77.0492 85 7101.831 8.978000 0.000000 651.9
## 5: 71.50 33.1086 78.4270 171 5235.988 9.080333 13.857677 641.8
## ---
## 416: 59.73 0.1016 3.5569 195 7290.339 28.716999 5.995935 700.9
## 417: 208.48 1.0741 1.5038 721 5741.463 41.734108 4.726101 704.0
## 418: 20.15 3.5635 37.1938 45 4402.832 23.733000 24.263039 648.3
## 419: 5.00 11.8812 59.4059 14 4776.336 9.952000 2.970297 667.9
## 420: 93.40 6.9235 47.5712 313 5993.393 12.502000 5.005624 660.5
## math
## 1: 690.0
## 2: 661.9
## 3: 650.9
## 4: 643.5
## 5: 639.9
## ---
## 416: 707.7
## 417: 709.5
## 418: 641.7
## 419: 676.5
## 420: 651.0