Load the CSV file

library(data.table)

# Read the homework data set from the local CSV file with data.table's fread().
CASchools <- fread(
  input = "C:\\Users\\NBMF48\\Desktop\\SPS\\Bridge\\R\\Homework\\Homework2\\CASchools.csv"
)

Question 1

# Per-column summary statistics for the full data set
summary(object = CASchools)
##        V1           district        school             county         
##  Min.   :  1.0   Min.   :61382   Length:420         Length:420        
##  1st Qu.:105.8   1st Qu.:64308   Class :character   Class :character  
##  Median :210.5   Median :67761   Mode  :character   Mode  :character  
##  Mean   :210.5   Mean   :67473                                        
##  3rd Qu.:315.2   3rd Qu.:70419                                        
##  Max.   :420.0   Max.   :75440                                        
##     grades             students          teachers          calworks     
##  Length:420         Min.   :   81.0   Min.   :   4.85   Min.   : 0.000  
##  Class :character   1st Qu.:  379.0   1st Qu.:  19.66   1st Qu.: 4.395  
##  Mode  :character   Median :  950.5   Median :  48.56   Median :10.520  
##                     Mean   : 2628.8   Mean   : 129.07   Mean   :13.246  
##                     3rd Qu.: 3008.0   3rd Qu.: 146.35   3rd Qu.:18.981  
##                     Max.   :27176.0   Max.   :1429.00   Max.   :78.994  
##      lunch           computer       expenditure       income      
##  Min.   :  0.00   Min.   :   0.0   Min.   :3926   Min.   : 5.335  
##  1st Qu.: 23.28   1st Qu.:  46.0   1st Qu.:4906   1st Qu.:10.639  
##  Median : 41.75   Median : 117.5   Median :5215   Median :13.728  
##  Mean   : 44.71   Mean   : 303.4   Mean   :5312   Mean   :15.317  
##  3rd Qu.: 66.86   3rd Qu.: 375.2   3rd Qu.:5601   3rd Qu.:17.629  
##  Max.   :100.00   Max.   :3324.0   Max.   :7712   Max.   :55.328  
##     english            read            math      
##  Min.   : 0.000   Min.   :604.5   Min.   :605.4  
##  1st Qu.: 1.941   1st Qu.:640.4   1st Qu.:639.4  
##  Median : 8.778   Median :655.8   Median :652.5  
##  Mean   :15.768   Mean   :655.0   Mean   :653.3  
##  3rd Qu.:22.970   3rd Qu.:668.7   3rd Qu.:665.9  
##  Max.   :85.540   Max.   :704.0   Max.   :709.5
## mean and median of english and math across all schools
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Mean and median of the english and math columns over every row;
# na.rm = TRUE drops missing values before each aggregate is computed.
CASchools %>%
  summarise(
    EnglishAvg = mean(english, na.rm = TRUE),
    EnglishMedian = median(english, na.rm = TRUE),
    MathAVg = mean(math, na.rm = TRUE),
    MathMedian = median(math, na.rm = TRUE)
  )
##   EnglishAvg EnglishMedian  MathAVg MathMedian
## 1   15.76816      8.777634 653.3426     652.45

Question 2

# Keep only rows with more than 500 students, then retain the school name
# and the three score columns.
CASchool_grades <- CASchools %>%
  filter(students > 500) %>%
  select(school, english, read, math)
# Preview the first rows of the subset
head(CASchool_grades)
##                         school  english  read  math
## 1: Thermalito Union Elementary 30.00000 636.3 650.9
## 2:    Palermo Union Elementary 13.85768 641.8 639.9
## 3:         Vineland Elementary 46.95946 605.5 612.5
## 4: Del Paso Heights Elementary 40.27592 611.9 613.4
## 5:      West Fresno Elementary 54.60993 616.6 616.0
## 6:        Woodville Elementary 80.12326 611.9 621.0

Question 3

# Re-select the four columns under more descriptive names (new = old)
CASchool_grades <- CASchool_grades %>%
  select(
    Schools = school,
    EnglishGrades = english,
    ReadGrades = read,
    MathGrades = math
  )
CASchool_grades
##                          Schools EnglishGrades ReadGrades MathGrades
##   1: Thermalito Union Elementary     30.000002      636.3      650.9
##   2:    Palermo Union Elementary     13.857677      641.8      639.9
##   3:         Vineland Elementary     46.959461      605.5      612.5
##   4: Del Paso Heights Elementary     40.275921      611.9      613.4
##   5:      West Fresno Elementary     54.609932      616.6      616.0
##  ---                                                                
## 276:   Portola Valley Elementary      1.164483      698.3      699.9
## 277:   Saratoga Union Elementary      2.050406      698.9      701.7
## 278:      Las Lomitas Elementary      5.995935      700.9      707.7
## 279:        Los Altos Elementary      4.726101      704.0      709.5
## 280:        Wheatland Elementary      5.005624      660.5      651.0

Question 4

# Per-column summary statistics for the subset table
summary(object = CASchool_grades)
##    Schools          EnglishGrades     ReadGrades      MathGrades   
##  Length:280         Min.   : 0.00   Min.   :605.5   Min.   :612.5  
##  Class :character   1st Qu.: 2.47   1st Qu.:639.4   1st Qu.:639.6  
##  Mode  :character   Median :10.65   Median :656.6   Median :653.6  
##                     Mean   :18.19   Mean   :655.1   Mean   :654.1  
##                     3rd Qu.:30.40   3rd Qu.:668.1   3rd Qu.:666.0  
##                     Max.   :80.42   Max.   :704.0   Max.   :709.5
# Mean and median of EnglishGrades and MathGrades for the subset table,
# ignoring missing values.
CASchool_grades %>%
  summarise(
    EnglishGradeshAvg = mean(EnglishGrades, na.rm = TRUE),
    EnglishGradesMedian = median(EnglishGrades, na.rm = TRUE),
    MathGradesAVg = mean(MathGrades, na.rm = TRUE),
    MathGradesMedian = median(MathGrades, na.rm = TRUE)
  )
##   EnglishGradeshAvg EnglishGradesMedian MathGradesAVg MathGradesMedian
## 1          18.18838            10.64659       654.065            653.6

The values are different, which I expected, because this table contains fewer rows than the full data set: only the 280 schools with more than 500 students, out of 420.

Question 5

# Question 5: replace coded values with readable labels.

# Recode the grade-span codes in place.
CASchools$grades[CASchools$grades == "KK-08"] <- "Grade 8"
CASchools$grades[CASchools$grades == "KK-06"] <- "Grade 6"

# Classify enrolment in a single pass while `students` is still numeric.
# Assigning the "High Volume" label first would coerce the whole column to
# character, so a following `students <= 500` test would compare strings
# against "500"; counts such as "81" sort after "500" and would never be
# relabelled "Low Volume".
CASchools$students <- ifelse(
  CASchools$students > 500,
  "High Volume",
  "Low Volume"
)

CASchools
##       V1 district                          school      county  grades
##   1:   1    75119              Sunol Glen Unified     Alameda Grade 8
##   2:   2    61499            Manzanita Elementary       Butte Grade 8
##   3:   3    61549     Thermalito Union Elementary       Butte Grade 8
##   4:   4    61457 Golden Feather Union Elementary       Butte Grade 8
##   5:   5    61523        Palermo Union Elementary       Butte Grade 8
##  ---                                                                 
## 416: 416    68957          Las Lomitas Elementary   San Mateo Grade 8
## 417: 417    69518            Los Altos Elementary Santa Clara Grade 8
## 418: 418    72611          Somis Union Elementary     Ventura Grade 8
## 419: 419    72744               Plumas Elementary        Yuba Grade 8
## 420: 420    72751            Wheatland Elementary        Yuba Grade 8
##         students teachers calworks   lunch computer expenditure    income
##   1:  Low Volume    10.90   0.5102  2.0408       67    6384.911 22.690001
##   2:  Low Volume    11.15  15.4167 47.9167      101    5099.381  9.824000
##   3: High Volume    82.90  55.0323 76.3226      169    5501.955  8.978000
##   4:  Low Volume    14.00  36.4754 77.0492       85    7101.831  8.978000
##   5: High Volume    71.50  33.1086 78.4270      171    5235.988  9.080333
##  ---                                                                     
## 416: High Volume    59.73   0.1016  3.5569      195    7290.339 28.716999
## 417: High Volume   208.48   1.0741  1.5038      721    5741.463 41.734108
## 418:  Low Volume    20.15   3.5635 37.1938       45    4402.832 23.733000
## 419:  Low Volume     5.00  11.8812 59.4059       14    4776.336  9.952000
## 420: High Volume    93.40   6.9235 47.5712      313    5993.393 12.502000
##        english  read  math
##   1:  0.000000 691.6 690.0
##   2:  4.583333 660.5 661.9
##   3: 30.000002 636.3 650.9
##   4:  0.000000 651.9 643.5
##   5: 13.857677 641.8 639.9
##  ---                      
## 416:  5.995935 700.9 707.7
## 417:  4.726101 704.0 709.5
## 418: 24.263039 648.3 641.7
## 419:  2.970297 667.9 676.5
## 420:  5.005624 660.5 651.0

Question 6

# Show the first ten rows of the recoded table
head(CASchools, n = 10L)
##     V1 district                          school      county  grades    students
##  1:  1    75119              Sunol Glen Unified     Alameda Grade 8  Low Volume
##  2:  2    61499            Manzanita Elementary       Butte Grade 8  Low Volume
##  3:  3    61549     Thermalito Union Elementary       Butte Grade 8 High Volume
##  4:  4    61457 Golden Feather Union Elementary       Butte Grade 8  Low Volume
##  5:  5    61523        Palermo Union Elementary       Butte Grade 8 High Volume
##  6:  6    62042         Burrel Union Elementary      Fresno Grade 8  Low Volume
##  7:  7    68536           Holt Union Elementary San Joaquin Grade 8  Low Volume
##  8:  8    63834             Vineland Elementary        Kern Grade 8 High Volume
##  9:  9    62331        Orange Center Elementary      Fresno Grade 8  Low Volume
## 10: 10    67306     Del Paso Heights Elementary  Sacramento Grade 6 High Volume
##     teachers calworks    lunch computer expenditure    income   english  read
##  1:    10.90   0.5102   2.0408       67    6384.911 22.690001  0.000000 691.6
##  2:    11.15  15.4167  47.9167      101    5099.381  9.824000  4.583333 660.5
##  3:    82.90  55.0323  76.3226      169    5501.955  8.978000 30.000002 636.3
##  4:    14.00  36.4754  77.0492       85    7101.831  8.978000  0.000000 651.9
##  5:    71.50  33.1086  78.4270      171    5235.988  9.080333 13.857677 641.8
##  6:     6.40  12.3188  86.9565       25    5580.147 10.415000 12.408759 605.7
##  7:    10.00  12.9032  94.6237       28    5253.331  6.577000 68.717949 604.5
##  8:    42.50  18.8063 100.0000       66    4565.746  8.174000 46.959461 605.5
##  9:    19.00  32.1900  93.1398       35    5355.548  7.385000 30.079157 608.9
## 10:   108.00  78.9942  87.3164        0    5036.211 11.613333 40.275921 611.9
##      math
##  1: 690.0
##  2: 661.9
##  3: 650.9
##  4: 643.5
##  5: 639.9
##  6: 605.4
##  7: 609.0
##  8: 612.5
##  9: 616.1
## 10: 613.4

Question 7

# Raw-content URL of a CASchools.csv file hosted on GitHub
github_url <- "https://raw.githubusercontent.com/folushoa/Data-Science/CASchoolsCSV/CASchools.csv"

# Read the remote file as comma-separated text whose first row holds the
# column names.
github_CASchools <- fread(
  input = github_url,
  sep = ",",
  header = TRUE
)
github_CASchools
##       V1 district                          school      county grades students
##   1:   1    75119              Sunol Glen Unified     Alameda  KK-08      195
##   2:   2    61499            Manzanita Elementary       Butte  KK-08      240
##   3:   3    61549     Thermalito Union Elementary       Butte  KK-08     1550
##   4:   4    61457 Golden Feather Union Elementary       Butte  KK-08      243
##   5:   5    61523        Palermo Union Elementary       Butte  KK-08     1335
##  ---                                                                         
## 416: 416    68957          Las Lomitas Elementary   San Mateo  KK-08      984
## 417: 417    69518            Los Altos Elementary Santa Clara  KK-08     3724
## 418: 418    72611          Somis Union Elementary     Ventura  KK-08      441
## 419: 419    72744               Plumas Elementary        Yuba  KK-08      101
## 420: 420    72751            Wheatland Elementary        Yuba  KK-08     1778
##      teachers calworks   lunch computer expenditure    income   english  read
##   1:    10.90   0.5102  2.0408       67    6384.911 22.690001  0.000000 691.6
##   2:    11.15  15.4167 47.9167      101    5099.381  9.824000  4.583333 660.5
##   3:    82.90  55.0323 76.3226      169    5501.955  8.978000 30.000002 636.3
##   4:    14.00  36.4754 77.0492       85    7101.831  8.978000  0.000000 651.9
##   5:    71.50  33.1086 78.4270      171    5235.988  9.080333 13.857677 641.8
##  ---                                                                         
## 416:    59.73   0.1016  3.5569      195    7290.339 28.716999  5.995935 700.9
## 417:   208.48   1.0741  1.5038      721    5741.463 41.734108  4.726101 704.0
## 418:    20.15   3.5635 37.1938       45    4402.832 23.733000 24.263039 648.3
## 419:     5.00  11.8812 59.4059       14    4776.336  9.952000  2.970297 667.9
## 420:    93.40   6.9235 47.5712      313    5993.393 12.502000  5.005624 660.5
##       math
##   1: 690.0
##   2: 661.9
##   3: 650.9
##   4: 643.5
##   5: 639.9
##  ---      
## 416: 707.7
## 417: 709.5
## 418: 641.7
## 419: 676.5
## 420: 651.0