Install and load the necessary packages
# install.packages("ggplot2")  # run once if ggplot2 is not installed
# library() stops with an error when the package is missing;
# require() would only return FALSE and let the script fail later.
library(ggplot2)
1.a) Load demographic data from file.
# Location of the demographic CSV file.
# NOTE: the name `file.path` masks base::file.path() as a variable; calls to
# file.path(...) still resolve to the function, but a rename would be clearer.
file.path <- '/temp/demographicdata.csv'
# read.csv() applies CSV-appropriate defaults (header row, comma separator,
# only double quotes for quoting, no comment character), so an apostrophe or
# '#' inside a school name cannot corrupt a row.
# stringsAsFactors = TRUE keeps School.Name and School.Year as factors on
# R >= 4.0, where the default changed to FALSE.
demographic.data <- read.csv(file.path, stringsAsFactors = TRUE)
1.b) Structure of the demographic data
# Inspect the column types: School.Name and School.Year are factors;
# Grade.1 through Grade.5 are integer columns.
str(demographic.data)
## 'data.frame': 32 obs. of 7 variables:
## $ School.Name: Factor w/ 4 levels "P.S. 015 ROBERTO CLEMENTE",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ School.Year: Factor w/ 8 levels "2005-2006","2006-2007",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ Grade.1 : int 40 39 39 44 28 35 35 26 55 45 ...
## $ Grade.2 : int 33 38 36 32 32 33 28 22 53 52 ...
## $ Grade.3 : int 38 34 38 34 30 30 25 26 68 47 ...
## $ Grade.4 : int 52 42 47 39 24 30 28 23 59 61 ...
## $ Grade.5 : int 29 46 40 49 38 25 29 24 64 57 ...
1.c) First 15 rows, as a sample of the data
# Preview the leading 15 records of the data frame.
head(x = demographic.data, n = 15L)
## School.Name School.Year Grade.1 Grade.2 Grade.3 Grade.4
## 1 P.S. 015 ROBERTO CLEMENTE 2005-2006 40 33 38 52
## 2 P.S. 015 ROBERTO CLEMENTE 2006-2007 39 38 34 42
## 3 P.S. 015 ROBERTO CLEMENTE 2007-2008 39 36 38 47
## 4 P.S. 015 ROBERTO CLEMENTE 2008-2009 44 32 34 39
## 5 P.S. 015 ROBERTO CLEMENTE 2009-2010 28 32 30 24
## 6 P.S. 015 ROBERTO CLEMENTE 2010-2011 35 33 30 30
## 7 P.S. 015 ROBERTO CLEMENTE 2011-2012 35 28 25 28
## 8 P.S. 015 ROBERTO CLEMENTE 2012-2013 26 22 26 23
## 9 P.S. 019 ASHER LEVY 2005-2006 55 53 68 59
## 10 P.S. 019 ASHER LEVY 2006-2007 45 52 47 61
## 11 P.S. 019 ASHER LEVY 2007-2008 46 47 53 48
## 12 P.S. 019 ASHER LEVY 2008-2009 56 44 53 48
## 13 P.S. 019 ASHER LEVY 2009-2010 49 56 39 50
## 14 P.S. 019 ASHER LEVY 2010-2011 53 47 41 49
## 15 P.S. 019 ASHER LEVY 2011-2012 52 54 52 46
## Grade.5
## 1 29
## 2 46
## 3 40
## 4 49
## 5 38
## 6 25
## 7 29
## 8 24
## 9 64
## 10 57
## 11 68
## 12 47
## 13 44
## 14 49
## 15 46
1.d) Last 15 rows, as a sample of the data
# Preview the trailing 15 records of the data frame.
tail(x = demographic.data, n = 15L)
## School.Name School.Year Grade.1 Grade.2 Grade.3 Grade.4
## 18 PS 020 ANNA SILVER 2006-2007 106 101 109 105
## 19 PS 020 ANNA SILVER 2007-2008 107 94 89 101
## 20 PS 020 ANNA SILVER 2008-2009 91 100 99 93
## 21 PS 020 ANNA SILVER 2009-2010 89 80 100 103
## 22 PS 020 ANNA SILVER 2010-2011 98 84 82 100
## 23 PS 020 ANNA SILVER 2011-2012 121 87 88 85
## 24 PS 020 ANNA SILVER 2012-2013 107 106 86 77
## 25 PS 034 FRANKLIN D ROOSEVELT 2005-2006 40 46 31 46
## 26 PS 034 FRANKLIN D ROOSEVELT 2006-2007 29 34 45 36
## 27 PS 034 FRANKLIN D ROOSEVELT 2007-2008 32 33 35 44
## 28 PS 034 FRANKLIN D ROOSEVELT 2008-2009 30 34 37 37
## 29 PS 034 FRANKLIN D ROOSEVELT 2009-2010 38 31 33 39
## 30 PS 034 FRANKLIN D ROOSEVELT 2010-2011 38 43 28 37
## 31 PS 034 FRANKLIN D ROOSEVELT 2011-2012 38 36 45 28
## 32 PS 034 FRANKLIN D ROOSEVELT 2012-2013 34 37 44 50
## Grade.5
## 18 97
## 19 102
## 20 86
## 21 82
## 22 89
## 23 91
## 24 75
## 25 54
## 26 39
## 27 32
## 28 45
## 29 39
## 30 37
## 31 40
## 32 32
# Per-column summary: level counts for the factor columns, and
# min / quartiles / mean / max for each grade column.
summary(demographic.data)
## School.Name School.Year Grade.1
## P.S. 015 ROBERTO CLEMENTE :8 2005-2006:4 Min. : 26.00
## P.S. 019 ASHER LEVY :8 2006-2007:4 1st Qu.: 37.25
## PS 020 ANNA SILVER :8 2007-2008:4 Median : 44.50
## PS 034 FRANKLIN D ROOSEVELT:8 2008-2009:4 Mean : 57.09
## 2009-2010:4 3rd Qu.: 64.25
## 2010-2011:4 Max. :141.00
## (Other) :8
## Grade.2 Grade.3 Grade.4 Grade.5
## Min. : 22.00 Min. : 25.00 Min. : 23.00 Min. : 24.00
## 1st Qu.: 33.75 1st Qu.: 34.00 1st Qu.: 38.50 1st Qu.: 38.75
## Median : 45.00 Median : 44.50 Median : 48.00 Median : 46.00
## Mean : 53.56 Mean : 53.88 Mean : 55.81 Mean : 54.34
## 3rd Qu.: 62.00 3rd Qu.: 71.50 3rd Qu.: 65.00 3rd Qu.: 69.75
## Max. :115.00 Max. :117.00 Max. :109.00 Max. :102.00
##
# Count the rows recorded for each school.
table(demographic.data$School.Name)
##
## P.S. 015 ROBERTO CLEMENTE P.S. 019 ASHER LEVY
## 8 8
## PS 020 ANNA SILVER PS 034 FRANKLIN D ROOSEVELT
## 8 8
# Cross-tabulate school year against school name to see how many rows exist
# for each school/year pair.
table(demographic.data$School.Year, demographic.data$School.Name)
##
## P.S. 015 ROBERTO CLEMENTE P.S. 019 ASHER LEVY
## 2005-2006 1 1
## 2006-2007 1 1
## 2007-2008 1 1
## 2008-2009 1 1
## 2009-2010 1 1
## 2010-2011 1 1
## 2011-2012 1 1
## 2012-2013 1 1
##
## PS 020 ANNA SILVER PS 034 FRANKLIN D ROOSEVELT
## 2005-2006 1 1
## 2006-2007 1 1
## 2007-2008 1 1
## 2008-2009 1 1
## 2009-2010 1 1
## 2010-2011 1 1
## 2011-2012 1 1
## 2012-2013 1 1
-> Total enrollment (Grades 1 to 5) by school year
# Add the five grade columns row by row, then sum those totals within each
# school year.
aggregate(
  (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5) ~ School.Year,
  data = demographic.data,
  FUN = sum
)
## School.Year (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5)
## 1 2005-2006 1292
## 2 2006-2007 1162
## 3 2007-2008 1131
## 4 2008-2009 1098
## 5 2009-2010 1024
## 6 2010-2011 1028
## 7 2011-2012 1054
## 8 2012-2013 1001
-> Total enrollment (Grades 1 to 5) by school name
# Add the five grade columns row by row, then sum those totals within each
# school.
aggregate(
  (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5) ~ School.Name,
  data = demographic.data,
  FUN = sum
)
## School.Name
## 1 P.S. 015 ROBERTO CLEMENTE
## 2 P.S. 019 ASHER LEVY
## 3 PS 020 ANNA SILVER
## 4 PS 034 FRANKLIN D ROOSEVELT
## (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5)
## 1 1360
## 2 2030
## 3 3894
## 4 1506
5.a) Box plot using the boxplot() function from base R
# Box plot of the Grade.1 column over all rows of the data frame.
boxplot(demographic.data$Grade.1)
5.b) Histogram using the hist() function from base R
# Histogram of the Grade.1 column using hist()'s default breaks.
hist(demographic.data$Grade.1)
5.c) qplot function from ggplot2 package
# Given a single numeric vector, qplot() draws a histogram (hence the
# stat_bin message below).
# NOTE(review): qplot() is deprecated as of ggplot2 3.4.0; the supported
# equivalent is ggplot(demographic.data, aes(Grade.1)) + geom_histogram().
qplot(demographic.data$Grade.1)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
6.a) Scatter plot using the plot() function from base R
# Scatter plot of Grade.2 (y) against Grade.1 (x). Grade.1 goes on the x-axis
# so the axes match the qplot() version of the same plot; passing data = keeps
# the axis labels to the bare column names.
plot(Grade.2 ~ Grade.1, data = demographic.data)
6.b) qplot function from ggplot2 package
# Scatter plot with Grade.1 on the x-axis and Grade.2 on the y-axis.
# NOTE(review): qplot() is deprecated as of ggplot2 3.4.0; the supported
# equivalent is ggplot(demographic.data, aes(Grade.1, Grade.2)) + geom_point().
qplot(Grade.1, Grade.2, data=demographic.data)