Non-graphical representation of numerical data

Install necessary packages

# Attach ggplot2, which the ggplot2-based plots further down depend on.
# library() stops with an error when the package is missing; require() would
# only warn and return FALSE, so the script would fail later at the first
# ggplot2 call instead of here.
# One-time setup if the package is not installed yet:
#install.packages("ggplot2")
library(ggplot2)

1.a) Load demographic data from file.

# Read the comma-separated demographic file (first row holds the column
# names) into a data frame.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 the default is
# FALSE, which would load School.Name and School.Year as character vectors
# rather than the factors that the structure and summary output below report.
# NOTE(review): the variable name file.path masks base::file.path() for
# non-call lookups; it is kept unchanged in case other code refers to it.
file.path <- '/temp/demographicdata.csv'
demographic.data <- read.table(file.path, header = TRUE, sep = ",",
                               stringsAsFactors = TRUE)

1.b) Structure of the demographic data

# Compact overview of the data frame: one line per column with its type.
# School.Name and School.Year are loaded as factors;
# Grade.1 through Grade.5 are integer columns.
utils::str(object = demographic.data)
## 'data.frame':    32 obs. of  7 variables:
##  $ School.Name: Factor w/ 4 levels "P.S. 015 ROBERTO CLEMENTE",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ School.Year: Factor w/ 8 levels "2005-2006","2006-2007",..: 1 2 3 4 5 6 7 8 1 2 ...
##  $ Grade.1    : int  40 39 39 44 28 35 35 26 55 45 ...
##  $ Grade.2    : int  33 38 36 32 32 33 28 22 53 52 ...
##  $ Grade.3    : int  38 34 38 34 30 30 25 26 68 47 ...
##  $ Grade.4    : int  52 42 47 39 24 30 28 23 59 61 ...
##  $ Grade.5    : int  29 46 40 49 38 25 29 24 64 57 ...

1.c) First 15 rows, shown as a sample of the data

# Print the first 15 observations of the data frame.
utils::head(x = demographic.data, n = 15L)
##                  School.Name School.Year Grade.1 Grade.2 Grade.3 Grade.4
## 1  P.S. 015 ROBERTO CLEMENTE   2005-2006      40      33      38      52
## 2  P.S. 015 ROBERTO CLEMENTE   2006-2007      39      38      34      42
## 3  P.S. 015 ROBERTO CLEMENTE   2007-2008      39      36      38      47
## 4  P.S. 015 ROBERTO CLEMENTE   2008-2009      44      32      34      39
## 5  P.S. 015 ROBERTO CLEMENTE   2009-2010      28      32      30      24
## 6  P.S. 015 ROBERTO CLEMENTE   2010-2011      35      33      30      30
## 7  P.S. 015 ROBERTO CLEMENTE   2011-2012      35      28      25      28
## 8  P.S. 015 ROBERTO CLEMENTE   2012-2013      26      22      26      23
## 9        P.S. 019 ASHER LEVY   2005-2006      55      53      68      59
## 10       P.S. 019 ASHER LEVY   2006-2007      45      52      47      61
## 11       P.S. 019 ASHER LEVY   2007-2008      46      47      53      48
## 12       P.S. 019 ASHER LEVY   2008-2009      56      44      53      48
## 13       P.S. 019 ASHER LEVY   2009-2010      49      56      39      50
## 14       P.S. 019 ASHER LEVY   2010-2011      53      47      41      49
## 15       P.S. 019 ASHER LEVY   2011-2012      52      54      52      46
##    Grade.5
## 1       29
## 2       46
## 3       40
## 4       49
## 5       38
## 6       25
## 7       29
## 8       24
## 9       64
## 10      57
## 11      68
## 12      47
## 13      44
## 14      49
## 15      46

1.d) Last 15 rows, shown as a sample of the data

# Print the last 15 observations of the data frame.
utils::tail(x = demographic.data, n = 15L)
##                    School.Name School.Year Grade.1 Grade.2 Grade.3 Grade.4
## 18          PS 020 ANNA SILVER   2006-2007     106     101     109     105
## 19          PS 020 ANNA SILVER   2007-2008     107      94      89     101
## 20          PS 020 ANNA SILVER   2008-2009      91     100      99      93
## 21          PS 020 ANNA SILVER   2009-2010      89      80     100     103
## 22          PS 020 ANNA SILVER   2010-2011      98      84      82     100
## 23          PS 020 ANNA SILVER   2011-2012     121      87      88      85
## 24          PS 020 ANNA SILVER   2012-2013     107     106      86      77
## 25 PS 034 FRANKLIN D ROOSEVELT   2005-2006      40      46      31      46
## 26 PS 034 FRANKLIN D ROOSEVELT   2006-2007      29      34      45      36
## 27 PS 034 FRANKLIN D ROOSEVELT   2007-2008      32      33      35      44
## 28 PS 034 FRANKLIN D ROOSEVELT   2008-2009      30      34      37      37
## 29 PS 034 FRANKLIN D ROOSEVELT   2009-2010      38      31      33      39
## 30 PS 034 FRANKLIN D ROOSEVELT   2010-2011      38      43      28      37
## 31 PS 034 FRANKLIN D ROOSEVELT   2011-2012      38      36      45      28
## 32 PS 034 FRANKLIN D ROOSEVELT   2012-2013      34      37      44      50
##    Grade.5
## 18      97
## 19     102
## 20      86
## 21      82
## 22      89
## 23      91
## 24      75
## 25      54
## 26      39
## 27      32
## 28      45
## 29      39
## 30      37
## 31      40
## 32      32
  2. Statistical summary of the data
# Per-column summary: level counts for the two factor columns and
# min / quartiles / median / mean / max for each grade column.
summary(object = demographic.data)
##                       School.Name    School.Year    Grade.1      
##  P.S. 015 ROBERTO CLEMENTE  :8    2005-2006:4    Min.   : 26.00  
##  P.S. 019 ASHER LEVY        :8    2006-2007:4    1st Qu.: 37.25  
##  PS 020 ANNA SILVER         :8    2007-2008:4    Median : 44.50  
##  PS 034 FRANKLIN D ROOSEVELT:8    2008-2009:4    Mean   : 57.09  
##                                   2009-2010:4    3rd Qu.: 64.25  
##                                   2010-2011:4    Max.   :141.00  
##                                   (Other)  :8                    
##     Grade.2          Grade.3          Grade.4          Grade.5      
##  Min.   : 22.00   Min.   : 25.00   Min.   : 23.00   Min.   : 24.00  
##  1st Qu.: 33.75   1st Qu.: 34.00   1st Qu.: 38.50   1st Qu.: 38.75  
##  Median : 45.00   Median : 44.50   Median : 48.00   Median : 46.00  
##  Mean   : 53.56   Mean   : 53.88   Mean   : 55.81   Mean   : 54.34  
##  3rd Qu.: 62.00   3rd Qu.: 71.50   3rd Qu.: 65.00   3rd Qu.: 69.75  
##  Max.   :115.00   Max.   :117.00   Max.   :109.00   Max.   :102.00  
## 
  3. Frequency by School Name
# Count how many rows are recorded for each school.
table(demographic.data[["School.Name"]])
## 
##   P.S. 015 ROBERTO CLEMENTE         P.S. 019 ASHER LEVY 
##                           8                           8 
##          PS 020 ANNA SILVER PS 034 FRANKLIN D ROOSEVELT 
##                           8                           8
  4. Frequency by School Year across School Names
# Cross-tabulate school year (rows) against school name (columns) to count
# the rows recorded for each year/school combination.
table(
  demographic.data[["School.Year"]],
  demographic.data[["School.Name"]]
)
##            
##             P.S. 015 ROBERTO CLEMENTE P.S. 019 ASHER LEVY
##   2005-2006                         1                   1
##   2006-2007                         1                   1
##   2007-2008                         1                   1
##   2008-2009                         1                   1
##   2009-2010                         1                   1
##   2010-2011                         1                   1
##   2011-2012                         1                   1
##   2012-2013                         1                   1
##            
##             PS 020 ANNA SILVER PS 034 FRANKLIN D ROOSEVELT
##   2005-2006                  1                           1
##   2006-2007                  1                           1
##   2007-2008                  1                           1
##   2008-2009                  1                           1
##   2009-2010                  1                           1
##   2010-2011                  1                           1
##   2011-2012                  1                           1
##   2012-2013                  1                           1

-> Sum of enrollment (Grades 1-5 combined) by School Year

# Add up the five grade columns row by row, then total that sum within each
# school year.
aggregate(
  (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5) ~ School.Year,
  data = demographic.data,
  FUN = sum
)
##   School.Year (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5)
## 1   2005-2006                                              1292
## 2   2006-2007                                              1162
## 3   2007-2008                                              1131
## 4   2008-2009                                              1098
## 5   2009-2010                                              1024
## 6   2010-2011                                              1028
## 7   2011-2012                                              1054
## 8   2012-2013                                              1001

-> Sum of enrollment (Grades 1-5 combined) by School Name

# Add up the five grade columns row by row, then total that sum within each
# school.
aggregate(
  (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5) ~ School.Name,
  data = demographic.data,
  FUN = sum
)
##                   School.Name
## 1   P.S. 015 ROBERTO CLEMENTE
## 2         P.S. 019 ASHER LEVY
## 3          PS 020 ANNA SILVER
## 4 PS 034 FRANKLIN D ROOSEVELT
##   (Grade.1 + Grade.2 + Grade.3 + Grade.4 + Grade.5)
## 1                                              1360
## 2                                              2030
## 3                                              3894
## 4                                              1506

Graphical representation of numerical data

  5. Create different kinds of graphs for the Grade.1 data (a single numeric variable)

5.a) boxplot function from base R

# Box-and-whisker plot of the Grade.1 values.
boxplot(x = demographic.data[["Grade.1"]])

5.b) histogram function from base R

# Histogram of the Grade.1 values. The `$` expression is kept as written
# because hist() builds its default title and x-axis label from it.
hist(x = demographic.data$Grade.1)

5.c) qplot function from ggplot2 package

# Histogram of Grade.1 drawn with ggplot2.
# qplot() is deprecated in current ggplot2 releases, so the plot is built
# with the explicit ggplot() + geom_histogram() form instead.
# The bin width is set explicitly to range/30 (the value qplot() previously
# fell back to), which also silences the "binwidth defaulted" message.
ggplot(demographic.data, aes(x = Grade.1)) +
  geom_histogram(binwidth = diff(range(demographic.data$Grade.1)) / 30)

  6. Create a scatterplot for the Grade.1 and Grade.2 data (two numeric variables)

6.a) plot function from base R

# Scatterplot with Grade.1 on the x axis and Grade.2 on the y axis, the same
# orientation as the ggplot2 version of this plot (the formula is y ~ x).
# Passing data= lets the axes be labelled with the bare column names.
plot(Grade.2 ~ Grade.1, data = demographic.data)

6.b) qplot function from ggplot2 package

# Scatterplot of Grade.1 (x axis) against Grade.2 (y axis) drawn with ggplot2.
# qplot() is deprecated in current ggplot2 releases, so the plot is built
# with the explicit ggplot() + geom_point() form, keeping the same axes.
ggplot(demographic.data, aes(x = Grade.1, y = Grade.2)) +
  geom_point()