library(ggplot2)
# Read the CSV and give its eight columns short working names.
psy.331 <- read.csv(
  "~/R/STAT527_ADA/331.hmanalysis/331b.csv",
  stringsAsFactors = FALSE
)
colnames(psy.331) <- c(
  "l.name", "f.name", "u.name", "ID",
  "l.access", "avail", "ch1quiz", "assign.1"
)

# Coerce assign.1 to numeric; entries that cannot be parsed as numbers become NA.
psy.331$assign.1 <- as.numeric(psy.331$assign.1)

# Keep only the assign.1 column, dropping rows where it is NA.
psy.331 <- subset(psy.331, !is.na(assign.1), select = assign.1)

# Distribution of assign.1: histogram scaled to density (not count) on the
# y-axis, overlaid with a kernel density curve and a rug of the observations.
# NOTE(review): `..density..` is the older ggplot2 notation; newer releases
# prefer after_stat(density) -- confirm the installed version before changing.
p1 <- ggplot(psy.331, aes(x = assign.1)) +
  geom_histogram(aes(y = ..density..)) +
  geom_density(alpha = 0.1, fill = "white") +
  geom_rug() +
  xlab("grade 0-100")
print(p1)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Descriptive statistics for assign.1 in psy.331.
# The default summary() method has no `na.rm` parameter: the former
# `na.rm=T` was swallowed by `...` and had no effect, so it is dropped here.
p1.sum <- summary(psy.331$assign.1)

# Number of rows and columns in the data frame.
print(dim(psy.331))
## [1] 126   1

# Sample standard deviation of the grades.
print(sd(psy.331$assign.1))
## [1] 11.68965

# Five-number summary plus the mean.
print(p1.sum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   42.00   78.50   88.00   84.56   92.00  100.00

# Frequency of each distinct grade. Wrapped in print() like the calls above so
# the table is also shown when the script is run via source().
print(xtabs(~ assign.1, data = psy.331))
## assign.1
##  42  56  62  66  67  68  70  72  73  74  76  78  80  82  83  84  86  88 
##   3   2   1   1   2   1   3   3   1   6   6   3   3   7   1   9   7  26 
##  90  92  95  96 100 
##   4  13   1  10  13

# Removing scores below 70 (keeping grades of 70 and above) and repeating the analysis

# Restrict the data to rows with assign.1 of at least 70.
psy.331.s <- subset(psy.331, assign.1 >= 70, select = assign.1)

# Same display as for the full data: density-scaled histogram, kernel density
# curve and rug, drawn from the filtered data frame.
# NOTE(review): `..density..` is the older ggplot2 notation; newer releases
# prefer after_stat(density) -- confirm the installed version before changing.
p2 <- ggplot(psy.331.s, aes(x = assign.1)) +
  geom_histogram(aes(y = ..density..)) +
  geom_density(alpha = 0.1, fill = "white") +
  geom_rug() +
  xlab("grade 0-100")
print(p2)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Descriptive statistics for assign.1 in the filtered data frame psy.331.s.
# The default summary() method has no `na.rm` parameter: the former
# `na.rm=T` was swallowed by `...` and had no effect, so it is dropped here.
p2.sum <- summary(psy.331.s$assign.1)

# Number of rows and columns in the filtered data frame.
print(dim(psy.331.s))
## [1] 116   1

# Sample standard deviation of the filtered grades.
print(sd(psy.331.s$assign.1))
## [1] 8.131947

# Five-number summary plus the mean.
print(p2.sum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   70.00   82.00   88.00   86.96   92.00  100.00