library(ggplot2)
# Read the CSV export and reduce it to the non-missing `assign.1` scores,
# then plot their distribution and print summary statistics.
psy.331 <- read.csv(
  "~/R/STAT527_ADA/331.hmanalysis/331b.csv",
  stringsAsFactors = FALSE
)

# The eight names below are assigned by position; fail fast if the file does
# not have exactly eight columns rather than mislabelling them silently.
stopifnot(ncol(psy.331) == 8L)
colnames(psy.331) <- c(
  "l.name", "f.name", "u.name", "ID",
  "l.access", "avail", "ch1quiz", "assign.1"
)

# Coerce the score column to numeric. Entries that cannot be parsed as numbers
# become NA (R emits a coercion warning) and are dropped by the subset below.
psy.331$assign.1 <- as.numeric(psy.331$assign.1)
psy.331 <- subset(psy.331, !is.na(assign.1), select = assign.1)

# Histogram scaled to density (not count) so the kernel density estimate can
# be overlaid on the same y-axis; the rug marks the individual scores.
# NOTE(review): `..density..` is the older spelling; ggplot2 >= 3.4.0 prefers
# `after_stat(density)`. Kept as-is for compatibility with the ggplot2 version
# that produced the recorded output below.
p1 <- ggplot(psy.331, aes(x = assign.1)) +
  geom_histogram(aes(y = ..density..)) +
  geom_density(alpha = 0.1, fill = "white") +
  geom_rug() +
  xlab("grade 0-100")
print(p1)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Missing scores were already removed above, so no NA handling is needed here.
p1.sum <- summary(psy.331$assign.1)
print(dim(psy.331))
## [1] 126 1
print(sd(psy.331$assign.1))
## [1] 11.68965
print(p1.sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.00 78.50 88.00 84.56 92.00 100.00

# Frequency table of each distinct score; printed explicitly so it is also
# shown when the script is run via source().
print(xtabs(~ assign.1, data = psy.331))
## assign.1
## 42 56 62 66 67 68 70 72 73 74 76 78 80 82 83 84 86 88
## 3 2 1 1 2 1 3 3 1 6 6 3 3 7 1 9 7 26
## 90 92 95 96 100
## 4 13 1 10 13
# Removing scores below 70
# Repeat the distribution plot and summary statistics using only the scores
# that are at least 70.
psy.331.s <- subset(psy.331, assign.1 >= 70, select = assign.1)

# Histogram scaled to density (not count) so the kernel density estimate can
# be overlaid on the same y-axis; the rug marks the individual scores.
# NOTE(review): `..density..` is the older spelling; ggplot2 >= 3.4.0 prefers
# `after_stat(density)`. Kept as-is for compatibility with the ggplot2 version
# that produced the recorded output below.
p2 <- ggplot(psy.331.s, aes(x = assign.1)) +
  geom_histogram(aes(y = ..density..)) +
  geom_density(alpha = 0.1, fill = "white") +
  geom_rug() +
  xlab("grade 0-100")
print(p2)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# summary() on a numeric vector handles NAs itself and ignores `na.rm`, so the
# argument is not passed.
p2.sum <- summary(psy.331.s$assign.1)
print(dim(psy.331.s))
## [1] 116 1
print(sd(psy.331.s$assign.1))
## [1] 8.131947
print(p2.sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.00 82.00 88.00 86.96 92.00 100.00