Set the working directory and read in the data.

# Point the session at the project folder so the relative file name below
# resolves.
# NOTE(review): this absolute path is machine-specific; the script stops here
# on any computer that does not have this folder.
setwd("C:/doug/Hanna/dli analysis")

# Read the CSV into `dli`; empty fields become NA (na.strings = "").
dli <- read.csv("AAPPL 2013-2015-2.csv", na.strings = "")

First, a look at the data by language:

# Number of records per language.
table(dli$Language)
## 
##    Chinese    English     French Portuguese    Spanish 
##       5591         49       2913        247      10459

# Per-language subsets. Rows are picked through which(), which leaves out rows
# whose Language is NA, exactly as subset() does. dliF keeps the row positions
# of the French records within dli.
dliF <- which(dli$Language == "French")
dli_french <- dli[dliF, , drop = FALSE]
dli_spanish <- dli[which(dli$Language == "Spanish"), , drop = FALSE]

# Spread of grade levels among the French records.
summary(dli_french$Grade.Level)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   3.000   4.000   3.879   4.000   7.000

# Record counts per grade level: Spanish first, then French.
table(dli_spanish$Grade.Level)
## 
##    3    4    5    6    7    8    9 
## 4159 2809 1845 1092  320  232    2

# dliF is a vector of row positions, not a data frame, so it has no
# Grade.Level column to summarise; the French grade levels are taken from
# dli_french instead.
table(dli_french$Grade.Level)
## 
##    3    4    5    6    7 
## 1274  938  483  216    2

Next, box plots of speaking scores by grade level for Spanish and French:

# Speaking score by grade level, one box per grade. The formula columns are
# looked up in the data frame passed as `data`.
boxplot(
  ILS...Speaking ~ Grade.Level,
  data = dli_spanish,
  main = "Spanish Speaking, 2015 data source",
  xlab = "Grade Level",
  ylab = "Speaking Score"
)

# Same plot for the French records.
boxplot(
  ILS...Speaking ~ Grade.Level,
  data = dli_french,
  main = "French Speaking, 2015 data source",
  xlab = "Grade Level",
  ylab = "Speaking Score"
)

And, of course, the mean score on each skill for every language:

# Mean score on each of the four skills, per language. The score columns are
# selected by name rather than by position (2:5), so a reordered or widened
# CSV cannot silently average the wrong columns.
score_cols <- c("ILS...Speaking", "PW...Writing", "IL...Listening",
                "IR...Reading")

# na.rm = TRUE is forwarded to mean(); a language with no scores at all for a
# skill therefore shows NaN in the table.
language_means <- aggregate(dli[, score_cols], by = list(dli$Language),
                            FUN = mean, na.rm = TRUE)
print(language_means, digits = 3)
##      Group.1 ILS...Speaking PW...Writing IL...Listening IR...Reading
## 1    Chinese           3.51         3.40           4.72         3.07
## 2    English           6.22          NaN           7.10         5.97
## 3     French           5.10         4.77           5.59         5.51
## 4 Portuguese           4.73          NaN            NaN          NaN
## 5    Spanish           5.22         5.72           6.39         5.45

# The table is printed a second time; the stored result is reused instead of
# running the aggregation again.
print(language_means, digits = 3)