Introducción

Para el análisis de frecuencias y patrones de los errores en el aprendizaje del CPDnA se leen los datos del fichero LLP2.csv, proporcionado por Javier Villalva Díez.

#
# Load the raw CSV export and give its columns meaningful names.
# NOTE(review): setwd() ties this script to one machine's directory layout;
# it is kept because later code may rely on the working directory.
setwd('~/git/JVD_HEALTH/JVD_LLP')
# The first two lines of the file are skipped and no header row is read, so
# the columns get default names here and are renamed below.
dd <- read.csv(
  file = "LLP2.csv",
  sep = ",",
  skip = 2,
  header = FALSE,
  stringsAsFactors = FALSE
)
# Store each record's row name as an explicit numeric column.
dd$row <- as.numeric(rownames(dd))
# Four identifier columns come first and the row index comes last; every
# column in between is named Step_1, Step_2, ...
colnames(dd) <- c(
  "Country", "ID", "CW", "PO",
  paste0("Step_", seq_len(ncol(dd) - 5)),
  "Row"
)

A continuación se agrega el rendimiento de cada PO por CW (número de registros y suma de cada Step).

#
# Per-(CW, PO) totals of every step column, restricted to rows whose Country
# is 'Japan'. cid is the number of non-NULL IDs in each group.
# Each step is selected exactly once: the query previously listed
# sum(Step_5) twice, which produced a duplicated s5 column in the result.
# NOTE(review): `dat` is reassigned by the dplyr summary further down, and
# that version does not filter by Country; confirm which one is intended.
dat <- sqldf(
  paste(
    "select count(ID) as cid, PO, CW,",
    "sum(Step_1) as s1, sum(Step_2) as s2, sum(Step_3) as s3,",
    "sum(Step_4) as s4, sum(Step_5) as s5, sum(Step_6) as s6,",
    "sum(Step_7) as s7, sum(Step_8) as s8, sum(Step_9) as s9,",
    "sum(Step_10) as s10",
    "from dd where Country='Japan'",
    "group by CW, PO order by CW asc, PO asc"
  ),
  stringsAsFactors = FALSE
)
## Loading required package: tcltk
#
# Per-(CW, PO) summary: cid is the number of records in the group and
# s1..s10 are the per-step totals, ignoring missing values.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
# NOTE(review): unlike the sqldf query above, no Country filter is applied
# here; confirm that aggregating over all countries is intended.
dat <- dd %>%
  group_by(CW, PO) %>%
  summarize(
    cid = n(),
    s1 = sum(Step_1, na.rm = TRUE),
    s2 = sum(Step_2, na.rm = TRUE),
    s3 = sum(Step_3, na.rm = TRUE),
    s4 = sum(Step_4, na.rm = TRUE),
    s5 = sum(Step_5, na.rm = TRUE),
    s6 = sum(Step_6, na.rm = TRUE),
    s7 = sum(Step_7, na.rm = TRUE),
    s8 = sum(Step_8, na.rm = TRUE),
    s9 = sum(Step_9, na.rm = TRUE),
    s10 = sum(Step_10, na.rm = TRUE)
  )
# Total errors per group (err) and errors per record (per). These are built
# from the step totals already in `dat` instead of recomputing the ten
# grouped sums; the addition order matches the original expression.
dat2 <- dat %>%
  mutate(
    err = s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10,
    per = err / cid
  ) %>%
  select(CW, PO, cid, err, per)
# PO is turned into a factor; the plots below map it to line colour.
dat2$PO <- as.factor(dat2$PO)
#

Representación de los patrones de aprendizaje

#
# Errors per record (per) against CW, one coloured line for each PO.
ggplot(dat2, aes(x = CW, y = per, group = PO)) +
  geom_line(aes(colour = PO)) +
  labs(x = "CW", y = "Errors / Number of CPDnA being worked-out")

#
# Upper panel: number of records (cid) per CW, one line per PO.
# The negative bottom margin pulls this panel towards the one below it.
g.top <- ggplot(dat2, aes(x = CW, y = cid, group = PO)) +
  geom_line(aes(colour = PO)) +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 5, -30, 6), units = "points"),
    axis.title.y = element_text(vjust = 0.25)
  ) +
  labs(y = "# CPDnA")

# Lower panel: total errors (err) per CW, one line per PO.
g.main <- ggplot(dat2, aes(x = CW, y = err, group = PO)) +
  geom_line(aes(colour = PO)) +
  theme_bw() +
  theme(plot.margin = unit(c(0, 5, 1, 1), units = "points")) +
  labs(x = "CW", y = "Total number of Errors")

# Stack both panels: a quarter of the height on top, the rest for the main.
grid.arrange(g.top, g.main, heights = c(1 / 4, 3 / 4))

#
# For each PO, the smallest CW whose errors-per-record value (per) is below
# 0.001, reported in the column Lasts.
dat3 <- sqldf(
  "select PO, min(CW) as Lasts from dat2 where per < 0.001 group by PO"
)
# Emit the result as an HTML table.
print(xtable(dat3), type = "html")
PO Lasts
1 1 9
2 2 17
3 3 10
#