Para el análisis de frecuencias y patrones en los errores en el aprendizaje del CPDnA se leen los datos del fichero LLP.csv proporcionado por Javier Villalva Díez.
#
# Load the raw export from the project directory. The first two lines of the
# file are skipped and the remainder is read without a header row, keeping
# text columns as character vectors.
setwd("~/git/JVD_HEALTH/JVD_LLP")
dd <- read.csv(
  file = "LLP2.csv",
  sep = ",",
  skip = 2,
  header = FALSE,
  stringsAsFactors = FALSE
)

# Append the row number as the last column.
dd[["row"]] <- as.numeric(row.names(dd))

# Column layout: four identifier columns, then one Step_<k> column for every
# remaining data column, then the row number added above.
names(dd) <- c(
  "Country", "ID", "CW", "PO",
  paste0("Step_", 1:(ncol(dd) - 5)),
  "Row"
)
Vamos a agregar el rendimiento de cada PO
#
# Aggregate the Japanese records per (CW, PO): number of IDs (cid) and the
# sum of each of the ten step columns (s1 .. s10), ordered by CW then PO.
# The list of per-step sums is generated rather than typed out, so each step
# is selected exactly once (the hand-written query selected Step_5 twice,
# producing a duplicated s5 column).
# NOTE(review): `dat` is reassigned by the dplyr aggregation that follows,
# which does not filter on Country -- confirm which of the two is intended.
dat <- sqldf(
  paste(
    "select count(ID) as cid, PO, CW,",
    paste0("sum(Step_", 1:10, ") as s", 1:10, collapse = ", "),
    "from dd where Country='Japan'",
    "group by CW, PO order by CW asc, PO asc"
  ),
  stringsAsFactors = FALSE
)
## Loading required package: tcltk
#
# Per (CW, PO) group: number of rows (cid) and the sum of every step column
# (s1 .. s10), ignoring missing values. All countries are included here.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the NA handling.
dat <- dd %>%
  group_by(CW, PO) %>%
  summarize(
    cid = n(),
    s1 = sum(Step_1, na.rm = TRUE),
    s2 = sum(Step_2, na.rm = TRUE),
    s3 = sum(Step_3, na.rm = TRUE),
    s4 = sum(Step_4, na.rm = TRUE),
    s5 = sum(Step_5, na.rm = TRUE),
    s6 = sum(Step_6, na.rm = TRUE),
    s7 = sum(Step_7, na.rm = TRUE),
    s8 = sum(Step_8, na.rm = TRUE),
    s9 = sum(Step_9, na.rm = TRUE),
    s10 = sum(Step_10, na.rm = TRUE)
  )
# Per (CW, PO) group:
#   cid - number of rows in the group
#   err - total over the ten step columns, ignoring missing values
#   per - err divided by cid
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the NA handling.
dat2 <- dd %>%
  group_by(CW, PO) %>%
  summarize(
    cid = n(),
    err = sum(Step_1, na.rm = TRUE) + sum(Step_2, na.rm = TRUE) +
      sum(Step_3, na.rm = TRUE) + sum(Step_4, na.rm = TRUE) +
      sum(Step_5, na.rm = TRUE) + sum(Step_6, na.rm = TRUE) +
      sum(Step_7, na.rm = TRUE) + sum(Step_8, na.rm = TRUE) +
      sum(Step_9, na.rm = TRUE) + sum(Step_10, na.rm = TRUE),
    per = err / cid
  )

# PO is treated as a categorical variable so that each PO gets its own
# discrete colour in the plots.
dat2$PO <- as.factor(dat2$PO)
#
#
# Line chart of the error rate (per) against CW, one coloured line per PO.
ggplot(dat2, aes(x = CW, y = per, group = PO)) +
  geom_line(aes(colour = PO)) +
  labs(x = "CW", y = "Errors / Number of CPDnA being worked-out")
#
# Upper panel: number of records (cid) per CW, one line per PO. The negative
# bottom margin pulls this panel down towards the main panel below it.
g.top <- ggplot(dat2, aes(x = CW, y = cid, group = PO)) +
  geom_line(aes(colour = PO)) +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 5, -30, 6), units = "points"),
    axis.title.y = element_text(vjust = 0.25)
  ) +
  ylab("# CPDnA")

# Lower panel: total number of errors (err) per CW, one line per PO.
g.main <- ggplot(dat2, aes(x = CW, y = err, group = PO)) +
  geom_line(aes(colour = PO)) +
  theme_bw() +
  theme(plot.margin = unit(c(0, 5, 1, 1), units = "points")) +
  xlab("CW") +
  ylab("Total number of Errors")

# Stack the two panels, giving the upper one a quarter of the height.
grid.arrange(g.top, g.main, heights = c(0.25, 0.75))
#
# For every PO, find the smallest CW whose error rate (per) is below 0.001
# and report it in the column `Lasts`; then emit the result as an HTML table.
dat3 <- sqldf(
  paste(
    "select PO, min(CW) as Lasts",
    "from dat2",
    "where per < 0.001",
    "group by PO"
  )
)
print(xtable(dat3), type = "html")
| | PO | Lasts |
|---|---|---|
| 1 | 1 | 9 |
| 2 | 2 | 17 |
| 3 | 3 | 10 |
#