setwd("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados")
Timeline <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/timeline2.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
str(Timeline)
## 'data.frame': 157 obs. of 14 variables:
## $ X : Factor w/ 157 levels "2004-01","2004-02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Cartilha : int 38 46 58 47 38 44 34 49 42 38 ...
## $ DA : int 49 49 19 54 51 59 42 44 76 58 ...
## $ DAs : int 37 61 100 69 62 63 63 61 46 85 ...
## $ FE : int 31 43 75 79 89 90 55 51 69 64 ...
## $ PA : int 12 18 25 49 39 35 25 17 36 30 ...
## $ DE : int 12 12 25 20 8 16 8 7 20 10 ...
## $ TA : int 3 3 3 3 2 2 2 3 3 3 ...
## $ DA1 : int 10 16 26 18 16 16 16 16 12 22 ...
## $ Dislexia : int 35 26 33 40 82 72 57 49 65 61 ...
## $ Disgrafia : int 13 10 10 5 6 4 4 5 10 3 ...
## $ Discalculia: int 3 3 3 4 5 3 2 2 3 4 ...
## $ Tempo : Factor w/ 157 levels "2004-01-30","2004-03-01",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ tempo : Factor w/ 157 levels "2004-01-29 22:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
tail(Timeline)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 152 2016-08 24 17 18 4 4 5 1 5 20 1 2
## 153 2016-09 25 19 22 8 6 5 1 6 31 2 2
## 154 2016-10 24 17 19 8 5 5 1 5 21 2 2
## 155 2016-11 23 22 23 8 6 7 1 6 20 2 3
## 156 2016-12 14 18 16 3 5 5 1 4 17 1 2
## 157 2017-01 15 9 11 3 3 2 0 3 14 1 1
## Tempo tempo
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
## 157 2017-01-30 2017-01-29 22:00:00
Timeline_5anos<-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/Timeline_5anos.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
summary(Timeline)
## X Cartilha DA DAs
## 2004-01: 1 Min. : 14.00 Min. : 7.00 Min. : 9.00
## 2004-02: 1 1st Qu.: 24.00 1st Qu.:16.00 1st Qu.: 23.00
## 2004-03: 1 Median : 28.00 Median :19.00 Median : 29.00
## 2004-04: 1 Mean : 29.56 Mean :22.77 Mean : 32.03
## 2004-05: 1 3rd Qu.: 33.00 3rd Qu.:27.00 3rd Qu.: 38.00
## 2004-06: 1 Max. :100.00 Max. :76.00 Max. :100.00
## (Other):151
## FE PA DE TA
## Min. : 3.0 Min. : 2.00 Min. : 2.000 Min. :0.0000
## 1st Qu.: 7.0 1st Qu.: 5.00 1st Qu.: 5.000 1st Qu.:0.0000
## Median :12.0 Median : 7.00 Median : 6.000 Median :1.0000
## Mean :20.2 Mean :10.11 Mean : 6.739 Mean :0.9045
## 3rd Qu.:27.0 3rd Qu.:12.00 3rd Qu.: 8.000 3rd Qu.:1.0000
## Max. :90.0 Max. :49.00 Max. :25.000 Max. :4.0000
##
## DA1 Dislexia Disgrafia Discalculia
## Min. : 2.000 Min. : 13.00 Min. : 0.000 Min. :0.000
## 1st Qu.: 6.000 1st Qu.: 21.00 1st Qu.: 1.000 1st Qu.:1.000
## Median : 7.000 Median : 25.00 Median : 2.000 Median :2.000
## Mean : 8.344 Mean : 30.59 Mean : 2.363 Mean :2.089
## 3rd Qu.:10.000 3rd Qu.: 35.00 3rd Qu.: 3.000 3rd Qu.:2.000
## Max. :26.000 Max. :100.00 Max. :13.000 Max. :6.000
##
## Tempo tempo
## 2004-01-30: 1 2004-01-29 22:00:00: 1
## 2004-03-01: 1 2004-02-29 21:00:00: 1
## 2004-03-30: 1 2004-03-29 21:00:00: 1
## 2004-04-30: 1 2004-04-29 21:00:00: 1
## 2004-05-30: 1 2004-05-29 21:00:00: 1
## 2004-06-30: 1 2004-06-29 21:00:00: 1
## (Other) :151 (Other) :151
Select study period (months 97:156, exactly 5 years)
Timeline_5anos <- Timeline[97:156, ]
head(Timeline_5anos)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 97 2012-01 15 10 19 5 3 3 0 5 22 1 1
## 98 2012-02 20 15 19 6 7 4 0 5 20 1 1
## 99 2012-03 26 18 30 9 7 5 0 8 27 2 2
## 100 2012-04 25 25 29 11 8 5 1 8 27 2 1
## 101 2012-05 29 24 35 12 10 7 1 9 24 2 2
## 102 2012-06 25 18 31 12 7 6 1 8 23 2 2
## Tempo tempo
## 97 2012-01-30 2012-01-29 22:00:00
## 98 2012-03-01 2012-02-29 21:00:00
## 99 2012-03-30 2012-03-29 21:00:00
## 100 2012-04-30 2012-04-29 21:00:00
## 101 2012-05-30 2012-05-29 21:00:00
## 102 2012-06-30 2012-06-29 21:00:00
tail(Timeline_5anos)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 151 2016-07 21 14 15 4 3 4 1 4 22 1 1
## 152 2016-08 24 17 18 4 4 5 1 5 20 1 2
## 153 2016-09 25 19 22 8 6 5 1 6 31 2 2
## 154 2016-10 24 17 19 8 5 5 1 5 21 2 2
## 155 2016-11 23 22 23 8 6 7 1 6 20 2 3
## 156 2016-12 14 18 16 3 5 5 1 4 17 1 2
## Tempo tempo
## 151 2016-07-30 2016-07-29 21:00:00
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
write.csv(Timeline_5anos,file="Timeline_5anos.csv")
summary(Timeline_5anos)
## X Cartilha DA DAs
## 2012-01: 1 Min. :14.00 Min. : 7.00 Min. : 9.00
## 2012-02: 1 1st Qu.:21.75 1st Qu.:14.00 1st Qu.:19.00
## 2012-03: 1 Median :25.00 Median :17.00 Median :23.00
## 2012-04: 1 Mean :23.75 Mean :16.27 Mean :22.45
## 2012-05: 1 3rd Qu.:26.00 3rd Qu.:19.00 3rd Qu.:26.00
## 2012-06: 1 Max. :32.00 Max. :25.00 Max. :35.00
## (Other):54
## FE PA DE TA
## Min. : 3.000 Min. : 2.000 Min. :2.000 Min. :0.00
## 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:4.000 1st Qu.:0.00
## Median : 7.000 Median : 5.000 Median :5.000 Median :1.00
## Mean : 6.833 Mean : 5.267 Mean :5.183 Mean :0.65
## 3rd Qu.: 8.000 3rd Qu.: 6.000 3rd Qu.:6.000 3rd Qu.:1.00
## Max. :12.000 Max. :10.000 Max. :8.000 Max. :1.00
##
## DA1 Dislexia Disgrafia Discalculia
## Min. :2.000 Min. :13.00 Min. :1.00 Min. :1.000
## 1st Qu.:5.000 1st Qu.:18.75 1st Qu.:1.00 1st Qu.:1.000
## Median :6.000 Median :21.00 Median :1.00 Median :2.000
## Mean :5.833 Mean :21.12 Mean :1.45 Mean :1.783
## 3rd Qu.:7.000 3rd Qu.:23.00 3rd Qu.:2.00 3rd Qu.:2.000
## Max. :9.000 Max. :35.00 Max. :2.00 Max. :3.000
##
## Tempo tempo
## 2012-01-30: 1 2012-01-29 22:00:00: 1
## 2012-03-01: 1 2012-02-29 21:00:00: 1
## 2012-03-30: 1 2012-03-29 21:00:00: 1
## 2012-04-30: 1 2012-04-29 21:00:00: 1
## 2012-05-30: 1 2012-05-29 21:00:00: 1
## 2012-06-30: 1 2012-06-29 21:00:00: 1
## (Other) :54 (Other) :54
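The subsetting above relies on hard-coded row indices (97:156); a minimal sketch of the same selection driven by the month labels in X, assuming the "YYYY-MM" format shown by str(Timeline) (the helper name meses is illustrative):
# Sketch: select the 5-year window by month label instead of row number
meses <- as.character(Timeline$X)   # X is a factor; compare as text
Timeline_5anos_alt <- Timeline[meses >= "2012-01" & meses <= "2016-12", ]
nrow(Timeline_5anos_alt)  # 60 months, the same rows as Timeline[97:156, ]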
Organizing the dataset by month
#2004 to 2017
Timeline$date <- seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "month")
Timeline$Date <- as.POSIXct(Timeline$date, tz="America/Sao_Paulo")
#2012 to 2016
Timeline_5anos$date <- seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "month")
Timeline_5anos$Date <- as.POSIXct(Timeline_5anos$date, tz="America/Sao_Paulo")
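Note that seq(..., by = "month") anchored on day 30 rolls nonexistent dates forward (there is no February 30), which is why the second entry of Tempo above is "2004-03-01". A quick check:
# Day 30 of February does not exist, so seq() rolls the date into March
seq(as.Date("2004-01-30"), by = "month", length.out = 3)
## [1] "2004-01-30" "2004-03-01" "2004-03-30"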
Removing outliers # borrowed from aL3xa - # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
remove_outliers <- function(x, na.rm = TRUE, ...) {
  qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)  # Q1 and Q3
  H <- 1.5 * IQR(x, na.rm = na.rm)                           # Tukey fence half-width
  y <- x
  y[x < (qnt[1] - H)] <- NA   # values below Q1 - 1.5*IQR become NA
  y[x > (qnt[2] + H)] <- NA   # values above Q3 + 1.5*IQR become NA
  y
}
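A quick illustration of the rule on made-up values: 100 lies beyond Q3 + 1.5*IQR, so it is replaced by NA while the other points survive.
remove_outliers(c(10, 12, 11, 13, 100))
## [1] 10 12 11 13 NA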
#2004 to 2017
Timeline$DA_clean <- remove_outliers(Timeline$DA)
Timeline$DAs_clean <- remove_outliers(Timeline$DAs)
Timeline$Dislexia_clean <- remove_outliers(Timeline$Dislexia)
Timeline$FE_clean <- remove_outliers(Timeline$FE)
Timeline$PA_clean <- remove_outliers(Timeline$PA)
Timeline$DE_clean <- remove_outliers(Timeline$DE)
Timeline$Discalculia_clean <- remove_outliers(Timeline$Discalculia)
Timeline$Disgrafia_clean <- remove_outliers(Timeline$Disgrafia)
Timeline$TA_clean <- remove_outliers(Timeline$TA)
#2012 to 2017
Timeline_5anos$DA_clean <- remove_outliers(Timeline_5anos$DA)
Timeline_5anos$DAs_clean <- remove_outliers(Timeline_5anos$DAs)
Timeline_5anos$Dislexia_clean <- remove_outliers(Timeline_5anos$Dislexia)
Timeline_5anos$FE_clean <- remove_outliers(Timeline_5anos$FE)
Timeline_5anos$PA_clean <- remove_outliers(Timeline_5anos$PA)
Timeline_5anos$DE_clean <- remove_outliers(Timeline_5anos$DE)
Timeline_5anos$Discalculia_clean <- remove_outliers(Timeline_5anos$Discalculia)
Timeline_5anos$Disgrafia_clean <- remove_outliers(Timeline_5anos$Disgrafia)
Timeline_5anos$TA_clean <- remove_outliers(Timeline_5anos$TA)
Relationship of the words over time
Comparative frequency plot - 2004 to 2017
## Plot
library(scales) ## install and cite the scales package
# frequency plot
plot(Timeline$Dislexia~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2004 a 2017")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA~Timeline$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline$DE~Timeline$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline$DA~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia~Timeline$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia~Timeline$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline$TA~Timeline$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem", "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)
# outliers removed
plot(Timeline$Dislexia_clean~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2004 a 2017, removidos outliers")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline$DE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline$DA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline$TA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem", "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)
Comparative frequency plot - last 5 years
plot(Timeline_5anos$Dislexia~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2012 a 2017")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem", "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)
# outliers removed
plot(Timeline_5anos$Dislexia_clean~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2012 a 2017, removidos outliers")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem", "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)
Word density analysis # borrowed from melina.leite - # http://ecologia.ib.usp.br/bie5782/doku.php?id=bie5782:03_apostila:05-exploratoria
# density plot
plot(density(Timeline$Dislexia, bw=1.5), col="red", xlab="", ylab="Densidade Probabilistica", main = "Densidade pela frequencia")
lines(density(Timeline$DAs, bw=1.5), col= "darkblue")
lines(density(Timeline$FE, bw=1.5),col= "darkgray")
lines(density(Timeline$PA, bw=1.5),col= "pink")
lines(density(Timeline$DE, bw=1.5),col= "black")
lines(density(Timeline$DA, bw=1.5),col= "darkred")
lines(density(Timeline$Discalculia, bw=1.5),col= "green")
lines(density(Timeline$Disgrafia, bw=1.5),col= "orange")
lines(density(Timeline$TA, bw=1.5),col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem", "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)
Relationship between the words: (1) Dificuldades de Aprendizagem vs. all words; (2) Dislexia vs. all words
Descriptive statistics
# maximum and minimum values
max = apply(Timeline[3:12], MARGIN=2, FUN= max)
max_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= max)
min = apply(Timeline[3:12], MARGIN=2, FUN= min)
min_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= min)
# median
mediana = apply(Timeline[3:12], MARGIN=2, FUN=median)
mediana_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=median)
# means
medias = apply(Timeline[3:12], MARGIN=2, FUN=mean)
medias_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=mean)
# standard deviation
dp = apply(Timeline[3:12], MARGIN=2, FUN=sd)
dp_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=sd)
#variance
var = apply(Timeline[3:12], 2, var)
var_5anos = apply(Timeline_5anos[3:12], 2, var)
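An equivalent, more compact sketch computes all six summaries in a single pass over the same columns (one sapply() call per period instead of six apply() calls):
# Sketch: all descriptive statistics for 2004-2017 at once
descritiva <- t(sapply(Timeline[3:12], function(x)
  c(min = min(x), max = max(x), mediana = median(x),
    media = mean(x), dp = sd(x), var = var(x))))
descritiva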
# Descriptive table, 2004 to 2017
TabelaDes = data.frame( min, max, mediana, medias, dp, var)
TabelaDes
## min max mediana medias dp var
## DA 7 76 19 22.7707006 11.1768262 124.9214437
## DAs 9 100 29 32.0254777 15.0298630 225.8967826
## FE 3 90 12 20.1974522 18.7211109 350.4799935
## PA 2 49 7 10.1146497 7.9775070 63.6406173
## DE 2 25 6 6.7388535 3.4271662 11.7454679
## TA 0 4 1 0.9044586 0.8147946 0.6638902
## DA1 2 26 7 8.3439490 3.9300182 15.4450433
## Dislexia 13 100 25 30.5859873 15.6745934 245.6928793
## Disgrafia 0 13 2 2.3630573 1.8402650 3.3865752
## Discalculia 0 6 2 2.0891720 1.1057878 1.2227666
# Descriptive table, 2012 to 2017
TabelaDes_5anos = data.frame(min_5anos, max_5anos, mediana_5anos, medias_5anos, dp_5anos, var_5anos)
TabelaDes_5anos
## min_5anos max_5anos mediana_5anos medias_5anos dp_5anos
## DA 7 25 17 16.266667 3.8658289
## DAs 9 35 23 22.450000 5.6521332
## FE 3 12 7 6.833333 2.3733454
## PA 2 10 5 5.266667 1.6142025
## DE 2 8 5 5.183333 1.2952543
## TA 0 1 1 0.650000 0.4809947
## DA1 2 9 6 5.833333 1.4976441
## Dislexia 13 35 21 21.116667 4.2469996
## Disgrafia 1 2 1 1.450000 0.5016921
## Discalculia 1 3 2 1.783333 0.5551505
## var_5anos
## DA 14.9446328
## DAs 31.9466102
## FE 5.6327684
## PA 2.6056497
## DE 1.6776836
## TA 0.2313559
## DA1 2.2429379
## Dislexia 18.0370056
## Disgrafia 0.2516949
## Discalculia 0.3081921
# correlation between the variables
explicativas = Timeline[,3:12]
cor(explicativas, method = "spearman")
## DA DAs FE PA DE TA
## DA 1.0000000 0.8540771 0.8209625 0.7936141 0.6392457 0.5180165
## DAs 0.8540771 1.0000000 0.8524418 0.8269852 0.6485852 0.4435616
## FE 0.8209625 0.8524418 1.0000000 0.8653836 0.5799170 0.4378294
## PA 0.7936141 0.8269852 0.8653836 1.0000000 0.6016096 0.4329864
## DE 0.6392457 0.6485852 0.5799170 0.6016096 1.0000000 0.3825523
## TA 0.5180165 0.4435616 0.4378294 0.4329864 0.3825523 1.0000000
## DA1 0.8503754 0.9935315 0.8494201 0.8172536 0.6425757 0.4314246
## Dislexia 0.7587646 0.7326280 0.8566252 0.7994667 0.4988009 0.3492648
## Disgrafia 0.7968748 0.8085197 0.7878822 0.7858998 0.5889306 0.5114184
## Discalculia 0.5809042 0.5118086 0.4850222 0.5163804 0.5044344 0.5069295
## DA1 Dislexia Disgrafia Discalculia
## DA 0.8503754 0.7587646 0.7968748 0.5809042
## DAs 0.9935315 0.7326280 0.8085197 0.5118086
## FE 0.8494201 0.8566252 0.7878822 0.4850222
## PA 0.8172536 0.7994667 0.7858998 0.5163804
## DE 0.6425757 0.4988009 0.5889306 0.5044344
## TA 0.4314246 0.3492648 0.5114184 0.5069295
## DA1 1.0000000 0.7325950 0.8125736 0.5189100
## Dislexia 0.7325950 1.0000000 0.7344780 0.4313871
## Disgrafia 0.8125736 0.7344780 1.0000000 0.5902121
## Discalculia 0.5189100 0.4313871 0.5902121 1.0000000
pairs(explicativas)
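The matrix and scatterplot matrix above use the raw columns. For the *_clean columns, which contain NAs after outlier removal, pairwise-complete observations are needed; a sketch (the helper name limpas is illustrative):
# Sketch: Spearman matrix on the outlier-free columns (NAs handled pairwise)
limpas <- Timeline[, grep("_clean$", names(Timeline))]
cor(limpas, method = "spearman", use = "pairwise.complete.obs")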
Normality tests for the variables
library(nortest)
# Shapiro-Francia normality test
sf_tests = apply(Timeline[17:25], MARGIN=2, FUN=sf.test)  # stored as sf_tests so nortest::sf.test is not masked
sf_tests
## $DA_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.95516, p-value = 0.0002326
##
##
## $DAs_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.96458, p-value = 0.001309
##
##
## $Dislexia_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.92617, p-value = 3.676e-06
##
##
## $FE_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.84329, p-value = 7.899e-10
##
##
## $PA_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.87275, p-value = 1.165e-08
##
##
## $DE_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.95808, p-value = 0.0003847
##
##
## $Discalculia_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.82941, p-value = 3.222e-10
##
##
## $Disgrafia_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.82525, p-value = 1.137e-10
##
##
## $TA_clean
##
## Shapiro-Francia normality test
##
## data: newX[, i]
## W = 0.72974, p-value = 4.65e-13
# Shapiro-Wilk normality tests
shapiro_tests = apply(Timeline[17:25], MARGIN=2, FUN=shapiro.test)
shapiro_tests
## $DA_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.95302, p-value = 7.019e-05
##
##
## $DAs_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.96262, p-value = 0.0005275
##
##
## $Dislexia_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.92287, p-value = 4.782e-07
##
##
## $FE_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.8404, p-value = 2.398e-11
##
##
## $PA_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.87041, p-value = 6.218e-10
##
##
## $DE_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.95616, p-value = 0.0001306
##
##
## $Discalculia_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.82723, p-value = 8.761e-12
##
##
## $Disgrafia_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.82471, p-value = 2.961e-12
##
##
## $TA_clean
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.72669, p-value = 3.211e-15
# Anderson-Darling normality test
ad_tests = apply(Timeline[17:25], MARGIN=2, FUN=ad.test)
ad_tests
## $DA_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 2.2692, p-value = 8.88e-06
##
##
## $DAs_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 1.5723, p-value = 0.0004605
##
##
## $Dislexia_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 3.6684, p-value = 3.375e-09
##
##
## $FE_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 8.4665, p-value < 2.2e-16
##
##
## $PA_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 6.3436, p-value = 1.2e-15
##
##
## $DE_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 2.3517, p-value = 5.571e-06
##
##
## $Discalculia_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 11.851, p-value < 2.2e-16
##
##
## $Disgrafia_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 9.308, p-value < 2.2e-16
##
##
## $TA_clean
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 20.155, p-value < 2.2e-16
# Cramer-von Mises normality test
cvm_tests = apply(Timeline[17:25], MARGIN=2, FUN=cvm.test)
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
cvm_tests
## $DA_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 0.43175, p-value = 1.219e-05
##
##
## $DAs_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 0.27493, p-value = 0.0006369
##
##
## $Dislexia_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 0.64513, p-value = 1.552e-07
##
##
## $FE_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 1.5153, p-value = 7.37e-10
##
##
## $PA_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 1.1283, p-value = 7.37e-10
##
##
## $DE_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 0.43382, p-value = 1.162e-05
##
##
## $Discalculia_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 2.1864, p-value = 7.37e-10
##
##
## $Disgrafia_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 1.6243, p-value = 7.37e-10
##
##
## $TA_clean
##
## Cramer-von Mises normality test
##
## data: newX[, i]
## W = 3.7942, p-value = 7.37e-10
# Lilliefors (Kolmogorov-Smirnov correction) normality test
lillie_tests = apply(Timeline[17:25], MARGIN=2, FUN=lillie.test)
lillie_tests
## $DA_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.14452, p-value = 5.78e-08
##
##
## $DAs_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.10969, p-value = 0.000191
##
##
## $Dislexia_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.13847, p-value = 3.542e-07
##
##
## $FE_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.2038, p-value < 2.2e-16
##
##
## $PA_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.20385, p-value = 3.174e-16
##
##
## $DE_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.14544, p-value = 4.518e-08
##
##
## $Discalculia_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.27422, p-value < 2.2e-16
##
##
## $Disgrafia_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.2652, p-value < 2.2e-16
##
##
## $TA_clean
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: newX[, i]
## D = 0.3607, p-value < 2.2e-16
# Pearson chi-square normality test, taking the number of classes into account
pearson_tests = apply(Timeline[17:25], MARGIN=2, FUN=pearson.test)
pearson_tests
## $DA_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 26.367, p-value = 0.009519
##
##
## $DAs_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 29.685, p-value = 0.003114
##
##
## $Dislexia_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 46.483, p-value = 5.728e-06
##
##
## $FE_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 137.59, p-value < 2.2e-16
##
##
## $PA_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 76.483, p-value = 1.925e-11
##
##
## $DE_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 172.49, p-value < 2.2e-16
##
##
## $Discalculia_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 693.24, p-value < 2.2e-16
##
##
## $Disgrafia_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 504.94, p-value < 2.2e-16
##
##
## $TA_clean
##
## Pearson chi-square normality test
##
## data: newX[, i]
## P = 940.86, p-value < 2.2e-16
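Before drawing a conclusion, a minimal sketch that gathers every p-value from the test lists built above into a single variable-by-test matrix:
# Sketch: all normality p-values side by side; every entry falls below 0.05
pvals <- sapply(list(SF = sf_tests, SW = shapiro_tests, AD = ad_tests,
                     CvM = cvm_tests, Lillie = lillie_tests, Pearson = pearson_tests),
                function(tests) sapply(tests, function(t) t$p.value))
round(pvals, 4)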
CONCLUSION: the data are not normally distributed; therefore, non-parametric methods should be adopted.
Diagnostic plots for the variable - Dif.s de Aprendizagem
# Diagnostic plots and histogram with non-parametric density estimate plus normal curve, with and without outliers
par(mfrow=c(2,2))
boxplot(Timeline$DAs, notch = T, main = "Dif.s de Aprendizagem", ylab = "valores observados")
boxplot(Timeline$DAs_clean, notch = T, main = "removidos outliers", ylab = "valores observados")
hist(Timeline$DAs, prob=T, main = "Densid. probabilística e curva normal", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$DAs),col="darkblue", lwd=c(2))
curve(expr = dnorm(x,mean=mean(Timeline$DAs),sd=sd(Timeline$DAs)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
hist(Timeline$DAs_clean, prob=T, main = "removidos outliers", xlab = "valores observados", ylab = "Densidade probabilística")
lines(density(Timeline$DAs_clean, na.rm=TRUE), col="darkblue", lwd=2)
curve(expr = dnorm(x, mean=mean(Timeline$DAs_clean, na.rm=TRUE), sd=sd(Timeline$DAs_clean, na.rm=TRUE)), add=T, col="darkred", lwd=2)
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
par(mfrow=c(1,1))
# normality tests
Teste_norm_DAs = c(sf_tests$DAs_clean$method, ad_tests$DAs_clean$method, cvm_tests$DAs_clean$method, lillie_tests$DAs_clean$method, pearson_tests$DAs_clean$method, shapiro_tests$DAs_clean$method)
pvalor_norm_DAs = c(sf_tests$DAs_clean$p.value, ad_tests$DAs_clean$p.value, cvm_tests$DAs_clean$p.value, lillie_tests$DAs_clean$p.value, pearson_tests$DAs_clean$p.value, shapiro_tests$DAs_clean$p.value)
Tab_norm_DAs = data.frame(Teste_norm_DAs, pvalor_norm_DAs)
Tab_norm_DAs
## Teste_norm_DAs pvalor_norm_DAs
## 1 Shapiro-Francia normality test 0.0013094786
## 2 Anderson-Darling normality test 0.0004604626
## 3 Cramer-von Mises normality test 0.0006369276
## 4 Lilliefors (Kolmogorov-Smirnov) normality test 0.0001909866
## 5 Pearson chi-square normality test 0.0031139409
## 6 Shapiro-Wilk normality test 0.0005274689
Correlation between the dependent variable and the others, Spearman method (non-parametric), outliers removed
cor.das.da = cor.test(Timeline$DAs_clean, Timeline$DA_clean, method = "spearman") ## Dificuldade de Aprendizagem (DA)
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.fe = cor.test(Timeline$DAs_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$FE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.pa = cor.test(Timeline$DAs_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$PA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.de = cor.test(Timeline$DAs_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.dis1 = cor.test(Timeline$DAs_clean, Timeline$Dislexia_clean, method = "spearman") #Dislexia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Dislexia_clean, :
## Cannot compute exact p-value with ties
cor.das.dis2 = cor.test(Timeline$DAs_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.das.dis3 = cor.test(Timeline$DAs_clean, Timeline$Disgrafia_clean, method = "spearman") #Disgrafia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Disgrafia_clean, :
## Cannot compute exact p-value with ties
cor.das.ta = cor.test(Timeline$DAs_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$TA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
Nome = c("Dif.s de aprend. com Dif. de Aprend.", " com Fracasso Escolar", " com Problema de Aprend.", " com Desempenho Escolar", " com Dislexia", " com Discalculia", " com Disgrafia", " com Transt. de Aprend.")
pvalor_cor = c(cor.das.da$p.value, cor.das.fe$p.value, cor.das.pa$p.value, cor.das.de$p.value, cor.das.dis1$p.value, cor.das.dis2$p.value, cor.das.dis3$p.value, cor.das.ta$p.value)
valor_cor = c(cor.das.da$estimate, cor.das.fe$estimate, cor.das.pa$estimate, cor.das.de$estimate, cor.das.dis1$estimate, cor.das.dis2$estimate, cor.das.dis3$estimate, cor.das.ta$estimate)
tab_cor_DAs = data.frame(Nome, valor_cor, pvalor_cor)
tab_cor_DAs
## Nome valor_cor pvalor_cor
## 1 Dif.s de aprend. com Dif. de Aprend. 0.8284254 4.629895e-37
## 2 com Fracasso Escolar 0.8092017 2.240052e-34
## 3 com Problema de Aprend. 0.7745652 3.061280e-29
## 4 com Desempenho Escolar 0.5891858 2.355150e-14
## 5 com Dislexia 0.7775874 3.442925e-29
## 6 com Discalculia 0.3835854 3.416791e-06
## 7 com Disgrafia 0.7638980 8.502890e-29
## 8 com Transt. de Aprend. 0.2659142 1.381377e-03
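The eight cor.test() calls above share one pattern; a sketch that builds the same table in a loop:
# Sketch: Spearman tests of DAs_clean against each remaining series
vars <- c("DA_clean", "FE_clean", "PA_clean", "DE_clean",
          "Dislexia_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")
res <- lapply(vars, function(v) cor.test(Timeline$DAs_clean, Timeline[[v]], method = "spearman"))
data.frame(variavel = vars,
           rho = sapply(res, `[[`, "estimate"),
           p = sapply(res, `[[`, "p.value"))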
Scatter plots with regression line, loess method (non-parametric) - Dif.s de Aprendizagem, outliers removed
library("ggplot2")
library("devtools")
ggplot(Timeline, aes(y=DAs_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
qplot(DA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dif. de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).
qplot(FE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Fracasso Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
qplot(PA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Problemas de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
qplot(DE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Desempenho Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=Dislexia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).
qplot(Dislexia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dislexia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).
qplot(Discalculia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Discalculia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).
qplot(Disgrafia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Disgrafia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=DAs_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).
qplot(TA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Transtorno de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).
Multiple linear regression model (with tests of normality, homoscedasticity, and independence of the residuals) - Dificuldades de Aprendizagem
# Linear regression
RL_DAs1 = lm( DAs ~ DA + FE + PA + DE + Dislexia + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_DAs1 )
##
## Call:
## lm(formula = DAs ~ DA + FE + PA + DE + Dislexia + Discalculia +
## Disgrafia + TA, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.7796 -4.4051 -0.2952 3.2602 23.5186
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.73371 2.16432 7.270 1.93e-11 ***
## DA 0.12492 0.11400 1.096 0.2750
## FE 0.60800 0.08085 7.520 4.89e-12 ***
## PA -0.14275 0.16756 -0.852 0.3956
## DE 0.48093 0.25784 1.865 0.0641 .
## Dislexia -0.11573 0.05563 -2.080 0.0392 *
## Discalculia 0.30267 0.69808 0.434 0.6652
## Disgrafia 0.54605 0.55880 0.977 0.3301
## TA 1.09128 1.11393 0.980 0.3288
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.531 on 148 degrees of freedom
## Multiple R-squared: 0.7618, Adjusted R-squared: 0.7489
## F-statistic: 59.16 on 8 and 148 DF, p-value: < 2.2e-16
# Normality test for the residuals
lillie.test(RL_DAs1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_DAs1$residuals
## D = 0.091273, p-value = 0.002761
# Linear regression, outliers removed
RL_DAs = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_DAs )
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean +
## Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean,
## data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.5469 -3.0775 0.0795 2.7979 15.5779
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.93634 2.01764 2.447 0.01594 *
## DA_clean 0.58738 0.12460 4.714 6.87e-06 ***
## FE_clean 0.07087 0.08772 0.808 0.42083
## PA_clean 0.36834 0.19833 1.857 0.06585 .
## DE_clean 0.27743 0.27266 1.017 0.31105
## Dislexia_clean 0.06703 0.10516 0.637 0.52512
## Discalculia_clean 0.06120 0.74686 0.082 0.93484
## Disgrafia_clean 2.70418 0.81104 3.334 0.00115 **
## TA_clean -1.10400 0.95187 -1.160 0.24852
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.707 on 115 degrees of freedom
## (33 observations deleted due to missingness)
## Multiple R-squared: 0.7557, Adjusted R-squared: 0.7387
## F-statistic: 44.46 on 8 and 115 DF, p-value: < 2.2e-16
# Normality test for the residuals
lillie.test(RL_DAs$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_DAs$residuals
## D = 0.062922, p-value = 0.2666
par(mfrow=c(2,2))
plot(RL_DAs)
library(car)
par(mfrow=c(1,2))
residualPlot(RL_DAs1, main = "Gráfico de Residuos")
residualPlot(RL_DAs, main = "Gráfico de Residuos, sem outliers")
# Model refinement via backward stepwise elimination
# removing Discalculia, p = 0.93484
RL_DAs_aj1 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj1)
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean +
## Dislexia_clean + Disgrafia_clean + TA_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.6171 -3.0958 -0.1424 2.9445 15.1224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.33080 1.92615 2.768 0.00655 **
## DA_clean 0.57281 0.12157 4.712 6.72e-06 ***
## FE_clean 0.13517 0.07651 1.767 0.07984 .
## PA_clean 0.28327 0.19356 1.463 0.14597
## DE_clean 0.32855 0.26187 1.255 0.21208
## Dislexia_clean 0.04887 0.10183 0.480 0.63220
## Disgrafia_clean 2.52339 0.77280 3.265 0.00143 **
## TA_clean -0.63608 0.91365 -0.696 0.48767
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.708 on 119 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.7592, Adjusted R-squared: 0.745
## F-statistic: 53.59 on 7 and 119 DF, p-value: < 2.2e-16
# removing Dislexia, p = 0.63220
RL_DAs_aj2 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj2)
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean +
## Disgrafia_clean + TA_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3148 -3.2140 -0.2363 3.0104 16.4652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.18463 1.58960 3.262 0.001425 **
## DA_clean 0.66417 0.11270 5.893 3.25e-08 ***
## FE_clean 0.11066 0.06831 1.620 0.107726
## PA_clean 0.20945 0.18241 1.148 0.253045
## DE_clean 0.33812 0.25668 1.317 0.190124
## Disgrafia_clean 2.66941 0.76217 3.502 0.000639 ***
## TA_clean -0.65964 0.89343 -0.738 0.461688
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.774 on 126 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.7476, Adjusted R-squared: 0.7356
## F-statistic: 62.21 on 6 and 126 DF, p-value: < 2.2e-16
# removing Transtorno de Aprendizagem, p = 0.461688
RL_DAs_aj3 = lm(DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj3)
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean +
## Disgrafia_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.5361 -3.1722 -0.0087 2.9604 16.3183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.04710 1.57582 3.203 0.001720 **
## DA_clean 0.65120 0.11113 5.860 3.75e-08 ***
## FE_clean 0.11182 0.06817 1.640 0.103400
## PA_clean 0.21728 0.18178 1.195 0.234193
## DE_clean 0.33401 0.25615 1.304 0.194616
## Disgrafia_clean 2.61139 0.75675 3.451 0.000759 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.766 on 127 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.7465, Adjusted R-squared: 0.7366
## F-statistic: 74.81 on 5 and 127 DF, p-value: < 2.2e-16
# removing Problema de Aprendizagem, p = 0.234193
RL_DAs_aj4 = lm( DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj4)
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean,
## data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.8319 -2.8564 -0.3094 3.0077 17.4098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.74149 1.58509 4.253 3.96e-05 ***
## DA_clean 0.65757 0.11435 5.750 5.89e-08 ***
## FE_clean 0.16225 0.06418 2.528 0.01265 *
## DE_clean 0.37681 0.25370 1.485 0.13986
## Disgrafia_clean 1.89478 0.70271 2.696 0.00792 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.996 on 132 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.7318, Adjusted R-squared: 0.7237
## F-statistic: 90.04 on 4 and 132 DF, p-value: < 2.2e-16
# removing Desempenho Escolar, p = 0.13986
RL_DAs_aj5 = lm( DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj5)
##
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean,
## data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.5990 -3.1871 -0.3789 3.1076 16.8447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.20862 1.42196 5.069 1.27e-06 ***
## DA_clean 0.76572 0.10467 7.316 1.95e-11 ***
## FE_clean 0.15344 0.06429 2.387 0.0184 *
## Disgrafia_clean 1.75992 0.70563 2.494 0.0138 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.08 on 137 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.7448, Adjusted R-squared: 0.7392
## F-statistic: 133.3 on 3 and 137 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(RL_DAs_aj5)
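The backward elimination above drops one term per step by p-value. R's step() automates a backward search by AIC; a sketch, noting that step() needs a fixed data set (NA rows dropped first) and that the AIC-chosen model may differ from the p-value-driven one:
# Sketch: automated backward selection by AIC on the outlier-free columns
Timeline_cc <- na.omit(Timeline[, grep("_clean$", names(Timeline))])
RL_step <- step(lm(DAs_clean ~ ., data = Timeline_cc), direction = "backward", trace = 0)
summary(RL_step)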
# Normality tests for the residuals
shapiro.test(RL_DAs_aj5$residuals)
##
## Shapiro-Wilk normality test
##
## data: RL_DAs_aj5$residuals
## W = 0.97898, p-value = 0.02861
sf.test(RL_DAs_aj5$residuals)
##
## Shapiro-Francia normality test
##
## data: RL_DAs_aj5$residuals
## W = 0.97438, p-value = 0.01106
ad.test(RL_DAs_aj5$residuals)
##
## Anderson-Darling normality test
##
## data: RL_DAs_aj5$residuals
## A = 0.57491, p-value = 0.1331
cvm.test(RL_DAs_aj5$residuals)
##
## Cramer-von Mises normality test
##
## data: RL_DAs_aj5$residuals
## W = 0.070668, p-value = 0.2721
lillie.test(RL_DAs_aj5$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_DAs_aj5$residuals
## D = 0.04998, p-value = 0.5265
pearson.test(RL_DAs_aj5$residuals)
##
## Pearson chi-square normality test
##
## data: RL_DAs_aj5$residuals
## P = 12.723, p-value = 0.3895
# Residual homoscedasticity tests
residualPlot(RL_DAs_aj5)
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.3.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Goldfeld-Quandt test against heteroskedasticity (residual homoscedasticity), n > 30
gqtest(RL_DAs_aj5)
##
## Goldfeld-Quandt test
##
## data: RL_DAs_aj5
## GQ = 0.19071, df1 = 67, df2 = 66, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
# Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
bptest(RL_DAs_aj5)
##
## studentized Breusch-Pagan test
##
## data: RL_DAs_aj5
## BP = 23.965, df = 3, p-value = 2.54e-05
# Independence test - Durbin-Watson
plot(RL_DAs_aj5$residuals)
dwtest(RL_DAs_aj5)
##
## Durbin-Watson test
##
## data: RL_DAs_aj5
## DW = 1.5931, p-value = 0.005695
## alternative hypothesis: true autocorrelation is greater than 0
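Breusch-Pagan rejects homoscedasticity and Durbin-Watson indicates positive autocorrelation, so the OLS standard errors above are suspect. One common remedy is heteroskedasticity- and autocorrelation-consistent (Newey-West) standard errors; a sketch, assuming the sandwich package is installed:
library(sandwich)  # assumption: sandwich is available alongside lmtest
coeftest(RL_DAs_aj5, vcov = NeweyWest(RL_DAs_aj5))  # HAC (Newey-West) errors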
Diagnostic plots for the variable - Dislexia
# Diagnostic plots and histogram with non-parametric density estimate plus normal curve, with and without outliers
par(mfrow=c(2,2))
boxplot(Timeline$Dislexia, notch = T, main = "Dislexia", ylab = "valores observados")
boxplot(Timeline$Dislexia_clean, notch = T, main = "removidos outliers", ylab = "valores observados")
hist(Timeline$Dislexia, prob=T, main = "Densid. probabilística e curva normal", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$Dislexia),col="darkblue", lwd=c(2))
curve(expr = dnorm(x,mean=mean(Timeline$Dislexia),sd=sd(Timeline$Dislexia)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
hist(Timeline$Dislexia_clean, prob=T, main = "removidos outliers", xlab = "valores observados", ylab = "Densidade probabilística")
lines(density(Timeline$Dislexia_clean, na.rm=TRUE), col="darkblue", lwd=2)
curve(expr = dnorm(x, mean=mean(Timeline$Dislexia_clean, na.rm=TRUE), sd=sd(Timeline$Dislexia_clean, na.rm=TRUE)), add=T, col="darkred", lwd=2)
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
par(mfrow=c(1,1))
# normality tests
Teste_norm_Dis = c(sf_tests$Dislexia_clean$method, ad_tests$Dislexia_clean$method, cvm_tests$Dislexia_clean$method, lillie_tests$Dislexia_clean$method, pearson_tests$Dislexia_clean$method)
pvalor_norm_Dis = c(sf_tests$Dislexia_clean$p.value, ad_tests$Dislexia_clean$p.value, cvm_tests$Dislexia_clean$p.value, lillie_tests$Dislexia_clean$p.value, pearson_tests$Dislexia_clean$p.value)
Tab_norm_Dis = data.frame(Teste_norm_Dis, pvalor_norm_Dis)
Tab_norm_Dis
## Teste_norm_Dis pvalor_norm_Dis
## 1 Shapiro-Francia normality test 3.675661e-06
## 2 Anderson-Darling normality test 3.374538e-09
## 3 Cramer-von Mises normality test 1.552354e-07
## 4 Lilliefors (Kolmogorov-Smirnov) normality test 3.542177e-07
## 5 Pearson chi-square normality test 5.727799e-06
Correlation between the dependent variable and the others, Spearman method (non-parametric), outliers removed
cor.dis.das = cor.test(Timeline$Dislexia_clean, Timeline$DAs_clean, method = "spearman") ## Dificuldades de Aprendizagem (DAs)
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DAs_clean, :
## Cannot compute exact p-value with ties
cor.dis.fe = cor.test(Timeline$Dislexia_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$FE_clean, :
## Cannot compute exact p-value with ties
cor.dis.pa = cor.test(Timeline$Dislexia_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$PA_clean, :
## Cannot compute exact p-value with ties
cor.dis.de = cor.test(Timeline$Dislexia_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DE_clean, :
## Cannot compute exact p-value with ties
cor.dis.da = cor.test(Timeline$Dislexia_clean, Timeline$DA_clean, method = "spearman") ## Dificuldade de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DA_clean, :
## Cannot compute exact p-value with ties
cor.dis.dis2 = cor.test(Timeline$Dislexia_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.dis.dis3 = cor.test(Timeline$Dislexia_clean, Timeline$Disgrafia_clean, method = "spearman") #Disgrafia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Disgrafia_clean, : Cannot compute exact p-value with ties
cor.dis.ta = cor.test(Timeline$Dislexia_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$TA_clean, :
## Cannot compute exact p-value with ties
Nome_dis = c("Dislexia com Dif.s de Aprend.", " com Fracasso Escolar", " com Problema de Aprend.", " com Desempenho Escolar", " com Dif. de Aprend.", " com Discalculia", " com Disgrafia", " com Transt. de Aprend.")
pvalor_cor_dis = c(cor.dis.das$p.value, cor.dis.fe$p.value, cor.dis.pa$p.value, cor.dis.de$p.value, cor.dis.da$p.value, cor.dis.dis2$p.value, cor.dis.dis3$p.value, cor.dis.ta$p.value)
valor_cor_dis = c(cor.dis.das$estimate, cor.dis.fe$estimate, cor.dis.pa$estimate, cor.dis.de$estimate, cor.dis.da$estimate, cor.dis.dis2$estimate, cor.dis.dis3$estimate, cor.dis.ta$estimate)
tab_cor_dis = data.frame(Nome_dis, valor_cor_dis, pvalor_cor_dis)
# note: the original run stored the DAs result in cor.dis.da and then overwrote it with the DA result, which is why rows 1 and 5 of the printed table below are identical
tab_cor_dis
## Nome_dis valor_cor_dis pvalor_cor_dis
## 1 Dislexia com Dif.s de Aprend. 0.7935912 2.345935e-31
## 2 com Fracasso Escolar 0.8303787 1.304910e-36
## 3 com Problema de Aprend. 0.7696528 1.769850e-28
## 4 com Desempenho Escolar 0.4603439 1.515041e-08
## 5 com Dif.s de Aprend. 0.7935912 2.345935e-31
## 6 com Discalculia 0.3494107 3.270965e-05
## 7 com Disgrafia 0.7623783 3.038961e-28
## 8 com Transt. de Aprend. 0.2593444 2.211352e-03
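The eight tests above can also be produced in one loop, which guarantees each predictor is tested exactly once; a minimal sketch on the same data:
preds <- c("DAs_clean", "FE_clean", "PA_clean", "DE_clean", "DA_clean",
           "Discalculia_clean", "Disgrafia_clean", "TA_clean")
res <- lapply(preds, function(v)
  cor.test(Timeline$Dislexia_clean, Timeline[[v]], method = "spearman"))
data.frame(termo = preds,
           rho = sapply(res, `[[`, "estimate"),
           pvalor = sapply(res, `[[`, "p.value"))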
Scatter plots of Dislexia against each term, with loess regression line (nonparametric), outliers removed
library("ggplot2")
library("devtools")
ggplot(Timeline, aes(y=Dislexia_clean, x=DAs_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).
qplot(DAs_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
qplot(FE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
qplot(PA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).
qplot(DE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
qplot(DA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at -0.015; neighborhood
## radius 2.015; reciprocal condition number 1.03e-16; there are other near
## singularities as well. 4.0602 (the same warnings repeat from predLoess)
## Warning: Removed 22 rows containing missing values (geom_point).
qplot(Discalculia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at -0.015; neighborhood
## radius 2.015; reciprocal condition number 1.03e-16; there are other near
## singularities as well. 4.0602 (the same warnings repeat from predLoess)
## Warning: Removed 22 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at 2; neighborhood radius 1;
## reciprocal condition number 0 (the same warnings repeat from predLoess)
## Warning: Removed 15 rows containing missing values (geom_point).
qplot(Disgrafia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at 2; neighborhood radius 1;
## reciprocal condition number 0 (the same warnings repeat from predLoess)
## Warning: Removed 15 rows containing missing values (geom_point).
ggplot(Timeline, aes(y=Dislexia_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at -0.01; neighborhood
## radius 1.01; reciprocal condition number 0; there are other near
## singularities as well. 4.0401 (the same warnings repeat from predLoess)
## Warning: Removed 20 rows containing missing values (geom_point).
qplot(TA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(...): pseudoinverse used at -0.01; neighborhood
## radius 1.01; reciprocal condition number 0; there are other near
## singularities as well. 4.0401 (the same warnings repeat from predLoess)
## Warning: Removed 20 rows containing missing values (geom_point).
Multiple linear regression model (with tests of residual normality, homogeneity, and independence) - Dislexia
# Linear regression
RL_Dis1 = lm(Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_Dis1 )
##
## Call:
## lm(formula = Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia +
## TA, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.441 -5.510 -0.710 2.743 59.849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.35399 2.82475 8.976 1.13e-15 ***
## DAs -0.23414 0.11800 -1.984 0.0491 *
## FE 0.71912 0.12302 5.846 3.07e-08 ***
## PA 0.41386 0.23537 1.758 0.0807 .
## DE -0.44224 0.37831 -1.169 0.2443
## Discalculia -0.01647 1.01752 -0.016 0.9871
## Disgrafia 0.03539 0.79757 0.044 0.9647
## TA -3.37112 1.59567 -2.113 0.0363 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11 on 149 degrees of freedom
## Multiple R-squared: 0.5298, Adjusted R-squared: 0.5077
## F-statistic: 23.99 on 7 and 149 DF, p-value: < 2.2e-16
# residual normality test
lillie.test(RL_Dis1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_Dis1$residuals
## D = 0.17193, p-value = 2.682e-12
residualPlot(RL_Dis1)
# Linear regression, outliers removed
RL_Dis = lm(Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_Dis )
##
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean +
## DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean,
## data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.587 -2.578 -0.374 2.544 15.027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.52645 1.60088 6.575 1.45e-09 ***
## DAs_clean 0.14754 0.07633 1.933 0.0557 .
## FE_clean 0.39563 0.06842 5.782 6.34e-08 ***
## PA_clean 0.33230 0.18022 1.844 0.0678 .
## DE_clean 0.39148 0.24356 1.607 0.1107
## Discalculia_clean -0.02204 0.66670 -0.033 0.9737
## Disgrafia_clean 0.47063 0.76995 0.611 0.5422
## TA_clean -0.31212 0.85288 -0.366 0.7151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.276 on 116 degrees of freedom
## (33 observations deleted due to missingness)
## Multiple R-squared: 0.7328, Adjusted R-squared: 0.7167
## F-statistic: 45.45 on 7 and 116 DF, p-value: < 2.2e-16
# residual normality test
lillie.test(RL_Dis$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_Dis$residuals
## D = 0.067493, p-value = 0.18
residualPlot(RL_Dis)
# Model refinement by backward elimination
# dropping Discalculia, p = 0.9737
RL_Dis_aj1 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj1)
##
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean +
## DE_clean + Disgrafia_clean + TA_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9415 -2.5155 -0.5345 2.5451 15.5681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.75901 1.55378 6.924 2.32e-10 ***
## DAs_clean 0.14681 0.07713 1.903 0.0594 .
## FE_clean 0.33840 0.06408 5.281 5.82e-07 ***
## PA_clean 0.44907 0.17694 2.538 0.0124 *
## DE_clean 0.30316 0.24020 1.262 0.2093
## Disgrafia_clean 0.67710 0.74728 0.906 0.3667
## TA_clean -0.76740 0.83231 -0.922 0.3584
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.38 on 120 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.7183, Adjusted R-squared: 0.7043
## F-statistic: 51.01 on 6 and 120 DF, p-value: < 2.2e-16
# dropping Disgrafia, p = 0.3667
RL_Dis_aj2 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj2)
##
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean +
## DE_clean + TA_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.833 -2.752 -0.384 2.503 15.128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.80233 1.55190 6.961 1.88e-10 ***
## DAs_clean 0.17132 0.07218 2.373 0.01920 *
## FE_clean 0.34973 0.06280 5.569 1.58e-07 ***
## PA_clean 0.48416 0.17252 2.806 0.00584 **
## DE_clean 0.30986 0.23991 1.292 0.19896
## TA_clean -0.69829 0.82819 -0.843 0.40081
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.376 on 121 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.7164, Adjusted R-squared: 0.7047
## F-statistic: 61.13 on 5 and 121 DF, p-value: < 2.2e-16
# dropping Transtorno de Aprendizagem (TA), p = 0.40081
RL_Dis_aj3 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean, data=Timeline)
summary(RL_Dis_aj3)
##
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean +
## DE_clean, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0563 -2.6252 -0.3376 2.4554 15.8577
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.61148 1.50119 7.069 1.03e-10 ***
## DAs_clean 0.17203 0.07146 2.407 0.01755 *
## FE_clean 0.34293 0.06188 5.542 1.73e-07 ***
## PA_clean 0.48198 0.17134 2.813 0.00572 **
## DE_clean 0.27460 0.22984 1.195 0.23450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.356 on 123 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.7176, Adjusted R-squared: 0.7084
## F-statistic: 78.15 on 4 and 123 DF, p-value: < 2.2e-16
# dropping Desempenho Escolar (DE), p = 0.23450
RL_Dis_aj4 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, data=Timeline)
summary(RL_Dis_aj4)
##
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean,
## data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.485 -2.823 -0.171 2.425 17.152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.51395 1.39305 8.983 2.89e-15 ***
## DAs_clean 0.15267 0.06987 2.185 0.0307 *
## FE_clean 0.33976 0.06520 5.211 7.31e-07 ***
## PA_clean 0.51939 0.16696 3.111 0.0023 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.666 on 128 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.6858, Adjusted R-squared: 0.6785
## F-statistic: 93.15 on 3 and 128 DF, p-value: < 2.2e-16
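The manual backward elimination above can be cross-checked with step(), which drops terms by AIC rather than by p-values, so its final model may differ slightly; a minimal sketch (na.omit() keeps every step on the same fixed data set):
dat <- na.omit(Timeline[, c("Dislexia_clean", "DAs_clean", "FE_clean", "PA_clean",
                            "DE_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")])
# start from the full model and let step() remove terms backward by AIC
RL_back <- step(lm(Dislexia_clean ~ ., data = dat), direction = "backward", trace = FALSE)
summary(RL_back)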
par(mfrow=c(2,2))
plot(RL_Dis_aj4)
# residual normality tests
shapiro.test(RL_Dis_aj4$residuals)
##
## Shapiro-Wilk normality test
##
## data: RL_Dis_aj4$residuals
## W = 0.95136, p-value = 0.0001274
sf.test(RL_Dis_aj4$residuals)
##
## Shapiro-Francia normality test
##
## data: RL_Dis_aj4$residuals
## W = 0.94688, p-value = 0.0001391
ad.test(RL_Dis_aj4$residuals)
##
## Anderson-Darling normality test
##
## data: RL_Dis_aj4$residuals
## A = 1.3736, p-value = 0.001418
cvm.test(RL_Dis_aj4$residuals)
##
## Cramer-von Mises normality test
##
## data: RL_Dis_aj4$residuals
## W = 0.18547, p-value = 0.008073
lillie.test(RL_Dis_aj4$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: RL_Dis_aj4$residuals
## D = 0.076317, p-value = 0.05712
pearson.test(RL_Dis_aj4$residuals)
##
## Pearson chi-square normality test
##
## data: RL_Dis_aj4$residuals
## P = 16.409, p-value = 0.1732
# Residual homoscedasticity: check the diagnostic plots
# Goldfeld-Quandt test against heteroskedasticity (requires n > 30)
gqtest(RL_Dis_aj4)
##
## Goldfeld-Quandt test
##
## data: RL_Dis_aj4
## GQ = 0.33945, df1 = 62, df2 = 62, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
# Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
bptest(RL_Dis_aj4)
##
## studentized Breusch-Pagan test
##
## data: RL_Dis_aj4
## BP = 15.84, df = 3, p-value = 0.001223
# Independence of residuals - Durbin-Watson test
plot(RL_Dis_aj4$residuals)
dwtest(RL_Dis_aj4)
##
## Durbin-Watson test
##
## data: RL_Dis_aj4
## DW = 1.5059, p-value = 0.001571
## alternative hypothesis: true autocorrelation is greater than 0
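Since the Durbin-Watson test rejects independence (positive autocorrelation), the coefficient tests of RL_Dis_aj4 can be recomputed with autocorrelation-robust standard errors; a minimal sketch, assuming the sandwich package is installed (lmtest is already loaded for dwtest):
library(sandwich)
library(lmtest)
# Newey-West (HAC) covariance: t-tests robust to autocorrelated residuals
coeftest(RL_Dis_aj4, vcov = NeweyWest(RL_Dis_aj4))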
Trend plot using linear regression # borrowed from amunategui # http://amunategui.github.io/google-trends-walkthrough/
library(ggplot2)
ggplot(Timeline,aes(x = Date)) +
stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +
geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
theme_bw() +
xlab("Periodo") +
ylab("frequencia relativa") +
ggtitle("Regressao linear das Palavras-chave")
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
Trend plot using linear regression, outliers removed - last 5 years # borrowed from aL3xa # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
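The *_clean columns used throughout were presumably built with the IQR rule from the linked answer; a minimal sketch of that rule:
remove_outliers <- function(x, na.rm = TRUE) {
  # values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are replaced by NA
  qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = na.rm)
  H <- 1.5 * IQR(x, na.rm = na.rm)
  y <- x
  y[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA
  y
}
# e.g. Timeline$Dislexia_clean <- remove_outliers(Timeline$Dislexia)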
library(ggplot2)
ggplot(Timeline_5anos,aes(x=Date)) +
stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +
geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
theme_bw() +
xlab("Periodo") +
ylab("frequencia relativa") +
ggtitle("Regressao linear das Palavras-chave, sem outliers, últimos 5 anos")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
Time series: assessment of seasonality and trend # borrowed from marco-lopes # https://gist.github.com/marco-lopes/8550667
# Load the time-series packages
library(tseries)
## Warning: package 'tseries' was built under R version 3.3.3
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
# Series plots - show how each variable behaves over time
ts.plot(Timeline$DAs, main = "Dif.s de Aprendizagem")
ts.plot(Timeline$DA, main = "Dif. de Aprendizagem" )
ts.plot(Timeline$Dislexia, main = "Dislexia")
ts.plot(Timeline$FE, main = "Fracasso Escolar")
ts.plot(Timeline$DE, main = "Desempenho Escolar")
ts.plot(Timeline$PA, main = "Problemas de Aprendizagem")
ts.plot(Timeline$Discalculia, main = "Discalculia")
ts.plot(Timeline$Disgrafia, main = "Disgrafia")
ts.plot(Timeline$TA, main = "Transtorno de Aprendizagem")
# ETS analysis (Error/Trend/Seasonality) - exponential smoothing state space model; the first letter denotes the error type ("A", "M" or "Z"), the second the trend type ("N", "A", "M" or "Z"), and the third the season type ("N", "A", "M" or "Z"), where "N" = none, "A" = additive, "M" = multiplicative and "Z" = selected automatically (Hyndman et al., 2008).
library(forecast)
ets(Timeline$DAs)
## ETS(M,N,N)
##
## Call:
## ets(y = Timeline$DAs)
##
## Smoothing parameters:
## alpha = 0.1533
##
## Initial states:
## l = 66.5315
##
## sigma: 0.2536
##
## AIC AICc BIC
## 1455.723 1455.880 1464.892
ets(Timeline$DA)
## ETS(M,Ad,N)
##
## Call:
## ets(y = Timeline$DA)
##
## Smoothing parameters:
## alpha = 0.0023
## beta = 1e-04
## phi = 0.975
##
## Initial states:
## l = 46.3385
## b = -0.82
##
## sigma: 0.27
##
## AIC AICc BIC
## 1353.667 1354.227 1372.005
ets(Timeline$Dislexia)
## ETS(M,A,N)
##
## Call:
## ets(y = Timeline$Dislexia)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0188
##
## Initial states:
## l = 27.6036
## b = 6.2167
##
## sigma: 0.2472
##
## AIC AICc BIC
## 1427.194 1427.592 1442.476
ets(Timeline$FE)
## ETS(M,A,N)
##
## Call:
## ets(y = Timeline$FE)
##
## Smoothing parameters:
## alpha = 0.008
## beta = 0.008
##
## Initial states:
## l = 66.964
## b = -1.6441
##
## sigma: 0.3457
##
## AIC AICc BIC
## 1298.135 1298.533 1313.416
ets(Timeline$DE)
## ETS(M,N,N)
##
## Call:
## ets(y = Timeline$DE)
##
## Smoothing parameters:
## alpha = 0.1279
##
## Initial states:
## l = 14.5963
##
## sigma: 0.3395
##
## AIC AICc BIC
## 1065.517 1065.674 1074.686
ets(Timeline$PA)
## ETS(M,A,N)
##
## Call:
## ets(y = Timeline$PA)
##
## Smoothing parameters:
## alpha = 0.1118
## beta = 1e-04
##
## Initial states:
## l = 26.3409
## b = -0.0546
##
## sigma: 0.358
##
## AIC AICc BIC
## 1169.117 1169.515 1184.399
ets(Timeline$Discalculia)
## ETS(A,N,N)
##
## Call:
## ets(y = Timeline$Discalculia)
##
## Smoothing parameters:
## alpha = 0.1624
##
## Initial states:
## l = 3.4083
##
## sigma: 0.946
##
## AIC AICc BIC
## 782.3836 782.5405 791.5524
ets(Timeline$Disgrafia)
## ETS(A,Ad,N)
##
## Call:
## ets(y = Timeline$Disgrafia)
##
## Smoothing parameters:
## alpha = 0.0296
## beta = 0.0296
## phi = 0.8
##
## Initial states:
## l = 13.2798
## b = -2.5129
##
## sigma: 1.0301
##
## AIC AICc BIC
## 815.1467 815.7067 833.4841
ets(Timeline$TA)
## ETS(A,Ad,N)
##
## Call:
## ets(y = Timeline$TA)
##
## Smoothing parameters:
## alpha = 0.046
## beta = 3e-04
## phi = 0.9524
##
## Initial states:
## l = 3.4225
## b = -0.149
##
## sigma: 0.56
##
## AIC AICc BIC
## 623.7844 624.3444 642.1218
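Note that ets() received plain vectors, which carry no period information (frequency 1), so the season type is "N" in every fit above. A minimal sketch of a proper monthly series, assuming the data start in January 2004 as column X indicates:
DAs_ts <- ts(Timeline$DAs, start = c(2004, 1), frequency = 12)
ets(DAs_ts)              # a seasonal component can now be selected
plot(decompose(DAs_ts))  # trend / seasonal / remainder decomposition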
# Augmented Dickey-Fuller test of the null hypothesis that the series has a unit root (i.e., is non-stationary); p-values are interpolated from Table 4.2 of Banerjee et al. (1993), which may generate a warning.
library(tseries)
adf.test(Timeline$DAs)
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$DAs
## Dickey-Fuller = -3.965, Lag order = 5, p-value = 0.01276
## alternative hypothesis: stationary
adf.test(Timeline$DA)
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$DA
## Dickey-Fuller = -3.7972, Lag order = 5, p-value = 0.02098
## alternative hypothesis: stationary
adf.test(Timeline$Dislexia)
## Warning in adf.test(Timeline$Dislexia): p-value smaller than printed p-
## value
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$Dislexia
## Dickey-Fuller = -4.5001, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$FE)
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$FE
## Dickey-Fuller = -3.7467, Lag order = 5, p-value = 0.02345
## alternative hypothesis: stationary
adf.test(Timeline$DE)
## Warning in adf.test(Timeline$DE): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$DE
## Dickey-Fuller = -5.642, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$PA)
## Warning in adf.test(Timeline$PA): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$PA
## Dickey-Fuller = -4.8606, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$Discalculia)
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$Discalculia
## Dickey-Fuller = -2.7087, Lag order = 5, p-value = 0.2809
## alternative hypothesis: stationary
adf.test(Timeline$Disgrafia)
## Warning in adf.test(Timeline$Disgrafia): p-value smaller than printed p-
## value
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$Disgrafia
## Dickey-Fuller = -4.5735, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$TA)
##
## Augmented Dickey-Fuller Test
##
## data: Timeline$TA
## Dickey-Fuller = -3.0983, Lag order = 5, p-value = 0.1184
## alternative hypothesis: stationary
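ADF does not reject the unit root for Discalculia and TA. A complementary check is the KPSS test, whose null hypothesis is reversed (stationarity), together with an ADF test on first differences; a minimal sketch using tseries:
kpss.test(Timeline$Discalculia)       # null: the series is level-stationary
adf.test(diff(Timeline$Discalculia))  # ADF on first differences
adf.test(diff(Timeline$TA))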
# ACF and PACF plots - auto- and cross-covariance and correlation function estimation. For a well-behaved series the ACF and PACF coefficients decay quickly to zero as the lag k increases; note any lags that exceed the control limits.
par(mfrow=c(1,2))
acf(Timeline$DAs, main = "Dif.s de Aprendizagem")
pacf(Timeline$DAs)
par(mfrow=c(1,2))
acf(Timeline$DA, main = "Dif. de Aprendizagem")
pacf(Timeline$DA)
par(mfrow=c(1,2))
acf(Timeline$FE, main = "Fracasso Escolar")
pacf(Timeline$FE)
par(mfrow=c(1,2))
acf(Timeline$DE, main = "Desempenho Escolar")
pacf(Timeline$DE)
par(mfrow=c(1,2))
acf(Timeline$PA, main = "Problemas de Aprendizagem")
pacf(Timeline$PA)
par(mfrow=c(1,2))
acf(Timeline$Dislexia, main = "Dislexia")
pacf(Timeline$Dislexia)
par(mfrow=c(1,2))
acf(Timeline$Discalculia, main = "Discalculia")
pacf(Timeline$Discalculia)
par(mfrow=c(1,2))
acf(Timeline$Disgrafia, main = "Disgrafia")
pacf(Timeline$Disgrafia)
par(mfrow=c(1,2))
acf(Timeline$TA, main = "Transtorno de Aprendizagem")
pacf(Timeline$TA)
# COX-STUART TEST: binomial-based test for trend in the series, giving an idea of how the values evolve
library(randtests)
##
## Attaching package: 'randtests'
## The following object is masked from 'package:tseries':
##
## runs.test
cox.stuart.test(Timeline$DAs)
##
## Cox Stuart test
##
## data: Timeline$DAs
## statistic = 9, n = 78, p-value = 1.381e-12
## alternative hypothesis: non randomness
# WALD-WOLFOWITZ TEST: nonparametric test of the randomness of a data sequence; applied to a time series it assesses trend.
library(adehabitat)
## Warning: package 'adehabitat' was built under R version 3.3.3
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.3.3
## Loading required package: tkrplot
## Loading required package: tcltk
## Loading required package: shapefiles
## Warning: package 'shapefiles' was built under R version 3.3.3
## Loading required package: foreign
##
## Attaching package: 'shapefiles'
## The following objects are masked from 'package:foreign':
##
## read.dbf, write.dbf
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.3
##
## ************************************************
## THE PACKAGE adehabitat IS NOW DEPRECATED!!!!!!!
## It is recommended to use the packages adehabitatMA, adehabitatLT,
## adehabitatHR, and adehabitatHS instead (deprecation banner abridged).
##
## Attaching package: 'adehabitat'
## The following object is masked from 'package:zoo':
##
## is.regular
wawotest(Timeline$DAs)
## a ea va za p
## 109.306108 -1.000000 149.880596 9.010043 0.000000
# Mann-Kendall trend test, valid only if the series is serially independent
library(Kendall)
## Warning: package 'Kendall' was built under R version 3.3.3
MannKendall(Timeline$DAs)
## tau = -0.573, 2-sided pvalue =< 2.22e-16
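The same trend test can be applied to every keyword column at once rather than only DAs; a minimal sketch collecting the two-sided Mann-Kendall p-values:
termos <- c("DAs", "DA", "Dislexia", "FE", "PA", "DE",
            "Discalculia", "Disgrafia", "TA")
# sl is the two-sided p-value in the object returned by MannKendall()
sapply(termos, function(v) MannKendall(Timeline[[v]])$sl)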
Time-series analysis by month ## borrowed from claudio ## https://gustibuseconomia.com/2014/03/26/momento-r-do-dia-furacoes-ou-uma-imagem-vale-mais-do-que-mil-palavras-mas-qual-imagem-e-esta-a-pergunta-de-um-milhao-de-imagens/
Timeline$mes = as.factor(c(rep(month.name, 13), "January"))
Timeline_5anos$mes = as.factor(c(rep(month.name, 5)))
# match each observation to its month position in the year
Timeline$ordem = as.factor(c(rep(1:12, 13), 1))
Timeline_5anos$ordem = as.factor(c(rep(1:12, 5)))
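The rep() calls above hard-code the row layout (13 full years plus one extra January); a more robust sketch derives both factors from the Tempo date column, assuming it parses with as.Date():
m <- as.integer(format(as.Date(Timeline$Tempo), "%m"))  # month number 1-12
Timeline$mes <- factor(month.name[m], levels = month.name)
Timeline$ordem <- factor(m)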
library(ggplot2)
library(lattice)
library(latticeExtra)
## Loading required package: RColorBrewer
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
# If the notches of two box plots do not overlap, this is "strong evidence" that the two medians differ (Chambers et al., 1983, p. 62).
# plots by month since 2004
# Dislexia
op <- par(mfrow = c(1,2))
boxplot(Dislexia~ordem,data=Timeline, notch = T)
## Warning in bxp(structure(list(stats = structure(c(13, 15, 18.5, 23, 35, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
monthplot(Timeline$Dislexia, col = "purple",ylim=c(min(Timeline$Dislexia),max(Timeline$Dislexia)),main="Dislexia",xlab="meses",ylab="media de frequencia")
par(op)
#Dificuldades de Aprendizagem
op <- par(mfrow = c(1,2))
boxplot(DAs~ordem,data=Timeline)
monthplot(Timeline$DAs, col="darkblue",ylim=c(min(Timeline$DAs),max(Timeline$DAs)),main="Dif.s de Aprendizagem",xlab="meses",ylab="media de frequencia")
par(op)
# Organized by month over the last five years
op <- par(mfrow = c(2,2))
monthplot(Timeline_5anos$Dislexia, xlab = "Dislexia", ylab = "", col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, xlab = "Dif.s de Aprendizagem", ylab = "", col="darkblue", cex.axis = 0.8)
monthplot(Timeline_5anos$Dislexia, ylab = "", type = "h", col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, ylab = "", type = "h", col="darkblue", cex.axis = 0.8)
par(op)
op <- par(mfrow = c(2,2))
boxplot(Timeline_5anos$Dislexia~Timeline_5anos$ordem, main="Dislexia", xlab="mes", ylab="frequencia", outline = F, col="purple")
boxplot(Timeline_5anos$DAs~Timeline_5anos$ordem, main="Dif.s de Aprendizagem", xlab="mes", ylab="frequencia", outline = F, col="darkblue")
par(op)
Analysis of variance by month ## borrowed from Salvatore S. Mangiafico ## http://rcompanion.org/rcompanion/d_06.html
# install these packages if they are not already installed: "dplyr", "FSA", "DescTools", "rcompanion", "multcompView"
## Dificuldades de Aprendizagem (DAs)
### Specify the order of factor levels
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:adehabitat':
##
## id
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Timeline = mutate(Timeline,
mes = factor(mes, levels=unique(mes)))
# Medians and descriptive statistics
library(FSA)
## Warning: package 'FSA' was built under R version 3.3.3
## ## FSA v0.8.13. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
Summarize(DAs ~ mes,data = Timeline)
## mes n mean sd min Q1 median Q3 max
## 1 January 14 20.50000 12.965932 9 12.25 16 19 50
## 2 February 13 25.30769 12.795732 14 19.00 22 25 61
## 3 March 13 41.53846 23.827909 23 25.00 30 49 100
## 4 April 13 37.69231 13.616073 23 29.00 33 42 69
## 5 May 13 37.00000 11.350477 23 28.00 38 40 62
## 6 June 13 34.61538 11.087068 22 28.00 35 38 63
## 7 July 13 29.07692 13.726262 15 24.00 25 26 63
## 8 August 13 34.15385 14.461709 18 26.00 30 36 61
## 9 September 13 32.76923 7.949359 22 29.00 31 36 49
## 10 October 13 34.38462 17.342330 19 23.00 30 41 85
## 11 November 13 36.69231 15.200793 23 28.00 31 40 76
## 12 December 13 21.46154 7.556624 15 16.00 20 25 43
# Histograms for each group
library(lattice)
histogram(~ DAs | ordem, data=Timeline, layout=c(12,1))
# Nonparametric analysis of variance: the Kruskal-Wallis test applies to one-way data with more than two groups (if p < 0.05, reject the null hypothesis); with similarly shaped group distributions it can be interpreted as a difference in medians
kruskal.test(DAs ~ mes, data = Timeline)
##
## Kruskal-Wallis rank sum test
##
## data: DAs by mes
## Kruskal-Wallis chi-squared = 44.058, df = 11, p-value = 7.098e-06
# POST HOC TESTS
# Nemenyi test for multiple comparisons (chi-square distribution); not appropriate for groups with unequal numbers of observations
library(DescTools)
## Warning: package 'DescTools' was built under R version 3.3.3
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:forecast':
##
## BoxCox
## The following object is masked from 'package:car':
##
## Recode
NemenyiTest(x = Timeline$DAs, g = Timeline$mes, dist="chisq")
##
## Nemenyi's test of multiple comparisons for independent samples (chisq)
##
## mean.rank.diff pval
## February-January 13.7472527 1.0000
## March-January 61.4010989 0.3431
## April-January 66.5549451 0.2103
## May-January 67.2472527 0.1953
## June-January 59.3626374 0.4043
## July-January 31.5934066 0.9870
## August-January 51.4780220 0.6560
## September-January 57.5934066 0.4600
## October-January 49.9395604 0.7023
## November-January 61.0164835 0.3543
## December-January 2.8241758 1.0000
## March-February 47.6538462 0.7883
## April-February 52.8076923 0.6443
## May-February 53.5000000 0.6229
## June-February 45.6153846 0.8354
## July-February 17.8461538 0.9999
## August-February 37.7307692 0.9541
## September-February 43.8461538 0.8709
## October-February 36.1923077 0.9665
## November-February 47.2692308 0.7977
## December-February -10.9230769 1.0000
## April-March 5.1538462 1.0000
## May-March 5.8461538 1.0000
## June-March -2.0384615 1.0000
## July-March -29.8076923 0.9932
## August-March -9.9230769 1.0000
## September-March -3.8076923 1.0000
## October-March -11.4615385 1.0000
## November-March -0.3846154 1.0000
## December-March -58.5769231 0.4623
## May-April 0.6923077 1.0000
## June-April -7.1923077 1.0000
## July-April -34.9615385 0.9744
## August-April -15.0769231 1.0000
## September-April -8.9615385 1.0000
## October-April -16.6153846 1.0000
## November-April -5.5384615 1.0000
## December-April -63.7307692 0.3096
## June-May -7.8846154 1.0000
## July-May -35.6538462 0.9701
## August-May -15.7692308 1.0000
## September-May -9.6538462 1.0000
## October-May -17.3076923 1.0000
## November-May -6.2307692 1.0000
## December-May -64.4230769 0.2911
## July-June -27.7692308 0.9964
## August-June -7.8846154 1.0000
## September-June -1.7692308 1.0000
## October-June -9.4230769 1.0000
## November-June 1.6538462 1.0000
## December-June -56.5384615 0.5269
## August-July 19.8846154 0.9999
## September-July 26.0000000 0.9980
## October-July 18.3461538 0.9999
## November-July 29.4230769 0.9939
## December-July -28.7692308 0.9950
## September-August 6.1153846 1.0000
## October-August -1.5384615 1.0000
## November-August 9.5384615 1.0000
## December-August -48.6538462 0.7629
## October-September -7.6538462 1.0000
## November-September 3.4230769 1.0000
## December-September -54.7692308 0.5832
## November-October 11.0769231 1.0000
## December-October -47.1153846 0.8014
## December-November -58.1923077 0.4744
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Dunn (1964) test for multiple comparisons: if the Kruskal-Wallis test is significant, a post hoc analysis can determine which levels of the independent variable differ from each other
library(FSA)
dunnTest(DAs ~ mes, data=Timeline)
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Holm method.
## Comparison Z P.unadj P.adj
## 1 April - August 0.84597902 0.3975644470 1.000000000
## 2 April - December 3.57598786 0.0003489078 0.021981194
## 3 August - December 2.73000883 0.0063332628 0.316663142
## 4 April - February 2.96308469 0.0030457276 0.161423563
## 5 August - February 2.11710567 0.0342508810 1.000000000
## 6 December - February -0.61290317 0.5399403677 1.000000000
## 7 April - January 3.80298259 0.0001429644 0.009292686
## 8 August - January 2.94147973 0.0032664822 0.169857076
## 9 December - January 0.16137481 0.8717982066 1.000000000
## 10 February - January 0.78552485 0.4321459091 1.000000000
## 11 April - July 1.96172176 0.0497948866 1.000000000
## 12 August - July 1.11574274 0.2645322697 1.000000000
## 13 December - July -1.61426609 0.1064697213 1.000000000
## 14 February - July -1.00136292 0.3166513818 1.000000000
## 15 January - July -1.80526293 0.0710335470 1.000000000
## 16 April - June 0.40356652 0.6865315071 1.000000000
## 17 August - June -0.44241250 0.6581907361 1.000000000
## 18 December - June -3.17242133 0.0015117351 0.084657165
## 19 February - June -2.55951816 0.0104817378 0.482159939
## 20 January - June -3.39201056 0.0006938177 0.041629059
## 21 July - June -1.55815524 0.1191964526 1.000000000
## 22 April - March 0.28918671 0.7724385041 1.000000000
## 23 August - March -0.55679232 0.5776693347 1.000000000
## 24 December - March -3.28680115 0.0010133236 0.058772769
## 25 February - March -2.67389798 0.0074975256 0.367378754
## 26 January - March -3.50848927 0.0004506594 0.027940884
## 27 July - March -1.67253506 0.0944188610 1.000000000
## 28 June - March -0.11437982 0.9089367129 1.000000000
## 29 April - May -0.03884598 0.9690131893 1.000000000
## 30 August - May -0.88482500 0.3762510249 1.000000000
## 31 December - May -3.61483383 0.0003005403 0.019234581
## 32 February - May -3.00193066 0.0026827327 0.144867567
## 33 January - May -3.84254140 0.0001217669 0.008036613
## 34 July - May -2.00056774 0.0454389932 1.000000000
## 35 June - May -0.44241250 0.6581907361 1.000000000
## 36 March - May -0.32803268 0.7428869519 1.000000000
## 37 April - November 0.31076780 0.7559771472 1.000000000
## 38 August - November -0.53521122 0.5925038099 1.000000000
## 39 December - November -3.26522005 0.0010937907 0.062346068
## 40 February - November -2.65231688 0.0079941477 0.383719089
## 41 January - November -3.48651215 0.0004893631 0.029851152
## 42 July - November -1.65095396 0.0987479765 1.000000000
## 43 June - November -0.09279872 0.9260634685 1.000000000
## 44 March - November 0.02158110 0.9827821120 0.982782112
## 45 May - November 0.34961378 0.7266285682 1.000000000
## 46 April - October 0.93230341 0.3511797448 1.000000000
## 47 August - October 0.08632439 0.9312085504 1.000000000
## 48 December - October -2.64368444 0.0082009071 0.385442634
## 49 February - October -2.03078128 0.0422771873 1.000000000
## 50 January - October -2.85357127 0.0043230830 0.220477231
## 51 July - October -1.02941835 0.3032831294 1.000000000
## 52 June - October 0.52873689 0.5969879835 1.000000000
## 53 March - October 0.64311671 0.5201483754 1.000000000
## 54 May - October 0.97114939 0.3314738931 1.000000000
## 55 November - October 0.62153561 0.5342472733 1.000000000
## 56 April - September 0.50283957 0.6150770693 1.000000000
## 57 August - September -0.34313945 0.7314935537 1.000000000
## 58 December - September -3.07314828 0.0021181321 0.116497264
## 59 February - September -2.46024512 0.0138842154 0.624789692
## 60 January - September -3.29091584 0.0009986179 0.058918453
## 61 July - September -1.45888219 0.1445975355 1.000000000
## 62 June - September 0.09927305 0.9209214769 1.000000000
## 63 March - September 0.21365287 0.8308177706 1.000000000
## 64 May - September 0.54168555 0.5880351477 1.000000000
## 65 November - September 0.19207177 0.8476859892 1.000000000
## 66 October - September -0.42946384 0.6675857033 1.000000000
library(stats)
# Bartlett test with the null hypothesis that the group variances are equal
bartlett.test(DAs_clean~ordem, Timeline)
##
## Bartlett test of homogeneity of variances
##
## data: DAs_clean by ordem
## Bartlett's K-squared = 9.7493, df = 11, p-value = 0.5531
library(car)
# Levene test of homogeneity of variances (homogeneity is supported by p-values above 0.05)
leveneTest(DAs_clean~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.3731 0.9644
## 134
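With Bartlett and Levene both supporting homogeneous variances for DAs_clean, a parametric one-way ANOVA by month is a defensible cross-check of the Kruskal-Wallis result; a minimal sketch:
# one-way ANOVA of the cleaned DAs series across months
anova(lm(DAs_clean ~ ordem, data = Timeline))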
##Dislexia
### Specify the order of factor levels
library(dplyr)
Timeline = mutate(Timeline,
mes = factor(mes, levels=unique(mes)))
# Medians and descriptive statistics
library(FSA)
Summarize(Dislexia ~ mes,data = Timeline)
## mes n mean sd min Q1 median Q3 max
## 1 January 14 25.35714 22.321453 13 15.25 18.5 22.75 100
## 2 February 13 28.23077 16.798580 17 20.00 21.0 26.00 77
## 3 March 13 32.07692 11.700866 19 25.00 27.0 35.00 58
## 4 April 13 33.23077 9.790760 20 25.00 33.0 40.00 52
## 5 May 13 37.69231 18.345683 21 24.00 31.0 48.00 82
## 6 June 13 33.46154 15.425171 17 23.00 28.0 36.00 72
## 7 July 13 25.84615 11.260892 16 20.00 22.0 25.00 57
## 8 August 13 28.23077 9.558189 18 23.00 25.0 31.00 49
## 9 September 13 32.92308 12.277768 18 26.00 31.0 38.00 65
## 10 October 13 34.00000 19.113695 18 22.00 25.0 39.00 84
## 11 November 13 31.84615 16.456236 19 24.00 26.0 37.00 81
## 12 December 13 24.53846 18.919194 14 15.00 18.0 26.00 85
# Histograms for each group
library(lattice)
histogram(~ Dislexia | ordem, data=Timeline, layout=c(12,1))
# Nonparametric analysis of variance: the Kruskal-Wallis test applies to one-way data with more than two groups (if p < 0.05, reject the null hypothesis); with similarly shaped group distributions it can be interpreted as a difference in medians
kruskal.test(Dislexia ~ mes, data = Timeline)
##
## Kruskal-Wallis rank sum test
##
## data: Dislexia by mes
## Kruskal-Wallis chi-squared = 31.264, df = 11, p-value = 0.001
# POST HOC TESTS
# Nemenyi test for multiple comparisons (chi-square distribution); not appropriate for groups with unequal numbers of observations
library(DescTools)
NemenyiTest(x = Timeline$Dislexia, g = Timeline$mes, dist="chisq")
##
## Nemenyi's test of multiple comparisons for independent samples (chisq)
##
## mean.rank.diff pval
## February-January 20.1126374 0.9998
## March-January 50.4587912 0.6871
## April-January 57.7664835 0.4547
## May-January 58.5357143 0.4303
## June-January 48.5357143 0.7425
## July-January 18.1895604 0.9999
## August-January 35.5741758 0.9663
## September-January 53.0741758 0.6061
## October-January 43.6895604 0.8587
## November-January 42.6895604 0.8778
## December-January 0.5357143 1.0000
## March-February 30.3461538 0.9921
## April-February 37.6538462 0.9548
## May-February 38.4230769 0.9475
## June-February 28.4230769 0.9955
## July-February -1.9230769 1.0000
## August-February 15.4615385 1.0000
## September-February 32.9615385 0.9841
## October-February 23.5769231 0.9992
## November-February 22.5769231 0.9995
## December-February -19.5769231 0.9999
## April-March 7.3076923 1.0000
## May-March 8.0769231 1.0000
## June-March -1.9230769 1.0000
## July-March -32.2692308 0.9867
## August-March -14.8846154 1.0000
## September-March 2.6153846 1.0000
## October-March -6.7692308 1.0000
## November-March -7.7692308 1.0000
## December-March -49.9230769 0.7289
## May-April 0.7692308 1.0000
## June-April -9.2307692 1.0000
## July-April -39.5769231 0.9351
## August-April -22.1923077 0.9996
## September-April -4.6923077 1.0000
## October-April -14.0769231 1.0000
## November-April -15.0769231 1.0000
## December-April -57.2307692 0.5051
## June-May -10.0000000 1.0000
## July-May -40.3461538 0.9257
## August-May -22.9615385 0.9994
## September-May -5.4615385 1.0000
## October-May -14.8461538 1.0000
## November-May -15.8461538 1.0000
## December-May -58.0000000 0.4807
## July-June -30.3461538 0.9921
## August-June -12.9615385 1.0000
## September-June 4.5384615 1.0000
## October-June -4.8461538 1.0000
## November-June -5.8461538 1.0000
## December-June -48.0000000 0.7799
## August-July 17.3846154 1.0000
## September-July 34.8846154 0.9749
## October-July 25.5000000 0.9983
## November-July 24.5000000 0.9989
## December-July -17.6538462 1.0000
## September-August 17.5000000 1.0000
## October-August 8.1153846 1.0000
## November-August 7.1153846 1.0000
## December-August -35.0384615 0.9740
## October-September -9.3846154 1.0000
## November-September -10.3846154 1.0000
## December-September -52.5384615 0.6527
## November-October -1.0000000 1.0000
## December-October -43.1538462 0.8834
## December-November -42.1538462 0.8999
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Dunn (1964) test for multiple comparisons: if the Kruskal-Wallis test is significant, a post hoc analysis can determine which levels of the independent variable differ from each other
library(FSA)
dunnTest(Dislexia ~ mes, data=Timeline)
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Holm method.
## Comparison Z P.unadj P.adj
## 1 April - August 1.24537821 0.2129927596 1.00000000
## 2 April - December 3.21165126 0.0013197448 0.08314392
## 3 August - December 1.96627305 0.0492670785 1.00000000
## 4 April - February 2.11304206 0.0345971684 1.00000000
## 5 August - February 0.86766385 0.3855783828 1.00000000
## 6 December - February -1.09860920 0.2719385633 1.00000000
## 7 April - January 3.30120019 0.0009627216 0.06257691
## 8 August - January 2.03296910 0.0420556448 1.00000000
## 9 December - January 0.03061464 0.9755768665 0.97557687
## 10 February - January 1.14938349 0.2503978856 1.00000000
## 11 April - July 2.22096044 0.0263536406 1.00000000
## 12 August - July 0.97558224 0.3292715310 1.00000000
## 13 December - July -0.99069081 0.3218365796 1.00000000
## 14 February - July 0.10791839 0.9140604300 1.00000000
## 15 January - July -1.03948478 0.2985793329 1.00000000
## 16 April - June 0.51800827 0.6044525014 1.00000000
## 17 August - June -0.72736994 0.4669993629 1.00000000
## 18 December - June -2.69364299 0.0070675823 0.40285219
## 19 February - June -1.59503379 0.1107046801 1.00000000
## 20 January - June -2.77368639 0.0055425079 0.32146546
## 21 July - June -1.70295218 0.0885770182 1.00000000
## 22 April - March 0.41008988 0.6817400175 1.00000000
## 23 August - March -0.83528833 0.4035553862 1.00000000
## 24 December - March -2.80156138 0.0050855966 0.30005020
## 25 February - March -1.70295218 0.0885770182 1.00000000
## 26 January - March -2.88358510 0.0039317640 0.23590584
## 27 July - March -1.81087057 0.0701608897 1.00000000
## 28 June - March -0.10791839 0.9140604300 1.00000000
## 29 April - May -0.04316736 0.9655681273 1.00000000
## 30 August - May -1.28854556 0.1975561186 1.00000000
## 31 December - May -3.25481861 0.0011346481 0.07261748
## 32 February - May -2.15620941 0.0310673170 1.00000000
## 33 January - May -3.34515968 0.0008223521 0.05427524
## 34 July - May -2.26412780 0.0235662556 1.00000000
## 35 June - May -0.56117562 0.5746778195 1.00000000
## 36 March - May -0.45325723 0.6503635241 1.00000000
## 37 April - November 0.84608017 0.3975080226 1.00000000
## 38 August - November -0.39929804 0.6896736118 1.00000000
## 39 December - November -2.36557109 0.0180022869 0.95412121
## 40 February - November -1.26696189 0.2051689184 1.00000000
## 41 January - November -2.43959432 0.0147037637 0.80870700
## 42 July - November -1.37488028 0.1691685653 1.00000000
## 43 June - November 0.32807190 0.7428572979 1.00000000
## 44 March - November 0.43599029 0.6628437736 1.00000000
## 45 May - November 0.88924753 0.3738700664 1.00000000
## 46 April - October 0.78996261 0.4295496063 1.00000000
## 47 August - October -0.45541560 0.6488102733 1.00000000
## 48 December - October -2.42168865 0.0154485803 0.83422334
## 49 February - October -1.32307945 0.1858089583 1.00000000
## 50 January - October -2.49674165 0.0125340237 0.70190533
## 51 July - October -1.43099784 0.1524308349 1.00000000
## 52 June - October 0.27195434 0.7856571278 1.00000000
## 53 March - October 0.37987273 0.7040398916 1.00000000
## 54 May - October 0.83312996 0.4047714368 1.00000000
## 55 November - October -0.05611756 0.9552481533 1.00000000
## 56 April - September 0.26332087 0.7923032759 1.00000000
## 57 August - September -0.98205734 0.3260716006 1.00000000
## 58 December - September -2.94833039 0.0031949541 0.19489220
## 59 February - September -1.84972119 0.0643537452 1.00000000
## 60 January - September -3.03304734 0.0024209761 0.15010052
## 61 July - September -1.95763958 0.0502723201 1.00000000
## 62 June - September -0.25468740 0.7989645503 1.00000000
## 63 March - September -0.14676901 0.8833143470 1.00000000
## 64 May - September 0.30648822 0.7592329441 1.00000000
## 65 November - September -0.58275930 0.5600553469 1.00000000
## 66 October - September -0.52664174 0.5984424018 1.00000000
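With 66 pairwise comparisons, scanning the table by eye is error-prone. A minimal sketch, assuming the same Timeline data and mes factor as above: dunnTest() returns an object whose res element is a data frame with the columns Comparison, Z, P.unadj and P.adj, which can be filtered for Holm-adjusted p-values below 0.05 (none are expected to survive here).
#Keep only the comparisons that remain significant after Holm adjustment
dunn_res <- dunnTest(Dislexia ~ mes, data=Timeline)$res
subset(dunn_res, P.adj < 0.05)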
#Load the packages for the variance-homogeneity test functions
#If not yet installed: install.packages("car")
library(stats)
#Bartlett test, with the null hypothesis that the group variances are
#equal.
bartlett.test(Dislexia~ordem, Timeline)
##
## Bartlett test of homogeneity of variances
##
## data: Dislexia by ordem
## Bartlett's K-squared = 19.84, df = 11, p-value = 0.04759
library(car)
#Test of homogeneity of variances (homogeneity is accepted when p is above 0.05)
leveneTest(Dislexia~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.281 0.9885
## 145
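Bartlett's test (p = 0.048) and Levene's test (p = 0.99) disagree here; Bartlett's test is sensitive to departures from normality, while the median-centered Levene's test is more robust. As a rank-based cross-check, assuming the same Timeline data and ordem factor as above, the Fligner-Killeen test from the stats package can be run:
#Rank-based test of homogeneity of variances, robust to non-normality
fligner.test(Dislexia ~ ordem, data=Timeline)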
Description by state
GeoMap <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/GeoMap.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
str(GeoMap)
## 'data.frame': 27 obs. of 21 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Região : Factor w/ 27 levels "Acre","Alagoas",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ DA_Shop : int NA 0 0 10 3 6 7 0 2 0 ...
## $ Dislexia_Shop: int NA 13 100 10 2 6 7 0 4 11 ...
## $ Cartilha_Shop: int NA 20 99 20 14 12 29 6 5 17 ...
## $ Cartilha : int 61 46 91 50 46 38 50 37 43 52 ...
## $ DA : int NA 77 NA 58 70 64 40 49 49 100 ...
## $ Dislexia : int 59 47 100 54 49 51 46 48 49 55 ...
## $ Pais : int 78 81 98 77 81 77 74 74 70 82 ...
## $ Pais1 : int 73 74 90 70 76 71 69 68 65 74 ...
## $ Fam1 : int 26 29 44 29 27 27 27 23 22 30 ...
## $ EF1 : int 24 28 32 18 29 23 17 27 19 30 ...
## $ Crianca1 : int 83 82 100 72 80 80 85 74 73 81 ...
## $ TA1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DA1 : int 0 23 0 15 21 20 11 14 14 28 ...
## $ Dislexia1 : int 61 43 100 55 50 49 44 47 49 54 ...
## $ Disgrafia1 : int 0 0 0 0 4 4 4 0 4 0 ...
## $ Discalculia1 : int 0 4 0 4 4 4 3 4 3 6 ...
## $ Cartilha_sug : int 80 52 95 58 52 44 57 43 49 59 ...
## $ DA_sug : int NA 83 NA 53 75 71 41 51 51 100 ...
## $ Pais_sug : int 78 79 96 75 81 76 73 73 69 80 ...
tail(GeoMap)
## X Região DA_Shop Dislexia_Shop Cartilha_Shop Cartilha DA
## 22 22 Rondônia 0 0 54 52 NA
## 23 23 Roraima NA NA NA 100 NA
## 24 24 Santa Catarina 3 0 11 32 43
## 25 25 São Paulo 2 5 20 32 31
## 26 26 Sergipe 0 35 18 46 NA
## 27 27 Tocantins 0 30 30 65 NA
## Dislexia Pais Pais1 Fam1 EF1 Crianca1 TA1 DA1 Dislexia1 Disgrafia1
## 22 55 77 71 21 24 77 0 0 55 0
## 23 77 100 94 34 33 95 0 0 79 0
## 24 48 77 71 19 27 63 0 13 48 3
## 25 46 71 64 18 19 65 1 9 45 2
## 26 46 78 72 27 22 77 0 0 42 0
## 27 41 81 72 24 34 81 0 0 41 0
## Discalculia1 Cartilha_sug DA_sug Pais_sug
## 22 0 63 NA 76
## 23 0 100 NA 100
## 24 3 36 48 75
## 25 3 37 33 69
## 26 0 54 NA 77
## 27 0 75 NA 77
summary(GeoMap)
## X Região DA_Shop Dislexia_Shop
## Min. : 1.0 Acre : 1 Min. : 0.00 Min. : 0.00
## 1st Qu.: 7.5 Alagoas : 1 1st Qu.: 0.00 1st Qu.: 4.00
## Median :14.0 Amapá : 1 Median : 2.00 Median : 7.00
## Mean :14.0 Amazonas: 1 Mean : 2.56 Mean : 12.32
## 3rd Qu.:20.5 Bahia : 1 3rd Qu.: 3.00 3rd Qu.: 11.00
## Max. :27.0 Ceará : 1 Max. :10.00 Max. :100.00
## (Other) :21 NA's :2 NA's :2
## Cartilha_Shop Cartilha DA Dislexia
## Min. : 5.00 Min. : 29.00 Min. : 28.00 Min. : 41.00
## 1st Qu.: 9.00 1st Qu.: 37.50 1st Qu.: 42.50 1st Qu.: 46.00
## Median :17.00 Median : 46.00 Median : 55.00 Median : 49.00
## Mean :20.84 Mean : 48.63 Mean : 56.53 Mean : 52.11
## 3rd Qu.:20.00 3rd Qu.: 52.50 3rd Qu.: 68.00 3rd Qu.: 54.00
## Max. :99.00 Max. :100.00 Max. :100.00 Max. :100.00
## NA's :2 NA's :8
## Pais Pais1 Fam1 EF1
## Min. : 67.00 Min. :62.00 Min. :18.00 Min. :17.00
## 1st Qu.: 72.00 1st Qu.:67.00 1st Qu.:21.50 1st Qu.:20.50
## Median : 75.00 Median :69.00 Median :26.00 Median :25.00
## Mean : 76.89 Mean :70.74 Mean :25.22 Mean :25.04
## 3rd Qu.: 78.00 3rd Qu.:72.00 3rd Qu.:27.00 3rd Qu.:29.00
## Max. :100.00 Max. :94.00 Max. :44.00 Max. :34.00
##
## Crianca1 TA1 DA1 Dislexia1
## Min. : 60.00 Min. :0.00000 Min. : 0.00 Min. : 41.00
## 1st Qu.: 71.50 1st Qu.:0.00000 1st Qu.: 0.00 1st Qu.: 46.00
## Median : 77.00 Median :0.00000 Median :13.00 Median : 49.00
## Mean : 76.11 Mean :0.03704 Mean :11.48 Mean : 52.07
## 3rd Qu.: 80.50 3rd Qu.:0.00000 3rd Qu.:17.50 3rd Qu.: 54.50
## Max. :100.00 Max. :1.00000 Max. :28.00 Max. :100.00
##
## Disgrafia1 Discalculia1 Cartilha_sug DA_sug
## Min. :0.000 Min. :0.000 Min. : 32.00 Min. : 30.00
## 1st Qu.:0.000 1st Qu.:1.500 1st Qu.: 43.50 1st Qu.: 46.00
## Median :0.000 Median :4.000 Median : 52.00 Median : 53.00
## Mean :1.593 Mean :2.926 Mean : 55.33 Mean : 59.05
## 3rd Qu.:3.500 3rd Qu.:4.000 3rd Qu.: 63.00 3rd Qu.: 73.00
## Max. :5.000 Max. :6.000 Max. :100.00 Max. :100.00
## NA's :8
## Pais_sug
## Min. : 67.00
## 1st Qu.: 71.50
## Median : 74.00
## Mean : 75.52
## 3rd Qu.: 77.00
## Max. :100.00
##
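The summary above reports missing values (e.g., 8 NA's in DA); a quick check, assuming the same GeoMap data frame, lists the states responsible:
#States whose DA column is NA (summary() reports 8 of them)
GeoMap$Região[is.na(GeoMap$DA)]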
Relationship between search terms and region
Comparative frequency plot - 2004 to 2017
plot(GeoMap$Dislexia1~GeoMap$Região, type = "l", lty=1, lwd=2, col="red", ylim=c(0,100), xlab="Region", ylab="Relative frequency", main = "Relative frequency by geographic region, 2004 to 2017")
lines(GeoMap$DA1~GeoMap$Região, type = "l", lty=1, lwd=2,col= "darkblue")
lines(GeoMap$Discalculia1~GeoMap$Região, type = "l", lty=1, lwd=2,col= "green")
lines(GeoMap$Disgrafia1~GeoMap$Região, type = "l", lty=1, lwd=2,col= "orange")
lines(GeoMap$TA1~GeoMap$Região, type = "l", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif. de Aprendizagem", "Discalculia", "Disgrafia", "Transt. da Aprendizagem"), bty="n", lty=1,
col=c("red","darkblue", "green", "orange", "purple"), lwd=2, cex=0.9)
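Because GeoMap$Região is a factor, plot() with a formula may dispatch to the boxplot method, which makes the line overlay unreliable. A sketch of an alternative under the same assumptions (the GeoMap columns shown above), drawing all five series at once with matplot() against the state index:
#All five series against the state index; axis() adds the state names
vars <- c("Dislexia1", "DA1", "Discalculia1", "Disgrafia1", "TA1")
cores <- c("red", "darkblue", "green", "orange", "purple")
matplot(seq_len(nrow(GeoMap)), GeoMap[, vars], type = "l", lty = 1, lwd = 2, col = cores, ylim = c(0, 100), xaxt = "n", xlab = "Region", ylab = "Relative frequency", main = "Relative frequency by geographic region, 2004 to 2017")
axis(1, at = seq_len(nrow(GeoMap)), labels = GeoMap$Região, las = 2, cex.axis = 0.7)
legend("topright", vars, bty = "n", lty = 1, col = cores, lwd = 2, cex = 0.9)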