setwd("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados")

Timeline <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/timeline2.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
str(Timeline)
## 'data.frame':    157 obs. of  14 variables:
##  $ X          : Factor w/ 157 levels "2004-01","2004-02",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Cartilha   : int  38 46 58 47 38 44 34 49 42 38 ...
##  $ DA         : int  49 49 19 54 51 59 42 44 76 58 ...
##  $ DAs        : int  37 61 100 69 62 63 63 61 46 85 ...
##  $ FE         : int  31 43 75 79 89 90 55 51 69 64 ...
##  $ PA         : int  12 18 25 49 39 35 25 17 36 30 ...
##  $ DE         : int  12 12 25 20 8 16 8 7 20 10 ...
##  $ TA         : int  3 3 3 3 2 2 2 3 3 3 ...
##  $ DA1        : int  10 16 26 18 16 16 16 16 12 22 ...
##  $ Dislexia   : int  35 26 33 40 82 72 57 49 65 61 ...
##  $ Disgrafia  : int  13 10 10 5 6 4 4 5 10 3 ...
##  $ Discalculia: int  3 3 3 4 5 3 2 2 3 4 ...
##  $ Tempo      : Factor w/ 157 levels "2004-01-30","2004-03-01",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ tempo      : Factor w/ 157 levels "2004-01-29 22:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
tail(Timeline)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 152 2016-08       24 17  18  4  4  5  1   5       20         1           2
## 153 2016-09       25 19  22  8  6  5  1   6       31         2           2
## 154 2016-10       24 17  19  8  5  5  1   5       21         2           2
## 155 2016-11       23 22  23  8  6  7  1   6       20         2           3
## 156 2016-12       14 18  16  3  5  5  1   4       17         1           2
## 157 2017-01       15  9  11  3  3  2  0   3       14         1           1
##          Tempo               tempo
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
## 157 2017-01-30 2017-01-29 22:00:00
Timeline_5anos <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/Timeline_5anos.csv", sep=",", dec=",",fill=TRUE, header=TRUE) # reloads the 5-year subset saved by an earlier run; it is rebuilt from Timeline below
summary(Timeline)
##        X          Cartilha            DA             DAs        
##  2004-01:  1   Min.   : 14.00   Min.   : 7.00   Min.   :  9.00  
##  2004-02:  1   1st Qu.: 24.00   1st Qu.:16.00   1st Qu.: 23.00  
##  2004-03:  1   Median : 28.00   Median :19.00   Median : 29.00  
##  2004-04:  1   Mean   : 29.56   Mean   :22.77   Mean   : 32.03  
##  2004-05:  1   3rd Qu.: 33.00   3rd Qu.:27.00   3rd Qu.: 38.00  
##  2004-06:  1   Max.   :100.00   Max.   :76.00   Max.   :100.00  
##  (Other):151                                                    
##        FE             PA              DE               TA        
##  Min.   : 3.0   Min.   : 2.00   Min.   : 2.000   Min.   :0.0000  
##  1st Qu.: 7.0   1st Qu.: 5.00   1st Qu.: 5.000   1st Qu.:0.0000  
##  Median :12.0   Median : 7.00   Median : 6.000   Median :1.0000  
##  Mean   :20.2   Mean   :10.11   Mean   : 6.739   Mean   :0.9045  
##  3rd Qu.:27.0   3rd Qu.:12.00   3rd Qu.: 8.000   3rd Qu.:1.0000  
##  Max.   :90.0   Max.   :49.00   Max.   :25.000   Max.   :4.0000  
##                                                                  
##       DA1            Dislexia        Disgrafia       Discalculia   
##  Min.   : 2.000   Min.   : 13.00   Min.   : 0.000   Min.   :0.000  
##  1st Qu.: 6.000   1st Qu.: 21.00   1st Qu.: 1.000   1st Qu.:1.000  
##  Median : 7.000   Median : 25.00   Median : 2.000   Median :2.000  
##  Mean   : 8.344   Mean   : 30.59   Mean   : 2.363   Mean   :2.089  
##  3rd Qu.:10.000   3rd Qu.: 35.00   3rd Qu.: 3.000   3rd Qu.:2.000  
##  Max.   :26.000   Max.   :100.00   Max.   :13.000   Max.   :6.000  
##                                                                    
##         Tempo                     tempo    
##  2004-01-30:  1   2004-01-29 22:00:00:  1  
##  2004-03-01:  1   2004-02-29 21:00:00:  1  
##  2004-03-30:  1   2004-03-29 21:00:00:  1  
##  2004-04-30:  1   2004-04-29 21:00:00:  1  
##  2004-05-30:  1   2004-05-29 21:00:00:  1  
##  2004-06-30:  1   2004-06-29 21:00:00:  1  
##  (Other)   :151   (Other)            :151

Select the study period (rows 97:156, exactly 5 years: 2012-01 through 2016-12)

Timeline_5anos <- Timeline[97:156, ]
head(Timeline_5anos)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 97  2012-01       15 10  19  5  3  3  0   5       22         1           1
## 98  2012-02       20 15  19  6  7  4  0   5       20         1           1
## 99  2012-03       26 18  30  9  7  5  0   8       27         2           2
## 100 2012-04       25 25  29 11  8  5  1   8       27         2           1
## 101 2012-05       29 24  35 12 10  7  1   9       24         2           2
## 102 2012-06       25 18  31 12  7  6  1   8       23         2           2
##          Tempo               tempo
## 97  2012-01-30 2012-01-29 22:00:00
## 98  2012-03-01 2012-02-29 21:00:00
## 99  2012-03-30 2012-03-29 21:00:00
## 100 2012-04-30 2012-04-29 21:00:00
## 101 2012-05-30 2012-05-29 21:00:00
## 102 2012-06-30 2012-06-29 21:00:00
tail(Timeline_5anos)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 151 2016-07       21 14  15  4  3  4  1   4       22         1           1
## 152 2016-08       24 17  18  4  4  5  1   5       20         1           2
## 153 2016-09       25 19  22  8  6  5  1   6       31         2           2
## 154 2016-10       24 17  19  8  5  5  1   5       21         2           2
## 155 2016-11       23 22  23  8  6  7  1   6       20         2           3
## 156 2016-12       14 18  16  3  5  5  1   4       17         1           2
##          Tempo               tempo
## 151 2016-07-30 2016-07-29 21:00:00
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
write.csv(Timeline_5anos,file="Timeline_5anos.csv")

summary(Timeline_5anos)
##        X         Cartilha           DA             DAs       
##  2012-01: 1   Min.   :14.00   Min.   : 7.00   Min.   : 9.00  
##  2012-02: 1   1st Qu.:21.75   1st Qu.:14.00   1st Qu.:19.00  
##  2012-03: 1   Median :25.00   Median :17.00   Median :23.00  
##  2012-04: 1   Mean   :23.75   Mean   :16.27   Mean   :22.45  
##  2012-05: 1   3rd Qu.:26.00   3rd Qu.:19.00   3rd Qu.:26.00  
##  2012-06: 1   Max.   :32.00   Max.   :25.00   Max.   :35.00  
##  (Other):54                                                  
##        FE               PA               DE              TA      
##  Min.   : 3.000   Min.   : 2.000   Min.   :2.000   Min.   :0.00  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:4.000   1st Qu.:0.00  
##  Median : 7.000   Median : 5.000   Median :5.000   Median :1.00  
##  Mean   : 6.833   Mean   : 5.267   Mean   :5.183   Mean   :0.65  
##  3rd Qu.: 8.000   3rd Qu.: 6.000   3rd Qu.:6.000   3rd Qu.:1.00  
##  Max.   :12.000   Max.   :10.000   Max.   :8.000   Max.   :1.00  
##                                                                  
##       DA1           Dislexia       Disgrafia     Discalculia   
##  Min.   :2.000   Min.   :13.00   Min.   :1.00   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:18.75   1st Qu.:1.00   1st Qu.:1.000  
##  Median :6.000   Median :21.00   Median :1.00   Median :2.000  
##  Mean   :5.833   Mean   :21.12   Mean   :1.45   Mean   :1.783  
##  3rd Qu.:7.000   3rd Qu.:23.00   3rd Qu.:2.00   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :35.00   Max.   :2.00   Max.   :3.000  
##                                                                
##         Tempo                    tempo   
##  2012-01-30: 1   2012-01-29 22:00:00: 1  
##  2012-03-01: 1   2012-02-29 21:00:00: 1  
##  2012-03-30: 1   2012-03-29 21:00:00: 1  
##  2012-04-30: 1   2012-04-29 21:00:00: 1  
##  2012-05-30: 1   2012-05-29 21:00:00: 1  
##  2012-06-30: 1   2012-06-29 21:00:00: 1  
##  (Other)   :54   (Other)            :54

Organizing the data set by month

# 2004 to 2017
Timeline$date <- seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "month")
Timeline$Date <- as.POSIXct(Timeline$date, tz="America/Sao_Paulo")

# 2012 to 2016
Timeline_5anos$date <- seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "month")
Timeline_5anos$Date <- as.POSIXct(Timeline_5anos$date, tz="America/Sao_Paulo")
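Note that a monthly sequence anchored on day 30 drifts at month boundaries, which is why Tempo above shows "2004-03-01" standing in for February. A safer sketch (the column name date_alt is illustrative), assuming X holds the "YYYY-MM" labels reported by str(), anchors each month on its first day:

# sketch: build monthly stamps directly from the "YYYY-MM" labels in X
Timeline$date_alt <- as.Date(paste0(Timeline$X, "-01"))
head(Timeline$date_alt)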

Removing outliers # borrowed from aL3xa - # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset

remove_outliers <- function(x, na.rm = TRUE, ...) {
        # Tukey fences: flag values beyond 1.5 * IQR outside the quartiles
        qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
        H <- 1.5 * IQR(x, na.rm = na.rm)
        y <- x
        y[x < (qnt[1] - H)] <- NA  # low outliers become NA
        y[x > (qnt[2] + H)] <- NA  # high outliers become NA
        y
}
# 2004 to 2017
Timeline$DA_clean <- remove_outliers(Timeline$DA)
Timeline$DAs_clean <- remove_outliers(Timeline$DAs)
Timeline$Dislexia_clean <- remove_outliers(Timeline$Dislexia)
Timeline$FE_clean <- remove_outliers(Timeline$FE)
Timeline$PA_clean <- remove_outliers(Timeline$PA)
Timeline$DE_clean <- remove_outliers(Timeline$DE)
Timeline$Discalculia_clean <- remove_outliers(Timeline$Discalculia)
Timeline$Disgrafia_clean <- remove_outliers(Timeline$Disgrafia)
Timeline$TA_clean <- remove_outliers(Timeline$TA)
# 2012 to 2016
Timeline_5anos$DA_clean <- remove_outliers(Timeline_5anos$DA)
Timeline_5anos$DAs_clean <- remove_outliers(Timeline_5anos$DAs)
Timeline_5anos$Dislexia_clean <- remove_outliers(Timeline_5anos$Dislexia)
Timeline_5anos$FE_clean <- remove_outliers(Timeline_5anos$FE)
Timeline_5anos$PA_clean <- remove_outliers(Timeline_5anos$PA)
Timeline_5anos$DE_clean <- remove_outliers(Timeline_5anos$DE)
Timeline_5anos$Discalculia_clean <- remove_outliers(Timeline_5anos$Discalculia)
Timeline_5anos$Disgrafia_clean <- remove_outliers(Timeline_5anos$Disgrafia)
Timeline_5anos$TA_clean <- remove_outliers(Timeline_5anos$TA)
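As a quick sanity check, the sketch below counts how many observations each *_clean series lost to the 1.5*IQR fences; the counts correspond to the "Removed n rows" warnings ggplot2 prints further below.

# sketch: NAs introduced by remove_outliers in each *_clean column
colSums(is.na(Timeline[, grep("_clean$", names(Timeline))]))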

Relationship between the words and time

Comparative frequency plot, 2004 to 2017

## Plot
library(scales) ## install and cite the scales package

# frequency plot
plot(Timeline$Dislexia~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2004 a 2017")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs~Timeline$Date, type = "l", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE~Timeline$Date, type = "l", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA~Timeline$Date, type = "l", lty=1, lwd=2,col= "pink")
lines(Timeline$DE~Timeline$Date, type = "l", lty=1, lwd=2,col= "black")
lines(Timeline$DA~Timeline$Date, type = "l", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia~Timeline$Date, type = "l", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia~Timeline$Date, type = "l", lty=1, lwd=2,col= "orange")
lines(Timeline$TA~Timeline$Date, type = "l", lty=1, lwd=2,col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)

# outliers removed
plot(Timeline$Dislexia_clean~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2004 a 2017, removidos outliers")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline$DE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline$DA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline$TA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)

Comparative frequency plot, last 5 years

plot(Timeline_5anos$Dislexia~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2012 a 2017")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)

# outliers removed
plot(Timeline_5anos$Dislexia_clean~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Frequencia relativa", main = "Frequencia relativa pelo tempo, 2012 a 2017, removidos outliers")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)

Analyzing the density of the words # borrowed from melina.leite - # http://ecologia.ib.usp.br/bie5782/doku.php?id=bie5782:03_apostila:05-exploratoria

# density plot
plot(density(Timeline$Dislexia, bw=1.5), col="red", xlab="", ylab="Densidade Probabilistica", main = "Densidade pela frequencia")
lines(density(Timeline$DAs, bw=1.5), col= "darkblue")
lines(density(Timeline$FE, bw=1.5),col= "darkgray")
lines(density(Timeline$PA, bw=1.5),col= "pink")
lines(density(Timeline$DE, bw=1.5),col= "black")
lines(density(Timeline$DA, bw=1.5),col= "darkred")
lines(density(Timeline$Discalculia, bw=1.5),col= "green")
lines(density(Timeline$Disgrafia, bw=1.5),col= "orange")
lines(density(Timeline$TA, bw=1.5),col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,2), cex=0.9)

Relationship between the words: (1) Dificuldades de Aprendizagem vs. all words; (2) Dislexia vs. all words

Descriptive statistics

# maximum and minimum values
max = apply(Timeline[3:12], MARGIN=2, FUN= max)
max_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= max)
min = apply(Timeline[3:12], MARGIN=2, FUN= min)
min_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= min)

# median
mediana = apply(Timeline[3:12], MARGIN=2, FUN=median)
mediana_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=median)

# means
medias = apply(Timeline[3:12], MARGIN=2, FUN=mean)
medias_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=mean)

# standard deviation
dp = apply(Timeline[3:12], MARGIN=2, FUN=sd)
dp_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=sd)

#variance
var = apply(Timeline[3:12], 2, var)
var_5anos = apply(Timeline_5anos[3:12], 2, var)

# Descriptive table, 2004 to 2017
TabelaDes = data.frame( min, max, mediana, medias, dp, var)
TabelaDes
##             min max mediana     medias         dp         var
## DA            7  76      19 22.7707006 11.1768262 124.9214437
## DAs           9 100      29 32.0254777 15.0298630 225.8967826
## FE            3  90      12 20.1974522 18.7211109 350.4799935
## PA            2  49       7 10.1146497  7.9775070  63.6406173
## DE            2  25       6  6.7388535  3.4271662  11.7454679
## TA            0   4       1  0.9044586  0.8147946   0.6638902
## DA1           2  26       7  8.3439490  3.9300182  15.4450433
## Dislexia     13 100      25 30.5859873 15.6745934 245.6928793
## Disgrafia     0  13       2  2.3630573  1.8402650   3.3865752
## Discalculia   0   6       2  2.0891720  1.1057878   1.2227666
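For reference, the same table can be produced in a single pass; this sketch is equivalent to the apply() calls above:

# sketch: compute all six statistics per column at once
t(sapply(Timeline[3:12], function(v)
        c(min = min(v), max = max(v), mediana = median(v),
          medias = mean(v), dp = sd(v), var = var(v))))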
# Descriptive table, 2012 to 2016
TabelaDes_5anos = data.frame(min_5anos, max_5anos, mediana_5anos, medias_5anos,  dp_5anos,  var_5anos)
TabelaDes_5anos
##             min_5anos max_5anos mediana_5anos medias_5anos  dp_5anos
## DA                  7        25            17    16.266667 3.8658289
## DAs                 9        35            23    22.450000 5.6521332
## FE                  3        12             7     6.833333 2.3733454
## PA                  2        10             5     5.266667 1.6142025
## DE                  2         8             5     5.183333 1.2952543
## TA                  0         1             1     0.650000 0.4809947
## DA1                 2         9             6     5.833333 1.4976441
## Dislexia           13        35            21    21.116667 4.2469996
## Disgrafia           1         2             1     1.450000 0.5016921
## Discalculia         1         3             2     1.783333 0.5551505
##              var_5anos
## DA          14.9446328
## DAs         31.9466102
## FE           5.6327684
## PA           2.6056497
## DE           1.6776836
## TA           0.2313559
## DA1          2.2429379
## Dislexia    18.0370056
## Disgrafia    0.2516949
## Discalculia  0.3081921
# correlation between the variables
explicativas = Timeline[,3:12]
cor(explicativas, method = "spearman")
##                    DA       DAs        FE        PA        DE        TA
## DA          1.0000000 0.8540771 0.8209625 0.7936141 0.6392457 0.5180165
## DAs         0.8540771 1.0000000 0.8524418 0.8269852 0.6485852 0.4435616
## FE          0.8209625 0.8524418 1.0000000 0.8653836 0.5799170 0.4378294
## PA          0.7936141 0.8269852 0.8653836 1.0000000 0.6016096 0.4329864
## DE          0.6392457 0.6485852 0.5799170 0.6016096 1.0000000 0.3825523
## TA          0.5180165 0.4435616 0.4378294 0.4329864 0.3825523 1.0000000
## DA1         0.8503754 0.9935315 0.8494201 0.8172536 0.6425757 0.4314246
## Dislexia    0.7587646 0.7326280 0.8566252 0.7994667 0.4988009 0.3492648
## Disgrafia   0.7968748 0.8085197 0.7878822 0.7858998 0.5889306 0.5114184
## Discalculia 0.5809042 0.5118086 0.4850222 0.5163804 0.5044344 0.5069295
##                   DA1  Dislexia Disgrafia Discalculia
## DA          0.8503754 0.7587646 0.7968748   0.5809042
## DAs         0.9935315 0.7326280 0.8085197   0.5118086
## FE          0.8494201 0.8566252 0.7878822   0.4850222
## PA          0.8172536 0.7994667 0.7858998   0.5163804
## DE          0.6425757 0.4988009 0.5889306   0.5044344
## TA          0.4314246 0.3492648 0.5114184   0.5069295
## DA1         1.0000000 0.7325950 0.8125736   0.5189100
## Dislexia    0.7325950 1.0000000 0.7344780   0.4313871
## Disgrafia   0.8125736 0.7344780 1.0000000   0.5902121
## Discalculia 0.5189100 0.4313871 0.5902121   1.0000000
pairs(explicativas)

Normality tests for the variables

library(nortest)

# Shapiro-Francia normality test
sf.test = apply(Timeline[17:25],  MARGIN=2, FUN=sf.test)
sf.test
## $DA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.95516, p-value = 0.0002326
## 
## 
## $DAs_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.96458, p-value = 0.001309
## 
## 
## $Dislexia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.92617, p-value = 3.676e-06
## 
## 
## $FE_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.84329, p-value = 7.899e-10
## 
## 
## $PA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.87275, p-value = 1.165e-08
## 
## 
## $DE_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.95808, p-value = 0.0003847
## 
## 
## $Discalculia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.82941, p-value = 3.222e-10
## 
## 
## $Disgrafia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.82525, p-value = 1.137e-10
## 
## 
## $TA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.72974, p-value = 4.65e-13
# Shapiro-Wilk normality tests
shapiro.test = apply(Timeline[17:25],  MARGIN=2, FUN=shapiro.test)
shapiro.test
## $DA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.95302, p-value = 7.019e-05
## 
## 
## $DAs_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.96262, p-value = 0.0005275
## 
## 
## $Dislexia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.92287, p-value = 4.782e-07
## 
## 
## $FE_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.8404, p-value = 2.398e-11
## 
## 
## $PA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.87041, p-value = 6.218e-10
## 
## 
## $DE_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.95616, p-value = 0.0001306
## 
## 
## $Discalculia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.82723, p-value = 8.761e-12
## 
## 
## $Disgrafia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.82471, p-value = 2.961e-12
## 
## 
## $TA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.72669, p-value = 3.211e-15
# Anderson-Darling normality test
ad.test = apply(Timeline[17:25], MARGIN=2, FUN=ad.test)
ad.test
## $DA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 2.2692, p-value = 8.88e-06
## 
## 
## $DAs_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 1.5723, p-value = 0.0004605
## 
## 
## $Dislexia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 3.6684, p-value = 3.375e-09
## 
## 
## $FE_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 8.4665, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 6.3436, p-value = 1.2e-15
## 
## 
## $DE_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 2.3517, p-value = 5.571e-06
## 
## 
## $Discalculia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 11.851, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 9.308, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 20.155, p-value < 2.2e-16
# Cramer-von Mises normality test
cvm.test = apply(Timeline[17:25], MARGIN=2, FUN=cvm.test)
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
cvm.test
## $DA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.43175, p-value = 1.219e-05
## 
## 
## $DAs_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.27493, p-value = 0.0006369
## 
## 
## $Dislexia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.64513, p-value = 1.552e-07
## 
## 
## $FE_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.5153, p-value = 7.37e-10
## 
## 
## $PA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.1283, p-value = 7.37e-10
## 
## 
## $DE_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.43382, p-value = 1.162e-05
## 
## 
## $Discalculia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 2.1864, p-value = 7.37e-10
## 
## 
## $Disgrafia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.6243, p-value = 7.37e-10
## 
## 
## $TA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 3.7942, p-value = 7.37e-10
# Lilliefors (Kolmogorov-Smirnov correction) goodness-of-fit normality test
lillie.test = apply(Timeline[17:25], MARGIN=2, FUN=lillie.test)
lillie.test
## $DA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.14452, p-value = 5.78e-08
## 
## 
## $DAs_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.10969, p-value = 0.000191
## 
## 
## $Dislexia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.13847, p-value = 3.542e-07
## 
## 
## $FE_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.2038, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.20385, p-value = 3.174e-16
## 
## 
## $DE_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.14544, p-value = 4.518e-08
## 
## 
## $Discalculia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.27422, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.2652, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.3607, p-value < 2.2e-16
# Pearson chi-square normality test, taking the number of classes into account
pearson.test = apply(Timeline[17:25], MARGIN=2, FUN=pearson.test)
pearson.test
## $DA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 26.367, p-value = 0.009519
## 
## 
## $DAs_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 29.685, p-value = 0.003114
## 
## 
## $Dislexia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 46.483, p-value = 5.728e-06
## 
## 
## $FE_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 137.59, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 76.483, p-value = 1.925e-11
## 
## 
## $DE_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 172.49, p-value < 2.2e-16
## 
## 
## $Discalculia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 693.24, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 504.94, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 940.86, p-value < 2.2e-16

CONCLUSION: the data are not normally distributed, so non-parametric methods should be adopted.
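This conclusion can be checked compactly with the test-result lists stored above; the sketch below builds a matrix of p-values (variables in rows, tests in columns), and every entry falls below 0.05.

# sketch: p-values of all six normality tests for the nine *_clean series
sapply(list(SF = sf.test, SW = shapiro.test, AD = ad.test,
            CvM = cvm.test, Lillie = lillie.test, Pearson = pearson.test),
       function(res) sapply(res, `[[`, "p.value"))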

Diagnostic plots for the variable Dif.s de Aprendizagem

# Diagnostic plots and histogram with a non-parametric probability-density fit plus the normal curve, with and without outliers
par(mfrow=c(2,2))
boxplot(Timeline$DAs, notch = T, main = "Dif.s de Aprendizagem", ylab = "valores observados")
boxplot(Timeline$DAs_clean, notch = T, main = "removidos outliers", ylab = "valores observados")
hist(Timeline$DAs, prob=T, main = "Densid. probabilística e curva normal", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$DAs),col="darkblue", lwd=c(2))
curve(expr = dnorm(x,mean=mean(Timeline$DAs),sd=sd(Timeline$DAs)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
hist(Timeline$DAs_clean, prob=T, main = "removidos outliers", xlab = "valores observados", ylab = "Densidade probabilística")
lines(density(Timeline$DAs_clean, na.rm=TRUE), col="darkblue", lwd=c(2))
curve(expr = dnorm(x, mean=mean(Timeline$DAs_clean, na.rm=TRUE), sd=sd(Timeline$DAs_clean, na.rm=TRUE)), add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)

par(mfrow=c(1,1))

# normality tests
Teste_norm_DAs = c(sf.test$DAs_clean$method, ad.test$DAs_clean$method, cvm.test$DAs_clean$method, lillie.test$DAs_clean$method, pearson.test$DAs_clean$method, shapiro.test$DAs_clean$method)
pvalor_norm_DAs = c(sf.test$DAs_clean$p.value, ad.test$DAs_clean$p.value, cvm.test$DAs_clean$p.value, lillie.test$DAs_clean$p.value, pearson.test$DAs_clean$p.value, shapiro.test$DAs_clean$p.value)

Tab_norm_DAs = data.frame(Teste_norm_DAs, pvalor_norm_DAs)
Tab_norm_DAs
##                                   Teste_norm_DAs pvalor_norm_DAs
## 1                 Shapiro-Francia normality test    0.0013094786
## 2                Anderson-Darling normality test    0.0004604626
## 3                Cramer-von Mises normality test    0.0006369276
## 4 Lilliefors (Kolmogorov-Smirnov) normality test    0.0001909866
## 5              Pearson chi-square normality test    0.0031139409
## 6                    Shapiro-Wilk normality test    0.0005274689

Correlation between the dependent variable and the others, Spearman method (non-parametric), outliers removed

cor.das.da = cor.test(Timeline$DAs_clean, Timeline$DA_clean, method = "spearman") ## DificuldadeS de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.fe = cor.test(Timeline$DAs_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$FE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.pa = cor.test(Timeline$DAs_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$PA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.de = cor.test(Timeline$DAs_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.dis1 = cor.test(Timeline$DAs_clean, Timeline$Dislexia_clean, method = "spearman") #Dislexia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Dislexia_clean, :
## Cannot compute exact p-value with ties
cor.das.dis2 = cor.test(Timeline$DAs_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.das.dis3 = cor.test(Timeline$DAs_clean, Timeline$Disgrafia_clean, method = "spearman") # Disgrafia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Disgrafia_clean, :
## Cannot compute exact p-value with ties
cor.das.ta = cor.test(Timeline$DAs_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$TA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
Nome = c("Dif.s de aprend. com Dif. de Aprend.", "   com Fracasso Escolar", "   com Problema de Aprend.", "   com Desempenho Escolar", "   com Dislexia", "   com Discalculia", "   com Disgrafia", "   com Transt. de Aprend.")
  
pvalor_cor = c(cor.das.da$p.value, cor.das.fe$p.value, cor.das.pa$p.value, cor.das.de$p.value, cor.das.dis1$p.value, cor.das.dis2$p.value, cor.das.dis3$p.value, cor.das.ta$p.value) 

valor_cor = c(cor.das.da$estimate, cor.das.fe$estimate, cor.das.pa$estimate, cor.das.de$estimate, cor.das.dis1$estimate, cor.das.dis2$estimate, cor.das.dis3$estimate, cor.das.ta$estimate)

tab_cor_DAs = data.frame(Nome, valor_cor, pvalor_cor)

tab_cor_DAs
##                                   Nome valor_cor   pvalor_cor
## 1 Dif.s de aprend. com Dif. de Aprend. 0.8284254 4.629895e-37
## 2                 com Fracasso Escolar 0.8092017 2.240052e-34
## 3              com Problema de Aprend. 0.7745652 3.061280e-29
## 4               com Desempenho Escolar 0.5891858 2.355150e-14
## 5                         com Dislexia 0.7775874 3.442925e-29
## 6                      com Discalculia 0.3835854 3.416791e-06
## 7                        com Disgrafia 0.7638980 8.502890e-29
## 8               com Transt. de Aprend. 0.2659142 1.381377e-03

Scatter plots with regression line, loess method (non-parametric) - Dif.s de Aprendizagem, outliers removed

library("ggplot2")
library("devtools")

ggplot(Timeline, aes(y=DAs_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(DA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dif. de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).

qplot(FE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Fracasso Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).

## Warning: Removed 14 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

qplot(PA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Problemas de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).

## Warning: Removed 17 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(DE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Desempenho Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Dislexia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(Dislexia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dislexia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(Discalculia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Discalculia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).

qplot(Disgrafia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Disgrafia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(TA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Transtorno de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).
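The pseudoinverse and "near singularities" warnings come from loess itself: Discalculia_clean, Disgrafia_clean and TA_clean take only a handful of integer values, so many local neighborhoods are degenerate. One cosmetic workaround is sketched below; the jitter width and span are illustrative choices, not tuned values.

# sketch: jitter the discrete predictor for display and widen the loess span
ggplot(Timeline, aes(y = DAs_clean, x = TA_clean)) +
        geom_jitter(width = 0.1, height = 0, shape = 1) +
        geom_smooth(method = "loess", span = 1)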

Multiple linear regression model (requires tests of normality, homogeneity, and independence of the residuals) - Dificuldades de Aprendizagem

# Multiple linear regression
## Check the R2 (coefficient of determination): how much of the observed variation the model explains, a value between 0 and 1 read as a percentage. Also check the F statistic (the joint relationship between the dependent and the independent variables): p < 0.05 means at least one explanatory variable contributes significantly to the model.
RL_DAs1 = lm( DAs ~ DA + FE + PA + DE + Dislexia + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_DAs1 )
## 
## Call:
## lm(formula = DAs ~ DA + FE + PA + DE + Dislexia + Discalculia + 
##     Disgrafia + TA, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.7796  -4.4051  -0.2952   3.2602  23.5186 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.73371    2.16432   7.270 1.93e-11 ***
## DA           0.12492    0.11400   1.096   0.2750    
## FE           0.60800    0.08085   7.520 4.89e-12 ***
## PA          -0.14275    0.16756  -0.852   0.3956    
## DE           0.48093    0.25784   1.865   0.0641 .  
## Dislexia    -0.11573    0.05563  -2.080   0.0392 *  
## Discalculia  0.30267    0.69808   0.434   0.6652    
## Disgrafia    0.54605    0.55880   0.977   0.3301    
## TA           1.09128    1.11393   0.980   0.3288    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.531 on 148 degrees of freedom
## Multiple R-squared:  0.7618, Adjusted R-squared:  0.7489 
## F-statistic: 59.16 on 8 and 148 DF,  p-value: < 2.2e-16
# Residual normality goodness-of-fit test
lillie.test(RL_DAs1$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs1$residuals
## D = 0.091273, p-value = 0.002761
# Linear regression, outliers removed
RL_DAs = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_DAs )
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.5469  -3.0775   0.0795   2.7979  15.5779 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.93634    2.01764   2.447  0.01594 *  
## DA_clean           0.58738    0.12460   4.714 6.87e-06 ***
## FE_clean           0.07087    0.08772   0.808  0.42083    
## PA_clean           0.36834    0.19833   1.857  0.06585 .  
## DE_clean           0.27743    0.27266   1.017  0.31105    
## Dislexia_clean     0.06703    0.10516   0.637  0.52512    
## Discalculia_clean  0.06120    0.74686   0.082  0.93484    
## Disgrafia_clean    2.70418    0.81104   3.334  0.00115 ** 
## TA_clean          -1.10400    0.95187  -1.160  0.24852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.707 on 115 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.7557, Adjusted R-squared:  0.7387 
## F-statistic: 44.46 on 8 and 115 DF,  p-value: < 2.2e-16
# Residual normality goodness-of-fit test
lillie.test(RL_DAs$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs$residuals
## D = 0.062922, p-value = 0.2666
par(mfrow=c(2,2))
plot(RL_DAs)

library(car)
par(mfrow=c(1,2))
residualPlot(RL_DAs1, main = "Gráfico de Residuos")
residualPlot(RL_DAs, main = "Gráfico de Residuos, sem outliers")
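Before the manual walk-through below, note that backward elimination can also be sketched with step(), restricting to complete cases first so the sample does not change as terms drop; step() selects by AIC rather than p-values, so it may retain a slightly different subset.

# sketch: automated backward elimination on complete cases only
vars <- c("DAs_clean", "DA_clean", "FE_clean", "PA_clean", "DE_clean",
          "Dislexia_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")
Timeline_cc <- na.omit(Timeline[, vars])
step(lm(DAs_clean ~ ., data = Timeline_cc), direction = "backward")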

# Model adjustment by backward stepwise elimination
# removing Discalculia, p = 0.93484
RL_DAs_aj1 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj1)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Dislexia_clean + Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.6171  -3.0958  -0.1424   2.9445  15.1224 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.33080    1.92615   2.768  0.00655 ** 
## DA_clean         0.57281    0.12157   4.712 6.72e-06 ***
## FE_clean         0.13517    0.07651   1.767  0.07984 .  
## PA_clean         0.28327    0.19356   1.463  0.14597    
## DE_clean         0.32855    0.26187   1.255  0.21208    
## Dislexia_clean   0.04887    0.10183   0.480  0.63220    
## Disgrafia_clean  2.52339    0.77280   3.265  0.00143 ** 
## TA_clean        -0.63608    0.91365  -0.696  0.48767    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.708 on 119 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7592, Adjusted R-squared:  0.745 
## F-statistic: 53.59 on 7 and 119 DF,  p-value: < 2.2e-16
# removing Dislexia, p = 0.63220
RL_DAs_aj2 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj2)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3148  -3.2140  -0.2363   3.0104  16.4652 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.18463    1.58960   3.262 0.001425 ** 
## DA_clean         0.66417    0.11270   5.893 3.25e-08 ***
## FE_clean         0.11066    0.06831   1.620 0.107726    
## PA_clean         0.20945    0.18241   1.148 0.253045    
## DE_clean         0.33812    0.25668   1.317 0.190124    
## Disgrafia_clean  2.66941    0.76217   3.502 0.000639 ***
## TA_clean        -0.65964    0.89343  -0.738 0.461688    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.774 on 126 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.7476, Adjusted R-squared:  0.7356 
## F-statistic: 62.21 on 6 and 126 DF,  p-value: < 2.2e-16
# removing Transtorno de Aprendizagem, p = 0.461688
RL_DAs_aj3 = lm(DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj3)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Disgrafia_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.5361  -3.1722  -0.0087   2.9604  16.3183 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.04710    1.57582   3.203 0.001720 ** 
## DA_clean         0.65120    0.11113   5.860 3.75e-08 ***
## FE_clean         0.11182    0.06817   1.640 0.103400    
## PA_clean         0.21728    0.18178   1.195 0.234193    
## DE_clean         0.33401    0.25615   1.304 0.194616    
## Disgrafia_clean  2.61139    0.75675   3.451 0.000759 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.766 on 127 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.7465, Adjusted R-squared:  0.7366 
## F-statistic: 74.81 on 5 and 127 DF,  p-value: < 2.2e-16
# removing Problema de Aprendizagem, p = 0.234193
RL_DAs_aj4 = lm( DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj4)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.8319  -2.8564  -0.3094   3.0077  17.4098 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.74149    1.58509   4.253 3.96e-05 ***
## DA_clean         0.65757    0.11435   5.750 5.89e-08 ***
## FE_clean         0.16225    0.06418   2.528  0.01265 *  
## DE_clean         0.37681    0.25370   1.485  0.13986    
## Disgrafia_clean  1.89478    0.70271   2.696  0.00792 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.996 on 132 degrees of freedom
##   (20 observations deleted due to missingness)
## Multiple R-squared:  0.7318, Adjusted R-squared:  0.7237 
## F-statistic: 90.04 on 4 and 132 DF,  p-value: < 2.2e-16
# removing Desempenho Escolar, p = 0.13986
RL_DAs_aj5 = lm( DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean, data=Timeline)

summary(RL_DAs_aj5)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.5990  -3.1871  -0.3789   3.1076  16.8447 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.20862    1.42196   5.069 1.27e-06 ***
## DA_clean         0.76572    0.10467   7.316 1.95e-11 ***
## FE_clean         0.15344    0.06429   2.387   0.0184 *  
## Disgrafia_clean  1.75992    0.70563   2.494   0.0138 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.08 on 137 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.7448, Adjusted R-squared:  0.7392 
## F-statistic: 133.3 on 3 and 137 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(RL_DAs_aj5)

#Normality tests on the model residuals
shapiro.test(RL_DAs_aj5$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.97898, p-value = 0.02861
sf.test(RL_DAs_aj5$residuals)
## 
##  Shapiro-Francia normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.97438, p-value = 0.01106
ad.test(RL_DAs_aj5$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  RL_DAs_aj5$residuals
## A = 0.57491, p-value = 0.1331
cvm.test(RL_DAs_aj5$residuals)
## 
##  Cramer-von Mises normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.070668, p-value = 0.2721
lillie.test(RL_DAs_aj5$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs_aj5$residuals
## D = 0.04998, p-value = 0.5265
pearson.test(RL_DAs_aj5$residuals)
## 
##  Pearson chi-square normality test
## 
## data:  RL_DAs_aj5$residuals
## P = 12.723, p-value = 0.3895
#Homoscedasticity test on the residuals
residualPlot(RL_DAs_aj5)
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.3.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#Goldfeld-Quandt test against heteroskedasticity (constant error variance); requires n > 30
gqtest(RL_DAs_aj5)
## 
##  Goldfeld-Quandt test
## 
## data:  RL_DAs_aj5
## GQ = 0.19071, df1 = 67, df2 = 66, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
#Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
bptest(RL_DAs_aj5)
## 
##  studentized Breusch-Pagan test
## 
## data:  RL_DAs_aj5
## BP = 23.965, df = 3, p-value = 2.54e-05
#Independence of residuals - Durbin-Watson test
plot(RL_DAs_aj5$residuals)
dwtest(RL_DAs_aj5)
## 
##  Durbin-Watson test
## 
## data:  RL_DAs_aj5
## DW = 1.5931, p-value = 0.005695
## alternative hypothesis: true autocorrelation is greater than 0
#Removing Disgrafia, p = 0.0138
RL_DAs_aj6 = lm( DAs_clean ~ DA_clean + FE_clean, data=Timeline)

summary(RL_DAs_aj6)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.8988  -3.5731  -0.4507   2.9881  18.6894 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.56890    1.44111   5.252 5.56e-07 ***
## DA_clean     0.86677    0.09832   8.816 4.58e-15 ***
## FE_clean     0.21938    0.05970   3.675  0.00034 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.175 on 138 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.7332, Adjusted R-squared:  0.7293 
## F-statistic: 189.6 on 2 and 138 DF,  p-value: < 2.2e-16
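
The manual backward elimination above can also be automated; a minimal sketch with base R's step(), which selects by AIC rather than the p-value criterion used here, so the resulting model may differ (assumes the *_clean columns and the full candidate set built earlier):

#fix the observation set first so AIC values are comparable across candidate models
dados_DAs = na.omit(Timeline[, c("DAs_clean", "DA_clean", "FE_clean", "PA_clean",
                                 "DE_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")])
RL_DAs_back = step(lm(DAs_clean ~ ., data = dados_DAs), direction = "backward", trace = FALSE)
summary(RL_DAs_back)
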
par(mfrow=c(2,2))

plot(RL_DAs_aj6)

#Normality tests on the model residuals
shapiro.test(RL_DAs_aj6$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RL_DAs_aj6$residuals
## W = 0.98169, p-value = 0.05599
sf.test(RL_DAs_aj6$residuals)
## 
##  Shapiro-Francia normality test
## 
## data:  RL_DAs_aj6$residuals
## W = 0.97817, p-value = 0.0247
ad.test(RL_DAs_aj6$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  RL_DAs_aj6$residuals
## A = 0.42498, p-value = 0.3127
cvm.test(RL_DAs_aj6$residuals)
## 
##  Cramer-von Mises normality test
## 
## data:  RL_DAs_aj6$residuals
## W = 0.051552, p-value = 0.4864
lillie.test(RL_DAs_aj6$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs_aj6$residuals
## D = 0.050816, p-value = 0.4994
pearson.test(RL_DAs_aj6$residuals)
## 
##  Pearson chi-square normality test
## 
## data:  RL_DAs_aj6$residuals
## P = 12.298, p-value = 0.4221
#Homoscedasticity tests on the residuals
residualPlot(RL_DAs_aj6)
library(lmtest)
#Goldfeld-Quandt test against heteroskedasticity (constant error variance); requires n > 30
## If p > 0.05, we do not reject the hypothesis of homoscedastic errors
gqtest(RL_DAs_aj6)
## 
##  Goldfeld-Quandt test
## 
## data:  RL_DAs_aj6
## GQ = 0.17646, df1 = 68, df2 = 67, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
#Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
## If p > 0.05, we do not reject the hypothesis of homoscedastic errors
bptest(RL_DAs_aj6)
## 
##  studentized Breusch-Pagan test
## 
## data:  RL_DAs_aj6
## BP = 15.706, df = 2, p-value = 0.0003885
#Independence of residuals - Durbin-Watson test
## If p > 0.05, we do not reject the null hypothesis and the residuals are independent.
plot(RL_DAs_aj6$residuals)
dwtest(RL_DAs_aj6)
## 
##  Durbin-Watson test
## 
## data:  RL_DAs_aj6
## DW = 1.5928, p-value = 0.005752
## alternative hypothesis: true autocorrelation is greater than 0
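
Since the Breusch-Pagan and Durbin-Watson tests above reject homoscedasticity and independence for this model, the usual OLS standard errors are suspect; a minimal sketch, assuming the sandwich package is installed, that re-checks the coefficients with heteroskedasticity- and autocorrelation-consistent (HAC) standard errors:

library(sandwich)
#Newey-West HAC covariance; coeftest() comes from the already-loaded lmtest package
coeftest(RL_DAs_aj6, vcov = NeweyWest(RL_DAs_aj6))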

Diagnostic plots for the variable - Dislexia

#Diagnostic plots and histograms with nonparametric probability density estimate plus normal curve, with and without outliers

par(mfrow=c(2,2))
boxplot(Timeline$Dislexia, notch = T, main = "Dislexia", ylab = "valores observados")
boxplot(Timeline$Dislexia_clean, notch = T, main = "removidos outliers", ylab = "valores observados")
hist(Timeline$Dislexia, prob=T, main = "Densid. probabilística e curva normal", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$Dislexia),col="darkblue", lwd=c(2))
curve(expr = dnorm(x,mean=mean(Timeline$Dislexia),sd=sd(Timeline$Dislexia)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
hist(Timeline$Dislexia_clean, prob=T, main = "removidos outliers", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$Dislexia_clean, na.rm=TRUE),col="darkblue", lwd=c(2) )
curve(expr = dnorm(x,mean=mean(Timeline$Dislexia_clean, na.rm=TRUE),sd=sd(Timeline$Dislexia_clean, na.rm=TRUE)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)

par(mfrow=c(1,1))

#normality tests
Dis_clean = na.omit(Timeline$Dislexia_clean)
Teste_norm_Dis = c(sf.test(Dis_clean)$method, ad.test(Dis_clean)$method, cvm.test(Dis_clean)$method, lillie.test(Dis_clean)$method, pearson.test(Dis_clean)$method)
pvalor_norm_Dis = c(sf.test(Dis_clean)$p.value, ad.test(Dis_clean)$p.value, cvm.test(Dis_clean)$p.value, lillie.test(Dis_clean)$p.value, pearson.test(Dis_clean)$p.value)

Tab_norm_Dis = data.frame(Teste_norm_Dis, pvalor_norm_Dis)
Tab_norm_Dis
##                                   Teste_norm_Dis pvalor_norm_Dis
## 1                 Shapiro-Francia normality test    3.675661e-06
## 2                Anderson-Darling normality test    3.374538e-09
## 3                Cramer-von Mises normality test    1.552354e-07
## 4 Lilliefors (Kolmogorov-Smirnov) normality test    3.542177e-07
## 5              Pearson chi-square normality test    5.727799e-06
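
A minimal sketch of a reusable helper that builds the same table for any variable, avoiding the repeated pairs of test calls above (assumes the nortest tests used throughout are available):

tab_normalidade = function(x) {
  x = na.omit(x)  #the nortest functions do not accept missing values
  testes = list(sf.test(x), ad.test(x), cvm.test(x), lillie.test(x), pearson.test(x))
  data.frame(Teste = sapply(testes, `[[`, "method"),
             p.valor = sapply(testes, `[[`, "p.value"))
}
tab_normalidade(Timeline$Dislexia_clean)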

Correlation between the dependent variable and the others, Spearman method (nonparametric), outliers removed

cor.dis.das = cor.test(Timeline$Dislexia_clean, Timeline$DAs_clean, method = "spearman") ## DificuldadeS de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DAs_clean, :
## Cannot compute exact p-value with ties
cor.dis.fe = cor.test(Timeline$Dislexia_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$FE_clean, :
## Cannot compute exact p-value with ties
cor.dis.pa = cor.test(Timeline$Dislexia_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$PA_clean, :
## Cannot compute exact p-value with ties
cor.dis.de = cor.test(Timeline$Dislexia_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DE_clean, :
## Cannot compute exact p-value with ties
cor.dis.da = cor.test(Timeline$Dislexia_clean, Timeline$DA_clean, method = "spearman") ## Dificuldade de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DA_clean, :
## Cannot compute exact p-value with ties
cor.dis.dis2 = cor.test(Timeline$Dislexia_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.dis.dis3 = cor.test(Timeline$Dislexia_clean, Timeline$Disgrafia_clean, method = "spearman") #Disgrafia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Disgrafia_clean, : Cannot compute exact p-value with ties
cor.dis.ta = cor.test(Timeline$Dislexia_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$TA_clean, :
## Cannot compute exact p-value with ties
Nome_dis = c("Dislexia com Dif.s de Aprend.", "   com Fracasso Escolar", "   com Problema de Aprend.", "   com Desempenho Escolar", "   com Dif. de Aprend.", "   com Discalculia", "   com Disgrafia", "   com Transt. de Aprend.")
  
pvalor_cor_dis = c(cor.dis.das$p.value, cor.dis.fe$p.value, cor.dis.pa$p.value, cor.dis.de$p.value, cor.dis.da$p.value, cor.dis.dis2$p.value, cor.dis.dis3$p.value, cor.dis.ta$p.value) 

valor_cor_dis = c(cor.dis.das$estimate, cor.dis.fe$estimate, cor.dis.pa$estimate, cor.dis.de$estimate, cor.dis.da$estimate, cor.dis.dis2$estimate, cor.dis.dis3$estimate, cor.dis.ta$estimate)

tab_cor_dis = data.frame(Nome_dis, valor_cor_dis, pvalor_cor_dis)

tab_cor_dis
##                        Nome_dis valor_cor_dis pvalor_cor_dis
## 1 Dislexia com Dif.s de Aprend.     0.7775874   3.442925e-29
## 2          com Fracasso Escolar     0.8303787   1.304910e-36
## 3       com Problema de Aprend.     0.7696528   1.769850e-28
## 4        com Desempenho Escolar     0.4603439   1.515041e-08
## 5           com Dif. de Aprend.     0.7935912   2.345935e-31
## 6               com Discalculia     0.3494107   3.270965e-05
## 7                 com Disgrafia     0.7623783   3.038961e-28
## 8        com Transt. de Aprend.     0.2593444   2.211352e-03
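
For a quick overview, the full Spearman matrix can be computed in one call; a base-R sketch using pairwise complete observations to handle the NAs left by outlier removal (cor() gives no p-values, which is why cor.test() is used above):

termos_clean = c("Dislexia_clean", "DAs_clean", "FE_clean", "PA_clean", "DE_clean",
                 "DA_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")
round(cor(Timeline[, termos_clean], method = "spearman", use = "pairwise.complete.obs"), 2)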

Scatter plots with regression line, loess method (nonparametric) - Dislexia against the other terms, outliers removed

library("ggplot2")
library("devtools")

ggplot(Timeline, aes(y=Dislexia_clean, x=DAs_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(DAs_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(FE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

qplot(PA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

qplot(DE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).

## Warning: Removed 20 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(DA_clean, Dislexia_clean,  data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at -0.015;
## neighborhood radius 2.015; reciprocal condition number 1.03e-016; there
## are other near singularities as well. 4.0602 (identical warnings repeated)
## Warning: Removed 22 rows containing missing values (geom_point).

qplot(Discalculia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at -0.015;
## neighborhood radius 2.015; reciprocal condition number 1.03e-016; there
## are other near singularities as well. 4.0602 (identical warnings repeated)
## Warning: Removed 22 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at 2; neighborhood
## radius 1; reciprocal condition number 0 (identical warnings repeated)
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(Disgrafia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at 2; neighborhood
## radius 1; reciprocal condition number 0 (identical warnings repeated)
## Warning: Removed 15 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at -0.01;
## neighborhood radius 1.01; reciprocal condition number 0; there are other
## near singularities as well. 4.0401 (identical warnings repeated)
## Warning: Removed 20 rows containing missing values (geom_point).

qplot(TA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess/predLoess: pseudoinverse used at -0.01;
## neighborhood radius 1.01; reciprocal condition number 0; there are other
## near singularities as well. 4.0401 (identical warnings repeated)
## Warning: Removed 20 rows containing missing values (geom_point).
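
The repeated ggplot/qplot pairs above can be generated in a loop; a minimal sketch using aes_string(), the programmatic interface in this version of ggplot2:

preditores = c("DAs_clean", "FE_clean", "PA_clean", "DE_clean", "DA_clean",
               "Discalculia_clean", "Disgrafia_clean", "TA_clean")
for (v in preditores) {
  #print() is required to render ggplot objects inside a loop
  print(ggplot(Timeline, aes_string(x = v, y = "Dislexia_clean")) +
          geom_point(shape = 1) + geom_smooth(method = "loess"))
}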

Multiple linear regression model (with tests for normality, homogeneity, and independence of the residuals) - Dislexia

#Linear regression
RL_Dis1 = lm(Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_Dis1 )
## 
## Call:
## lm(formula = Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + 
##     TA, data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.441  -5.510  -0.710   2.743  59.849 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.35399    2.82475   8.976 1.13e-15 ***
## DAs         -0.23414    0.11800  -1.984   0.0491 *  
## FE           0.71912    0.12302   5.846 3.07e-08 ***
## PA           0.41386    0.23537   1.758   0.0807 .  
## DE          -0.44224    0.37831  -1.169   0.2443    
## Discalculia -0.01647    1.01752  -0.016   0.9871    
## Disgrafia    0.03539    0.79757   0.044   0.9647    
## TA          -3.37112    1.59567  -2.113   0.0363 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11 on 149 degrees of freedom
## Multiple R-squared:  0.5298, Adjusted R-squared:  0.5077 
## F-statistic: 23.99 on 7 and 149 DF,  p-value: < 2.2e-16
#Normality tests on the model residuals
lillie.test(RL_Dis1$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis1$residuals
## D = 0.17193, p-value = 2.682e-12
residualPlot(RL_Dis1)

#Linear regression, outliers removed
RL_Dis = lm(Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_Dis )
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean, 
##     data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.587  -2.578  -0.374   2.544  15.027 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       10.52645    1.60088   6.575 1.45e-09 ***
## DAs_clean          0.14754    0.07633   1.933   0.0557 .  
## FE_clean           0.39563    0.06842   5.782 6.34e-08 ***
## PA_clean           0.33230    0.18022   1.844   0.0678 .  
## DE_clean           0.39148    0.24356   1.607   0.1107    
## Discalculia_clean -0.02204    0.66670  -0.033   0.9737    
## Disgrafia_clean    0.47063    0.76995   0.611   0.5422    
## TA_clean          -0.31212    0.85288  -0.366   0.7151    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.276 on 116 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.7328, Adjusted R-squared:  0.7167 
## F-statistic: 45.45 on 7 and 116 DF,  p-value: < 2.2e-16
#Normality tests on the model residuals
lillie.test(RL_Dis$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis$residuals
## D = 0.067493, p-value = 0.18
residualPlot(RL_Dis)

#Backward model selection
#Removing Discalculia, p = 0.9737
RL_Dis_aj1 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj1)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.9415  -2.5155  -0.5345   2.5451  15.5681 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     10.75901    1.55378   6.924 2.32e-10 ***
## DAs_clean        0.14681    0.07713   1.903   0.0594 .  
## FE_clean         0.33840    0.06408   5.281 5.82e-07 ***
## PA_clean         0.44907    0.17694   2.538   0.0124 *  
## DE_clean         0.30316    0.24020   1.262   0.2093    
## Disgrafia_clean  0.67710    0.74728   0.906   0.3667    
## TA_clean        -0.76740    0.83231  -0.922   0.3584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.38 on 120 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7183, Adjusted R-squared:  0.7043 
## F-statistic: 51.01 on 6 and 120 DF,  p-value: < 2.2e-16
#Removing Disgrafia, p = 0.3667
RL_Dis_aj2 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj2)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + TA_clean, data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.833  -2.752  -0.384   2.503  15.128 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.80233    1.55190   6.961 1.88e-10 ***
## DAs_clean    0.17132    0.07218   2.373  0.01920 *  
## FE_clean     0.34973    0.06280   5.569 1.58e-07 ***
## PA_clean     0.48416    0.17252   2.806  0.00584 ** 
## DE_clean     0.30986    0.23991   1.292  0.19896    
## TA_clean    -0.69829    0.82819  -0.843  0.40081    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.376 on 121 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7164, Adjusted R-squared:  0.7047 
## F-statistic: 61.13 on 5 and 121 DF,  p-value: < 2.2e-16
#Removing Transtorno de Aprendizagem, p = 0.40081
RL_Dis_aj3 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean, data=Timeline)
summary(RL_Dis_aj3)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0563  -2.6252  -0.3376   2.4554  15.8577 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.61148    1.50119   7.069 1.03e-10 ***
## DAs_clean    0.17203    0.07146   2.407  0.01755 *  
## FE_clean     0.34293    0.06188   5.542 1.73e-07 ***
## PA_clean     0.48198    0.17134   2.813  0.00572 ** 
## DE_clean     0.27460    0.22984   1.195  0.23450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.356 on 123 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.7176, Adjusted R-squared:  0.7084 
## F-statistic: 78.15 on 4 and 123 DF,  p-value: < 2.2e-16
#Removing Desempenho Escolar, p = 0.23450
RL_Dis_aj4 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, data=Timeline)

summary(RL_Dis_aj4)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, 
##     data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.485  -2.823  -0.171   2.425  17.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.51395    1.39305   8.983 2.89e-15 ***
## DAs_clean    0.15267    0.06987   2.185   0.0307 *  
## FE_clean     0.33976    0.06520   5.211 7.31e-07 ***
## PA_clean     0.51939    0.16696   3.111   0.0023 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.666 on 128 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.6858, Adjusted R-squared:  0.6785 
## F-statistic: 93.15 on 3 and 128 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(RL_Dis_aj4)

#Normality tests on the model residuals
shapiro.test(RL_Dis_aj4$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.95136, p-value = 0.0001274
sf.test(RL_Dis_aj4$residuals)
## 
##  Shapiro-Francia normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.94688, p-value = 0.0001391
ad.test(RL_Dis_aj4$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  RL_Dis_aj4$residuals
## A = 1.3736, p-value = 0.001418
cvm.test(RL_Dis_aj4$residuals)
## 
##  Cramer-von Mises normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.18547, p-value = 0.008073
lillie.test(RL_Dis_aj4$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis_aj4$residuals
## D = 0.076317, p-value = 0.05712
pearson.test(RL_Dis_aj4$residuals)
## 
##  Pearson chi-square normality test
## 
## data:  RL_Dis_aj4$residuals
## P = 16.409, p-value = 0.1732
#Goldfeld-Quandt test against heteroskedasticity (constant error variance); requires n > 30
## If p > 0.05, we do not reject the hypothesis of homoscedastic errors
gqtest(RL_Dis_aj4)
## 
##  Goldfeld-Quandt test
## 
## data:  RL_Dis_aj4
## GQ = 0.33945, df1 = 62, df2 = 62, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
#Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
## If p > 0.05, we do not reject the hypothesis of homoscedastic errors
bptest(RL_Dis_aj4)
## 
##  studentized Breusch-Pagan test
## 
## data:  RL_Dis_aj4
## BP = 15.84, df = 3, p-value = 0.001223
#Independence of residuals - Durbin-Watson test
## If p > 0.05, we do not reject the null hypothesis and the residuals are independent.
plot(RL_Dis_aj4$residuals)
dwtest(RL_Dis_aj4)
## 
##  Durbin-Watson test
## 
## data:  RL_Dis_aj4
## DW = 1.5059, p-value = 0.001571
## alternative hypothesis: true autocorrelation is greater than 0

Trend plot using linear regression # borrowed from amunategui - # http://amunategui.github.io/google-trends-walkthrough/

library(ggplot2)
ggplot(Timeline,aes(x = Date)) +
        stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
        stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
  stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
        stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
        stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
        stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
        stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
        stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
        stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +  
          geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
        geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
        geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
        geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
        geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
        geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
        geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
        geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
        geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
        scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
        theme_bw() +
        xlab("Periodo") +
        ylab("frequencia relativa") +
        ggtitle("Regressao linear das Palavras-chave")
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
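
A more compact sketch of the same figure, assuming the reshape2 package is available and the Date column used above: melting the data to long format lets a single geom/stat pair cover all nine series, at the cost of one common trend formula instead of the varying polynomial degrees above:

library(reshape2)
Timeline_longo = melt(Timeline[, c("Date", "DA_clean", "DAs_clean", "Dislexia_clean",
                                   "FE_clean", "PA_clean", "DE_clean", "Discalculia_clean",
                                   "Disgrafia_clean", "TA_clean")],
                      id.vars = "Date", variable.name = "termo", value.name = "frequencia")
ggplot(Timeline_longo, aes(x = Date, y = frequencia, colour = termo)) +
        geom_point(size = 1) +
        stat_smooth(aes(group = termo), method = lm, formula = y ~ poly(x, 2), level = 0.95) +
        theme_bw() + xlab("Periodo") + ylab("frequencia relativa")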

Trend plot using linear regression, outliers removed - last 5 years # borrowed from aL3xa - # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset

library(ggplot2)
ggplot(Timeline_5anos,aes(x=Date)) +
        stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
        stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
  stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
        stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
        stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
        stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
        stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
        stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
        stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +
        geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
        geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
        geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
        geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
        geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
        geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
        geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
        geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
        geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
        scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
        theme_bw() +
        xlab("Periodo") +
        ylab("frequencia relativa") +
        ggtitle("Regressao linear das Palavras-chave, sem outliers, últimos 5 anos")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

Time series: assessing seasonality and trend. "A time series is a set of observations on a variable, ordered in time," at regular intervals. The aim is to identify non-random patterns in a variable of interest, enabling forecasts and guiding decision-making. # borrowed from marco-lopes # https://gist.github.com/marco-lopes/8550667

# Loading the time series packages

library(tseries)
## Warning: package 'tseries' was built under R version 3.3.3
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
# Time series plots - show the behavior of the data for each variable
ts.plot(Timeline$DAs, main = "Dif.s de Aprendizagem")

ts.plot(Timeline$DA, main = "Dif. de Aprendizagem" )

ts.plot(Timeline$Dislexia,  main = "Dislexia")

ts.plot(Timeline$FE,  main = "Fracasso Escolar")

ts.plot(Timeline$DE,  main = "Desempenho Escolar")

ts.plot(Timeline$PA,  main = "Problemas de Aprendizagem")

ts.plot(Timeline$Discalculia,  main = "Discalculia")

ts.plot(Timeline$Disgrafia,  main = "Disgrafia")

ts.plot(Timeline$TA,  main = "Transtorno de Aprendizagem")
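
Before the formal trend tests, a seasonal-trend decomposition makes both components visible directly; a minimal sketch for one series, assuming the monthly data start in January 2004 as loaded above:

DAs_ts = ts(Timeline$DAs, start = c(2004, 1), frequency = 12)
#STL separates the series into seasonal, trend and remainder components
plot(stl(DAs_ts, s.window = "periodic"), main = "Dif.s de Aprendizagem - STL")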

#COX-STUART TEST: uses the binomial distribution to assess trend (the evolution of the observed values) in the series (p < 0.05 indicates a trend).
library(randtests)
## 
## Attaching package: 'randtests'
## The following object is masked from 'package:tseries':
## 
##     runs.test
cox.stuart.test(Timeline$DAs)
## 
##  Cox Stuart test
## 
## data:  Timeline$DAs
## statistic = 9, n = 78, p-value = 1.381e-12
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$DA)
## 
##  Cox Stuart test
## 
## data:  Timeline$DA
## statistic = 10, n = 77, p-value = 1.697e-11
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$Dislexia)
## 
##  Cox Stuart test
## 
## data:  Timeline$Dislexia
## statistic = 7, n = 76, p-value = 6.418e-14
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$FE)
## 
##  Cox Stuart test
## 
## data:  Timeline$FE
## statistic = 0, n = 78, p-value < 2.2e-16
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$DE)
## 
##  Cox Stuart test
## 
## data:  Timeline$DE
## statistic = 18, n = 69, p-value = 8.769e-05
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$PA)
## 
##  Cox Stuart test
## 
## data:  Timeline$PA
## statistic = 4, n = 75, p-value < 2.2e-16
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$Discalculia)
## 
##  Cox Stuart test
## 
## data:  Timeline$Discalculia
## statistic = 16, n = 55, p-value = 0.002667
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$Disgrafia)
## 
##  Cox Stuart test
## 
## data:  Timeline$Disgrafia
## statistic = 2, n = 58, p-value = 1.188e-14
## alternative hypothesis: non randomness
cox.stuart.test(Timeline$TA)
## 
##  Cox Stuart test
## 
## data:  Timeline$TA
## statistic = 14, n = 53, p-value = 0.0008023
## alternative hypothesis: non randomness
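
The nine Cox-Stuart p-values above can be collected in one pass; a compact sketch:

termos = c("DAs", "DA", "Dislexia", "FE", "DE", "PA", "Discalculia", "Disgrafia", "TA")
sapply(termos, function(v) cox.stuart.test(Timeline[[v]])$p.value)
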
#WALD-WOLFOWITZ TEST: nonparametric test of the randomness of a data sequence; in a time series it assesses trend.
library(adehabitat)
## Warning: package 'adehabitat' was built under R version 3.3.3
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.3.3
## Loading required package: tkrplot
## Loading required package: tcltk
## Loading required package: shapefiles
## Warning: package 'shapefiles' was built under R version 3.3.3
## Loading required package: foreign
## 
## Attaching package: 'shapefiles'
## The following objects are masked from 'package:foreign':
## 
##     read.dbf, write.dbf
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.3
## 
## ************************************************
## ************************************************
## THE PACKAGE adehabitat IS NOW DEPRECATED!!!!!!!
##  It is dangerous to use it, as bugs will no longer be corrected.
## It is now recommended to use the packages adehabitatMA, adehabitatLT, adehabitatHR, and adehabitatHS.
## These 4 packages are the future of adehabitat.
##  They have a vignette explaining in detail how they can be used.
## They implement more methods than adehabitat
## They are based on the more common and more clever spatial classes implemented in sp.
## Bugs are corrected frequently.
## Really, avoid to use the classical adehabitat, unless you have a very good reason for it.
## 
## *****THIS IS THE VERY LAST WARNING*****
##  This is the last version of adehabitat submitted to CRAN (at the time of writing: 2015-03-27).
##  THE NEXT VERSION OF adehabitat WILL JUST BE A VIRTUAL PACKAGE LOADING ALL THE PACKAGES DESCRIBED ABOVE.
## 
## Attaching package: 'adehabitat'
## The following object is masked from 'package:zoo':
## 
##     is.regular
wawotest(Timeline$DAs)
##          a         ea         va         za          p 
## 109.306108  -1.000000 149.880596   9.010043   0.000000
wawotest(Timeline$DA)
##          a         ea         va         za          p 
## 107.161307  -1.000000 149.260260   8.853191   0.000000
wawotest(Timeline$Dislexia)
##         a        ea        va        za         p 
## 130.02448  -1.00000 148.68584  10.74528   0.00000
wawotest(Timeline$FE)
##         a        ea        va        za         p 
## 136.88907  -1.00000 150.70706  11.23215   0.00000
wawotest(Timeline$DE)
##             a            ea            va            za             p 
##  5.613169e+01 -1.000000e+00  1.461242e+02  4.726243e+00  1.143558e-06
wawotest(Timeline$PA)
##          a         ea         va         za          p 
## 118.602500  -1.000000 147.777092   9.838677   0.000000
wawotest(Timeline$Discalculia)
##             a            ea            va            za             p 
##  6.440443e+01 -1.000000e+00  1.499614e+02  5.340937e+00  4.623381e-08
wawotest(Timeline$Disgrafia)
##             a            ea            va            za             p 
##  8.985648e+01 -1.000000e+00  1.422142e+02  7.618762e+00  1.276756e-14
wawotest(Timeline$TA)
##             a            ea            va            za             p 
##  9.273653e+01 -1.000000e+00  1.503839e+02  7.643781e+00  1.054712e-14
#Mann-Kendall trend test; assumes the series is serially independent and identically distributed
library(Kendall)
## Warning: package 'Kendall' was built under R version 3.3.3
MannKendall(Timeline$DAs)
## tau = -0.573, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$DA)
## tau = -0.494, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$Dislexia)
## tau = -0.539, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$FE)
## tau = -0.73, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$DE)
## tau = -0.335, 2-sided pvalue =3.4537e-09
MannKendall(Timeline$PA)
## tau = -0.6, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$Discalculia)
## tau = -0.238, 2-sided pvalue =9.1823e-05
MannKendall(Timeline$Disgrafia)
## tau = -0.53, 2-sided pvalue =< 2.22e-16
MannKendall(Timeline$TA)
## tau = -0.297, 2-sided pvalue =2.2074e-06
#last five years
cox.stuart.test(Timeline_5anos$DAs)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$DAs
## statistic = 7, n = 27, p-value = 0.01916
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$DA)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$DA
## statistic = 15, n = 27, p-value = 0.7011
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$Dislexia)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$Dislexia
## statistic = 11, n = 27, p-value = 0.4421
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$FE)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$FE
## statistic = 7, n = 24, p-value = 0.06391
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$DE)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$DE
## statistic = 11, n = 24, p-value = 0.8388
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$PA)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$PA
## statistic = 8, n = 22, p-value = 0.2863
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$Discalculia)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$Discalculia
## statistic = 11, n = 14, p-value = 0.05737
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$Disgrafia)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$Disgrafia
## statistic = 6, n = 13, p-value = 1
## alternative hypothesis: non randomness
cox.stuart.test(Timeline_5anos$TA)
## 
##  Cox Stuart test
## 
## data:  Timeline_5anos$TA
## statistic = 14, n = 17, p-value = 0.01273
## alternative hypothesis: non randomness

Time series analysis, by month ##borrowed from claudio ##https://gustibuseconomia.com/2014/03/26/momento-r-do-dia-furacoes-ou-uma-imagem-vale-mais-do-que-mil-palavras-mas-qual-imagem-e-esta-a-pergunta-de-um-milhao-de-imagens/

Timeline$mes = as.factor(c(rep(month.name, 13), "January"))
Timeline_5anos$mes = as.factor(c(rep(month.name, 5)))

#month order index matching the data
Timeline$ordem = as.factor(c(rep(1:12, 13), 1))
Timeline_5anos$ordem = as.factor(c(rep(1:12, 5)))
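
A sketch of a more robust alternative, assuming the Tempo column holds the dates shown earlier: deriving the month directly from each date keeps the labels correct if rows are added or removed (note months() returns locale-dependent names):

datas = as.Date(as.character(Timeline$Tempo))
Timeline$mes = factor(months(datas))
#two-digit month keeps the factor levels in calendar order
Timeline$ordem = factor(format(datas, "%m"))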

library(ggplot2)
library(lattice)
library(latticeExtra) 
## Loading required package: RColorBrewer
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
#If the notches of two plots do not overlap this is 'strong evidence' that the two medians differ (Chambers et al., 1983, p. 62).

# monthly plots since 2004
#Dislexia
op <- par(mfrow = c(1,2))
boxplot(Dislexia~ordem,data=Timeline)
monthplot(Timeline$Dislexia, col = "purple",ylim=c(min(Timeline$Dislexia),max(Timeline$Dislexia)),main="Dislexia",xlab="meses",ylab="media de frequencia")

par(op)

#Dificuldades de Aprendizagem
op <- par(mfrow = c(1,2))
boxplot(DAs~ordem,data=Timeline)
monthplot(Timeline$DAs, col="darkblue",ylim=c(min(Timeline$DAs),max(Timeline$DAs)),main="Dif.s de Aprendizagem",xlab="meses",ylab="media de frequencia")

par(op)
#Organization by month
op <- par(mfrow = c(2,2))
monthplot(Timeline$Dislexia, xlab = "Dislexia", ylab = "", col="purple", cex.axis = 0.8)
monthplot(Timeline$DAs, xlab = "Dif.s de Aprendizagem", ylab = "", col="darkblue", cex.axis = 0.8)
monthplot(Timeline$Dislexia, ylab = "", type = "h",  col="purple", cex.axis = 0.8)
monthplot(Timeline$DAs, ylab = "", type = "h",  col="darkblue", cex.axis = 0.8)

par(op)

#Organization by month over the last five years

op <- par(mfrow = c(2,2))
monthplot(Timeline_5anos$Dislexia, xlab = "Dislexia", ylab = "", col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, xlab = "Dif.s de Aprendizagem", ylab = "", col="darkblue", cex.axis = 0.8)
monthplot(Timeline_5anos$Dislexia, ylab = "", type = "h",  col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, ylab = "", type = "h",  col="darkblue", cex.axis = 0.8)

par(op)

op <- par(mfrow = c(2,2))
boxplot(Timeline_5anos$Dislexia~Timeline_5anos$ordem, main="Dislexia", xlab="mes", ylab="frequencia", outline = F, col="purple")
boxplot(Timeline_5anos$DAs~Timeline_5anos$ordem, main="Dif.s de Aprendizagem", xlab="mes", ylab="frequencia", outline = F, col="darkblue")
par(op)

Analysis of variance, by month ##borrowed from Salvatore S. Mangiafico ##http://rcompanion.org/rcompanion/d_06.html

#install these packages if they are not already installed: ("dplyr"); ("FSA"); ("DescTools"); ("rcompanion"), ("multcompView")

##Dificuldades de Aprendizagem (DAs)
### Specify the order of factor levels

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:adehabitat':
## 
##     id
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Timeline_5anos = mutate(Timeline_5anos,
       mes = factor(mes, levels=unique(mes)))
# Medians and descriptive statistics
library(FSA)
## Warning: package 'FSA' was built under R version 3.3.3
## ## FSA v0.8.13. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
Summarize(DAs ~ mes,data = Timeline_5anos)
##          mes n mean       sd min Q1 median Q3 max
## 1      April 5 27.2 3.701351  23 24     28 29  32
## 2     August 5 23.0 3.674235  18 21     23 26  27
## 3   December 5 16.4 2.073644  15 15     16 16  20
## 4   February 5 17.0 2.828427  14 14     18 19  20
## 5    January 5 12.8 3.768289   9 11     12 13  19
## 6       July 5 20.0 4.358899  15 17     19 24  25
## 7       June 5 25.6 3.781534  22 23     24 28  31
## 8      March 5 25.6 2.701851  23 24     25 26  30
## 9        May 5 27.2 4.764452  23 24     26 28  35
## 10  November 5 25.8 2.588436  23 24     25 28  29
## 11   October 5 22.4 3.507136  19 20     22 23  28
## 12 September 5 26.4 3.646917  22 24     26 29  31
# Histograms for each group
library(lattice)
histogram(~ DAs | ordem, data=Timeline_5anos, layout=c(12,1))

#Analysis of variance for nonparametric data (p-value below 0.05 means the null hypothesis should be rejected), interpreted as a difference in medians; applied to one-way data with more than two groups
kruskal.test(DAs ~ mes, data = Timeline_5anos)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  DAs by mes
## Kruskal-Wallis chi-squared = 37.858, df = 11, p-value = 8.269e-05
#POST HOC TESTS
#Nemenyi test for multiple comparisons; not appropriate for groups with unequal numbers of observations. Uses the chi-squared approximation.
library(DescTools)
## Warning: package 'DescTools' was built under R version 3.3.3
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
## The following object is masked from 'package:car':
## 
##     Recode
NemenyiTest(x = Timeline_5anos$DAs, g = Timeline_5anos$mes, dist="tukey")
## 
##  Nemenyi's test of multiple comparisons for independent samples (tukey)  
## 
##                    mean.rank.diff   pval    
## August-April                -14.4 0.9789    
## December-April              -34.2 0.0831 .  
## February-April              -33.4 0.1016    
## January-April               -40.3 0.0140 *  
## July-April                  -22.9 0.6421    
## June-April                   -6.3 1.0000    
## March-April                  -4.3 1.0000    
## May-April                    -1.7 1.0000    
## November-April               -3.6 1.0000    
## October-April               -17.7 0.9087    
## September-April              -2.4 1.0000    
## December-August             -19.8 0.8225    
## February-August             -19.0 0.8593    
## January-August              -25.9 0.4443    
## July-August                  -8.5 0.9998    
## June-August                   8.1 0.9999    
## March-August                 10.1 0.9990    
## May-August                   12.7 0.9923    
## November-August              10.8 0.9981    
## October-August               -3.3 1.0000    
## September-August             12.0 0.9953    
## February-December             0.8 1.0000    
## January-December             -6.1 1.0000    
## July-December                11.3 0.9972    
## June-December                27.9 0.3234    
## March-December               29.9 0.2225    
## May-December                 32.5 0.1262    
## November-December            30.6 0.1927    
## October-December             16.5 0.9427    
## September-December           31.8 0.1483    
## January-February             -6.9 1.0000    
## July-February                10.5 0.9986    
## June-February                27.1 0.3698    
## March-February               29.1 0.2601    
## May-February                 31.7 0.1517    
## November-February            29.8 0.2270    
## October-February             15.7 0.9596    
## September-February           31.0 0.1769    
## July-January                 17.4 0.9182    
## June-January                 34.0 0.0874 .  
## March-January                36.0 0.0514 .  
## May-January                  38.6 0.0240 *  
## November-January             36.7 0.0422 *  
## October-January              22.6 0.6615    
## September-January            37.9 0.0297 *  
## June-July                    16.6 0.9402    
## March-July                   18.6 0.8759    
## May-July                     21.2 0.7473    
## November-July                19.3 0.8461    
## October-July                  5.2 1.0000    
## September-July               20.5 0.7865    
## March-June                    2.0 1.0000    
## May-June                      4.6 1.0000    
## November-June                 2.7 1.0000    
## October-June                -11.4 0.9970    
## September-June                3.9 1.0000    
## May-March                     2.6 1.0000    
## November-March                0.7 1.0000    
## October-March               -13.4 0.9881    
## September-March               1.9 1.0000    
## November-May                 -1.9 1.0000    
## October-May                 -16.0 0.9538    
## September-May                -0.7 1.0000    
## October-November            -14.1 0.9821    
## September-November            1.2 1.0000    
## September-October            15.3 0.9666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
require(PMCMR)
## Loading required package: PMCMR
## Warning: package 'PMCMR' was built under R version 3.3.3
posthoc.kruskal.nemenyi.test(x = Timeline_5anos$DAs, g = Timeline_5anos$mes, dist="Chisquare")
## Warning in posthoc.kruskal.nemenyi.test.default(x = Timeline_5anos$DAs, :
## Ties are present. Chi-sq was corrected for ties.
## 
##  Pairwise comparisons using Nemenyi-test with Chi-squared    
##                        approximation for independent samples 
## 
## data:  Timeline_5anos$DAs and Timeline_5anos$mes 
## 
##           April August December February January July June March May 
## August    1.00  -      -        -        -       -    -    -     -   
## December  0.56  0.99   -        -        -       -    -    -     -   
## February  0.60  0.99   1.00     -        -       -    -    -     -   
## January   0.27  0.90   1.00     1.00     -       -    -    -     -   
## July      0.96  1.00   1.00     1.00     1.00    -    -    -     -   
## June      1.00  1.00   0.84     0.87     0.57    1.00 -    -     -   
## March     1.00  1.00   0.77     0.80     0.47    0.99 1.00 -     -   
## May       1.00  1.00   0.65     0.69     0.34    0.98 1.00 1.00  -   
## November  1.00  1.00   0.74     0.77     0.44    0.99 1.00 1.00  1.00
## October   1.00  1.00   1.00     1.00     0.96    1.00 1.00 1.00  1.00
## September 1.00  1.00   0.68     0.72     0.38    0.98 1.00 1.00  1.00
##           November October
## August    -        -      
## December  -        -      
## February  -        -      
## January   -        -      
## July      -        -      
## June      -        -      
## March     -        -      
## May       -        -      
## November  -        -      
## October   1.00     -      
## September 1.00     1.00   
## 
## P value adjustment method: none
##Dunn test for multiple comparisons: if the Kruskal-Wallis test is significant, a post-hoc analysis can determine which levels of the independent variable differ from one another (Dunn's Kruskal-Wallis multiple comparisons).
library(FSA)
dunnTest(DAs ~ mes, data=Timeline_5anos)
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Holm method.
##              Comparison           Z      P.unadj      P.adj
## 1        April - August  1.30691423 0.1912418440 1.00000000
## 2      April - December  3.10392129 0.0019097413 0.11649422
## 3     August - December  1.79700706 0.0723344988 1.00000000
## 4      April - February  3.03131494 0.0024349113 0.14365977
## 5     August - February  1.72440072 0.0846355286 1.00000000
## 6   December - February -0.07260635 0.9421193768 1.00000000
## 7       April - January  3.65754467 0.0002546429 0.01680643
## 8      August - January  2.35063045 0.0187416370 0.89959858
## 9    December - January  0.55362339 0.5798366168 1.00000000
## 10   February - January  0.62622973 0.5311642674 1.00000000
## 11         April - July  2.07835665 0.0376765222 1.00000000
## 12        August - July  0.77144243 0.4404447365 1.00000000
## 13      December - July -1.02556464 0.3050968439 1.00000000
## 14      February - July -0.95295829 0.3406112043 1.00000000
## 15       January - July -1.57918802 0.1142929362 1.00000000
## 16         April - June  0.57177497 0.5674744339 1.00000000
## 17        August - June -0.73513925 0.4622546910 1.00000000
## 18      December - June -2.53214631 0.0113366675 0.56683338
## 19      February - June -2.45953997 0.0139115207 0.68166451
## 20       January - June -3.08576970 0.0020302601 0.12181560
## 21          July - June -1.50658168 0.1319179168 1.00000000
## 22        April - March  0.39025911 0.6963449569 1.00000000
## 23       August - March -0.91665512 0.3593233924 1.00000000
## 24     December - March -2.71366218 0.0066543987 0.35268313
## 25     February - March -2.64105583 0.0082648098 0.42150530
## 26      January - March -3.26728557 0.0010858409 0.06732214
## 27         July - March -1.68809754 0.0913925062 1.00000000
## 28         June - March -0.18151586 0.8559626817 1.00000000
## 29          April - May  0.15428849 0.8773822762 1.00000000
## 30         August - May -1.15262574 0.2490640330 1.00000000
## 31       December - May -2.94963280 0.0031815181 0.18452805
## 32       February - May -2.87702646 0.0040144194 0.22480748
## 33        January - May -3.50325619 0.0004596072 0.02987447
## 34           July - May -1.92406817 0.0543460397 1.00000000
## 35           June - May -0.41748649 0.6763226048 1.00000000
## 36          March - May -0.23597062 0.8134554651 1.00000000
## 37     April - November  0.32672856 0.7438731996 1.00000000
## 38    August - November -0.98018567 0.3269944769 1.00000000
## 39  December - November -2.77719273 0.0054830656 0.29608554
## 40  February - November -2.70458638 0.0068389485 0.35562532
## 41   January - November -3.33081612 0.0008659178 0.05455282
## 42      July - November -1.75162809 0.0798377784 1.00000000
## 43      June - November -0.24504642 0.8064204908 1.00000000
## 44     March - November -0.06353055 0.9493440309 1.00000000
## 45       May - November  0.17244007 0.8630915715 1.00000000
## 46      April - October  1.60641540 0.1081826688 1.00000000
## 47     August - October  0.29950118 0.7645576742 1.00000000
## 48   December - October -1.49750588 0.1342616756 1.00000000
## 49   February - October -1.42489954 0.1541862431 1.00000000
## 50    January - October -2.05112927 0.0402543594 1.00000000
## 51       July - October -0.47194125 0.6369687248 1.00000000
## 52       June - October  1.03464043 0.3008368658 1.00000000
## 53      March - October  1.21615629 0.2239253894 1.00000000
## 54        May - October  1.45212692 0.1464663185 1.00000000
## 55   November - October  1.27968685 0.2006552924 1.00000000
## 56    April - September  0.21781904 0.8275701107 1.00000000
## 57   August - September -1.08909519 0.2761119115 1.00000000
## 58 December - September -2.88610225 0.0039004542 0.22232589
## 59 February - September -2.81349590 0.0049006011 0.26953306
## 60  January - September -3.43972564 0.0005823042 0.03726747
## 61     July - September -1.86053761 0.0628095020 1.00000000
## 62     June - September -0.35395594 0.7233719059 1.00000000
## 63    March - September -0.17244007 0.8630915715 1.00000000
## 64      May - September  0.06353055 0.9493440309 0.94934403
## 65 November - September -0.10890952 0.9132742566 1.00000000
## 66  October - September -1.38859637 0.1649555232 1.00000000
require(PMCMR)
posthoc.kruskal.dunn.test(DAs ~ mes, data=Timeline_5anos, p.adjust.method="bonferroni")   
## Warning in posthoc.kruskal.dunn.test.default(c(19L, 19L, 30L, 29L, 35L, :
## Ties are present. z-quantiles were corrected for ties.
## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  DAs by mes 
## 
##           April August December February January July  June  March May  
## August    1.000 -      -        -        -       -     -     -     -    
## December  0.126 1.000  -        -        -       -     -     -     -    
## February  0.161 1.000  1.000    -        -       -     -     -     -    
## January   0.017 1.000  1.000    1.000    -       -     -     -     -    
## July      1.000 1.000  1.000    1.000    1.000   -     -     -     -    
## June      1.000 1.000  0.748    0.918    0.134   1.000 -     -     -    
## March     1.000 1.000  0.439    0.545    0.072   1.000 1.000 -     -    
## May       1.000 1.000  0.210    0.265    0.030   1.000 1.000 1.000 -    
## November  1.000 1.000  0.362    0.451    0.057   1.000 1.000 1.000 1.000
## October   1.000 1.000  1.000    1.000    1.000   1.000 1.000 1.000 1.000
## September 1.000 1.000  0.257    0.323    0.038   1.000 1.000 1.000 1.000
##           November October
## August    -        -      
## December  -        -      
## February  -        -      
## January   -        -      
## July      -        -      
## June      -        -      
## March     -        -      
## May       -        -      
## November  -        -      
## October   1.000    -      
## September 1.000    1.000  
## 
## P value adjustment method: bonferroni
posthoc.kruskal.dunn.test(DAs ~ mes, data=Timeline_5anos, p.adjust.method="none")
## Warning in posthoc.kruskal.dunn.test.default(c(19L, 19L, 30L, 29L, 35L, :
## Ties are present. z-quantiles were corrected for ties.
## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  DAs by mes 
## 
##           April   August  December February January July    June   
## August    0.19124 -       -        -        -       -       -      
## December  0.00191 0.07233 -        -        -       -       -      
## February  0.00243 0.08464 0.94212  -        -       -       -      
## January   0.00025 0.01874 0.57984  0.53116  -       -       -      
## July      0.03768 0.44044 0.30510  0.34061  0.11429 -       -      
## June      0.56747 0.46225 0.01134  0.01391  0.00203 0.13192 -      
## March     0.69634 0.35932 0.00665  0.00826  0.00109 0.09139 0.85596
## May       0.87738 0.24906 0.00318  0.00401  0.00046 0.05435 0.67632
## November  0.74387 0.32699 0.00548  0.00684  0.00087 0.07984 0.80642
## October   0.10818 0.76456 0.13426  0.15419  0.04025 0.63697 0.30084
## September 0.82757 0.27611 0.00390  0.00490  0.00058 0.06281 0.72337
##           March   May     November October
## August    -       -       -        -      
## December  -       -       -        -      
## February  -       -       -        -      
## January   -       -       -        -      
## July      -       -       -        -      
## June      -       -       -        -      
## March     -       -       -        -      
## May       0.81346 -       -        -      
## November  0.94934 0.86309 -        -      
## October   0.22393 0.14647 0.20066  -      
## September 0.86309 0.94934 0.91327  0.16496
## 
## P value adjustment method: none
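The three post-hoc runs differ only in how the same 66 unadjusted p-values are corrected. As a sanity check, the Bonferroni column above can be reproduced by hand with p.adjust; a sketch using three values read from the unadjusted table:

# Sketch: Bonferroni adjustment is p times the number of comparisons, capped at 1;
# 12 months give choose(12, 2) = 66 pairwise comparisons.
p_unadj <- c(0.00191, 0.00025, 0.00046)           # December-April, January-April, January-May
p.adjust(p_unadj, method = "bonferroni", n = 66)  # ~0.126, ~0.017, ~0.030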
#Normality and homogeneity of variance tests


library(stats)
#Bartlett test, with the null hypothesis that the group variances are equal (homogeneity is supported when p > 0.05).
bartlett.test(DAs~ordem, Timeline)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  DAs by ordem
## Bartlett's K-squared = 4.1341, df = 11, p-value = 0.9658
library(car)
#Test of homogeneity of variances (homogeneity is supported when p > 0.05, the hypothesis of equal variances).
leveneTest(DAs~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group 11  0.3663  0.963
##       48
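The header above mentions normality, but only variance tests are run; a minimal per-group normality check, assuming Timeline holds the 60 monthly observations and the ordem factor used above, might look like:

# Sketch: Shapiro-Wilk normality test within each month group (5 observations each)
tapply(Timeline$DAs, Timeline$ordem, shapiro.test)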
##Dislexia
### Specify the order of factor levels

library(dplyr)

# Note: the reordered copy of Timeline_5anos is stored in Timeline and reused by the variance tests below
Timeline = mutate(Timeline_5anos,
       mes = factor(mes, levels=unique(mes)))
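A quick sketch to confirm the releveling took effect:

levels(Timeline$mes)  # months should now appear in data order, not alphabetically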
# Medians and descriptive statistics
library(FSA)
Summarize(Dislexia ~ mes,data = Timeline_5anos)
##          mes n mean       sd min Q1 median Q3 max
## 1      April 5 25.6 5.856620  20 22     24 27  35
## 2     August 5 20.8 2.588436  18 19     20 23  24
## 3   December 5 16.0 2.000000  14 15     15 17  19
## 4   February 5 19.0 1.870829  17 17     20 20  21
## 5    January 5 16.6 3.781534  13 14     15 19  22
## 6       July 5 19.2 2.774887  16 17     19 22  22
## 7       June 5 22.2 3.962323  17 21     22 23  28
## 8      March 5 23.2 3.271085  19 22     22 26  27
## 9        May 5 23.0 1.224745  21 23     23 24  24
## 10  November 5 22.6 3.781534  19 20     21 25  28
## 11   October 5 21.2 2.588436  18 20     21 22  25
## 12 September 5 24.0 5.338539  18 21     22 28  31
# Histograms for each group
library(lattice)
histogram(~ Dislexia | ordem, data=Timeline_5anos, layout=c(12,1))

#Analysis of variance for non-parametric data (if the p-value is below 0.05, the null hypothesis should be rejected). The Kruskal-Wallis test is applied to one-way data with more than two groups and can be interpreted as a test for a difference in medians.
kruskal.test(Dislexia ~ mes, data = Timeline_5anos)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Dislexia by mes
## Kruskal-Wallis chi-squared = 26.593, df = 11, p-value = 0.005292
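Since the test is significant, an effect size indicates how strong the month effect is. A hand-computed sketch of epsilon-squared, one common effect size for Kruskal-Wallis (epsilon^2 = H / (n - 1)):

# Sketch: effect size from the statistic reported above
H <- 26.593  # Kruskal-Wallis chi-squared
n <- 60      # 12 months x 5 years
H / (n - 1)  # ~0.45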
#POST-HOC TESTS
#Nemenyi test for multiple comparisons (not appropriate for groups with unequal numbers of observations), using the Chi-squared distribution.
library(DescTools)
NemenyiTest(x = Timeline_5anos$Dislexia, g = Timeline_5anos$mes, dist="chisq")
## 
##  Nemenyi's test of multiple comparisons for independent samples (chisq)  
## 
##                    mean.rank.diff   pval    
## August-April                -15.1 0.9989    
## December-April              -36.9 0.4371    
## February-April              -25.2 0.9229    
## January-April               -32.1 0.6789    
## July-April                  -22.8 0.9628    
## June-April                   -9.1 1.0000    
## March-April                  -4.7 1.0000    
## May-April                    -1.8 1.0000    
## November-April               -8.7 1.0000    
## October-April               -13.4 0.9997    
## September-April              -5.4 1.0000    
## December-August             -21.8 0.9737    
## February-August             -10.1 1.0000    
## January-August              -17.0 0.9968    
## July-August                  -7.7 1.0000    
## June-August                   6.0 1.0000    
## March-August                 10.4 1.0000    
## May-August                   13.3 0.9997    
## November-August               6.4 1.0000    
## October-August                1.7 1.0000    
## September-August              9.7 1.0000    
## February-December            11.7 0.9999    
## January-December              4.8 1.0000    
## July-December                14.1 0.9995    
## June-December                27.8 0.8537    
## March-December               32.2 0.6741    
## May-December                 35.1 0.5286    
## November-December            28.2 0.8404    
## October-December             23.5 0.9533    
## September-December           31.5 0.7070    
## January-February             -6.9 1.0000    
## July-February                 2.4 1.0000    
## June-February                16.1 0.9981    
## March-February               20.5 0.9840    
## May-February                 23.4 0.9547    
## November-February            16.5 0.9976    
## October-February             11.8 0.9999    
## September-February           19.8 0.9880    
## July-January                  9.3 1.0000    
## June-January                 23.0 0.9602    
## March-January                27.4 0.8662    
## May-January                  30.3 0.7601    
## November-January             23.4 0.9547    
## October-January              18.7 0.9926    
## September-January            26.7 0.8865    
## June-July                    13.7 0.9996    
## March-July                   18.1 0.9945    
## May-July                     21.0 0.9805    
## November-July                14.1 0.9995    
## October-July                  9.4 1.0000    
## September-July               17.4 0.9961    
## March-June                    4.4 1.0000    
## May-June                      7.3 1.0000    
## November-June                 0.4 1.0000    
## October-June                 -4.3 1.0000    
## September-June                3.7 1.0000    
## May-March                     2.9 1.0000    
## November-March               -4.0 1.0000    
## October-March                -8.7 1.0000    
## September-March              -0.7 1.0000    
## November-May                 -6.9 1.0000    
## October-May                 -11.6 0.9999    
## September-May                -3.6 1.0000    
## October-November             -4.7 1.0000    
## September-November            3.3 1.0000    
## September-October             8.0 1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Dunn test for multiple comparisons: if the Kruskal-Wallis test is significant, a post-hoc analysis can determine which levels of the independent variable differ from one another (Dunn's Kruskal-Wallis multiple comparisons).
library(FSA)
dunnTest(Dislexia ~ mes, data=Timeline_5anos)   
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Holm method.
##              Comparison         Z      P.unadj      P.adj
## 1        April - August  1.372439 0.1699268364 1.00000000
## 2      April - December  3.353841 0.0007969819 0.05260081
## 3     August - December  1.981402 0.0475462202 1.00000000
## 4      April - February  2.290428 0.0219965242 1.00000000
## 5     August - February  0.917989 0.3586246475 1.00000000
## 6   December - February -1.063413 0.2875947227 1.00000000
## 7       April - January  2.917569 0.0035277174 0.22224620
## 8      August - January  1.545130 0.1223148395 1.00000000
## 9    December - January -0.436272 0.6626394094 1.00000000
## 10   February - January  0.627141 0.5305668331 1.00000000
## 11         April - July  2.072292 0.0382382272 1.00000000
## 12        August - July  0.699853 0.4840191325 1.00000000
## 13      December - July -1.281549 0.2000009218 1.00000000
## 14      February - July -0.218136 0.8273231568 1.00000000
## 15       January - July -0.845277 0.3979562218 1.00000000
## 16         April - June  0.827099 0.4081809731 1.00000000
## 17        August - June -0.545340 0.5855197159 1.00000000
## 18      December - June -2.526742 0.0115126081 0.67924388
## 19      February - June -1.463329 0.1433773950 1.00000000
## 20       January - June -2.090470 0.0365756094 1.00000000
## 21          July - June -1.245193 0.2130608367 1.00000000
## 22        April - March  0.427183 0.6692460572 1.00000000
## 23       August - March -0.945256 0.3445282187 1.00000000
## 24     December - March -2.926658 0.0034262557 0.21928037
## 25     February - March -1.863245 0.0624278167 1.00000000
## 26      January - March -2.490386 0.0127604467 0.74010591
## 27         July - March -1.645109 0.0999473510 1.00000000
## 28         June - March -0.399916 0.6892184011 1.00000000
## 29          April - May  0.163602 0.8700444762 1.00000000
## 30         August - May -1.208837 0.2267254956 1.00000000
## 31       December - May -3.190239 0.0014215525 0.09240091
## 32       February - May -2.126826 0.0334345515 1.00000000
## 33        January - May -2.753967 0.0058877728 0.35915414
## 34           July - May -1.908690 0.0563021056 1.00000000
## 35           June - May -0.663497 0.5070123203 1.00000000
## 36          March - May -0.263581 0.7921028103 1.00000000
## 37     April - November  0.790743 0.4290939999 1.00000000
## 38    August - November -0.581696 0.5607714848 1.00000000
## 39  December - November -2.563098 0.0103742797 0.62245678
## 40  February - November -1.499685 0.1336960363 1.00000000
## 41   January - November -2.126826 0.0334345515 1.00000000
## 42      July - November -1.281549 0.2000009218 1.00000000
## 43      June - November -0.036356 0.9709984992 0.97099850
## 44     March - November  0.363560 0.7161866137 1.00000000
## 45       May - November  0.627141 0.5305668331 1.00000000
## 46      April - October  1.217926 0.2232521168 1.00000000
## 47     August - October -0.154513 0.8772052676 1.00000000
## 48   December - October -2.135915 0.0326863510 1.00000000
## 49   February - October -1.072502 0.2834946389 1.00000000
## 50    January - October -1.699643 0.0891981121 1.00000000
## 51       July - October -0.854366 0.3929022508 1.00000000
## 52       June - October  0.390827 0.6959251297 1.00000000
## 53      March - October  0.790743 0.4290939999 1.00000000
## 54        May - October  1.054324 0.2917346272 1.00000000
## 55   November - October  0.427183 0.6692460572 1.00000000
## 56    April - September  0.490806 0.6235636824 1.00000000
## 57   August - September -0.881633 0.3779753261 1.00000000
## 58 December - September -2.863035 0.0041960439 0.26015472
## 59 February - September -1.799622 0.0719203583 1.00000000
## 60  January - September -2.426763 0.0152342089 0.86834991
## 61     July - September -1.581486 0.1137669746 1.00000000
## 62     June - September -0.336293 0.7366499400 1.00000000
## 63    March - September  0.063623 0.9492704199 1.00000000
## 64      May - September  0.327204 0.7435136074 1.00000000
## 65 November - September -0.299937 0.7642252218 1.00000000
## 66  October - September -0.727120 0.4671524689 1.00000000
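To isolate the comparisons that survive the Holm adjustment, the dunnTest result can be stored and filtered; a sketch, assuming the FSA object keeps its table in the res component:

dt <- dunnTest(Dislexia ~ mes, data=Timeline_5anos)  # Holm-adjusted by default
subset(dt$res, P.adj < 0.05)   # empty here; subset on P.unadj < 0.05 for the raw hits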
require(PMCMR)
posthoc.kruskal.nemenyi.test(x = Timeline_5anos$Dislexia, g = Timeline_5anos$mes, dist="Chisquare")
## Warning in posthoc.kruskal.nemenyi.test.default(x = Timeline_5anos
## $Dislexia, : Ties are present. Chi-sq was corrected for ties.
## 
##  Pairwise comparisons using Nemenyi-test with Chi-squared    
##                        approximation for independent samples 
## 
## data:  Timeline_5anos$Dislexia and Timeline_5anos$mes 
## 
##           April August December February January July June March May 
## August    1.00  -      -        -        -       -    -    -     -   
## December  0.42  0.97   -        -        -       -    -    -     -   
## February  0.92  1.00   1.00     -        -       -    -    -     -   
## January   0.67  1.00   1.00     1.00     -       -    -    -     -   
## July      0.96  1.00   1.00     1.00     1.00    -    -    -     -   
## June      1.00  1.00   0.85     1.00     0.96    1.00 -    -     -   
## March     1.00  1.00   0.66     0.98     0.86    0.99 1.00 -     -   
## May       1.00  1.00   0.51     0.95     0.75    0.98 1.00 1.00  -   
## November  1.00  1.00   0.83     1.00     0.95    1.00 1.00 1.00  1.00
## October   1.00  1.00   0.95     1.00     0.99    1.00 1.00 1.00  1.00
## September 1.00  1.00   0.70     0.99     0.88    1.00 1.00 1.00  1.00
##           November October
## August    -        -      
## December  -        -      
## February  -        -      
## January   -        -      
## July      -        -      
## June      -        -      
## March     -        -      
## May       -        -      
## November  -        -      
## October   1.00     -      
## September 1.00     1.00   
## 
## P value adjustment method: none
posthoc.kruskal.dunn.test(Dislexia ~ mes, data=Timeline_5anos, p.adjust.method="bonferroni")   
## Warning in posthoc.kruskal.dunn.test.default(c(22L, 20L, 27L, 27L, 24L, :
## Ties are present. z-quantiles were corrected for ties.
## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  Dislexia by mes 
## 
##           April August December February January July  June  March May  
## August    1.000 -      -        -        -       -     -     -     -    
## December  0.053 1.000  -        -        -       -     -     -     -    
## February  1.000 1.000  1.000    -        -       -     -     -     -    
## January   0.233 1.000  1.000    1.000    -       -     -     -     -    
## July      1.000 1.000  1.000    1.000    1.000   -     -     -     -    
## June      1.000 1.000  0.760    1.000    1.000   1.000 -     -     -    
## March     1.000 1.000  0.226    1.000    0.842   1.000 1.000 -     -    
## May       1.000 1.000  0.094    1.000    0.389   1.000 1.000 1.000 -    
## November  1.000 1.000  0.685    1.000    1.000   1.000 1.000 1.000 1.000
## October   1.000 1.000  1.000    1.000    1.000   1.000 1.000 1.000 1.000
## September 1.000 1.000  0.277    1.000    1.000   1.000 1.000 1.000 1.000
##           November October
## August    -        -      
## December  -        -      
## February  -        -      
## January   -        -      
## July      -        -      
## June      -        -      
## March     -        -      
## May       -        -      
## November  -        -      
## October   1.000    -      
## September 1.000    1.000  
## 
## P value adjustment method: bonferroni
posthoc.kruskal.dunn.test(Dislexia ~ mes, data=Timeline_5anos, p.adjust.method="none")
## Warning in posthoc.kruskal.dunn.test.default(c(22L, 20L, 27L, 27L, 24L, :
## Ties are present. z-quantiles were corrected for ties.
## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  Dislexia by mes 
## 
##           April  August December February January July   June   March 
## August    0.1699 -      -        -        -       -      -      -     
## December  0.0008 0.0475 -        -        -       -      -      -     
## February  0.0220 0.3586 0.2876   -        -       -      -      -     
## January   0.0035 0.1223 0.6626   0.5306   -       -      -      -     
## July      0.0382 0.4840 0.2000   0.8273   0.3980  -      -      -     
## June      0.4082 0.5855 0.0115   0.1434   0.0366  0.2131 -      -     
## March     0.6692 0.3445 0.0034   0.0624   0.0128  0.0999 0.6892 -     
## May       0.8700 0.2267 0.0014   0.0334   0.0059  0.0563 0.5070 0.7921
## November  0.4291 0.5608 0.0104   0.1337   0.0334  0.2000 0.9710 0.7162
## October   0.2233 0.8772 0.0327   0.2835   0.0892  0.3929 0.6959 0.4291
## September 0.6236 0.3780 0.0042   0.0719   0.0152  0.1138 0.7366 0.9493
##           May    November October
## August    -      -        -      
## December  -      -        -      
## February  -      -        -      
## January   -      -        -      
## July      -      -        -      
## June      -      -        -      
## March     -      -        -      
## May       -      -        -      
## November  0.5306 -        -      
## October   0.2917 0.6692   -      
## September 0.7435 0.7642   0.4672 
## 
## P value adjustment method: none
#Load the package that provides the homogeneity of variance test
#If not installed: install.packages("car")
library(stats)
#Bartlett test, with the null hypothesis that the group variances are equal.
bartlett.test(Dislexia~ordem, Timeline)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Dislexia by ordem
## Bartlett's K-squared = 14.382, df = 11, p-value = 0.2126
library(car)
#Test of homogeneity of variances (homogeneity is supported when p > 0.05).
leveneTest(Dislexia~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group 11  0.8041 0.6355
##       48

Description by state

GeoMap <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/GeoMap.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
str(GeoMap)
## 'data.frame':    27 obs. of  21 variables:
##  $ X            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Região       : Factor w/ 27 levels "Acre","Alagoas",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ DA_Shop      : int  NA 0 0 10 3 6 7 0 2 0 ...
##  $ Dislexia_Shop: int  NA 13 100 10 2 6 7 0 4 11 ...
##  $ Cartilha_Shop: int  NA 20 99 20 14 12 29 6 5 17 ...
##  $ Cartilha     : int  61 46 91 50 46 38 50 37 43 52 ...
##  $ DA           : int  NA 77 NA 58 70 64 40 49 49 100 ...
##  $ Dislexia     : int  59 47 100 54 49 51 46 48 49 55 ...
##  $ Pais         : int  78 81 98 77 81 77 74 74 70 82 ...
##  $ Pais1        : int  73 74 90 70 76 71 69 68 65 74 ...
##  $ Fam1         : int  26 29 44 29 27 27 27 23 22 30 ...
##  $ EF1          : int  24 28 32 18 29 23 17 27 19 30 ...
##  $ Crianca1     : int  83 82 100 72 80 80 85 74 73 81 ...
##  $ TA1          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DA1          : int  0 23 0 15 21 20 11 14 14 28 ...
##  $ Dislexia1    : int  61 43 100 55 50 49 44 47 49 54 ...
##  $ Disgrafia1   : int  0 0 0 0 4 4 4 0 4 0 ...
##  $ Discalculia1 : int  0 4 0 4 4 4 3 4 3 6 ...
##  $ Cartilha_sug : int  80 52 95 58 52 44 57 43 49 59 ...
##  $ DA_sug       : int  NA 83 NA 53 75 71 41 51 51 100 ...
##  $ Pais_sug     : int  78 79 96 75 81 76 73 73 69 80 ...
tail(GeoMap)
##     X         Região DA_Shop Dislexia_Shop Cartilha_Shop Cartilha DA
## 22 22       Rondônia       0             0            54       52 NA
## 23 23        Roraima      NA            NA            NA      100 NA
## 24 24 Santa Catarina       3             0            11       32 43
## 25 25      São Paulo       2             5            20       32 31
## 26 26        Sergipe       0            35            18       46 NA
## 27 27      Tocantins       0            30            30       65 NA
##    Dislexia Pais Pais1 Fam1 EF1 Crianca1 TA1 DA1 Dislexia1 Disgrafia1
## 22       55   77    71   21  24       77   0   0        55          0
## 23       77  100    94   34  33       95   0   0        79          0
## 24       48   77    71   19  27       63   0  13        48          3
## 25       46   71    64   18  19       65   1   9        45          2
## 26       46   78    72   27  22       77   0   0        42          0
## 27       41   81    72   24  34       81   0   0        41          0
##    Discalculia1 Cartilha_sug DA_sug Pais_sug
## 22            0           63     NA       76
## 23            0          100     NA      100
## 24            3           36     48       75
## 25            3           37     33       69
## 26            0           54     NA       77
## 27            0           75     NA       77
summary(GeoMap)
##        X             Região      DA_Shop      Dislexia_Shop   
##  Min.   : 1.0   Acre    : 1   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.: 7.5   Alagoas : 1   1st Qu.: 0.00   1st Qu.:  4.00  
##  Median :14.0   Amapá   : 1   Median : 2.00   Median :  7.00  
##  Mean   :14.0   Amazonas: 1   Mean   : 2.56   Mean   : 12.32  
##  3rd Qu.:20.5   Bahia   : 1   3rd Qu.: 3.00   3rd Qu.: 11.00  
##  Max.   :27.0   Ceará   : 1   Max.   :10.00   Max.   :100.00  
##                 (Other) :21   NA's   :2       NA's   :2       
##  Cartilha_Shop      Cartilha            DA            Dislexia     
##  Min.   : 5.00   Min.   : 29.00   Min.   : 28.00   Min.   : 41.00  
##  1st Qu.: 9.00   1st Qu.: 37.50   1st Qu.: 42.50   1st Qu.: 46.00  
##  Median :17.00   Median : 46.00   Median : 55.00   Median : 49.00  
##  Mean   :20.84   Mean   : 48.63   Mean   : 56.53   Mean   : 52.11  
##  3rd Qu.:20.00   3rd Qu.: 52.50   3rd Qu.: 68.00   3rd Qu.: 54.00  
##  Max.   :99.00   Max.   :100.00   Max.   :100.00   Max.   :100.00  
##  NA's   :2                        NA's   :8                        
##       Pais            Pais1            Fam1            EF1       
##  Min.   : 67.00   Min.   :62.00   Min.   :18.00   Min.   :17.00  
##  1st Qu.: 72.00   1st Qu.:67.00   1st Qu.:21.50   1st Qu.:20.50  
##  Median : 75.00   Median :69.00   Median :26.00   Median :25.00  
##  Mean   : 76.89   Mean   :70.74   Mean   :25.22   Mean   :25.04  
##  3rd Qu.: 78.00   3rd Qu.:72.00   3rd Qu.:27.00   3rd Qu.:29.00  
##  Max.   :100.00   Max.   :94.00   Max.   :44.00   Max.   :34.00  
##                                                                  
##     Crianca1           TA1               DA1          Dislexia1     
##  Min.   : 60.00   Min.   :0.00000   Min.   : 0.00   Min.   : 41.00  
##  1st Qu.: 71.50   1st Qu.:0.00000   1st Qu.: 0.00   1st Qu.: 46.00  
##  Median : 77.00   Median :0.00000   Median :13.00   Median : 49.00  
##  Mean   : 76.11   Mean   :0.03704   Mean   :11.48   Mean   : 52.07  
##  3rd Qu.: 80.50   3rd Qu.:0.00000   3rd Qu.:17.50   3rd Qu.: 54.50  
##  Max.   :100.00   Max.   :1.00000   Max.   :28.00   Max.   :100.00  
##                                                                     
##    Disgrafia1     Discalculia1    Cartilha_sug        DA_sug      
##  Min.   :0.000   Min.   :0.000   Min.   : 32.00   Min.   : 30.00  
##  1st Qu.:0.000   1st Qu.:1.500   1st Qu.: 43.50   1st Qu.: 46.00  
##  Median :0.000   Median :4.000   Median : 52.00   Median : 53.00  
##  Mean   :1.593   Mean   :2.926   Mean   : 55.33   Mean   : 59.05  
##  3rd Qu.:3.500   3rd Qu.:4.000   3rd Qu.: 63.00   3rd Qu.: 73.00  
##  Max.   :5.000   Max.   :6.000   Max.   :100.00   Max.   :100.00  
##                                                   NA's   :8       
##     Pais_sug     
##  Min.   : 67.00  
##  1st Qu.: 71.50  
##  Median : 74.00  
##  Mean   : 75.52  
##  3rd Qu.: 77.00  
##  Max.   :100.00  
## 

Relation between words and region

Comparative frequency plot - 2004 to 2017

plot(GeoMap$Dislexia1~GeoMap$Região, type = "l", lty=1, lwd=2, col="red", ylim=c(0,100), xlab="Region", ylab="Relative frequency", main = "Relative frequency by geographic region, 2004 to 2017")
lines(GeoMap$DA1~GeoMap$Região, type = "l", lty=1, lwd=2, col= "darkblue")
lines(GeoMap$Discalculia1~GeoMap$Região, type = "l", lty=1, lwd=2, col= "green")
lines(GeoMap$Disgrafia1~GeoMap$Região, type = "l", lty=1, lwd=2, col= "orange")
lines(GeoMap$TA1~GeoMap$Região, type = "l", lty=1, lwd=2, col= "purple")

legend("topright", c("Dyslexia", "Learning difficulties", "Dyscalculia", "Dysgraphia", "Learning disorders"), bty="n", lty=1,
       col=c("red","darkblue", "green", "orange", "purple"), box.col="white", lwd=2, cex=0.9)
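Note that plot() with a factor on the right-hand side of the formula dispatches to boxplots rather than a line chart; an alternative sketch that draws the five series as lines over state indices:

# Sketch: line chart over state indices instead of a factor axis
vars <- c("Dislexia1", "DA1", "Discalculia1", "Disgrafia1", "TA1")
matplot(seq_len(nrow(GeoMap)), GeoMap[, vars], type = "l", lty = 1, lwd = 2,
        col = c("red", "darkblue", "green", "orange", "purple"), ylim = c(0, 100),
        xaxt = "n", xlab = "Region", ylab = "Relative frequency",
        main = "Relative frequency by geographic region, 2004 to 2017")
axis(1, at = seq_len(nrow(GeoMap)), labels = GeoMap$Região, las = 2, cex.axis = 0.6)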