setwd("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados")

Timeline <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/timeline2.csv", sep=",", dec=",",fill=TRUE, header=TRUE) # dec="," has no effect here: the field separator is also "," and all values are integers
str(Timeline)
## 'data.frame':    157 obs. of  14 variables:
##  $ X          : Factor w/ 157 levels "2004-01","2004-02",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Cartilha   : int  38 46 58 47 38 44 34 49 42 38 ...
##  $ DA         : int  49 49 19 54 51 59 42 44 76 58 ...
##  $ DAs        : int  37 61 100 69 62 63 63 61 46 85 ...
##  $ FE         : int  31 43 75 79 89 90 55 51 69 64 ...
##  $ PA         : int  12 18 25 49 39 35 25 17 36 30 ...
##  $ DE         : int  12 12 25 20 8 16 8 7 20 10 ...
##  $ TA         : int  3 3 3 3 2 2 2 3 3 3 ...
##  $ DA1        : int  10 16 26 18 16 16 16 16 12 22 ...
##  $ Dislexia   : int  35 26 33 40 82 72 57 49 65 61 ...
##  $ Disgrafia  : int  13 10 10 5 6 4 4 5 10 3 ...
##  $ Discalculia: int  3 3 3 4 5 3 2 2 3 4 ...
##  $ Tempo      : Factor w/ 157 levels "2004-01-30","2004-03-01",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ tempo      : Factor w/ 157 levels "2004-01-29 22:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
tail(Timeline)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 152 2016-08       24 17  18  4  4  5  1   5       20         1           2
## 153 2016-09       25 19  22  8  6  5  1   6       31         2           2
## 154 2016-10       24 17  19  8  5  5  1   5       21         2           2
## 155 2016-11       23 22  23  8  6  7  1   6       20         2           3
## 156 2016-12       14 18  16  3  5  5  1   4       17         1           2
## 157 2017-01       15  9  11  3  3  2  0   3       14         1           1
##          Tempo               tempo
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
## 157 2017-01-30 2017-01-29 22:00:00
#earlier export of the study window; this object is rebuilt below from Timeline rows 97:156
Timeline_5anos<-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/Timeline_5anos.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
summary(Timeline)
##        X          Cartilha            DA             DAs        
##  2004-01:  1   Min.   : 14.00   Min.   : 7.00   Min.   :  9.00  
##  2004-02:  1   1st Qu.: 24.00   1st Qu.:16.00   1st Qu.: 23.00  
##  2004-03:  1   Median : 28.00   Median :19.00   Median : 29.00  
##  2004-04:  1   Mean   : 29.56   Mean   :22.77   Mean   : 32.03  
##  2004-05:  1   3rd Qu.: 33.00   3rd Qu.:27.00   3rd Qu.: 38.00  
##  2004-06:  1   Max.   :100.00   Max.   :76.00   Max.   :100.00  
##  (Other):151                                                    
##        FE             PA              DE               TA        
##  Min.   : 3.0   Min.   : 2.00   Min.   : 2.000   Min.   :0.0000  
##  1st Qu.: 7.0   1st Qu.: 5.00   1st Qu.: 5.000   1st Qu.:0.0000  
##  Median :12.0   Median : 7.00   Median : 6.000   Median :1.0000  
##  Mean   :20.2   Mean   :10.11   Mean   : 6.739   Mean   :0.9045  
##  3rd Qu.:27.0   3rd Qu.:12.00   3rd Qu.: 8.000   3rd Qu.:1.0000  
##  Max.   :90.0   Max.   :49.00   Max.   :25.000   Max.   :4.0000  
##                                                                  
##       DA1            Dislexia        Disgrafia       Discalculia   
##  Min.   : 2.000   Min.   : 13.00   Min.   : 0.000   Min.   :0.000  
##  1st Qu.: 6.000   1st Qu.: 21.00   1st Qu.: 1.000   1st Qu.:1.000  
##  Median : 7.000   Median : 25.00   Median : 2.000   Median :2.000  
##  Mean   : 8.344   Mean   : 30.59   Mean   : 2.363   Mean   :2.089  
##  3rd Qu.:10.000   3rd Qu.: 35.00   3rd Qu.: 3.000   3rd Qu.:2.000  
##  Max.   :26.000   Max.   :100.00   Max.   :13.000   Max.   :6.000  
##                                                                    
##         Tempo                     tempo    
##  2004-01-30:  1   2004-01-29 22:00:00:  1  
##  2004-03-01:  1   2004-02-29 21:00:00:  1  
##  2004-03-30:  1   2004-03-29 21:00:00:  1  
##  2004-04-30:  1   2004-04-29 21:00:00:  1  
##  2004-05-30:  1   2004-05-29 21:00:00:  1  
##  2004-06-30:  1   2004-06-29 21:00:00:  1  
##  (Other)   :151   (Other)            :151

Select study period (months 97:156 - exactly 5 years)

Timeline_5anos <- Timeline[97:156, ]
head(Timeline_5anos)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 97  2012-01       15 10  19  5  3  3  0   5       22         1           1
## 98  2012-02       20 15  19  6  7  4  0   5       20         1           1
## 99  2012-03       26 18  30  9  7  5  0   8       27         2           2
## 100 2012-04       25 25  29 11  8  5  1   8       27         2           1
## 101 2012-05       29 24  35 12 10  7  1   9       24         2           2
## 102 2012-06       25 18  31 12  7  6  1   8       23         2           2
##          Tempo               tempo
## 97  2012-01-30 2012-01-29 22:00:00
## 98  2012-03-01 2012-02-29 21:00:00
## 99  2012-03-30 2012-03-29 21:00:00
## 100 2012-04-30 2012-04-29 21:00:00
## 101 2012-05-30 2012-05-29 21:00:00
## 102 2012-06-30 2012-06-29 21:00:00
tail(Timeline_5anos)
##           X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 151 2016-07       21 14  15  4  3  4  1   4       22         1           1
## 152 2016-08       24 17  18  4  4  5  1   5       20         1           2
## 153 2016-09       25 19  22  8  6  5  1   6       31         2           2
## 154 2016-10       24 17  19  8  5  5  1   5       21         2           2
## 155 2016-11       23 22  23  8  6  7  1   6       20         2           3
## 156 2016-12       14 18  16  3  5  5  1   4       17         1           2
##          Tempo               tempo
## 151 2016-07-30 2016-07-29 21:00:00
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
write.csv(Timeline_5anos,file="Timeline_5anos.csv") # writes row names as an extra first column unless row.names=FALSE is given

summary(Timeline_5anos)
##        X         Cartilha           DA             DAs       
##  2012-01: 1   Min.   :14.00   Min.   : 7.00   Min.   : 9.00  
##  2012-02: 1   1st Qu.:21.75   1st Qu.:14.00   1st Qu.:19.00  
##  2012-03: 1   Median :25.00   Median :17.00   Median :23.00  
##  2012-04: 1   Mean   :23.75   Mean   :16.27   Mean   :22.45  
##  2012-05: 1   3rd Qu.:26.00   3rd Qu.:19.00   3rd Qu.:26.00  
##  2012-06: 1   Max.   :32.00   Max.   :25.00   Max.   :35.00  
##  (Other):54                                                  
##        FE               PA               DE              TA      
##  Min.   : 3.000   Min.   : 2.000   Min.   :2.000   Min.   :0.00  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:4.000   1st Qu.:0.00  
##  Median : 7.000   Median : 5.000   Median :5.000   Median :1.00  
##  Mean   : 6.833   Mean   : 5.267   Mean   :5.183   Mean   :0.65  
##  3rd Qu.: 8.000   3rd Qu.: 6.000   3rd Qu.:6.000   3rd Qu.:1.00  
##  Max.   :12.000   Max.   :10.000   Max.   :8.000   Max.   :1.00  
##                                                                  
##       DA1           Dislexia       Disgrafia     Discalculia   
##  Min.   :2.000   Min.   :13.00   Min.   :1.00   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:18.75   1st Qu.:1.00   1st Qu.:1.000  
##  Median :6.000   Median :21.00   Median :1.00   Median :2.000  
##  Mean   :5.833   Mean   :21.12   Mean   :1.45   Mean   :1.783  
##  3rd Qu.:7.000   3rd Qu.:23.00   3rd Qu.:2.00   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :35.00   Max.   :2.00   Max.   :3.000  
##                                                                
##         Tempo                    tempo   
##  2012-01-30: 1   2012-01-29 22:00:00: 1  
##  2012-03-01: 1   2012-02-29 21:00:00: 1  
##  2012-03-30: 1   2012-03-29 21:00:00: 1  
##  2012-04-30: 1   2012-04-29 21:00:00: 1  
##  2012-05-30: 1   2012-05-29 21:00:00: 1  
##  2012-06-30: 1   2012-06-29 21:00:00: 1  
##  (Other)   :54   (Other)            :54

Organizing the dataset by month

#2004 to 2017
Timeline$date <- seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "month")
Timeline$Date <- as.POSIXct(Timeline$date, tz="America/Sao_Paulo")

#2012 to 2016
Timeline_5anos$date <- seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "month")
Timeline_5anos$Date <- as.POSIXct(Timeline_5anos$date, tz="America/Sao_Paulo")
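
A quirk worth knowing: seq() on Date objects with by = "month" keeps the day-of-month of the start date, and dates that do not exist roll forward. That is why the Tempo column above contains entries such as 2004-03-01 and 2012-03-01 where a 30 February would fall. A minimal illustration:

#day 30 does not exist in February, so that step rolls into March
seq(as.Date("2004/01/30"), by = "month", length.out = 3)
## [1] "2004-01-30" "2004-03-01" "2004-03-30"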

Removing outliers (function borrowed from aL3xa: http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset)

remove_outliers <- function(x, na.rm = TRUE, ...) {
        qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)  # 1st and 3rd quartiles
        H <- 1.5 * IQR(x, na.rm = na.rm)                           # Tukey fence half-width
        y <- x
        y[x < (qnt[1] - H)] <- NA  # flag low outliers as NA
        y[x > (qnt[2] + H)] <- NA  # flag high outliers as NA
        y
}
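
The function applies the usual Tukey fences: values below Q1 - 1.5*IQR or above Q3 + 1.5*IQR are replaced by NA, so the series keeps its length (important for plotting against the time axis). A toy check with made-up numbers:

#toy example: only 100 falls outside the fences
remove_outliers(c(10, 12, 11, 13, 12, 100))
## [1] 10 12 11 13 12 NA
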
#2004 to 2017
Timeline$DA_clean <- remove_outliers(Timeline$DA)
Timeline$DAs_clean <- remove_outliers(Timeline$DAs)
Timeline$Dislexia_clean <- remove_outliers(Timeline$Dislexia)
Timeline$FE_clean <- remove_outliers(Timeline$FE)
Timeline$PA_clean <- remove_outliers(Timeline$PA)
Timeline$DE_clean <- remove_outliers(Timeline$DE)
Timeline$Discalculia_clean <- remove_outliers(Timeline$Discalculia)
Timeline$Disgrafia_clean <- remove_outliers(Timeline$Disgrafia)
Timeline$TA_clean <- remove_outliers(Timeline$TA)
#2012 to 2017
Timeline_5anos$DA_clean <- remove_outliers(Timeline_5anos$DA)
Timeline_5anos$DAs_clean <- remove_outliers(Timeline_5anos$DAs)
Timeline_5anos$Dislexia_clean <- remove_outliers(Timeline_5anos$Dislexia)
Timeline_5anos$FE_clean <- remove_outliers(Timeline_5anos$FE)
Timeline_5anos$PA_clean <- remove_outliers(Timeline_5anos$PA)
Timeline_5anos$DE_clean <- remove_outliers(Timeline_5anos$DE)
Timeline_5anos$Discalculia_clean <- remove_outliers(Timeline_5anos$Discalculia)
Timeline_5anos$Disgrafia_clean <- remove_outliers(Timeline_5anos$Disgrafia)
Timeline_5anos$TA_clean <- remove_outliers(Timeline_5anos$TA)

Relationship between the words and time

Comparative frequency plot - 2004 to 2017

##Plot
library(scales) ##install and cite the scales package

#frequency plot
plot(Timeline$Dislexia~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Relative frequency", main = "Relative frequency over time, 2004 to 2017")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA~Timeline$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline$DE~Timeline$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline$DA~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia~Timeline$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia~Timeline$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline$TA~Timeline$Date, type = "h", lty=1, lwd=2,col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)

#outliers removed
plot(Timeline$Dislexia_clean~Timeline$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Relative frequency", main = "Relative frequency over time, 2004 to 2017, outliers removed")
axis.POSIXct(1, at=seq(as.Date("2004/01/30"), as.Date("2017/01/30"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline$DAs_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline$FE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline$PA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline$DE_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline$DA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline$Discalculia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline$Disgrafia_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline$TA_clean~Timeline$Date, type = "h", lty=1, lwd=2,col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)

Comparative frequency plot - last 5 years

plot(Timeline_5anos$Dislexia~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Relative frequency", main = "Relative frequency over time, 2012 to 2017")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)

#outliers removed
plot(Timeline_5anos$Dislexia_clean~Timeline_5anos$Date, type = "l", lty=1, lwd=2, col="red", xaxt="n", axes=F, ylim=c(0,100), xlab="", ylab="Relative frequency", main = "Relative frequency over time, 2012 to 2017, outliers removed")
axis.POSIXct(1, at=seq(as.Date("2012/01/30"), as.Date("2017/01/01"), by = "4 month"), format="%Y %b", las=2, cex.axis=0.9)
lines(Timeline_5anos$DAs_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkblue")
lines(Timeline_5anos$FE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkgray")
lines(Timeline_5anos$PA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "pink")
lines(Timeline_5anos$DE_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "black")
lines(Timeline_5anos$DA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "darkred")
lines(Timeline_5anos$Discalculia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "green")
lines(Timeline_5anos$Disgrafia_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "orange")
lines(Timeline_5anos$TA_clean~Timeline_5anos$Date, type = "h", lty=1, lwd=2,col= "purple")
legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "darkgreen"), box.col="white",lwd=c(2), cex=0.9)

Word density analysis (borrowed from melina.leite: http://ecologia.ib.usp.br/bie5782/doku.php?id=bie5782:03_apostila:05-exploratoria)

#density plot
plot(density(Timeline$Dislexia, bw=1.5), col="red", xlab="", ylab="Probability density", main = "Density by frequency")
lines(density(Timeline$DAs, bw=1.5), col= "darkblue")
lines(density(Timeline$FE, bw=1.5),col= "darkgray")
lines(density(Timeline$PA, bw=1.5),col= "pink")
lines(density(Timeline$DE, bw=1.5),col= "black")
lines(density(Timeline$DA, bw=1.5),col= "darkred")
lines(density(Timeline$Discalculia, bw=1.5),col= "green")
lines(density(Timeline$Disgrafia, bw=1.5),col= "orange")
lines(density(Timeline$TA, bw=1.5),col= "purple")

legend("topright", c("Dislexia", "Dif.s de Aprendizagem", "Dif. de Aprendizagem", "Fracasso Escolar", "Transt. da Aprendizagem",  "Disgrafia", "Probl. de Aprendizagem", "Discalculia"), bty="n", lty=c(1),
       col=c("red","darkblue", "darkred","darkgray", "purple", "orange", "pink", "green"), box.col="white",lwd=c(2,1.5), cex=0.9)

Relationship between the words: (1) Dificuldades de Aprendizagem vs. all words; (2) Dislexia vs. all words

Descriptive statistics

#maximum and minimum values (assigning to names like max, min, and var shadows
#the base functions, but calls such as max(x) still resolve to the functions)
max = apply(Timeline[3:12], MARGIN=2, FUN= max)
max_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= max)
min = apply(Timeline[3:12], MARGIN=2, FUN= min)
min_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN= min)

#median
mediana = apply(Timeline[3:12], MARGIN=2, FUN=median)
mediana_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=median)

#means
medias = apply(Timeline[3:12], MARGIN=2, FUN=mean)
medias_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=mean)

#standard deviation
dp = apply(Timeline[3:12], MARGIN=2, FUN=sd)
dp_5anos = apply(Timeline_5anos[3:12], MARGIN=2, FUN=sd)

#variance
var = apply(Timeline[3:12], 2, var)
var_5anos = apply(Timeline_5anos[3:12], 2, var)
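
The same six statistics can also be collected in a single pass over the columns; a compact equivalent of the apply() calls above:

#one pass per column; t() puts the variables in rows, as in the tables below
descr = t(sapply(Timeline[3:12], function(x)
        c(min = min(x), max = max(x), median = median(x),
          mean = mean(x), sd = sd(x), var = var(x))))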

#Descriptive table, 2004 to 2017
TabelaDes = data.frame( min, max, mediana, medias, dp, var)
TabelaDes
##             min max mediana     medias         dp         var
## DA            7  76      19 22.7707006 11.1768262 124.9214437
## DAs           9 100      29 32.0254777 15.0298630 225.8967826
## FE            3  90      12 20.1974522 18.7211109 350.4799935
## PA            2  49       7 10.1146497  7.9775070  63.6406173
## DE            2  25       6  6.7388535  3.4271662  11.7454679
## TA            0   4       1  0.9044586  0.8147946   0.6638902
## DA1           2  26       7  8.3439490  3.9300182  15.4450433
## Dislexia     13 100      25 30.5859873 15.6745934 245.6928793
## Disgrafia     0  13       2  2.3630573  1.8402650   3.3865752
## Discalculia   0   6       2  2.0891720  1.1057878   1.2227666
#Descriptive table, 2012 to 2017
TabelaDes_5anos = data.frame(min_5anos, max_5anos, mediana_5anos, medias_5anos,  dp_5anos,  var_5anos)
TabelaDes_5anos
##             min_5anos max_5anos mediana_5anos medias_5anos  dp_5anos
## DA                  7        25            17    16.266667 3.8658289
## DAs                 9        35            23    22.450000 5.6521332
## FE                  3        12             7     6.833333 2.3733454
## PA                  2        10             5     5.266667 1.6142025
## DE                  2         8             5     5.183333 1.2952543
## TA                  0         1             1     0.650000 0.4809947
## DA1                 2         9             6     5.833333 1.4976441
## Dislexia           13        35            21    21.116667 4.2469996
## Disgrafia           1         2             1     1.450000 0.5016921
## Discalculia         1         3             2     1.783333 0.5551505
##              var_5anos
## DA          14.9446328
## DAs         31.9466102
## FE           5.6327684
## PA           2.6056497
## DE           1.6776836
## TA           0.2313559
## DA1          2.2429379
## Dislexia    18.0370056
## Disgrafia    0.2516949
## Discalculia  0.3081921
#correlation between the variables
explicativas = Timeline[,3:12]
cor(explicativas, method = "spearman")
##                    DA       DAs        FE        PA        DE        TA
## DA          1.0000000 0.8540771 0.8209625 0.7936141 0.6392457 0.5180165
## DAs         0.8540771 1.0000000 0.8524418 0.8269852 0.6485852 0.4435616
## FE          0.8209625 0.8524418 1.0000000 0.8653836 0.5799170 0.4378294
## PA          0.7936141 0.8269852 0.8653836 1.0000000 0.6016096 0.4329864
## DE          0.6392457 0.6485852 0.5799170 0.6016096 1.0000000 0.3825523
## TA          0.5180165 0.4435616 0.4378294 0.4329864 0.3825523 1.0000000
## DA1         0.8503754 0.9935315 0.8494201 0.8172536 0.6425757 0.4314246
## Dislexia    0.7587646 0.7326280 0.8566252 0.7994667 0.4988009 0.3492648
## Disgrafia   0.7968748 0.8085197 0.7878822 0.7858998 0.5889306 0.5114184
## Discalculia 0.5809042 0.5118086 0.4850222 0.5163804 0.5044344 0.5069295
##                   DA1  Dislexia Disgrafia Discalculia
## DA          0.8503754 0.7587646 0.7968748   0.5809042
## DAs         0.9935315 0.7326280 0.8085197   0.5118086
## FE          0.8494201 0.8566252 0.7878822   0.4850222
## PA          0.8172536 0.7994667 0.7858998   0.5163804
## DE          0.6425757 0.4988009 0.5889306   0.5044344
## TA          0.4314246 0.3492648 0.5114184   0.5069295
## DA1         1.0000000 0.7325950 0.8125736   0.5189100
## Dislexia    0.7325950 1.0000000 0.7344780   0.4313871
## Disgrafia   0.8125736 0.7344780 1.0000000   0.5902121
## Discalculia 0.5189100 0.4313871 0.5902121   1.0000000
pairs(explicativas)
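
If p-values for the whole correlation matrix are wanted, the rcorr() function from the Hmisc package (not used in this script, so this is an optional sketch) returns estimates and p-values together:

#optional: assumes the Hmisc package is installed
library(Hmisc)
rc <- rcorr(as.matrix(explicativas), type = "spearman")
rc$r #Spearman correlation estimates
rc$P #corresponding p-values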

Normality tests for the variables

library(nortest)

#Shapiro-Francia normality test
#(apply() returns a named list of htest objects; assigning it to the name
#sf.test shadows nortest::sf.test, but later calls like sf.test(x) still work
#because R searches for a function when evaluating a call)
sf.test = apply(Timeline[17:25],  MARGIN=2, FUN=sf.test)
sf.test
## $DA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.95516, p-value = 0.0002326
## 
## 
## $DAs_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.96458, p-value = 0.001309
## 
## 
## $Dislexia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.92617, p-value = 3.676e-06
## 
## 
## $FE_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.84329, p-value = 7.899e-10
## 
## 
## $PA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.87275, p-value = 1.165e-08
## 
## 
## $DE_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.95808, p-value = 0.0003847
## 
## 
## $Discalculia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.82941, p-value = 3.222e-10
## 
## 
## $Disgrafia_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.82525, p-value = 1.137e-10
## 
## 
## $TA_clean
## 
##  Shapiro-Francia normality test
## 
## data:  newX[, i]
## W = 0.72974, p-value = 4.65e-13
#Shapiro-Wilk normality test
shapiro.test = apply(Timeline[17:25],  MARGIN=2, FUN=shapiro.test)
shapiro.test
## $DA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.95302, p-value = 7.019e-05
## 
## 
## $DAs_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.96262, p-value = 0.0005275
## 
## 
## $Dislexia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.92287, p-value = 4.782e-07
## 
## 
## $FE_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.8404, p-value = 2.398e-11
## 
## 
## $PA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.87041, p-value = 6.218e-10
## 
## 
## $DE_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.95616, p-value = 0.0001306
## 
## 
## $Discalculia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.82723, p-value = 8.761e-12
## 
## 
## $Disgrafia_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.82471, p-value = 2.961e-12
## 
## 
## $TA_clean
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.72669, p-value = 3.211e-15
#Anderson-Darling normality test
ad.test = apply(Timeline[17:25], MARGIN=2, FUN=ad.test)
ad.test
## $DA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 2.2692, p-value = 8.88e-06
## 
## 
## $DAs_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 1.5723, p-value = 0.0004605
## 
## 
## $Dislexia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 3.6684, p-value = 3.375e-09
## 
## 
## $FE_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 8.4665, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 6.3436, p-value = 1.2e-15
## 
## 
## $DE_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 2.3517, p-value = 5.571e-06
## 
## 
## $Discalculia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 11.851, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 9.308, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 20.155, p-value < 2.2e-16
#Cramer-von Mises normality test
cvm.test = apply(Timeline[17:25], MARGIN=2, FUN=cvm.test)
## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately

## Warning in FUN(newX[, i], ...): p-value is smaller than 7.37e-10, cannot be
## computed more accurately
cvm.test
## $DA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.43175, p-value = 1.219e-05
## 
## 
## $DAs_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.27493, p-value = 0.0006369
## 
## 
## $Dislexia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.64513, p-value = 1.552e-07
## 
## 
## $FE_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.5153, p-value = 7.37e-10
## 
## 
## $PA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.1283, p-value = 7.37e-10
## 
## 
## $DE_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 0.43382, p-value = 1.162e-05
## 
## 
## $Discalculia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 2.1864, p-value = 7.37e-10
## 
## 
## $Disgrafia_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 1.6243, p-value = 7.37e-10
## 
## 
## $TA_clean
## 
##  Cramer-von Mises normality test
## 
## data:  newX[, i]
## W = 3.7942, p-value = 7.37e-10
#Lilliefors (Kolmogorov-Smirnov correction) normality test
lillie.test = apply(Timeline[17:25], MARGIN=2, FUN=lillie.test)
lillie.test
## $DA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.14452, p-value = 5.78e-08
## 
## 
## $DAs_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.10969, p-value = 0.000191
## 
## 
## $Dislexia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.13847, p-value = 3.542e-07
## 
## 
## $FE_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.2038, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.20385, p-value = 3.174e-16
## 
## 
## $DE_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.14544, p-value = 4.518e-08
## 
## 
## $Discalculia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.27422, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.2652, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  newX[, i]
## D = 0.3607, p-value < 2.2e-16
#Pearson chi-square normality test, based on the number of classes
pearson.test = apply(Timeline[17:25], MARGIN=2, FUN=pearson.test)
pearson.test
## $DA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 26.367, p-value = 0.009519
## 
## 
## $DAs_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 29.685, p-value = 0.003114
## 
## 
## $Dislexia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 46.483, p-value = 5.728e-06
## 
## 
## $FE_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 137.59, p-value < 2.2e-16
## 
## 
## $PA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 76.483, p-value = 1.925e-11
## 
## 
## $DE_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 172.49, p-value < 2.2e-16
## 
## 
## $Discalculia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 693.24, p-value < 2.2e-16
## 
## 
## $Disgrafia_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 504.94, p-value < 2.2e-16
## 
## 
## $TA_clean
## 
##  Pearson chi-square normality test
## 
## data:  newX[, i]
## P = 940.86, p-value < 2.2e-16

CONCLUSION: the data are not normally distributed; non-parametric methods should therefore be adopted.

Diagnostic plots for the variable Dif.s de Aprendizagem

#Diagnostic plots and histogram with a non-parametric probability-density estimate plus a normal curve, with and without outliers
par(mfrow=c(2,2))
boxplot(Timeline$DAs, notch = T, main = "Dif.s de Aprendizagem", ylab = "observed values")
boxplot(Timeline$DAs_clean, notch = T, main = "outliers removed", ylab = "observed values")
hist(Timeline$DAs, prob=T, main = "Prob. density and normal curve", xlab = "observed values", ylab = "Probability density")
lines(density(Timeline$DAs), col="darkblue", lwd=2)
curve(expr = dnorm(x, mean=mean(Timeline$DAs), sd=sd(Timeline$DAs)), add=T, col="darkred", lwd=2)
legend("topright", c("Prob. density", "normal curve"), bty="n", lty=1,
       col=c("darkblue","darkred"), box.col="white", lwd=2, cex=0.9)
hist(Timeline$DAs_clean, prob=T, main = "outliers removed", xlab = "observed values", ylab = "Probability density")
#overlay the density and normal curve of the cleaned series (NAs must be dropped)
lines(density(Timeline$DAs_clean, na.rm=TRUE), col="darkblue", lwd=2)
curve(expr = dnorm(x, mean=mean(Timeline$DAs_clean, na.rm=TRUE), sd=sd(Timeline$DAs_clean, na.rm=TRUE)), add=T, col="darkred", lwd=2)
legend("topright", c("Prob. density", "normal curve"), bty="n", lty=1,
       col=c("darkblue","darkred"), box.col="white", lwd=2, cex=0.9)

par(mfrow=c(1,1))

#normality tests
Teste_norm_DAs = c(sf.test$DAs_clean$method, ad.test$DAs_clean$method, cvm.test$DAs_clean$method, lillie.test$DAs_clean$method, pearson.test$DAs_clean$method, shapiro.test$DAs_clean$method)
pvalor_norm_DAs = c(sf.test$DAs_clean$p.value, ad.test$DAs_clean$p.value, cvm.test$DAs_clean$p.value, lillie.test$DAs_clean$p.value, pearson.test$DAs_clean$p.value, shapiro.test$DAs_clean$p.value)

Tab_norm_DAs = data.frame(Teste_norm_DAs, pvalor_norm_DAs)
Tab_norm_DAs
##                                   Teste_norm_DAs pvalor_norm_DAs
## 1                 Shapiro-Francia normality test    0.0013094786
## 2                Anderson-Darling normality test    0.0004604626
## 3                Cramer-von Mises normality test    0.0006369276
## 4 Lilliefors (Kolmogorov-Smirnov) normality test    0.0001909866
## 5              Pearson chi-square normality test    0.0031139409
## 6                    Shapiro-Wilk normality test    0.0005274689
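
Because each apply() call above returned a named list of htest objects, the analogous table for any variable can be assembled programmatically instead of by hand, e.g.:

#p-values of the Shapiro-Francia test for all cleaned variables at once
sapply(sf.test, function(res) res$p.value)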

Correlation between the dependent variable and the other terms, Spearman method (non-parametric), outliers removed

cor.das.da = cor.test(Timeline$DAs_clean, Timeline$DA_clean, method = "spearman") ## DificuldadeS de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.fe = cor.test(Timeline$DAs_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$FE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.pa = cor.test(Timeline$DAs_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$PA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.de = cor.test(Timeline$DAs_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$DE_clean, method =
## "spearman"): Cannot compute exact p-value with ties
cor.das.dis1 = cor.test(Timeline$DAs_clean, Timeline$Dislexia_clean, method = "spearman") #Dislexia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Dislexia_clean, :
## Cannot compute exact p-value with ties
cor.das.dis2 = cor.test(Timeline$DAs_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.das.dis3 = cor.test(Timeline$DAs_clean, Timeline$Disgrafia_clean, method = "spearman") #Disgrafia
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$Disgrafia_clean, :
## Cannot compute exact p-value with ties
cor.das.ta = cor.test(Timeline$DAs_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$DAs_clean, Timeline$TA_clean, method =
## "spearman"): Cannot compute exact p-value with ties
Nome = c("Dif.s de aprend. com Dif. de Aprend.", "   com Fracasso Escolar", "   com Problema de Aprend.", "   com Desempenho Escolar", "   com Dislexia", "   com Discalculia", "   com Disgrafia", "   com Transt. de Aprend.")
  
pvalor_cor = c(cor.das.da$p.value, cor.das.fe$p.value, cor.das.pa$p.value, cor.das.de$p.value, cor.das.dis1$p.value, cor.das.dis2$p.value, cor.das.dis3$p.value, cor.das.ta$p.value) 

valor_cor = c(cor.das.da$estimate, cor.das.fe$estimate, cor.das.pa$estimate, cor.das.de$estimate, cor.das.dis1$estimate, cor.das.dis2$estimate, cor.das.dis3$estimate, cor.das.ta$estimate)

tab_cor_DAs = data.frame(Nome, valor_cor, pvalor_cor)
tab_cor_DAs
##                                   Nome valor_cor   pvalor_cor
## 1 Dif.s de aprend. com Dif. de Aprend. 0.8284254 4.629895e-37
## 2                 com Fracasso Escolar 0.8092017 2.240052e-34
## 3              com Problema de Aprend. 0.7745652 3.061280e-29
## 4               com Desempenho Escolar 0.5891858 2.355150e-14
## 5                         com Dislexia 0.7775874 3.442925e-29
## 6                      com Discalculia 0.3835854 3.416791e-06
## 7                        com Disgrafia 0.7638980 8.502890e-29
## 8               com Transt. de Aprend. 0.2659142 1.381377e-03
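
The eight cor.test() calls above can also be generated in a loop; a sketch of the same table (suppressWarnings() hides the repeated tie warnings):

vars = c("DA_clean", "FE_clean", "PA_clean", "DE_clean", "Dislexia_clean",
         "Discalculia_clean", "Disgrafia_clean", "TA_clean")
tab = do.call(rbind, lapply(vars, function(v) {
        ct = suppressWarnings(cor.test(Timeline$DAs_clean, Timeline[[v]],
                                       method = "spearman"))
        data.frame(variavel = v, rho = unname(ct$estimate), p = ct$p.value)
}))
tab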

Scatter plots with a loess regression line (non-parametric) - Dif.s de Aprendizagem, outliers removed. Each relationship is drawn twice: once with ggplot() and once with the equivalent qplot() shortcut.

library("ggplot2")
library("devtools")

ggplot(Timeline, aes(y=DAs_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(DA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dif. de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).

qplot(FE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Fracasso Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).

## Warning: Removed 14 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

qplot(PA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Problemas de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).

## Warning: Removed 17 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(DE_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Desempenho Escolar")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Dislexia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(Dislexia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Dislexia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(Discalculia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Discalculia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.0341e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.0341e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).

qplot(Disgrafia_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Disgrafia")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2507e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.2507e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
## Warning: Removed 13 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=DAs_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(TA_clean, DAs_clean, data = Timeline, geom = c("point", "smooth"), main = "Dif.s x Transtorno de Aprendizagem")
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 15 rows containing missing values (geom_point).
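
These warnings are expected rather than worrying: the removed rows correspond to the NAs introduced by remove_outliers(), and the loess "pseudoinverse"/"near singularities" messages come from predictors that take only a handful of distinct integer values, which can be checked directly:

#number of distinct observed values per heavily tied predictor
sapply(Timeline[c("Discalculia_clean", "Disgrafia_clean", "TA_clean")],
       function(x) length(unique(na.omit(x))))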

Multiple linear regression model (requires normality, homoscedasticity, and independence tests on the residuals) - Dificuldades de Aprendizagem

#Linear regression
RL_DAs1 = lm( DAs ~ DA + FE + PA + DE + Dislexia + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_DAs1 )
## 
## Call:
## lm(formula = DAs ~ DA + FE + PA + DE + Dislexia + Discalculia + 
##     Disgrafia + TA, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.7796  -4.4051  -0.2952   3.2602  23.5186 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.73371    2.16432   7.270 1.93e-11 ***
## DA           0.12492    0.11400   1.096   0.2750    
## FE           0.60800    0.08085   7.520 4.89e-12 ***
## PA          -0.14275    0.16756  -0.852   0.3956    
## DE           0.48093    0.25784   1.865   0.0641 .  
## Dislexia    -0.11573    0.05563  -2.080   0.0392 *  
## Discalculia  0.30267    0.69808   0.434   0.6652    
## Disgrafia    0.54605    0.55880   0.977   0.3301    
## TA           1.09128    1.11393   0.980   0.3288    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.531 on 148 degrees of freedom
## Multiple R-squared:  0.7618, Adjusted R-squared:  0.7489 
## F-statistic: 59.16 on 8 and 148 DF,  p-value: < 2.2e-16
#Normality test on the residuals
lillie.test(RL_DAs1$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs1$residuals
## D = 0.091273, p-value = 0.002761
#Linear regression, outliers removed
RL_DAs = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_DAs )
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Dislexia_clean + Discalculia_clean + Disgrafia_clean + TA_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.5469  -3.0775   0.0795   2.7979  15.5779 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.93634    2.01764   2.447  0.01594 *  
## DA_clean           0.58738    0.12460   4.714 6.87e-06 ***
## FE_clean           0.07087    0.08772   0.808  0.42083    
## PA_clean           0.36834    0.19833   1.857  0.06585 .  
## DE_clean           0.27743    0.27266   1.017  0.31105    
## Dislexia_clean     0.06703    0.10516   0.637  0.52512    
## Discalculia_clean  0.06120    0.74686   0.082  0.93484    
## Disgrafia_clean    2.70418    0.81104   3.334  0.00115 ** 
## TA_clean          -1.10400    0.95187  -1.160  0.24852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.707 on 115 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.7557, Adjusted R-squared:  0.7387 
## F-statistic: 44.46 on 8 and 115 DF,  p-value: < 2.2e-16
#Normality test on the residuals
lillie.test(RL_DAs$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs$residuals
## D = 0.062922, p-value = 0.2666
par(mfrow=c(2,2))
plot(RL_DAs)

library(car)
par(mfrow=c(1,2))
residualPlot(RL_DAs1, main = "Residual plot")
residualPlot(RL_DAs, main = "Residual plot, without outliers")

#Model selection by backward stepwise elimination
#removing Discalculia, p = 0.93484
RL_DAs_aj1 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Dislexia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj1)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Dislexia_clean + Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.6171  -3.0958  -0.1424   2.9445  15.1224 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.33080    1.92615   2.768  0.00655 ** 
## DA_clean         0.57281    0.12157   4.712 6.72e-06 ***
## FE_clean         0.13517    0.07651   1.767  0.07984 .  
## PA_clean         0.28327    0.19356   1.463  0.14597    
## DE_clean         0.32855    0.26187   1.255  0.21208    
## Dislexia_clean   0.04887    0.10183   0.480  0.63220    
## Disgrafia_clean  2.52339    0.77280   3.265  0.00143 ** 
## TA_clean        -0.63608    0.91365  -0.696  0.48767    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.708 on 119 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7592, Adjusted R-squared:  0.745 
## F-statistic: 53.59 on 7 and 119 DF,  p-value: < 2.2e-16
#removing Dislexia, p = 0.63220
RL_DAs_aj2 = lm( DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_DAs_aj2)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3148  -3.2140  -0.2363   3.0104  16.4652 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.18463    1.58960   3.262 0.001425 ** 
## DA_clean         0.66417    0.11270   5.893 3.25e-08 ***
## FE_clean         0.11066    0.06831   1.620 0.107726    
## PA_clean         0.20945    0.18241   1.148 0.253045    
## DE_clean         0.33812    0.25668   1.317 0.190124    
## Disgrafia_clean  2.66941    0.76217   3.502 0.000639 ***
## TA_clean        -0.65964    0.89343  -0.738 0.461688    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.774 on 126 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.7476, Adjusted R-squared:  0.7356 
## F-statistic: 62.21 on 6 and 126 DF,  p-value: < 2.2e-16
#removing Transtorno de Aprendizagem, p = 0.461688
RL_DAs_aj3 = lm(DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj3)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + PA_clean + DE_clean + 
##     Disgrafia_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.5361  -3.1722  -0.0087   2.9604  16.3183 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.04710    1.57582   3.203 0.001720 ** 
## DA_clean         0.65120    0.11113   5.860 3.75e-08 ***
## FE_clean         0.11182    0.06817   1.640 0.103400    
## PA_clean         0.21728    0.18178   1.195 0.234193    
## DE_clean         0.33401    0.25615   1.304 0.194616    
## Disgrafia_clean  2.61139    0.75675   3.451 0.000759 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.766 on 127 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.7465, Adjusted R-squared:  0.7366 
## F-statistic: 74.81 on 5 and 127 DF,  p-value: < 2.2e-16
#removing Problema de Aprendizagem, p = 0.234193
RL_DAs_aj4 = lm( DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean, data=Timeline)
summary(RL_DAs_aj4)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + DE_clean + Disgrafia_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.8319  -2.8564  -0.3094   3.0077  17.4098 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.74149    1.58509   4.253 3.96e-05 ***
## DA_clean         0.65757    0.11435   5.750 5.89e-08 ***
## FE_clean         0.16225    0.06418   2.528  0.01265 *  
## DE_clean         0.37681    0.25370   1.485  0.13986    
## Disgrafia_clean  1.89478    0.70271   2.696  0.00792 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.996 on 132 degrees of freedom
##   (20 observations deleted due to missingness)
## Multiple R-squared:  0.7318, Adjusted R-squared:  0.7237 
## F-statistic: 90.04 on 4 and 132 DF,  p-value: < 2.2e-16
#removing Desempenho Escolar, p = 0.13986
RL_DAs_aj5 = lm( DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean, data=Timeline)

summary(RL_DAs_aj5)
## 
## Call:
## lm(formula = DAs_clean ~ DA_clean + FE_clean + Disgrafia_clean, 
##     data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.5990  -3.1871  -0.3789   3.1076  16.8447 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.20862    1.42196   5.069 1.27e-06 ***
## DA_clean         0.76572    0.10467   7.316 1.95e-11 ***
## FE_clean         0.15344    0.06429   2.387   0.0184 *  
## Disgrafia_clean  1.75992    0.70563   2.494   0.0138 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.08 on 137 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.7448, Adjusted R-squared:  0.7392 
## F-statistic: 133.3 on 3 and 137 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(RL_DAs_aj5)

#Normality tests on the residuals
shapiro.test(RL_DAs_aj5$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.97898, p-value = 0.02861
sf.test(RL_DAs_aj5$residuals)
## 
##  Shapiro-Francia normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.97438, p-value = 0.01106
ad.test(RL_DAs_aj5$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  RL_DAs_aj5$residuals
## A = 0.57491, p-value = 0.1331
cvm.test(RL_DAs_aj5$residuals)
## 
##  Cramer-von Mises normality test
## 
## data:  RL_DAs_aj5$residuals
## W = 0.070668, p-value = 0.2721
lillie.test(RL_DAs_aj5$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_DAs_aj5$residuals
## D = 0.04998, p-value = 0.5265
pearson.test(RL_DAs_aj5$residuals)
## 
##  Pearson chi-square normality test
## 
## data:  RL_DAs_aj5$residuals
## P = 12.723, p-value = 0.3895
#Residual homoscedasticity tests
residualPlot(RL_DAs_aj5)
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.3.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#Goldfeld-Quandt test against heteroskedasticity (residual homoscedasticity); requires n > 30
gqtest(RL_DAs_aj5)
## 
##  Goldfeld-Quandt test
## 
## data:  RL_DAs_aj5
## GQ = 0.19071, df1 = 67, df2 = 66, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
#Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
bptest(RL_DAs_aj5)
## 
##  studentized Breusch-Pagan test
## 
## data:  RL_DAs_aj5
## BP = 23.965, df = 3, p-value = 2.54e-05
#Independence test - Durbin-Watson test
plot(RL_DAs_aj5$residuals)
dwtest(RL_DAs_aj5)
## 
##  Durbin-Watson test
## 
## data:  RL_DAs_aj5
## DW = 1.5931, p-value = 0.005695
## alternative hypothesis: true autocorrelation is greater than 0
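#The Breusch-Pagan and Durbin-Watson tests both reject, so the OLS standard
#errors above are questionable. A minimal sketch of heteroskedasticity- and
#autocorrelation-consistent (HAC) inference, assuming the sandwich package is
#installed (coeftest() comes from lmtest, already loaded):
library(sandwich)
coeftest(RL_DAs_aj5, vcov. = NeweyWest(RL_DAs_aj5))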

Diagnostic plots for the variable Dislexia

#Diagnostic plots and histogram with nonparametric density estimate plus normal curve, with and without outliers

par(mfrow=c(2,2))
boxplot(Timeline$Dislexia, notch = T, main = "Dislexia", ylab = "valores observados")
boxplot(Timeline$Dislexia_clean, notch = T, main = "removidos outliers", ylab = "valores observados")
hist(Timeline$Dislexia, prob=T, main = "Densid. probabilística e curva normal", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$Dislexia),col="darkblue", lwd=c(2))
curve(expr = dnorm(x,mean=mean(Timeline$Dislexia),sd=sd(Timeline$Dislexia)),add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)
hist(Timeline$Dislexia_clean, prob=T, main = "removidos outliers", xlab = "valores observados", ylab = "Densidade probabilística")
lines( density(Timeline$Dislexia_clean, na.rm=TRUE), col="darkblue", lwd=c(2) )
curve(expr = dnorm(x, mean=mean(Timeline$Dislexia_clean, na.rm=TRUE), sd=sd(Timeline$Dislexia_clean, na.rm=TRUE)), add=T, col="darkred", lwd=c(2))
legend("topright", c("Dens. prob.", "curva normal"), bty="n", lty=c(1),
       col=c("darkblue","darkred"), box.col="white",lwd=c(2), cex=0.9)

par(mfrow=c(1,1))

#normality tests
Teste_norm_Dis = c(sf.test(Timeline$Dislexia_clean)$method, ad.test(Timeline$Dislexia_clean)$method, cvm.test(Timeline$Dislexia_clean)$method, lillie.test(Timeline$Dislexia_clean)$method, pearson.test(Timeline$Dislexia_clean)$method)
pvalor_norm_Dis = c(sf.test(Timeline$Dislexia_clean)$p.value, ad.test(Timeline$Dislexia_clean)$p.value, cvm.test(Timeline$Dislexia_clean)$p.value, lillie.test(Timeline$Dislexia_clean)$p.value, pearson.test(Timeline$Dislexia_clean)$p.value)

Tab_norm_Dis = data.frame(Teste_norm_Dis, pvalor_norm_Dis)
Tab_norm_Dis
##                                   Teste_norm_Dis pvalor_norm_Dis
## 1                 Shapiro-Francia normality test    3.675661e-06
## 2                Anderson-Darling normality test    3.374538e-09
## 3                Cramer-von Mises normality test    1.552354e-07
## 4 Lilliefors (Kolmogorov-Smirnov) normality test    3.542177e-07
## 5              Pearson chi-square normality test    5.727799e-06
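#A more compact way to build the same table, looping over the test functions
#instead of repeating each call; a sketch assuming the nortest tests loaded above:
testes_norm = list(sf.test, ad.test, cvm.test, lillie.test, pearson.test)
res_norm = lapply(testes_norm, function(f) f(Timeline$Dislexia_clean))
data.frame(Teste_norm_Dis = sapply(res_norm, `[[`, "method"),
           pvalor_norm_Dis = sapply(res_norm, `[[`, "p.value"))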

Correlation between the dependent variable and the others, Spearman method (nonparametric), outliers removed

cor.dis.das = cor.test(Timeline$Dislexia_clean, Timeline$DAs_clean, method = "spearman") ## DificuldadeS de Aprendizagem (renamed from cor.dis.da, which the DA test below overwrote)
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DAs_clean, :
## Cannot compute exact p-value with ties
cor.dis.fe = cor.test(Timeline$Dislexia_clean, Timeline$FE_clean, method = "spearman") ## Fracasso Escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$FE_clean, :
## Cannot compute exact p-value with ties
cor.dis.pa = cor.test(Timeline$Dislexia_clean, Timeline$PA_clean, method = "spearman") ## Problemas de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$PA_clean, :
## Cannot compute exact p-value with ties
cor.dis.de = cor.test(Timeline$Dislexia_clean, Timeline$DE_clean, method = "spearman") ## Desempenho escolar
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DE_clean, :
## Cannot compute exact p-value with ties
cor.dis.da = cor.test(Timeline$Dislexia_clean, Timeline$DA_clean, method = "spearman") ## Dificuldade de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$DA_clean, :
## Cannot compute exact p-value with ties
cor.dis.dis2 = cor.test(Timeline$Dislexia_clean, Timeline$Discalculia_clean, method = "spearman") #Discalculia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Discalculia_clean, : Cannot compute exact p-value with ties
cor.dis.dis3 = cor.test(Timeline$Dislexia_clean, Timeline$Disgrafia_clean, method = "spearman") #Disgrafia
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline
## $Disgrafia_clean, : Cannot compute exact p-value with ties
cor.dis.ta = cor.test(Timeline$Dislexia_clean, Timeline$TA_clean, method = "spearman") ## Transtorno de Aprendizagem
## Warning in cor.test.default(Timeline$Dislexia_clean, Timeline$TA_clean, :
## Cannot compute exact p-value with ties
Nome_dis = c("Dislexia com Dif.s de Aprend.", "   com Fracasso Escolar", "   com Problema de Aprend.", "   com Desempenho Escolar", "   com Dif.s de Aprend.", "   com Discalculia", "   com Disgrafia", "   com Transt. de Aprend.")
  
#note: the table printed below was generated before the rename above, which is why rows 1 and 5 coincide
pvalor_cor_dis = c(cor.dis.das$p.value, cor.dis.fe$p.value, cor.dis.pa$p.value, cor.dis.de$p.value, cor.dis.da$p.value, cor.dis.dis2$p.value, cor.dis.dis3$p.value, cor.dis.ta$p.value)

valor_cor_dis = c(cor.dis.das$estimate, cor.dis.fe$estimate, cor.dis.pa$estimate, cor.dis.de$estimate, cor.dis.da$estimate, cor.dis.dis2$estimate, cor.dis.dis3$estimate, cor.dis.ta$estimate)

tab_cor_dis = data.frame(Nome_dis, valor_cor_dis, pvalor_cor_dis)
  

tab_cor_dis
##                        Nome_dis valor_cor_dis pvalor_cor_dis
## 1 Dislexia com Dif.s de Aprend.     0.7935912   2.345935e-31
## 2          com Fracasso Escolar     0.8303787   1.304910e-36
## 3       com Problema de Aprend.     0.7696528   1.769850e-28
## 4        com Desempenho Escolar     0.4603439   1.515041e-08
## 5          com Dif.s de Aprend.     0.7935912   2.345935e-31
## 6               com Discalculia     0.3494107   3.270965e-05
## 7                 com Disgrafia     0.7623783   3.038961e-28
## 8        com Transt. de Aprend.     0.2593444   2.211352e-03
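#A compact sketch producing the same correlation table in one pass;
#suppressWarnings() silences the tie warnings already shown above:
vars_cor = c("DAs_clean", "FE_clean", "PA_clean", "DE_clean", "DA_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")
cors_dis = lapply(vars_cor, function(v) suppressWarnings(cor.test(Timeline$Dislexia_clean, Timeline[[v]], method = "spearman")))
data.frame(variavel = vars_cor,
           rho = sapply(cors_dis, `[[`, "estimate"),
           pvalor = sapply(cors_dis, `[[`, "p.value"))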

Scatter plots with regression line, loess method (nonparametric) - Dislexia versus each term, outliers removed

library("ggplot2")
library("devtools")

ggplot(Timeline, aes(y=Dislexia_clean, x=DAs_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

qplot(DAs_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

## Warning: Removed 19 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=FE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(FE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=PA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

qplot(PA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=DE_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

qplot(DE_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).

## Warning: Removed 20 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=DA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

qplot(DA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).

## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=Discalculia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.03e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.03e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 22 rows containing missing values (geom_point).

qplot(Discalculia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.03e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.03e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0602
## Warning: Removed 22 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=Disgrafia_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 2
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning: Removed 15 rows containing missing values (geom_point).

qplot(Disgrafia_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 2
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning: Removed 15 rows containing missing values (geom_point).

ggplot(Timeline, aes(y=Dislexia_clean, x=TA_clean)) + geom_point(shape=1) + geom_smooth(method=loess)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 20 rows containing missing values (geom_point).

qplot(TA_clean, Dislexia_clean, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.01
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0401
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.01
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 4.0401
## Warning: Removed 20 rows containing missing values (geom_point).
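#The plots above all follow one template; a sketch that loops over the
#predictors instead (aes_string() was ggplot2's programmatic interface in the
#R 3.3.x era this document was knitted under):
preds = c("DAs_clean", "FE_clean", "PA_clean", "DE_clean", "DA_clean", "Discalculia_clean", "Disgrafia_clean", "TA_clean")
for (v in preds) {
  print(ggplot(Timeline, aes_string(x = v, y = "Dislexia_clean")) +
          geom_point(shape = 1) + geom_smooth(method = "loess"))
}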

Multiple linear regression model (with residual normality, homoscedasticity, and independence tests) - Dislexia

#Linear regression
RL_Dis1 = lm(Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + TA, data=Timeline)
summary( RL_Dis1 )
## 
## Call:
## lm(formula = Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + 
##     TA, data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.441  -5.510  -0.710   2.743  59.849 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.35399    2.82475   8.976 1.13e-15 ***
## DAs         -0.23414    0.11800  -1.984   0.0491 *  
## FE           0.71912    0.12302   5.846 3.07e-08 ***
## PA           0.41386    0.23537   1.758   0.0807 .  
## DE          -0.44224    0.37831  -1.169   0.2443    
## Discalculia -0.01647    1.01752  -0.016   0.9871    
## Disgrafia    0.03539    0.79757   0.044   0.9647    
## TA          -3.37112    1.59567  -2.113   0.0363 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11 on 149 degrees of freedom
## Multiple R-squared:  0.5298, Adjusted R-squared:  0.5077 
## F-statistic: 23.99 on 7 and 149 DF,  p-value: < 2.2e-16
#Normality tests on the residuals
lillie.test(RL_Dis1$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis1$residuals
## D = 0.17193, p-value = 2.682e-12
residualPlot(RL_Dis1)

#Linear regression, outliers removed
RL_Dis = lm(Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary( RL_Dis )
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + Discalculia_clean + Disgrafia_clean + TA_clean, 
##     data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.587  -2.578  -0.374   2.544  15.027 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       10.52645    1.60088   6.575 1.45e-09 ***
## DAs_clean          0.14754    0.07633   1.933   0.0557 .  
## FE_clean           0.39563    0.06842   5.782 6.34e-08 ***
## PA_clean           0.33230    0.18022   1.844   0.0678 .  
## DE_clean           0.39148    0.24356   1.607   0.1107    
## Discalculia_clean -0.02204    0.66670  -0.033   0.9737    
## Disgrafia_clean    0.47063    0.76995   0.611   0.5422    
## TA_clean          -0.31212    0.85288  -0.366   0.7151    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.276 on 116 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.7328, Adjusted R-squared:  0.7167 
## F-statistic: 45.45 on 7 and 116 DF,  p-value: < 2.2e-16
#Normality tests on the residuals
lillie.test(RL_Dis$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis$residuals
## D = 0.067493, p-value = 0.18
residualPlot(RL_Dis)

#Model adjustment via backward elimination
#removing Discalculia, p = 0.9737
RL_Dis_aj1 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + Disgrafia_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj1)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + Disgrafia_clean + TA_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.9415  -2.5155  -0.5345   2.5451  15.5681 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     10.75901    1.55378   6.924 2.32e-10 ***
## DAs_clean        0.14681    0.07713   1.903   0.0594 .  
## FE_clean         0.33840    0.06408   5.281 5.82e-07 ***
## PA_clean         0.44907    0.17694   2.538   0.0124 *  
## DE_clean         0.30316    0.24020   1.262   0.2093    
## Disgrafia_clean  0.67710    0.74728   0.906   0.3667    
## TA_clean        -0.76740    0.83231  -0.922   0.3584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.38 on 120 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7183, Adjusted R-squared:  0.7043 
## F-statistic: 51.01 on 6 and 120 DF,  p-value: < 2.2e-16
#removing Disgrafia, p = 0.3667
RL_Dis_aj2 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean + TA_clean, data=Timeline)
summary(RL_Dis_aj2)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean + TA_clean, data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.833  -2.752  -0.384   2.503  15.128 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.80233    1.55190   6.961 1.88e-10 ***
## DAs_clean    0.17132    0.07218   2.373  0.01920 *  
## FE_clean     0.34973    0.06280   5.569 1.58e-07 ***
## PA_clean     0.48416    0.17252   2.806  0.00584 ** 
## DE_clean     0.30986    0.23991   1.292  0.19896    
## TA_clean    -0.69829    0.82819  -0.843  0.40081    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.376 on 121 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.7164, Adjusted R-squared:  0.7047 
## F-statistic: 61.13 on 5 and 121 DF,  p-value: < 2.2e-16
#removing Transtorno de Aprendizagem (TA), p = 0.40081
RL_Dis_aj3 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + DE_clean, data=Timeline)
summary(RL_Dis_aj3)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean + 
##     DE_clean, data = Timeline)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0563  -2.6252  -0.3376   2.4554  15.8577 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.61148    1.50119   7.069 1.03e-10 ***
## DAs_clean    0.17203    0.07146   2.407  0.01755 *  
## FE_clean     0.34293    0.06188   5.542 1.73e-07 ***
## PA_clean     0.48198    0.17134   2.813  0.00572 ** 
## DE_clean     0.27460    0.22984   1.195  0.23450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.356 on 123 degrees of freedom
##   (29 observations deleted due to missingness)
## Multiple R-squared:  0.7176, Adjusted R-squared:  0.7084 
## F-statistic: 78.15 on 4 and 123 DF,  p-value: < 2.2e-16
#Removing Desempenho Escolar (DE), p = 0.23450
RL_Dis_aj4 = lm( Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, data=Timeline)

summary(RL_Dis_aj4)
## 
## Call:
## lm(formula = Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, 
##     data = Timeline)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.485  -2.823  -0.171   2.425  17.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.51395    1.39305   8.983 2.89e-15 ***
## DAs_clean    0.15267    0.06987   2.185   0.0307 *  
## FE_clean     0.33976    0.06520   5.211 7.31e-07 ***
## PA_clean     0.51939    0.16696   3.111   0.0023 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.666 on 128 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.6858, Adjusted R-squared:  0.6785 
## F-statistic: 93.15 on 3 and 128 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(RL_Dis_aj4)

#Normality tests on the residuals
shapiro.test(RL_Dis_aj4$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.95136, p-value = 0.0001274
sf.test(RL_Dis_aj4$residuals)
## 
##  Shapiro-Francia normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.94688, p-value = 0.0001391
ad.test(RL_Dis_aj4$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  RL_Dis_aj4$residuals
## A = 1.3736, p-value = 0.001418
cvm.test(RL_Dis_aj4$residuals)
## 
##  Cramer-von Mises normality test
## 
## data:  RL_Dis_aj4$residuals
## W = 0.18547, p-value = 0.008073
lillie.test(RL_Dis_aj4$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  RL_Dis_aj4$residuals
## D = 0.076317, p-value = 0.05712
pearson.test(RL_Dis_aj4$residuals)
## 
##  Pearson chi-square normality test
## 
## data:  RL_Dis_aj4$residuals
## P = 16.409, p-value = 0.1732
#Residual homoscedasticity: check the diagnostic plots
#Goldfeld-Quandt test against heteroskedasticity (residual homoscedasticity); requires n > 30
gqtest(RL_Dis_aj4)
## 
##  Goldfeld-Quandt test
## 
## data:  RL_Dis_aj4
## GQ = 0.33945, df1 = 62, df2 = 62, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
#Breusch-Pagan test: null hypothesis that the error variances are equal (homoscedasticity)
bptest(RL_Dis_aj4)
## 
##  studentized Breusch-Pagan test
## 
## data:  RL_Dis_aj4
## BP = 15.84, df = 3, p-value = 0.001223
#Independence test - Durbin-Watson test
plot(RL_Dis_aj4$residuals)
dwtest(RL_Dis_aj4)
## 
##  Durbin-Watson test
## 
## data:  RL_Dis_aj4
## DW = 1.5059, p-value = 0.001571
## alternative hypothesis: true autocorrelation is greater than 0
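#Here too the Durbin-Watson test rejects independence. A sketch of refitting
#with AR(1) errors via nlme::gls (assumption: nlme is installed); rows with
#missing values are dropped first because gls() does not accept them, which
#makes the monthly spacing only approximate:
library(nlme)
dis_dat = na.omit(Timeline[, c("Dislexia_clean", "DAs_clean", "FE_clean", "PA_clean")])
RL_Dis_gls = gls(Dislexia_clean ~ DAs_clean + FE_clean + PA_clean, data = dis_dat, correlation = corAR1())
summary(RL_Dis_gls)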

Trend plot using linear regression # adapted from amunategui # http://amunategui.github.io/google-trends-walkthrough/

library(ggplot2)
ggplot(Timeline,aes(x = Date)) +
        stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
        stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
  stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
        stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
        stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
        stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
        stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
        stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
        stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +  
          geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
        geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
        geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
        geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
        geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
        geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
        geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
        geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
        geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
        scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
        theme_bw() +
        xlab("Periodo") +
        ylab("frequencia relativa") +
        ggtitle("Regressao linear das Palavras-chave")
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).

Trend plot using linear regression, outliers removed - last 5 years # adapted from aL3xa # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset

library(ggplot2)
ggplot(Timeline_5anos,aes(x=Date)) +
        stat_smooth(aes(y = DA_clean, group=1, colour="Dif. de Aprendizagem"), method=lm, formula = y ~ poly(x,1), level=0.95) +
        stat_smooth(aes(y = DAs_clean, group=1, colour= "Dif.s de Aprendizagem"), method=lm, formula = y ~ poly(x,2), level=0.95) +
  stat_smooth(aes(y = Dislexia_clean, group=1, colour="Dislexia"), method=lm, formula = y ~ poly(x,3), level=0.95) +
        stat_smooth(aes(y = FE_clean, group=1, colour="Fracasso Escolar"), method=lm, formula = y ~ poly(x,4), level=0.95) +
        stat_smooth(aes(y = PA_clean, group=1, colour= "Problemas de Aprendizagem"), method=lm, formula = y ~ poly(x,5), level=0.95) +
        stat_smooth(aes(y = DE_clean, group=1, colour= "Desempenho Escolar"), method=lm, formula = y ~ poly(x,6), level=0.95) +
        stat_smooth(aes(y = Discalculia_clean, group=1, colour="Discalculia"), method=lm, formula = y ~ poly(x,7), level=0.95) +
        stat_smooth(aes(y = Disgrafia_clean, group=1, colour="Disgrafia"), method=lm, formula = y ~ poly(x,8), level=0.95) +
        stat_smooth(aes(y = TA_clean, group=1, colour="Transtorno da Aprendizagem"), method=lm, formula = y ~ poly(x,9), level=0.95) +
        geom_point (aes(y = DA_clean, colour = "Dif. de Aprendizagem"), size=1) +
        geom_point (aes(y = DAs_clean, colour = "Dif.s de Aprendizagem"), size=1) +
        geom_point (aes(y = Dislexia_clean, colour ="Dislexia"), size=1) +
        geom_point (aes(y = FE_clean, colour ="Fracasso Escolar"), size=1) +
        geom_point (aes(y = PA_clean, colour ="Problemas de Aprendizagem"), size=1) +
        geom_point (aes(y = DE_clean, colour ="Desempenho Escolar"), size=1) +
        geom_point (aes(y = Discalculia_clean, colour ="Discalculia"), size=1) +
        geom_point (aes(y = Disgrafia_clean, colour ="Disgrafia"), size=1) +
        geom_point (aes(y = TA_clean, colour = "Transtorno da Aprendizagem"), size=1) +
        scale_colour_manual("Termos de Pesquisa", breaks = c("Dif. de Aprendizagem", "Dif.s de Aprendizagem", "Dislexia", "Fracasso Escolar", "Problemas de Aprendizagem", "Desempenho Escolar", "Discalculia", "Disgrafia", "Transtorno da Aprendizagem"), values = c("blue","green", "red", "darkgreen", "darkred", "darkblue", "purple", "orange", "pink")) +
        theme_bw() +
        xlab("Periodo") +
        ylab("frequencia relativa") +
        ggtitle("Regressao linear das Palavras-chave, sem outliers, últimos 5 anos")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

Time series: assessing seasonality and trend # adapted from marco-lopes # https://gist.github.com/marco-lopes/8550667

# Loading the time-series packages

library(tseries)
## Warning: package 'tseries' was built under R version 3.3.3
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
# Series plots - show the overall behavior of each variable over time
ts.plot(Timeline$DAs, main = "Dif.s de Aprendizagem")

ts.plot(Timeline$DA, main = "Dif. de Aprendizagem" )

ts.plot(Timeline$Dislexia,  main = "Dislexia")

ts.plot(Timeline$FE,  main = "Fracasso Escolar")

ts.plot(Timeline$DE,  main = "Desempenho Escolar")

ts.plot(Timeline$PA,  main = "Problemas de Aprendizagem")

ts.plot(Timeline$Discalculia,  main = "Discalculia")

ts.plot(Timeline$Disgrafia,  main = "Disgrafia")

ts.plot(Timeline$TA,  main = "Transtorno de Aprendizagem")

#ETS analysis (Error/Trend/Seasonality) - exponential smoothing state space model; the first letter denotes the error type ("A", "M" or "Z"), the second the trend type ("N", "A", "M" or "Z"), and the third the season type ("N", "A", "M" or "Z"), where "N" = none, "A" = additive, "M" = multiplicative and "Z" = chosen automatically (Hyndman et al., 2008).
library(forecast)
ets(Timeline$DAs)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = Timeline$DAs) 
## 
##   Smoothing parameters:
##     alpha = 0.1533 
## 
##   Initial states:
##     l = 66.5315 
## 
##   sigma:  0.2536
## 
##      AIC     AICc      BIC 
## 1455.723 1455.880 1464.892
ets(Timeline$DA)
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = Timeline$DA) 
## 
##   Smoothing parameters:
##     alpha = 0.0023 
##     beta  = 1e-04 
##     phi   = 0.975 
## 
##   Initial states:
##     l = 46.3385 
##     b = -0.82 
## 
##   sigma:  0.27
## 
##      AIC     AICc      BIC 
## 1353.667 1354.227 1372.005
ets(Timeline$Dislexia)
## ETS(M,A,N) 
## 
## Call:
##  ets(y = Timeline$Dislexia) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0188 
## 
##   Initial states:
##     l = 27.6036 
##     b = 6.2167 
## 
##   sigma:  0.2472
## 
##      AIC     AICc      BIC 
## 1427.194 1427.592 1442.476
ets(Timeline$FE)
## ETS(M,A,N) 
## 
## Call:
##  ets(y = Timeline$FE) 
## 
##   Smoothing parameters:
##     alpha = 0.008 
##     beta  = 0.008 
## 
##   Initial states:
##     l = 66.964 
##     b = -1.6441 
## 
##   sigma:  0.3457
## 
##      AIC     AICc      BIC 
## 1298.135 1298.533 1313.416
ets(Timeline$DE)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = Timeline$DE) 
## 
##   Smoothing parameters:
##     alpha = 0.1279 
## 
##   Initial states:
##     l = 14.5963 
## 
##   sigma:  0.3395
## 
##      AIC     AICc      BIC 
## 1065.517 1065.674 1074.686
ets(Timeline$PA)
## ETS(M,A,N) 
## 
## Call:
##  ets(y = Timeline$PA) 
## 
##   Smoothing parameters:
##     alpha = 0.1118 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 26.3409 
##     b = -0.0546 
## 
##   sigma:  0.358
## 
##      AIC     AICc      BIC 
## 1169.117 1169.515 1184.399
ets(Timeline$Discalculia)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = Timeline$Discalculia) 
## 
##   Smoothing parameters:
##     alpha = 0.1624 
## 
##   Initial states:
##     l = 3.4083 
## 
##   sigma:  0.946
## 
##      AIC     AICc      BIC 
## 782.3836 782.5405 791.5524
ets(Timeline$Disgrafia)
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = Timeline$Disgrafia) 
## 
##   Smoothing parameters:
##     alpha = 0.0296 
##     beta  = 0.0296 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 13.2798 
##     b = -2.5129 
## 
##   sigma:  1.0301
## 
##      AIC     AICc      BIC 
## 815.1467 815.7067 833.4841
ets(Timeline$TA)
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = Timeline$TA) 
## 
##   Smoothing parameters:
##     alpha = 0.046 
##     beta  = 3e-04 
##     phi   = 0.9524 
## 
##   Initial states:
##     l = 3.4225 
##     b = -0.149 
## 
##   sigma:  0.56
## 
##      AIC     AICc      BIC 
## 623.7844 624.3444 642.1218
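#A fitted ETS model can be used directly for forecasting; a sketch assuming
#the series is monthly and starts in January 2004, as in the data above:
DAs_ts = ts(Timeline$DAs, start = c(2004, 1), frequency = 12)
plot(forecast(ets(DAs_ts), h = 12))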
# Augmented Dickey-Fuller test for stationarity (null hypothesis: x has a unit root); p-values are interpolated from Table 4.2 of Banerjee et al. (1993), and a warning is issued when they fall outside the table.
library(tseries)
adf.test(Timeline$DAs)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$DAs
## Dickey-Fuller = -3.965, Lag order = 5, p-value = 0.01276
## alternative hypothesis: stationary
adf.test(Timeline$DA)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$DA
## Dickey-Fuller = -3.7972, Lag order = 5, p-value = 0.02098
## alternative hypothesis: stationary
adf.test(Timeline$Dislexia)
## Warning in adf.test(Timeline$Dislexia): p-value smaller than printed p-
## value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$Dislexia
## Dickey-Fuller = -4.5001, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$FE)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$FE
## Dickey-Fuller = -3.7467, Lag order = 5, p-value = 0.02345
## alternative hypothesis: stationary
adf.test(Timeline$DE)
## Warning in adf.test(Timeline$DE): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$DE
## Dickey-Fuller = -5.642, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$PA)
## Warning in adf.test(Timeline$PA): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$PA
## Dickey-Fuller = -4.8606, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$Discalculia)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$Discalculia
## Dickey-Fuller = -2.7087, Lag order = 5, p-value = 0.2809
## alternative hypothesis: stationary
adf.test(Timeline$Disgrafia)
## Warning in adf.test(Timeline$Disgrafia): p-value smaller than printed p-
## value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$Disgrafia
## Dickey-Fuller = -4.5735, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(Timeline$TA)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Timeline$TA
## Dickey-Fuller = -3.0983, Lag order = 5, p-value = 0.1184
## alternative hypothesis: stationary
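#Discalculia and TA are the only series for which the ADF test fails to reject
#the unit root; a common remedy, sketched here, is to first-difference and retest:
adf.test(diff(Timeline$Discalculia))
adf.test(diff(Timeline$TA))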
# ACF and PACF plots - auto- and partial autocorrelation function estimates. For a well-behaved series the ACF and PACF coefficients decay quickly to zero as the lag k grows; watch for lags that cross the confidence bounds.
par(mfrow=c(1,2))
acf(Timeline$DAs, main = "Dif.s de Aprendizagem")
pacf(Timeline$DAs)

par(mfrow=c(1,2))
acf(Timeline$DA, main = "Dif. de Aprendizagem")
pacf(Timeline$DA)

par(mfrow=c(1,2))
acf(Timeline$FE,  main = "Fracasso Escolar")
pacf(Timeline$FE)

par(mfrow=c(1,2))
acf(Timeline$DE,  main = "Desempenho Escolar")
pacf(Timeline$DE)

par(mfrow=c(1,2))
acf(Timeline$PA,  main = "Problemas de Aprendizagem")
pacf(Timeline$PA)

par(mfrow=c(1,2))
acf(Timeline$Dislexia,  main = "Dislexia")
pacf(Timeline$Dislexia)

par(mfrow=c(1,2))
acf(Timeline$Discalculia,  main = "Discalculia")
pacf(Timeline$Discalculia)

par(mfrow=c(1,2))
acf(Timeline$Disgrafia,  main = "Disgrafia")
pacf(Timeline$Disgrafia)

par(mfrow=c(1,2))
acf(Timeline$TA,  main = "Transtorno de Aprendizagem")
pacf(Timeline$TA)
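#As a convenience, forecast::tsdisplay() draws a series together with its ACF
#and PACF in a single panel; a sketch for one series:
tsdisplay(Timeline$DAs, main = "Dif.s de Aprendizagem")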

#COX-STUART TEST: binomial-based test for trend in the series, giving an idea of how the values evolve
library(randtests)
## 
## Attaching package: 'randtests'
## The following object is masked from 'package:tseries':
## 
##     runs.test
cox.stuart.test(Timeline$DAs)
## 
##  Cox Stuart test
## 
## data:  Timeline$DAs
## statistic = 9, n = 78, p-value = 1.381e-12
## alternative hypothesis: non randomness
#WALD-WOLFOWITZ TEST: nonparametric test of the randomness of a data sequence; applied to a time series it assesses trend.
library(adehabitat)
## Warning: package 'adehabitat' was built under R version 3.3.3
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.3.3
## Loading required package: tkrplot
## Loading required package: tcltk
## Loading required package: shapefiles
## Warning: package 'shapefiles' was built under R version 3.3.3
## Loading required package: foreign
## 
## Attaching package: 'shapefiles'
## The following objects are masked from 'package:foreign':
## 
##     read.dbf, write.dbf
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.3
## 
## ************************************************
## ************************************************
## THE PACKAGE adehabitat IS NOW DEPRECATED!!!!!!!
##  It is dangerous to use it, as bugs will no longer be corrected.
## It is now recommended to use the packages adehabitatMA, adehabitatLT, adehabitatHR, and adehabitatHS.
## These 4 packages are the future of adehabitat.
##  They have a vignette explaining in detail how they can be used.
## They implement more methods than adehabitat
## They are based on the more common and more clever spatial classes implemented in sp.
## Bugs are corrected frequently.
## Really, avoid to use the classical adehabitat, unless you have a very good reason for it.
## 
## *****THIS IS THE VERY LAST WARNING*****
##  This is the last version of adehabitat submitted to CRAN (at the time of writing: 2015-03-27).
##  THE NEXT VERSION OF adehabitat WILL JUST BE A VIRTUAL PACKAGE LOADING ALL THE PACKAGES DESCRIBED ABOVE.
## 
## Attaching package: 'adehabitat'
## The following object is masked from 'package:zoo':
## 
##     is.regular
wawotest(Timeline$DAs)
##          a         ea         va         za          p 
## 109.306108  -1.000000 149.880596   9.010043   0.000000
#Mann-Kendall trend test, valid only if the series is serially independent
library(Kendall)
## Warning: package 'Kendall' was built under R version 3.3.3
MannKendall(Timeline$DAs)
## tau = -0.573, 2-sided pvalue =< 2.22e-16
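#An STL decomposition makes the trend these tests detect directly visible;
#a sketch assuming monthly data starting in January 2004:
plot(stl(ts(Timeline$DAs, start = c(2004, 1), frequency = 12), s.window = "periodic"))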

Time-series analysis by month ## adapted from claudio ## https://gustibuseconomia.com/2014/03/26/momento-r-do-dia-furacoes-ou-uma-imagem-vale-mais-do-que-mil-palavras-mas-qual-imagem-e-esta-a-pergunta-de-um-milhao-de-imagens/

Timeline$mes = as.factor(c(rep(month.name, 13), "January"))
Timeline_5anos$mes = as.factor(c(rep(month.name, 5)))

#match each observation to its month number
Timeline$ordem = as.factor(c(rep(1:12, 13), 1))
Timeline_5anos$ordem = as.factor(c(rep(1:12, 5)))
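#The rep() construction above assumes the series runs from January 2004 through
#January 2017 with no gaps; a more defensive sketch derives both columns from
#the "YYYY-MM" values already stored in Timeline$X:
mes_num = as.integer(substr(as.character(Timeline$X), 6, 7))
Timeline$mes = factor(month.name[mes_num], levels = month.name)
Timeline$ordem = factor(mes_num, levels = 1:12)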

library(ggplot2)
library(lattice)
library(latticeExtra) 
## Loading required package: RColorBrewer
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
#If the notches of two boxplots do not overlap, this is 'strong evidence' that the two medians differ (Chambers et al., 1983, p. 62).

# monthly plots since 2004
#Dislexia
op <- par(mfrow = c(1,2))
boxplot(Dislexia~ordem,data=Timeline, notch = T)
## Warning in bxp(structure(list(stats = structure(c(13, 15, 18.5, 23, 35, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
monthplot(Timeline$Dislexia, col = "purple",ylim=c(min(Timeline$Dislexia),max(Timeline$Dislexia)),main="Dislexia",xlab="meses",ylab="media de frequencia")

par(op)

#Dificuldades de Aprendizagem
op <- par(mfrow = c(1,2))
boxplot(DAs~ordem,data=Timeline)
monthplot(Timeline$DAs, col="darkblue",ylim=c(min(Timeline$DAs),max(Timeline$DAs)),main="Dif.s de Aprendizagem",xlab="meses",ylab="media de frequencia")

par(op)

#Monthly organization over the last five years

op <- par(mfrow = c(2,2))
monthplot(Timeline_5anos$Dislexia, xlab = "Dislexia", ylab = "", col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, xlab = "Dif.s de Aprendizagem", ylab = "", col="darkblue", cex.axis = 0.8)
monthplot(Timeline_5anos$Dislexia, ylab = "", type = "h",  col="purple", cex.axis = 0.8)
monthplot(Timeline_5anos$DAs, ylab = "", type = "h",  col="darkblue", cex.axis = 0.8)

par(op)

op <- par(mfrow = c(2,2))
boxplot(Timeline_5anos$Dislexia~Timeline_5anos$ordem, main="Dislexia", xlab="mes", ylab="frequencia", outline = F, col="purple")
boxplot(Timeline_5anos$DAs~Timeline_5anos$ordem, main="Dif.s de Aprendizagem", xlab="mes", ylab="frequencia", outline = F, col="darkblue")
par(op)

Analysis of variance by month ## adapted from Salvatore S. Mangiafico ## http://rcompanion.org/rcompanion/d_06.html

#install these packages if they are not already installed: "dplyr", "FSA", "DescTools", "rcompanion", "multcompView"

##Dificuldades de Aprendizagem (DAs)
### Specify the order of factor levels

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:adehabitat':
## 
##     id
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Timeline = mutate(Timeline,
       mes = factor(mes, levels=unique(mes)))
# Medians and descriptive statistics
library(FSA)
## Warning: package 'FSA' was built under R version 3.3.3
## ## FSA v0.8.13. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
Summarize(DAs ~ mes,data = Timeline)
##          mes  n     mean        sd min    Q1 median Q3 max
## 1    January 14 20.50000 12.965932   9 12.25     16 19  50
## 2   February 13 25.30769 12.795732  14 19.00     22 25  61
## 3      March 13 41.53846 23.827909  23 25.00     30 49 100
## 4      April 13 37.69231 13.616073  23 29.00     33 42  69
## 5        May 13 37.00000 11.350477  23 28.00     38 40  62
## 6       June 13 34.61538 11.087068  22 28.00     35 38  63
## 7       July 13 29.07692 13.726262  15 24.00     25 26  63
## 8     August 13 34.15385 14.461709  18 26.00     30 36  61
## 9  September 13 32.76923  7.949359  22 29.00     31 36  49
## 10   October 13 34.38462 17.342330  19 23.00     30 41  85
## 11  November 13 36.69231 15.200793  23 28.00     31 40  76
## 12  December 13 21.46154  7.556624  15 16.00     20 25  43
# Histograms for each group
library(lattice)
histogram(~ DAs | ordem, data=Timeline, layout=c(12,1))

#Nonparametric analysis of variance (reject the null hypothesis when p < 0.05). The Kruskal-Wallis test can be read as a test of difference in medians, applied to one-way data with more than two groups.
kruskal.test(DAs ~ mes, data = Timeline)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  DAs by mes
## Kruskal-Wallis chi-squared = 44.058, df = 11, p-value = 7.098e-06
#POST HOC TESTS
#Nemenyi test for multiple comparisons; not appropriate for groups with unequal numbers of observations. Using the chi-square distribution.
library(DescTools)
## Warning: package 'DescTools' was built under R version 3.3.3
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
## The following object is masked from 'package:car':
## 
##     Recode
NemenyiTest(x = Timeline$DAs, g = Timeline$mes, dist="chisq")
## 
##  Nemenyi's test of multiple comparisons for independent samples (chisq)  
## 
##                    mean.rank.diff   pval    
## February-January       13.7472527 1.0000    
## March-January          61.4010989 0.3431    
## April-January          66.5549451 0.2103    
## May-January            67.2472527 0.1953    
## June-January           59.3626374 0.4043    
## July-January           31.5934066 0.9870    
## August-January         51.4780220 0.6560    
## September-January      57.5934066 0.4600    
## October-January        49.9395604 0.7023    
## November-January       61.0164835 0.3543    
## December-January        2.8241758 1.0000    
## March-February         47.6538462 0.7883    
## April-February         52.8076923 0.6443    
## May-February           53.5000000 0.6229    
## June-February          45.6153846 0.8354    
## July-February          17.8461538 0.9999    
## August-February        37.7307692 0.9541    
## September-February     43.8461538 0.8709    
## October-February       36.1923077 0.9665    
## November-February      47.2692308 0.7977    
## December-February     -10.9230769 1.0000    
## April-March             5.1538462 1.0000    
## May-March               5.8461538 1.0000    
## June-March             -2.0384615 1.0000    
## July-March            -29.8076923 0.9932    
## August-March           -9.9230769 1.0000    
## September-March        -3.8076923 1.0000    
## October-March         -11.4615385 1.0000    
## November-March         -0.3846154 1.0000    
## December-March        -58.5769231 0.4623    
## May-April               0.6923077 1.0000    
## June-April             -7.1923077 1.0000    
## July-April            -34.9615385 0.9744    
## August-April          -15.0769231 1.0000    
## September-April        -8.9615385 1.0000    
## October-April         -16.6153846 1.0000    
## November-April         -5.5384615 1.0000    
## December-April        -63.7307692 0.3096    
## June-May               -7.8846154 1.0000    
## July-May              -35.6538462 0.9701    
## August-May            -15.7692308 1.0000    
## September-May          -9.6538462 1.0000    
## October-May           -17.3076923 1.0000    
## November-May           -6.2307692 1.0000    
## December-May          -64.4230769 0.2911    
## July-June             -27.7692308 0.9964    
## August-June            -7.8846154 1.0000    
## September-June         -1.7692308 1.0000    
## October-June           -9.4230769 1.0000    
## November-June           1.6538462 1.0000    
## December-June         -56.5384615 0.5269    
## August-July            19.8846154 0.9999    
## September-July         26.0000000 0.9980    
## October-July           18.3461538 0.9999    
## November-July          29.4230769 0.9939    
## December-July         -28.7692308 0.9950    
## September-August        6.1153846 1.0000    
## October-August         -1.5384615 1.0000    
## November-August         9.5384615 1.0000    
## December-August       -48.6538462 0.7629    
## October-September      -7.6538462 1.0000    
## November-September      3.4230769 1.0000    
## December-September    -54.7692308 0.5832    
## November-October       11.0769231 1.0000    
## December-October      -47.1153846 0.8014    
## December-November     -58.1923077 0.4744    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Dunn test for multiple comparisons; if the Kruskal-Wallis test is significant, a post hoc analysis can determine which levels of the independent variable differ from one another (Dunn's Kruskal-Wallis multiple comparisons).
library(FSA)
dunnTest(DAs ~ mes, data=Timeline)   
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Holm method.
##              Comparison           Z      P.unadj       P.adj
## 1        April - August  0.84597902 0.3975644470 1.000000000
## 2      April - December  3.57598786 0.0003489078 0.021981194
## 3     August - December  2.73000883 0.0063332628 0.316663142
## 4      April - February  2.96308469 0.0030457276 0.161423563
## 5     August - February  2.11710567 0.0342508810 1.000000000
## 6   December - February -0.61290317 0.5399403677 1.000000000
## 7       April - January  3.80298259 0.0001429644 0.009292686
## 8      August - January  2.94147973 0.0032664822 0.169857076
## 9    December - January  0.16137481 0.8717982066 1.000000000
## 10   February - January  0.78552485 0.4321459091 1.000000000
## 11         April - July  1.96172176 0.0497948866 1.000000000
## 12        August - July  1.11574274 0.2645322697 1.000000000
## 13      December - July -1.61426609 0.1064697213 1.000000000
## 14      February - July -1.00136292 0.3166513818 1.000000000
## 15       January - July -1.80526293 0.0710335470 1.000000000
## 16         April - June  0.40356652 0.6865315071 1.000000000
## 17        August - June -0.44241250 0.6581907361 1.000000000
## 18      December - June -3.17242133 0.0015117351 0.084657165
## 19      February - June -2.55951816 0.0104817378 0.482159939
## 20       January - June -3.39201056 0.0006938177 0.041629059
## 21          July - June -1.55815524 0.1191964526 1.000000000
## 22        April - March  0.28918671 0.7724385041 1.000000000
## 23       August - March -0.55679232 0.5776693347 1.000000000
## 24     December - March -3.28680115 0.0010133236 0.058772769
## 25     February - March -2.67389798 0.0074975256 0.367378754
## 26      January - March -3.50848927 0.0004506594 0.027940884
## 27         July - March -1.67253506 0.0944188610 1.000000000
## 28         June - March -0.11437982 0.9089367129 1.000000000
## 29          April - May -0.03884598 0.9690131893 1.000000000
## 30         August - May -0.88482500 0.3762510249 1.000000000
## 31       December - May -3.61483383 0.0003005403 0.019234581
## 32       February - May -3.00193066 0.0026827327 0.144867567
## 33        January - May -3.84254140 0.0001217669 0.008036613
## 34           July - May -2.00056774 0.0454389932 1.000000000
## 35           June - May -0.44241250 0.6581907361 1.000000000
## 36          March - May -0.32803268 0.7428869519 1.000000000
## 37     April - November  0.31076780 0.7559771472 1.000000000
## 38    August - November -0.53521122 0.5925038099 1.000000000
## 39  December - November -3.26522005 0.0010937907 0.062346068
## 40  February - November -2.65231688 0.0079941477 0.383719089
## 41   January - November -3.48651215 0.0004893631 0.029851152
## 42      July - November -1.65095396 0.0987479765 1.000000000
## 43      June - November -0.09279872 0.9260634685 1.000000000
## 44     March - November  0.02158110 0.9827821120 0.982782112
## 45       May - November  0.34961378 0.7266285682 1.000000000
## 46      April - October  0.93230341 0.3511797448 1.000000000
## 47     August - October  0.08632439 0.9312085504 1.000000000
## 48   December - October -2.64368444 0.0082009071 0.385442634
## 49   February - October -2.03078128 0.0422771873 1.000000000
## 50    January - October -2.85357127 0.0043230830 0.220477231
## 51       July - October -1.02941835 0.3032831294 1.000000000
## 52       June - October  0.52873689 0.5969879835 1.000000000
## 53      March - October  0.64311671 0.5201483754 1.000000000
## 54        May - October  0.97114939 0.3314738931 1.000000000
## 55   November - October  0.62153561 0.5342472733 1.000000000
## 56    April - September  0.50283957 0.6150770693 1.000000000
## 57   August - September -0.34313945 0.7314935537 1.000000000
## 58 December - September -3.07314828 0.0021181321 0.116497264
## 59 February - September -2.46024512 0.0138842154 0.624789692
## 60  January - September -3.29091584 0.0009986179 0.058918453
## 61     July - September -1.45888219 0.1445975355 1.000000000
## 62     June - September  0.09927305 0.9209214769 1.000000000
## 63    March - September  0.21365287 0.8308177706 1.000000000
## 64      May - September  0.54168555 0.5880351477 1.000000000
## 65 November - September  0.19207177 0.8476859892 1.000000000
## 66  October - September -0.42946384 0.6675857033 1.000000000
library(stats)
# Bartlett test, with the null hypothesis that the group variances are equal.
bartlett.test(DAs_clean~ordem, Timeline)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  DAs_clean by ordem
## Bartlett's K-squared = 9.7493, df = 11, p-value = 0.5531
library(car)
# Test of homogeneity of variances (homogeneity is indicated by p-values above 0.05)
leveneTest(DAs_clean~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group  11  0.3731 0.9644
##       134
## Dislexia
### Specify the order of factor levels

library(dplyr)

Timeline <- mutate(Timeline,
                   mes = factor(mes, levels = unique(mes)))
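To confirm the reordering, a quick check of the resulting levels (a minimal sketch; the group-wise summaries and plots below rely on this calendar order):

# Sanity check: levels should now run January through December
levels(Timeline$mes)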
# Medians and descriptive statistics
library(FSA)
Summarize(Dislexia ~ mes,data = Timeline)
##          mes  n     mean        sd min    Q1 median    Q3 max
## 1    January 14 25.35714 22.321453  13 15.25   18.5 22.75 100
## 2   February 13 28.23077 16.798580  17 20.00   21.0 26.00  77
## 3      March 13 32.07692 11.700866  19 25.00   27.0 35.00  58
## 4      April 13 33.23077  9.790760  20 25.00   33.0 40.00  52
## 5        May 13 37.69231 18.345683  21 24.00   31.0 48.00  82
## 6       June 13 33.46154 15.425171  17 23.00   28.0 36.00  72
## 7       July 13 25.84615 11.260892  16 20.00   22.0 25.00  57
## 8     August 13 28.23077  9.558189  18 23.00   25.0 31.00  49
## 9  September 13 32.92308 12.277768  18 26.00   31.0 38.00  65
## 10   October 13 34.00000 19.113695  18 22.00   25.0 39.00  84
## 11  November 13 31.84615 16.456236  19 24.00   26.0 37.00  81
## 12  December 13 24.53846 18.919194  14 15.00   18.0 26.00  85
# Histograms for each group
library(lattice)
histogram(~ Dislexia | ordem, data=Timeline, layout=c(12,1))

# Analysis of variance for non-parametric data (if the p-value is below 0.05,
# the null hypothesis should be rejected). The Kruskal-Wallis test applies to
# one-way data with more than two groups and can be interpreted as a test of
# differences in medians.
kruskal.test(Dislexia ~ mes, data = Timeline)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Dislexia by mes
## Kruskal-Wallis chi-squared = 31.264, df = 11, p-value = 0.001
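The chi-squared statistic can also be expressed as an epsilon-squared effect size, H / ((n^2 - 1) / (n + 1)); a minimal base-R sketch on the same model:

# Epsilon-squared effect size for the Kruskal-Wallis test above
kw <- kruskal.test(Dislexia ~ mes, data = Timeline)
n  <- sum(!is.na(Timeline$Dislexia))
unname(kw$statistic) / ((n^2 - 1) / (n + 1))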
# POST HOC TESTS
# Nemenyi test for multiple comparisons; not appropriate for groups with
# unequal numbers of observations. Using the chi-squared distribution.
library(DescTools)
NemenyiTest(x = Timeline$Dislexia, g = Timeline$mes, dist="chisq")
## 
##  Nemenyi's test of multiple comparisons for independent samples (chisq)  
## 
##                    mean.rank.diff   pval    
## February-January       20.1126374 0.9998    
## March-January          50.4587912 0.6871    
## April-January          57.7664835 0.4547    
## May-January            58.5357143 0.4303    
## June-January           48.5357143 0.7425    
## July-January           18.1895604 0.9999    
## August-January         35.5741758 0.9663    
## September-January      53.0741758 0.6061    
## October-January        43.6895604 0.8587    
## November-January       42.6895604 0.8778    
## December-January        0.5357143 1.0000    
## March-February         30.3461538 0.9921    
## April-February         37.6538462 0.9548    
## May-February           38.4230769 0.9475    
## June-February          28.4230769 0.9955    
## July-February          -1.9230769 1.0000    
## August-February        15.4615385 1.0000    
## September-February     32.9615385 0.9841    
## October-February       23.5769231 0.9992    
## November-February      22.5769231 0.9995    
## December-February     -19.5769231 0.9999    
## April-March             7.3076923 1.0000    
## May-March               8.0769231 1.0000    
## June-March             -1.9230769 1.0000    
## July-March            -32.2692308 0.9867    
## August-March          -14.8846154 1.0000    
## September-March         2.6153846 1.0000    
## October-March          -6.7692308 1.0000    
## November-March         -7.7692308 1.0000    
## December-March        -49.9230769 0.7289    
## May-April               0.7692308 1.0000    
## June-April             -9.2307692 1.0000    
## July-April            -39.5769231 0.9351    
## August-April          -22.1923077 0.9996    
## September-April        -4.6923077 1.0000    
## October-April         -14.0769231 1.0000    
## November-April        -15.0769231 1.0000    
## December-April        -57.2307692 0.5051    
## June-May              -10.0000000 1.0000    
## July-May              -40.3461538 0.9257    
## August-May            -22.9615385 0.9994    
## September-May          -5.4615385 1.0000    
## October-May           -14.8461538 1.0000    
## November-May          -15.8461538 1.0000    
## December-May          -58.0000000 0.4807    
## July-June             -30.3461538 0.9921    
## August-June           -12.9615385 1.0000    
## September-June          4.5384615 1.0000    
## October-June           -4.8461538 1.0000    
## November-June          -5.8461538 1.0000    
## December-June         -48.0000000 0.7799    
## August-July            17.3846154 1.0000    
## September-July         34.8846154 0.9749    
## October-July           25.5000000 0.9983    
## November-July          24.5000000 0.9989    
## December-July         -17.6538462 1.0000    
## September-August       17.5000000 1.0000    
## October-August          8.1153846 1.0000    
## November-August         7.1153846 1.0000    
## December-August       -35.0384615 0.9740    
## October-September      -9.3846154 1.0000    
## November-September    -10.3846154 1.0000    
## December-September    -52.5384615 0.6527    
## November-October       -1.0000000 1.0000    
## December-October      -43.1538462 0.8834    
## December-November     -42.1538462 0.8999    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Dunn test for multiple comparisons: if the Kruskal-Wallis test is
## significant, a post-hoc analysis can be performed to determine which levels
## of the independent variable differ from each other.
## Dunn's Kruskal-Wallis multiple comparisons.
library(FSA)
dunnTest(Dislexia ~ mes, data=Timeline)   
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Holm method.
##              Comparison           Z      P.unadj      P.adj
## 1        April - August  1.24537821 0.2129927596 1.00000000
## 2      April - December  3.21165126 0.0013197448 0.08314392
## 3     August - December  1.96627305 0.0492670785 1.00000000
## 4      April - February  2.11304206 0.0345971684 1.00000000
## 5     August - February  0.86766385 0.3855783828 1.00000000
## 6   December - February -1.09860920 0.2719385633 1.00000000
## 7       April - January  3.30120019 0.0009627216 0.06257691
## 8      August - January  2.03296910 0.0420556448 1.00000000
## 9    December - January  0.03061464 0.9755768665 0.97557687
## 10   February - January  1.14938349 0.2503978856 1.00000000
## 11         April - July  2.22096044 0.0263536406 1.00000000
## 12        August - July  0.97558224 0.3292715310 1.00000000
## 13      December - July -0.99069081 0.3218365796 1.00000000
## 14      February - July  0.10791839 0.9140604300 1.00000000
## 15       January - July -1.03948478 0.2985793329 1.00000000
## 16         April - June  0.51800827 0.6044525014 1.00000000
## 17        August - June -0.72736994 0.4669993629 1.00000000
## 18      December - June -2.69364299 0.0070675823 0.40285219
## 19      February - June -1.59503379 0.1107046801 1.00000000
## 20       January - June -2.77368639 0.0055425079 0.32146546
## 21          July - June -1.70295218 0.0885770182 1.00000000
## 22        April - March  0.41008988 0.6817400175 1.00000000
## 23       August - March -0.83528833 0.4035553862 1.00000000
## 24     December - March -2.80156138 0.0050855966 0.30005020
## 25     February - March -1.70295218 0.0885770182 1.00000000
## 26      January - March -2.88358510 0.0039317640 0.23590584
## 27         July - March -1.81087057 0.0701608897 1.00000000
## 28         June - March -0.10791839 0.9140604300 1.00000000
## 29          April - May -0.04316736 0.9655681273 1.00000000
## 30         August - May -1.28854556 0.1975561186 1.00000000
## 31       December - May -3.25481861 0.0011346481 0.07261748
## 32       February - May -2.15620941 0.0310673170 1.00000000
## 33        January - May -3.34515968 0.0008223521 0.05427524
## 34           July - May -2.26412780 0.0235662556 1.00000000
## 35           June - May -0.56117562 0.5746778195 1.00000000
## 36          March - May -0.45325723 0.6503635241 1.00000000
## 37     April - November  0.84608017 0.3975080226 1.00000000
## 38    August - November -0.39929804 0.6896736118 1.00000000
## 39  December - November -2.36557109 0.0180022869 0.95412121
## 40  February - November -1.26696189 0.2051689184 1.00000000
## 41   January - November -2.43959432 0.0147037637 0.80870700
## 42      July - November -1.37488028 0.1691685653 1.00000000
## 43      June - November  0.32807190 0.7428572979 1.00000000
## 44     March - November  0.43599029 0.6628437736 1.00000000
## 45       May - November  0.88924753 0.3738700664 1.00000000
## 46      April - October  0.78996261 0.4295496063 1.00000000
## 47     August - October -0.45541560 0.6488102733 1.00000000
## 48   December - October -2.42168865 0.0154485803 0.83422334
## 49   February - October -1.32307945 0.1858089583 1.00000000
## 50    January - October -2.49674165 0.0125340237 0.70190533
## 51       July - October -1.43099784 0.1524308349 1.00000000
## 52       June - October  0.27195434 0.7856571278 1.00000000
## 53      March - October  0.37987273 0.7040398916 1.00000000
## 54        May - October  0.83312996 0.4047714368 1.00000000
## 55   November - October -0.05611756 0.9552481533 1.00000000
## 56    April - September  0.26332087 0.7923032759 1.00000000
## 57   August - September -0.98205734 0.3260716006 1.00000000
## 58 December - September -2.94833039 0.0031949541 0.19489220
## 59 February - September -1.84972119 0.0643537452 1.00000000
## 60  January - September -3.03304734 0.0024209761 0.15010052
## 61     July - September -1.95763958 0.0502723201 1.00000000
## 62     June - September -0.25468740 0.7989645503 1.00000000
## 63    March - September -0.14676901 0.8833143470 1.00000000
## 64      May - September  0.30648822 0.7592329441 1.00000000
## 65 November - September -0.58275930 0.5600553469 1.00000000
## 66  October - September -0.52664174 0.5984424018 1.00000000
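With 66 pairwise comparisons, it helps to pull out only the adjusted p-values of interest; a minimal sketch (dunnTest stores its result table in the $res element):

# Keep only comparisons with a Holm-adjusted p-value below 0.05
dt <- dunnTest(Dislexia ~ mes, data = Timeline)
subset(dt$res, P.adj < 0.05)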
# Load the package used for the homogeneity-of-variances test
# If it is not installed: install.packages("car")
library(stats)
# Bartlett test, with the null hypothesis that the group variances are equal.
bartlett.test(Dislexia~ordem, Timeline)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Dislexia by ordem
## Bartlett's K-squared = 19.84, df = 11, p-value = 0.04759
library(car)
# Test of homogeneity of variances (homogeneity is indicated by p-values above 0.05)
leveneTest(Dislexia~ordem, Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group  11   0.281 0.9885
##       145
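Bartlett (p = 0.048) and Levene (p = 0.99) disagree here: Bartlett is sensitive to departures from normality, while the median-centered Levene test is robust to them. A minimal sketch collecting both p-values per series, assuming the cleaned columns used above exist in Timeline:

# Side-by-side homogeneity-test p-values for each series
vars <- c("DAs_clean", "Dislexia")
sapply(vars, function(v) c(
  bartlett = bartlett.test(Timeline[[v]] ~ Timeline$ordem)$p.value,
  levene   = car::leveneTest(Timeline[[v]] ~ Timeline$ordem)[["Pr(>F)"]][1]
))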

Description by state

GeoMap <-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/GeoMap.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
str(GeoMap)
## 'data.frame':    27 obs. of  21 variables:
##  $ X            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Região       : Factor w/ 27 levels "Acre","Alagoas",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ DA_Shop      : int  NA 0 0 10 3 6 7 0 2 0 ...
##  $ Dislexia_Shop: int  NA 13 100 10 2 6 7 0 4 11 ...
##  $ Cartilha_Shop: int  NA 20 99 20 14 12 29 6 5 17 ...
##  $ Cartilha     : int  61 46 91 50 46 38 50 37 43 52 ...
##  $ DA           : int  NA 77 NA 58 70 64 40 49 49 100 ...
##  $ Dislexia     : int  59 47 100 54 49 51 46 48 49 55 ...
##  $ Pais         : int  78 81 98 77 81 77 74 74 70 82 ...
##  $ Pais1        : int  73 74 90 70 76 71 69 68 65 74 ...
##  $ Fam1         : int  26 29 44 29 27 27 27 23 22 30 ...
##  $ EF1          : int  24 28 32 18 29 23 17 27 19 30 ...
##  $ Crianca1     : int  83 82 100 72 80 80 85 74 73 81 ...
##  $ TA1          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DA1          : int  0 23 0 15 21 20 11 14 14 28 ...
##  $ Dislexia1    : int  61 43 100 55 50 49 44 47 49 54 ...
##  $ Disgrafia1   : int  0 0 0 0 4 4 4 0 4 0 ...
##  $ Discalculia1 : int  0 4 0 4 4 4 3 4 3 6 ...
##  $ Cartilha_sug : int  80 52 95 58 52 44 57 43 49 59 ...
##  $ DA_sug       : int  NA 83 NA 53 75 71 41 51 51 100 ...
##  $ Pais_sug     : int  78 79 96 75 81 76 73 73 69 80 ...
tail(GeoMap)
##     X         Região DA_Shop Dislexia_Shop Cartilha_Shop Cartilha DA
## 22 22       Rondônia       0             0            54       52 NA
## 23 23        Roraima      NA            NA            NA      100 NA
## 24 24 Santa Catarina       3             0            11       32 43
## 25 25      São Paulo       2             5            20       32 31
## 26 26        Sergipe       0            35            18       46 NA
## 27 27      Tocantins       0            30            30       65 NA
##    Dislexia Pais Pais1 Fam1 EF1 Crianca1 TA1 DA1 Dislexia1 Disgrafia1
## 22       55   77    71   21  24       77   0   0        55          0
## 23       77  100    94   34  33       95   0   0        79          0
## 24       48   77    71   19  27       63   0  13        48          3
## 25       46   71    64   18  19       65   1   9        45          2
## 26       46   78    72   27  22       77   0   0        42          0
## 27       41   81    72   24  34       81   0   0        41          0
##    Discalculia1 Cartilha_sug DA_sug Pais_sug
## 22            0           63     NA       76
## 23            0          100     NA      100
## 24            3           36     48       75
## 25            3           37     33       69
## 26            0           54     NA       77
## 27            0           75     NA       77
summary(GeoMap)
##        X             Região      DA_Shop      Dislexia_Shop   
##  Min.   : 1.0   Acre    : 1   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.: 7.5   Alagoas : 1   1st Qu.: 0.00   1st Qu.:  4.00  
##  Median :14.0   Amapá   : 1   Median : 2.00   Median :  7.00  
##  Mean   :14.0   Amazonas: 1   Mean   : 2.56   Mean   : 12.32  
##  3rd Qu.:20.5   Bahia   : 1   3rd Qu.: 3.00   3rd Qu.: 11.00  
##  Max.   :27.0   Ceará   : 1   Max.   :10.00   Max.   :100.00  
##                 (Other) :21   NA's   :2       NA's   :2       
##  Cartilha_Shop      Cartilha            DA            Dislexia     
##  Min.   : 5.00   Min.   : 29.00   Min.   : 28.00   Min.   : 41.00  
##  1st Qu.: 9.00   1st Qu.: 37.50   1st Qu.: 42.50   1st Qu.: 46.00  
##  Median :17.00   Median : 46.00   Median : 55.00   Median : 49.00  
##  Mean   :20.84   Mean   : 48.63   Mean   : 56.53   Mean   : 52.11  
##  3rd Qu.:20.00   3rd Qu.: 52.50   3rd Qu.: 68.00   3rd Qu.: 54.00  
##  Max.   :99.00   Max.   :100.00   Max.   :100.00   Max.   :100.00  
##  NA's   :2                        NA's   :8                        
##       Pais            Pais1            Fam1            EF1       
##  Min.   : 67.00   Min.   :62.00   Min.   :18.00   Min.   :17.00  
##  1st Qu.: 72.00   1st Qu.:67.00   1st Qu.:21.50   1st Qu.:20.50  
##  Median : 75.00   Median :69.00   Median :26.00   Median :25.00  
##  Mean   : 76.89   Mean   :70.74   Mean   :25.22   Mean   :25.04  
##  3rd Qu.: 78.00   3rd Qu.:72.00   3rd Qu.:27.00   3rd Qu.:29.00  
##  Max.   :100.00   Max.   :94.00   Max.   :44.00   Max.   :34.00  
##                                                                  
##     Crianca1           TA1               DA1          Dislexia1     
##  Min.   : 60.00   Min.   :0.00000   Min.   : 0.00   Min.   : 41.00  
##  1st Qu.: 71.50   1st Qu.:0.00000   1st Qu.: 0.00   1st Qu.: 46.00  
##  Median : 77.00   Median :0.00000   Median :13.00   Median : 49.00  
##  Mean   : 76.11   Mean   :0.03704   Mean   :11.48   Mean   : 52.07  
##  3rd Qu.: 80.50   3rd Qu.:0.00000   3rd Qu.:17.50   3rd Qu.: 54.50  
##  Max.   :100.00   Max.   :1.00000   Max.   :28.00   Max.   :100.00  
##                                                                     
##    Disgrafia1     Discalculia1    Cartilha_sug        DA_sug      
##  Min.   :0.000   Min.   :0.000   Min.   : 32.00   Min.   : 30.00  
##  1st Qu.:0.000   1st Qu.:1.500   1st Qu.: 43.50   1st Qu.: 46.00  
##  Median :0.000   Median :4.000   Median : 52.00   Median : 53.00  
##  Mean   :1.593   Mean   :2.926   Mean   : 55.33   Mean   : 59.05  
##  3rd Qu.:3.500   3rd Qu.:4.000   3rd Qu.: 63.00   3rd Qu.: 73.00  
##  Max.   :5.000   Max.   :6.000   Max.   :100.00   Max.   :100.00  
##                                                   NA's   :8       
##     Pais_sug     
##  Min.   : 67.00  
##  1st Qu.: 71.50  
##  Median : 74.00  
##  Mean   : 75.52  
##  3rd Qu.: 77.00  
##  Max.   :100.00  
## 
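Several GeoMap columns contain NA's (DA and DA_sug have 8 each); a one-line check of missingness per column before plotting:

# Count missing values in each GeoMap column
colSums(is.na(GeoMap))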

Relationship between words and region

Comparative frequency plot - 2004 to 2017

# Plot against the factor codes so that type = "l" draws connected lines
# (plot(y ~ factor) would dispatch to a boxplot); a custom x-axis shows the
# state names.
regiao <- as.numeric(GeoMap$Região)
plot(regiao, GeoMap$Dislexia1, type = "l", lty = 1, lwd = 2, col = "red",
     ylim = c(0, 100), xaxt = "n", xlab = "Region", ylab = "Relative frequency",
     main = "Relative frequency by geographic region, 2004 to 2017")
axis(1, at = regiao, labels = GeoMap$Região, las = 2, cex.axis = 0.6)
lines(regiao, GeoMap$DA1, lty = 1, lwd = 2, col = "darkblue")
lines(regiao, GeoMap$Discalculia1, lty = 1, lwd = 2, col = "green")
lines(regiao, GeoMap$Disgrafia1, lty = 1, lwd = 2, col = "orange")
lines(regiao, GeoMap$TA1, lty = 1, lwd = 2, col = "purple")

legend("topright", c("Dislexia", "Dif. de Aprendizagem", "Discalculia",
       "Disgrafia", "Transt. da Aprendizagem"), bty = "n", lty = 1,
       col = c("red", "darkblue", "green", "orange", "purple"),
       box.col = "white", lwd = 2, cex = 0.9)
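An alternative sketch: matplot draws all five series with one call, avoiding the repeated lines() calls (the same GeoMap columns as above are assumed):

# All five series in a single matplot call
series <- c("Dislexia1", "DA1", "Discalculia1", "Disgrafia1", "TA1")
matplot(as.numeric(GeoMap$Região), GeoMap[, series], type = "l", lty = 1,
        lwd = 2, col = c("red", "darkblue", "green", "orange", "purple"),
        ylim = c(0, 100), xlab = "Region", ylab = "Relative frequency")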