# Point the session at the data folder; the relative path used later by
# write.csv() resolves against this directory.
# The path is quoted with plain ASCII double quotes: typographic quotes are
# not valid string delimiters in R and make the script fail to parse.
setwd("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados")
# Load the full monthly series (one row per month, one column per keyword).
Timeline <- read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/timeline2.csv", sep = ",", dec = ",", fill = TRUE, header = TRUE)
# Show the column types of the loaded data frame.
str(Timeline)
## 'data.frame': 157 obs. of 14 variables:
## $ X : Factor w/ 157 levels "2004-01","2004-02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Cartilha : int 38 46 58 47 38 44 34 49 42 38 ...
## $ DA : int 49 49 19 54 51 59 42 44 76 58 ...
## $ DAs : int 37 61 100 69 62 63 63 61 46 85 ...
## $ FE : int 31 43 75 79 89 90 55 51 69 64 ...
## $ PA : int 12 18 25 49 39 35 25 17 36 30 ...
## $ DE : int 12 12 25 20 8 16 8 7 20 10 ...
## $ TA : int 3 3 3 3 2 2 2 3 3 3 ...
## $ DA1 : int 10 16 26 18 16 16 16 16 12 22 ...
## $ Dislexia : int 35 26 33 40 82 72 57 49 65 61 ...
## $ Disgrafia : int 13 10 10 5 6 4 4 5 10 3 ...
## $ Discalculia: int 3 3 3 4 5 3 2 2 3 4 ...
## $ Tempo : Factor w/ 157 levels "2004-01-30","2004-03-01",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ tempo : Factor w/ 157 levels "2004-01-29 22:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
# Show the last rows of the full series.
tail(Timeline)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 152 2016-08 24 17 18 4 4 5 1 5 20 1 2
## 153 2016-09 25 19 22 8 6 5 1 6 31 2 2
## 154 2016-10 24 17 19 8 5 5 1 5 21 2 2
## 155 2016-11 23 22 23 8 6 7 1 6 20 2 3
## 156 2016-12 14 18 16 3 5 5 1 4 17 1 2
## 157 2017-01 15 9 11 3 3 2 0 3 14 1 1
## Tempo tempo
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
## 157 2017-01-30 2017-01-29 22:00:00
# NOTE(review): this reads Timeline_5anos.csv, which looks like the file
# produced by the write.csv() call further down (assuming the working
# directory is this same "Dados" folder). The object is reassigned from
# Timeline[97:156, ] before anything uses it, so this read appears redundant
# and will fail on a machine where the CSV has not been written yet -- confirm.
Timeline_5anos<-read.csv("C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/Timeline_5anos.csv", sep=",", dec=",",fill=TRUE, header=TRUE)
# Descriptive statistics for every column of the full series.
summary(Timeline)
## X Cartilha DA DAs
## 2004-01: 1 Min. : 14.00 Min. : 7.00 Min. : 9.00
## 2004-02: 1 1st Qu.: 24.00 1st Qu.:16.00 1st Qu.: 23.00
## 2004-03: 1 Median : 28.00 Median :19.00 Median : 29.00
## 2004-04: 1 Mean : 29.56 Mean :22.77 Mean : 32.03
## 2004-05: 1 3rd Qu.: 33.00 3rd Qu.:27.00 3rd Qu.: 38.00
## 2004-06: 1 Max. :100.00 Max. :76.00 Max. :100.00
## (Other):151
## FE PA DE TA
## Min. : 3.0 Min. : 2.00 Min. : 2.000 Min. :0.0000
## 1st Qu.: 7.0 1st Qu.: 5.00 1st Qu.: 5.000 1st Qu.:0.0000
## Median :12.0 Median : 7.00 Median : 6.000 Median :1.0000
## Mean :20.2 Mean :10.11 Mean : 6.739 Mean :0.9045
## 3rd Qu.:27.0 3rd Qu.:12.00 3rd Qu.: 8.000 3rd Qu.:1.0000
## Max. :90.0 Max. :49.00 Max. :25.000 Max. :4.0000
##
## DA1 Dislexia Disgrafia Discalculia
## Min. : 2.000 Min. : 13.00 Min. : 0.000 Min. :0.000
## 1st Qu.: 6.000 1st Qu.: 21.00 1st Qu.: 1.000 1st Qu.:1.000
## Median : 7.000 Median : 25.00 Median : 2.000 Median :2.000
## Mean : 8.344 Mean : 30.59 Mean : 2.363 Mean :2.089
## 3rd Qu.:10.000 3rd Qu.: 35.00 3rd Qu.: 3.000 3rd Qu.:2.000
## Max. :26.000 Max. :100.00 Max. :13.000 Max. :6.000
##
## Tempo tempo
## 2004-01-30: 1 2004-01-29 22:00:00: 1
## 2004-03-01: 1 2004-02-29 21:00:00: 1
## 2004-03-30: 1 2004-03-29 21:00:00: 1
## 2004-04-30: 1 2004-04-29 21:00:00: 1
## 2004-05-30: 1 2004-05-29 21:00:00: 1
## 2004-06-30: 1 2004-06-29 21:00:00: 1
## (Other) :151 (Other) :151
Select the study period (months 97:156 - exactly 5 years)
# Keep rows 97 through 156 of the full series as the five-year study window,
# then preview its first rows.
Timeline_5anos <- Timeline[seq.int(97, 156), ]
head(x = Timeline_5anos)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 97 2012-01 15 10 19 5 3 3 0 5 22 1 1
## 98 2012-02 20 15 19 6 7 4 0 5 20 1 1
## 99 2012-03 26 18 30 9 7 5 0 8 27 2 2
## 100 2012-04 25 25 29 11 8 5 1 8 27 2 1
## 101 2012-05 29 24 35 12 10 7 1 9 24 2 2
## 102 2012-06 25 18 31 12 7 6 1 8 23 2 2
## Tempo tempo
## 97 2012-01-30 2012-01-29 22:00:00
## 98 2012-03-01 2012-02-29 21:00:00
## 99 2012-03-30 2012-03-29 21:00:00
## 100 2012-04-30 2012-04-29 21:00:00
## 101 2012-05-30 2012-05-29 21:00:00
## 102 2012-06-30 2012-06-29 21:00:00
# Show the last rows of the five-year subset.
tail(Timeline_5anos)
## X Cartilha DA DAs FE PA DE TA DA1 Dislexia Disgrafia Discalculia
## 151 2016-07 21 14 15 4 3 4 1 4 22 1 1
## 152 2016-08 24 17 18 4 4 5 1 5 20 1 2
## 153 2016-09 25 19 22 8 6 5 1 6 31 2 2
## 154 2016-10 24 17 19 8 5 5 1 5 21 2 2
## 155 2016-11 23 22 23 8 6 7 1 6 20 2 3
## 156 2016-12 14 18 16 3 5 5 1 4 17 1 2
## Tempo tempo
## 151 2016-07-30 2016-07-29 21:00:00
## 152 2016-08-30 2016-08-29 21:00:00
## 153 2016-09-30 2016-09-29 21:00:00
## 154 2016-10-30 2016-10-29 22:00:00
## 155 2016-11-30 2016-11-29 22:00:00
## 156 2016-12-30 2016-12-29 22:00:00
# Save the five-year subset in the working directory. row.names = FALSE keeps
# the data-frame row labels out of the file, so reading it back with
# read.csv() does not create an extra leading index column.
write.csv(Timeline_5anos, file = "Timeline_5anos.csv", row.names = FALSE)
Relação Palavras x Tempo
Gráfico de Frequência Comparativa - 2004 a 2017
# Comparative frequency chart for the whole series: Dislexia is drawn as a
# line and every other keyword as vertical bars on the same axes.
plot(Timeline$Dislexia, type = "l", main = "Palavras-chaves", col = "red",
     ylim = c(0, 100))
lines(Timeline$DAs, type = "h", col = "darkblue")
lines(Timeline$FE, type = "h", col = "darkgray")
lines(Timeline$PA, type = "h", col = "pink")
lines(Timeline$DE, type = "h", col = "black")
lines(Timeline$DA, type = "h", col = "darkred")
lines(Timeline$Discalculia, type = "h", col = "darkgreen")
lines(Timeline$Disgrafia, type = "h", col = "orange")
lines(Timeline$TA, type = "h", col = "purple")
# One legend entry per plotted series, in the same order as the colour vector.
# DE (black) is listed so that all nine series are identified; its label
# assumes DE stands for "Desempenho Acadêmico" -- confirm.
legend("topright",
       c('Dislexia', 'Dif. de Aprendizagens', 'Dif. de Aprendizagem',
         'Fracasso Escolar', 'Transt. da Aprendizagem', 'Disgrafia',
         'Probl. de Aprendizagem', "Discalculia", 'Desemp. Acadêmico'),
       lty = 1, lwd = 1,
       col = c("red", "darkblue", "darkred", "darkgray", "purple", "orange",
               "pink", "darkgreen", "black"),
       box.col = "white", cex = 0.7)
# Five-year chart against the timestamp column.
# The timestamps are converted to POSIXct first: str() shows tempo is read as
# a factor (character on R >= 4.0), and plot() only draws a continuous time
# axis for a date-time x. local() keeps the helper variable out of the
# workspace.
local({
  tempo <- as.POSIXct(as.character(Timeline_5anos$tempo), tz = "UTC")
  plot(tempo, Timeline_5anos$Dislexia, col = 'Blue',
       main = "Gráfico Palavras-chave", xlab = "ano", ylab = "palavras-chave")
  lines(tempo, Timeline_5anos$DA, col = 'Green')
  lines(tempo, Timeline_5anos$DAs, col = 'Darkgreen')
  lines(tempo, Timeline_5anos$FE, col = 'Darkblue')
  lines(tempo, Timeline_5anos$PA, col = 'Darkred')
  lines(tempo, Timeline_5anos$DE, col = 'Purple')
  lines(tempo, Timeline_5anos$TA, col = 'orange')
  lines(tempo, Timeline_5anos$Dislexia, col = 'yellow')
  lines(tempo, Timeline_5anos$Discalculia, col = 'Pink')
  lines(tempo, Timeline_5anos$Disgrafia, col = 'Blue')
  # Legend colours follow the lines() calls above, in the same order.
  legend("topright",
         c('DA', 'DAs', 'FE', 'PA', 'DE', 'TA', 'Dislexia', 'Discalculia',
           'Disgrafia'),
         lty = 1, lwd = 1,
         col = c("green", "darkgreen", 'darkblue', 'darkred', 'purple',
                 'orange', 'yellow', 'pink', 'blue'),
         box.col = "white", cex = 0.7)
})
Gráfico de Frequência Comparativa - últimos 5 anos
# Comparative frequency chart for the five-year subset against the date column.
# Tempo is converted to Date first: str() shows it is read as a factor
# (character on R >= 4.0), and plot() only draws a continuous time axis for a
# date x. local() keeps the helper variable out of the workspace.
local({
  datas <- as.Date(as.character(Timeline_5anos$Tempo))
  plot(datas, Timeline_5anos$Dislexia, main = "Palavras-chaves", col = "red",
       ylim = c(0, 60))
  lines(datas, Timeline_5anos$Dislexia, col = "red")
  lines(datas, Timeline_5anos$DAs, col = "darkblue")
  lines(datas, Timeline_5anos$FE, col = "darkgray")
  lines(datas, Timeline_5anos$PA, col = "pink")
  lines(datas, Timeline_5anos$DE, col = "black")
  lines(datas, Timeline_5anos$DA, col = "darkred")
  lines(datas, Timeline_5anos$Discalculia, col = "darkgreen")
  lines(datas, Timeline_5anos$Disgrafia, col = "orange")
  lines(datas, Timeline_5anos$TA, col = "purple")
  # The legend is split in two to fit; each colour vector has exactly one
  # entry per label.
  legend("topright",
         c('Dislexia', 'Dif. de Aprendizagens', 'Dif. de Aprendizagem',
           'Fracasso Escolar'),
         lty = 1, lwd = 1,
         col = c("red", "darkblue", "darkred", "darkgray"),
         box.col = "white", cex = 0.7)
  # DE (black) is listed so that all nine series are identified; its label
  # assumes DE stands for "Desempenho Acadêmico" -- confirm.
  legend("topleft",
         c('Transt. da Aprendizagem', 'Disgrafia', 'Probl. de Aprendizagem',
           "Discalculia", 'Desemp. Acadêmico'),
         lty = 1, lwd = 1,
         col = c("purple", "orange", "pink", "darkgreen", "black"),
         box.col = "white", cex = 0.7)
})
Relação entre as palavras: (1) Dificuldades de Aprendizagem (DA) x todas as palavras; (2) Dislexia x todas as palavras
Gráfico e Correlação
library(lattice)
# Scatter plot and Pearson correlation: DA against DAs
# (DAs is the plural term, "Dificuldades de Aprendizagem").
xyplot(DA ~ DAs, data = Timeline, xlab = "DAs", ylab = "DA")
cor.test(Timeline$DA, Timeline$DAs)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$DAs
## t = 14.779, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6909186 0.8228527
## sample estimates:
## cor
## 0.7647881
# Scatter plot and Pearson correlation: DA against FE ("Fracasso Escolar").
xyplot(DA ~ FE, data = Timeline, xlab = "FE", ylab = "DA")
cor.test(Timeline$DA, Timeline$FE)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$FE
## t = 19.21, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7858275 0.8801211
## sample estimates:
## cor
## 0.8391715
# Scatter plot and Pearson correlation: DA against PA
# ("Problemas de Aprendizagem").
xyplot(DA ~ PA, data = Timeline, xlab = "PA", ylab = "DA")
cor.test(Timeline$DA, Timeline$PA)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$PA
## t = 17.654, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7575583 0.8633455
## sample estimates:
## cor
## 0.8172227
# Scatter plot and Pearson correlation: DA against DE
# ("Desempenho academico", academic performance).
xyplot(DA ~ DE, data = Timeline, xlab = "DE", ylab = "DA")
cor.test(Timeline$DA, Timeline$DE)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$DE
## t = 10.417, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5392832 0.7254340
## sample estimates:
## cor
## 0.6417142
# Scatter plot and Pearson correlation: DA against Dislexia.
xyplot(DA ~ Dislexia, data = Timeline, xlab = "Dislexia", ylab = "DA")
cor.test(Timeline$DA, Timeline$Dislexia)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$Dislexia
## t = 8.9921, df = 155, p-value = 8.009e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4721817 0.6798049
## sample estimates:
## cor
## 0.5855143
# Scatter plot and Pearson correlation: DA against Discalculia.
xyplot(DA ~ Discalculia, data = Timeline, xlab = "Discalculia", ylab = "DA")
cor.test(Timeline$DA, Timeline$Discalculia)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$Discalculia
## t = 7.7955, df = 155, p-value = 8.725e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4079773 0.6345872
## sample estimates:
## cor
## 0.5307011
# Scatter plot and Pearson correlation: DA against Disgrafia.
xyplot(DA ~ Disgrafia, data = Timeline, xlab = "Disgrafia", ylab = "DA")
cor.test(Timeline$DA, Timeline$Disgrafia)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$Disgrafia
## t = 13.541, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6550508 0.8004889
## sample estimates:
## cor
## 0.7361551
# Scatter plot and Pearson correlation: DA against TA
# ("Transtorno de Aprendizagem").
xyplot(DA ~ TA, data = Timeline, xlab = "TA", ylab = "DA")
cor.test(Timeline$DA, Timeline$TA)
##
## Pearson's product-moment correlation
##
## data: Timeline$DA and Timeline$TA
## t = 10.223, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5307227 0.7197025
## sample estimates:
## cor
## 0.6346052
# Scatter plot and Pearson correlation: Dislexia against DAs
# ("Dificuldades de Aprendizagem", plural term).
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ DAs, Timeline, ylab = "Dislexia", xlab = "DAs")
cor.test(Timeline$Dislexia, Timeline$DAs)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$DAs
## t = 7.1286, df = 155, p-value = 3.611e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3689747 0.6063397
## sample estimates:
## cor
## 0.4968946
# Scatter plot and Pearson correlation: Dislexia against FE
# ("Fracasso Escolar").
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ FE, Timeline, ylab = "Dislexia", xlab = "FE")
cor.test(Timeline$Dislexia, Timeline$FE)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$FE
## t = 11.802, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5955117 0.7624497
## sample estimates:
## cor
## 0.6879756
# Scatter plot and Pearson correlation: Dislexia against PA
# ("Problemas de Aprendizagem").
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ PA, Timeline, ylab = "Dislexia", xlab = "PA")
cor.test(Timeline$Dislexia, Timeline$PA)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$PA
## t = 9.9916, df = 155, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5202744 0.7126720
## sample estimates:
## cor
## 0.6259045
# Scatter plot and Pearson correlation: Dislexia against DE
# ("Desempenho academico", academic performance).
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ DE, Timeline, ylab = "Dislexia", xlab = "DE")
cor.test(Timeline$Dislexia, Timeline$DE)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$DE
## t = 4.6371, df = 155, p-value = 7.457e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2035280 0.4794633
## sample estimates:
## cor
## 0.3490387
# Scatter plot and Pearson correlation: Dislexia against Discalculia.
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ Discalculia, Timeline, ylab = "Dislexia", xlab = "Discalculia")
cor.test(Timeline$Dislexia, Timeline$Discalculia)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$Discalculia
## t = 3.9931, df = 155, p-value = 0.0001004
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1562442 0.4409515
## sample estimates:
## cor
## 0.3054079
# Scatter plot and Pearson correlation: Dislexia against Disgrafia.
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ Disgrafia, Timeline, ylab = "Dislexia", xlab = "Disgrafia")
cor.test(Timeline$Dislexia, Timeline$Disgrafia)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$Disgrafia
## t = 5.7383, df = 155, p-value = 4.881e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2803306 0.5398314
## sample estimates:
## cor
## 0.4185884
# Scatter plot and Pearson correlation: Dislexia against TA
# ("Transtorno de Aprendizagem").
# The y axis carries Dislexia, so it is labelled "Dislexia".
xyplot(Dislexia ~ TA, Timeline, ylab = "Dislexia", xlab = "TA")
cor.test(Timeline$Dislexia, Timeline$TA)
##
## Pearson's product-moment correlation
##
## data: Timeline$Dislexia and Timeline$TA
## t = 3.6236, df = 155, p-value = 0.0003933
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1284462 0.4178104
## sample estimates:
## cor
## 0.2794617
Regressão linear - Dificuldades de Aprendizagem
library("ggplot2")
library("devtools")
# For each predictor, DA is plotted twice: once with a straight lm fit
# (ggplot) and once with qplot's default smoother.
# DA against DAs
ggplot(data = Timeline, mapping = aes(x = DAs, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = DAs, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# DA against FE
ggplot(data = Timeline, mapping = aes(x = FE, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = FE, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# DA against PA
ggplot(data = Timeline, mapping = aes(x = PA, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = PA, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# DA against DE
ggplot(data = Timeline, mapping = aes(x = DE, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = DE, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# DA against Dislexia
ggplot(data = Timeline, mapping = aes(x = Dislexia, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = Dislexia, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# DA against Discalculia
ggplot(data = Timeline, mapping = aes(x = Discalculia, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = Discalculia, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0022e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.0022e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
# DA against Disgrafia: straight lm fit, then qplot's default smoother.
ggplot(data = Timeline, mapping = aes(x = Disgrafia, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = Disgrafia, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 2
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
# DA against TA: straight lm fit, then qplot's default smoother.
ggplot(data = Timeline, mapping = aes(x = TA, y = DA)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = TA, y = DA, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
# Multiple linear regression of DA on all the other keywords, followed by the
# coefficient table and fit statistics.
RL_DA <- lm(
  DA ~ DAs + FE + PA + DE + Dislexia + Discalculia + Disgrafia + TA,
  data = Timeline
)
summary(RL_DA)
##
## Call:
## lm(formula = DA ~ DAs + FE + PA + DE + Dislexia + Discalculia +
## Disgrafia + TA, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.185 -2.431 0.058 2.311 20.317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.71883 1.72431 3.897 0.000147 ***
## DAs 0.06442 0.05879 1.096 0.274956
## FE 0.15382 0.06708 2.293 0.023252 *
## PA 0.35978 0.11694 3.077 0.002496 **
## DE 0.15261 0.18690 0.817 0.415519
## Dislexia 0.05330 0.04029 1.323 0.187902
## Discalculia 0.42539 0.50040 0.850 0.396650
## Disgrafia 1.10291 0.39224 2.812 0.005594 **
## TA 1.20442 0.79640 1.512 0.132582
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.408 on 148 degrees of freedom
## Multiple R-squared: 0.7779, Adjusted R-squared: 0.7658
## F-statistic: 64.78 on 8 and 148 DF, p-value: < 2.2e-16
Regressão linear - Dislexia
# Multiple linear regression of Dislexia on the remaining keywords (DA is not
# among the predictors), followed by the coefficient table and fit statistics.
RL_Dis <- lm(
  Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia + TA,
  data = Timeline
)
summary(RL_Dis)
##
## Call:
## lm(formula = Dislexia ~ DAs + FE + PA + DE + Discalculia + Disgrafia +
## TA, data = Timeline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.441 -5.510 -0.710 2.743 59.849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.35399 2.82475 8.976 1.13e-15 ***
## DAs -0.23414 0.11800 -1.984 0.0491 *
## FE 0.71912 0.12302 5.846 3.07e-08 ***
## PA 0.41386 0.23537 1.758 0.0807 .
## DE -0.44224 0.37831 -1.169 0.2443
## Discalculia -0.01647 1.01752 -0.016 0.9871
## Disgrafia 0.03539 0.79757 0.044 0.9647
## TA -3.37112 1.59567 -2.113 0.0363 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11 on 149 degrees of freedom
## Multiple R-squared: 0.5298, Adjusted R-squared: 0.5077
## F-statistic: 23.99 on 7 and 149 DF, p-value: < 2.2e-16
# For each predictor, Dislexia is plotted twice: once with a straight lm fit
# (ggplot) and once with qplot's default smoother. Dislexia is on the y axis
# in every plot of this section.
# Dislexia against DAs
ggplot(Timeline, aes(y = Dislexia, x = DAs)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(DAs, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# Dislexia against FE
ggplot(Timeline, aes(y = Dislexia, x = FE)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(FE, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# Dislexia against PA
ggplot(Timeline, aes(y = Dislexia, x = PA)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(PA, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# Dislexia against DE
ggplot(Timeline, aes(y = Dislexia, x = DE)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(DE, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# Dislexia against DA. qplot takes x first, then y, so DA is passed first to
# match the ggplot call on the line above.
ggplot(Timeline, aes(y = Dislexia, x = DA)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(DA, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
# Dislexia against Discalculia
ggplot(Timeline, aes(y = Dislexia, x = Discalculia)) + geom_point(shape = 1) + geom_smooth(method = lm)
qplot(Discalculia, Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0022e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.03
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 1.0022e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
# Dislexia against Disgrafia: straight lm fit, then qplot's default smoother.
ggplot(data = Timeline, mapping = aes(x = Disgrafia, y = Dislexia)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = Disgrafia, y = Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 2
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
# Dislexia against TA: straight lm fit, then qplot's default smoother.
ggplot(data = Timeline, mapping = aes(x = TA, y = Dislexia)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
qplot(x = TA, y = Dislexia, data = Timeline, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at -0.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 1.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1
Gráfico Comparativo utilizando Regressão linear # great plotting functions to smooth out noise and amplify trends for better understanding: # borrowed from amunategui - # http://amunategui.github.io/google-trends-walkthrough/
library(ggplot2)
# Combined trend chart for the full period: one fitted curve (with its 95%
# band) plus the raw points for each keyword.
# NOTE(review): every series is fitted with a different polynomial degree
# (1 to 8) although the title says linear regression -- confirm this is
# intended.
ggplot(Timeline, aes(x = tempo)) +
  stat_smooth(aes(y = DA, group = 1, colour = "DA"), method = lm, formula = y ~ poly(x, 1), level = 0.95) +
  stat_smooth(aes(y = Dislexia, group = 1, colour = "Dislexia"), method = lm, formula = y ~ poly(x, 2), level = 0.95) +
  stat_smooth(aes(y = FE, group = 1, colour = "FE"), method = lm, formula = y ~ poly(x, 3), level = 0.95) +
  stat_smooth(aes(y = PA, group = 1, colour = "PA"), method = lm, formula = y ~ poly(x, 4), level = 0.95) +
  stat_smooth(aes(y = DE, group = 1, colour = "DE"), method = lm, formula = y ~ poly(x, 5), level = 0.95) +
  stat_smooth(aes(y = Discalculia, group = 1, colour = "Discalculia"), method = lm, formula = y ~ poly(x, 6), level = 0.95) +
  stat_smooth(aes(y = Disgrafia, group = 1, colour = "Disgrafia"), method = lm, formula = y ~ poly(x, 7), level = 0.95) +
  stat_smooth(aes(y = TA, group = 1, colour = "TA"), method = lm, formula = y ~ poly(x, 8), level = 0.95) +
  geom_point(aes(y = DA, colour = "DA"), size = 1) +
  geom_point(aes(y = Dislexia, colour = "Dislexia"), size = 1) +
  geom_point(aes(y = FE, colour = "FE"), size = 1) +
  geom_point(aes(y = PA, colour = "PA"), size = 1) +
  geom_point(aes(y = DE, colour = "DE"), size = 1) +
  geom_point(aes(y = Discalculia, colour = "Discalculia"), size = 1) +
  geom_point(aes(y = Disgrafia, colour = "Disgrafia"), size = 1) +
  geom_point(aes(y = TA, colour = "TA"), size = 1) +
  # Colours are passed as a named vector so each keyword always receives the
  # colour listed for it; unnamed values are matched by position, and the
  # position used is not the breaks order on every ggplot2 version.
  scale_colour_manual(
    "Search Terms",
    breaks = c("DA", "Dislexia", "FE", "PA", "DE", "Discalculia", "Disgrafia", "TA"),
    values = c(DA = "blue", Dislexia = "red", FE = "darkgreen", PA = "darkred",
               DE = "darkblue", Discalculia = "purple", Disgrafia = "orange",
               TA = "pink")
  ) +
  theme_bw() +
  xlab("tempo") +
  ylab("interesse") +
  ggtitle("Regressão linear das Palavras-chave")
Gráfico combinado de Regressão Linear com remoção de outliers
# borrowed from aL3xa -
# http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
#' Replace outliers with NA using Tukey's 1.5 * IQR fences
#'
#' Values below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR are set to NA; every
#' other value is returned unchanged, so the result has the same length as x.
#'
#' @param x A numeric vector.
#' @param na.rm Passed to quantile(); when TRUE, missing values in x are
#'   ignored while computing the quartiles.
#' @param ... Further arguments forwarded to quantile() (for example `type`).
#' @return x with the values outside the fences replaced by NA.
remove_outliers <- function(x, na.rm = TRUE, ...) {
  quartiles <- unname(quantile(x, probs = c(.25, .75), na.rm = na.rm, ...))
  # The fence width is derived from the same quartiles used for the fence
  # positions, so arguments given through `...` affect both consistently.
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  cleaned <- x
  cleaned[x < (quartiles[1] - fence)] <- NA
  cleaned[x > (quartiles[2] + fence)] <- NA
  cleaned
}
# Add a "<keyword>_clean" column for each keyword below, holding the series
# with its outliers replaced by NA. An anonymous function is used so that no
# helper variable is left in the workspace.
Timeline <- (function(df, keywords) {
  df[paste0(keywords, "_clean")] <- lapply(df[keywords], remove_outliers)
  df
})(Timeline, c("DA", "Dislexia", "FE", "PA", "DE", "Discalculia", "Disgrafia", "TA"))
library(ggplot2)
# Combined trend chart for the full period using the outlier-free columns:
# one fitted curve (with its 95% band) plus the remaining points per keyword.
# NOTE(review): every series is fitted with a different polynomial degree
# (1 to 8) -- confirm this is intended.
ggplot(Timeline, aes(x = tempo)) +
  stat_smooth(aes(y = DA_clean, group = 1, colour = "DA"), method = lm, formula = y ~ poly(x, 1), level = 0.95) +
  stat_smooth(aes(y = Dislexia_clean, group = 1, colour = "Dislexia"), method = lm, formula = y ~ poly(x, 2), level = 0.95) +
  stat_smooth(aes(y = FE_clean, group = 1, colour = "FE"), method = lm, formula = y ~ poly(x, 3), level = 0.95) +
  stat_smooth(aes(y = PA_clean, group = 1, colour = "PA"), method = lm, formula = y ~ poly(x, 4), level = 0.95) +
  stat_smooth(aes(y = DE_clean, group = 1, colour = "DE"), method = lm, formula = y ~ poly(x, 5), level = 0.95) +
  stat_smooth(aes(y = Discalculia_clean, group = 1, colour = "Discalculia"), method = lm, formula = y ~ poly(x, 6), level = 0.95) +
  stat_smooth(aes(y = Disgrafia_clean, group = 1, colour = "Disgrafia"), method = lm, formula = y ~ poly(x, 7), level = 0.95) +
  stat_smooth(aes(y = TA_clean, group = 1, colour = "TA"), method = lm, formula = y ~ poly(x, 8), level = 0.95) +
  geom_point(aes(y = DA_clean, colour = "DA"), size = 1) +
  geom_point(aes(y = Dislexia_clean, colour = "Dislexia"), size = 1) +
  geom_point(aes(y = FE_clean, colour = "FE"), size = 1) +
  geom_point(aes(y = PA_clean, colour = "PA"), size = 1) +
  geom_point(aes(y = DE_clean, colour = "DE"), size = 1) +
  geom_point(aes(y = Discalculia_clean, colour = "Discalculia"), size = 1) +
  geom_point(aes(y = Disgrafia_clean, colour = "Disgrafia"), size = 1) +
  geom_point(aes(y = TA_clean, colour = "TA"), size = 1) +
  # Colours are passed as a named vector so each keyword always receives the
  # colour listed for it; unnamed values are matched by position, and the
  # position used is not the breaks order on every ggplot2 version.
  scale_colour_manual(
    "Search Terms",
    breaks = c("DA", "Dislexia", "FE", "PA", "DE", "Discalculia", "Disgrafia", "TA"),
    values = c(DA = "blue", Dislexia = "red", FE = "darkgreen", PA = "darkred",
               DE = "darkblue", Discalculia = "purple", Disgrafia = "orange",
               TA = "pink")
  ) +
  theme_bw() +
  xlab("tempo") +
  ylab("interesse") +
  ggtitle("Palavras-chave no Google Trends")
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
Gráfico combinado de Regressão Linear com remoção de outliers - últimos 5 anos # borrowed from aL3xa - # http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset
# Add a "<keyword>_clean" column to the five-year subset for each keyword
# below, holding the series with its outliers replaced by NA. An anonymous
# function is used so that no helper variable is left in the workspace.
Timeline_5anos <- (function(df, keywords) {
  df[paste0(keywords, "_clean")] <- lapply(df[keywords], remove_outliers)
  df
})(Timeline_5anos, c("DA", "Dislexia", "FE", "PA", "DE", "Discalculia", "Disgrafia", "TA"))
library(ggplot2)
# Combined trend chart for the five-year subset using the outlier-free
# columns: one fitted curve (with its 95% band) plus the remaining points per
# keyword.
# NOTE(review): every series is fitted with a different polynomial degree
# (1 to 8) -- confirm this is intended.
ggplot(Timeline_5anos, aes(x = tempo)) +
  stat_smooth(aes(y = DA_clean, group = 1, colour = "DA"), method = lm, formula = y ~ poly(x, 1), level = 0.95) +
  stat_smooth(aes(y = Dislexia_clean, group = 1, colour = "Dislexia"), method = lm, formula = y ~ poly(x, 2), level = 0.95) +
  stat_smooth(aes(y = FE_clean, group = 1, colour = "FE"), method = lm, formula = y ~ poly(x, 3), level = 0.95) +
  stat_smooth(aes(y = PA_clean, group = 1, colour = "PA"), method = lm, formula = y ~ poly(x, 4), level = 0.95) +
  stat_smooth(aes(y = DE_clean, group = 1, colour = "DE"), method = lm, formula = y ~ poly(x, 5), level = 0.95) +
  stat_smooth(aes(y = Discalculia_clean, group = 1, colour = "Discalculia"), method = lm, formula = y ~ poly(x, 6), level = 0.95) +
  stat_smooth(aes(y = Disgrafia_clean, group = 1, colour = "Disgrafia"), method = lm, formula = y ~ poly(x, 7), level = 0.95) +
  stat_smooth(aes(y = TA_clean, group = 1, colour = "TA"), method = lm, formula = y ~ poly(x, 8), level = 0.95) +
  geom_point(aes(y = DA_clean, colour = "DA"), size = 1) +
  geom_point(aes(y = Dislexia_clean, colour = "Dislexia"), size = 1) +
  geom_point(aes(y = FE_clean, colour = "FE"), size = 1) +
  geom_point(aes(y = PA_clean, colour = "PA"), size = 1) +
  geom_point(aes(y = DE_clean, colour = "DE"), size = 1) +
  geom_point(aes(y = Discalculia_clean, colour = "Discalculia"), size = 1) +
  geom_point(aes(y = Disgrafia_clean, colour = "Disgrafia"), size = 1) +
  geom_point(aes(y = TA_clean, colour = "TA"), size = 1) +
  # Colours are passed as a named vector so each keyword always receives the
  # colour listed for it; unnamed values are matched by position, and the
  # position used is not the breaks order on every ggplot2 version.
  scale_colour_manual(
    "Search Terms",
    breaks = c("DA", "Dislexia", "FE", "PA", "DE", "Discalculia", "Disgrafia", "TA"),
    values = c(DA = "blue", Dislexia = "red", FE = "darkgreen", PA = "darkred",
               DE = "darkblue", Discalculia = "purple", Disgrafia = "orange",
               TA = "pink")
  ) +
  theme_bw() +
  xlab("tempo") +
  ylab("interesse") +
  ggtitle("Palavras-chave no Google Trends")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
Análise da distribuição por mês ##borrowed from claudio ##https://gustibuseconomia.com/2014/03/26/momento-r-do-dia-furacoes-ou-uma-imagem-vale-mais-do-que-mil-palavras-mas-qual-imagem-e-esta-a-pergunta-de-um-milhao-de-imagens/
# Month name for every row, taken from the period label in column X instead
# of assuming the table has exactly 157 rows starting in January.
# Assumes X holds "YYYY-MM" labels, as shown in the str() output -- confirm.
Timeline$mes <- month.name[as.integer(substr(as.character(Timeline$X), 6, 7))]
# Month number (1-12) matching the month name.
Timeline$ordem <- match(Timeline$mes, month.name)
library(ggplot2)
library(lattice)
library(latticeExtra)
## Loading required package: RColorBrewer
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
# Month-by-month plots for the full series (2004 onwards): for each term, a
# boxplot grouped by month number next to a month plot of the same column.

# Dyslexia
op <- par(mfrow = c(1, 2))
boxplot(Dislexia ~ ordem, data = Timeline)
monthplot(
  Timeline$Dislexia,
  col = "red",
  ylim = range(Timeline$Dislexia),
  main = "Dislexia",
  xlab = "meses",
  ylab = "ocorrências"
)
par(op) # restore the previous panel layout

# Learning difficulty (DA)
op <- par(mfrow = c(1, 2))
boxplot(DA ~ ordem, data = Timeline)
monthplot(
  Timeline$DA,
  col = "orange",
  ylim = range(Timeline$DA),
  main = "Dif. de Aprendizagem",
  xlab = "meses",
  ylab = "ocorrências"
)
par(op) # restore the previous panel layout
# Month plots for the five-year data set, on a 2 x 2 grid:
# top row as lines (orange), bottom row as vertical bars (green).
# NOTE(review): monthplot() on a plain vector cycles through 12 positions;
# confirm that Timeline_5anos really holds one observation per month.
op <- par(mfrow = c(2, 2))
monthplot(
  Timeline_5anos$Dislexia,
  type = "l", col = "orange", cex.axis = 0.8,
  xlab = "Dislexia", ylab = ""
)
monthplot(
  Timeline_5anos$DA,
  type = "l", col = "orange", cex.axis = 0.8,
  xlab = "Dif. de Aprendizagem", ylab = ""
)
monthplot(
  Timeline_5anos$Dislexia,
  type = "h", col = "green", cex.axis = 0.8,
  ylab = ""
)
monthplot(
  Timeline_5anos$DA,
  type = "h", col = "green", cex.axis = 0.8,
  ylab = ""
)
par(op) # restore the previous panel layout
Comparação Estatística - ANOVA entre os Meses ##Borrowed from Herick Soares de Santana ##http://posgraduando.com/como-fazer-analise-de-variancia-one-way-anova-one-way-no-r/
# One-way analysis of variance (response variable ~ predictor variable):
# DA_clean explained by the month name stored in mes.
anova <- aov(DA_clean ~ mes, data = Timeline)
# Print the ANOVA table for the fitted model.
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## mes 11 2422 220.23 4.714 4.31e-06 ***
## Residuals 135 6307 46.72
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 10 observations deleted due to missingness
# Normality test on the ANOVA residuals (normality is assumed when p > 0.05).
# The argument expression is left exactly as written because it is echoed in
# the "data:" line of the printed result.
shapiro.test(resid(anova))
##
## Shapiro-Wilk normality test
##
## data: resid(anova)
## W = 0.85974, p-value = 1.643e-10
# Load car, which provides leveneTest() for the homogeneity-of-variance check.
# If the package is not installed, run: install.packages("car")
library(car)
# Levene's test (homogeneity of variances is assumed when p > 0.05).
# mes is stored as character, which made leveneTest() warn
# "group coerced to factor"; converting it explicitly in the formula gives the
# same grouping without the warning.
leveneTest(DA_clean ~ factor(mes), data = Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.2931 0.9863
## 135
# The ANOVA was significant at p < 0.05, so a post-hoc test is needed to find
# out which groups differ from one another.
# NOTE(review): the Shapiro-Wilk output recorded earlier in this file rejects
# normality of the residuals (p = 1.643e-10); confirm that a parametric
# post-hoc test is still appropriate here.
# Tukey's honest significant difference test.
TukeyHSD(anova, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = DA_clean ~ mes, data = Timeline)
##
## $mes
## diff lwr upr p adj
## August-April -4.5833333 -13.86475790 4.6980912 0.8899012
## December-April -9.4230769 -18.52426249 -0.3218914 0.0353515
## February-April -8.9166667 -18.19809123 0.3647579 0.0721405
## January-April -13.8076923 -22.90887788 -4.7065067 0.0000890
## July-April -5.7307692 -14.83195480 3.3704163 0.6275613
## June-April -3.6818182 -13.17184013 5.8082038 0.9790498
## March-April -3.8076923 -12.90887788 5.2934933 0.9633061
## May-April -0.4166667 -9.69809123 8.8647579 1.0000000
## November-April -0.1666667 -9.44809123 9.1147579 1.0000000
## October-April -3.2500000 -12.53142456 6.0314246 0.9907129
## September-April -2.6666667 -11.94809123 6.6147579 0.9983210
## December-August -4.8397436 -13.94092916 4.2614420 0.8317310
## February-August -4.3333333 -13.61475790 4.9480912 0.9223939
## January-August -9.2243590 -18.32554454 -0.1231734 0.0438685
## July-August -1.1474359 -10.24862147 7.9537497 0.9999996
## June-August 0.9015152 -8.58850679 10.3915371 1.0000000
## March-August 0.7756410 -8.32554454 9.8768266 1.0000000
## May-August 4.1666667 -5.11475790 13.4480912 0.9400583
## November-August 4.4166667 -4.86475790 13.6980912 0.9123754
## October-August 1.3333333 -7.94809123 10.6147579 0.9999983
## September-August 1.9166667 -7.36475790 11.1980912 0.9999281
## February-December 0.5064103 -8.59477531 9.6075958 1.0000000
## January-December -4.3846154 -13.30191966 4.5326889 0.8927255
## July-December 3.6923077 -5.22499659 12.6096120 0.9659536
## June-December 5.7412587 -3.57256182 15.0550793 0.6582860
## March-December 5.6153846 -3.30191966 14.5326889 0.6274563
## May-December 9.0064103 -0.09477531 18.1075958 0.0552081
## November-December 9.2564103 0.15522469 18.3575958 0.0423843
## October-December 6.1730769 -2.92810865 15.2742625 0.5127069
## September-December 6.7564103 -2.34477531 15.8575958 0.3683256
## January-February -4.8910256 -13.99221121 4.2101599 0.8219043
## July-February 3.1858974 -5.91528813 12.2870830 0.9907365
## June-February 5.2348485 -4.25517346 14.7248704 0.7958500
## March-February 5.1089744 -3.99221121 14.2101599 0.7770057
## May-February 8.5000000 -0.78142456 17.7814246 0.1069200
## November-February 8.7500000 -0.53142456 18.0314246 0.0847175
## October-February 5.6666667 -3.61475790 14.9480912 0.6717416
## September-February 6.2500000 -3.03142456 15.5314246 0.5242272
## July-January 8.0769231 -0.84038120 16.9942274 0.1163156
## June-January 10.1258741 0.81205357 19.4396947 0.0207028
## March-January 10.0000000 1.08269572 18.9173043 0.0142975
## May-January 13.3910256 4.28984007 22.4922112 0.0001709
## November-January 13.6410256 4.53984007 22.7422112 0.0001157
## October-January 10.5576923 1.45650674 19.6588779 0.0092740
## September-January 11.1410256 2.03984007 20.2422112 0.0043721
## June-July 2.0489510 -7.26486951 11.3627716 0.9998657
## March-July 1.9230769 -6.99422736 10.8403812 0.9998896
## May-July 5.3141026 -3.78708300 14.4152881 0.7306246
## November-July 5.5641026 -3.53708300 14.6652881 0.6698710
## October-July 2.4807692 -6.62041634 11.5819548 0.9989622
## September-July 3.0641026 -6.03708300 12.1652881 0.9932967
## March-June -0.1258741 -9.43969468 9.1879464 1.0000000
## May-June 3.2651515 -6.22487043 12.7551735 0.9919657
## November-June 3.5151515 -5.97487043 13.0051735 0.9853920
## October-June 0.4318182 -9.05820376 9.9218401 1.0000000
## September-June 1.0151515 -8.47487043 10.5051735 0.9999999
## May-March 3.3910256 -5.71015993 12.4922112 0.9846971
## November-March 3.6410256 -5.46015993 12.7422112 0.9735750
## October-March 0.5576923 -8.54349326 9.6588779 1.0000000
## September-March 1.1410256 -7.96015993 10.2422112 0.9999996
## November-May 0.2500000 -9.03142456 9.5314246 1.0000000
## October-May -2.8333333 -12.11475790 6.4480912 0.9971145
## September-May -2.2500000 -11.53142456 7.0314246 0.9996556
## October-November -3.0833333 -12.36475790 6.1980912 0.9940098
## September-November -2.5000000 -11.78142456 6.7814246 0.9990702
## September-October 0.5833333 -8.69809123 9.8647579 1.0000000
# One-way ANOVA with Dislexia_clean as the response and the month name (mes)
# as the predictor; this overwrites the previous `anova` object.
anova <- aov(Dislexia_clean ~ mes, data = Timeline)
# Print the ANOVA table for the fitted model.
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## mes 11 2763 251.17 3.495 0.00026 ***
## Residuals 133 9559 71.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 12 observations deleted due to missingness
# Shapiro-Wilk normality test on the residuals of the ANOVA fitted just above.
# The argument expression is left as written because it is echoed in the
# "data:" line of the printed result.
shapiro.test(resid(anova))
##
## Shapiro-Wilk normality test
##
## data: resid(anova)
## W = 0.92004, p-value = 3.147e-07
# Levene's test for homogeneity of variances across months.
# mes is stored as character, which made leveneTest() warn
# "group coerced to factor"; converting it explicitly in the formula gives the
# same grouping without the warning.
leveneTest(Dislexia_clean ~ factor(mes), data = Timeline)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.644 0.7882
## 133
# Tukey post-hoc pairwise comparison of months for the ANOVA currently stored
# in `anova`, at the default 95% family-wise confidence level.
TukeyHSD(anova, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Dislexia_clean ~ mes, data = Timeline)
##
## $mes
## diff lwr upr p adj
## August-April -5.0000000 -16.0635271 6.063527 0.9371603
## December-April -13.7307692 -25.0224342 -2.439104 0.0048314
## February-April -9.0641026 -20.3557676 2.227562 0.2526785
## January-April -13.6153846 -24.6789117 -2.551858 0.0040515
## July-April -9.9807692 -21.2724342 1.310896 0.1389117
## June-April -2.9807692 -14.2724342 8.310896 0.9992259
## March-April -3.3141026 -14.6057676 7.977562 0.9979577
## May-April -1.6853147 -13.2407918 9.870162 0.9999979
## November-April -5.4807692 -16.7724342 5.810896 0.9006553
## October-April -6.2307692 -17.7862464 5.324708 0.8184016
## September-April -2.9807692 -14.2724342 8.310896 0.9992259
## December-August -8.7307692 -20.0224342 2.560896 0.3060246
## February-August -4.0641026 -15.3557676 7.227562 0.9883475
## January-August -8.6153846 -19.6789117 2.448142 0.2955910
## July-August -4.9807692 -16.2724342 6.310896 0.9466636
## June-August 2.0192308 -9.2724342 13.310896 0.9999831
## March-August 1.6858974 -9.6057676 12.977562 0.9999974
## May-August 3.3146853 -8.2407918 14.870162 0.9983386
## November-August -0.4807692 -11.7724342 10.810896 1.0000000
## October-August -1.2307692 -12.7862464 10.324708 0.9999999
## September-August 2.0192308 -9.2724342 13.310896 0.9999831
## February-December 4.6666667 -6.8486174 16.181951 0.9708298
## January-December 0.1153846 -11.1762804 11.407050 1.0000000
## July-December 3.7500000 -7.7652841 15.265284 0.9949240
## June-December 10.7500000 -0.7652841 22.265284 0.0917964
## March-December 10.4166667 -1.0986174 21.931951 0.1173835
## May-December 12.0454545 0.2713678 23.819541 0.0399661
## November-December 8.2500000 -3.2652841 19.765284 0.4244060
## October-December 7.5000000 -4.2740868 19.274087 0.6100107
## September-December 10.7500000 -0.7652841 22.265284 0.0917964
## January-February -4.5512821 -15.8429471 6.740383 0.9719788
## July-February -0.9166667 -12.4319507 10.598617 1.0000000
## June-February 6.0833333 -5.4319507 17.598617 0.8373784
## March-February 5.7500000 -5.7652841 17.265284 0.8821009
## May-February 7.3787879 -4.3952989 19.152875 0.6341276
## November-February 3.5833333 -7.9319507 15.098617 0.9965726
## October-February 2.8333333 -8.9407535 14.607420 0.9996777
## September-February 6.0833333 -5.4319507 17.598617 0.8373784
## July-January 3.6346154 -7.6570496 14.926280 0.9954057
## June-January 10.6346154 -0.6570496 21.926280 0.0853868
## March-January 10.3012821 -0.9903830 21.592947 0.1100827
## May-January 11.9300699 0.3745928 23.485547 0.0364195
## November-January 8.1346154 -3.1570496 19.426280 0.4155615
## October-January 7.3846154 -4.1708618 18.940093 0.6051459
## September-January 10.6346154 -0.6570496 21.926280 0.0853868
## June-July 7.0000000 -4.5152841 18.515284 0.6773827
## March-July 6.6666667 -4.8486174 18.181951 0.7407627
## May-July 8.2954545 -3.4786322 20.069541 0.4512857
## November-July 4.5000000 -7.0152841 16.015284 0.9777932
## October-July 3.7500000 -8.0240868 15.524087 0.9958061
## September-July 7.0000000 -4.5152841 18.515284 0.6773827
## March-June -0.3333333 -11.8486174 11.181951 1.0000000
## May-June 1.2954545 -10.4786322 13.069541 0.9999999
## November-June -2.5000000 -14.0152841 9.015284 0.9998815
## October-June -3.2500000 -15.0240868 8.524087 0.9988299
## September-June 0.0000000 -11.5152841 11.515284 1.0000000
## May-March 1.6287879 -10.1452989 13.402875 0.9999988
## November-March -2.1666667 -13.6819507 9.348617 0.9999716
## October-March -2.9166667 -14.6907535 8.857420 0.9995748
## September-March 0.3333333 -11.1819507 11.848617 1.0000000
## November-May -3.7954545 -15.5695413 7.978632 0.9953476
## October-May -4.5454545 -16.5727765 7.481867 0.9828383
## September-May -1.2954545 -13.0695413 10.478632 0.9999999
## October-November -0.7500000 -12.5240868 11.024087 1.0000000
## September-November 2.5000000 -9.0152841 14.015284 0.9998815
## September-October 3.2500000 -8.5240868 15.024087 0.9988299
Descrição por estado
# Load the per-state table and inspect its structure.
# NOTE(review): the output recorded below prints region names such as
# "SÆo Paulo" and "Rondnia", which suggests an encoding mismatch when the file
# is read -- confirm the file's encoding.
# NOTE(review): sep and dec are both "," -- confirm this is intended.
GeoMap <- read.csv(
  "C:/Users/Karina/Desktop/UFMG/Mestrado/2015 - 2016/Projetos/Projeto 2016/Instrumentos e Dados/Dados/GeoMap.csv",
  header = TRUE,
  sep = ",",
  dec = ",",
  fill = TRUE
)
str(object = GeoMap)
## 'data.frame': 27 obs. of 21 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Região : Factor w/ 27 levels "Acre","Alagoas",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ DA_Shop : int NA 0 0 10 3 6 7 0 2 0 ...
## $ Dislexia_Shop: int NA 13 100 10 2 6 7 0 4 11 ...
## $ Cartilha_Shop: int NA 20 99 20 14 12 29 6 5 17 ...
## $ Cartilha : int 61 46 91 50 46 38 50 37 43 52 ...
## $ DA : int NA 77 NA 58 70 64 40 49 49 100 ...
## $ Dislexia : int 59 47 100 54 49 51 46 48 49 55 ...
## $ Pais : int 78 81 98 77 81 77 74 74 70 82 ...
## $ Pais1 : int 73 74 90 70 76 71 69 68 65 74 ...
## $ Fam1 : int 26 29 44 29 27 27 27 23 22 30 ...
## $ EF1 : int 24 28 32 18 29 23 17 27 19 30 ...
## $ Crianca1 : int 83 82 100 72 80 80 85 74 73 81 ...
## $ TA1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DA1 : int 0 23 0 15 21 20 11 14 14 28 ...
## $ Dislexia1 : int 61 43 100 55 50 49 44 47 49 54 ...
## $ Disgrafia1 : int 0 0 0 0 4 4 4 0 4 0 ...
## $ Discalculia1 : int 0 4 0 4 4 4 3 4 3 6 ...
## $ Cartilha_sug : int 80 52 95 58 52 44 57 43 49 59 ...
## $ DA_sug : int NA 83 NA 53 75 71 41 51 51 100 ...
## $ Pais_sug : int 78 79 96 75 81 76 73 73 69 80 ...
# Show the last six rows of the per-state table.
tail(GeoMap, n = 6L)
## X Região DA_Shop Dislexia_Shop Cartilha_Shop Cartilha DA
## 22 22 Rondnia 0 0 54 52 NA
## 23 23 Roraima NA NA NA 100 NA
## 24 24 Santa Catarina 3 0 11 32 43
## 25 25 SÆo Paulo 2 5 20 32 31
## 26 26 Sergipe 0 35 18 46 NA
## 27 27 Tocantins 0 30 30 65 NA
## Dislexia Pais Pais1 Fam1 EF1 Crianca1 TA1 DA1 Dislexia1 Disgrafia1
## 22 55 77 71 21 24 77 0 0 55 0
## 23 77 100 94 34 33 95 0 0 79 0
## 24 48 77 71 19 27 63 0 13 48 3
## 25 46 71 64 18 19 65 1 9 45 2
## 26 46 78 72 27 22 77 0 0 42 0
## 27 41 81 72 24 34 81 0 0 41 0
## Discalculia1 Cartilha_sug DA_sug Pais_sug
## 22 0 63 NA 76
## 23 0 100 NA 100
## 24 3 36 48 75
## 25 3 37 33 69
## 26 0 54 NA 77
## 27 0 75 NA 77
# Per-column descriptive statistics of the per-state table.
summary(object = GeoMap)
## X Região DA_Shop Dislexia_Shop
## Min. : 1.0 Acre : 1 Min. : 0.00 Min. : 0.00
## 1st Qu.: 7.5 Alagoas : 1 1st Qu.: 0.00 1st Qu.: 4.00
## Median :14.0 Amap : 1 Median : 2.00 Median : 7.00
## Mean :14.0 Amazonas: 1 Mean : 2.56 Mean : 12.32
## 3rd Qu.:20.5 Bahia : 1 3rd Qu.: 3.00 3rd Qu.: 11.00
## Max. :27.0 Cear : 1 Max. :10.00 Max. :100.00
## (Other) :21 NA's :2 NA's :2
## Cartilha_Shop Cartilha DA Dislexia
## Min. : 5.00 Min. : 29.00 Min. : 28.00 Min. : 41.00
## 1st Qu.: 9.00 1st Qu.: 37.50 1st Qu.: 42.50 1st Qu.: 46.00
## Median :17.00 Median : 46.00 Median : 55.00 Median : 49.00
## Mean :20.84 Mean : 48.63 Mean : 56.53 Mean : 52.11
## 3rd Qu.:20.00 3rd Qu.: 52.50 3rd Qu.: 68.00 3rd Qu.: 54.00
## Max. :99.00 Max. :100.00 Max. :100.00 Max. :100.00
## NA's :2 NA's :8
## Pais Pais1 Fam1 EF1
## Min. : 67.00 Min. :62.00 Min. :18.00 Min. :17.00
## 1st Qu.: 72.00 1st Qu.:67.00 1st Qu.:21.50 1st Qu.:20.50
## Median : 75.00 Median :69.00 Median :26.00 Median :25.00
## Mean : 76.89 Mean :70.74 Mean :25.22 Mean :25.04
## 3rd Qu.: 78.00 3rd Qu.:72.00 3rd Qu.:27.00 3rd Qu.:29.00
## Max. :100.00 Max. :94.00 Max. :44.00 Max. :34.00
##
## Crianca1 TA1 DA1 Dislexia1
## Min. : 60.00 Min. :0.00000 Min. : 0.00 Min. : 41.00
## 1st Qu.: 71.50 1st Qu.:0.00000 1st Qu.: 0.00 1st Qu.: 46.00
## Median : 77.00 Median :0.00000 Median :13.00 Median : 49.00
## Mean : 76.11 Mean :0.03704 Mean :11.48 Mean : 52.07
## 3rd Qu.: 80.50 3rd Qu.:0.00000 3rd Qu.:17.50 3rd Qu.: 54.50
## Max. :100.00 Max. :1.00000 Max. :28.00 Max. :100.00
##
## Disgrafia1 Discalculia1 Cartilha_sug DA_sug
## Min. :0.000 Min. :0.000 Min. : 32.00 Min. : 30.00
## 1st Qu.:0.000 1st Qu.:1.500 1st Qu.: 43.50 1st Qu.: 46.00
## Median :0.000 Median :4.000 Median : 52.00 Median : 53.00
## Mean :1.593 Mean :2.926 Mean : 55.33 Mean : 59.05
## 3rd Qu.:3.500 3rd Qu.:4.000 3rd Qu.: 63.00 3rd Qu.: 73.00
## Max. :5.000 Max. :6.000 Max. :100.00 Max. :100.00
## NA's :8
## Pais_sug
## Min. : 67.00
## 1st Qu.: 71.50
## Median : 74.00
## Mean : 75.52
## 3rd Qu.: 77.00
## Max. :100.00
##
Relação Palavras x Região
Gráfico de frequência comparativa - 2004 a 2017
# Comparative plot of the search-term scores in GeoMap, one x position per
# row. Dislexia is drawn as a connected line and the other terms as vertical
# spikes (type = "h").
# The spike series used to share the same x positions, so a series drawn later
# painted over any shorter series drawn before it. According to the summary
# recorded above, Cartilha is never below 29 while DA1 never exceeds 28, so
# the DA1 spikes were always hidden. Each spike series now gets a small
# horizontal offset so that all of them stay visible.
# local() keeps the helper variables out of the global environment.
local({
  # Column name -> colour, in the original drawing order.
  spike_colours <- c(
    DA1 = "darkblue",
    Cartilha = "darkgray",
    Discalculia1 = "darkgreen",
    Disgrafia1 = "orange",
    TA1 = "purple"
  )
  row_index <- seq_len(nrow(GeoMap))
  # Offsets spread symmetrically around each integer x position.
  offsets <- seq(-0.3, 0.3, length.out = length(spike_colours))

  plot(GeoMap$Dislexia, type = "l", main = "Palavras-chaves", col = "red",
       ylim = c(0, 100))
  for (i in seq_along(spike_colours)) {
    lines(row_index + offsets[i], GeoMap[[names(spike_colours)[i]]],
          type = "h", col = spike_colours[[i]])
  }
  legend("topright",
         c('Dislexia', 'Dif. de Aprendizagens', 'Cartilha', "Discalculia",
           'Disgrafia', 'Transt. da Aprendizagem'),
         lty = 1, lwd = 1,
         col = c("red", "darkblue", "darkgray", "darkgreen", "orange",
                 "purple"),
         box.col = "white", cex = 0.7)
})