library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v readr 1.2.1
## v tibble 2.1.3 v purrr 0.2.5
## v tidyr 0.8.2 v stringr 1.3.1
## v ggplot2 3.1.0 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2019 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:purrr':
##
## transpose
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(psycho)
library(ggplot2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:DescTools':
##
## Recode
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
library(tibble)
library(nnet)
disleksija <- read_sav("D:/Vita/Disleksija/Baze/disleksija_final.sav")
disleksija$Disleksija <- as.factor(disleksija$Disleksija)
disleksija$Pol <- as.factor(disleksija$Pol)
disleksija$UzrasnaGrupa <- as.factor(disleksija$UzrasnaGrupa)
#creating training and test subset
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
##
## MAE, RMSE
## The following object is masked from 'package:purrr':
##
## lift
library(caTools)
set.seed(101)
split = sample.split(disleksija$Disleksija, SplitRatio = 0.70)
final.train = subset(disleksija, split == TRUE)
final.test = subset(disleksija, split == FALSE)
#logit model with all predictors with 3 fold cross validation
fitControl <- trainControl(## 10-fold CV
method = "cv",
number = 3,
savePredictions = TRUE
)
#MODEL 1 - sve i uzrast i konacni skor i svi subtestovi
model1<-caret::train(Disleksija ~ Uzrast +
Brzo_imenovanje +
Nizanje_kuglica +
Jednominutno_citanje +
Posturalna_stabilnost +
Fonemska_segmentacija +
Dvominutni_speling +
Brojcani_niz_unazad +
Citanje_besmislenog_teksta +
Jednominutno_pisanje +
Verbalna_fluentnost +
Semanticka_fluentnost +
Vokabular +
ARQbroj,data=disleksija,method="glm",family=binomial(),
trControl=fitControl)
pred <- predict(model1, newdata = final.test, type = "raw")
confusionMatrix(pred, final.test$Disleksija)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 180 0
## 1 0 9
##
## Accuracy : 1
## 95% CI : (0.9807, 1)
## No Information Rate : 0.9524
## P-Value [Acc > NIR] : 9.891e-05
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.9524
## Detection Rate : 0.9524
## Detection Prevalence : 0.9524
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : 0
##
#Plotovanje modela za razlicite grupe ispitanika
model <- glm(Disleksija ~ ARQbroj +
UzrasnaGrupa,data=final.train, na.action="na.exclude", family="binomial")
model$coef
## (Intercept) ARQbroj UzrasnaGrupa2 UzrasnaGrupa3 UzrasnaGrupa4
## -27.99556321 3.31102794 18.62343348 -3.64857213 0.21736686
## UzrasnaGrupa5
## 0.03972801
contrasts(final.test$UzrasnaGrupa)
## 2 3 4 5
## 1 0 0 0 0
## 2 1 0 0 0
## 3 0 1 0 0
## 4 0 0 1 0
## 5 0 0 0 1
b0 <- model$coef[1] # intercept
X1 <- model$coef[2]
uzrasnagrupa2 <- model$coef[3]
uzrasnagrupa3 <- model$coef[4]
uzrasnagrupa4 <- model$coef[5]
uzrasnagrupa5 <- model$coef[6]
X1_range <- seq(from=min(final.train$ARQbroj), to=(ARQbroj=50), by=.01)
referentna <- b0 +
X1*X1_range +
uzrasnagrupa2*0 +
uzrasnagrupa3*0 +
uzrasnagrupa4*0 +
uzrasnagrupa5*0
uzrast2_log <- b0 +
X1*X1_range +
uzrasnagrupa2*1 +
uzrasnagrupa3*0 +
uzrasnagrupa4*0 +
uzrasnagrupa5*0
uzrast3_log <- b0 +
X1*X1_range +
uzrasnagrupa2*0 +
uzrasnagrupa3*1 +
uzrasnagrupa4*0 +
uzrasnagrupa5*0
uzrast4_log <- b0 +
X1*X1_range +
uzrasnagrupa2*0 +
uzrasnagrupa3*0 +
uzrasnagrupa4*1 +
uzrasnagrupa5*0
uzrast5_log <- b0 +
X1*X1_range +
uzrasnagrupa2*0 +
uzrasnagrupa3*0 +
uzrasnagrupa4*0 +
uzrasnagrupa5*1
# Compute the probibilities (this is what will actually get plotted):
referentna_prob <- exp(referentna)/(1 + exp(referentna))
uzrast2_prob <- exp(uzrast2_log)/(1 + exp(uzrast2_log))
uzrast3_prob <- exp(uzrast3_log)/(1 + exp(uzrast3_log))
uzrast4_prob <- exp(uzrast4_log)/(1 + exp(uzrast4_log))
uzrast5_prob <- exp(uzrast5_log)/(1 + exp(uzrast5_log))
# We'll start by plotting the ref group:
plot(X1_range, referentna_prob,
ylim=c(0,1),
type="l",
lwd=3,
lty=2,
col="gold",
xlab="Postignuce na DST-J", ylab="P(ishod)", main="Verovatnoca postojanja disleksije")
# Add the line for people who are in the b group
lines(X1_range, uzrast2_prob,
type="l",
lwd=3,
lty=3,
col="green")
# Add the line for people who are in the c group
lines(X1_range, uzrast3_prob,
type="l",
lwd=3,
lty=4,
col="blue")
lines(X1_range, uzrast4_prob,
type="l",
lwd=3,
lty=4,
col="orangered")
lines(X1_range, uzrast5_prob,
type="l",
lwd=3,
lty=4,
col="red")
# add a horizontal line at p=.5
abline(h=.5, lty=2)
legend("bottomright", legend = c("Od 6,6 do 7,5 godina",
"Od 7,6 do 8,5 godina",
"Od 8,6 do 9,5 godina",
"Od 9,6 do 10,5 godina",
"Od 10,6 do 11,5 godina"), col=c("gold", "green", "blue", "orangered", "red"),lty=1:2, cex=0.8)

#With ggplot
library(ggplot2)
library(tidyr)
# first you have to get the information into a long dataframe, which is what ggplot likes :)
plot.data <- data.frame(uzrast1=referentna_prob, uzrast2=uzrast2_prob, uzrast3=uzrast3_prob, uzrast4=uzrast4_prob, uzrast5=uzrast5_prob, X1=X1_range)
plot.data <- gather(plot.data, key=group, value=uzrast, 1:5)
head(plot.data)
## X1 group uzrast
## 1 0.00 uzrast1 6.945146e-13
## 2 0.01 uzrast1 7.178951e-13
## 3 0.02 uzrast1 7.420627e-13
## 4 0.03 uzrast1 7.670439e-13
## 5 0.04 uzrast1 7.928661e-13
## 6 0.05 uzrast1 8.195575e-13
ggplot(plot.data, aes(x=X1, y=uzrast, color=group)) + # asking it to set the color by the variable "group" is what makes it draw three different lines
geom_line(lwd=2) +
labs(x="Postignuce na DST-J", y="P(ishod)", title="Verovatnoca za disleksiju u zavisnosti od skora na DST-J")
