## Rattle: A free graphical interface for data mining with R.
## Version 3.4.1 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
## Loading required package: ggplot2
# Read the saved table, then drop its first 13 columns so that only the
# columns holding the SR-matrix values remain.
raven_sr <- read.csv(file = "E://RDir/exp/resSR.csv")
raven_sr <- raven_sr[, -(1:13)]
# Run the principal component analysis and print the importance table.
raven_PCA <- princomp(raven_sr)
summary(raven_PCA)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 0.0437179 0.03594045 0.03299787 0.03096558
## Proportion of Variance 0.1247180 0.08429027 0.07105295 0.06257038
## Cumulative Proportion 0.1247180 0.20900824 0.28006120 0.34263158
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.02715458 0.02657244 0.02520520 0.02364566
## Proportion of Variance 0.04811679 0.04607586 0.04145631 0.03648492
## Cumulative Proportion 0.39074837 0.43682422 0.47828054 0.51476546
## Comp.9 Comp.10 Comp.11 Comp.12
## Standard deviation 0.02311168 0.02240072 0.02112174 0.02078720
## Proportion of Variance 0.03485568 0.03274418 0.02911185 0.02819696
## Cumulative Proportion 0.54962114 0.58236532 0.61147717 0.63967414
## Comp.13 Comp.14 Comp.15 Comp.16
## Standard deviation 0.01961338 0.01885879 0.01819860 0.01797349
## Proportion of Variance 0.02510241 0.02320801 0.02161156 0.02108022
## Cumulative Proportion 0.66477654 0.68798455 0.70959612 0.73067634
## Comp.17 Comp.18 Comp.19 Comp.20
## Standard deviation 0.01700800 0.01653077 0.01541902 0.01500106
## Proportion of Variance 0.01887631 0.01783185 0.01551401 0.01468433
## Cumulative Proportion 0.74955264 0.76738450 0.78289851 0.79758284
## Comp.21 Comp.22 Comp.23 Comp.24
## Standard deviation 0.01405790 0.01360008 0.01342297 0.01311042
## Proportion of Variance 0.01289588 0.01206961 0.01175731 0.01121615
## Cumulative Proportion 0.81047873 0.82254833 0.83430564 0.84552179
## Comp.25 Comp.26 Comp.27 Comp.28
## Standard deviation 0.01250554 0.012240871 0.012020588 0.011264495
## Proportion of Variance 0.01020506 0.009777663 0.009428917 0.008280068
## Cumulative Proportion 0.85572685 0.865504513 0.874933430 0.883213498
## Comp.29 Comp.30 Comp.31 Comp.32
## Standard deviation 0.011201929 0.010323205 0.010014649 0.009648703
## Proportion of Variance 0.008188342 0.006954076 0.006544582 0.006075028
## Cumulative Proportion 0.891401840 0.898355917 0.904900499 0.910975527
## Comp.33 Comp.34 Comp.35 Comp.36
## Standard deviation 0.009390832 0.009101021 0.008790048 0.008748815
## Proportion of Variance 0.005754645 0.005404937 0.005041884 0.004994694
## Cumulative Proportion 0.916730171 0.922135108 0.927176993 0.932171686
## Comp.37 Comp.38 Comp.39 Comp.40
## Standard deviation 0.008307515 0.008068756 0.007860782 0.007466816
## Proportion of Variance 0.004503526 0.004248382 0.004032199 0.003638156
## Cumulative Proportion 0.936675212 0.940923594 0.944955793 0.948593949
## Comp.41 Comp.42 Comp.43 Comp.44
## Standard deviation 0.007333108 0.007043632 0.006736550 0.006603951
## Proportion of Variance 0.003509026 0.003237455 0.002961321 0.002845890
## Cumulative Proportion 0.952102975 0.955340430 0.958301751 0.961147641
## Comp.45 Comp.46 Comp.47 Comp.48
## Standard deviation 0.006491314 0.006342372 0.006075960 0.005893793
## Proportion of Variance 0.002749638 0.002624906 0.002409019 0.002266732
## Cumulative Proportion 0.963897279 0.966522185 0.968931205 0.971197937
## Comp.49 Comp.50 Comp.51 Comp.52
## Standard deviation 0.005804064 0.005486887 0.005407149 0.005111946
## Proportion of Variance 0.002198238 0.001964547 0.001907863 0.001705230
## Cumulative Proportion 0.973396175 0.975360722 0.977268585 0.978973815
## Comp.53 Comp.54 Comp.55 Comp.56
## Standard deviation 0.004891384 0.004861248 0.004514164 0.004395381
## Proportion of Variance 0.001561255 0.001542076 0.001329735 0.001260676
## Cumulative Proportion 0.980535070 0.982077146 0.983406881 0.984667557
## Comp.57 Comp.58 Comp.59 Comp.60
## Standard deviation 0.004288865 0.004162577 0.004019648 0.0039089123
## Proportion of Variance 0.001200315 0.001130667 0.001054354 0.0009970622
## Cumulative Proportion 0.985867872 0.986998539 0.988052893 0.9890499555
## Comp.61 Comp.62 Comp.63 Comp.64
## Standard deviation 0.0038067786 0.0036820303 0.0034188568 0.0033125517
## Proportion of Variance 0.0009456396 0.0008846778 0.0007627324 0.0007160375
## Cumulative Proportion 0.9899955951 0.9908802729 0.9916430053 0.9923590428
## Comp.65 Comp.66 Comp.67 Comp.68
## Standard deviation 0.0031561657 0.0030933210 0.0029061849 0.0028214862
## Proportion of Variance 0.0006500249 0.0006243964 0.0005511336 0.0005194769
## Cumulative Proportion 0.9930090677 0.9936334640 0.9941845976 0.9947040745
## Comp.69 Comp.70 Comp.71 Comp.72
## Standard deviation 0.0027702148 0.0026522405 0.0026210571 0.0024797221
## Proportion of Variance 0.0005007688 0.0004590248 0.0004482945 0.0004012513
## Cumulative Proportion 0.9952048434 0.9956638682 0.9961121627 0.9965134139
## Comp.73 Comp.74 Comp.75 Comp.76
## Standard deviation 0.0024002958 0.0022467195 0.0020831977 0.0020566569
## Proportion of Variance 0.0003759585 0.0003293883 0.0002831857 0.0002760159
## Cumulative Proportion 0.9968893725 0.9972187607 0.9975019464 0.9977779623
## Comp.77 Comp.78 Comp.79 Comp.80
## Standard deviation 0.001948439 0.0018859930 0.0018174521 0.0017486758
## Proportion of Variance 0.000247733 0.0002321082 0.0002155442 0.0001995396
## Cumulative Proportion 0.998025695 0.9982578036 0.9984733478 0.9986728873
## Comp.81 Comp.82 Comp.83 Comp.84
## Standard deviation 0.0015676968 0.0015368000 0.0014409297 0.0013509339
## Proportion of Variance 0.0001603742 0.0001541151 0.0001354865 0.0001190909
## Cumulative Proportion 0.9988332616 0.9989873766 0.9991228631 0.9992419540
## Comp.85 Comp.86 Comp.87 Comp.88
## Standard deviation 0.0012936929 1.221531e-03 1.145541e-03 1.117366e-03
## Proportion of Variance 0.0001092126 9.736869e-05 8.563108e-05 8.147068e-05
## Cumulative Proportion 0.9993511666 9.994485e-01 9.995342e-01 9.996156e-01
## Comp.89 Comp.90 Comp.91 Comp.92
## Standard deviation 1.065997e-03 0.0009819074 8.580464e-04 8.167346e-04
## Proportion of Variance 7.415186e-05 0.0000629146 4.804321e-05 4.352837e-05
## Cumulative Proportion 9.996898e-01 0.9997527036 9.998007e-01 9.998443e-01
## Comp.93 Comp.94 Comp.95 Comp.96
## Standard deviation 7.571407e-04 6.970443e-04 6.600078e-04 5.605347e-04
## Proportion of Variance 3.740792e-05 3.170525e-05 2.842553e-05 2.050291e-05
## Cumulative Proportion 9.998817e-01 9.999134e-01 9.999418e-01 9.999623e-01
## Comp.97 Comp.98 Comp.99 Comp.100
## Standard deviation 5.174109e-04 4.566766e-04 3.181415e-04 1.270910e-10
## Proportion of Variance 1.746955e-05 1.360905e-05 6.604669e-06 1.053998e-18
## Cumulative Proportion 9.999798e-01 9.999934e-01 1.000000e+00 1.000000e+00
#Чтобы выбрать нужные главные компоненты, запишем графики факторных нагрузок в файлы для более удобного просмотра
#' Write one level plot of component loadings per component to JPEG files.
#'
#' @param path Directory receiving the files; file i is written to
#'   "<path>//levelplot<i>.jpg".
#' @param n_components Number of leading components to plot; 0 writes nothing.
#' @param loadings Loadings matrix; column i is reshaped row-wise into a
#'   matrix with `size_of_SR` columns before plotting.
#' @param size_of_SR Number of columns of the reshaped loadings matrix.
#' @param colours Function returning a colour vector; called as colours(100).
#' @return NULL, invisibly. Called for the side effect of writing files.
create_loadings_plots <- function(path, n_components, loadings, size_of_SR,
                                  colours) {
  # The matrix columns are drawn along the y axis, so the axis covers
  # 0.5 .. size_of_SR + 0.5; the limits are reversed to put column 1 on top.
  y_limits <- c(size_of_SR + 0.5, 0.5)
  for (i in seq_len(n_components)) {
    m <- matrix(data = loadings[, i], ncol = size_of_SR, byrow = TRUE)
    filepath <- paste(path, "//levelplot", i, ".jpg", sep = "")
    jpeg(filename = filepath, width = 1024, height = 768, bg = "transparent")
    # The lattice object must be print()ed explicitly, otherwise an empty
    # file is written. The device is closed even when plotting fails.
    tryCatch(
      print(
        levelplot(m, col.regions = colours(100), ylim = y_limits,
                  main = paste(i, " component loadings", sep = ""),
                  xlab = "", ylab = "", useRaster = TRUE)
      ),
      finally = dev.off()
    )
  }
}
# Set up the colour scale: blue -> white -> red.
palette <- colorRampPalette(c("#0000FF", "#FFFFFF", "#FF0000"))
# Write the resulting level plots to disk.
create_loadings_plots(
  path = "E://RDir//Raven//Loadings",
  n_components = 100,
  loadings = raven_PCA$loadings,
  size_of_SR = 10,
  colours = palette
)
#Функция для просмотра полученных графиков нагрузок
# Display a loadings plot stored as "<path>//levelplot<number>.jpg".
#   path                 directory containing the JPEG files
#   number_of_component  index of the component whose plot is shown
show_loading_plot <- function(path, number_of_component) {
  image_file <- paste0(path, "//levelplot", number_of_component, ".jpg")
  image_data <- readJPEG(image_file)
  # Empty 1024 x 768 canvas with the y axis running downwards.
  plot(1, type = "n", xlim = c(0, 1024), ylim = c(768, 0))
  rasterImage(image_data, xleft = 0, ybottom = 768, xright = 1024, ytop = 0,
              interpolate = FALSE)
}
# The first component can be read as adherence to a "horizontal" strategy.
show_loading_plot(path = "E://RDir//Raven//Loadings", number_of_component = 1)
# The third component can be read as adherence to a "vertical" strategy.
show_loading_plot(path = "E://RDir//Raven//Loadings", number_of_component = 3)
# Record which components are used in the analysis.
using_comps <- c(1, 3)
# Data frame holding the chosen component scores, the integral measures and
# the results.
# NOTE(review): test_results is not defined in this part of the script;
# presumably it is loaded elsewhere - confirm.
df <- setNames(
  as.data.frame(
    cbind(
      predict(raven_PCA)[, using_comps],
      test_results$Ball,
      test_results$Total,
      test_results$PTOM,
      test_results$PTOA
    )
  ),
  c("Horis", "Vertic", "Ball", "Total", "PTOM", "PTOA")
)
# Decision tree predicting whether the participant solves the task.
raven_tree_ball <- rpart(
  formula = Ball ~ Horis + Vertic + PTOM + PTOA,
  data = df,
  method = "class"
)
fancyRpartPlot(raven_tree_ball)
#Определим масштабы параметров
## [1] "Horis = [-0.15042;0.083]"
## [1] "Vertic = [-0.07077;0.08097]"
## [1] "PTOM = [0.197;0.926]"
## [1] "PTOA = [0.074;0.803]"
#Proportional Time-on-Matrix (PToM) и Proportional Time-on-Alternatives (PToA) – время фиксаций на матрице и на вариантах ответа соответственно, делённое на суммарное время фиксаций.
#' Build a model formula string of the form " y1 + y2 ~ x1 + x2".
#'
#' @param to_ Character vector naming the response term(s); must not be empty.
#' @param with_ Character vector naming the predictor terms. An empty vector
#'   gives the intercept-only right-hand side "1".
#' @return A length-one character string. It starts with a single space, as
#'   the strings built by the previous loop-based implementation did.
get_formula <- function(to_, with_) {
  if (length(to_) == 0) {
    stop("`to_` must name at least one response variable", call. = FALSE)
  }
  lhs <- paste(to_, collapse = " + ")
  rhs <- if (length(with_) > 0) paste(with_, collapse = " + ") else "1"
  paste0(" ", lhs, " ~ ", rhs)
}
#' Join the elements of a vector into one comma-separated string.
#'
#' @param c Vector whose elements are joined (converted to character).
#' @return A length-one character string with the elements separated by
#'   ", "; the empty string for an empty vector.
c_to_str <- function(c) {
  paste(c, collapse = ", ")
}
#' Leave-one-out cross-validation of an rpart classification tree.
#'
#' Each row of `data` is held out in turn, a tree is fitted on the remaining
#' rows, and the class predicted for the held-out row is compared with its
#' actual value. Afterwards a tree fitted on all rows is plotted and the
#' share of correctly classified rows is printed.
#'
#' @param to_predict Name of the response column in `data` (single string).
#' @param predict_with Character vector with the names of the predictor
#'   columns.
#' @param data Data frame containing the response and the predictors.
#' @return The last printed string, invisibly.
cross_validate_tree <- function(to_predict, predict_with, data) {
  formula <- get_formula(to_predict, predict_with)
  times <- nrow(data)
  is_correct <- logical(times)
  for (i in seq_len(times)) {
    train <- data[-i, , drop = FALSE]
    val <- data[i, , drop = FALSE]
    testing_tree <- rpart(formula = formula, data = train, method = "class")
    # Ask for the predicted class label and compare it with the true label;
    # comparing as character works for factor and numeric responses alike.
    predicted <- predict(testing_tree, newdata = val, type = "class")
    actual <- val[[to_predict]]
    # isTRUE() counts a missing true label as a misclassification.
    is_correct[i] <- isTRUE(as.character(predicted) == as.character(actual))
  }
  percentage <- round(sum(is_correct) / times, 2)
  tr <- rpart(formula = formula, data = data, method = "class")
  fancyRpartPlot(tr)
  print("Оценка скользящего контроля методом leave-one-out: ")
  print(paste("Правильно классифицировано: ", percentage * 100, "%", sep = ""))
  print(paste("Предсказывалось значение для:", to_predict))
  print(paste("Предсказывалось с помощью:", c_to_str(predict_with)))
}
## [1] "Оценка скользящего контроля методом leave-one-out: "
## [1] "Правильно классифицировано: 80%"
## [1] "Предсказывалось значение для: Ball"
## [1] "Предсказывалось с помощью: Horis, Vertic, PTOM, PTOA"
## [1] "Оценка скользящего контроля методом leave-one-out: "
## [1] "Правильно классифицировано: 84%"
## [1] "Предсказывалось значение для: Ball"
## [1] "Предсказывалось с помощью: Horis, Vertic"
## [1] "Оценка скользящего контроля методом leave-one-out: "
## [1] "Правильно классифицировано: 85%"
## [1] "Предсказывалось значение для: Ball"
## [1] "Предсказывалось с помощью: PTOM, PTOA"
# Semicolon-separated table with the reading test results.
reading_results <- read.csv(
  file = "E://RDir/reading/reading_results.csv",
  header = TRUE,
  sep = ";"
)
# Read the saved table, then drop its first 16 columns so that only the
# columns holding the SR-matrix values remain.
reading_sr <- read.csv(file = "E://RDir/reading/readingSR.csv")
reading_sr <- reading_sr[, -(1:16)]
# Run the principal component analysis and print the importance table.
reading_PCA <- princomp(reading_sr)
summary(reading_PCA)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 0.03347603 0.02619846 0.0242333 0.01987124
## Proportion of Variance 0.24014253 0.14707965 0.1258421 0.08461573
## Cumulative Proportion 0.24014253 0.38722218 0.5130643 0.59768004
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.01828501 0.01786010 0.01626396 0.01372377
## Proportion of Variance 0.07164594 0.06835481 0.05668314 0.04035973
## Cumulative Proportion 0.66932598 0.73768079 0.79436393 0.83472366
## Comp.9 Comp.10 Comp.11 Comp.12
## Standard deviation 0.01131573 0.009633114 0.00800960 0.00732107
## Proportion of Variance 0.02743888 0.019885409 0.01374747 0.01148551
## Cumulative Proportion 0.86216255 0.882047954 0.89579542 0.90728093
## Comp.13 Comp.14 Comp.15 Comp.16
## Standard deviation 0.007205657 0.006630318 0.006353303 0.005533008
## Proportion of Variance 0.011126237 0.009420410 0.008649683 0.006560299
## Cumulative Proportion 0.918407169 0.927827579 0.936477262 0.943037561
## Comp.17 Comp.18 Comp.19 Comp.20
## Standard deviation 0.005281226 0.004945200 0.004784800 0.004641323
## Proportion of Variance 0.005976825 0.005240452 0.004906013 0.004616200
## Cumulative Proportion 0.949014386 0.954254838 0.959160851 0.963777051
## Comp.21 Comp.22 Comp.23 Comp.24
## Standard deviation 0.004235383 0.004202997 0.004024227 0.003634306
## Proportion of Variance 0.003844027 0.003785465 0.003470292 0.002830375
## Cumulative Proportion 0.967621078 0.971406543 0.974876835 0.977707211
## Comp.25 Comp.26 Comp.27 Comp.28
## Standard deviation 0.003562330 0.003356759 0.003035503 0.002866506
## Proportion of Variance 0.002719376 0.002414578 0.001974524 0.001760787
## Cumulative Proportion 0.980426587 0.982841165 0.984815690 0.986576477
## Comp.29 Comp.30 Comp.31 Comp.32
## Standard deviation 0.002668391 0.002518040 0.002431094 0.002384636
## Proportion of Variance 0.001525808 0.001358709 0.001266499 0.001218555
## Cumulative Proportion 0.988102285 0.989460994 0.990727493 0.991946048
## Comp.33 Comp.34 Comp.35 Comp.36
## Standard deviation 0.002173528 0.0020275796 0.0019342537 0.0018619550
## Proportion of Variance 0.001012352 0.0008809615 0.0008017297 0.0007429155
## Cumulative Proportion 0.992958400 0.9938393611 0.9946410908 0.9953840063
## Comp.37 Comp.38 Comp.39 Comp.40
## Standard deviation 0.0016378425 0.0015610067 0.001542066 0.0014245410
## Proportion of Variance 0.0005748379 0.0005221684 0.000509574 0.0004348616
## Cumulative Proportion 0.9959588442 0.9964810126 0.996990587 0.9974254482
## Comp.41 Comp.42 Comp.43 Comp.44
## Standard deviation 0.0013689859 0.0012380709 0.0011584288 0.0011343014
## Proportion of Variance 0.0004016049 0.0003284673 0.0002875675 0.0002757135
## Cumulative Proportion 0.9978270531 0.9981555204 0.9984430880 0.9987188015
## Comp.45 Comp.46 Comp.47 Comp.48
## Standard deviation 0.0010763195 0.0009532855 0.0008779454 0.0007724895
## Proportion of Variance 0.0002482467 0.0001947364 0.0001651719 0.0001278752
## Cumulative Proportion 0.9989670482 0.9991617845 0.9993269564 0.9994548316
## Comp.49 Comp.50 Comp.51 Comp.52
## Standard deviation 0.0007230445 6.782753e-04 6.157763e-04 5.631824e-04
## Proportion of Variance 0.0001120292 9.858552e-05 8.125442e-05 6.796719e-05
## Cumulative Proportion 0.9995668608 9.996654e-01 9.997467e-01 9.998147e-01
## Comp.53 Comp.54 Comp.55 Comp.56
## Standard deviation 5.283276e-04 4.028602e-04 3.822538e-04 2.869987e-04
## Proportion of Variance 5.981468e-05 3.477842e-05 3.131157e-05 1.765066e-05
## Cumulative Proportion 9.998745e-01 9.999093e-01 9.999406e-01 9.999582e-01
## Comp.57 Comp.58 Comp.59 Comp.60
## Standard deviation 2.472641e-04 1.795151e-04 1.631584e-04 1.477383e-04
## Proportion of Variance 1.310156e-05 6.905629e-06 5.704535e-06 4.677211e-06
## Cumulative Proportion 9.999713e-01 9.999782e-01 9.999839e-01 9.999886e-01
## Comp.61 Comp.62 Comp.63 Comp.64
## Standard deviation 1.103689e-04 1.041689e-04 9.987556e-05 8.422204e-05
## Proportion of Variance 2.610326e-06 2.325293e-06 2.137566e-06 1.520032e-06
## Cumulative Proportion 9.999912e-01 9.999935e-01 9.999957e-01 9.999972e-01
## Comp.65 Comp.66 Comp.67 Comp.68
## Standard deviation 6.889062e-05 5.838797e-05 4.779923e-05 3.404225e-05
## Proportion of Variance 1.017001e-06 7.305465e-07 4.896017e-07 2.483348e-07
## Cumulative Proportion 9.999982e-01 9.999990e-01 9.999994e-01 9.999997e-01
## Comp.69 Comp.70 Comp.71 Comp.72
## Standard deviation 2.411278e-05 2.040568e-05 1.579816e-05 1.000994e-05
## Proportion of Variance 1.245936e-07 8.922844e-08 5.348279e-08 2.147157e-08
## Cumulative Proportion 9.999998e-01 9.999999e-01 1.000000e+00 1.000000e+00
## Comp.73 Comp.74 Comp.75 Comp.76
## Standard deviation 6.916797e-06 6.400714e-06 2.360795e-06 6.863999e-07
## Proportion of Variance 1.025206e-08 8.779260e-09 1.194312e-09 1.009614e-10
## Cumulative Proportion 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## Comp.77 Comp.78 Comp.79 Comp.80
## Standard deviation 2.802859e-07 6.955463e-08 2.034805e-08 3.055290e-09
## Proportion of Variance 1.683463e-11 1.036700e-12 8.872512e-14 2.000349e-15
## Cumulative Proportion 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## Comp.81
## Standard deviation 0
## Proportion of Variance 0
## Cumulative Proportion 1
#Чтобы выбрать нужные главные компоненты, запишем графики факторных нагрузок в файлы для более удобного просмотра
#' Write one level plot of component loadings per component to JPEG files.
#'
#' @param path Directory receiving the files; file i is written to
#'   "<path>//levelplot<i>.jpg".
#' @param n_components Number of leading components to plot; 0 writes nothing.
#' @param loadings Loadings matrix; column i is reshaped row-wise into a
#'   matrix with `size_of_SR` columns before plotting.
#' @param size_of_SR Number of columns of the reshaped loadings matrix.
#' @param colours Function returning a colour vector; called as colours(100).
#' @return NULL, invisibly. Called for the side effect of writing files.
create_loadings_plots <- function(path, n_components, loadings, size_of_SR,
                                  colours) {
  # The matrix columns are drawn along the y axis, so the axis covers
  # 0.5 .. size_of_SR + 0.5; the limits are reversed to put column 1 on top.
  y_limits <- c(size_of_SR + 0.5, 0.5)
  for (i in seq_len(n_components)) {
    m <- matrix(data = loadings[, i], ncol = size_of_SR, byrow = TRUE)
    filepath <- paste(path, "//levelplot", i, ".jpg", sep = "")
    jpeg(filename = filepath, width = 1024, height = 768, bg = "transparent")
    # The lattice object must be print()ed explicitly, otherwise an empty
    # file is written. The device is closed even when plotting fails.
    tryCatch(
      print(
        levelplot(m, col.regions = colours(100), ylim = y_limits,
                  main = paste(i, " component loadings", sep = ""),
                  xlab = "", ylab = "", useRaster = TRUE)
      ),
      finally = dev.off()
    )
  }
}
# Set up the colour scale: blue -> white -> red.
palette <- colorRampPalette(c("#0000FF", "#FFFFFF", "#FF0000"))
# Write the resulting level plots to disk (call currently disabled).
#create_loadings_plots("E://RDir//reading//Loadings",81,reading_PCA$loadings,9,palette)
# High values of the 2nd principal component can be read as re-reading.
show_loading_plot(path = "E://RDir//reading//Loadings", number_of_component = 2)
using_comps <- 2
# Combine the chosen component scores with reading skill, class and gender.
df <- setNames(
  as.data.frame(
    cbind(
      predict(reading_PCA)[, using_comps],
      reading_results$Навык.чтения..учит..,
      reading_results$Класс,
      reading_results$Пол
    )
  ),
  c("Rereading", "Skill", "Class", "Gender")
)
# Rescale the component scores to the [0, 1] range.
df$Rereading <- (df$Rereading - min(df$Rereading)) / diff(range(df$Rereading))
# Label the two levels of each factor in their sorted order.
df$Skill <- factor(df$Skill, labels = c("Bad", "Good"))
df$Gender <- factor(df$Gender, labels = c("female", "male"))
# Predict the age group (school class).
cross_validate_tree(
  to_predict = "Class",
  predict_with = c("Rereading", "Gender", "Skill"),
  data = df
)
## [1] "Оценка скользящего контроля методом leave-one-out: "
## [1] "Правильно классифицировано: 53%"
## [1] "Предсказывалось значение для: Class"
## [1] "Предсказывалось с помощью: Rereading, Gender, Skill"
# Predict the reading skill level.
cross_validate_tree(
  to_predict = "Skill",
  predict_with = c("Rereading", "Gender", "Class"),
  data = df
)
## [1] "Оценка скользящего контроля методом leave-one-out: "
## [1] "Правильно классифицировано: 68%"
## [1] "Предсказывалось значение для: Skill"
## [1] "Предсказывалось с помощью: Rereading, Gender, Class"