Lê a base de dados e calcula as proporções derivadas
library(readr)
# Read the essays dataset.
base_pura <- read_csv("dados_redações - dados.csv")

# Derived ratios. Each column is numerator / denominator in the order its
# name reads (prop_<numerator>_per_<denominator>).
# NOTE: the first two ratios were previously computed as
# quant_orac / quant_per_comp and quant_pal / quant_adj, which did not match
# their names; any previously rendered output must be regenerated.

# Compound periods per period.
base_pura$prop_comp_per_per <- base_pura$quant_per_comp / base_pura$quant_per
# Adjectives per word.
base_pura$prop_adj_per_pal <- base_pura$quant_adj / base_pura$quant_pal
# Clauses per period.
base_pura$prop_ora_per_per <- base_pura$quant_orac / base_pura$quant_per
# Adjectives per period.
base_pura$prop_adj_per_per <- base_pura$quant_adj / base_pura$quant_per

head(base_pura)
## # A tibble: 6 x 27
## texto nota quant_pal tam_voc quant_per tam_per quant_subs quant_orac
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 reda~ 1000 472 270 15 31 127 36
## 2 reda~ 1000 392 202 16 25 116 31
## 3 reda~ 1000 434 246 16 27 132 31
## 4 reda~ 1000 499 278 17 29 138 40
## 5 reda~ 1000 467 270 14 33 129 28
## 6 reda~ 1000 480 286 19 25 139 33
## # ... with 19 more variables: quant_per_simp <dbl>, quant_per_comp <dbl>,
## # voz_pas <dbl>, voz_atv <dbl>, subs_por_orac <dbl>, quant_adj <dbl>,
## # quant_adv <dbl>, quant_prep <dbl>, quant_ent_per <dbl>,
## # quant_ent_loc <dbl>, quant_ent_org <dbl>, quant_ent_misc <dbl>,
## # quant_que <dbl>, quant_crase <dbl>, quant_estr <dbl>,
## # prop_comp_per_per <dbl>, prop_adj_per_pal <dbl>,
## # prop_ora_per_per <dbl>, prop_adj_per_per <dbl>
Cria a matriz de correlação
# Correlation matrix of the numeric features.
# Columns are excluded by name instead of by position (formerly
# -c(1, 2, 22, 23)) so the selection keeps working if the CSV column order
# changes. Dropped: the text identifier, the grade, and the crase / estr
# counts.
base <- cor(
  base_pura[, setdiff(
    names(base_pura),
    c("texto", "nota", "quant_crase", "quant_estr")
  )]
)
base
## quant_pal tam_voc quant_per tam_per
## quant_pal 1.00000000 0.89507530 0.24788354 0.533150370
## tam_voc 0.89507530 1.00000000 0.32298308 0.396085060
## quant_per 0.24788354 0.32298308 1.00000000 -0.668654452
## tam_per 0.53315037 0.39608506 -0.66865445 1.000000000
## quant_subs 0.89946867 0.74657354 0.19414739 0.515630043
## quant_orac 0.31559613 0.40390351 0.53796150 -0.195138114
## quant_per_simp 0.10864738 0.10474587 0.57484019 -0.424087778
## quant_per_comp 0.16688317 0.24855295 0.51253514 -0.303811530
## voz_pas 0.18966443 0.10233587 0.46474159 -0.227519345
## voz_atv 0.25845722 0.38533289 0.38560151 -0.116968338
## subs_por_orac 0.41564798 0.20123354 -0.23535565 0.471225000
## quant_adj 0.67197049 0.71402624 0.46949853 0.119083392
## quant_adv 0.31927138 0.31051687 -0.08396437 0.321982646
## quant_prep 0.84002767 0.67416723 0.06338117 0.559327611
## quant_ent_per -0.18070842 -0.08386261 0.07339563 -0.191658917
## quant_ent_loc 0.23366913 0.04533386 -0.03561409 0.256525876
## quant_ent_org 0.03701266 -0.05100842 0.18617618 -0.156593622
## quant_ent_misc -0.11775627 0.03175400 -0.12138827 0.009698839
## quant_que 0.18639858 0.19291810 0.01302949 0.183489151
## prop_comp_per_per 0.06869693 0.02942947 -0.15126238 0.198201024
## prop_adj_per_pal -0.10774491 -0.22056040 -0.41812147 0.258076323
## prop_ora_per_per 0.09178628 0.09534035 -0.57090870 0.606833599
## prop_adj_per_per 0.55785494 0.52847339 -0.27764018 0.691915349
## quant_subs quant_orac quant_per_simp quant_per_comp
## quant_pal 0.89946867 0.31559613 0.108647385 0.16688317
## tam_voc 0.74657354 0.40390351 0.104745870 0.24855295
## quant_per 0.19414739 0.53796150 0.574840189 0.51253514
## tam_per 0.51563004 -0.19513811 -0.424087778 -0.30381153
## quant_subs 1.00000000 0.20460864 0.063208727 0.15283872
## quant_orac 0.20460864 1.00000000 -0.133995690 0.73947515
## quant_per_simp 0.06320873 -0.13399569 1.000000000 -0.40422861
## quant_per_comp 0.15283872 0.73947515 -0.404228614 1.00000000
## voz_pas 0.22744842 0.31628179 0.077189961 0.43888049
## voz_atv 0.12696340 0.92957466 -0.171021841 0.60773892
## subs_por_orac 0.52298296 -0.62851326 0.181374587 -0.44579866
## quant_adj 0.51273367 0.42443088 0.426843719 0.07711384
## quant_adv 0.06937831 0.06137062 -0.245679802 0.14587786
## quant_prep 0.90808156 0.09180019 -0.008692084 0.08798529
## quant_ent_per -0.19605245 0.14239228 0.102477899 -0.06624665
## quant_ent_loc 0.19451573 0.05066759 0.007716909 -0.08005293
## quant_ent_org -0.03295526 -0.02383101 0.037615577 0.17869538
## quant_ent_misc -0.24821504 -0.27224326 0.085860952 -0.21490591
## quant_que 0.12728518 0.66106605 -0.513965380 0.54133967
## prop_comp_per_per -0.03487952 0.02330020 0.447850958 -0.64545065
## prop_adj_per_pal 0.06145197 -0.37278832 -0.428418831 -0.01448845
## prop_ora_per_per 0.05112835 0.36935571 -0.716974020 0.11093178
## prop_adj_per_per 0.42866062 0.07738186 -0.038420198 -0.27226631
## voz_pas voz_atv subs_por_orac quant_adj
## quant_pal 0.18966443 0.258457223 0.415647983 0.671970493
## tam_voc 0.10233587 0.385332895 0.201233541 0.714026238
## quant_per 0.46474159 0.385601505 -0.235355650 0.469498530
## tam_per -0.22751935 -0.116968338 0.471225000 0.119083392
## quant_subs 0.22744842 0.126963400 0.522982956 0.512733669
## quant_orac 0.31628179 0.929574664 -0.628513261 0.424430881
## quant_per_simp 0.07718996 -0.171021841 0.181374587 0.426843719
## quant_per_comp 0.43888049 0.607738919 -0.445798656 0.077113835
## voz_pas 1.00000000 -0.055702640 0.089596445 0.071508385
## voz_atv -0.05570264 1.000000000 -0.696310624 0.418916452
## subs_por_orac 0.08959644 -0.696310624 1.000000000 0.003311686
## quant_adj 0.07150838 0.418916452 0.003311686 1.000000000
## quant_adv 0.06677839 0.038642250 0.079375677 0.103813283
## quant_prep 0.26487837 -0.006309371 0.567227588 0.385313051
## quant_ent_per -0.30673610 0.269056456 -0.332306509 0.070943525
## quant_ent_loc 0.09010662 0.018312645 0.100990210 0.173178961
## quant_ent_org 0.24980317 -0.122150485 0.028068045 0.060150627
## quant_ent_misc -0.33430724 -0.156623831 0.026462806 0.087110685
## quant_que 0.02520159 0.685963244 -0.514793544 0.124136561
## prop_comp_per_per -0.32937707 0.152512773 -0.059300241 0.334259824
## prop_adj_per_pal 0.07673334 -0.422167903 0.419620832 -0.760599665
## prop_ora_per_per -0.18886130 0.462126029 -0.319175700 -0.031032075
## prop_adj_per_per -0.26436391 0.184169502 0.154809167 0.703108830
## quant_adv quant_prep quant_ent_per quant_ent_loc
## quant_pal 0.31927138 0.840027667 -0.180708423 0.233669128
## tam_voc 0.31051687 0.674167228 -0.083862610 0.045333864
## quant_per -0.08396437 0.063381172 0.073395632 -0.035614087
## tam_per 0.32198265 0.559327611 -0.191658917 0.256525876
## quant_subs 0.06937831 0.908081565 -0.196052446 0.194515727
## quant_orac 0.06137062 0.091800192 0.142392276 0.050667592
## quant_per_simp -0.24567980 -0.008692084 0.102477899 0.007716909
## quant_per_comp 0.14587786 0.087985290 -0.066246653 -0.080052934
## voz_pas 0.06677839 0.264878370 -0.306736103 0.090106623
## voz_atv 0.03864225 -0.006309371 0.269056456 0.018312645
## subs_por_orac 0.07937568 0.567227588 -0.332306509 0.100990210
## quant_adj 0.10381328 0.385313051 0.070943525 0.173178961
## quant_adv 1.00000000 0.143404775 -0.249339579 0.457623461
## quant_prep 0.14340478 1.000000000 -0.228290987 0.215797802
## quant_ent_per -0.24933958 -0.228290987 1.000000000 0.030502909
## quant_ent_loc 0.45762346 0.215797802 0.030502909 1.000000000
## quant_ent_org 0.33798932 0.158090646 -0.360362282 0.241413240
## quant_ent_misc 0.23874533 -0.108765699 0.005726171 -0.141864378
## quant_que 0.25344775 0.129734093 0.041054978 0.204941601
## prop_comp_per_per -0.18553972 -0.056280261 0.301877832 0.206531162
## prop_adj_per_pal 0.06724139 0.117293272 -0.222475426 -0.055613342
## prop_ora_per_per 0.14507127 0.063696995 0.059568172 0.124504307
## prop_adj_per_per 0.21752352 0.402233217 0.020085704 0.257055742
## quant_ent_org quant_ent_misc quant_que
## quant_pal 0.03701266 -0.117756270 0.18639858
## tam_voc -0.05100842 0.031753995 0.19291810
## quant_per 0.18617618 -0.121388273 0.01302949
## tam_per -0.15659362 0.009698839 0.18348915
## quant_subs -0.03295526 -0.248215035 0.12728518
## quant_orac -0.02383101 -0.272243256 0.66106605
## quant_per_simp 0.03761558 0.085860952 -0.51396538
## quant_per_comp 0.17869538 -0.214905905 0.54133967
## voz_pas 0.24980317 -0.334307243 0.02520159
## voz_atv -0.12215049 -0.156623831 0.68596324
## subs_por_orac 0.02806804 0.026462806 -0.51479354
## quant_adj 0.06015063 0.087110685 0.12413656
## quant_adv 0.33798932 0.238745325 0.25344775
## quant_prep 0.15809065 -0.108765699 0.12973409
## quant_ent_per -0.36036228 0.005726171 0.04105498
## quant_ent_loc 0.24141324 -0.141864378 0.20494160
## quant_ent_org 1.00000000 0.097053872 0.07889985
## quant_ent_misc 0.09705387 1.000000000 -0.15139656
## quant_que 0.07889985 -0.151396563 1.00000000
## prop_comp_per_per -0.27242134 -0.006090612 -0.03711980
## prop_adj_per_pal -0.17053532 -0.179037673 -0.13052290
## prop_ora_per_per -0.27299662 -0.135882152 0.63306317
## prop_adj_per_per -0.05822098 0.172629794 0.20666995
## prop_comp_per_per prop_adj_per_pal prop_ora_per_per
## quant_pal 0.068696934 -0.10774491 0.09178628
## tam_voc 0.029429468 -0.22056040 0.09534035
## quant_per -0.151262384 -0.41812147 -0.57090870
## tam_per 0.198201024 0.25807632 0.60683360
## quant_subs -0.034879518 0.06145197 0.05112835
## quant_orac 0.023300199 -0.37278832 0.36935571
## quant_per_simp 0.447850958 -0.42841883 -0.71697402
## quant_per_comp -0.645450648 -0.01448845 0.11093178
## voz_pas -0.329377069 0.07673334 -0.18886130
## voz_atv 0.152512773 -0.42216790 0.46212603
## subs_por_orac -0.059300241 0.41962083 -0.31917570
## quant_adj 0.334259824 -0.76059966 -0.03103207
## quant_adv -0.185539716 0.06724139 0.14507127
## quant_prep -0.056280261 0.11729327 0.06369699
## quant_ent_per 0.301877832 -0.22247543 0.05956817
## quant_ent_loc 0.206531162 -0.05561334 0.12450431
## quant_ent_org -0.272421340 -0.17053532 -0.27299662
## quant_ent_misc -0.006090612 -0.17903767 -0.13588215
## quant_que -0.037119799 -0.13052290 0.63306317
## prop_comp_per_per 1.000000000 -0.39628899 0.24098648
## prop_adj_per_pal -0.396288991 1.00000000 0.06496154
## prop_ora_per_per 0.240986476 0.06496154 1.00000000
## prop_adj_per_per 0.460637470 -0.50257789 0.46304218
## prop_adj_per_per
## quant_pal 0.55785494
## tam_voc 0.52847339
## quant_per -0.27764018
## tam_per 0.69191535
## quant_subs 0.42866062
## quant_orac 0.07738186
## quant_per_simp -0.03842020
## quant_per_comp -0.27226631
## voz_pas -0.26436391
## voz_atv 0.18416950
## subs_por_orac 0.15480917
## quant_adj 0.70310883
## quant_adv 0.21752352
## quant_prep 0.40223322
## quant_ent_per 0.02008570
## quant_ent_loc 0.25705574
## quant_ent_org -0.05822098
## quant_ent_misc 0.17262979
## quant_que 0.20666995
## prop_comp_per_per 0.46063747
## prop_adj_per_pal -0.50257789
## prop_ora_per_per 0.46304218
## prop_adj_per_per 1.00000000
Reorganiza a matriz para o formato longo (uma linha por par de variáveis)
library(reshape2)
# Reshape the correlation matrix into long format: one row per pair of
# variables (Var1, Var2) with the coefficient stored in `value`.
base_melt <- melt(base, value.name = "value")
head(base_melt)
## Var1 Var2 value
## 1 quant_pal quant_pal 1.0000000
## 2 tam_voc quant_pal 0.8950753
## 3 quant_per quant_pal 0.2478835
## 4 tam_per quant_pal 0.5331504
## 5 quant_subs quant_pal 0.8994687
## 6 quant_orac quant_pal 0.3155961
HeatMap sem Cluster
library(ggplot2)
library(plotly)
# Interactive heatmap of the correlation matrix, with variables kept in
# their original order. The diverging palette is centred on 0: red for
# negative, white for zero, blue for positive correlations.
ggplotly(
  ggplot(base_melt, aes(x = Var1, y = Var2, fill = value)) +
    geom_tile() +
    scale_fill_gradient2(
      low = "red",
      mid = "white",
      high = "blue",
      midpoint = 0
    ) +
    labs(x = NULL, y = NULL, fill = NULL) +
    theme(axis.text.x = element_text(angle = 45))
)
Queísmo
# Mean and standard deviation of the number of "que" per essay.
# Stored as que_mean / que_sd so the base functions mean() and sd() are not
# shadowed by numeric variables.
que_mean <- mean(base_pura$quant_que)
que_sd <- sd(base_pura$quant_que)

# Two-row frame: one row for the mean, one for the standard deviation.
teste <- data.frame(
  x = c("info", "info"),
  inf = c("mean", "sd"),
  value = c(que_mean, que_sd)
)

# Point range spanning mean +/- one standard deviation.
# NOTE(review): both rows are drawn as points, so the standard deviation
# itself also appears as a point on the count axis - confirm this is intended.
ggplotly(
  ggplot(teste, aes(x = x, y = value, color = x)) +
    geom_pointrange(aes(ymin = que_mean - que_sd,
                        ymax = que_mean + que_sd)) +
    labs(x = NULL,
         y = "Quantidade de 'Que'") +
    # "none" replaces the deprecated `FALSE` for hiding a legend.
    guides(color = "none") +
    ylim(0, 20) +
    theme(axis.text.x = element_blank())
)
Summary da base de dados
# Per-column descriptive statistics for the whole dataset, including the
# derived ratio columns added to base_pura.
summary(base_pura)
## texto nota quant_pal tam_voc
## Length:25 Min. :1000 Min. :317.0 Min. :191.0
## Class :character 1st Qu.:1000 1st Qu.:392.0 1st Qu.:215.0
## Mode :character Median :1000 Median :439.0 Median :244.0
## Mean :1000 Mean :425.5 Mean :240.6
## 3rd Qu.:1000 3rd Qu.:461.0 3rd Qu.:263.0
## Max. :1000 Max. :547.0 Max. :300.0
## quant_per tam_per quant_subs quant_orac
## Min. :10.00 Min. :23.0 Min. : 76.0 Min. :23.00
## 1st Qu.:13.00 1st Qu.:26.0 1st Qu.:106.0 1st Qu.:28.00
## Median :15.00 Median :28.0 Median :121.0 Median :31.00
## Mean :14.56 Mean :29.8 Mean :115.6 Mean :31.36
## 3rd Qu.:16.00 3rd Qu.:33.0 3rd Qu.:129.0 3rd Qu.:34.00
## Max. :19.00 Max. :46.0 Max. :147.0 Max. :40.00
## quant_per_simp quant_per_comp voz_pas voz_atv subs_por_orac
## Min. : 1.00 Min. : 7.00 Min. :0.00 Min. :20 Min. :2.0
## 1st Qu.: 3.00 1st Qu.: 9.00 1st Qu.:3.00 1st Qu.:24 1st Qu.:3.0
## Median : 4.00 Median :10.00 Median :3.00 Median :28 Median :4.0
## Mean : 4.24 Mean :10.28 Mean :3.36 Mean :28 Mean :3.8
## 3rd Qu.: 5.00 3rd Qu.:11.00 3rd Qu.:5.00 3rd Qu.:31 3rd Qu.:4.0
## Max. :10.00 Max. :14.00 Max. :6.00 Max. :38 Max. :5.0
## quant_adj quant_adv quant_prep quant_ent_per
## Min. :24.00 Min. : 3.00 Min. : 45.00 Min. :0.00
## 1st Qu.:36.00 1st Qu.:12.00 1st Qu.: 76.00 1st Qu.:1.00
## Median :40.00 Median :16.00 Median : 83.00 Median :2.00
## Mean :41.12 Mean :17.76 Mean : 84.36 Mean :2.08
## 3rd Qu.:47.00 3rd Qu.:24.00 3rd Qu.: 96.00 3rd Qu.:2.00
## Max. :69.00 Max. :39.00 Max. :117.00 Max. :5.00
## quant_ent_loc quant_ent_org quant_ent_misc quant_que quant_crase
## Min. :1.00 Min. :0.00 Min. :0.0 Min. : 6.00 Min. :0
## 1st Qu.:2.00 1st Qu.:0.00 1st Qu.:0.0 1st Qu.: 8.00 1st Qu.:0
## Median :4.00 Median :1.00 Median :2.0 Median :11.00 Median :0
## Mean :4.16 Mean :1.12 Mean :1.8 Mean :11.64 Mean :0
## 3rd Qu.:6.00 3rd Qu.:2.00 3rd Qu.:3.0 3rd Qu.:15.00 3rd Qu.:0
## Max. :9.00 Max. :4.00 Max. :8.0 Max. :21.00 Max. :0
## quant_estr prop_comp_per_per prop_adj_per_pal prop_ora_per_per
## Min. :0 Min. :2.364 Min. : 6.957 Min. :1.533
## 1st Qu.:0 1st Qu.:2.857 1st Qu.: 9.300 1st Qu.:1.938
## Median :0 Median :3.000 Median :10.605 Median :2.091
## Mean :0 Mean :3.108 Mean :10.694 Mean :2.184
## 3rd Qu.:0 3rd Qu.:3.286 3rd Qu.:11.692 3rd Qu.:2.400
## Max. :0 Max. :4.571 Max. :17.167 Max. :3.000
## prop_adj_per_per
## Min. :1.929
## 1st Qu.:2.353
## Median :2.688
## Mean :2.853
## 3rd Qu.:3.154
## Max. :4.750