Lê a base de dados e calcula as proporções derivadas

library(readr)
# Load the dataset and append four derived ratio columns.
base_pura <- read_csv("dados_redações - dados.csv")

# NOTE(review): the first two ratios below were previously computed as
# quant_orac / quant_per_comp and quant_pal / quant_adj, which contradicted
# both the column names and their descriptions. Any output rendered before
# this fix (head, correlation matrix, summary) is stale for these two columns
# and must be regenerated.

# Proportion of compound periods per period.
base_pura$prop_comp_per_per <- base_pura$quant_per_comp / base_pura$quant_per
# Proportion of adjectives per word.
base_pura$prop_adj_per_pal <- base_pura$quant_adj / base_pura$quant_pal
# Proportion of clauses per period.
base_pura$prop_ora_per_per <- base_pura$quant_orac / base_pura$quant_per
# Proportion of adjectives per period.
base_pura$prop_adj_per_per <- base_pura$quant_adj / base_pura$quant_per

# Preview the first rows, including the new columns.
head(base_pura)
## # A tibble: 6 x 27
##   texto  nota quant_pal tam_voc quant_per tam_per quant_subs quant_orac
##   <chr> <dbl>     <dbl>   <dbl>     <dbl>   <dbl>      <dbl>      <dbl>
## 1 reda~  1000       472     270        15      31        127         36
## 2 reda~  1000       392     202        16      25        116         31
## 3 reda~  1000       434     246        16      27        132         31
## 4 reda~  1000       499     278        17      29        138         40
## 5 reda~  1000       467     270        14      33        129         28
## 6 reda~  1000       480     286        19      25        139         33
## # ... with 19 more variables: quant_per_simp <dbl>, quant_per_comp <dbl>,
## #   voz_pas <dbl>, voz_atv <dbl>, subs_por_orac <dbl>, quant_adj <dbl>,
## #   quant_adv <dbl>, quant_prep <dbl>, quant_ent_per <dbl>,
## #   quant_ent_loc <dbl>, quant_ent_org <dbl>, quant_ent_misc <dbl>,
## #   quant_que <dbl>, quant_crase <dbl>, quant_estr <dbl>,
## #   prop_comp_per_per <dbl>, prop_adj_per_pal <dbl>,
## #   prop_ora_per_per <dbl>, prop_adj_per_per <dbl>

Cria a matriz de correlação

# Pairwise correlation matrix of the numeric features.
# Columns are excluded by name instead of by position (previously 1, 2, 22, 23)
# so the selection does not silently shift if columns are added or reordered.
# `texto` is a character column; `nota`, `quant_crase` and `quant_estr` show no
# variation in the rendered summary of this dataset, so a correlation with them
# would be undefined.
excluded_cols <- c("texto", "nota", "quant_crase", "quant_estr")
base <- cor(base_pura[, !(names(base_pura) %in% excluded_cols)])
base
##                     quant_pal     tam_voc   quant_per      tam_per
## quant_pal          1.00000000  0.89507530  0.24788354  0.533150370
## tam_voc            0.89507530  1.00000000  0.32298308  0.396085060
## quant_per          0.24788354  0.32298308  1.00000000 -0.668654452
## tam_per            0.53315037  0.39608506 -0.66865445  1.000000000
## quant_subs         0.89946867  0.74657354  0.19414739  0.515630043
## quant_orac         0.31559613  0.40390351  0.53796150 -0.195138114
## quant_per_simp     0.10864738  0.10474587  0.57484019 -0.424087778
## quant_per_comp     0.16688317  0.24855295  0.51253514 -0.303811530
## voz_pas            0.18966443  0.10233587  0.46474159 -0.227519345
## voz_atv            0.25845722  0.38533289  0.38560151 -0.116968338
## subs_por_orac      0.41564798  0.20123354 -0.23535565  0.471225000
## quant_adj          0.67197049  0.71402624  0.46949853  0.119083392
## quant_adv          0.31927138  0.31051687 -0.08396437  0.321982646
## quant_prep         0.84002767  0.67416723  0.06338117  0.559327611
## quant_ent_per     -0.18070842 -0.08386261  0.07339563 -0.191658917
## quant_ent_loc      0.23366913  0.04533386 -0.03561409  0.256525876
## quant_ent_org      0.03701266 -0.05100842  0.18617618 -0.156593622
## quant_ent_misc    -0.11775627  0.03175400 -0.12138827  0.009698839
## quant_que          0.18639858  0.19291810  0.01302949  0.183489151
## prop_comp_per_per  0.06869693  0.02942947 -0.15126238  0.198201024
## prop_adj_per_pal  -0.10774491 -0.22056040 -0.41812147  0.258076323
## prop_ora_per_per   0.09178628  0.09534035 -0.57090870  0.606833599
## prop_adj_per_per   0.55785494  0.52847339 -0.27764018  0.691915349
##                    quant_subs  quant_orac quant_per_simp quant_per_comp
## quant_pal          0.89946867  0.31559613    0.108647385     0.16688317
## tam_voc            0.74657354  0.40390351    0.104745870     0.24855295
## quant_per          0.19414739  0.53796150    0.574840189     0.51253514
## tam_per            0.51563004 -0.19513811   -0.424087778    -0.30381153
## quant_subs         1.00000000  0.20460864    0.063208727     0.15283872
## quant_orac         0.20460864  1.00000000   -0.133995690     0.73947515
## quant_per_simp     0.06320873 -0.13399569    1.000000000    -0.40422861
## quant_per_comp     0.15283872  0.73947515   -0.404228614     1.00000000
## voz_pas            0.22744842  0.31628179    0.077189961     0.43888049
## voz_atv            0.12696340  0.92957466   -0.171021841     0.60773892
## subs_por_orac      0.52298296 -0.62851326    0.181374587    -0.44579866
## quant_adj          0.51273367  0.42443088    0.426843719     0.07711384
## quant_adv          0.06937831  0.06137062   -0.245679802     0.14587786
## quant_prep         0.90808156  0.09180019   -0.008692084     0.08798529
## quant_ent_per     -0.19605245  0.14239228    0.102477899    -0.06624665
## quant_ent_loc      0.19451573  0.05066759    0.007716909    -0.08005293
## quant_ent_org     -0.03295526 -0.02383101    0.037615577     0.17869538
## quant_ent_misc    -0.24821504 -0.27224326    0.085860952    -0.21490591
## quant_que          0.12728518  0.66106605   -0.513965380     0.54133967
## prop_comp_per_per -0.03487952  0.02330020    0.447850958    -0.64545065
## prop_adj_per_pal   0.06145197 -0.37278832   -0.428418831    -0.01448845
## prop_ora_per_per   0.05112835  0.36935571   -0.716974020     0.11093178
## prop_adj_per_per   0.42866062  0.07738186   -0.038420198    -0.27226631
##                       voz_pas      voz_atv subs_por_orac    quant_adj
## quant_pal          0.18966443  0.258457223   0.415647983  0.671970493
## tam_voc            0.10233587  0.385332895   0.201233541  0.714026238
## quant_per          0.46474159  0.385601505  -0.235355650  0.469498530
## tam_per           -0.22751935 -0.116968338   0.471225000  0.119083392
## quant_subs         0.22744842  0.126963400   0.522982956  0.512733669
## quant_orac         0.31628179  0.929574664  -0.628513261  0.424430881
## quant_per_simp     0.07718996 -0.171021841   0.181374587  0.426843719
## quant_per_comp     0.43888049  0.607738919  -0.445798656  0.077113835
## voz_pas            1.00000000 -0.055702640   0.089596445  0.071508385
## voz_atv           -0.05570264  1.000000000  -0.696310624  0.418916452
## subs_por_orac      0.08959644 -0.696310624   1.000000000  0.003311686
## quant_adj          0.07150838  0.418916452   0.003311686  1.000000000
## quant_adv          0.06677839  0.038642250   0.079375677  0.103813283
## quant_prep         0.26487837 -0.006309371   0.567227588  0.385313051
## quant_ent_per     -0.30673610  0.269056456  -0.332306509  0.070943525
## quant_ent_loc      0.09010662  0.018312645   0.100990210  0.173178961
## quant_ent_org      0.24980317 -0.122150485   0.028068045  0.060150627
## quant_ent_misc    -0.33430724 -0.156623831   0.026462806  0.087110685
## quant_que          0.02520159  0.685963244  -0.514793544  0.124136561
## prop_comp_per_per -0.32937707  0.152512773  -0.059300241  0.334259824
## prop_adj_per_pal   0.07673334 -0.422167903   0.419620832 -0.760599665
## prop_ora_per_per  -0.18886130  0.462126029  -0.319175700 -0.031032075
## prop_adj_per_per  -0.26436391  0.184169502   0.154809167  0.703108830
##                     quant_adv   quant_prep quant_ent_per quant_ent_loc
## quant_pal          0.31927138  0.840027667  -0.180708423   0.233669128
## tam_voc            0.31051687  0.674167228  -0.083862610   0.045333864
## quant_per         -0.08396437  0.063381172   0.073395632  -0.035614087
## tam_per            0.32198265  0.559327611  -0.191658917   0.256525876
## quant_subs         0.06937831  0.908081565  -0.196052446   0.194515727
## quant_orac         0.06137062  0.091800192   0.142392276   0.050667592
## quant_per_simp    -0.24567980 -0.008692084   0.102477899   0.007716909
## quant_per_comp     0.14587786  0.087985290  -0.066246653  -0.080052934
## voz_pas            0.06677839  0.264878370  -0.306736103   0.090106623
## voz_atv            0.03864225 -0.006309371   0.269056456   0.018312645
## subs_por_orac      0.07937568  0.567227588  -0.332306509   0.100990210
## quant_adj          0.10381328  0.385313051   0.070943525   0.173178961
## quant_adv          1.00000000  0.143404775  -0.249339579   0.457623461
## quant_prep         0.14340478  1.000000000  -0.228290987   0.215797802
## quant_ent_per     -0.24933958 -0.228290987   1.000000000   0.030502909
## quant_ent_loc      0.45762346  0.215797802   0.030502909   1.000000000
## quant_ent_org      0.33798932  0.158090646  -0.360362282   0.241413240
## quant_ent_misc     0.23874533 -0.108765699   0.005726171  -0.141864378
## quant_que          0.25344775  0.129734093   0.041054978   0.204941601
## prop_comp_per_per -0.18553972 -0.056280261   0.301877832   0.206531162
## prop_adj_per_pal   0.06724139  0.117293272  -0.222475426  -0.055613342
## prop_ora_per_per   0.14507127  0.063696995   0.059568172   0.124504307
## prop_adj_per_per   0.21752352  0.402233217   0.020085704   0.257055742
##                   quant_ent_org quant_ent_misc   quant_que
## quant_pal            0.03701266   -0.117756270  0.18639858
## tam_voc             -0.05100842    0.031753995  0.19291810
## quant_per            0.18617618   -0.121388273  0.01302949
## tam_per             -0.15659362    0.009698839  0.18348915
## quant_subs          -0.03295526   -0.248215035  0.12728518
## quant_orac          -0.02383101   -0.272243256  0.66106605
## quant_per_simp       0.03761558    0.085860952 -0.51396538
## quant_per_comp       0.17869538   -0.214905905  0.54133967
## voz_pas              0.24980317   -0.334307243  0.02520159
## voz_atv             -0.12215049   -0.156623831  0.68596324
## subs_por_orac        0.02806804    0.026462806 -0.51479354
## quant_adj            0.06015063    0.087110685  0.12413656
## quant_adv            0.33798932    0.238745325  0.25344775
## quant_prep           0.15809065   -0.108765699  0.12973409
## quant_ent_per       -0.36036228    0.005726171  0.04105498
## quant_ent_loc        0.24141324   -0.141864378  0.20494160
## quant_ent_org        1.00000000    0.097053872  0.07889985
## quant_ent_misc       0.09705387    1.000000000 -0.15139656
## quant_que            0.07889985   -0.151396563  1.00000000
## prop_comp_per_per   -0.27242134   -0.006090612 -0.03711980
## prop_adj_per_pal    -0.17053532   -0.179037673 -0.13052290
## prop_ora_per_per    -0.27299662   -0.135882152  0.63306317
## prop_adj_per_per    -0.05822098    0.172629794  0.20666995
##                   prop_comp_per_per prop_adj_per_pal prop_ora_per_per
## quant_pal               0.068696934      -0.10774491       0.09178628
## tam_voc                 0.029429468      -0.22056040       0.09534035
## quant_per              -0.151262384      -0.41812147      -0.57090870
## tam_per                 0.198201024       0.25807632       0.60683360
## quant_subs             -0.034879518       0.06145197       0.05112835
## quant_orac              0.023300199      -0.37278832       0.36935571
## quant_per_simp          0.447850958      -0.42841883      -0.71697402
## quant_per_comp         -0.645450648      -0.01448845       0.11093178
## voz_pas                -0.329377069       0.07673334      -0.18886130
## voz_atv                 0.152512773      -0.42216790       0.46212603
## subs_por_orac          -0.059300241       0.41962083      -0.31917570
## quant_adj               0.334259824      -0.76059966      -0.03103207
## quant_adv              -0.185539716       0.06724139       0.14507127
## quant_prep             -0.056280261       0.11729327       0.06369699
## quant_ent_per           0.301877832      -0.22247543       0.05956817
## quant_ent_loc           0.206531162      -0.05561334       0.12450431
## quant_ent_org          -0.272421340      -0.17053532      -0.27299662
## quant_ent_misc         -0.006090612      -0.17903767      -0.13588215
## quant_que              -0.037119799      -0.13052290       0.63306317
## prop_comp_per_per       1.000000000      -0.39628899       0.24098648
## prop_adj_per_pal       -0.396288991       1.00000000       0.06496154
## prop_ora_per_per        0.240986476       0.06496154       1.00000000
## prop_adj_per_per        0.460637470      -0.50257789       0.46304218
##                   prop_adj_per_per
## quant_pal               0.55785494
## tam_voc                 0.52847339
## quant_per              -0.27764018
## tam_per                 0.69191535
## quant_subs              0.42866062
## quant_orac              0.07738186
## quant_per_simp         -0.03842020
## quant_per_comp         -0.27226631
## voz_pas                -0.26436391
## voz_atv                 0.18416950
## subs_por_orac           0.15480917
## quant_adj               0.70310883
## quant_adv               0.21752352
## quant_prep              0.40223322
## quant_ent_per           0.02008570
## quant_ent_loc           0.25705574
## quant_ent_org          -0.05822098
## quant_ent_misc          0.17262979
## quant_que               0.20666995
## prop_comp_per_per       0.46063747
## prop_adj_per_pal       -0.50257789
## prop_ora_per_per        0.46304218
## prop_adj_per_per        1.00000000

Reorganiza a matriz

library(reshape2)

# Reshape the correlation matrix into long format: one row per pair of
# variables (Var1, Var2) with the corresponding correlation in `value`.
base_melt <- reshape2::melt(base)
head(base_melt, n = 6L)
##         Var1      Var2     value
## 1  quant_pal quant_pal 1.0000000
## 2    tam_voc quant_pal 0.8950753
## 3  quant_per quant_pal 0.2478835
## 4    tam_per quant_pal 0.5331504
## 5 quant_subs quant_pal 0.8994687
## 6 quant_orac quant_pal 0.3155961

HeatMap sem Cluster

library(ggplot2)
library(plotly)

# Correlation heatmap in the original variable order (no clustering).
# The fill scale diverges around 0: red for negative, white for zero,
# blue for positive correlations. ggplotly() makes the plot interactive.
ggplotly(
  ggplot(
    data = base_melt,
    mapping = aes(x = Var1, y = Var2, fill = value)
  ) +
    geom_tile() +
    labs(x = NULL, y = NULL, fill = NULL) +
    theme(axis.text.x = element_text(angle = 45)) +
    scale_fill_gradient2(
      low = "red",
      mid = "white",
      high = "blue",
      midpoint = 0
    )
)

HeatMap com Cluster

library(heatmaply)

# Interactive correlation heatmap drawn by heatmaply from the correlation
# matrix, using the same red / white / blue diverging scale centred on 0
# as the ggplot heatmap.
heatmaply_cor(
  base,
  scale_fill_gradient_fun = ggplot2::scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "blue",
    midpoint = 0
  )
)

Queísmo

# Mean and standard deviation of the "que" count across texts.
# Stored under distinct names so the base functions mean() and sd() are not
# masked by numeric variables of the same name.
que_mean <- mean(base_pura$quant_que)
que_sd <- sd(base_pura$quant_que)

# Two rows sharing the same x position: one holding the mean, one holding
# the standard deviation.
teste <- data.frame(
  x = c("info", "info"),
  inf = c("mean", "sd"),
  value = c(que_mean, que_sd)
)

# Point-range plot with the interval [mean - sd, mean + sd].
# NOTE(review): the "sd" row is drawn as a second point at y = que_sd with the
# same interval, so that point sits at the value of the standard deviation
# rather than at a location statistic. Confirm whether this is intended or
# whether only the mean should be plotted.
ggplotly(
  ggplot(teste, aes(x = x, y = value, color = x)) +
    geom_pointrange(aes(
      ymin = que_mean - que_sd,
      ymax = que_mean + que_sd
    )) +
    labs(
      x = NULL,
      y = "Quantidade de 'Que'"
    ) +
    # "none" replaces the deprecated `FALSE` for hiding a legend.
    guides(color = "none") +
    ylim(0, 20) +
    theme(axis.text.x = element_blank())
)

Summary da base de dados

# Per-column descriptive summary of the full dataset, derived ratios included.
summary(object = base_pura)
##     texto                nota        quant_pal        tam_voc     
##  Length:25          Min.   :1000   Min.   :317.0   Min.   :191.0  
##  Class :character   1st Qu.:1000   1st Qu.:392.0   1st Qu.:215.0  
##  Mode  :character   Median :1000   Median :439.0   Median :244.0  
##                     Mean   :1000   Mean   :425.5   Mean   :240.6  
##                     3rd Qu.:1000   3rd Qu.:461.0   3rd Qu.:263.0  
##                     Max.   :1000   Max.   :547.0   Max.   :300.0  
##    quant_per        tam_per       quant_subs      quant_orac   
##  Min.   :10.00   Min.   :23.0   Min.   : 76.0   Min.   :23.00  
##  1st Qu.:13.00   1st Qu.:26.0   1st Qu.:106.0   1st Qu.:28.00  
##  Median :15.00   Median :28.0   Median :121.0   Median :31.00  
##  Mean   :14.56   Mean   :29.8   Mean   :115.6   Mean   :31.36  
##  3rd Qu.:16.00   3rd Qu.:33.0   3rd Qu.:129.0   3rd Qu.:34.00  
##  Max.   :19.00   Max.   :46.0   Max.   :147.0   Max.   :40.00  
##  quant_per_simp  quant_per_comp     voz_pas        voz_atv   subs_por_orac
##  Min.   : 1.00   Min.   : 7.00   Min.   :0.00   Min.   :20   Min.   :2.0  
##  1st Qu.: 3.00   1st Qu.: 9.00   1st Qu.:3.00   1st Qu.:24   1st Qu.:3.0  
##  Median : 4.00   Median :10.00   Median :3.00   Median :28   Median :4.0  
##  Mean   : 4.24   Mean   :10.28   Mean   :3.36   Mean   :28   Mean   :3.8  
##  3rd Qu.: 5.00   3rd Qu.:11.00   3rd Qu.:5.00   3rd Qu.:31   3rd Qu.:4.0  
##  Max.   :10.00   Max.   :14.00   Max.   :6.00   Max.   :38   Max.   :5.0  
##    quant_adj       quant_adv       quant_prep     quant_ent_per 
##  Min.   :24.00   Min.   : 3.00   Min.   : 45.00   Min.   :0.00  
##  1st Qu.:36.00   1st Qu.:12.00   1st Qu.: 76.00   1st Qu.:1.00  
##  Median :40.00   Median :16.00   Median : 83.00   Median :2.00  
##  Mean   :41.12   Mean   :17.76   Mean   : 84.36   Mean   :2.08  
##  3rd Qu.:47.00   3rd Qu.:24.00   3rd Qu.: 96.00   3rd Qu.:2.00  
##  Max.   :69.00   Max.   :39.00   Max.   :117.00   Max.   :5.00  
##  quant_ent_loc  quant_ent_org  quant_ent_misc   quant_que      quant_crase
##  Min.   :1.00   Min.   :0.00   Min.   :0.0    Min.   : 6.00   Min.   :0   
##  1st Qu.:2.00   1st Qu.:0.00   1st Qu.:0.0    1st Qu.: 8.00   1st Qu.:0   
##  Median :4.00   Median :1.00   Median :2.0    Median :11.00   Median :0   
##  Mean   :4.16   Mean   :1.12   Mean   :1.8    Mean   :11.64   Mean   :0   
##  3rd Qu.:6.00   3rd Qu.:2.00   3rd Qu.:3.0    3rd Qu.:15.00   3rd Qu.:0   
##  Max.   :9.00   Max.   :4.00   Max.   :8.0    Max.   :21.00   Max.   :0   
##    quant_estr prop_comp_per_per prop_adj_per_pal prop_ora_per_per
##  Min.   :0    Min.   :2.364     Min.   : 6.957   Min.   :1.533   
##  1st Qu.:0    1st Qu.:2.857     1st Qu.: 9.300   1st Qu.:1.938   
##  Median :0    Median :3.000     Median :10.605   Median :2.091   
##  Mean   :0    Mean   :3.108     Mean   :10.694   Mean   :2.184   
##  3rd Qu.:0    3rd Qu.:3.286     3rd Qu.:11.692   3rd Qu.:2.400   
##  Max.   :0    Max.   :4.571     Max.   :17.167   Max.   :3.000   
##  prop_adj_per_per
##  Min.   :1.929   
##  1st Qu.:2.353   
##  Median :2.688   
##  Mean   :2.853   
##  3rd Qu.:3.154   
##  Max.   :4.750