Práctica Calificada Bivariado

Raul Salinas Prieto

15020410

# Scrape the table selected by the XPath below from the Wikipedia page
# "List of freedom indices" and give its five columns short names.
linkFreedom <- "https://en.wikipedia.org/wiki/List_of_freedom_indices"
pathFreedom <- '//*[@id="mw-content-text"]/div[1]/table[2]'
Freedom <- htmltab::htmltab(doc = linkFreedom, which = pathFreedom)
names(Freedom) <- c(
  "Country", "Free", "EconomicFree", "FreePress", "Regime"
)
names(Freedom)
## [1] "Country"      "Free"         "EconomicFree" "FreePress"   
## [5] "Regime"
# Inspect column types before recoding.
str(Freedom)
## 'data.frame':    198 obs. of  5 variables:
##  $ Country     : chr  " Afghanistan" " Albania" " Algeria" " Andorra" ...
##  $ Free        : chr  "not free" "partly free" "not free" "free" ...
##  $ EconomicFree: chr  "mostly unfree" "moderately free" "repressed" "n/a" ...
##  $ FreePress   : chr  "difficult situation" "noticeable problems" "difficult situation" "satisfactory situation" ...
##  $ Regime      : chr  "authoritarian regime" "hybrid regime" "hybrid regime" "n/a" ...
# Encode both ratings as ordered factors using their substantive ranking,
# listed from worst to best. as.ordered() sorts the labels alphabetically
# (e.g. "free" < "n/a" < "not free" < "partly free"), which makes any
# rank-based statistic computed on these variables meaningless.
# Labels not listed in `levels` (here "n/a") become NA, so unrated countries
# are no longer treated as a rank of their own; table() drops NA by default.
# NOTE: the `##` output recorded in this document was produced before this
# recoding; re-run the script to refresh it.
Freedom$EconomicFree <- factor(
  Freedom$EconomicFree,
  levels = c(
    "repressed", "mostly unfree", "moderately free", "mostly free", "free"
  ),
  ordered = TRUE
)
Freedom$Free <- factor(
  Freedom$Free,
  levels = c("not free", "partly free", "free"),
  ordered = TRUE
)
str(Freedom)
## 'data.frame':    198 obs. of  5 variables:
##  $ Country     : chr  " Afghanistan" " Albania" " Algeria" " Andorra" ...
##  $ Free        : Ord.factor w/ 4 levels "free"<"n/a"<"not free"<..: 3 4 3 1 3 1 1 4 1 1 ...
##  $ EconomicFree: Ord.factor w/ 6 levels "free"<"moderately free"<..: 4 2 6 5 4 5 4 3 1 3 ...
##  $ FreePress   : chr  "difficult situation" "noticeable problems" "difficult situation" "satisfactory situation" ...
##  $ Regime      : chr  "authoritarian regime" "hybrid regime" "hybrid regime" "n/a" ...
# Cross-tabulate economic freedom (rows) against the Free rating (columns).
columna <- Freedom$Free
fila <- Freedom$EconomicFree
t <- table(fila, columna)
print(t)
##                  columna
## fila              free n/a not free partly free
##   free               4   0        0           2
##   moderately free   26   0        9          27
##   mostly free       24   1        3           3
##   mostly unfree     15   0       19          28
##   n/a               11   0        7           0
##   repressed          3   0       12           4
# Column proportions (margin = 2): each column of the table sums to 1.
prop_t <- prop.table(t, margin = 2)
print(prop_t)
##                  columna
## fila                    free        n/a   not free partly free
##   free            0.04819277 0.00000000 0.00000000  0.03125000
##   moderately free 0.31325301 0.00000000 0.18000000  0.42187500
##   mostly free     0.28915663 1.00000000 0.06000000  0.04687500
##   mostly unfree   0.18072289 0.00000000 0.38000000  0.43750000
##   n/a             0.13253012 0.00000000 0.14000000  0.00000000
##   repressed       0.03614458 0.00000000 0.24000000  0.06250000
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Note the use of t() to transpose the proportions table. The call still
# resolves to the transpose function although a variable named `t` exists,
# because R skips non-function objects when looking up a function name.
balloonplot(
  t(prop_t),
  main = "tabla",
  label = TRUE,
  show.margins = FALSE
)

# Pearson chi-squared test of independence on the contingency table of counts.
chisq.test(t)
## Warning in chisq.test(t): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t
## X-squared = 62.144, df = 15, p-value = 1.076e-07
# Association measures (chi-square-based and ordinal) for the row and column
# variables defined above.
library(oii)
association.measures(fila, columna)
## Chi-square-based measures of association:
##    Phi:                      0.560 
##    Contingency coefficient:  0.489 
##    Cramer's V:               0.323 
## 
## Ordinal measures of association:
##    Total number of pairs:   19503 
##    Concordant pairs:        5240   ( 26.87 %)
##    Discordant pairs:        4839   ( 24.81 %)
##    Tied on first variable:  2780   ( 14.25 %)
##    Tied on second variable: 4838   ( 24.81 %)
##    Tied on both variables:  1806   ( 9.26 %)
## 
##    Goodman-Kruskal Gamma: 0.040 
##    Somers' d (col dep.):  0.027 
##    Kendall's tau-b:       0.029 
##    Stuart's tau-c:        0.027
# Cross-tabulate economic freedom (rows) against regime type (columns).
# Regime is scraped as plain text, so table() would sort its categories
# alphabetically and keep "n/a" as a category. Encode it as an ordered factor
# from least to most democratic; labels not listed in `levels` ("n/a") become
# NA and are dropped by table().
# NOTE: the `##` output recorded below was produced before this recoding;
# re-run the script to refresh it.
columna <- factor(
  Freedom$Regime,
  levels = c(
    "authoritarian regime", "hybrid regime",
    "flawed democracy", "full democracy"
  ),
  ordered = TRUE
)
fila <- Freedom$EconomicFree
(t <- table(fila, columna))
##                  columna
## fila              authoritarian regime flawed democracy full democracy
##   free                               0                2              4
##   moderately free                    9               24              5
##   mostly free                        3               12             13
##   mostly unfree                     25               14              0
##   n/a                                5                0              0
##   repressed                         12                2              0
##                  columna
## fila              hybrid regime n/a
##   free                        0   0
##   moderately free            14  10
##   mostly free                 2   1
##   mostly unfree              17   6
##   n/a                         0  13
##   repressed                   4   1
# Column proportions (margin = 2): each column of the table sums to 1.
prop_t <- prop.table(t, margin = 2)
print(prop_t)
##                  columna
## fila              authoritarian regime flawed democracy full democracy
##   free                      0.00000000       0.03703704     0.18181818
##   moderately free           0.16666667       0.44444444     0.22727273
##   mostly free               0.05555556       0.22222222     0.59090909
##   mostly unfree             0.46296296       0.25925926     0.00000000
##   n/a                       0.09259259       0.00000000     0.00000000
##   repressed                 0.22222222       0.03703704     0.00000000
##                  columna
## fila              hybrid regime        n/a
##   free               0.00000000 0.00000000
##   moderately free    0.37837838 0.32258065
##   mostly free        0.05405405 0.03225806
##   mostly unfree      0.45945946 0.19354839
##   n/a                0.00000000 0.41935484
##   repressed          0.10810811 0.03225806
library(gplots)
# Note the use of t() to transpose the proportions table. The call still
# resolves to the transpose function although a variable named `t` exists,
# because R skips non-function objects when looking up a function name.
balloonplot(
  t(prop_t),
  main = "tabla",
  label = TRUE,
  show.margins = FALSE
)

# Pearson chi-squared test of independence on the contingency table of counts.
chisq.test(t)
## Warning in chisq.test(t): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t
## X-squared = 141.88, df = 20, p-value < 2.2e-16
# Association measures (chi-square-based and ordinal) for the row and column
# variables defined above.
library(oii)
association.measures(fila, columna)
## Chi-square-based measures of association:
##    Phi:                      0.846 
##    Contingency coefficient:  0.646 
##    Cramer's V:               0.423 
## 
## Ordinal measures of association:
##    Total number of pairs:   19503 
##    Concordant pairs:        5352   ( 27.44 %)
##    Discordant pairs:        6657   ( 34.13 %)
##    Tied on first variable:  3270   ( 16.77 %)
##    Tied on second variable: 2908   ( 14.91 %)
##    Tied on both variables:  1316   ( 6.75 %)
## 
##    Goodman-Kruskal Gamma: -0.109 
##    Somers' d (col dep.):  -0.087 
##    Kendall's tau-b:       -0.086 
##    Stuart's tau-c:        -0.083
# Scrape the table selected by the XPath below from the Wikipedia page
# "Democracy Index".
LinkDemo <- "https://en.wikipedia.org/wiki/Democracy_Index"
pathDemo <- '//*[@id="mw-content-text"]/div[1]/table[2]'
Demo <- htmltab::htmltab(doc = LinkDemo, which = pathDemo)
# Drop columns 1, 3 and 11 by position, then rename the eight that remain.
# NOTE(review): positional drops depend on the page layout at scrape time.
Demo <- Demo[, -c(1, 3, 11)]
names(Demo) <- c(
  "country", "elec", "funct", "partic", "cult", "civil", "regime", "region"
)
names(Demo)
## [1] "country" "elec"    "funct"   "partic"  "cult"    "civil"   "regime" 
## [8] "region"
# Inspect column types before converting the scores.
str(Demo)
## 'data.frame':    167 obs. of  8 variables:
##  $ country: chr  " Norway" " Iceland" " Sweden" " New Zealand" ...
##  $ elec   : chr  "10.00" "10.00" "9.58" "10.00" ...
##  $ funct  : chr  "9.64" "9.29" "9.64" "9.29" ...
##  $ partic : chr  "10.00" "8.89" "8.33" "8.89" ...
##  $ cult   : chr  "10.00" "10.00" "10.00" "8.13" ...
##  $ civil  : chr  "9.71" "9.71" "9.41" "10.00" ...
##  $ regime : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ region : chr  "Western Europe" "Western Europe" "Western Europe" "Asia & Australasia" ...
# Columns 2 to 6 (elec, funct, partic, cult, civil) were scraped as text;
# convert each of them to numeric.
Demo[2:6] <- lapply(Demo[2:6], as.numeric)
str(Demo)
## 'data.frame':    167 obs. of  8 variables:
##  $ country: chr  " Norway" " Iceland" " Sweden" " New Zealand" ...
##  $ elec   : num  10 10 9.58 10 10 10 10 9.58 10 9.58 ...
##  $ funct  : num  9.64 9.29 9.64 9.29 8.93 7.86 9.29 9.64 8.93 9.29 ...
##  $ partic : num  10 8.89 8.33 8.89 8.89 8.33 8.33 7.78 7.78 7.78 ...
##  $ cult   : num  10 10 10 8.13 8.75 10 9.38 9.38 8.75 9.38 ...
##  $ civil  : num  9.71 9.71 9.41 10 9.71 10 9.12 9.71 10 9.12 ...
##  $ regime : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ region : chr  "Western Europe" "Western Europe" "Western Europe" "Asia & Australasia" ...
# Mean of `cult` within each `region`.
fcatnum <- cult ~ region
aggregate(fcatnum, data = Demo, FUN = mean)
##                       region     cult
## 1         Asia & Australasia 5.493571
## 2             Eastern Europe 4.935357
## 3              Latin America 5.262917
## 4 Middle East & North Africa 4.753000
## 5              North America 8.440000
## 6         Sub-Saharan Africa 5.230227
## 7             Western Europe 8.097143
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Scatter plot of partic vs cult annotated with the Pearson correlation.
p1 <- ggscatter(
  Demo,
  x = "partic",
  y = "cult",
  cor.coef = TRUE,
  cor.method = "pearson"
)

print(p1)

# Same scatter plot annotated with the Spearman correlation instead.
s4 <- ggscatter(
  Demo,
  x = "partic",
  y = "cult",
  cor.coef = TRUE,
  cor.method = "spearman"
)
print(s4)

En las dos últimas me equivoqué: el método correcto era “spearman” y el coeficiente de correlación era 0.53.