PC2.utf8.md

# Wikipedia page listing the Wimbledon gentlemen's singles champions.
LINK1 <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"

library(htmltab)

# Scrape the fourth table child of the article body into a data frame.
B <- htmltab(
  doc = LINK1,
  which = '//*[@id="mw-content-text"]/div/table[4]'
)

names(B)

## [1] "Year"               "Country"            "Champion"          
## [4] "Country"            "Runner-up"          "Score in the final"

# Inspect the column types of the scraped table before recoding.
str(B)

## 'data.frame':    52 obs. of  6 variables:
##  $ Year              : chr  "1968" "1969" "1970" "1971" ...
##  $ Country           : chr  " AUS" " AUS" " AUS" " AUS" ...
##  $ Champion          : chr  "Rod Laver" "Rod Laver" "John Newcombe" "John Newcombe" ...
##  $ Country           : chr  " AUS" " AUS" " AUS" " USA" ...
##  $ Runner-up         : chr  "Tony Roche" "John Newcombe" "Ken Rosewall" "Stan Smith" ...
##  $ Score in the final: chr  "6–3, 6–4, 6–2" "6–4, 5–7, 6–4, 6–4" "5–7, 6–3, 6–2, 3–6, 6–1" "6–3, 5–7, 2–6, 6–4, 6–4" ...

#Recodificación

# Shorten the name of the final-score column.
names(B)[6] <- "Score"

# Convert Year from character to numeric as a whole vector. Iterating over the
# column with lapply() returned a 52-element list, so the assignment warned
# ("provided 52 variables to replace 1 variables") and every row was left
# holding the first year, 1968.
B[[1]] <- as.numeric(B[[1]])

str(B)

## 'data.frame':    52 obs. of  6 variables:
##  $ Year     : num  1968 1969 1970 1971 1972 ...
##  $ Country  : chr  " AUS" " AUS" " AUS" " AUS" ...
##  $ Champion : chr  "Rod Laver" "Rod Laver" "John Newcombe" "John Newcombe" ...
##  $ Country  : chr  " AUS" " AUS" " AUS" " USA" ...
##  $ Runner-up: chr  "Tony Roche" "John Newcombe" "Ken Rosewall" "Stan Smith" ...
##  $ Score    : chr  "6–3, 6–4, 6–2" "6–4, 5–7, 6–4, 6–4" "5–7, 6–3, 6–2, 3–6, 6–1" "6–3, 5–7, 2–6, 6–4, 6–4" ...

# Count titles per country. B has two "Country" columns; `$` returns the
# first one, which sits next to "Champion" (presumably the champion's country).
table(B$Country)

## 
##   SUI   AUS   CRO   ESP   FRG   GBR   GER   NED   SRB   SWE   TCH   USA 
##     8     6     1     2     3     2     1     1     5     7     1    15

library(questionr)
library(magrittr)
# Frequency table of champion countries, sorted by decreasing count and
# without a totals row. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
B1 <- freq(B$Country, total = FALSE, sort = "dec", exclude = NA) %>%
  data.frame()

# Move the country codes from the row names into a regular column.
B1 <- data.frame(variable = row.names(B1), B1, row.names = NULL)
B1

##    variable  n   X.
## 1       USA 15 28.8
## 2       SUI  8 15.4
## 3       SWE  7 13.5
## 4       AUS  6 11.5
## 5       SRB  5  9.6
## 6       FRG  3  5.8
## 7       ESP  2  3.8
## 8       GBR  2  3.8
## 9       CRO  1  1.9
## 10      GER  1  1.9
## 11      NED  1  1.9
## 12      TCH  1  1.9

library(ggplot2)
# Bar chart of titles per champion country, built from the frequency table.
base <- ggplot(data = B1, aes(x = variable, y = n))

# The x axis follows the row order of B1 (decreasing count) instead of the
# default alphabetical order.
B2 <- base +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = B1$variable) +
  labs(
    title = "Países con mayor cantidad de ganadores del Wimbledon ",
    subtitle = "De 1968 al 2019 ",
    x = "País",
    y = "Conteo",
    caption = "Fuente: Wikipedia"
  )

B2

library(qcc)

## Package 'qcc' version 2.7

## Type 'citation("qcc")' for citing this R package in publications.

# Pareto chart of titles per country; the cumulative-percentage axis is
# ticked at 0, 25, 50, 75 and 100.
pareto.chart(table(B$Country), cumperc = seq(0, 100, by = 25))

##        
## Pareto chart analysis for table(B$Country)
##          Frequency  Cum.Freq. Percentage Cum.Percent.
##    USA   15.000000  15.000000  28.846154    28.846154
##     SUI   8.000000  23.000000  15.384615    44.230769
##    SWE    7.000000  30.000000  13.461538    57.692308
##    AUS    6.000000  36.000000  11.538462    69.230769
##    SRB    5.000000  41.000000   9.615385    78.846154
##    FRG    3.000000  44.000000   5.769231    84.615385
##    ESP    2.000000  46.000000   3.846154    88.461538
##    GBR    2.000000  48.000000   3.846154    92.307692
##    CRO    1.000000  49.000000   1.923077    94.230769
##    GER    1.000000  50.000000   1.923077    96.153846
##    NED    1.000000  51.000000   1.923077    98.076923
##    TCH    1.000000  52.000000   1.923077   100.000000

Cerca del 80% (78.8%) del total de ganadores está representado por Estados Unidos, Suiza, Suecia, Australia y Serbia.

library (DescTools)
# Most frequent champion country (statistical mode).
Mode(B$Country)

## [1] " USA"

La moda es Estados Unidos, por ende, el país que más veces ha ganado el certamen es Estados Unidos.

# Herfindahl concentration index of the per-country title counts
# (values near 1 mean a single country dominates).
Herfindahl(table(B$Country))

## [1] 0.1553254

La moda no es significativa: el índice de Herfindahl (0.155) es bajo, por lo que Estados Unidos no posee una preponderancia cabal.

# Inverse of the Herfindahl index, read below as the number of
# representative country groups.
1/Herfindahl(table(B$Country))

## [1] 6.438095

Hay aproximadamente 6 grupos representativos (el inverso del índice de Herfindahl es 6.44).

# Wikipedia page listing the Wimbledon ladies' singles champions.
LINK2 <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"

# Scrape the fourth table child of the article body into a data frame.
C <- htmltab(
  doc = LINK2,
  which = '//*[@id="mw-content-text"]/div/table[4]'
)

names(C)

## [1] "Year"               "Country"            "Champion"          
## [4] "Country"            "Runner-up"          "Score in the final"

# Inspect the column types of the scraped table before recoding.
str(C)

## 'data.frame':    52 obs. of  6 variables:
##  $ Year              : chr  "1968" "1969" "1970" "1971" ...
##  $ Country           : chr  " USA" " GBR" " AUS" " AUS" ...
##  $ Champion          : chr  "Billie Jean King" "Ann Jones" "Margaret Court" "Evonne Goolagong" ...
##  $ Country           : chr  " AUS" " USA" " USA" " AUS" ...
##  $ Runner-up         : chr  "Judy Tegart" "Billie Jean King" "Billie Jean King" "Margaret Court" ...
##  $ Score in the final: chr  "9–7, 7–5" "3–6, 6–3, 6–2" "14–12, 11–9" "6–4, 6–1" ...

Sólo se empleará la variable “Country” del ganador del torneo, por ende, las variables correspondientes de quien perdió no son necesarias.

#Recodificación

# Keep Year, the first Country column, Champion and the final score; the
# runner-up columns (4 and 5) are dropped.
C <- C[, c(1:3, 6)]

# Convert Year from character to numeric as a whole vector. Iterating over the
# column with lapply() returned a 52-element list, so the assignment warned
# ("provided 52 variables to replace 1 variables") and every row was left
# holding the first year, 1968.
C[[1]] <- as.numeric(C[[1]])

str(C)

## 'data.frame':    52 obs. of  4 variables:
##  $ Year              : num  1968 1969 1970 1971 1972 ...
##  $ Country           : chr  " USA" " GBR" " AUS" " AUS" ...
##  $ Champion          : chr  "Billie Jean King" "Ann Jones" "Margaret Court" "Evonne Goolagong" ...
##  $ Score in the final: chr  "9–7, 7–5" "3–6, 6–3, 6–2" "14–12, 11–9" "6–4, 6–1" ...

# Count titles per champion country.
table(C$Country)

## 
##   SUI   AUS   CZE   ESP   FRA   FRG   GBR   GER   ROU   RUS   USA 
##     1     3     3     2     2     2     2     6     1     1    29

# Frequency table of champion countries, sorted by decreasing count and
# without a totals row. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
C1 <- freq(C$Country, total = FALSE, sort = "dec", exclude = NA) %>%
  data.frame()

# Move the country codes from the row names into a regular column.
C1 <- data.frame(variable = row.names(C1), C1, row.names = NULL)
C1

##    variable  n   X.
## 1       USA 29 55.8
## 2       GER  6 11.5
## 3       AUS  3  5.8
## 4       CZE  3  5.8
## 5       ESP  2  3.8
## 6       FRA  2  3.8
## 7       FRG  2  3.8
## 8       GBR  2  3.8
## 9       SUI  1  1.9
## 10      ROU  1  1.9
## 11      RUS  1  1.9

# Bar chart of titles per champion country, built from the frequency table.
base <- ggplot(data = C1, aes(x = variable, y = n))
C2 <- base +
  geom_bar(stat = "identity") +
  labs(
    title = "Ganadoras del Wimbledon por país",
    subtitle = "Desde al año 1968 a la actualidad",
    x = "Países",
    y = "Cantidad",
    caption = " Fuente: Wikipedia"
  )

# Print with the classic theme: centred 20-pt title, right-aligned subtitle
# and caption, and x-axis labels rotated 45 degrees.
C2 +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 1),
    plot.caption = element_text(hjust = 1),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Pareto chart of titles per country; the cumulative-percentage axis is
# ticked at 0, 50, 80 and 100.
pareto.chart(table(C$Country), cumperc = c(0, 50, 80, 100))

##        
## Pareto chart analysis for table(C$Country)
##          Frequency  Cum.Freq. Percentage Cum.Percent.
##    USA   29.000000  29.000000  55.769231    55.769231
##    GER    6.000000  35.000000  11.538462    67.307692
##    AUS    3.000000  38.000000   5.769231    73.076923
##    CZE    3.000000  41.000000   5.769231    78.846154
##    ESP    2.000000  43.000000   3.846154    82.692308
##    FRA    2.000000  45.000000   3.846154    86.538462
##    FRG    2.000000  47.000000   3.846154    90.384615
##    GBR    2.000000  49.000000   3.846154    94.230769
##     SUI   1.000000  50.000000   1.923077    96.153846
##    ROU    1.000000  51.000000   1.923077    98.076923
##    RUS    1.000000  52.000000   1.923077   100.000000

Los Estados Unidos poseen más del 50% de ganadoras. El 80% está representado por Estados Unidos, Alemania, Australia, República Checa y España.

# Most frequent champion country (statistical mode).
Mode(C$Country)

## [1] " USA"

La moda es Estados Unidos, el país más ganador del Wimbledon es Estados Unidos.

# Herfindahl concentration index of the per-country title counts
# (values near 1 mean a single country dominates).
Herfindahl(table(C$Country))

## [1] 0.3380178

La moda es significativa; por ende, se puede interpretar que Estados Unidos ha incentivado mucho este deporte.

# Inverse of the Herfindahl index, read below as the number of
# representative country groups. Calls Herfindahl() as the rest of the
# analysis does, instead of re-deriving the index by hand with the
# non-standard `**` power operator.
1 / Herfindahl(table(C$Country))

## [1] 2.958425

Existen aproximadamente tres grupos representativos (el inverso del índice de Herfindahl es 2.96), lo que implica que estos países se preocupan por el deporte.