library(htmltab)
# Scrape the Wimbledon gentlemen's singles champions table from Wikipedia.
# `which` is an XPath expression: it selects a <table> that is the 4th table
# child of a <div>. The expression is written in standard abbreviated XPath
# ("//" = any depth), matching the one used for the ladies' page further down.
# NOTE(review): the table position depends on the live page layout; confirm
# that index 4 still points at the intended table if the page changes.
linkTenissMen <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"
linkTabla <- "//div/table[4]"
TennisW <- htmltab(doc = linkTenissMen, which = linkTabla)
# Inspect the scraped table: column names first, then structure.
colnames(TennisW)
## [1] "Year"               "Country"            "Champion"          
## [4] "Country"            "Runner-up"          "Score in the final"
str(TennisW)
## 'data.frame':    52 obs. of  6 variables:
##  $ Year              : chr  "1968" "1969" "1970" "1971" ...
##  $ Country           : chr  " AUS" " AUS" " AUS" " AUS" ...
##  $ Champion          : chr  "Rod Laver" "Rod Laver" "John Newcombe" "John Newcombe" ...
##  $ Country           : chr  " AUS" " AUS" " AUS" " USA" ...
##  $ Runner-up         : chr  "Tony Roche" "John Newcombe" "Ken Rosewall" "Stan Smith" ...
##  $ Score in the final: chr  "6–3, 6–4, 6–2" "6–4, 5–7, 6–4, 6–4" "5–7, 6–3, 6–2, 3–6, 6–1" "6–3, 5–7, 2–6, 6–4, 6–4" ...
# Keep only column 2, the "Country" column listed just before "Champion".
# It is selected by position because two columns share the name "Country".
# NOTE(review): the values carry a leading space (see the str() output).
TennisW <- TennisW[2]
colnames(TennisW)
## [1] "Country"
str(TennisW)
## 'data.frame':    52 obs. of  1 variable:
##  $ Country: chr  " AUS" " AUS" " AUS" " AUS" ...
# Show the full one-column data frame.
TennisW
##    Country
## 2      AUS
## 3      AUS
## 4      AUS
## 5      AUS
## 6      USA
## 7      TCH
## 8      USA
## 9      USA
## 10     SWE
## 11     SWE
## 12     SWE
## 13     SWE
## 14     SWE
## 15     USA
## 16     USA
## 17     USA
## 18     USA
## 19     FRG
## 20     FRG
## 21     AUS
## 22     SWE
## 23     FRG
## 24     SWE
## 25     GER
## 26     USA
## 27     USA
## 28     USA
## 29     USA
## 30     NED
## 31     USA
## 32     USA
## 33     USA
## 34     USA
## 35     CRO
## 36     AUS
## 37     SUI
## 38     SUI
## 39     SUI
## 40     SUI
## 41     SUI
## 42     ESP
## 43     SUI
## 44     ESP
## 45     SRB
## 46     SUI
## 47     GBR
## 48     SRB
## 49     SRB
## 50     GBR
## 51     SUI
## 52     SRB
## 53     SRB
# Preview the first six rows of the country column.
head(TennisW, n = 6L)
##   Country
## 2     AUS
## 3     AUS
## 4     AUS
## 5     AUS
## 6     USA
## 7     TCH

# Gráfico básico

library(ggplot2)
# Bar chart with one bar per Country value; geom_bar() counts the rows.
base <- ggplot(TennisW, mapping = aes(x = Country))
bar1 <- base + geom_bar()
bar1

library(DescTools)

# Moda

# Most frequent value of the Country column.
Mode(TennisW[["Country"]])
## [1] " USA"

# Dispersión/Variación Modal

# Variation ratio: one minus the relative frequency of the modal category.
dataTable <- table(TennisW[["Country"]])
1 - max(dataTable / sum(dataTable))
## [1] 0.7115385

# Concentración

# Concentration of the country counts, via DescTools' Herfindahl().
dataTable <- table(TennisW[["Country"]])
Herfindahl(dataTable)
## [1] 0.1553254

# PARTE 2

library(htmltab)
# Scrape the Wimbledon ladies' singles champions table from Wikipedia.
# `which` is an XPath expression selecting a <table> that is the 4th table
# child of a <div>.
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"
linkTabla <- "//div/table[4]"
TennisGirls <- htmltab(doc = linkPage, which = linkTabla)
# Inspect the scraped table: column names first, then structure.
colnames(TennisGirls)
## [1] "Year"               "Country"            "Champion"          
## [4] "Country"            "Runner-up"          "Score in the final"
str(TennisGirls)
## 'data.frame':    52 obs. of  6 variables:
##  $ Year              : chr  "1968" "1969" "1970" "1971" ...
##  $ Country           : chr  " USA" " GBR" " AUS" " AUS" ...
##  $ Champion          : chr  "Billie Jean King" "Ann Jones" "Margaret Court" "Evonne Goolagong" ...
##  $ Country           : chr  " AUS" " USA" " USA" " AUS" ...
##  $ Runner-up         : chr  "Judy Tegart" "Billie Jean King" "Billie Jean King" "Margaret Court" ...
##  $ Score in the final: chr  "9–7, 7–5" "3–6, 6–3, 6–2" "14–12, 11–9" "6–4, 6–1" ...
colnames(TennisGirls)
## [1] "Year"               "Country"            "Champion"          
## [4] "Country"            "Runner-up"          "Score in the final"
# Keep only column 2, the "Country" column listed just before "Champion".
# It is selected by position because two columns share the name "Country".
# NOTE(review): the values carry a leading space (see the str() output).
TennisGirls <- TennisGirls[2]
colnames(TennisGirls)
## [1] "Country"
str(TennisGirls)
## 'data.frame':    52 obs. of  1 variable:
##  $ Country: chr  " USA" " GBR" " AUS" " AUS" ...
# Show the full one-column data frame.
TennisGirls
##    Country
## 2      USA
## 3      GBR
## 4      AUS
## 5      AUS
## 6      USA
## 7      USA
## 8      USA
## 9      USA
## 10     USA
## 11     GBR
## 12     USA
## 13     USA
## 14     AUS
## 15     USA
## 16     USA
## 17     USA
## 18     USA
## 19     USA
## 20     USA
## 21     USA
## 22     FRG
## 23     FRG
## 24     USA
## 25     GER
## 26     GER
## 27     GER
## 28     ESP
## 29     GER
## 30     GER
## 31     SUI
## 32     CZE
## 33     USA
## 34     USA
## 35     USA
## 36     USA
## 37     USA
## 38     RUS
## 39     USA
## 40     FRA
## 41     USA
## 42     USA
## 43     USA
## 44     USA
## 45     CZE
## 46     USA
## 47     FRA
## 48     CZE
## 49     USA
## 50     USA
## 51     ESP
## 52     GER
## 53     ROU
# Preview the first six rows of the country column.
head(TennisGirls, n = 6L)
##   Country
## 2     USA
## 3     GBR
## 4     AUS
## 5     AUS
## 6     USA
## 7     USA
library(questionr)
library(magrittr)
# Frequency table of the Country column; cum = TRUE requests the cumulative
# columns as well. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
# The result is passed through data.frame(); the percentage columns then
# print under the names X., val., X.cum and val.cum (see the output below).
NomOEX <- freq(TennisGirls$Country, cum = TRUE) %>% data.frame()
# Move the category labels from the row names into a regular "variable"
# column and reset the row names to 1..n.
NomOEX <- data.frame(variable = row.names(NomOEX), NomOEX, row.names = NULL)
NomOEX
##    variable  n   X. val. X.cum val.cum
## 1       SUI  1  1.9  1.9   1.9     1.9
## 2       AUS  3  5.8  5.8   7.7     7.7
## 3       CZE  3  5.8  5.8  13.5    13.5
## 4       ESP  2  3.8  3.8  17.3    17.3
## 5       FRA  2  3.8  3.8  21.2    21.2
## 6       FRG  2  3.8  3.8  25.0    25.0
## 7       GBR  2  3.8  3.8  28.8    28.8
## 8       GER  6 11.5 11.5  40.4    40.4
## 9       ROU  1  1.9  1.9  42.3    42.3
## 10      RUS  1  1.9  1.9  44.2    44.2
## 11      USA 29 55.8 55.8 100.0   100.0
library(ggplot2)
# Bar chart drawn from the frequency table: bar heights are taken directly
# from column n (stat = "identity"), one bar per value of `variable`.
base <- ggplot(NomOEX, mapping = aes(x = variable, y = n))

bar1 <- base + geom_bar(stat = "identity")

bar1

# Pin the x-axis categories to the row order of NomOEX, then redraw.
bar1 <- bar1 + scale_x_discrete(limits = NomOEX[["variable"]])
bar1

# Title, axis labels and caption for the final chart.
text1 <- "Países - Open Era mujeres"
text2 <- "países"
text3 <- "Conteo"
text4 <- "Fuente: Wikipedia"

bar2 <- bar1 +
  labs(title = text1, x = text2, y = text3, caption = text4)
bar2

library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the country counts. cumperc lists the cumulative
# percentages (0, 50, 80, 100) passed to qcc's pareto.chart().
# NOTE: the printed analysis header echoes the expression given as the first
# argument (see the output below), so changing that expression changes the
# printed header text.
pareto.chart(table(TennisGirls$Country),cumperc = c(0,50,80,100))

##        
## Pareto chart analysis for table(TennisGirls$Country)
##          Frequency  Cum.Freq. Percentage Cum.Percent.
##    USA   29.000000  29.000000  55.769231    55.769231
##    GER    6.000000  35.000000  11.538462    67.307692
##    AUS    3.000000  38.000000   5.769231    73.076923
##    CZE    3.000000  41.000000   5.769231    78.846154
##    ESP    2.000000  43.000000   3.846154    82.692308
##    FRA    2.000000  45.000000   3.846154    86.538462
##    FRG    2.000000  47.000000   3.846154    90.384615
##    GBR    2.000000  49.000000   3.846154    94.230769
##     SUI   1.000000  50.000000   1.923077    96.153846
##    ROU    1.000000  51.000000   1.923077    98.076923
##    RUS    1.000000  52.000000   1.923077   100.000000

# ESTADÍSTICOS

library(DescTools)

# MODA

# Most frequent value of the Country column.
Mode(TennisGirls[["Country"]])
## [1] " USA"

# DISPERSIÓN/VARIACIÓN MODAL

# Variation ratio: one minus the relative frequency of the modal category.
dataTable <- table(TennisGirls[["Country"]])
1 - max(dataTable / sum(dataTable))
## [1] 0.4423077

# Concentración

# Concentration of the country counts, via DescTools' Herfindahl().
dataTable <- table(TennisGirls[["Country"]])
Herfindahl(dataTable)
## [1] 0.3380178