library(htmltab)
# Scrape the Wimbledon gentlemen's singles champions table from Wikipedia.
# `linkTabla` is an XPath that matches a <table> which is the fourth table
# child of a <div>. The previous value began with "///", which is not a valid
# XPath expression ("//" must be followed by a relative step); "//" is also
# the form this script uses for the ladies' page.
# NOTE(review): the selector is positional, so a change in the page layout
# can make it pick a different table -- check the names()/str() output.
linkTenissMen <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"
linkTabla <- "//div/table[4]"
TennisW <- htmltab(doc = linkTenissMen, which = linkTabla)
# Inspect the scraped table: column names first, then its structure.
colnames(TennisW)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
str(object = TennisW)
## 'data.frame': 52 obs. of 6 variables:
## $ Year : chr "1968" "1969" "1970" "1971" ...
## $ Country : chr " AUS" " AUS" " AUS" " AUS" ...
## $ Champion : chr "Rod Laver" "Rod Laver" "John Newcombe" "John Newcombe" ...
## $ Country : chr " AUS" " AUS" " AUS" " USA" ...
## $ Runner-up : chr "Tony Roche" "John Newcombe" "Ken Rosewall" "Stan Smith" ...
## $ Score in the final: chr "6–3, 6–4, 6–2" "6–4, 5–7, 6–4, 6–4" "5–7, 6–3, 6–2, 3–6, 6–1" "6–3, 5–7, 2–6, 6–4, 6–4" ...
# Keep only the second column. Two columns are named "Country", so the column
# is chosen by position (the first "Country", presumably the champion's).
TennisW <- TennisW[2]
colnames(TennisW)
## [1] "Country"
str(object = TennisW)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr " AUS" " AUS" " AUS" " AUS" ...
# Auto-print the whole one-column data frame for visual inspection.
TennisW
## Country
## 2 AUS
## 3 AUS
## 4 AUS
## 5 AUS
## 6 USA
## 7 TCH
## 8 USA
## 9 USA
## 10 SWE
## 11 SWE
## 12 SWE
## 13 SWE
## 14 SWE
## 15 USA
## 16 USA
## 17 USA
## 18 USA
## 19 FRG
## 20 FRG
## 21 AUS
## 22 SWE
## 23 FRG
## 24 SWE
## 25 GER
## 26 USA
## 27 USA
## 28 USA
## 29 USA
## 30 NED
## 31 USA
## 32 USA
## 33 USA
## 34 USA
## 35 CRO
## 36 AUS
## 37 SUI
## 38 SUI
## 39 SUI
## 40 SUI
## 41 SUI
## 42 ESP
## 43 SUI
## 44 ESP
## 45 SRB
## 46 SUI
## 47 GBR
## 48 SRB
## 49 SRB
## 50 GBR
## 51 SUI
## 52 SRB
## 53 SRB
# Show the first six rows only.
head(x = TennisW, n = 6L)
## Country
## 2 AUS
## 3 AUS
## 4 AUS
## 5 AUS
## 6 USA
## 7 TCH
# Gráfico básico
library(ggplot2)
# Basic bar chart: one bar per country, bar height is the number of rows.
base <- ggplot(TennisW, mapping = aes(x = Country))
bar1 <- base + geom_bar()
bar1
library(DescTools)
# Moda
# Most frequent country value (the mode of the nominal variable).
Mode(x = TennisW[["Country"]])
## [1] " USA"
# Dispersión / Variación modal
# Variation ratio: share of observations that fall outside the modal category.
dataTable <- table(TennisW[["Country"]])
1 - max(dataTable / sum(dataTable))
## [1] 0.7115385
# Concentración
# Herfindahl concentration index computed on the country counts.
dataTable <- table(TennisW[["Country"]])
Herfindahl(x = dataTable)
## [1] 0.1553254
# PARTE 2
library(htmltab)
# Scrape the Wimbledon ladies' singles champions table from Wikipedia; the
# table is addressed by an XPath expression.
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"
linkTabla <- "//div/table[4]"
TennisGirls <- htmltab(
  doc = linkPage,
  which = linkTabla
)
# Inspect the scraped table: column names first, then its structure.
colnames(TennisGirls)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
str(object = TennisGirls)
## 'data.frame': 52 obs. of 6 variables:
## $ Year : chr "1968" "1969" "1970" "1971" ...
## $ Country : chr " USA" " GBR" " AUS" " AUS" ...
## $ Champion : chr "Billie Jean King" "Ann Jones" "Margaret Court" "Evonne Goolagong" ...
## $ Country : chr " AUS" " USA" " USA" " AUS" ...
## $ Runner-up : chr "Judy Tegart" "Billie Jean King" "Billie Jean King" "Margaret Court" ...
## $ Score in the final: chr "9–7, 7–5" "3–6, 6–3, 6–2" "14–12, 11–9" "6–4, 6–1" ...
# Column names before the selection.
colnames(TennisGirls)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
# Keep only the second column. Two columns are named "Country", so the column
# is chosen by position (the first "Country", presumably the champion's).
TennisGirls <- TennisGirls[2]
colnames(TennisGirls)
## [1] "Country"
str(object = TennisGirls)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr " USA" " GBR" " AUS" " AUS" ...
# Auto-print the whole one-column data frame for visual inspection.
TennisGirls
## Country
## 2 USA
## 3 GBR
## 4 AUS
## 5 AUS
## 6 USA
## 7 USA
## 8 USA
## 9 USA
## 10 USA
## 11 GBR
## 12 USA
## 13 USA
## 14 AUS
## 15 USA
## 16 USA
## 17 USA
## 18 USA
## 19 USA
## 20 USA
## 21 USA
## 22 FRG
## 23 FRG
## 24 USA
## 25 GER
## 26 GER
## 27 GER
## 28 ESP
## 29 GER
## 30 GER
## 31 SUI
## 32 CZE
## 33 USA
## 34 USA
## 35 USA
## 36 USA
## 37 USA
## 38 RUS
## 39 USA
## 40 FRA
## 41 USA
## 42 USA
## 43 USA
## 44 USA
## 45 CZE
## 46 USA
## 47 FRA
## 48 CZE
## 49 USA
## 50 USA
## 51 ESP
## 52 GER
## 53 ROU
# Show the first six rows only.
head(x = TennisGirls, n = 6L)
## Country
## 2 USA
## 3 GBR
## 4 AUS
## 5 AUS
## 6 USA
## 7 USA
library(questionr)
library(magrittr)
# Frequency table of the country values (counts, percentages and cumulative
# percentages), converted to a plain data frame and then rebuilt so that the
# category labels become a regular first column named `variable`.
# `cum = TRUE` replaces `cum = T`: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
NomOEX <- freq(TennisGirls$Country, cum = TRUE) %>% data.frame()
NomOEX <- data.frame(variable = row.names(NomOEX), NomOEX, row.names = NULL)
NomOEX
## variable n X. val. X.cum val.cum
## 1 SUI 1 1.9 1.9 1.9 1.9
## 2 AUS 3 5.8 5.8 7.7 7.7
## 3 CZE 3 5.8 5.8 13.5 13.5
## 4 ESP 2 3.8 3.8 17.3 17.3
## 5 FRA 2 3.8 3.8 21.2 21.2
## 6 FRG 2 3.8 3.8 25.0 25.0
## 7 GBR 2 3.8 3.8 28.8 28.8
## 8 GER 6 11.5 11.5 40.4 40.4
## 9 ROU 1 1.9 1.9 42.3 42.3
## 10 RUS 1 1.9 1.9 44.2 44.2
## 11 USA 29 55.8 55.8 100.0 100.0
library(ggplot2)
# Bar chart of the pre-computed counts. It is drawn once as-is, then again
# with the x-axis order pinned to the row order of NomOEX.
base <- ggplot(NomOEX, mapping = aes(x = variable, y = n))
bar1 <- base + geom_bar(stat = "identity")
bar1
bar1 <- bar1 + scale_x_discrete(limits = NomOEX[["variable"]])
bar1
# Title, axis labels and caption for the bar chart.
text1 <- "Países - Open Era mujeres"
text2 <- "países"
text3 <- "Conteo"
text4 <- "Fuente: Wikipedia"
bar2 <- bar1 + labs(title = text1, x = text2, y = text3, caption = text4)
bar2
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the country counts. The recorded output is titled with the
# literal argument text "table(TennisGirls$Country)", so the call expression
# is left exactly as written.
# NOTE(review): `cumperc` is presumably the set of percentage tick marks for
# the cumulative axis -- confirm against the qcc documentation.
pareto.chart(table(TennisGirls$Country),cumperc = c(0,50,80,100))
##
## Pareto chart analysis for table(TennisGirls$Country)
## Frequency Cum.Freq. Percentage Cum.Percent.
## USA 29.000000 29.000000 55.769231 55.769231
## GER 6.000000 35.000000 11.538462 67.307692
## AUS 3.000000 38.000000 5.769231 73.076923
## CZE 3.000000 41.000000 5.769231 78.846154
## ESP 2.000000 43.000000 3.846154 82.692308
## FRA 2.000000 45.000000 3.846154 86.538462
## FRG 2.000000 47.000000 3.846154 90.384615
## GBR 2.000000 49.000000 3.846154 94.230769
## SUI 1.000000 50.000000 1.923077 96.153846
## ROU 1.000000 51.000000 1.923077 98.076923
## RUS 1.000000 52.000000 1.923077 100.000000
# ESTADÍSTICOS
library(DescTools)
# MODA
# Most frequent country value (the mode of the nominal variable).
Mode(x = TennisGirls[["Country"]])
## [1] " USA"
# DISPERSIÓN / VARIACIÓN MODAL
# Variation ratio: share of observations that fall outside the modal category.
dataTable <- table(TennisGirls[["Country"]])
1 - max(dataTable / sum(dataTable))
## [1] 0.4423077
# Concentración
# Herfindahl concentration index computed on the country counts.
dataTable <- table(TennisGirls[["Country"]])
Herfindahl(x = dataTable)
## [1] 0.3380178