# Men's singles: scrape the Wimbledon champions table from Wikipedia and keep
# only the champion's country column.
library(htmltab)
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"
# XPath of the target table. It must start with "//" (any descendant); the
# previous value "///div/table[4]" is not a well-formed XPath expression.
linkTabla <- "//div/table[4]"
# NOTE: `open` masks base::open() from here on; the name is kept because the
# statements that follow in this script refer to it.
open <- htmltab(doc = linkPage, which = linkTabla)
names(open)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
str(open)
## 'data.frame': 52 obs. of 6 variables:
## $ Year : chr "1968" "1969" "1970" "1971" ...
## $ Country : chr " AUS" " AUS" " AUS" " AUS" ...
## $ Champion : chr "Rod Laver" "Rod Laver" "John Newcombe" "John Newcombe" ...
## $ Country : chr " AUS" " AUS" " AUS" " USA" ...
## $ Runner-up : chr "Tony Roche" "John Newcombe" "Ken Rosewall" "Stan Smith" ...
## $ Score in the final: chr "6–3, 6–4, 6–2" "6–4, 5–7, 6–4, 6–4" "5–7, 6–3, 6–2, 3–6, 6–1" "6–3, 5–7, 2–6, 6–4, 6–4" ...
# Two columns are both named "Country", so select by position: column 2 is the
# champion's country (column 4 is the runner-up's). Single-bracket indexing
# keeps the result a one-column data frame.
open <- open[2]
names(open)
## [1] "Country"
str(open)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr " AUS" " AUS" " AUS" " AUS" ...
open
head(open)
# Frequency table of the champions' countries (men's singles).
library(questionr)
library(magrittr)
# freq() tabulates the column (cum = TRUE requests the cumulative columns);
# data.frame() then turns the result into a plain data frame.
NOMINAL <- freq(open$Country, cum = TRUE) %>%
  data.frame()
# The category labels live in the row names; promote them to a regular
# `variable` column and drop the row names.
NOMINAL <- data.frame(
  variable = row.names(NOMINAL),
  NOMINAL,
  row.names = NULL
)
# Show the frequency table just built (the raw `open` data was printed here
# before, which did not display the result of this step).
NOMINAL
# Bar chart of titles per country (men's singles), first bare and then with
# title, axis labels and caption.
library(ggplot2)

# Texts used to annotate the final chart.
text1 <- "Datos en la tabla estadística - países"
text2 <- "Countries"
text3 <- "Conteo"
text4 <- "Fuente: Wikipedia"

# One bar per row of NOMINAL; bar height is taken directly from column `n`.
base <- ggplot(data = NOMINAL, mapping = aes(x = variable, y = n))
bar1 <- base +
  geom_bar(stat = "identity")
bar1

# Same chart with the annotations added.
bar2 <- bar1 +
  labs(
    title = text1,
    x = text2,
    y = text3,
    caption = text4
  )
bar2
# Pareto chart of the champions' countries (men's singles).
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# table() counts the titles per country. The data expression is passed inline
# because pareto.chart() echoes it in the header of its printed summary (see
# the recorded output below).
# NOTE(review): cumperc is presumably the set of cumulative-percentage marks
# drawn on the chart - confirm against the qcc documentation.
pareto.chart(table(open$Country),cumperc = c(0,50,80,100))
# Recorded output of the call above:
##
## Pareto chart analysis for table(open$Country)
## Frequency Cum.Freq. Percentage Cum.Percent.
## USA 15.000000 15.000000 28.846154 28.846154
## SUI 8.000000 23.000000 15.384615 44.230769
## SWE 7.000000 30.000000 13.461538 57.692308
## AUS 6.000000 36.000000 11.538462 69.230769
## SRB 5.000000 41.000000 9.615385 78.846154
## FRG 3.000000 44.000000 5.769231 84.615385
## ESP 2.000000 46.000000 3.846154 88.461538
## GBR 2.000000 48.000000 3.846154 92.307692
## CRO 1.000000 49.000000 1.923077 94.230769
## GER 1.000000 50.000000 1.923077 96.153846
## NED 1.000000 51.000000 1.923077 98.076923
## TCH 1.000000 52.000000 1.923077 100.000000
# El 80% acumulado se alcanza entre Serbia (SRB, 78.8%) y Alemania Federal (FRG, 84.6%).
# Mode (most frequent value) of the champions' country, men's singles.
library(DescTools)
Mode(open[["Country"]])
## [1] " USA"
# USA es el país que más se repite (la moda).
# Variation ratio: share of observations that fall outside the modal category.
dataTable <- table(open[["Country"]])
# Relative frequencies are the counts divided by their total.
1 - max(dataTable / sum(dataTable))
## [1] 0.7115385
# El 71% de los casos no corresponde a la moda (razón de variación = 0.71).
# Herfindahl concentration index of the country counts (men's singles).
# Herfindahl() comes from DescTools, which must already be attached.
dataTable <- table(open[["Country"]])
Herfindahl(dataTable)
## [1] 0.1553254
# La moda no es significativa: el índice de Herfindahl (0.16) indica baja concentración.
# MUJERES ----
# Women's singles: scrape the Wimbledon champions table from Wikipedia and keep
# only the champion's country column.
library(htmltab)
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"
# XPath of the table to extract from the page.
linkTabla <- "//div/table[4]"
openn <- htmltab(doc = linkPage, which = linkTabla)
names(openn)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
str(openn)
## 'data.frame': 52 obs. of 6 variables:
## $ Year : chr "1968" "1969" "1970" "1971" ...
## $ Country : chr " USA" " GBR" " AUS" " AUS" ...
## $ Champion : chr "Billie Jean King" "Ann Jones" "Margaret Court" "Evonne Goolagong" ...
## $ Country : chr " AUS" " USA" " USA" " AUS" ...
## $ Runner-up : chr "Judy Tegart" "Billie Jean King" "Billie Jean King" "Margaret Court" ...
## $ Score in the final: chr "9–7, 7–5" "3–6, 6–3, 6–2" "14–12, 11–9" "6–4, 6–1" ...
# Two columns share the name "Country"; pick the champion's one by position.
# Single-bracket indexing keeps the result a one-column data frame.
openn <- openn[2]
names(openn)
## [1] "Country"
str(openn)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr " USA" " GBR" " AUS" " AUS" ...
openn
head(openn)
# Frequency table of the champions' countries (women's singles).
library(questionr)
library(magrittr)
# freq() tabulates the column; cum = TRUE (spelled out, since `T` is an
# ordinary variable that can be reassigned) requests the cumulative columns.
NOMINALL <- freq(openn$Country, cum = TRUE) %>%
  data.frame()
# The category labels live in the row names; promote them to a regular
# `variable` column and drop the row names.
NOMINALL <- data.frame(
  variable = row.names(NOMINALL),
  NOMINALL,
  row.names = NULL
)
NOMINALL
# Bar chart of titles per country (women's singles), first bare and then with
# title, axis labels and caption.
library(ggplot2)

# Texts used to annotate the final chart.
text1 <- "Datos en la tabla estadística - países II"
text2 <- "países"
text3 <- "Conteo"
text4 <- "Fuente: Wikipedia"

# One bar per row of NOMINALL; bar height is taken directly from column `n`.
base <- ggplot(data = NOMINALL, mapping = aes(x = variable, y = n))
bar1 <- base +
  geom_bar(stat = "identity")
bar1

# Same chart with the annotations added.
bar2 <- bar1 +
  labs(
    title = text1,
    x = text2,
    y = text3,
    caption = text4
  )
bar2
# Pareto chart of the champions' countries (women's singles).
library(qcc)
# table() counts the titles per country. The data expression is passed inline
# because pareto.chart() echoes it in the header of its printed summary (see
# the recorded output below).
# NOTE(review): cumperc is presumably the set of cumulative-percentage marks
# drawn on the chart - confirm against the qcc documentation.
pareto.chart(table(openn$Country),cumperc = c(0,50,80,100))
# Recorded output of the call above:
##
## Pareto chart analysis for table(openn$Country)
## Frequency Cum.Freq. Percentage Cum.Percent.
## USA 29.000000 29.000000 55.769231 55.769231
## GER 6.000000 35.000000 11.538462 67.307692
## AUS 3.000000 38.000000 5.769231 73.076923
## CZE 3.000000 41.000000 5.769231 78.846154
## ESP 2.000000 43.000000 3.846154 82.692308
## FRA 2.000000 45.000000 3.846154 86.538462
## FRG 2.000000 47.000000 3.846154 90.384615
## GBR 2.000000 49.000000 3.846154 94.230769
## SUI 1.000000 50.000000 1.923077 96.153846
## ROU 1.000000 51.000000 1.923077 98.076923
## RUS 1.000000 52.000000 1.923077 100.000000
# El 80% acumulado se alcanza entre la República Checa (CZE, 78.8%) y España (ESP, 82.7%); con Francia (FRA) ya se llega al 86.5%.
# Mode (most frequent value) of the champions' country, women's singles.
library(DescTools)
Mode(openn[["Country"]])
## [1] " USA"
# El país que más se repite es USA (la moda).
# Variation ratio: share of observations that fall outside the modal category.
dataTable <- table(openn[["Country"]])
# Relative frequencies are the counts divided by their total.
1 - max(dataTable / sum(dataTable))
## [1] 0.4423077
# El 44% de los casos no corresponde a la moda (razón de variación = 0.44).
# Herfindahl concentration index of the country counts (women's singles).
# Herfindahl() comes from DescTools, which must already be attached.
dataTable <- table(openn[["Country"]])
Herfindahl(dataTable)
## [1] 0.3380178
# La moda se diferencia de las demás categorías: el índice de Herfindahl (0.34) indica mayor concentración.