En los dos casos planteados, se está trabajando con variables nominales.
# Scrape the table of Wimbledon gentlemen's singles champions from Wikipedia
# (presumably the Open Era table, going by the object name) and keep only the
# country column.
library(htmltab)
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"
# XPath of the table to read: the fourth table under the div inside the
# element with id "mw-content-text".
linkTabla <- '//*[@id="mw-content-text"]/div/table[4]/tbody'
OpenEra <- htmltab(doc = linkPage, which = linkTabla)
# Keep the second column as a one-column data frame.
OpenEra <- OpenEra[2]
# The scraped codes arrive with leading whitespace (e.g. " AUS"). Strip it,
# including Unicode whitespace such as non-breaking spaces, so every country
# is a single clean category in the tables and plots built from this column.
# Note: outputs recorded elsewhere in this document may predate this cleanup.
OpenEra[[1]] <- trimws(OpenEra[[1]], whitespace = "[\\h\\v]")
names(OpenEra)
## [1] "Country"
str(OpenEra)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr "AUS" "AUS" "AUS" "AUS" ...
head(OpenEra)
## Country
## 2 AUS
## 3 AUS
## 4 AUS
## 5 AUS
## 6 USA
## 7 TCH
2.GRÁFICOS
# Frequency table of champions per country, including cumulative percentages.
library(questionr)
library(magrittr)
# Build the frequency table and convert it to a plain data frame.
freqCountry <- freq(OpenEra$Country, cum = TRUE) %>%
  data.frame()
# Move the country codes from the row names into a regular "Country" column
# so they can be mapped to a plot aesthetic.
freqCountry <- data.frame(
  Country = row.names(freqCountry),
  freqCountry,
  row.names = NULL
)
freqCountry
## Country n X. val. X.cum val.cum
## 1 SUI 8 15.4 15.4 15.4 15.4
## 2 AUS 6 11.5 11.5 26.9 26.9
## 3 CRO 1 1.9 1.9 28.8 28.8
## 4 ESP 2 3.8 3.8 32.7 32.7
## 5 FRG 3 5.8 5.8 38.5 38.5
## 6 GBR 2 3.8 3.8 42.3 42.3
## 7 GER 1 1.9 1.9 44.2 44.2
## 8 NED 1 1.9 1.9 46.2 46.2
## 9 SRB 5 9.6 9.6 55.8 55.8
## 10 SWE 7 13.5 13.5 69.2 69.2
## 11 TCH 1 1.9 1.9 71.2 71.2
## 12 USA 15 28.8 28.8 100.0 100.0
# Bar chart of the number of titles per country (gentlemen's singles).
library(ggplot2)
# Map country to the x axis and the precomputed count `n` to the y axis.
base <- ggplot(data = freqCountry, aes(x = Country, y = n))
# stat = "identity" draws the counts already stored in `n` instead of
# counting rows again.
bar1 <- base + geom_bar(stat = "identity")
# Plot annotations.
text1 <- "Country in Gentlemen's Open Era"
text2 <- "Countries"
text3 <- "Victory Count"
text4 <- "Taken from: Wikipedia"
bar2 <- bar1 + labs(
  title = text1,
  x = text2,
  y = text3,
  caption = text4
)
bar2
# Pareto chart of titles per country: countries are ordered by decreasing
# frequency and the cumulative percentage is reported alongside (see the
# summary table printed below).
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# cumperc lists the tick marks requested for the cumulative-percentage axis.
# The table() call is kept inline because the printed title below echoes the
# expression passed as the first argument.
pareto.chart(table(OpenEra$Country),cumperc = c(0,50,80,100))
##
## Pareto chart analysis for table(OpenEra$Country)
## Frequency Cum.Freq. Percentage Cum.Percent.
## USA 15.000000 15.000000 28.846154 28.846154
## SUI 8.000000 23.000000 15.384615 44.230769
## SWE 7.000000 30.000000 13.461538 57.692308
## AUS 6.000000 36.000000 11.538462 69.230769
## SRB 5.000000 41.000000 9.615385 78.846154
## FRG 3.000000 44.000000 5.769231 84.615385
## ESP 2.000000 46.000000 3.846154 88.461538
## GBR 2.000000 48.000000 3.846154 92.307692
## CRO 1.000000 49.000000 1.923077 94.230769
## GER 1.000000 50.000000 1.923077 96.153846
## NED 1.000000 51.000000 1.923077 98.076923
## TCH 1.000000 52.000000 1.923077 100.000000
El 80% acumulado se alcanza entre SRB y FRG (78.8% y 84.6% acumulado, respectivamente).
ESTADÍSTICAS
Moda
# Modal category: the country code that appears most often among champions.
library(DescTools)
Mode(OpenEra[["Country"]])
## [1] " USA"
El mayor número de títulos ganados lo ha obtenido USA.
Representatividad Efectiva : Laakso - Taagepera
# Effective number of countries (Laakso-Taagepera): the reciprocal of the
# Herfindahl index computed on the per-country counts.
dataTable <- table(OpenEra[["Country"]])
1 / Herfindahl(dataTable)
## [1] 6.438095
Hay aproximadamente 6 países que concentran la mayoría de los títulos ganados.
Índice de Herfindahl- Hirschman
# Herfindahl-Hirschman concentration index of the per-country counts.
dataTable <- table(OpenEra[["Country"]])
Herfindahl(dataTable)
## [1] 0.1553254
# Scrape the table of Wimbledon ladies' singles champions from Wikipedia
# (presumably the Open Era table, going by the object name) and keep only the
# country column.
library(htmltab)
linkPage <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"
# XPath of the table to read: the fourth table under the div inside the
# element with id "mw-content-text".
linkTabla <- '//*[@id="mw-content-text"]/div/table[4]/tbody'
OpenEraF <- htmltab(doc = linkPage, which = linkTabla)
# Keep the second column as a one-column data frame.
OpenEraF <- OpenEraF[2]
# The scraped codes arrive with leading whitespace (e.g. " USA"). Strip it,
# including Unicode whitespace such as non-breaking spaces, so every country
# is a single clean category in the tables and plots built from this column.
# Note: outputs recorded elsewhere in this document may predate this cleanup.
OpenEraF[[1]] <- trimws(OpenEraF[[1]], whitespace = "[\\h\\v]")
names(OpenEraF)
## [1] "Country"
str(OpenEraF)
## 'data.frame': 52 obs. of 1 variable:
## $ Country: chr "USA" "GBR" "AUS" "AUS" ...
head(OpenEraF)
## Country
## 2 USA
## 3 GBR
## 4 AUS
## 5 AUS
## 6 USA
## 7 USA
2.GRÁFICOS
# Frequency table of ladies' champions per country, including cumulative
# percentages.
library(questionr)
library(magrittr)
# Build the frequency table and convert it to a plain data frame.
freqCountryF <- freq(OpenEraF$Country, cum = TRUE) %>%
  data.frame()
# Move the country codes from the row names into a regular "Country" column
# so they can be mapped to a plot aesthetic.
freqCountryF <- data.frame(
  Country = row.names(freqCountryF),
  freqCountryF,
  row.names = NULL
)
freqCountryF
## Country n X. val. X.cum val.cum
## 1 SUI 1 1.9 1.9 1.9 1.9
## 2 AUS 3 5.8 5.8 7.7 7.7
## 3 CZE 3 5.8 5.8 13.5 13.5
## 4 ESP 2 3.8 3.8 17.3 17.3
## 5 FRA 2 3.8 3.8 21.2 21.2
## 6 FRG 2 3.8 3.8 25.0 25.0
## 7 GBR 2 3.8 3.8 28.8 28.8
## 8 GER 6 11.5 11.5 40.4 40.4
## 9 ROU 1 1.9 1.9 42.3 42.3
## 10 RUS 1 1.9 1.9 44.2 44.2
## 11 USA 29 55.8 55.8 100.0 100.0
# Bar chart of the number of titles per country (ladies' singles).
library(ggplot2)
# Map country to the x axis and the precomputed count `n` to the y axis.
base1 <- ggplot(data = freqCountryF, aes(x = Country, y = n))
# stat = "identity" draws the counts already stored in `n` instead of
# counting rows again.
bar3 <- base1 + geom_bar(stat = "identity")
# Plot annotations.
text1 <- "Country in Ladies' Open Era"
text2 <- "Countries"
text3 <- "Victory Count"
text4 <- "Taken from: Wikipedia"
bar4 <- bar3 + labs(
  title = text1,
  x = text2,
  y = text3,
  caption = text4
)
bar4
# Pareto chart of ladies' titles per country: countries are ordered by
# decreasing frequency and the cumulative percentage is reported alongside
# (see the summary table printed below).
library(qcc)
# cumperc lists the tick marks requested for the cumulative-percentage axis.
# The table() call is kept inline because the printed title below echoes the
# expression passed as the first argument.
pareto.chart(table(OpenEraF$Country),cumperc = c(0,50,80,100))
##
## Pareto chart analysis for table(OpenEraF$Country)
## Frequency Cum.Freq. Percentage Cum.Percent.
## USA 29.000000 29.000000 55.769231 55.769231
## GER 6.000000 35.000000 11.538462 67.307692
## AUS 3.000000 38.000000 5.769231 73.076923
## CZE 3.000000 41.000000 5.769231 78.846154
## ESP 2.000000 43.000000 3.846154 82.692308
## FRA 2.000000 45.000000 3.846154 86.538462
## FRG 2.000000 47.000000 3.846154 90.384615
## GBR 2.000000 49.000000 3.846154 94.230769
## SUI 1.000000 50.000000 1.923077 96.153846
## ROU 1.000000 51.000000 1.923077 98.076923
## RUS 1.000000 52.000000 1.923077 100.000000
El 80% acumulado se alcanza entre CZE y ESP (78.8% y 82.7% acumulado, respectivamente).
ESTADÍSTICAS
Moda
# Modal category: the country code that appears most often among champions.
library(DescTools)
Mode(OpenEraF[["Country"]])
## [1] " USA"
El mayor número de títulos ganados lo ha obtenido USA.
Representatividad Efectiva : Laakso - Taagepera
# Effective number of countries (Laakso-Taagepera): the reciprocal of the
# Herfindahl index computed on the per-country counts.
dataTable <- table(OpenEraF[["Country"]])
1 / Herfindahl(dataTable)
## [1] 2.958425
Hay aproximadamente 3 países que concentran la mayoría de los títulos ganados.
Índice de Herfindahl- Hirschman
# Herfindahl-Hirschman concentration index of the per-country counts.
dataTable <- table(OpenEraF[["Country"]])
Herfindahl(dataTable)
## [1] 0.3380178