library(htmltab)
# Wikipedia page listing the Wimbledon gentlemen's singles champions.
LINKtennisMen <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_gentlemen%27s_singles_champions"
# XPath: the fourth <table> under the div inside the #mw-content-text element
# (presumably the Open Era table, going by the variable name -- confirm).
linktabla2 <- '//*[@id="mw-content-text"]/div/table[4]'
# Scrape that table into a data frame, then list its column names.
openera <- htmltab(which = linktabla2, doc = LINKtennisMen)
names(openera)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
# Keep the year and the CHAMPION's country. names(openera) lists two columns
# called "Country": position 2 sits before "Champion" and position 4 sits
# before "Runner-up", so the previous c(1, 4) tabulated runner-up countries.
# NOTE(review): the printed results further down were produced with column 4
# and must be regenerated after this change.
openera <- openera[, c(1, 2)]
# Strip surrounding whitespace (including non-breaking spaces) from the scraped
# country codes so values read "USA" rather than " USA".
openera$Country <- trimws(openera$Country, whitespace = "[\\h\\v]")
names(openera)
## [1] "Year" "Country"
# Show the structure (row count, column names and types) of the trimmed data.
utils::str(object = openera)
## 'data.frame': 52 obs. of 2 variables:
## $ Year : chr "1968" "1969" "1970" "1971" ...
## $ Country: chr " AUS" " AUS" " AUS" " USA" ...
# Drop rows whose Year is missing and keep the result. The original expression
# only printed the filtered copy and left `openera` itself unchanged.
openera <- openera[complete.cases(openera$Year), , drop = FALSE]
# Print the filtered data frame, as the original line did.
openera
## Year Country
## 2 1968 AUS
## 3 1969 AUS
## 4 1970 AUS
## 5 1971 USA
## 6 1972 ROM
## 7 1973 URS
## 8 1974 AUS
## 9 1975 USA
## 10 1976 ROM
## 11 1977 USA
## 12 1978 USA
## 13 1979 USA
## 14 1980 USA
## 15 1981 SWE
## 16 1982 USA
## 17 1983 NZL
## 18 1984 USA
## 19 1985 USA
## 20 1986 TCH
## 21 1987 TCH
## 22 1988 FRG
## 23 1989 SWE
## 24 1990 FRG
## 25 1991 GER
## 26 1992 CRO
## 27 1993 USA
## 28 1994 CRO
## 29 1995 GER
## 30 1996 USA
## 31 1997 FRA
## 32 1998 CRO
## 33 1999 USA
## 34 2000 AUS
## 35 2001 AUS
## 36 2002 ARG
## 37 2003 AUS
## 38 2004 USA
## 39 2005 USA
## 40 2006 ESP
## 41 2007 ESP
## 42 2008 SUI
## 43 2009 USA
## 44 2010 CZE
## 45 2011 ESP
## 46 2012 GBR
## 47 2013 SRB
## 48 2014 SUI
## 49 2015 SUI
## 50 2016 CAN
## 51 2017 CRO
## 52 2018 RSA
## 53 2019 SUI
producir los datos
# Count how many rows carry each country code.
table(openera[["Country"]])
##
## SUI ARG AUS CAN CRO CZE ESP FRA FRG GBR GER NZL
## 4 1 7 1 4 1 3 1 2 1 2 1
## ROM RSA SRB SWE TCH URS USA
## 2 1 1 2 2 1 15
library(knitr)
# Frequency of each country code; reused by the concentration measures below.
dataTable <- table(openera[["Country"]])
# Render the frequency table as a Markdown table.
kable(x = dataTable)
| Var1 | Freq |
|---|---|
| SUI | 4 |
| ARG | 1 |
| AUS | 7 |
| CAN | 1 |
| CRO | 4 |
| CZE | 1 |
| ESP | 3 |
| FRA | 1 |
| FRG | 2 |
| GBR | 1 |
| GER | 2 |
| NZL | 1 |
| ROM | 2 |
| RSA | 1 |
| SRB | 1 |
| SWE | 2 |
| TCH | 2 |
| URS | 1 |
| USA | 15 |
grafico
library(ggplot2)
# Bar chart with one bar per country code (geom_bar counts rows per category).
base <- ggplot(openera, aes(x = Country))
bar1 <- base + geom_bar()
# Draw the plot.
bar1
estadisticos centrales: moda
library(DescTools)
# Most frequent country code (the mode) in the men's data.
Mode(openera[["Country"]])
## [1] " USA"
Dispersión
Concentracion: Herfindahl- Hirschman
< 0.01: la moda no es significativa; las categorías tienen pesos similares. < 0.15: la moda no es significativa; varias categorías tienen pesos similares. Entre 0.15 y 0.25: hay una moda. > 0.25: la moda se diferencia de las demás categorías.
library(DescTools)
# Herfindahl-Hirschman concentration index of the country frequencies.
Herfindahl(x = dataTable)
## [1] 0.1272189
Representatividad Efectiva : Laakso - Taagepera
# Reciprocal of the Herfindahl index, which the text calls the
# Laakso-Taagepera effective number of groups.
1 / Herfindahl(x = dataTable)
## [1] 7.860465
Hay casi 8 grupos representativos.
…………………………………..segunda parte…………………………………
# Wikipedia page listing the Wimbledon ladies' singles champions.
LINKtennisLadies <- "https://en.wikipedia.org/wiki/List_of_Wimbledon_ladies%27_singles_champions"
# XPath: the fourth <table> under the div inside the #mw-content-text element
# (presumably the Open Era table, going by the variable name -- confirm).
linktabla <- '//*[@id="mw-content-text"]/div/table[4]'
# Scrape that table into a data frame, then list its column names.
openeraW <- htmltab(which = linktabla, doc = LINKtennisLadies)
names(openeraW)
## [1] "Year" "Country" "Champion"
## [4] "Country" "Runner-up" "Score in the final"
# Keep the year and the CHAMPION's country. names(openeraW) lists two columns
# called "Country": position 2 sits before "Champion" and position 4 sits
# before "Runner-up", so the previous c(1, 4) tabulated runner-up countries
# even though the plot labels the axis as the winning country.
# NOTE(review): the printed results further down were produced with column 4
# and must be regenerated after this change.
openeraW <- openeraW[, c(1, 2)]
# Strip surrounding whitespace (including non-breaking spaces) from the scraped
# country codes so values read "USA" rather than " USA".
openeraW$Country <- trimws(openeraW$Country, whitespace = "[\\h\\v]")
names(openeraW)
## [1] "Year" "Country"
# Count how many rows carry each country code.
table(openeraW[["Country"]])
##
## ARG AUS BEL CAN CZE ESP FRA FRG GER NED POL RUS TCH URS USA
## 1 5 2 1 2 3 2 1 3 1 1 2 2 1 24
## YUG
## 1
library(knitr)
# Frequency of each country code in the ladies' data. This reuses the name
# `dataTable`, so the earlier men's table is overwritten from here on.
dataTable <- table(openeraW[["Country"]])
# Render the frequency table as a Markdown table.
kable(x = dataTable)
| Var1 | Freq |
|---|---|
| ARG | 1 |
| AUS | 5 |
| BEL | 2 |
| CAN | 1 |
| CZE | 2 |
| ESP | 3 |
| FRA | 2 |
| FRG | 1 |
| GER | 3 |
| NED | 1 |
| POL | 1 |
| RUS | 2 |
| TCH | 2 |
| URS | 1 |
| USA | 24 |
| YUG | 1 |
grafico. version basica
library(ggplot2)
# Basic bar chart with one bar per country code.
base <- ggplot(openeraW, aes(x = Country))
bar1 <- base + geom_bar()
bar1
# Add axis labels, title, subtitle and source caption, then redraw.
bar1 <- bar1 +
  labs(
    x = "Pais Ganador",
    y = "Cantidad",
    title = "List of Wimbledon ladies' singles champions",
    subtitle = "open era",
    caption = "Fuente: Wikipedia"
  )
bar1
# Final display only: tilt and shrink the x-axis labels. The themed plot is
# printed but not stored back into bar1.
bar1 + theme(axis.text.x = element_text(size = 7, angle = 25, hjust = 1))
Estadísticos:
Centrales: La Moda
library(DescTools)
# Most frequent country code (the mode) in the ladies' data.
Mode(openeraW[["Country"]])
## [1] " USA"
Estados Unidos es el país que más veces ha ganado en Wimbledon.
DISPERSION
Concentracion: Herfindahl- Hirschman
< 0.01: la moda no es significativa; las categorías tienen pesos similares. < 0.15: la moda no es significativa; varias categorías tienen pesos similares. Entre 0.15 y 0.25: hay una moda. > 0.25: la moda se diferencia de las demás categorías.
library(DescTools)
# Herfindahl-Hirschman concentration index of the country frequencies.
Herfindahl(x = dataTable)
## [1] 0.2389053
0.24: hay una moda (el valor está entre 0.15 y 0.25).
Representatividad Efectiva : Laakso - Taagepera
# Reciprocal of the Herfindahl index, which the text calls the
# Laakso-Taagepera effective number of groups.
1 / Herfindahl(x = dataTable)
## [1] 4.185759
Hay algo más de 4 grupos representativos.