Cargamos las librerías oportunas
library( ggplot2 )
library(tables)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library(knitr)
## Warning: package 'knitr' was built under R version 3.3.3
Leemos la tabla, la asignamos a “df” y comprobamos su estructura.
# Load the semicolon-separated country table; the first line holds the column
# names, "." is the decimal mark, and text columns stay as character vectors.
df <- read.table(
  file = "paisesMundoRedC.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  stringsAsFactors = FALSE
)
# Preview the first rows of the freshly loaded table.
head(df)
## Country EPI_regions
## AGO Angola Sub-Saharan Africa
## ALB Albania Central and Eastern Europ
## ARE United Arab Emirates Middle East and North Africa
## ARG Argentina Latin America and Caribbe
## ARM Armenia Middle East and North Africa
## AUS Australia East Asia and the Pacific
## GEO_subregion Population2005 GDP_capita.MRYA landarea EPI
## AGO Southern Africa 15941.4 2314.4 1251895.62 39.5
## ALB Central Europe 3129.7 4955.3 28346.12 84.0
## ARE Arabian Peninsula 4495.8 22698.3 74776.60 64.0
## ARG South America 38747.2 13652.4 2736296.00 81.8
## ARM Eastern Europe 3016.3 5011.0 28272.73 77.8
## AUS Australia + New Zealand 20155.1 30677.9 7634643.84 79.8
## FOREST FISH AGRICULTURE
## AGO 95.4 87.3 61.3
## ALB 100.0 62.5 75.6
## ARE 100.0 50.0 72.3
## ARG 75.9 58.8 79.9
## ARM 70.1 NA 94.2
## AUS 100.0 96.7 78.7
# Report the table's dimensions (rows, columns).
dim(df)
## [1] 149 10
# Show every column's type together with its first values.
str(df)
## 'data.frame': 149 obs. of 10 variables:
## $ Country : chr "Angola" "Albania" "United Arab Emirates" "Argentina" ...
## $ EPI_regions : chr "Sub-Saharan Africa" "Central and Eastern Europ" "Middle East and North Africa" "Latin America and Caribbe" ...
## $ GEO_subregion : chr "Southern Africa" "Central Europe" "Arabian Peninsula" "South America" ...
## $ Population2005 : num 15941 3130 4496 38747 3016 ...
## $ GDP_capita.MRYA: num 2314 4955 22698 13652 5011 ...
## $ landarea : num 1251896 28346 74777 2736296 28273 ...
## $ EPI : num 39.5 84 64 81.8 77.8 79.8 89.4 72.2 54.7 78.4 ...
## $ FOREST : num 95.4 100 100 75.9 70.1 100 100 100 0 100 ...
## $ FISH : num 87.3 62.5 50 58.8 NA 96.7 NA NA NA 47.4 ...
## $ AGRICULTURE : num 61.3 75.6 72.3 79.9 94.2 78.7 76.4 71.4 95.9 80.8 ...
Es un data frame y vemos que tiene 149 observaciones con 10 variables: 7 cuantitativas y 3 cualitativas. La variable “GEO_subregion” contiene una clasificación (a escala de grandes regiones del mundo) del país en cuestión.
Ahora asignamos a “dfA” las filas correspondientes a los países africanos de la tabla, usando la función “grep”.
# Row positions whose GEO subregion label contains "Africa".
indicesAfrica <- which(grepl("Africa", df$GEO_subregion))
# Keep only those rows (every column) and inspect the resulting structure.
dfA <- df[indicesAfrica, , drop = FALSE]
str(dfA)
## 'data.frame': 41 obs. of 10 variables:
## $ Country : chr "Angola" "Burundi" "Benin" "Burkina Faso" ...
## $ EPI_regions : chr "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" ...
## $ GEO_subregion : chr "Southern Africa" "Eastern Africa" "Western Africa" "Western Africa" ...
## $ Population2005 : num 15941 7548 8439 13228 1765 ...
## $ GDP_capita.MRYA: num 2314 630 1016 1143 11313 ...
## $ landarea : num 1251896 25227 115828 275748 559516 ...
## $ EPI : num 39.5 54.7 56.1 44.3 68.7 56 65.2 63.8 47.3 69.7 ...
## $ FOREST : num 95.4 0 17.8 64.5 79.2 97.2 100 78.4 94.8 98.4 ...
## $ FISH : num 87.3 NA 91.5 NA NA NA 91.2 52.4 46.3 74.1 ...
## $ AGRICULTURE : num 61.3 95.9 88.2 87.7 72.3 71.8 88.7 69.9 70.8 99.1 ...
Codificamos las variables categóricas en “dfA”
# Recode the three text columns of dfA as factors in a single pass.
dfA[c("EPI_regions", "GEO_subregion", "Country")] <-
  lapply(dfA[c("EPI_regions", "GEO_subregion", "Country")], factor)
Hacemos un resumen del conjunto de datos
# Per-column summary: level counts for factors, quartiles/mean for numerics.
summary(dfA)
## Country EPI_regions GEO_subregion
## Algeria : 1 Middle East and North Africa: 5 Central Africa : 6
## Angola : 1 Sub-Saharan Africa :36 Eastern Africa : 7
## Benin : 1 Northern Africa: 5
## Botswana : 1 Southern Africa:10
## Burkina Faso: 1 Western Africa :13
## Burundi : 1
## (Other) :35
## Population2005 GDP_capita.MRYA landarea EPI
## Min. : 793.1 Min. : 629.8 Min. : 17410 Min. :39.10
## 1st Qu.: 5525.5 1st Qu.: 1008.1 1st Qu.: 147882 1st Qu.:51.30
## Median : 12883.9 Median : 1312.8 Median : 403759 Median :59.40
## Mean : 21030.0 Mean : 2506.2 Mean : 642219 Mean :59.16
## 3rd Qu.: 28816.2 3rd Qu.: 2299.1 3rd Qu.: 968072 3rd Qu.:69.00
## Max. :131529.7 Max. :11313.3 Max. :2492385 Max. :78.10
##
## FOREST FISH AGRICULTURE
## Min. : 0.00 Min. :23.90 Min. :53.00
## 1st Qu.: 73.30 1st Qu.:72.60 1st Qu.:69.30
## Median : 86.40 Median :79.10 Median :73.90
## Mean : 78.51 Mean :75.11 Mean :74.87
## 3rd Qu.: 98.40 3rd Qu.:87.05 3rd Qu.:81.60
## Max. :100.00 Max. :91.60 Max. :99.10
## NA's :14
Vemos cómo el resumen ya incluye, para las variables cuantitativas, el mínimo, el primer cuartil, la mediana, la media, el tercer cuartil y el máximo. Además, las variables categóricas que hemos convertido en factor aparecen con el número de observaciones correspondiente a cada nivel.
Seguidamente, vamos a seleccionar las variables indicadas y calcularemos sus descriptivos en función de “GEO_subregion”.
Primero vamos con “Population2005”:
# Descriptive statistics of Population2005 within each GEO_subregion level.
with(dfA, tapply(X = Population2005, INDEX = GEO_subregion, FUN = mean))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 15506.65 23183.19 36940.14 16388.48
## Western Africa
## 19871.10
with(dfA, tapply(X = Population2005, INDEX = GEO_subregion, FUN = sd))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 21282.72 27067.09 23147.14 15486.97
## Western Africa
## 34051.05
with(dfA, tapply(X = Population2005, INDEX = GEO_subregion, FUN = median))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 6893.3 9037.7 32853.8 12946.7
## Western Africa
## 11658.2
with(dfA, tapply(X = Population2005, INDEX = GEO_subregion, FUN = min))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 1383.8 793.1 10102.5 1032.4
## Western Africa
## 1586.3
with(dfA, tapply(X = Population2005, INDEX = GEO_subregion, FUN = max))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 57548.7 77430.7 74032.9 47431.8
## Western Africa
## 131529.7
A continuación, seguimos con “landarea”:
# Descriptive statistics of landarea within each GEO_subregion level.
with(dfA, tapply(X = landarea, INDEX = GEO_subregion, FUN = mean))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 875594.6 300392.2 1262919.2 676390.9
## Western Africa
## 453551.0
with(dfA, tapply(X = landarea, INDEX = GEO_subregion, FUN = sd))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 786168.0 413610.4 1079434.0 418232.7
## Western Africa
## 453693.9
with(dfA, tapply(X = landarea, INDEX = GEO_subregion, FUN = median))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 544316.8 121862.9 968071.5 761220.4
## Western Africa
## 245860.1
with(dfA, tapply(X = landarea, INDEX = GEO_subregion, FUN = min))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 265145.62 20903.50 147881.49 17409.73
## Western Africa
## 34105.82
with(dfA, tapply(X = landarea, INDEX = GEO_subregion, FUN = max))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 2313414 1123717 2492385 1251896
## Western Africa
## 1248146
Y, por último, terminamos con “GDP_capita.MRYA”
# Descriptive statistics of GDP_capita.MRYA within each GEO_subregion level.
with(dfA, tapply(X = GDP_capita.MRYA, INDEX = GEO_subregion, FUN = mean))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 2037.667 1163.486 4912.280 4057.450
## Western Africa
## 1326.885
with(dfA, tapply(X = GDP_capita.MRYA, INDEX = GEO_subregion, FUN = sd))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 1914.4573 417.6242 2209.5782 4095.1023
## Western Africa
## 561.7618
with(dfA, tapply(X = GDP_capita.MRYA, INDEX = GEO_subregion, FUN = median))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 1250.05 1104.70 4346.40 2026.50
## Western Africa
## 1142.90
with(dfA, tapply(X = GDP_capita.MRYA, INDEX = GEO_subregion, FUN = min))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 700.0 629.8 2050.2 631.5
## Western Africa
## 700.3
with(dfA, tapply(X = GDP_capita.MRYA, INDEX = GEO_subregion, FUN = max))
## Central Africa Eastern Africa Northern Africa Southern Africa
## 5835.0 1982.4 7758.2 11313.3
## Western Africa
## 2299.1
Después del cálculo de los descriptivos anteriores, vamos a volver a calcular los descriptivos principales, esta vez para la variable “AGRICULTURE”. Ahora lo haremos con la función “tabular” para poder poner etiquetas representativas e imprimir el resultado en forma de tabla, como se requiere.
# Descriptive statistics of AGRICULTURE for every GEO_subregion level, with one
# labelled column per statistic.
tabla <- tabular(
  GEO_subregion ~ AGRICULTURE * (
    (media = mean) + (desviación = sd) + (mediana = median) +
      (máximo = max) + (mínimo = min)
  ),
  data = dfA
)
# Emit the table as HTML, captioned "AGRICULTURA".
html(
  tabla,
  options = htmloptions(HTMLcaption = "AGRICULTURA"),
  pad = TRUE
)
AGRICULTURA

|                 | AGRICULTURE |            |         |        |        |
|-----------------|-------------|------------|---------|--------|--------|
| GEO_subregion   | media       | desviación | mediana | máximo | mínimo |
| Central Africa  | 79.28       | 11.174     | 76.75   | 99.1   | 69.9   |
| Eastern Africa  | 77.41       | 12.403     | 78.00   | 95.9   | 54.4   |
| Northern Africa | 66.04       | 8.136      | 68.40   | 74.8   | 53.0   |
| Southern Africa | 69.74       | 4.681      | 71.80   | 74.7   | 61.3   |
| Western Africa  | 78.82       | 7.131      | 78.80   | 88.7   | 65.9   |
En cuanto a gráficas, primero haremos una de dispersión, usando el “material” de base de R. En ella se representa el producto interior bruto per cápita frente a la población de cada país de África, con los puntos coloreados en función del factor “GEO_subregion”.
# Scatter plot of GDP_capita.MRYA against Population2005 for the African
# countries; each point's colour encodes its GEO_subregion factor level.
plot(dfA$Population2005, dfA$GDP_capita.MRYA,
     xlab = "Población2005",
     ylab = "P.I.B.",
     main = "PIB vs Población de África",
     col = dfA$GEO_subregion)
# Without a legend the colours cannot be interpreted. The factor's integer
# codes (1..k) index the active palette, so the same indices reproduce the
# colour used for each level; pch = 1 matches plot()'s default symbol.
legend("topright",
       legend = levels(dfA$GEO_subregion),
       col = seq_along(levels(dfA$GEO_subregion)),
       pch = 1,
       title = "GEO_subregion")

Para acabar esta tarea, realizaremos un gráfico con “ggplot”. En nuestro caso, hemos elegido la cantidad existente de bosque por cada región africana delimitada. Además está coloreado por cada subregión africana existente.
# Box plots of FOREST per EPI region, with one filled box per GEO subregion.
ggplot(
  data = dfA,
  mapping = aes(x = EPI_regions, y = FOREST, fill = GEO_subregion)
) +
  geom_boxplot() +
  labs(x = "Región", y = "Bosque", title = "Bosque vs Región")

Podemos ver cómo es en la parte subsahariana donde únicamente existe bosque.
Dejamos constancia de la sesión al terminar.
# Print the R version, platform, locale and attached/loaded packages.
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=Spanish_Spain.1252 LC_CTYPE=Spanish_Spain.1252
## [3] LC_MONETARY=Spanish_Spain.1252 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Spain.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.15.1 tables_0.8 Hmisc_4.0-2 Formula_1.2-1
## [5] survival_2.40-1 lattice_0.20-34 ggplot2_2.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.9 RColorBrewer_1.1-2 plyr_1.8.4
## [4] base64enc_0.1-3 tools_3.3.2 rpart_4.1-10
## [7] digest_0.6.12 evaluate_0.10 tibble_1.2
## [10] gtable_0.2.0 htmlTable_1.9 checkmate_1.8.2
## [13] Matrix_1.2-8 yaml_2.1.14 gridExtra_2.2.1
## [16] stringr_1.2.0 cluster_2.0.5 htmlwidgets_0.8
## [19] rprojroot_1.2 grid_3.3.2 nnet_7.3-12
## [22] data.table_1.10.4 foreign_0.8-67 rmarkdown_1.3
## [25] latticeExtra_0.6-28 magrittr_1.5 backports_1.0.5
## [28] scales_0.4.1 htmltools_0.3.5 splines_3.3.2
## [31] assertthat_0.1 colorspace_1.3-2 labeling_0.3
## [34] stringi_1.1.2 acepack_1.4.1 lazyeval_0.2.0
## [37] munsell_0.4.3