Cargamos las librerías oportunas

library( ggplot2 )
library(tables)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(knitr)
## Warning: package 'knitr' was built under R version 3.3.3

Leemos la tabla, la asignamos a “df” y comprobamos su estructura

# Load the semicolon-separated country table (header row present, "." as the
# decimal mark), keeping text columns as character vectors, then preview it.
df <- read.table(
  file = "paisesMundoRedC.csv",
  sep = ";",
  dec = ".",
  header = TRUE,
  stringsAsFactors = FALSE
)
head(df)
##                  Country                  EPI_regions
## AGO               Angola           Sub-Saharan Africa
## ALB              Albania    Central and Eastern Europ
## ARE United Arab Emirates Middle East and North Africa
## ARG            Argentina    Latin America and Caribbe
## ARM              Armenia Middle East and North Africa
## AUS            Australia    East Asia and the Pacific
##               GEO_subregion Population2005 GDP_capita.MRYA   landarea  EPI
## AGO         Southern Africa        15941.4          2314.4 1251895.62 39.5
## ALB          Central Europe         3129.7          4955.3   28346.12 84.0
## ARE       Arabian Peninsula         4495.8         22698.3   74776.60 64.0
## ARG           South America        38747.2         13652.4 2736296.00 81.8
## ARM          Eastern Europe         3016.3          5011.0   28272.73 77.8
## AUS Australia + New Zealand        20155.1         30677.9 7634643.84 79.8
##     FOREST FISH AGRICULTURE
## AGO   95.4 87.3        61.3
## ALB  100.0 62.5        75.6
## ARE  100.0 50.0        72.3
## ARG   75.9 58.8        79.9
## ARM   70.1   NA        94.2
## AUS  100.0 96.7        78.7
# Number of rows and columns of the full table.
dim(df)
## [1] 149  10
# Compact per-column listing of each variable's type and first values.
str(df)
## 'data.frame':    149 obs. of  10 variables:
##  $ Country        : chr  "Angola" "Albania" "United Arab Emirates" "Argentina" ...
##  $ EPI_regions    : chr  "Sub-Saharan Africa" "Central and Eastern Europ" "Middle East and North Africa" "Latin America and Caribbe" ...
##  $ GEO_subregion  : chr  "Southern Africa" "Central Europe" "Arabian Peninsula" "South America" ...
##  $ Population2005 : num  15941 3130 4496 38747 3016 ...
##  $ GDP_capita.MRYA: num  2314 4955 22698 13652 5011 ...
##  $ landarea       : num  1251896 28346 74777 2736296 28273 ...
##  $ EPI            : num  39.5 84 64 81.8 77.8 79.8 89.4 72.2 54.7 78.4 ...
##  $ FOREST         : num  95.4 100 100 75.9 70.1 100 100 100 0 100 ...
##  $ FISH           : num  87.3 62.5 50 58.8 NA 96.7 NA NA NA 47.4 ...
##  $ AGRICULTURE    : num  61.3 75.6 72.3 79.9 94.2 78.7 76.4 71.4 95.9 80.8 ...

Es un data frame y vemos cómo tiene 149 observaciones con 10 variables: 7 cuantitativas y 3 cualitativas. La variable “GEO_subregion” contiene una clasificación (a escala de grandes regiones del mundo) del país en cuestión.

Ahora, nos centramos en asignar a “dfA” las filas de la tabla correspondientes a los países africanos, usando la función “grep”

 # Positions of the rows whose GEO_subregion text contains "Africa".
 indicesAfrica <- which(grepl("Africa", df$GEO_subregion))
 # Keep only those rows (all columns) and show the structure of the subset.
 dfA <- df[indicesAfrica, ]
 str(dfA)
## 'data.frame':    41 obs. of  10 variables:
##  $ Country        : chr  "Angola" "Burundi" "Benin" "Burkina Faso" ...
##  $ EPI_regions    : chr  "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" ...
##  $ GEO_subregion  : chr  "Southern Africa" "Eastern Africa" "Western Africa" "Western Africa" ...
##  $ Population2005 : num  15941 7548 8439 13228 1765 ...
##  $ GDP_capita.MRYA: num  2314 630 1016 1143 11313 ...
##  $ landarea       : num  1251896 25227 115828 275748 559516 ...
##  $ EPI            : num  39.5 54.7 56.1 44.3 68.7 56 65.2 63.8 47.3 69.7 ...
##  $ FOREST         : num  95.4 0 17.8 64.5 79.2 97.2 100 78.4 94.8 98.4 ...
##  $ FISH           : num  87.3 NA 91.5 NA NA NA 91.2 52.4 46.3 74.1 ...
##  $ AGRICULTURE    : num  61.3 95.9 88.2 87.7 72.3 71.8 88.7 69.9 70.8 99.1 ...

Codificamos las variables categóricas en “dfA”

# Recode the three text columns of dfA as factors in a single call; the
# columns keep their names and positions.
dfA <- transform(
  dfA,
  Country = factor(Country),
  EPI_regions = factor(EPI_regions),
  GEO_subregion = factor(GEO_subregion)
)

Hacemos un resumen del conjunto de datos

# Summarise every column of dfA: observation counts per level for the factor
# columns; minimum, quartiles, mean, maximum and NA count for the numeric ones.
summary(dfA)
##          Country                         EPI_regions         GEO_subregion
##  Algeria     : 1   Middle East and North Africa: 5   Central Africa : 6   
##  Angola      : 1   Sub-Saharan Africa          :36   Eastern Africa : 7   
##  Benin       : 1                                     Northern Africa: 5   
##  Botswana    : 1                                     Southern Africa:10   
##  Burkina Faso: 1                                     Western Africa :13   
##  Burundi     : 1                                                          
##  (Other)     :35                                                          
##  Population2005     GDP_capita.MRYA      landarea            EPI       
##  Min.   :   793.1   Min.   :  629.8   Min.   :  17410   Min.   :39.10  
##  1st Qu.:  5525.5   1st Qu.: 1008.1   1st Qu.: 147882   1st Qu.:51.30  
##  Median : 12883.9   Median : 1312.8   Median : 403759   Median :59.40  
##  Mean   : 21030.0   Mean   : 2506.2   Mean   : 642219   Mean   :59.16  
##  3rd Qu.: 28816.2   3rd Qu.: 2299.1   3rd Qu.: 968072   3rd Qu.:69.00  
##  Max.   :131529.7   Max.   :11313.3   Max.   :2492385   Max.   :78.10  
##                                                                        
##      FOREST            FISH        AGRICULTURE   
##  Min.   :  0.00   Min.   :23.90   Min.   :53.00  
##  1st Qu.: 73.30   1st Qu.:72.60   1st Qu.:69.30  
##  Median : 86.40   Median :79.10   Median :73.90  
##  Mean   : 78.51   Mean   :75.11   Mean   :74.87  
##  3rd Qu.: 98.40   3rd Qu.:87.05   3rd Qu.:81.60  
##  Max.   :100.00   Max.   :91.60   Max.   :99.10  
##                   NA's   :14

Vemos cómo en el resumen ya aparecen el mínimo, el primer cuartil, la mediana, la media, el tercer cuartil y el máximo de cada variable numérica. Además, las variables categóricas que hemos factorizado aparecen con el número de observaciones correspondiente a cada nivel.

Seguidamente, vamos a seleccionar las variables indicadas y calcularemos sus descriptivos en función de “GEO_subregion”

Primero vamos con “Population2005”:

# Descriptive statistics of Population2005 within each GEO_subregion level
# (mean, standard deviation, median, minimum, maximum). The "##" lines are the
# echoed console output of each call.
with(dfA, tapply(Population2005, GEO_subregion, mean))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        15506.65        23183.19        36940.14        16388.48 
##  Western Africa 
##        19871.10
with(dfA, tapply(Population2005, GEO_subregion, sd))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        21282.72        27067.09        23147.14        15486.97 
##  Western Africa 
##        34051.05
with(dfA, tapply(Population2005, GEO_subregion, median))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##          6893.3          9037.7         32853.8         12946.7 
##  Western Africa 
##         11658.2
with(dfA, tapply(Population2005, GEO_subregion, min))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##          1383.8           793.1         10102.5          1032.4 
##  Western Africa 
##          1586.3
with(dfA, tapply(Population2005, GEO_subregion, max))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##         57548.7         77430.7         74032.9         47431.8 
##  Western Africa 
##        131529.7

A continuación, seguimos con “landarea”:

# Descriptive statistics of landarea within each GEO_subregion level
# (mean, standard deviation, median, minimum, maximum). The "##" lines are the
# echoed console output of each call.
with(dfA, tapply(landarea, GEO_subregion, mean))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        875594.6        300392.2       1262919.2        676390.9 
##  Western Africa 
##        453551.0
with(dfA, tapply(landarea, GEO_subregion, sd))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        786168.0        413610.4       1079434.0        418232.7 
##  Western Africa 
##        453693.9
with(dfA, tapply(landarea, GEO_subregion, median))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        544316.8        121862.9        968071.5        761220.4 
##  Western Africa 
##        245860.1
with(dfA, tapply(landarea, GEO_subregion, min))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##       265145.62        20903.50       147881.49        17409.73 
##  Western Africa 
##        34105.82
with(dfA, tapply(landarea, GEO_subregion, max))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##         2313414         1123717         2492385         1251896 
##  Western Africa 
##         1248146

Y, por último, terminamos con “GDP_capita.MRYA”

# Descriptive statistics of GDP_capita.MRYA within each GEO_subregion level
# (mean, standard deviation, median, minimum, maximum). The "##" lines are the
# echoed console output of each call.
with(dfA, tapply(GDP_capita.MRYA, GEO_subregion, mean))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##        2037.667        1163.486        4912.280        4057.450 
##  Western Africa 
##        1326.885
with(dfA, tapply(GDP_capita.MRYA, GEO_subregion, sd))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##       1914.4573        417.6242       2209.5782       4095.1023 
##  Western Africa 
##        561.7618
with(dfA, tapply(GDP_capita.MRYA, GEO_subregion, median))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##         1250.05         1104.70         4346.40         2026.50 
##  Western Africa 
##         1142.90
with(dfA, tapply(GDP_capita.MRYA, GEO_subregion, min))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##           700.0           629.8          2050.2           631.5 
##  Western Africa 
##           700.3
with(dfA, tapply(GDP_capita.MRYA, GEO_subregion, max))
##  Central Africa  Eastern Africa Northern Africa Southern Africa 
##          5835.0          1982.4          7758.2         11313.3 
##  Western Africa 
##          2299.1

Después del cálculo de los descriptivos anteriores, vamos a volver a calcular los descriptivos principales, esta vez para la variable “AGRICULTURE”. Ahora lo haremos con la función “tabular” para poder poner etiquetas representativas e imprimir el resultado en forma de tabla, como se requiere.

# Descriptive statistics of AGRICULTURE for each GEO_subregion level, rendered
# as an HTML table with the caption "AGRICULTURA".
# The column labels are supplied as strings through Heading() rather than as
# bare non-ASCII symbols (desviación, máximo, mínimo) inside the formula:
# whether such symbols parse as identifiers depends on the session locale,
# while string labels do not have that restriction.
tabla <- tabular(
  GEO_subregion ~ AGRICULTURE * (
    Heading("media") * mean +
      Heading("desviación") * sd +
      Heading("mediana") * median +
      Heading("máximo") * max +
      Heading("mínimo") * min
  ),
  data = dfA
)
html(tabla,
     options = htmloptions(HTMLcaption = "AGRICULTURA"),
     pad = TRUE)
AGRICULTURA
  AGRICULTURE
GEO_subregion media desviación mediana máximo mínimo
Central Africa 79.28 11.174 76.75 99.1 69.9
Eastern Africa 77.41 12.403 78.00 95.9 54.4
Northern Africa 66.04 8.136 68.40 74.8 53.0
Southern Africa 69.74 4.681 71.80 74.7 61.3
Western Africa 78.82 7.131 78.80 88.7 65.9

En cuanto a gráficas, primero haremos una de dispersión, usando el “material” de base de R. En ella se representa el producto interior bruto per cápita frente a la población de cada país de África, con los puntos coloreados en función del factor “GEO_subregion”.

# Scatter plot of GDP_capita.MRYA against Population2005 for the rows of dfA,
# one point per row, coloured by the GEO_subregion factor.
plot(dfA$Population2005, dfA$GDP_capita.MRYA,
     xlab = "Población2005",
     ylab = "P.I.B.",
     main = "PIB vs Población de África",
     col = dfA$GEO_subregion)
# Without a legend the colours cannot be decoded. A factor passed to `col` is
# used through its integer codes, so level i is drawn with palette colour i;
# the legend pairs each level with that same colour and the default symbol.
legend("topright",
       legend = levels(dfA$GEO_subregion),
       col = seq_along(levels(dfA$GEO_subregion)),
       pch = 1,
       title = "GEO_subregion")

Para acabar esta tarea, realizaremos un gráfico con “ggplot”. En nuestro caso, hemos elegido la cantidad existente de bosque por cada región africana delimitada. Además está coloreado por cada subregión africana existente.

# Box plots of FOREST for each EPI_regions value, with one filled box per
# GEO_subregion inside each region; title and axis labels are set explicitly.
ggplot(
  data = dfA,
  mapping = aes(x = EPI_regions, y = FOREST, fill = GEO_subregion)
) +
  geom_boxplot() +
  ggtitle("Bosque vs Región") +
  xlab("Región") +
  ylab("Bosque")

Podemos ver cómo es en la parte subsahariana donde únicamente existe bosque.

Dejamos constancia de la sesión al terminar

# Record the R version, platform, locale and attached/loaded package versions
# used to produce this report.
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## locale:
## [1] LC_COLLATE=Spanish_Spain.1252  LC_CTYPE=Spanish_Spain.1252   
## [3] LC_MONETARY=Spanish_Spain.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Spain.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.15.1    tables_0.8      Hmisc_4.0-2     Formula_1.2-1  
## [5] survival_2.40-1 lattice_0.20-34 ggplot2_2.2.1  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.9         RColorBrewer_1.1-2  plyr_1.8.4         
##  [4] base64enc_0.1-3     tools_3.3.2         rpart_4.1-10       
##  [7] digest_0.6.12       evaluate_0.10       tibble_1.2         
## [10] gtable_0.2.0        htmlTable_1.9       checkmate_1.8.2    
## [13] Matrix_1.2-8        yaml_2.1.14         gridExtra_2.2.1    
## [16] stringr_1.2.0       cluster_2.0.5       htmlwidgets_0.8    
## [19] rprojroot_1.2       grid_3.3.2          nnet_7.3-12        
## [22] data.table_1.10.4   foreign_0.8-67      rmarkdown_1.3      
## [25] latticeExtra_0.6-28 magrittr_1.5        backports_1.0.5    
## [28] scales_0.4.1        htmltools_0.3.5     splines_3.3.2      
## [31] assertthat_0.1      colorspace_1.3-2    labeling_0.3       
## [34] stringi_1.1.2       acepack_1.4.1       lazyeval_0.2.0     
## [37] munsell_0.4.3