library (psych)

# Show the current working directory (informational only).
getwd()
## [1] "C:/Users/Victoria/Desktop/archivos escritorio/PhD. Marcela/Primera estancia/MODULO 2"
# Read the semicolon-separated country data set from the remote URL,
# keeping text columns as character vectors, then preview the first rows.
df <- read.csv(
  file = "http://gauss.inf.um.es/datos/paisesMundoRedC.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  stringsAsFactors = FALSE
)
head(df)
##                  Country                  EPI_regions
## AGO               Angola           Sub-Saharan Africa
## ALB              Albania    Central and Eastern Europ
## ARE United Arab Emirates Middle East and North Africa
## ARG            Argentina    Latin America and Caribbe
## ARM              Armenia Middle East and North Africa
## AUS            Australia    East Asia and the Pacific
##               GEO_subregion Population2005 GDP_capita.MRYA   landarea  EPI
## AGO         Southern Africa        15941.4          2314.4 1251895.62 39.5
## ALB          Central Europe         3129.7          4955.3   28346.12 84.0
## ARE       Arabian Peninsula         4495.8         22698.3   74776.60 64.0
## ARG           South America        38747.2         13652.4 2736296.00 81.8
## ARM          Eastern Europe         3016.3          5011.0   28272.73 77.8
## AUS Australia + New Zealand        20155.1         30677.9 7634643.84 79.8
##     FOREST FISH AGRICULTURE
## AGO   95.4 87.3        61.3
## ALB  100.0 62.5        75.6
## ARE  100.0 50.0        72.3
## ARG   75.9 58.8        79.9
## ARM   70.1   NA        94.2
## AUS  100.0 96.7        78.7
# Number of rows and columns of the full data set.
dim(df)
## [1] 149  10
# Compact per-column overview (type and first values) of the data set.
str(object = df)
## 'data.frame':    149 obs. of  10 variables:
##  $ Country        : chr  "Angola" "Albania" "United Arab Emirates" "Argentina" ...
##  $ EPI_regions    : chr  "Sub-Saharan Africa" "Central and Eastern Europ" "Middle East and North Africa" "Latin America and Caribbe" ...
##  $ GEO_subregion  : chr  "Southern Africa" "Central Europe" "Arabian Peninsula" "South America" ...
##  $ Population2005 : num  15941 3130 4496 38747 3016 ...
##  $ GDP_capita.MRYA: num  2314 4955 22698 13652 5011 ...
##  $ landarea       : num  1251896 28346 74777 2736296 28273 ...
##  $ EPI            : num  39.5 84 64 81.8 77.8 79.8 89.4 72.2 54.7 78.4 ...
##  $ FOREST         : num  95.4 100 100 75.9 70.1 100 100 100 0 100 ...
##  $ FISH           : num  87.3 62.5 50 58.8 NA 96.7 NA NA NA 47.4 ...
##  $ AGRICULTURE    : num  61.3 75.6 72.3 79.9 94.2 78.7 76.4 71.4 95.9 80.8 ...

Tres variables son de tipo carácter (Country, EPI_regions, GEO_subregion) y las siete restantes son numéricas.

Mediante la función str() observamos cómo está formado el objeto df: es un data frame que cuenta con 149 observaciones y 10 variables; además, vemos las características de cada una de ellas.

##   [1] "Southern Africa"         "Central Europe"         
##   [3] "Arabian Peninsula"       "South America"          
##   [5] "Eastern Europe"          "Australia + New Zealand"
##   [7] "Western Europe"          "Eastern Europe"         
##   [9] "Eastern Africa"          "Western Europe"         
##  [11] "Western Africa"          "Western Africa"         
##  [13] "South Asia"              "Central Europe"         
##  [15] "Central Europe"          "Eastern Europe"         
##  [17] "Meso America"            "South America"          
##  [19] "South America"           "Southern Africa"        
##  [21] "Central Africa"          "North America"          
##  [23] "Western Europe"          "South America"          
##  [25] "Northeast Asia"          "Western Africa"         
##  [27] "Central Africa"          "Central Africa"         
##  [29] "Central Africa"          "South America"          
##  [31] "Meso America"            "Caribbean"              
##  [33] "Central Europe"          "Central Europe"         
##  [35] "Western Europe"          "Eastern Africa"         
##  [37] "Western Europe"          "Caribbean"              
##  [39] "Northern Africa"         "South America"          
##  [41] "Northern Africa"         "Eastern Africa"         
##  [43] "Western Europe"          "Central Europe"         
##  [45] "Eastern Africa"          "Western Europe"         
##  [47] "South Pacific"           "Western Europe"         
##  [49] "Central Africa"          "Western Europe"         
##  [51] "Eastern Europe"          "Western Africa"         
##  [53] "Western Africa"          "Western Africa"         
##  [55] "Western Europe"          "Meso America"           
##  [57] "South America"           "Meso America"           
##  [59] "Central Europe"          "Caribbean"              
##  [61] "Central Europe"          "South East Asia"        
##  [63] "South Asia"              "Western Europe"         
##  [65] "South Asia"              "Mashriq"                
##  [67] "Western Europe"          "Western Europe"         
##  [69] "Western Europe"          "Caribbean"              
##  [71] "Mashriq"                 "Northeast Asia"         
##  [73] "Central Asia"            "Eastern Africa"         
##  [75] "Central Asia"            "South East Asia"        
##  [77] "Northeast Asia"          "Arabian Peninsula"      
##  [79] "South East Asia"         "Mashriq"                
##  [81] "South Asia"              "Central Europe"         
##  [83] "Western Europe"          "Central Europe"         
##  [85] "Northern Africa"         "Eastern Europe"         
##  [87] "Western Indian Ocean"    "Meso America"           
##  [89] "Central Europe"          "Western Africa"         
##  [91] "South East Asia"         "Northeast Asia"         
##  [93] "Southern Africa"         "Western Africa"         
##  [95] "Western Indian Ocean"    "Southern Africa"        
##  [97] "South East Asia"         "Southern Africa"        
##  [99] "Western Africa"          "Western Africa"         
## [101] "Meso America"            "Western Europe"         
## [103] "Western Europe"          "South Asia"             
## [105] "Australia + New Zealand" "Arabian Peninsula"      
## [107] "South Asia"              "Meso America"           
## [109] "South America"           "South East Asia"        
## [111] "South Pacific"           "Central Europe"         
## [113] "Western Europe"          "South America"          
## [115] "Central Europe"          "Eastern Europe"         
## [117] "Eastern Africa"          "Arabian Peninsula"      
## [119] "Northern Africa"         "Western Africa"         
## [121] "South Pacific"           "Western Africa"         
## [123] "Meso America"            "Central Europe"         
## [125] "Central Europe"          "Western Europe"         
## [127] "Southern Africa"         "Mashriq"                
## [129] "Central Africa"          "Western Africa"         
## [131] "South East Asia"         "Central Asia"           
## [133] "Central Asia"            "Caribbean"              
## [135] "Northern Africa"         "Central Europe"         
## [137] "Northeast Asia"          "Southern Africa"        
## [139] "Eastern Africa"          "Eastern Europe"         
## [141] "South America"           "North America"          
## [143] "Central Asia"            "South America"          
## [145] "South East Asia"         "Arabian Peninsula"      
## [147] "Southern Africa"         "Southern Africa"        
## [149] "Southern Africa"

En la variable GEO_subregion se visualizan las subregiones de los continentes de África, América, Asia, Europa y Oceanía.

# Row positions whose GEO_subregion contains the text "Africa".
indicesAfrica <- which(grepl(pattern = "Africa", x = df$GEO_subregion))
# Subset of the data restricted to those rows (all columns kept).
dfA <- df[indicesAfrica, ]
str(object = dfA)
## 'data.frame':    41 obs. of  10 variables:
##  $ Country        : chr  "Angola" "Burundi" "Benin" "Burkina Faso" ...
##  $ EPI_regions    : chr  "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" "Sub-Saharan Africa" ...
##  $ GEO_subregion  : chr  "Southern Africa" "Eastern Africa" "Western Africa" "Western Africa" ...
##  $ Population2005 : num  15941 7548 8439 13228 1765 ...
##  $ GDP_capita.MRYA: num  2314 630 1016 1143 11313 ...
##  $ landarea       : num  1251896 25227 115828 275748 559516 ...
##  $ EPI            : num  39.5 54.7 56.1 44.3 68.7 56 65.2 63.8 47.3 69.7 ...
##  $ FOREST         : num  95.4 0 17.8 64.5 79.2 97.2 100 78.4 94.8 98.4 ...
##  $ FISH           : num  87.3 NA 91.5 NA NA NA 91.2 52.4 46.3 74.1 ...
##  $ AGRICULTURE    : num  61.3 95.9 88.2 87.7 72.3 71.8 88.7 69.9 70.8 99.1 ...

Con la función factor() convertimos las variables categóricas en factores y automáticamente se les asignan niveles. Para verificar los cambios usamos str().

# Storage type of every column of dfA, as a named character vector.
# vapply() is used instead of sapply() so the result type is guaranteed
# (always character, one value per column) regardless of the input.
vapply(dfA, FUN = typeof, FUN.VALUE = character(1))
##         Country     EPI_regions   GEO_subregion  Population2005 
##     "character"     "character"     "character"        "double" 
## GDP_capita.MRYA        landarea             EPI          FOREST 
##        "double"        "double"        "double"        "double" 
##            FISH     AGRICULTURE 
##        "double"        "double"
# Convert the three text columns to factors in one pass; levels are
# derived automatically from the values present in each column.
columnas_categoricas <- c("Country", "EPI_regions", "GEO_subregion")
dfA[columnas_categoricas] <- lapply(dfA[columnas_categoricas], factor)
# Confirm that the columns are now factors.
str(object = dfA)
## 'data.frame':    41 obs. of  10 variables:
##  $ Country        : Factor w/ 41 levels "Algeria","Angola",..: 2 6 3 5 4 8 11 7 12 10 ...
##  $ EPI_regions    : Factor w/ 2 levels "Middle East and North Africa",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ GEO_subregion  : Factor w/ 5 levels "Central Africa",..: 4 2 5 5 4 1 5 1 1 1 ...
##  $ Population2005 : num  15941 7548 8439 13228 1765 ...
##  $ GDP_capita.MRYA: num  2314 630 1016 1143 11313 ...
##  $ landarea       : num  1251896 25227 115828 275748 559516 ...
##  $ EPI            : num  39.5 54.7 56.1 44.3 68.7 56 65.2 63.8 47.3 69.7 ...
##  $ FOREST         : num  95.4 0 17.8 64.5 79.2 97.2 100 78.4 94.8 98.4 ...
##  $ FISH           : num  87.3 NA 91.5 NA NA NA 91.2 52.4 46.3 74.1 ...
##  $ AGRICULTURE    : num  61.3 95.9 88.2 87.7 72.3 71.8 88.7 69.9 70.8 99.1 ...

Con la función summary() aplicada al data frame dfA se pueden observar los niveles de las variables convertidas en factores y los estadísticos descriptivos de las variables numéricas. Cabe destacar que la media de Population2005 es 21030 y la de AGRICULTURE es 74.87.

# Level counts for the factor columns and descriptive statistics
# (quartiles, mean, NA count) for the numeric columns.
summary(object = dfA)
##          Country                         EPI_regions         GEO_subregion
##  Algeria     : 1   Middle East and North Africa: 5   Central Africa : 6   
##  Angola      : 1   Sub-Saharan Africa          :36   Eastern Africa : 7   
##  Benin       : 1                                     Northern Africa: 5   
##  Botswana    : 1                                     Southern Africa:10   
##  Burkina Faso: 1                                     Western Africa :13   
##  Burundi     : 1                                                          
##  (Other)     :35                                                          
##  Population2005     GDP_capita.MRYA      landarea            EPI       
##  Min.   :   793.1   Min.   :  629.8   Min.   :  17410   Min.   :39.10  
##  1st Qu.:  5525.5   1st Qu.: 1008.1   1st Qu.: 147882   1st Qu.:51.30  
##  Median : 12883.9   Median : 1312.8   Median : 403759   Median :59.40  
##  Mean   : 21030.0   Mean   : 2506.2   Mean   : 642219   Mean   :59.16  
##  3rd Qu.: 28816.2   3rd Qu.: 2299.1   3rd Qu.: 968072   3rd Qu.:69.00  
##  Max.   :131529.7   Max.   :11313.3   Max.   :2492385   Max.   :78.10  
##                                                                        
##      FOREST            FISH        AGRICULTURE   
##  Min.   :  0.00   Min.   :23.90   Min.   :53.00  
##  1st Qu.: 73.30   1st Qu.:72.60   1st Qu.:69.30  
##  Median : 86.40   Median :79.10   Median :73.90  
##  Mean   : 78.51   Mean   :75.11   Mean   :74.87  
##  3rd Qu.: 98.40   3rd Qu.:87.05   3rd Qu.:81.60  
##  Max.   :100.00   Max.   :91.60   Max.   :99.10  
##                   NA's   :14

Mediante la función tabular() cruzamos la variable GEO_subregion con las variables Population2005, landarea y GDP_capita.MRYA, y para cada grupo calculamos la media, la desviación estándar, el máximo y el mínimo.

#install.packages("tables")
library (tables)
## Warning: package 'tables' was built under R version 3.4.4
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Cross GEO_subregion (rows) with three numeric variables (columns) and,
# for each combination, report mean (labelled "media"), sd, max and min.
# Column names are looked up in `data`, so no `dfA$` prefix is needed.
tabla <- tabular(
  GEO_subregion ~
    (Population2005 + landarea + GDP_capita.MRYA) *
      ((media = mean) + sd + max + min),
  data = dfA
)
tabla
##                                                                      
##                  Population2005                      landarea        
##  GEO_subregion   media          sd    max    min     media    sd     
##  Central Africa  15507          21283  57549  1383.8  875595   786168
##  Eastern Africa  23183          27067  77431   793.1  300392   413610
##  Northern Africa 36940          23147  74033 10102.5 1262919  1079434
##  Southern Africa 16388          15487  47432  1032.4  676391   418233
##  Western Africa  19871          34051 131530  1586.3  453551   453694
##                                                    
##                 GDP_capita.MRYA                    
##  max     min    media           sd     max   min   
##  2313414 265146 2038            1914.5  5835  700.0
##  1123717  20904 1163             417.6  1982  629.8
##  2492385 147881 4912            2209.6  7758 2050.2
##  1251896  17410 4057            4095.1 11313  631.5
##  1248146  34106 1327             561.8  2299  700.3

Calcula para la variable AGRICULTURE la media, mediana, desviación típica, el mínimo y el máximo según el factor GEO_subregion . Pon etiquetas representativas a las columnas.

# Summary of AGRICULTURE by GEO_subregion with descriptive column labels.
# The median is included because the exercise statement asks for mean,
# median, standard deviation, maximum and minimum; it was previously missing.
# NOTE: re-render the document so the printed tables show the Mediana column.
tabla2 <- tabular(
  (Sub_Geog = GEO_subregion) ~
    (Agricultura = AGRICULTURE) *
      ((Media = mean) + (Mediana = median) + (Desv. = sd) +
         (Max. = max) + (Min. = min)),
  data = dfA
)
tabla2
##                                              
##                  Agricultura                 
##  Sub_Geog        Media       Desv.  Max. Min.
##  Central Africa  79.28       11.174 99.1 69.9
##  Eastern Africa  77.41       12.403 95.9 54.4
##  Northern Africa 66.04        8.136 74.8 53.0
##  Southern Africa 69.74        4.681 74.7 61.3
##  Western Africa  78.82        7.131 88.7 65.9

imprime la tabla de forma elegante.

# Render tabla2 as HTML with a caption and padded cells.
html(
  tabla2,
  options = htmloptions(
    HTMLcaption = "Agricultura",
    pad = TRUE
  )
)
Agricultura
  Agricultura
Sub_Geog Media Desv. Max. Min.
Central Africa 79.28 11.174 99.1 69.9
Eastern Africa 77.41 12.403 95.9 54.4
Northern Africa 66.04  8.136 74.8 53.0
Southern Africa 69.74  4.681 74.7 61.3
Western Africa 78.82  7.131 88.7 65.9

Realiza un gráfico de dispersión para estudiar la relación entre el tamaño de la población de cada país (Population2005) y el producto interior bruto per cápita (GDP_capita.MRYA). Colorea los puntos según el factor GEO_subregion.

# Scatter plot of population (2005) against GDP per capita, one point per
# country, coloured by subregion.
# factor() makes the colour mapping work even if the column is still a
# character vector (character values would be read as colour names).
subregion <- factor(dfA$GEO_subregion)
plot(
  dfA$Population2005, dfA$GDP_capita.MRYA,
  col = subregion,
  xlab = "Population2005",
  ylab = "GDP_capita.MRYA"
)
# Factor codes index the current palette, so the legend uses the same codes.
legend(
  "topright",
  legend = levels(subregion),
  col = seq_along(levels(subregion)),
  pch = 1
)

#Realiza un gráfico de tu interés para el conjunto de datos empleando la librería ggplot2

library(ggplot2)
# Land area against GDP per capita, points coloured by subregion, with a
# single linear-regression trend line fitted over all points.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggplot(dfA, aes(x = landarea, y = GDP_capita.MRYA)) +
  geom_point(aes(colour = GEO_subregion), na.rm = TRUE) +
  stat_smooth(method = "lm")

# Print the R version, platform, locale and loaded package versions.
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Ecuador.1252  LC_CTYPE=Spanish_Ecuador.1252   
## [3] LC_MONETARY=Spanish_Ecuador.1252 LC_NUMERIC=C                    
## [5] LC_TIME=Spanish_Ecuador.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tables_0.8.3    Hmisc_4.1-1     ggplot2_2.2.1   Formula_1.2-2  
## [5] survival_2.41-3 lattice_0.20-35 psych_1.7.8    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.15        compiler_3.4.3      pillar_1.2.1       
##  [4] RColorBrewer_1.1-2  plyr_1.8.4          base64enc_0.1-3    
##  [7] tools_3.4.3         rpart_4.1-11        digest_0.6.15      
## [10] checkmate_1.8.5     htmlTable_1.11.2    evaluate_0.10.1    
## [13] tibble_1.4.2        gtable_0.2.0        nlme_3.1-131       
## [16] rlang_0.2.0         Matrix_1.2-12       rstudioapi_0.7     
## [19] yaml_2.1.18         parallel_3.4.3      gridExtra_2.3      
## [22] stringr_1.3.0       knitr_1.20          cluster_2.0.6      
## [25] htmlwidgets_1.0     nnet_7.3-12         rprojroot_1.3-2    
## [28] grid_3.4.3          data.table_1.10.4-3 foreign_0.8-69     
## [31] rmarkdown_1.9       latticeExtra_0.6-28 magrittr_1.5       
## [34] backports_1.1.2     scales_0.5.0        htmltools_0.3.6    
## [37] splines_3.4.3       mnormt_1.5-5        colorspace_1.3-2   
## [40] labeling_0.3        stringi_1.1.6       acepack_1.4.1      
## [43] lazyeval_0.2.1      munsell_0.4.3

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.