#install.packages("gapminder")
#install.packages("GGally")
suppressMessages(suppressWarnings(library(gapminder)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(GGally)))
# Load the gapminder dataset into the workspace and preview its first rows.
data(gapminder)
head(gapminder)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# Preview the last rows of the dataset.
tail(gapminder)
## # A tibble: 6 × 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Zimbabwe Africa     1982    60.4  7636524      789.
## 2 Zimbabwe Africa     1987    62.4  9216418      706.
## 3 Zimbabwe Africa     1992    60.4 10704340      693.
## 4 Zimbabwe Africa     1997    46.8 11404948      792.
## 5 Zimbabwe Africa     2002    40.0 11926563      672.
## 6 Zimbabwe Africa     2007    43.5 12311143      470.
# Life expectancy over time for Colombia, drawn as points joined by lines.
plot(
  lifeExp ~ year,
  data = subset(gapminder, country == "Colombia"),
  type = "b",
  xlab = "A\u00f1o",
  ylab = "Esperanza de Vida"
)

# Keep only the 2007 observations; `gap` is the working data set from here on.
gap <- subset(gapminder, year == 2007)
head(gap)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       2007    43.8 31889923      975.
## 2 Albania     Europe     2007    76.4  3600523     5937.
## 3 Algeria     Africa     2007    72.3 33333216     6223.
## 4 Angola      Africa     2007    42.7 12420476     4797.
## 5 Argentina   Americas   2007    75.3 40301927    12779.
## 6 Australia   Oceania    2007    81.2 20434176    34435.
# Dimensions (rows, columns) and column names of the 2007 subset.
dim(gap)
## [1] 142   6
names(gap)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
# Classify each country into an income band from its GDP per capita.
# Bands are left-closed: [-Inf, 1035) Bajo, [1035, 4085) Medio Bajo,
# [4085, 12615) Medio Alto, [12615, Inf) Alto. findInterval() returns 0..3,
# hence the + 1L to index the label vector. as.factor() on the resulting
# character vector keeps the alphabetical level order.
gap$ingreso <- as.factor(
  c("Bajo", "Medio Bajo", "Medio Alto", "Alto")[
    findInterval(gap$gdpPercap, c(1035, 4085, 12615)) + 1L
  ]
)
head(gap)
## # A tibble: 6 × 7
##   country     continent  year lifeExp      pop gdpPercap ingreso   
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl> <fct>     
## 1 Afghanistan Asia       2007    43.8 31889923      975. Bajo      
## 2 Albania     Europe     2007    76.4  3600523     5937. Medio Alto
## 3 Algeria     Africa     2007    72.3 33333216     6223. Medio Alto
## 4 Angola      Africa     2007    42.7 12420476     4797. Medio Alto
## 5 Argentina   Americas   2007    75.3 40301927    12779. Alto      
## 6 Australia   Oceania    2007    81.2 20434176    34435. Alto
# Per-column summary of the 2007 subset, including the derived `ingreso` factor.
summary(gap)
##         country       continent       year         lifeExp     
##  Afghanistan:  1   Africa  :52   Min.   :2007   Min.   :39.61  
##  Albania    :  1   Americas:25   1st Qu.:2007   1st Qu.:57.16  
##  Algeria    :  1   Asia    :33   Median :2007   Median :71.94  
##  Angola     :  1   Europe  :30   Mean   :2007   Mean   :67.01  
##  Argentina  :  1   Oceania : 2   3rd Qu.:2007   3rd Qu.:76.41  
##  Australia  :  1                 Max.   :2007   Max.   :82.60  
##  (Other)    :136                                               
##       pop              gdpPercap             ingreso  
##  Min.   :1.996e+05   Min.   :  277.6   Alto      :42  
##  1st Qu.:4.508e+06   1st Qu.: 1624.8   Bajo      :20  
##  Median :1.052e+07   Median : 6124.4   Medio Alto:42  
##  Mean   :4.402e+07   Mean   :11680.1   Medio Bajo:38  
##  3rd Qu.:3.121e+07   3rd Qu.:18008.8                  
##  Max.   :1.319e+09   Max.   :49357.2                  
## 
# Summary of the three numeric measures only. Columns are selected by name
# rather than by position (4:6) so the call keeps working if the column order
# of `gap` ever changes.
summary(gap[, c("lifeExp", "pop", "gdpPercap")])
##     lifeExp           pop              gdpPercap      
##  Min.   :39.61   Min.   :1.996e+05   Min.   :  277.6  
##  1st Qu.:57.16   1st Qu.:4.508e+06   1st Qu.: 1624.8  
##  Median :71.94   Median :1.052e+07   Median : 6124.4  
##  Mean   :67.01   Mean   :4.402e+07   Mean   :11680.1  
##  3rd Qu.:76.41   3rd Qu.:3.121e+07   3rd Qu.:18008.8  
##  Max.   :82.60   Max.   :1.319e+09   Max.   :49357.2
# Arithmetic mean of a vector.
#
# x:     numeric (or logical) vector.
# na.rm: if TRUE, missing values are dropped before averaging. The default
#        FALSE keeps the original behaviour, where any NA makes the result NA.
# Returns sum(x) / length(x); the result is NaN for an empty vector.
promedio <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}

# 2007 averages across countries: life expectancy, GDP per capita, population.
media_lifeexp_2007 <- promedio(gap[["lifeExp"]])
media_gdpper_2007 <- promedio(gap[["gdpPercap"]])
media_pop_2007 <- mean(gap[["pop"]])

# Show the three averages side by side as a one-row matrix.
cbind(media_lifeexp_2007, media_gdpper_2007, media_pop_2007)
##      media_lifeexp_2007 media_gdpper_2007 media_pop_2007
## [1,]           67.00742          11680.07       44021220
# Total GDP per country (per-capita GDP times population), then the
# population-weighted GDP per capita over all countries.
gdpTotal <- with(gap, gdpPercap * pop)
sum(gdpTotal) / sum(gap[["pop"]])
## [1] 9295.987
# Descriptive statistics for 2007 life expectancy.
# Columns are referenced explicitly through `gap` (gap$col, with(gap, ...),
# data = gap) instead of attach(gap): attach() places a copy of the data frame
# on the search path, so later changes to `gap` are not reflected and any
# global variable with the same name silently masks a column.
mean(gap$lifeExp)
## [1] 67.00742
sum(gap$lifeExp)
## [1] 9515.054
length(gap$lifeExp)
## [1] 142
quantile(gap$lifeExp, c(0.25, 0.5, 0.75))
##      25%      50%      75% 
## 57.16025 71.93550 76.41325
min(gap$lifeExp)
## [1] 39.613
max(gap$lifeExp)
## [1] 82.603
# Same statistics collected in one table (labels are user-facing, in Spanish).
tabla <- data.frame(
  Medida = c("Media", "Suma", "Tamaño", "Cuartil 25%", "Mediana", "Cuartil 75%", "Mínimo", "Máximo"),
  Valor = c(mean(gap$lifeExp, na.rm = TRUE),
            sum(gap$lifeExp, na.rm = TRUE),
            length(gap$lifeExp),
            quantile(gap$lifeExp, 0.25, na.rm = TRUE),
            quantile(gap$lifeExp, 0.50, na.rm = TRUE),
            quantile(gap$lifeExp, 0.75, na.rm = TRUE),
            min(gap$lifeExp, na.rm = TRUE),
            max(gap$lifeExp, na.rm = TRUE))
)

print(tabla)
##        Medida      Valor
## 1       Media   67.00742
## 2        Suma 9515.05400
## 3      Tamaño  142.00000
## 4 Cuartil 25%   57.16025
## 5     Mediana   71.93550
## 6 Cuartil 75%   76.41325
## 7      Mínimo   39.61300
## 8      Máximo   82.60300
# Frequency table: number of countries per continent and income band.
# with() keeps the bare column names, so the table dimensions stay labelled.
with(gap, table(continent, ingreso))
##           ingreso
## continent  Alto Bajo Medio Alto Medio Bajo
##   Africa      1   18         12         21
##   Americas    6    0         15          4
##   Asia       10    2          8         13
##   Europe     23    0          7          0
##   Oceania     2    0          0          0
# Mean life expectancy per continent.
tapply(gap$lifeExp, gap$continent, promedio)
##   Africa Americas     Asia   Europe  Oceania 
## 54.80604 73.60812 70.72848 77.64860 80.71950
# Sum of life expectancy per continent x income band ...
xtabs(lifeExp ~ continent + ingreso, data = gap)
##           ingreso
## continent      Alto     Bajo Medio Alto Medio Bajo
##   Africa     56.735  910.571    727.653   1154.955
##   Americas  461.333    0.000   1109.303    269.567
##   Asia      784.191  105.897    566.998    876.954
##   Europe   1812.380    0.000    517.078      0.000
##   Oceania   161.439    0.000      0.000      0.000
# ... divided by the cell counts gives the mean per cell (empty cells are 0/0).
xtabs(lifeExp ~ continent + ingreso, data = gap) /
  with(gap, table(continent, ingreso))
##           ingreso
## continent      Alto     Bajo Medio Alto Medio Bajo
##   Africa   56.73500 50.58728   60.63775   54.99786
##   Americas 76.88883            73.95353   67.39175
##   Asia     78.41910 52.94850   70.87475   67.45800
##   Europe   78.79913            73.86829           
##   Oceania  80.71950
# Numeric variables only, for covariance / correlation / pairs plot.
# The population column keeps its historical name `gap.pop` so existing
# references to Datos07 columns continue to work.
Datos07 <- data.frame(
  lifeExp = gap$lifeExp,
  gap.pop = gap$pop,
  gdpPercap = gap$gdpPercap
)
cov(Datos07)
##                lifeExp       gap.pop     gdpPercap
## lifeExp   1.457578e+02  8.475088e+07        105368
## gap.pop   8.475088e+07  2.179208e+16 -105694667829
## gdpPercap 1.053680e+05 -1.056947e+11     165377988
cor(Datos07)
##              lifeExp     gap.pop  gdpPercap
## lifeExp   1.00000000  0.04755312  0.6786624
## gap.pop   0.04755312  1.00000000 -0.0556756
## gdpPercap 0.67866240 -0.05567560  1.0000000
ggpairs(Datos07)

# Gaussian GLM of life expectancy on continent and GDP per capita.
summary(glm(lifeExp ~ continent + gdpPercap, data = gap))
## 
## Call:
## glm(formula = lifeExp ~ continent + gdpPercap, data = gap)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.374e+01  9.284e-01  57.881  < 2e-16 ***
## continentAmericas 1.606e+01  1.662e+00   9.663  < 2e-16 ***
## continentAsia     1.267e+01  1.557e+00   8.137 2.27e-13 ***
## continentEurope   1.523e+01  1.956e+00   7.786 1.57e-12 ***
## continentOceania  1.665e+01  4.974e+00   3.348  0.00105 ** 
## gdpPercap         3.466e-04  5.674e-05   6.109 9.89e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 43.22029)
## 
##     Null deviance: 20552  on 141  degrees of freedom
## Residual deviance:  5878  on 136  degrees of freedom
## AIC: 945.66
## 
## Number of Fisher Scoring iterations: 2
# Base-graphics boxplot of 2007 GDP per capita.
# `horizontal = TRUE` lays the box out sideways; drop it for a vertical plot.
boxplot(
  gap$gdpPercap,
  horizontal = TRUE,
  col = "pink",
  border = "red",
  main = "Boxplot - GDP per cápita",
  xlab = "GDP per cápita",
  ylab = "Valor"
)