# Import data ----

# Path to the "Life Expectancy Data Clean.xlsx" workbook. The default is the
# path originally hard-coded here; set the LIFE_EXPECTANCY_XLSX environment
# variable to run the analysis against a copy stored elsewhere.
data_path <- Sys.getenv(
  "LIFE_EXPECTANCY_XLSX",
  unset = "C:/Users/USER/Downloads/Life Expectancy Data Clean.xlsx"
)
df <- readxl::read_excel(data_path)

# List of variables and their types
str(df)

## tibble [2,938 × 22] (S3: tbl_df/tbl/data.frame)
##  $ Country                        : chr [1:2938] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Year                           : num [1:2938] 2015 2014 2013 2012 2011 ...
##  $ Status                         : chr [1:2938] "Developing" "Developing" "Developing" "Developing" ...
##  $ Life expectancy                : num [1:2938] 65 59.9 59.9 59.5 59.2 58.8 58.6 58.1 57.5 57.3 ...
##  $ Adult Mortality                : num [1:2938] 263 271 268 272 275 279 281 287 295 295 ...
##  $ infant deaths                  : num [1:2938] 62 64 66 69 71 74 77 80 82 84 ...
##  $ Alcohol                        : num [1:2938] 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.03 0.02 0.03 ...
##  $ percentage expenditure         : num [1:2938] 71.3 73.5 73.2 78.2 7.1 ...
##  $ Hepatitis B                    : num [1:2938] 65 62 64 67 68 66 63 64 63 64 ...
##  $ Measles                        : num [1:2938] 1154 492 430 2787 3013 ...
##  $ BMI                            : num [1:2938] 19.1 18.6 18.1 17.6 17.2 16.7 16.2 15.7 15.2 14.7 ...
##  $ under-five deaths              : num [1:2938] 83 86 89 93 97 102 106 110 113 116 ...
##  $ Polio                          : num [1:2938] 6 58 62 67 68 66 63 64 63 58 ...
##  $ Total expenditure              : num [1:2938] 8.16 8.18 8.13 8.52 7.87 9.2 9.42 8.33 6.73 7.43 ...
##  $ Diphtheria                     : num [1:2938] 65 62 64 67 68 66 63 64 63 58 ...
##  $ HIV/AIDS                       : num [1:2938] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ GDP                            : num [1:2938] 584.3 612.7 631.7 670 63.5 ...
##  $ Population                     : num [1:2938] 33736494 327582 31731688 3696958 2978599 ...
##  $ thinness  1-19 years           : num [1:2938] 17.2 17.5 17.7 17.9 18.2 18.4 18.6 18.8 19 19.2 ...
##  $ thinness 5-9 years             : num [1:2938] 17.3 17.5 17.7 18 18.2 18.4 18.7 18.9 19.1 19.3 ...
##  $ Income composition of resources: num [1:2938] 0.479 0.476 0.47 0.463 0.454 0.448 0.434 0.433 0.415 0.405 ...
##  $ Schooling                      : num [1:2938] 10.1 10 9.9 9.8 9.5 9.2 8.9 8.7 8.4 8.1 ...

# Descriptive statistics and data characteristics ----
# General descriptive statistics: per-column summary of `df`.

summary(df)
##    Country               Year         Status          Life expectancy
##  Length:2938        Min.   :2000   Length:2938        Min.   :36.30  
##  Class :character   1st Qu.:2004   Class :character   1st Qu.:63.10  
##  Mode  :character   Median :2008   Mode  :character   Median :72.10  
##                     Mean   :2008                      Mean   :69.22  
##                     3rd Qu.:2012                      3rd Qu.:75.70  
##                     Max.   :2015                      Max.   :89.00  
##                                                       NA's   :10     
##  Adult Mortality infant deaths       Alcohol        percentage expenditure
##  Min.   :  1.0   Min.   :   0.0   Min.   : 0.0100   Min.   :    0.000     
##  1st Qu.: 74.0   1st Qu.:   0.0   1st Qu.: 0.8775   1st Qu.:    4.685     
##  Median :144.0   Median :   3.0   Median : 3.7550   Median :   64.913     
##  Mean   :164.8   Mean   :  30.3   Mean   : 4.6029   Mean   :  738.251     
##  3rd Qu.:228.0   3rd Qu.:  22.0   3rd Qu.: 7.7025   3rd Qu.:  441.534     
##  Max.   :723.0   Max.   :1800.0   Max.   :17.8700   Max.   :19479.912     
##  NA's   :10                       NA's   :194                             
##   Hepatitis B       Measles              BMI        under-five deaths
##  Min.   : 1.00   Min.   :     0.0   Min.   : 1.00   Min.   :   0.00  
##  1st Qu.:77.00   1st Qu.:     0.0   1st Qu.:19.30   1st Qu.:   0.00  
##  Median :92.00   Median :    17.0   Median :43.50   Median :   4.00  
##  Mean   :80.94   Mean   :  2419.6   Mean   :38.32   Mean   :  42.04  
##  3rd Qu.:97.00   3rd Qu.:   360.2   3rd Qu.:56.20   3rd Qu.:  28.00  
##  Max.   :99.00   Max.   :212183.0   Max.   :87.30   Max.   :2500.00  
##  NA's   :553                        NA's   :34                       
##      Polio       Total expenditure   Diphtheria       HIV/AIDS     
##  Min.   : 3.00   Min.   : 0.370    Min.   : 2.00   Min.   : 0.100  
##  1st Qu.:78.00   1st Qu.: 4.260    1st Qu.:78.00   1st Qu.: 0.100  
##  Median :93.00   Median : 5.755    Median :93.00   Median : 0.100  
##  Mean   :82.55   Mean   : 5.938    Mean   :82.32   Mean   : 1.742  
##  3rd Qu.:97.00   3rd Qu.: 7.492    3rd Qu.:97.00   3rd Qu.: 0.800  
##  Max.   :99.00   Max.   :17.600    Max.   :99.00   Max.   :50.600  
##  NA's   :19      NA's   :226       NA's   :19                      
##       GDP              Population        thinness  1-19 years
##  Min.   :     1.68   Min.   :3.400e+01   Min.   : 0.10       
##  1st Qu.:   463.94   1st Qu.:1.958e+05   1st Qu.: 1.60       
##  Median :  1766.95   Median :1.387e+06   Median : 3.30       
##  Mean   :  7483.16   Mean   :1.275e+07   Mean   : 4.84       
##  3rd Qu.:  5910.81   3rd Qu.:7.420e+06   3rd Qu.: 7.20       
##  Max.   :119172.74   Max.   :1.294e+09   Max.   :27.70       
##  NA's   :448         NA's   :652         NA's   :34          
##  thinness 5-9 years Income composition of resources   Schooling    
##  Min.   : 0.10      Min.   :0.0000                  Min.   : 0.00  
##  1st Qu.: 1.50      1st Qu.:0.4930                  1st Qu.:10.10  
##  Median : 3.30      Median :0.6770                  Median :12.30  
##  Mean   : 4.87      Mean   :0.6276                  Mean   :11.99  
##  3rd Qu.: 7.20      3rd Qu.:0.7790                  3rd Qu.:14.30  
##  Max.   :28.60      Max.   :0.9480                  Max.   :20.70  
##  NA's   :34         NA's   :167                     NA's   :163

# Descriptive statistics for the numeric variables ----

# Keep only the numeric columns of `df`. vapply() is used instead of sapply()
# so the column mask is always a logical vector with one entry per column.
numeric_vars <- df[, vapply(df, is.numeric, logical(1))]
# NOTE(review): describe() is presumably psych::describe(); the package load is
# not visible in this part of the file.
describe(numeric_vars)
##                                 vars    n        mean          sd     median
## Year                               1 2938     2007.52        4.61    2008.00
## Life expectancy                    2 2928       69.22        9.52      72.10
## Adult Mortality                    3 2928      164.80      124.29     144.00
## infant deaths                      4 2938       30.30      117.93       3.00
## Alcohol                            5 2744        4.60        4.05       3.76
## percentage expenditure             6 2938      738.25     1987.91      64.91
## Hepatitis B                        7 2385       80.94       25.07      92.00
## Measles                            8 2938     2419.59    11467.27      17.00
## BMI                                9 2904       38.32       20.04      43.50
## under-five deaths                 10 2938       42.04      160.45       4.00
## Polio                             11 2919       82.55       23.43      93.00
## Total expenditure                 12 2712        5.94        2.50       5.76
## Diphtheria                        13 2919       82.32       23.72      93.00
## HIV/AIDS                          14 2938        1.74        5.08       0.10
## GDP                               15 2490     7483.16    14270.17    1766.95
## Population                        16 2286 12753375.12 61012096.51 1386542.00
## thinness  1-19 years              17 2904        4.84        4.42       3.30
## thinness 5-9 years                18 2904        4.87        4.51       3.30
## Income composition of resources   19 2771        0.63        0.21       0.68
## Schooling                         20 2775       11.99        3.36      12.30
##                                    trimmed        mad     min          max
## Year                               2007.52       5.93 2000.00 2.015000e+03
## Life expectancy                      69.91       8.60   36.30 8.900000e+01
## Adult Mortality                     150.51     112.68    1.00 7.230000e+02
## infant deaths                        10.20       4.45    0.00 1.800000e+03
## Alcohol                               4.23       4.81    0.01 1.787000e+01
## percentage expenditure              230.74      96.24    0.00 1.947991e+04
## Hepatitis B                          86.89       8.90    1.00 9.900000e+01
## Measles                             286.08      25.20    0.00 2.121830e+05
## BMI                                  39.05      24.17    1.00 8.730000e+01
## under-five deaths                    14.15       5.93    0.00 2.500000e+03
## Polio                                88.05       8.90    3.00 9.900000e+01
## Total expenditure                     5.85       2.36    0.37 1.760000e+01
## Diphtheria                           87.99       8.90    2.00 9.900000e+01
## HIV/AIDS                              0.54       0.00    0.10 5.060000e+01
## GDP                                3751.73    2360.98    1.68 1.191727e+05
## Population                      3953693.58 2012347.06   34.00 1.293859e+09
## thinness  1-19 years                  4.14       3.41    0.10 2.770000e+01
## thinness 5-9 years                    4.15       3.41    0.10 2.860000e+01
## Income composition of resources       0.65       0.19    0.00 9.500000e-01
## Schooling                            12.17       3.11    0.00 2.070000e+01
##                                        range  skew kurtosis         se
## Year                            1.500000e+01 -0.01    -1.21       0.09
## Life expectancy                 5.270000e+01 -0.64    -0.24       0.18
## Adult Mortality                 7.220000e+02  1.17     1.74       2.30
## infant deaths                   1.800000e+03  9.78   115.76       2.18
## Alcohol                         1.786000e+01  0.59    -0.81       0.08
## percentage expenditure          1.947991e+04  4.65    26.51      36.68
## Hepatitis B                     9.800000e+01 -1.93     2.76       0.51
## Measles                         2.121830e+05  9.43   114.58     211.56
## BMI                             8.630000e+01 -0.22    -1.29       0.37
## under-five deaths               2.500000e+03  9.49   109.49       2.96
## Polio                           9.600000e+01 -2.10     3.76       0.43
## Total expenditure               1.723000e+01  0.62     1.15       0.05
## Diphtheria                      9.700000e+01 -2.07     3.55       0.44
## HIV/AIDS                        5.050000e+01  5.39    34.80       0.09
## GDP                             1.191711e+05  3.20    12.29     285.98
## Population                      1.293859e+09 15.90   297.09 1276079.80
## thinness  1-19 years            2.760000e+01  1.71     3.96       0.08
## thinness 5-9 years              2.850000e+01  1.78     4.34       0.08
## Income composition of resources 9.500000e-01 -1.14     1.38       0.00
## Schooling                       2.070000e+01 -0.60     0.88       0.06

# Karakteristik data

# Check for missing values: count the NA entries in each column of `df`.

colSums(is.na(df))
##                         Country                            Year 
##                               0                               0 
##                          Status                 Life expectancy 
##                               0                              10 
##                 Adult Mortality                   infant deaths 
##                              10                               0 
##                         Alcohol          percentage expenditure 
##                             194                               0 
##                     Hepatitis B                         Measles 
##                             553                               0 
##                             BMI               under-five deaths 
##                              34                               0 
##                           Polio               Total expenditure 
##                              19                             226 
##                      Diphtheria                        HIV/AIDS 
##                              19                               0 
##                             GDP                      Population 
##                             448                             652 
##            thinness  1-19 years              thinness 5-9 years 
##                              34                              34 
## Income composition of resources                       Schooling 
##                             167                             163

# Skewness / kurtosis check: keep only those two columns of the describe()
# table computed for the numeric variables.

describe(numeric_vars)[c("skew", "kurtosis")]
##                                  skew kurtosis
## Year                            -0.01    -1.21
## Life expectancy                 -0.64    -0.24
## Adult Mortality                  1.17     1.74
## infant deaths                    9.78   115.76
## Alcohol                          0.59    -0.81
## percentage expenditure           4.65    26.51
## Hepatitis B                     -1.93     2.76
## Measles                          9.43   114.58
## BMI                             -0.22    -1.29
## under-five deaths                9.49   109.49
## Polio                           -2.10     3.76
## Total expenditure                0.62     1.15
## Diphtheria                      -2.07     3.55
## HIV/AIDS                         5.39    34.80
## GDP                              3.20    12.29
## Population                      15.90   297.09
## thinness  1-19 years             1.71     3.96
## thinness 5-9 years               1.78     4.34
## Income composition of resources -1.14     1.38
## Schooling                       -0.60     0.88

# Correlation between the numeric variables ----

# Correlation matrix; with "pairwise.complete.obs" each coefficient is computed
# from the rows where both variables of that pair are observed.
cor_matrix <- cor(x = numeric_vars, use = "pairwise.complete.obs")
# Show only the top-left 10 x 10 corner of the matrix.
cor_matrix[seq_len(10), seq_len(10)]
##                               Year Life expectancy Adult Mortality
## Year                    1.00000000       0.1700330     -0.07905159
## Life expectancy         0.17003302       1.0000000     -0.69635931
## Adult Mortality        -0.07905159      -0.6963593      1.00000000
## infant deaths          -0.03741493      -0.1965572      0.07875601
## Alcohol                -0.05298978       0.4048768     -0.19584820
## percentage expenditure  0.03139998       0.3818635     -0.24285953
## Hepatitis B             0.10433334       0.2567619     -0.16247633
## Measles                -0.08249298      -0.1575858      0.03117641
## BMI                     0.10897365       0.5676935     -0.38701678
## under-five deaths      -0.04293699      -0.2225291      0.09414613
##                        infant deaths     Alcohol percentage expenditure
## Year                     -0.03741493 -0.05298978             0.03139998
## Life expectancy          -0.19655718  0.40487676             0.38186350
## Adult Mortality           0.07875601 -0.19584820            -0.24285953
## infant deaths             1.00000000 -0.11563768            -0.08561222
## Alcohol                  -0.11563768  1.00000000             0.34128531
## percentage expenditure   -0.08561222  0.34128531             1.00000000
## Hepatitis B              -0.22356628  0.08754871             0.01627369
## Measles                   0.50112834 -0.05182667            -0.05659568
## BMI                      -0.22727889  0.33040846             0.22869975
## under-five deaths         0.99662888 -0.11237040            -0.08785231
##                        Hepatitis B     Measles        BMI under-five deaths
## Year                    0.10433334 -0.08249298  0.1089736       -0.04293699
## Life expectancy         0.25676195 -0.15758580  0.5676935       -0.22252912
## Adult Mortality        -0.16247633  0.03117641 -0.3870168        0.09414613
## infant deaths          -0.22356628  0.50112834 -0.2272789        0.99662888
## Alcohol                 0.08754871 -0.05182667  0.3304085       -0.11237040
## percentage expenditure  0.01627369 -0.05659568  0.2286998       -0.08785231
## Hepatitis B             1.00000000 -0.12052937  0.1503795       -0.23312625
## Measles                -0.12052937  1.00000000 -0.1759771        0.50780871
## BMI                     0.15037953 -0.17597706  1.0000000       -0.23766852
## under-five deaths      -0.23312625  0.50780871 -0.2376685        1.00000000

# Check the predictor variables ----

# Response variable
Y <- "Life expectancy"

# Predictor variables: every column of `df` except the response. Derived from
# `Y` so the response name is written in one place only.
X <- setdiff(names(df), Y)
X
##  [1] "Country"                         "Year"                           
##  [3] "Status"                          "Adult Mortality"                
##  [5] "infant deaths"                   "Alcohol"                        
##  [7] "percentage expenditure"          "Hepatitis B"                    
##  [9] "Measles"                         "BMI"                            
## [11] "under-five deaths"               "Polio"                          
## [13] "Total expenditure"               "Diphtheria"                     
## [15] "HIV/AIDS"                        "GDP"                            
## [17] "Population"                      "thinness  1-19 years"           
## [19] "thinness 5-9 years"              "Income composition of resources"
## [21] "Schooling"

# Multicollinearity check (VIF) ----

# Rebuild `numeric_vars` as the numeric columns of `df`.
numeric_vars <- select(df, where(is.numeric))

# List the selected column names.
names(numeric_vars)
##  [1] "Year"                            "Life expectancy"                
##  [3] "Adult Mortality"                 "infant deaths"                  
##  [5] "Alcohol"                         "percentage expenditure"         
##  [7] "Hepatitis B"                     "Measles"                        
##  [9] "BMI"                             "under-five deaths"              
## [11] "Polio"                           "Total expenditure"              
## [13] "Diphtheria"                      "HIV/AIDS"                       
## [15] "GDP"                             "Population"                     
## [17] "thinness  1-19 years"            "thinness 5-9 years"             
## [19] "Income composition of resources" "Schooling"
# Fit a linear model of `Life expectancy` on every other column of
# `numeric_vars`; the fitted model is passed to vif() below.
# NOTE(review): under R's default `na.action`, lm() drops every row that has an
# NA in any model column, so this fit may use far fewer rows than `df` has.
# Confirm that is acceptable for the VIF check.
model_vif <- lm(`Life expectancy` ~ ., data = numeric_vars)

# Variance inflation factor for each predictor of the fitted model.
# NOTE(review): vif() is presumably car::vif(); the package load is not visible
# in this part of the file.
vif(model_vif)
##                              Year                 `Adult Mortality` 
##                          1.157920                          1.809171 
##                   `infant deaths`                           Alcohol 
##                        213.609554                          2.067310 
##          `percentage expenditure`                     `Hepatitis B` 
##                         12.904426                          1.680406 
##                           Measles                               BMI 
##                          1.516630                          1.802986 
##               `under-five deaths`                             Polio 
##                        203.591034                          1.722414 
##               `Total expenditure`                        Diphtheria 
##                          1.124370                          2.094307 
##                        `HIV/AIDS`                               GDP 
##                          1.500870                         13.649710 
##                        Population            `thinness  1-19 years` 
##                          1.943421                          7.606109 
##              `thinness 5-9 years` `Income composition of resources` 
##                          7.584832                          3.028945 
##                         Schooling 
##                          3.538093