Data Preparation

library(dplyr)
library(knitr)
library(tibble)
library(data.table)
library(corrplot)
library(VIM) 
# load data

# Read the test set; the strings "-1" and "-1.0" are read as NA.
# as_tibble() replaces the deprecated as.tibble() alias.
test <- as_tibble(fread("test.csv", na.strings = c("-1", "-1.0")))
## 
Read 0.0% of 892816 rows
Read 11.2% of 892816 rows
Read 22.4% of 892816 rows
Read 32.5% of 892816 rows
Read 43.7% of 892816 rows
Read 53.8% of 892816 rows
Read 63.8% of 892816 rows
Read 73.9% of 892816 rows
Read 85.1% of 892816 rows
Read 96.3% of 892816 rows
Read 892816 rows and 58 (of 58) columns from 0.160 GB file in 00:00:13
# Read the training set; the strings "-1" and "-1.0" are read as NA.
# as_tibble() replaces the deprecated as.tibble() alias.
train <- as_tibble(fread("train.csv", na.strings = c("-1", "-1.0")))
## 
Read 15.1% of 595212 rows
Read 31.9% of 595212 rows
Read 48.7% of 595212 rows
Read 65.5% of 595212 rows
Read 82.3% of 595212 rows
Read 99.1% of 595212 rows
Read 595212 rows and 59 (of 59) columns from 0.108 GB file in 00:00:08
# Read the sample submission file (no NA recoding is applied here).
# as_tibble() replaces the deprecated as.tibble() alias.
sample_submit <- as_tibble(fread("sample_submission.csv"))

Research question

Can a machine learning model be a better predictor of annual auto insurance claims than basic statistics?

Cases

Each case is an individual driver. The training set contains 595,212 cases and the test set contains 892,816 cases.

Data collection

**https://www.kaggle.com/c/porto-seguro-safe-driver-prediction**

Type of study

This is an observational study.

Data Source

**https://www.kaggle.com/headsortails/steering-wheel-of-fortune-porto-seguro-eda/data**

Response

The response variable is `target`, which records whether an insurance claim was made (coded 0/1; its mean in the training set is about 0.036).

Explanatory

The explanatory variables are all of the remaining variables other than the response. They are a combination of numerical and categorical variables; columns ending in `_cat` are categorical and columns ending in `_bin` are binary.

Relevant summary statistics

Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.

# Per-column summary (quartiles, mean, NA count) of the test set
test %>% summary()
##        id            ps_ind_01     ps_ind_02_cat     ps_ind_03     
##  Min.   :      0   Min.   :0.000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.: 372022   1st Qu.:0.000   1st Qu.:1.000   1st Qu.: 2.000  
##  Median : 744307   Median :1.000   Median :1.000   Median : 4.000  
##  Mean   : 744154   Mean   :1.902   Mean   :1.359   Mean   : 4.414  
##  3rd Qu.:1116309   3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.: 6.000  
##  Max.   :1488026   Max.   :7.000   Max.   :4.000   Max.   :11.000  
##                                    NA's   :307                     
##  ps_ind_04_cat    ps_ind_05_cat   ps_ind_06_bin    ps_ind_07_bin   
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.0000   Median :0.0000  
##  Mean   :0.4176   Mean   :0.422   Mean   :0.3932   Mean   :0.2572  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :6.000   Max.   :1.0000   Max.   :1.0000  
##  NA's   :145      NA's   :8710                                     
##  ps_ind_08_bin    ps_ind_09_bin    ps_ind_10_bin      ps_ind_11_bin     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.0000   Median :0.0000   Median :0.000000   Median :0.000000  
##  Mean   :0.1637   Mean   :0.1859   Mean   :0.000373   Mean   :0.001595  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000000   Max.   :1.000000  
##                                                                         
##  ps_ind_12_bin      ps_ind_13_bin        ps_ind_14         ps_ind_15     
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.00000   Min.   : 0.000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.: 5.000  
##  Median :0.000000   Median :0.000000   Median :0.00000   Median : 7.000  
##  Mean   :0.009376   Mean   :0.001039   Mean   :0.01238   Mean   : 7.297  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:10.000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :4.00000   Max.   :13.000  
##                                                                          
##  ps_ind_16_bin    ps_ind_17_bin    ps_ind_18_bin     ps_reg_01     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.4000  
##  Median :1.0000   Median :0.0000   Median :0.000   Median :0.7000  
##  Mean   :0.6606   Mean   :0.1204   Mean   :0.155   Mean   :0.6111  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.9000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :0.9000  
##                                                                    
##    ps_reg_02        ps_reg_03      ps_car_01_cat    ps_car_02_cat 
##  Min.   :0.0000   Min.   :0.06     Min.   : 0.000   Min.   :0.00  
##  1st Qu.:0.2000   1st Qu.:0.63     1st Qu.: 7.000   1st Qu.:1.00  
##  Median :0.3000   Median :0.80     Median : 7.000   Median :1.00  
##  Mean   :0.4399   Mean   :0.89     Mean   : 8.294   Mean   :0.83  
##  3rd Qu.:0.6000   3rd Qu.:1.09     3rd Qu.:11.000   3rd Qu.:1.00  
##  Max.   :1.8000   Max.   :4.42     Max.   :11.000   Max.   :1.00  
##                   NA's   :161684   NA's   :160      NA's   :5     
##  ps_car_03_cat    ps_car_04_cat    ps_car_05_cat    ps_car_06_cat   
##  Min.   :0.0      Min.   :0.0000   Min.   :0.0      Min.   : 0.000  
##  1st Qu.:0.0      1st Qu.:0.0000   1st Qu.:0.0      1st Qu.: 1.000  
##  Median :1.0      Median :0.0000   Median :1.0      Median : 7.000  
##  Mean   :0.6      Mean   :0.7258   Mean   :0.5      Mean   : 6.564  
##  3rd Qu.:1.0      3rd Qu.:0.0000   3rd Qu.:1.0      3rd Qu.:11.000  
##  Max.   :1.0      Max.   :9.0000   Max.   :1.0      Max.   :17.000  
##  NA's   :616911                    NA's   :400359                   
##  ps_car_07_cat   ps_car_08_cat    ps_car_09_cat  ps_car_10_cat   
##  Min.   :0.000   Min.   :0.0000   Min.   :0.00   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.0000   1st Qu.:0.00   1st Qu.:1.0000  
##  Median :1.000   Median :1.0000   Median :2.00   Median :1.0000  
##  Mean   :0.948   Mean   :0.8323   Mean   :1.33   Mean   :0.9921  
##  3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:2.00   3rd Qu.:1.0000  
##  Max.   :1.000   Max.   :1.0000   Max.   :4.00   Max.   :2.0000  
##  NA's   :17331                    NA's   :877                    
##  ps_car_11_cat      ps_car_11       ps_car_12        ps_car_13     
##  Min.   :  1.00   Min.   :0.000   Min.   :0.1414   Min.   :0.2758  
##  1st Qu.: 32.00   1st Qu.:2.000   1st Qu.:0.3162   1st Qu.:0.6712  
##  Median : 65.00   Median :3.000   Median :0.3742   Median :0.7661  
##  Mean   : 62.28   Mean   :2.347   Mean   :0.3800   Mean   :0.8136  
##  3rd Qu.: 94.00   3rd Qu.:3.000   3rd Qu.:0.4000   3rd Qu.:0.9061  
##  Max.   :104.00   Max.   :3.000   Max.   :1.2649   Max.   :4.0313  
##                   NA's   :1                                        
##    ps_car_14       ps_car_15       ps_calc_01       ps_calc_02    
##  Min.   :0.11    Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.35    1st Qu.:2.828   1st Qu.:0.2000   1st Qu.:0.2000  
##  Median :0.37    Median :3.317   Median :0.4000   Median :0.5000  
##  Mean   :0.37    Mean   :3.068   Mean   :0.4496   Mean   :0.4505  
##  3rd Qu.:0.40    3rd Qu.:3.606   3rd Qu.:0.7000   3rd Qu.:0.7000  
##  Max.   :0.64    Max.   :3.742   Max.   :0.9000   Max.   :0.9000  
##  NA's   :63805                                                    
##    ps_calc_03       ps_calc_04      ps_calc_05      ps_calc_06    
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   : 1.000  
##  1st Qu.:0.2000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 7.000  
##  Median :0.4000   Median :2.000   Median :2.000   Median : 8.000  
##  Mean   :0.4501   Mean   :2.371   Mean   :1.885   Mean   : 7.688  
##  3rd Qu.:0.7000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.: 9.000  
##  Max.   :0.9000   Max.   :5.000   Max.   :6.000   Max.   :10.000  
##                                                                   
##    ps_calc_07     ps_calc_08       ps_calc_09      ps_calc_10    
##  Min.   :0.00   Min.   : 1.000   Min.   :0.000   Min.   : 0.000  
##  1st Qu.:2.00   1st Qu.: 8.000   1st Qu.:1.000   1st Qu.: 6.000  
##  Median :3.00   Median : 9.000   Median :2.000   Median : 8.000  
##  Mean   :3.01   Mean   : 9.226   Mean   :2.339   Mean   : 8.443  
##  3rd Qu.:4.00   3rd Qu.:10.000   3rd Qu.:3.000   3rd Qu.:10.000  
##  Max.   :9.00   Max.   :12.000   Max.   :7.000   Max.   :25.000  
##                                                                  
##    ps_calc_11       ps_calc_12      ps_calc_13       ps_calc_14   
##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
##  1st Qu.: 4.000   1st Qu.: 1.00   1st Qu.: 2.000   1st Qu.: 6.00  
##  Median : 5.000   Median : 1.00   Median : 3.000   Median : 7.00  
##  Mean   : 5.438   Mean   : 1.44   Mean   : 2.875   Mean   : 7.54  
##  3rd Qu.: 7.000   3rd Qu.: 2.00   3rd Qu.: 4.000   3rd Qu.: 9.00  
##  Max.   :20.000   Max.   :11.00   Max.   :15.000   Max.   :28.00  
##                                                                   
##  ps_calc_15_bin   ps_calc_16_bin   ps_calc_17_bin   ps_calc_18_bin  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :1.0000   Median :0.0000  
##  Mean   :0.1237   Mean   :0.6278   Mean   :0.5547   Mean   :0.2878  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##  ps_calc_19_bin   ps_calc_20_bin  
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000  
##  Mean   :0.3493   Mean   :0.1524  
##  3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000  
## 
# Per-column summary (quartiles, mean, NA count) of the training set
train %>% summary()
##        id              target          ps_ind_01   ps_ind_02_cat 
##  Min.   :      7   Min.   :0.00000   Min.   :0.0   Min.   :1.00  
##  1st Qu.: 371992   1st Qu.:0.00000   1st Qu.:0.0   1st Qu.:1.00  
##  Median : 743548   Median :0.00000   Median :1.0   Median :1.00  
##  Mean   : 743804   Mean   :0.03645   Mean   :1.9   Mean   :1.36  
##  3rd Qu.:1115549   3rd Qu.:0.00000   3rd Qu.:3.0   3rd Qu.:2.00  
##  Max.   :1488027   Max.   :1.00000   Max.   :7.0   Max.   :4.00  
##                                                    NA's   :216   
##    ps_ind_03      ps_ind_04_cat   ps_ind_05_cat   ps_ind_06_bin   
##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 2.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median : 4.000   Median :0.000   Median :0.000   Median :0.0000  
##  Mean   : 4.423   Mean   :0.417   Mean   :0.419   Mean   :0.3937  
##  3rd Qu.: 6.000   3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:1.0000  
##  Max.   :11.000   Max.   :1.000   Max.   :6.000   Max.   :1.0000  
##                   NA's   :83      NA's   :5809                    
##  ps_ind_07_bin   ps_ind_08_bin    ps_ind_09_bin    ps_ind_10_bin     
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :0.000   Median :0.0000   Median :0.0000   Median :0.000000  
##  Mean   :0.257   Mean   :0.1639   Mean   :0.1853   Mean   :0.000373  
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.000000  
##  Max.   :1.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000000  
##                                                                      
##  ps_ind_11_bin      ps_ind_12_bin      ps_ind_13_bin      
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.0000000  
##  Median :0.000000   Median :0.000000   Median :0.0000000  
##  Mean   :0.001692   Mean   :0.009439   Mean   :0.0009476  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.0000000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.0000000  
##                                                           
##    ps_ind_14         ps_ind_15    ps_ind_16_bin    ps_ind_17_bin   
##  Min.   :0.00000   Min.   : 0.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.: 5.0   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median : 7.0   Median :1.0000   Median :0.0000  
##  Mean   :0.01245   Mean   : 7.3   Mean   :0.6608   Mean   :0.1211  
##  3rd Qu.:0.00000   3rd Qu.:10.0   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :13.0   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##  ps_ind_18_bin      ps_reg_01       ps_reg_02        ps_reg_03     
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.06    
##  1st Qu.:0.0000   1st Qu.:0.400   1st Qu.:0.2000   1st Qu.:0.63    
##  Median :0.0000   Median :0.700   Median :0.3000   Median :0.80    
##  Mean   :0.1534   Mean   :0.611   Mean   :0.4392   Mean   :0.89    
##  3rd Qu.:0.0000   3rd Qu.:0.900   3rd Qu.:0.6000   3rd Qu.:1.08    
##  Max.   :1.0000   Max.   :0.900   Max.   :1.8000   Max.   :4.04    
##                                                    NA's   :107772  
##  ps_car_01_cat    ps_car_02_cat    ps_car_03_cat    ps_car_04_cat   
##  Min.   : 0.000   Min.   :0.0000   Min.   :0.0      Min.   :0.0000  
##  1st Qu.: 7.000   1st Qu.:1.0000   1st Qu.:0.0      1st Qu.:0.0000  
##  Median : 7.000   Median :1.0000   Median :1.0      Median :0.0000  
##  Mean   : 8.298   Mean   :0.8299   Mean   :0.6      Mean   :0.7252  
##  3rd Qu.:11.000   3rd Qu.:1.0000   3rd Qu.:1.0      3rd Qu.:0.0000  
##  Max.   :11.000   Max.   :1.0000   Max.   :1.0      Max.   :9.0000  
##  NA's   :107      NA's   :5        NA's   :411231                   
##  ps_car_05_cat    ps_car_06_cat    ps_car_07_cat   ps_car_08_cat   
##  Min.   :0.00     Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.00     1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:1.0000  
##  Median :1.00     Median : 7.000   Median :1.000   Median :1.0000  
##  Mean   :0.53     Mean   : 6.555   Mean   :0.948   Mean   :0.8321  
##  3rd Qu.:1.00     3rd Qu.:11.000   3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :1.00     Max.   :17.000   Max.   :1.000   Max.   :1.0000  
##  NA's   :266551                    NA's   :11489                   
##  ps_car_09_cat   ps_car_10_cat    ps_car_11_cat      ps_car_11    
##  Min.   :0.000   Min.   :0.0000   Min.   :  1.00   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.0000   1st Qu.: 32.00   1st Qu.:2.000  
##  Median :2.000   Median :1.0000   Median : 65.00   Median :3.000  
##  Mean   :1.331   Mean   :0.9921   Mean   : 62.22   Mean   :2.346  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.: 93.00   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :2.0000   Max.   :104.00   Max.   :3.000  
##  NA's   :569                                       NA's   :5      
##    ps_car_12        ps_car_13        ps_car_14       ps_car_15    
##  Min.   :0.1000   Min.   :0.2506   Min.   :0.11    Min.   :0.000  
##  1st Qu.:0.3162   1st Qu.:0.6709   1st Qu.:0.35    1st Qu.:2.828  
##  Median :0.3742   Median :0.7658   Median :0.37    Median :3.317  
##  Mean   :0.3799   Mean   :0.8133   Mean   :0.37    Mean   :3.066  
##  3rd Qu.:0.4000   3rd Qu.:0.9062   3rd Qu.:0.40    3rd Qu.:3.606  
##  Max.   :1.2649   Max.   :3.7206   Max.   :0.64    Max.   :3.742  
##  NA's   :1                         NA's   :42620                  
##    ps_calc_01       ps_calc_02       ps_calc_03       ps_calc_04   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.2000   1st Qu.:0.2000   1st Qu.:0.2000   1st Qu.:2.000  
##  Median :0.5000   Median :0.4000   Median :0.5000   Median :2.000  
##  Mean   :0.4498   Mean   :0.4496   Mean   :0.4498   Mean   :2.372  
##  3rd Qu.:0.7000   3rd Qu.:0.7000   3rd Qu.:0.7000   3rd Qu.:3.000  
##  Max.   :0.9000   Max.   :0.9000   Max.   :0.9000   Max.   :5.000  
##                                                                    
##    ps_calc_05      ps_calc_06       ps_calc_07      ps_calc_08    
##  Min.   :0.000   Min.   : 0.000   Min.   :0.000   Min.   : 2.000  
##  1st Qu.:1.000   1st Qu.: 7.000   1st Qu.:2.000   1st Qu.: 8.000  
##  Median :2.000   Median : 8.000   Median :3.000   Median : 9.000  
##  Mean   :1.886   Mean   : 7.689   Mean   :3.006   Mean   : 9.226  
##  3rd Qu.:3.000   3rd Qu.: 9.000   3rd Qu.:4.000   3rd Qu.:10.000  
##  Max.   :6.000   Max.   :10.000   Max.   :9.000   Max.   :12.000  
##                                                                   
##    ps_calc_09      ps_calc_10       ps_calc_11       ps_calc_12    
##  Min.   :0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:1.000   1st Qu.: 6.000   1st Qu.: 4.000   1st Qu.: 1.000  
##  Median :2.000   Median : 8.000   Median : 5.000   Median : 1.000  
##  Mean   :2.339   Mean   : 8.434   Mean   : 5.441   Mean   : 1.442  
##  3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 7.000   3rd Qu.: 2.000  
##  Max.   :7.000   Max.   :25.000   Max.   :19.000   Max.   :10.000  
##                                                                    
##    ps_calc_13       ps_calc_14     ps_calc_15_bin   ps_calc_16_bin  
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 2.000   1st Qu.: 6.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 3.000   Median : 7.000   Median :0.0000   Median :1.0000  
##  Mean   : 2.872   Mean   : 7.539   Mean   :0.1224   Mean   :0.6278  
##  3rd Qu.: 4.000   3rd Qu.: 9.000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :13.000   Max.   :23.000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##  ps_calc_17_bin   ps_calc_18_bin   ps_calc_19_bin  ps_calc_20_bin  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   :0.5542   Mean   :0.2872   Mean   :0.349   Mean   :0.1533  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :1.0000  
## 
# Total number of NA cells in each data set
test %>% is.na() %>% sum()
## [1] 1270295
train %>% is.na() %>% sum()
## [1] 846458
# Plot the missing-value pattern, restricted to the columns of `train`
# that contain at least one NA.
aggr(
  train[, colSums(is.na(train)) > 0],
  prop = FALSE,
  combined = TRUE,
  numbers = TRUE,
  bars = FALSE,
  cex.axis = 0.7
)
## Warning in plot.aggr(res, ...): not enough vertical space to display
## frequencies (too many combinations)

**`ps_car_03_cat` and `ps_car_05_cat` have the largest number of NAs** (411,231 and 266,551 in the training set, respectively).

# Correlation plot over all columns of `train`.
# The conversion function is passed to mutate_at() directly instead of being
# wrapped in funs(), which is deprecated since dplyr 0.8.0.
# use = "complete.obs" means only rows with no NA in any column contribute.
train %>%
  mutate_at(
    vars(ends_with("cat"), ends_with("bin"), target),
    as.integer
  ) %>%
  cor(use = "complete.obs") %>%
  corrplot(type = "lower", tl.col = "black", diag = FALSE)

This correlation plot shows the pairwise correlations between the variables, indicating which pairs tend to move together as one of them changes. It can be used as a starting point for the analysis.