Importación de la base

# Load the raw CSV, drop the `X` column, and preview the first 10 rows.
# NOTE(review): `X` is presumably an exported row-index column that read.csv
# auto-named -- confirm against the CSV file.
data <- read.csv(
  file = "/var/home/diegob/Documents/Computer Science/R code/data_prac_2 1.csv"
)
data <- data[setdiff(names(data), "X")]
head(data, n = 10)
##    edad      sexo    imc hijos fumador   region         clm
## 1    19  femenino   27.9     0     yes suroeste   16884.924
## 2    18 masculino  33.77     1      no  sureste   1725.5523
## 3    28 masculino     33     3      no  sureste    4449.462
## 4    33 masculino 22.705     0      no noroeste 21984.47061
## 5       masculino  28.88     0      no noroeste   3866.8552
## 6    31  femenino  25.74     0      no  sureste   3756.6216
## 7    46  femenino  33.44     1      no  sureste   8240.5896
## 8    37  femenino  27.74     3      no noroeste   7281.5056
## 9    37 masculino  29.83     2      no  noreste   6406.4107
## 10   60  femenino  25.84     0      no noroeste 28923.13692

Limpieza de la base

  1. Primero verificamos la consistencia de los datos en cada columna; en caso de que existan valores inválidos, los reemplazamos con NA. Usamos la función distinct() para extraer los valores distintos de cada columna e identificar los que no pertenecen a la base. Nota: ¿Por qué no usar which(is.na(data), arr.ind = TRUE)? Por la heterogeneidad de los datos: is.na() solo detecta NA reales, y no es lo mismo tener la cadena “null” (o “&&”, “$$” o un valor vacío) que un NA.
# List every distinct value stored in `edad` so that entries that are not
# valid ages (the output shows an empty string, "&&" and "null") stand out.
distinct(data, edad)
##    edad
## 1    19
## 2    18
## 3    28
## 4    33
## 5      
## 6    31
## 7    46
## 8    37
## 9    60
## 10   25
## 11   62
## 12   23
## 13   56
## 14   27
## 15   30
## 16   59
## 17   63
## 18   55
## 19   22
## 20   26
## 21   24
## 22   41
## 23   38
## 24   21
## 25   48
## 26   36
## 27   40
## 28   58
## 29   53
## 30   34
## 31   43
## 32   64
## 33   20
## 34   61
## 35   44
## 36   57
## 37   29
## 38   45
## 39   &&
## 40   35
## 41   54
## 42   52
## 43   49
## 44   47
## 45   32
## 46   51
## 47   42
## 48   39
## 49   50
## 50 null
# List every distinct value stored in `sexo` to check for unexpected labels.
distinct(data, sexo)
##        sexo
## 1  femenino
## 2 masculino
# List every distinct value stored in `imc` so that non-numeric entries
# (the output shows "$$", an empty string, "null" and "&&") stand out.
distinct(data, imc)
##        imc
## 1     27.9
## 2    33.77
## 3       33
## 4   22.705
## 5    28.88
## 6    25.74
## 7    33.44
## 8    27.74
## 9    29.83
## 10   25.84
## 11   26.22
## 12   26.29
## 13    34.4
## 14   39.82
## 15   42.13
## 16    24.6
## 17   30.78
## 18  23.845
## 19    40.3
## 20    35.3
## 21  36.005
## 22    32.4
## 23    34.1
## 24   31.92
## 25  28.025
## 26   27.72
## 27  23.085
## 28  32.775
## 29  17.385
## 30    36.3
## 31    35.6
## 32  26.315
## 33    28.6
## 34   28.31
## 35    36.4
## 36  20.425
## 37  32.965
## 38    20.8
## 39   36.67
## 40    39.9
## 41    26.6
## 42   36.63
## 43   21.78
## 44    30.8
## 45   37.05
## 46    37.3
## 47  38.665
## 48   34.77
## 49   24.53
## 50    35.2
## 51  35.625
## 52   33.63
## 53      28
## 54   34.43
## 55   28.69
## 56  36.955
## 57  31.825
## 58   31.68
## 59   22.88
## 60  37.335
## 61   27.36
## 62   33.66
## 63    24.7
## 64  25.935
## 65   22.42
## 66    28.9
## 67      $$
## 68   36.19
## 69   23.98
## 70   24.75
## 71    28.5
## 72    28.1
## 73   32.01
## 74    27.4
## 75   34.01
## 76   29.59
## 77   35.53
## 78  39.805
## 79  26.885
## 80  38.285
## 81   37.62
## 82   41.23
## 83    34.8
## 84  22.895
## 85   31.16
## 86    27.2
## 87   26.98
## 88   39.49
## 89  24.795
## 90    31.3
## 91   38.28
## 92   19.95
## 93    19.3
## 94    31.6
## 95   25.46
## 96  30.115
## 97   29.92
## 98    27.5
## 99    28.4
## 100 30.875
## 101  27.94
## 102  35.09
## 103   29.7
## 104  35.72
## 105 32.205
## 106 28.595
## 107  49.06
## 108  27.17
## 109  23.37
## 110   37.1
## 111  23.75
## 112 28.975
## 113  31.35
## 114 33.915
## 115 28.785
## 116   28.3
## 117   37.4
## 118 17.765
## 119   34.7
## 120 26.505
## 121  22.04
## 122   35.9
## 123 25.555
## 124  28.05
## 125 25.175
## 126   31.9
## 127     36
## 128  32.49
## 129   25.3
## 130 29.735
## 131  38.83
## 132 30.495
## 133  37.73
## 134  37.43
## 135  24.13
## 136 37.145
## 137  39.52
## 138  24.42
## 139  27.83
## 140  36.85
## 141   39.6
## 142   29.8
## 143  29.64
## 144 28.215
## 145     37
## 146 33.155
## 147 18.905
## 148  41.47
## 149   30.3
## 150  15.96
## 151 33.345
## 152   37.7
## 153 27.835
## 154   29.2
## 155  26.41
## 156  30.69
## 157 41.895
## 158   30.9
## 159   32.2
## 160  32.11
## 161  31.57
## 162   26.2
## 163  30.59
## 164   32.8
## 165  18.05
## 166  39.33
## 167  32.23
## 168 24.035
## 169  36.08
## 170   22.3
## 171   26.4
## 172   31.8
## 173  26.73
## 174   23.1
## 175  23.21
## 176   33.7
## 177  33.25
## 178  24.64
## 179  33.88
## 180  38.06
## 181  41.91
## 182 31.635
## 183 36.195
## 184   17.8
## 185  24.51
## 186  22.22
## 187  38.39
## 188  29.07
## 189 22.135
## 190   26.8
## 191  30.02
## 192  35.86
## 193   20.9
## 194  17.29
## 195  34.21
## 196 25.365
## 197  40.15
## 198 24.415
## 199   25.2
## 200  26.84
## 201  24.32
## 202  42.35
## 203   19.8
## 204 32.395
## 205   30.2
## 206       
## 207   34.2
## 208 27.455
## 209  27.55
## 210 20.615
## 211   24.3
## 212  31.79
## 213  21.56
## 214  28.12
## 215 40.565
## 216 27.645
## 217   31.2
## 218  26.62
## 219 36.765
## 220   33.4
## 221  45.54
## 222  28.82
## 223  22.99
## 224   27.7
## 225  34.39
## 226  22.61
## 227  37.51
## 228     38
## 229  33.33
## 230 34.865
## 231  33.06
## 232  35.97
## 233   31.4
## 234  25.27
## 235 40.945
## 236 34.105
## 237  36.48
## 238   33.8
## 239   36.7
## 240 36.385
## 241   34.5
## 242   32.3
## 243   27.6
## 244  29.26
## 245  35.75
## 246  23.18
## 247   25.6
## 248 35.245
## 249  43.89
## 250  20.79
## 251   30.5
## 252   21.7
## 253  21.89
## 254   null
## 255     &&
## 256 32.015
## 257   30.4
## 258  21.09
## 259  22.23
## 260   32.9
## 261  24.89
## 262  31.46
## 263 17.955
## 264 30.685
## 265  43.34
## 266  39.05
## 267 31.445
## 268 19.855
## 269  31.02
## 270   20.6
## 271  47.52
## 272   20.4
## 273  38.38
## 274  24.31
## 275   23.6
## 276  21.12
## 277  30.03
## 278  17.48
## 279 20.235
## 280 17.195
## 281   23.9
## 282  35.15
## 283  35.64
## 284   22.6
## 285  39.16
## 286 27.265
## 287 29.165
## 288 16.815
## 289   33.1
## 290   26.9
## 291  33.11
## 292  31.73
## 293  46.75
## 294  32.68
## 295   33.5
## 296  43.01
## 297  36.52
## 298 26.695
## 299  25.65
## 300   38.6
## 301   29.6
## 302   23.4
## 303  46.53
## 304  30.14
## 305     30
## 306 38.095
## 307  28.38
## 308   28.7
## 309  33.82
## 310  24.09
## 311  32.67
## 312   25.1
## 313  32.56
## 314 41.325
## 315   34.3
## 316 31.065
## 317  21.47
## 318  25.08
## 319   43.4
## 320   25.7
## 321  27.93
## 322   39.2
## 323  26.03
## 324  30.25
## 325  28.93
## 326   35.7
## 327  35.31
## 328     31
## 329  44.22
## 330  26.07
## 331   25.8
## 332 39.425
## 333  40.48
## 334   38.9
## 335  47.41
## 336  30.21
## 337 35.435
## 338   46.7
## 339   46.2
## 340   21.4
## 341   23.8
## 342  44.77
## 343  32.12
## 344   29.1
## 345  37.29
## 346  43.12
## 347  36.86
## 348 34.295
## 349 23.465
## 350  45.43
## 351  23.65
## 352  28.27
## 353  35.91
## 354     29
## 355  19.57
## 356  21.85
## 357  40.26
## 358 33.725
## 359  29.48
## 360   32.6
## 361 37.525
## 362 23.655
## 363   37.8
## 364  29.37
## 365     19
## 366   21.3
## 367  42.46
## 368  38.95
## 369   36.1
## 370  38.19
## 371   42.4
## 372  34.96
## 373  42.68
## 374  31.13
## 375  31.54
## 376  29.81
## 377 21.375
## 378  40.81
## 379   17.4
## 380   20.3
## 381 26.125
## 382  41.69
## 383   24.1
## 384   36.2
## 385 40.185
## 386  39.27
## 387  34.87
## 388 44.745
## 389 29.545
## 390  23.54
## 391  40.66
## 392   36.6
## 393   35.4
## 394 27.075
## 395 21.755
## 396  40.28
## 397   23.7
## 398   35.5
## 399  29.15
## 400     27
## 401 37.905
## 402  22.77
## 403   22.8
## 404  38.17
## 405  34.58
## 406   27.1
## 407   39.7
## 408 19.475
## 409   26.7
## 410  34.32
## 411   24.4
## 412  41.14
## 413 22.515
## 414   41.8
## 415  26.18
## 416  42.24
## 417  26.51
## 418 35.815
## 419  41.42
## 420 36.575
## 421  42.94
## 422  21.01
## 423 24.225
## 424  17.67
## 425   31.5
## 426   31.1
## 427  32.78
## 428  32.45
## 429  50.38
## 430   47.6
## 431 33.535
## 432   25.4
## 433   29.9
## 434   43.7
## 435  24.86
## 436   30.1
## 437   28.8
## 438   29.5
## 439   39.5
## 440  29.04
## 441  38.94
## 442     44
## 443 20.045
## 444  40.92
## 445   35.1
## 446 29.355
## 447 32.585
## 448  32.34
## 449   39.8
## 450 24.605
## 451  33.99
## 452   28.2
## 453     25
## 454   33.2
## 455   23.2
## 456   20.1
## 457   32.5
## 458  37.18
## 459  46.09
## 460  39.93
## 461   35.8
## 462 31.255
## 463 18.335
## 464   42.9
## 465 28.405
## 466  26.79
## 467 39.615
## 468   25.9
## 469 25.745
## 470  28.16
## 471  23.56
## 472   40.5
## 473  35.42
## 474 39.995
## 475 34.675
## 476  20.52
## 477 23.275
## 478  36.29
## 479   32.7
## 480  19.19
## 481 24.985
## 482  20.13
## 483  23.32
## 484  45.32
## 485   34.6
## 486 18.715
## 487 21.565
## 488     23
## 489  37.07
## 490  52.58
## 491 42.655
## 492  21.66
## 493     32
## 494   18.3
## 495  47.74
## 496   22.1
## 497 19.095
## 498  31.24
## 499 29.925
## 500  20.35
## 501  25.85
## 502  42.75
## 503   18.6
## 504  23.87
## 505   45.9
## 506   21.5
## 507 30.305
## 508  44.88
## 509   41.1
## 510  40.37
## 511  28.49
## 512  33.55
## 513 40.375
## 514  27.28
## 515  17.86
## 516   33.3
## 517  39.14
## 518 21.945
## 519  24.97
## 520  23.94
## 521 34.485
## 522   21.8
## 523   23.3
## 524  36.96
## 525  21.28
## 526   29.4
## 527   27.3
## 528   29.3
## 529   37.9
## 530 37.715
## 531  23.76
## 532  25.52
## 533  27.61
## 534  27.06
## 535   39.4
## 536   34.9
## 537     22
## 538  30.36
## 539   27.8
## 540  53.13
## 541  39.71
## 542  32.87
## 543   44.7
## 544  30.97
# List every distinct value stored in `hijos` to check for invalid counts.
distinct(data, hijos)
##   hijos
## 1     0
## 2     1
## 3     3
## 4     2
## 5     5
## 6     4
# List every distinct value stored in `fumador` to check for unexpected labels.
distinct(data, fumador)
##   fumador
## 1     yes
## 2      no
# List every distinct value stored in `region` to check for unexpected labels.
distinct(data, region)
##     region
## 1 suroeste
## 2  sureste
## 3 noroeste
## 4  noreste
# List every distinct value stored in `clm` so that non-numeric entries
# (the output shows an empty string, "&&", "$$" and "null") stand out.
distinct(data, clm)
##              clm
## 1      16884.924
## 2      1725.5523
## 3       4449.462
## 4    21984.47061
## 5      3866.8552
## 6      3756.6216
## 7      8240.5896
## 8      7281.5056
## 9      6406.4107
## 10   28923.13692
## 11     2721.3208
## 12    27808.7251
## 13      1826.843
## 14    11090.7178
## 15    39611.7577
## 16      1837.237
## 17    10797.3362
## 18    2395.17155
## 19     10602.385
## 20     36837.467
## 21   13228.84695
## 22      4149.736
## 23      1137.011
## 24    37701.8768
## 25    6203.90175
## 26    14001.1338
## 27   14451.83515
## 28   12268.63225
## 29    2775.19215
## 30         38711
## 31     35585.576
## 32    2198.18985
## 33      4687.797
## 34    13770.0979
## 35   51194.55914
## 36    1625.43375
## 37   15612.19335
## 38        2302.3
## 39    39774.2763
## 40     48173.361
## 41      3046.062
## 42     4949.7587
## 43     6272.4772
## 44      6313.759
## 45     6079.6715
## 46   20630.28351
## 47    3393.35635
## 48     3556.9223
## 49    12629.8967
## 50     38709.176
## 51    2211.13075
## 52     3579.8287
## 53     23568.272
## 54    37742.5757
## 55     8059.6791
## 56   47496.49445
## 57   13607.36875
## 58    34303.1672
## 59    23244.7902
## 60    5989.52365
## 61     8606.2174
## 62     4504.6624
## 63   30166.61817
## 64    4133.64165
## 65    14711.7438
## 66      1743.214
## 67     14235.072
## 68    6389.37785
## 69     5920.1041
## 70    17663.1442
## 71    16577.7795
## 72      6799.458
## 73     11741.726
## 74    11946.6259
## 75      7726.854
## 76    11356.6609
## 77     3947.4131
## 78     1532.4697
## 79    2755.02095
## 80    6571.02435
## 81    4441.21315
## 82    7935.29115
## 83    37165.1638
## 84    11033.6617
## 85     39836.519
## 86   21098.55405
## 87    43578.9394
## 88     11073.176
## 89     8026.6666
## 90    11082.5772
## 91     2026.9741
## 92              
## 93    30184.9367
## 94     5729.0053
## 95     47291.055
## 96     3766.8838
## 97      12105.32
## 98    10226.2842
## 99    22412.6485
## 100    15820.699
## 101     6186.127
## 102    3645.0894
## 103   21344.8467
## 104     5003.853
## 105  17560.37975
## 106     2331.519
## 107   3877.30425
## 108    2867.1196
## 109   47055.5321
## 110   10825.2537
## 111    11881.358
## 112     4646.759
## 113    2404.7338
## 114  11488.31695
## 115  30259.99556
## 116   11381.3254
## 117    6686.4313
## 118     7740.337
## 119   2257.47525
## 120   39556.4945
## 121  10115.00885
## 122   3385.39915
## 123     17081.08
## 124     9634.538
## 125   32734.1863
## 126     6082.405
## 127  12815.44495
## 128   13616.3586
## 129    11163.568
## 130   2457.21115
## 131    2155.6815
## 132     1261.442
## 133   2045.68525
## 134  27322.73386
## 135     2166.732
## 136  27375.90478
## 137    3490.5491
## 138    18972.495
## 139    18157.876
## 140   20745.9891
## 141    5138.2567
## 142  40720.55105
## 143    9877.6077
## 144   10959.6947
## 145     1842.519
## 146    5125.2157
## 147     7789.635
## 148   6334.34355
## 149   19964.7463
## 150    7077.1894
## 151    6948.7008
## 152   21223.6758
## 153  15518.18025
## 154   36950.2567
## 155  19749.38338
## 156    21348.706
## 157   36149.4835
## 158    10450.552
## 159     5152.134
## 160    5028.1466
## 161  10407.08585
## 162      4830.63
## 163   6128.79745
## 164   2719.27975
## 165   4827.90495
## 166   13405.3903
## 167      8116.68
## 168    1694.7964
## 169     5246.047
## 170   2855.43755
## 171     48824.45
## 172   6455.86265
## 173    10436.096
## 174     8823.279
## 175   8538.28845
## 176  11735.87905
## 177    1631.8212
## 178    4005.4225
## 179    7419.4779
## 180    7731.4271
## 181  43753.33705
## 182    3981.9768
## 183     5325.651
## 184     6775.961
## 185    4922.9159
## 186   12557.6053
## 187     4883.866
## 188    2137.6536
## 189    12044.342
## 190    1137.4697
## 191    1639.5631
## 192     5649.715
## 193     8516.829
## 194    9644.2525
## 195   14901.5167
## 196    2130.6759
## 197    8871.1517
## 198  13012.20865
## 199   37133.8982
## 200     7147.105
## 201    4337.7352
## 202    11743.299
## 203   20984.0936
## 204    13880.949
## 205    6610.1097
## 206      1980.07
## 207   8162.71625
## 208     3537.703
## 209    5002.7827
## 210     8520.026
## 211     7371.772
## 212    10355.641
## 213     2483.736
## 214    3392.9768
## 215  25081.76784
## 216     5012.471
## 217   10564.8845
## 218     5253.524
## 219    34779.615
## 220   19515.5416
## 221   11987.1682
## 222    2689.4954
## 223  24227.33724
## 224   7358.17565
## 225    9225.2564
## 226   7443.64305
## 227   14001.2867
## 228     1727.785
## 229    12333.828
## 230    6710.1919
## 231   19444.2658
## 232    1615.7667
## 233    4463.2051
## 234   17352.6803
## 235    7152.6714
## 236   38511.6283
## 237   5354.07465
## 238  35160.13457
## 239     7196.867
## 240   29523.1656
## 241  24476.47851
## 242   12648.7034
## 243    1986.9334
## 244     1832.094
## 245   4040.55825
## 246   12829.4551
## 247    47305.305
## 248   44260.7499
## 249     4260.744
## 250  41097.16175
## 251  13047.33235
## 252   43921.1837
## 253    5400.9805
## 254  11520.09985
## 255   33750.2918
## 256     11837.16
## 257   17085.2676
## 258   24869.8368
## 259  36219.40545
## 260  20462.99766
## 261   46151.1245
## 262    17179.522
## 263  14590.63205
## 264     7441.053
## 265    9282.4806
## 266    1719.4363
## 267    42856.838
## 268    7265.7025
## 269   9617.66245
## 270    2523.1695
## 271     9715.841
## 272   2803.69785
## 273     2150.469
## 274   12928.7911
## 275    9855.1314
## 276   22331.5668
## 277  48549.17835
## 278   4237.12655
## 279  11879.10405
## 280      9625.92
## 281    7742.1098
## 282    9432.9253
## 283   14256.1928
## 284  47896.79135
## 285  25992.82104
## 286     3172.018
## 287  20277.80751
## 288   42112.2356
## 289    2156.7518
## 290     3906.127
## 291           &&
## 292    16297.846
## 293   21978.6769
## 294   38746.3551
## 295    9249.4952
## 296    6746.7425
## 297   24873.3849
## 298     4349.462
## 299    12646.207
## 300   19442.3535
## 301  20177.67113
## 302    4151.0287
## 303  11944.59435
## 304    7749.1564
## 305     8444.474
## 306     1737.376
## 307   42124.5153
## 308    8124.4084
## 309    34838.873
## 310    9722.7695
## 311   8835.26495
## 312  10435.06525
## 313   7421.19455
## 314   4667.60765
## 315    4894.7533
## 316  24671.66334
## 317     35491.64
## 318  11566.30055
## 319     2866.091
## 320   6600.20595
## 321    3561.8889
## 322   42760.5022
## 323     47928.03
## 324     9144.565
## 325  48517.56315
## 326   24393.6224
## 327   13429.0354
## 328  11658.37915
## 329  19144.57652
## 330    13822.803
## 331   12142.5786
## 332   13937.6665
## 333    41919.097
## 334    8232.6388
## 335  18955.22017
## 336   13352.0998
## 337   13217.0945
## 338  13981.85035
## 339   10977.2063
## 340    6184.2994
## 341    4889.9995
## 342   8334.45755
## 343    5478.0368
## 344   1635.73365
## 345   11830.6072
## 346     8932.084
## 347     3554.203
## 348   12404.8791
## 349  14133.03775
## 350  24603.04837
## 351    8944.1151
## 352    9620.3307
## 353    1837.2819
## 354    1607.5101
## 355    10043.249
## 356      4751.07
## 357     2597.779
## 358    3180.5101
## 359    9778.3472
## 360    13430.265
## 361   8017.06115
## 362   8116.26885
## 363     3481.868
## 364   13415.0381
## 365   12029.2867
## 366   7639.41745
## 367    36085.219
## 368    1391.5287
## 369   18033.9679
## 370   21659.9301
## 371   38126.2465
## 372  16455.70785
## 373  27000.98473
## 374  15006.57945
## 375  42303.69215
## 376  20781.48892
## 377    5846.9176
## 378   8302.53565
## 379     1261.859
## 380   11856.4115
## 381  30284.64294
## 382    3176.8159
## 383    4618.0799
## 384  10736.87075
## 385    2138.0707
## 386   8964.06055
## 387    9290.1395
## 388     9411.005
## 389   7526.70645
## 390     8522.003
## 391  16586.49771
## 392    1631.6683
## 393     9264.797
## 394    8083.9198
## 395  14692.66935
## 396     10269.46
## 397     3260.199
## 398   11396.9002
## 399    4185.0979
## 400     8539.671
## 401    6652.5288
## 402    4074.4537
## 403    1621.3402
## 404  19594.80965
## 405  14455.64405
## 406     5080.096
## 407    2134.9015
## 408    7345.7266
## 409     9140.951
## 410    18608.262
## 411   14418.2804
## 412   28950.4692
## 413   46889.2612
## 414   46599.1084
## 415  39125.33225
## 416    2727.3951
## 417      8968.33
## 418    9788.8659
## 419   6555.07035
## 420  7323.734819
## 421   3167.45585
## 422   18804.7524
## 423  23082.95533
## 424   4906.40965
## 425     5969.723
## 426    12638.195
## 427   4243.59005
## 428   13919.8229
## 429    2254.7967
## 430     5926.846
## 431   12592.5345
## 432    2897.3235
## 433    4738.2682
## 434    37079.372
## 435    1149.3959
## 436  28287.89766
## 437           $$
## 438     7345.084
## 439   12730.9996
## 440   11454.0215
## 441     5910.944
## 442     4762.329
## 443     7512.267
## 444    4032.2407
## 445     1969.614
## 446   1769.53165
## 447    4686.3887
## 448   21797.0004
## 449  11840.77505
## 450    10601.412
## 451      7682.67
## 452   10381.4787
## 453    22144.032
## 454  15230.32405
## 455  11165.41765
## 456   1632.03625
## 457   19521.9682
## 458    13224.693
## 459   12643.3778
## 460   23288.9284
## 461    2201.0971
## 462    2497.0383
## 463   2203.47185
## 464     1744.465
## 465  20878.78443
## 466    25382.297
## 467   28868.6639
## 468  35147.52848
## 469   2534.39375
## 470    1534.3045
## 471    1824.2854
## 472  15555.18875
## 473    9304.7019
## 474    1622.1885
## 475     9880.068
## 476     9563.029
## 477   4347.02335
## 478   12475.3513
## 479     1253.936
## 480  48885.13561
## 481   10461.9794
## 482     1748.774
## 483  24513.09126
## 484    2196.4732
## 485    12574.049
## 486    17942.106
## 487    1967.0227
## 488     4931.647
## 489     8027.968
## 490    8211.1002
## 491     13470.86
## 492    36197.699
## 493    6837.3687
## 494   22218.1149
## 495    5974.3847
## 496         null
## 497    2643.2685
## 498    3077.0955
## 499    3044.2133
## 500     11455.28
## 501   11763.0009
## 502    2498.4144
## 503    9361.3268
## 504     1256.299
## 505     21082.16
## 506    11362.755
## 507  27724.28875
## 508   8413.46305
## 509     5240.765
## 510  25656.57526
## 511    3994.1778
## 512   9866.30485
## 513    5397.6167
## 514  38245.59327
## 515  11482.63485
## 516  24059.68019
## 517     9861.025
## 518   8342.90875
## 519    1708.0014
## 520   48675.5177
## 521   14043.4767
## 522    12925.886
## 523  19214.70553
## 524   13831.1152
## 525   6067.12675
## 526     5972.378
## 527     8825.086
## 528    8233.0975
## 529  27346.04207
## 530     6196.448
## 531    3056.3881
## 532    13887.204
## 533  63770.42801
## 534   10231.4999
## 535   23807.2406
## 536   3268.84665
## 537    11538.421
## 538   3213.62205
## 539    45863.205
## 540    3972.9247
## 541   11187.6567
## 542  17878.90068
## 543     3847.674
## 544    8334.5896
## 545    3935.1799
## 546  39983.42595
## 547    1646.4297
## 548    9193.8385
## 549   10923.9332
## 550     2494.022
## 551    9058.7303
## 552    2801.2588
## 553   2128.43105
## 554   6373.55735
## 555    7256.7231
## 556    11552.904
## 557  45702.02235
## 558     3761.292
## 559    4753.6368
## 560  31620.00106
## 561  13224.05705
## 562   12222.8983
## 563    1664.9996
## 564  58571.07448
## 565      9724.53
## 566   12913.9924
## 567    6356.2707
## 568  17626.23951
## 569     1242.816
## 570    4779.6023
## 571   43943.8761
## 572   13635.6379
## 573    5976.8311
## 574    11842.442
## 575    2566.4707
## 576   15359.1045
## 577    5709.1644
## 578   8823.98575
## 579    7640.3092
## 580    5594.8455
## 581  33471.97189
## 582    1633.0444
## 583   9174.13565
## 584    11070.535
## 585   16085.1275
## 586   17468.9839
## 587     9283.562
## 588   3558.62025
## 589  25678.77845
## 590    4435.0942
## 591    39241.442
## 592    8547.6913
## 593     6571.544
## 594   2207.69745
## 595     6753.038
## 596      1880.07
## 597   42969.8527
## 598  11658.11505
## 599    23306.547
## 600   34439.8559
## 601    10713.644
## 602     3659.346
## 603    40182.246
## 604      9182.17
## 605  34617.84065
## 606  12129.61415
## 607    3736.4647
## 608    6748.5912
## 609  11326.71487
## 610    11365.952
## 611   42983.4585
## 612    10085.846
## 613     1977.815
## 614    3366.6697
## 615   7173.35995
## 616     9391.346
## 617   14410.9321
## 618  24915.04626
## 619   20149.3229
## 620   12949.1554
## 621     6666.243
## 622  32787.45859
## 623  13143.86485
## 624    4466.6214
## 625  18806.14547
## 626   10141.1362
## 627    6123.5688
## 628    8252.2843
## 629     1712.227
## 630  12430.95335
## 631    9800.8882
## 632    10579.711
## 633    8280.6227
## 634     8527.532
## 635    12244.531
## 636    24667.419
## 637     3410.324
## 638   4058.71245
## 639  26392.26029
## 640  14394.39815
## 641    6435.6237
## 642  22192.43711
## 643    5148.5526
## 644    1136.3994
## 645   27037.9141
## 646   42560.4304
## 647     8703.456
## 648  45710.20785
## 649    6500.2359
## 650    4837.5823
## 651    3943.5954
## 652     4399.731
## 653    6185.3208
## 654   46200.9851
## 655   7222.78625
## 656   46130.5265
## 657    12363.547
## 658   10156.7832
## 659     2585.269
## 660      1242.26
## 661     40103.89
## 662    9863.4718
## 663     4766.022
## 664   11244.3769
## 665   7729.64575
## 666    5438.7491
## 667  26236.57997
## 668   34806.4677
## 669    2104.1134
## 670     8068.185
## 671   2362.22905
## 672   2352.96845
## 673     3577.999
## 674   3201.24515
## 675  29186.48236
## 676   40273.6455
## 677  10976.24575
## 678    3500.6123
## 679    2020.5523
## 680   9541.69555
## 681    9504.3103
## 682    5385.3379
## 683   8930.93455
## 684     5375.038
## 685   10264.4421
## 686   6113.23105
## 687    5469.0066
## 688      1727.54
## 689   10107.2206
## 690   8310.83915
## 691    1984.4533
## 692     2457.502
## 693    12146.971
## 694    9566.9909
## 695   13112.6048
## 696   10848.1343
## 697   12231.6136
## 698    9875.6804
## 699    11264.541
## 700    12979.358
## 701     1263.249
## 702  10106.13425
## 703   40932.4295
## 704   6664.68595
## 705  16657.71745
## 706    2217.6012
## 707    6781.3542
## 708   19361.9988
## 709    10065.413
## 710     4234.927
## 711   9447.25035
## 712    14007.222
## 713   40419.0191
## 714     3484.331
## 715   36189.1017
## 716  44585.45587
## 717   8604.48365
## 718   18246.4955
## 719  43254.41795
## 720    3757.8448
## 721    8827.2099
## 722   9910.35985
## 723  11737.84884
## 724     8556.907
## 725   3062.50825
## 726    19539.243
## 727   1906.35825
## 728  14210.53595
## 729   11833.7823
## 730  17128.42608
## 731   5031.26955
## 732     7985.815
## 733   23065.4207
## 734    5428.7277
## 735   36307.7983
## 736    3925.7582
## 737     2416.955
## 738    19040.876
## 739    3070.8087
## 740  11842.62375
## 741     8062.764
## 742     7050.642
## 743    14319.031
## 744   6933.24225
## 745  27941.28758
## 746     11150.78
## 747  12797.20962
## 748   17748.5062
## 749     7261.741
## 750   10560.4917
## 751     6986.697
## 752   7448.40395
## 753    5934.3798
## 754    9869.8102
## 755    18259.216
## 756    1146.7966
## 757    9386.1613
## 758    24520.264
## 759    4350.5144
## 760     6414.178
## 761  12741.16745
## 762    1917.3184
## 763   5209.57885
## 764   13457.9608
## 765     5662.225
## 766     1252.407
## 767    2731.9122
## 768    21195.818
## 769    7209.4918
## 770    18310.742
## 771    4266.1658
## 772   4719.52405
## 773    11848.141
## 774  17904.52705
## 775    7046.7222
## 776   14313.8463
## 777      2103.08
## 778   38792.6856
## 779    1815.8759
## 780   7731.85785
## 781  28476.73499
## 782   2136.88225
## 783    1131.5066
## 784    3309.7926
## 785      9414.92
## 786    6360.9936
## 787   11013.7119
## 788   4428.88785
## 789    5584.3057
## 790    1877.9294
## 791   2842.76075
## 792     3597.596
## 793  23401.30575
## 794  55135.40209
## 795     7445.918
## 796    2680.9493
## 797    1621.8827
## 798    8219.2039
## 799   12523.6048
## 800  16069.08475
## 801   43813.8661
## 802  20773.62775
## 803   39597.4072
## 804    6117.4945
## 805    13393.756
## 806   4719.73655
## 807   11743.9341
## 808    5377.4578
## 809    7160.3303
## 810   11657.7189
## 811   6402.29135
## 812   12622.1795
## 813     1526.312
## 814    12323.936
## 815   27533.9129
## 816  10072.05505
## 817   45008.9555
## 818     9872.701
## 819    2438.0552
## 820     2974.126
## 821  10601.63225
## 822   37270.1512
## 823     14119.62
## 824   42111.6647
## 825   11729.6795
## 826  24106.91255
## 827     1875.344
## 828   40974.1649
## 829   15817.9857
## 830  18218.16139
## 831    10965.446
## 832    46113.511
## 833     7151.092
## 834  12269.68865
## 835   5458.04645
## 836     8782.469
## 837     6600.361
## 838    1141.4451
## 839     11576.13
## 840  13129.60345
## 841     4391.652
## 842     8457.818
## 843    3392.3652
## 844    5966.8874
## 845     6849.026
## 846    8891.1395
## 847    2690.1138
## 848   26140.3603
## 849    6653.7886
## 850     6311.952
## 851     3443.064
## 852    2789.0574
## 853   2585.85065
## 854   46255.1125
## 855   4877.98105
## 856   19719.6947
## 857  27218.43725
## 858    5272.1758
## 859     1682.597
## 860   11945.1327
## 861  29330.98315
## 862    7243.8136
## 863  10422.91665
## 864   44202.6536
## 865   13555.0049
## 866    13063.883
## 867  19798.05455
## 868   2221.56445
## 869    1634.5734
## 870   2117.33885
## 871   8688.85885
## 872   48673.5588
## 873   4661.28635
## 874    8125.7845
## 875    12644.589
## 876   4564.19145
## 877   4846.92015
## 878    7633.7206
## 879    15170.069
## 880    17496.306
## 881    2639.0429
## 882   33732.6867
## 883  14382.70905
## 884     7626.993
## 885   5257.50795
## 886    2473.3341
## 887  21774.32215
## 888  35069.37452
## 889    13041.921
## 890    5245.2269
## 891    13451.122
## 892     13462.52
## 893     5488.262
## 894   4320.41085
## 895     6250.435
## 896  25333.33284
## 897     2913.569
## 898    12032.326
## 899   13470.8044
## 900    6289.7549
## 901    2927.0647
## 902     6238.298
## 903     10096.97
## 904     7348.142
## 905    4673.3922
## 906    12233.828
## 907  32108.66282
## 908   8965.79575
## 909    2304.0022
## 910    9487.6442
## 911    1121.8739
## 912    9549.5651
## 913   2217.46915
## 914    1628.4709
## 915   12982.8747
## 916     11674.13
## 917     7160.094
## 918    39047.285
## 919   6358.77645
## 920    19933.458
## 921  11534.87265
## 922    47462.894
## 923   4527.18295
## 924    38998.546
## 925  20009.63365
## 926    3875.7341
## 927     41999.52
## 928  12609.88702
## 929   41034.2214
## 930  28468.91901
## 931   2730.10785
## 932     3353.284
## 933    14474.675
## 934   9500.57305
## 935  26467.09737
## 936     4746.344
## 937  23967.38305
## 938   7518.02535
## 939   3279.86855
## 940    8596.8278
## 941   10702.6424
## 942    4992.3764
## 943   2527.81865
## 944     1759.338
## 945    2322.6218
## 946  16138.76205
## 947    7804.1605
## 948    2902.9065
## 949   9704.66805
## 950    4889.0368
## 951  25517.11363
## 952   4500.33925
## 953    19199.944
## 954  16796.41194
## 955   4915.05985
## 956      7624.63
## 957   8410.04685
## 958  28340.18885
## 959   4518.82625
## 960   14571.8908
## 961      3378.91
## 962   7144.86265
## 963    10118.424
## 964    5484.4673
## 965  16420.49455
## 966   7986.47525
## 967     7418.522
## 968   13887.9685
## 969    6551.7501
## 970   5267.81815
## 971   17361.7661
## 972    34472.841
## 973      1972.95
## 974  21232.18226
## 975    8627.5411
## 976    4433.3877
## 977    4438.2634
## 978  24915.22085
## 979  23241.47453
## 980    9957.7216
## 981     8269.044
## 982   18767.7377
## 983  36580.28216
## 984     8765.249
## 985     5383.536
## 986   12124.9924
## 987   2709.24395
## 988     3987.926
## 989  12495.29085
## 990  26018.95052
## 991     8798.593
## 992   35595.5898
## 993   42211.1382
## 994    1711.0268
## 995    8569.8618
## 996     2020.177
## 997   16450.8947
## 998  21595.38229
## 999     9850.432
## 1000   6877.9801
## 1001 21677.28345
## 1002   44423.803
## 1003   4137.5227
## 1004 13747.87235
## 1005  12950.0712
## 1006   12094.478
## 1007  37484.4493
## 1008 39725.51805
## 1009   2250.8352
## 1010 22493.65964
## 1011 20234.85475
## 1012  1704.70015
## 1013 33475.81715
## 1014    3161.454
## 1015 11394.06555
## 1016    21880.82
## 1017   7325.0482
## 1018  44501.3982
## 1019  3594.17085
## 1020   39727.614
## 1021  8023.13545
## 1022  14394.5579
## 1023   9288.0267
## 1024   25309.489
## 1025   3353.4703
## 1026 10594.50155
## 1027    8277.523
## 1028 17929.30337
## 1029   2480.9791
## 1030   4462.7218
## 1031   1981.5819
## 1032  11554.2236
## 1033  48970.2476
## 1034  6548.19505
## 1035    5708.867
## 1036    7045.499
## 1037   8978.1851
## 1038  5757.41345
## 1039  39871.7043
## 1040 13974.45555
## 1041  1909.52745
## 1042  12096.6512
## 1043 13204.28565
## 1044   4562.8421
## 1045   2102.2647
## 1046  34672.1472
## 1047  15161.5344
## 1048 11884.04858
## 1049  4454.40265
## 1050   5855.9025
## 1051    4076.497
## 1052 15019.76005
## 1053    19023.26
## 1054 10796.35025
## 1055  11353.2276
## 1056   9748.9106
## 1057   10577.087
## 1058  41676.0811
## 1059  11286.5387
## 1060     3591.48
## 1061   11299.343
## 1062   4561.1885
## 1063  44641.1974
## 1064   1674.6323
## 1065 23045.56616
## 1066   3227.1211
## 1067 16776.30405
## 1068   11253.421
## 1069   3471.4096
## 1070  11363.2832
## 1071 20420.60465
## 1072  8988.15875
## 1073  10493.9458
## 1074    2904.088
## 1075   8605.3615
## 1076   11512.405
## 1077  41949.2441
## 1078  24180.9335
## 1079  5312.16985
## 1080   2396.0959
## 1081  10807.4863
## 1082   9222.4026
## 1083  36124.5737
## 1084  38282.7495
## 1085   5693.4305
## 1086   34166.273
## 1087   8347.1643
## 1088  46661.4424
## 1089 18903.49141
## 1090  40904.1995
## 1091  14254.6082
## 1092   10214.636
## 1093   5836.5204
## 1094 14358.36437
## 1095    1728.897
## 1096   8582.3023
## 1097    3693.428
## 1098 20709.02034
## 1099  9991.03765
## 1100 19673.33573
## 1101  11085.5868
## 1102    7623.518
## 1103   3176.2877
## 1104   3704.3545
## 1105 36898.73308
## 1106   9048.0273
## 1107    7954.517
## 1108 27117.99378
## 1109   6338.0756
## 1110    9630.397
## 1111 11289.10925
## 1112 52590.82939
## 1113   2261.5688
## 1114    10791.96
## 1115    5979.731
## 1116  2203.73595
## 1117  12235.8392
## 1118  40941.2854
## 1119  5630.45785
## 1120  11015.1747
## 1121  7228.21565
## 1122  39722.7462
## 1123 14426.07385
## 1124   2459.7201
## 1125    3989.841
## 1126   7727.2532
## 1127   5124.1887
## 1128 18963.17192
## 1129  2200.83085
## 1130   7153.5539
## 1131  5227.98875
## 1132  10982.5013
## 1133    4529.477
## 1134     4670.64
## 1135  6112.35295
## 1136  17178.6824
## 1137     22478.6
## 1138  11093.6229
## 1139   6457.8434
## 1140   4433.9159
## 1141    2154.361
## 1142  23887.6627
## 1143    6496.886
## 1144  2899.48935
## 1145  19350.3689
## 1146  7650.77375
## 1147  2850.68375
## 1148    2632.992
## 1149   9447.3824
## 1150  18328.2381
## 1151   8603.8234
## 1152 37465.34375
## 1153  13844.7972
## 1154  21771.3423
## 1155 13126.67745
## 1156  5327.40025
## 1157 13725.47184
## 1158 13019.16105
## 1159  8671.19125
## 1160  4134.08245
## 1161 18838.70366
## 1162  33307.5508
## 1163   5699.8375
## 1164  6393.60345
## 1165    4934.705
## 1166   6198.7518
## 1167  8733.22925
## 1168   2055.3249
## 1169     9964.06
## 1170  18223.4512
## 1171   5116.5004
## 1172 36910.60803
## 1173   38415.474
## 1174 20296.86345
## 1175   12347.172
## 1176  5373.36425
## 1177 23563.01618
## 1178   1702.4553
## 1179   10806.839
## 1180  3956.07145
## 1181 12890.05765
## 1182   5415.6612
## 1183   4058.1161
## 1184   41661.602
## 1185   7537.1639
## 1186  4718.20355
## 1187   6593.5083
## 1188    8442.667
## 1189 26125.67477
## 1190   6858.4796
## 1191   4795.6568
## 1192  6640.54485
## 1193   7162.0122
## 1194  10594.2257
## 1195 11938.25595
## 1196 60021.39897
## 1197 20167.33603
## 1198 12479.70895
## 1199   11345.519
## 1200   8515.7587
## 1201  2699.56835
## 1202  14449.8544
## 1203 12224.35085
## 1204  6985.50695
## 1205   3238.4357
## 1206   47269.854
## 1207  49577.6624
## 1208   4296.2712
## 1209   1135.9407
## 1210    5615.369
## 1211    9101.798
## 1212    6059.173
## 1213   1633.9618
## 1214  37607.5277
## 1215  18648.4217
## 1216    1241.565
## 1217   16232.847
## 1218 15828.82173
## 1219   4415.1588
## 1220    6474.013
## 1221 11436.73815
## 1222 11305.93455
## 1223  10197.7722
## 1224   4544.2348
## 1225    3277.161
## 1226   6770.1925
## 1227    7337.748
## 1228 10370.91255
## 1229  26926.5144
## 1230    10704.47
## 1231 34254.05335
## 1232    1880.487
## 1233      8615.3
## 1234  3292.52985
## 1235  3021.80915
## 1236 14478.33015
## 1237   4747.0529
## 1238  17043.3414
## 1239    10959.33
## 1240  4357.04365
## 1241 22462.04375
## 1242   4189.1131
## 1243   8283.6807
## 1244 24535.69855
## 1245  14283.4594
## 1246   1720.3537
## 1247    47403.88
## 1248   8534.6718
## 1249   3732.6251
## 1250    5472.449
## 1251   38344.566
## 1252   7147.4728
## 1253   7133.9025
## 1254   34828.654
## 1255   1515.3449
## 1256  9301.89355
## 1257 11931.12525
## 1258     1964.78
## 1259  1708.92575
## 1260   4340.4409
## 1261  5261.46945
## 1262  2710.82855
## 1263 62592.87309
## 1264 46718.16325
## 1265    3208.787
## 1266  37829.7242
## 1267 21259.37795
## 1268   2464.6188
## 1269  16115.3045
## 1270  21472.4788
## 1271   33900.653
## 1272    6875.961
## 1273  6940.90985
## 1274  4571.41305
## 1275    4536.259
## 1276   36397.576
## 1277 18765.87545
## 1278 11272.33139
## 1279    1731.677
## 1280   1163.4627
## 1281 19496.71917
## 1282  7201.70085
## 1283  5425.02335
## 1284 28101.33305
## 1285  12981.3457
## 1286  43896.3763
## 1287  4239.89265
## 1288 13143.33665
## 1289   7050.0213
## 1290   9377.9047
## 1291 22395.74424
## 1292   10325.206
## 1293  12629.1656
## 1294 10795.93733
## 1295   11411.685
## 1296  10600.5483
## 1297   2205.9808
## 1298   1629.8335
## 1299    2007.945
## 1300  29141.3603

Se presentan valores como "", "$$", "&&" y "null". Vamos a aprovechar que los datos se leen como tipo carácter para sustituir esos valores por NA desde la importación de la base.

# Re-import the data set, turning the placeholder tokens found above
# ("", "$$", "&&", "null") into NA at read time.
# stringsAsFactors is an argument of read.csv(), not an element of na.strings:
# inside c() it was coerced to the string "FALSE" and registered as an extra
# missing-value token.
data <- read.csv(
  '/var/home/diegob/Documents/Computer Science/R code/data_prac_2 1.csv',
  na.strings = c("", "$$", "&&", "null"),
  stringsAsFactors = FALSE
)
data <- data %>% select(-X)  # drop column X, as in the first import
head(data, 10)
##    edad      sexo    imc hijos fumador   region       clm
## 1    19  femenino 27.900     0     yes suroeste 16884.924
## 2    18 masculino 33.770     1      no  sureste  1725.552
## 3    28 masculino 33.000     3      no  sureste  4449.462
## 4    33 masculino 22.705     0      no noroeste 21984.471
## 5    NA masculino 28.880     0      no noroeste  3866.855
## 6    31  femenino 25.740     0      no  sureste  3756.622
## 7    46  femenino 33.440     1      no  sureste  8240.590
## 8    37  femenino 27.740     3      no noroeste  7281.506
## 9    37 masculino 29.830     2      no  noreste  6406.411
## 10   60  femenino 25.840     0      no noroeste 28923.137

Ahora vamos a verificar los tipos de datos

# Report the class of every column after the NA-aware import.
data %>% sapply(class)
##        edad        sexo         imc       hijos     fumador      region 
##   "integer" "character"   "numeric"   "integer" "character" "character" 
##         clm 
##   "numeric"

Listo, tenemos los tipos de datos adecuados

Verificando cuántos valores faltantes hay

# Count the missing values in each column.
data %>%
  is.na() %>%
  colSums()
##    edad    sexo     imc   hijos fumador  region     clm 
##      72       0      39       0       0       0      41

Solamente nos ocuparemos de las variables numéricas, reemplazando sus valores faltantes con alguna técnica vista en clase.

  1. Vamos a verificar valores duplicados
# List fully duplicated rows; with no columns named, all columns are compared.
janitor::get_dupes(data)
## No variable names specified - using all columns.
##   edad      sexo   imc hijos fumador   region      clm dupe_count
## 1   19 masculino 30.59     0      no noroeste 1639.563          2
## 2   19 masculino 30.59     0      no noroeste 1639.563          2

Existen registros duplicados. Sin embargo, las variables disponibles en la base puede que no sirvan como identificador, porque puede haber relaciones uno a muchos. Propondría agregar una variable más, por ejemplo un número de póliza, o bien, usando las variables existentes, una combinación de imc y clm como columna identificadora.

# Drop exact duplicate rows, keeping the first occurrence of each.
data <- data[!duplicated(data), , drop = FALSE]
head(data, n = 10)
##    edad      sexo    imc hijos fumador   region       clm
## 1    19  femenino 27.900     0     yes suroeste 16884.924
## 2    18 masculino 33.770     1      no  sureste  1725.552
## 3    28 masculino 33.000     3      no  sureste  4449.462
## 4    33 masculino 22.705     0      no noroeste 21984.471
## 5    NA masculino 28.880     0      no noroeste  3866.855
## 6    31  femenino 25.740     0      no  sureste  3756.622
## 7    46  femenino 33.440     1      no  sureste  8240.590
## 8    37  femenino 27.740     3      no noroeste  7281.506
## 9    37 masculino 29.830     2      no  noreste  6406.411
## 10   60  femenino 25.840     0      no noroeste 28923.137
  1. Reemplazando valores faltantes usando media, mediana, media recortada, moda, interpolación
# Keep only the numeric columns; these are the ones that get imputed below.
data_numeric <- select(data, where(is.numeric))
head(data_numeric, n = 10)
##    edad    imc hijos       clm
## 1    19 27.900     0 16884.924
## 2    18 33.770     1  1725.552
## 3    28 33.000     3  4449.462
## 4    33 22.705     0 21984.471
## 5    NA 28.880     0  3866.855
## 6    31 25.740     0  3756.622
## 7    46 33.440     1  8240.590
## 8    37 27.740     3  7281.506
## 9    37 29.830     2  6406.411
## 10   60 25.840     0 28923.137

Reemplazo con media

# Mean imputation: each NA in a numeric column is replaced by that column's
# mean, computed over the non-missing values.
data_mean <- data_numeric %>%
  lapply(function(col) {
    col_mean <- mean(col, na.rm = TRUE)
    ifelse(is.na(col), col_mean, col)
  }) %>%
  data.frame()
summary(data_mean)
##       edad            imc            hijos            clm       
##  Min.   :18.00   Min.   :15.96   Min.   :0.000   Min.   : 1122  
##  1st Qu.:27.00   1st Qu.:26.40   1st Qu.:0.000   1st Qu.: 4878  
##  Median :39.24   Median :30.59   Median :1.000   Median : 9705  
##  Mean   :39.24   Mean   :30.62   Mean   :1.096   Mean   :13287  
##  3rd Qu.:51.00   3rd Qu.:34.40   3rd Qu.:2.000   3rd Qu.:16115  
##  Max.   :64.00   Max.   :53.13   Max.   :5.000   Max.   :63770

Reemplazando con mediana

# Median imputation: each NA in a numeric column is replaced by that column's
# median, computed over the non-missing values.
data_median <- data_numeric %>%
  lapply(function(col) {
    col_median <- median(col, na.rm = TRUE)
    ifelse(is.na(col), col_median, col)
  }) %>%
  data.frame()
summary(data_median)
##       edad            imc            hijos            clm       
##  Min.   :18.00   Min.   :15.96   Min.   :0.000   Min.   : 1122  
##  1st Qu.:27.00   1st Qu.:26.40   1st Qu.:0.000   1st Qu.: 4878  
##  Median :39.00   Median :30.30   Median :1.000   Median : 9382  
##  Mean   :39.23   Mean   :30.61   Mean   :1.096   Mean   :13167  
##  3rd Qu.:51.00   3rd Qu.:34.40   3rd Qu.:2.000   3rd Qu.:16115  
##  Max.   :64.00   Max.   :53.13   Max.   :5.000   Max.   :63770

Reemplazando con media recortada

# Trimmed-mean imputation: each NA is replaced by the column mean computed
# with trim = 0.2 on the non-missing values.
# Note: despite its name, data_median_cut holds a trimmed MEAN, not a median.
data_median_cut <- data_numeric %>%
  lapply(function(col) {
    col_trimmed_mean <- mean(col, trim = 0.2, na.rm = TRUE)
    ifelse(is.na(col), col_trimmed_mean, col)
  }) %>%
  data.frame()
summary(data_median_cut)
##       edad            imc            hijos            clm       
##  Min.   :18.00   Min.   :15.96   Min.   :0.000   Min.   : 1122  
##  1st Qu.:27.00   1st Qu.:26.40   1st Qu.:0.000   1st Qu.: 4878  
##  Median :39.08   Median :30.41   Median :1.000   Median : 9705  
##  Mean   :39.23   Mean   :30.61   Mean   :1.096   Mean   :13183  
##  3rd Qu.:51.00   3rd Qu.:34.40   3rd Qu.:2.000   3rd Qu.:16115  
##  Max.   :64.00   Max.   :53.13   Max.   :5.000   Max.   :63770

Reemplazando con moda

# Mode imputation: each NA in a numeric column is replaced by that column's
# most frequent value.
# mfv() returns ALL tied modes. ifelse() recycles that vector by row position,
# so with ties different NAs would receive different "modes". Keeping only the
# first mode returned gives one well-defined replacement per column.
# The former `trim` argument was removed: it is not an mfv() parameter.
# NOTE(review): the summary printed below predates this fix; re-knit to refresh.
data_mode <- data.frame(lapply(data_numeric, function(x) {
  ifelse(is.na(x), mfv(x, na_rm = TRUE)[1], x)
}))
summary(data_mode)
##       edad            imc            hijos            clm       
##  Min.   :18.00   Min.   :15.96   Min.   :0.000   Min.   : 1122  
##  1st Qu.:25.00   1st Qu.:26.40   1st Qu.:0.000   1st Qu.: 4762  
##  Median :38.00   Median :30.59   Median :1.000   Median : 9305  
##  Mean   :38.13   Mean   :30.66   Mean   :1.096   Mean   :13254  
##  3rd Qu.:51.00   3rd Qu.:34.40   3rd Qu.:2.000   3rd Qu.:16586  
##  Max.   :64.00   Max.   :53.13   Max.   :5.000   Max.   :63770

Reemplazando con interpolación

# Linear-interpolation imputation via na.approx().
# na.rm = FALSE keeps the interpolated vector the same length as the column.
# With na.rm = TRUE, leading/trailing NAs are dropped, and ifelse() would then
# recycle a shorter vector and pair values with the wrong rows.
# rule = 2 (forwarded to approx()) fills NAs at either end of the column with
# the nearest observed value, since those cannot be interpolated.
data_interpol <- data.frame(lapply(data_numeric, function(x) {
  ifelse(is.na(x), na.approx(x, na.rm = FALSE, rule = 2), x)
}))
summary(data_interpol)
##       edad            imc            hijos            clm       
##  Min.   :18.00   Min.   :15.96   Min.   :0.000   Min.   : 1122  
##  1st Qu.:27.00   1st Qu.:26.22   1st Qu.:0.000   1st Qu.: 4796  
##  Median :39.00   Median :30.40   Median :1.000   Median : 9411  
##  Mean   :39.29   Mean   :30.62   Mean   :1.096   Mean   :13274  
##  3rd Qu.:51.00   3rd Qu.:34.58   3rd Qu.:2.000   3rd Qu.:16819  
##  Max.   :64.00   Max.   :53.13   Max.   :5.000   Max.   :63770

La mediana podría ser candidata como técnica de reemplazo porque no difiere mucho de los datos originales

Usando la base con interpolación

# Histogram of edad from the interpolation-imputed data; the bin count is left
# at the geom_histogram() default.
data_interpol %>%
  ggplot(mapping = aes(x = edad)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(x = "Edad", y = "Frecuencia", title = "Edad de los asegurados") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of imc from the interpolation-imputed data; the bin count is left
# at the geom_histogram() default.
data_interpol %>%
  ggplot(mapping = aes(x = imc)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(x = "IMC", y = "Frecuencia", title = "IMC de los asegurados") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of hijos from the interpolation-imputed data; the bin count is left
# at the geom_histogram() default.
data_interpol %>%
  ggplot(mapping = aes(x = hijos)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(x = "Cantidad de hijos", y = "Frecuencia", title = "Número de hijos") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of clm from the interpolation-imputed data; the bin count is left
# at the geom_histogram() default.
data_interpol %>%
  ggplot(mapping = aes(x = clm)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(x = "cantidad de reclamo", y = "Frecuencia", title = "Reclamos ") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

media de monto de reclamación por sexo

# Re-attach the character columns to the interpolation-imputed numeric ones.
# cbind() pairs rows by position; both frames derive from the same
# de-duplicated `data`, so they share the same row order.
char_data <- select(data, sexo, fumador, region)
data_interpol_all <- cbind(data_interpol, char_data)

# Mean of clm for each value of sexo.
mean_sex <- summarise(
  group_by(data_interpol_all, sexo),
  claim_amount = mean(clm)
)

mean_sex
## # A tibble: 2 × 2
##   sexo      claim_amount
##   <chr>            <dbl>
## 1 femenino        12517.
## 2 masculino       14017.

media de monto de reclamación por fumador y sexo

# Mean of clm for every fumador x sexo combination.
mean_sex_smk <- summarise(
  group_by(data_interpol_all, fumador, sexo),
  claim_amount = mean(clm)
)
## `summarise()` has grouped output by 'fumador'. You can override using the
## `.groups` argument.
mean_sex_smk
## # A tibble: 4 × 3
## # Groups:   fumador [2]
##   fumador sexo      claim_amount
##   <chr>   <chr>            <dbl>
## 1 no      femenino         8935.
## 2 no      masculino        8206.
## 3 yes     femenino        29556.
## 4 yes     masculino       32874.

Región con mayor monto promedio de reclamación

# Mean of clm per region, sorted so the highest mean comes first.
mean_region <- arrange(
  summarise(group_by(data_interpol_all, region), claim_amount = mean(clm)),
  desc(claim_amount)
)

mean_region
## # A tibble: 4 × 2
##   region   claim_amount
##   <chr>           <dbl>
## 1 sureste        14588.
## 2 noreste        13577.
## 3 noroeste       12418.
## 4 suroeste       12354.

etiquetando registros si el imc es mayor a 30

# Add a `clasificacion` column: "obesidad" when imc is strictly greater than
# 30, and an empty string otherwise.
data_interpol_label <- mutate(
  data_interpol_all,
  clasificacion = ifelse(imc > 30, "obesidad", "")
)
head(data_interpol_label, n = 10)
##    edad    imc hijos       clm      sexo fumador   region clasificacion
## 1    19 27.900     0 16884.924  femenino     yes suroeste              
## 2    18 33.770     1  1725.552 masculino      no  sureste      obesidad
## 3    28 33.000     3  4449.462 masculino      no  sureste      obesidad
## 4    33 22.705     0 21984.471 masculino      no noroeste              
## 5    32 28.880     0  3866.855 masculino      no noroeste              
## 6    31 25.740     0  3756.622  femenino      no  sureste              
## 7    46 33.440     1  8240.590  femenino      no  sureste      obesidad
## 8    37 27.740     3  7281.506  femenino      no noroeste              
## 9    37 29.830     2  6406.411 masculino      no  noreste              
## 10   60 25.840     0 28923.137  femenino      no noroeste

Top 10 personas obesas

# Keep the rows labelled "obesidad" and sort them by imc, highest first, so
# head() shows the ten largest imc values.
obesas <- data_interpol_label %>%
  filter(clasificacion == 'obesidad') %>%
  arrange(desc(imc))
head(obesas, n = 10)
##    edad   imc hijos       clm      sexo fumador   region clasificacion
## 1    18 53.13     0  1163.463 masculino      no  sureste      obesidad
## 2    22 52.58     1 44501.398 masculino     yes  sureste      obesidad
## 3    23 50.38     1  2438.055 masculino      no  sureste      obesidad
## 4    58 49.06     0 11381.325 masculino      no  sureste      obesidad
## 5    52 47.74     1  9748.911 masculino      no  sureste      obesidad
## 6    37 47.60     2 46113.511  femenino     yes suroeste      obesidad
## 7    47 47.52     1  8083.920 masculino      no  sureste      obesidad
## 8    54 47.41     0 63770.428  femenino     yes  sureste      obesidad
## 9    52 46.75     5 12592.534  femenino      no  sureste      obesidad
## 10   54 46.70     2 11538.421  femenino      no suroeste      obesidad