# Read the practice data set from disk using read.csv()'s default settings.
data <- read.csv(
  file = '/var/home/diegob/Documents/Computer Science/R code/data_prac_2 1.csv'
)
# Drop the `X` column, then preview the first ten rows.
data <- subset(data, select = -X)
head(data, n = 10)
## edad sexo imc hijos fumador region clm
## 1 19 femenino 27.9 0 yes suroeste 16884.924
## 2 18 masculino 33.77 1 no sureste 1725.5523
## 3 28 masculino 33 3 no sureste 4449.462
## 4 33 masculino 22.705 0 no noroeste 21984.47061
## 5 masculino 28.88 0 no noroeste 3866.8552
## 6 31 femenino 25.74 0 no sureste 3756.6216
## 7 46 femenino 33.44 1 no sureste 8240.5896
## 8 37 femenino 27.74 3 no noroeste 7281.5056
## 9 37 masculino 29.83 2 no noreste 6406.4107
## 10 60 femenino 25.84 0 no noroeste 28923.13692
NA
Usamos la función distinct para extraer el rango de valores de cada columna
e identificar los que no pertenecen a la base.
¿Por qué no usar which(is.na(data), arr.ind = TRUE)? Por la heterogeneidad
de los datos: no es lo mismo tener la cadena “null” que un NA.
data %>% distinct(edad)
## edad
## 1 19
## 2 18
## 3 28
## 4 33
## 5
## 6 31
## 7 46
## 8 37
## 9 60
## 10 25
## 11 62
## 12 23
## 13 56
## 14 27
## 15 30
## 16 59
## 17 63
## 18 55
## 19 22
## 20 26
## 21 24
## 22 41
## 23 38
## 24 21
## 25 48
## 26 36
## 27 40
## 28 58
## 29 53
## 30 34
## 31 43
## 32 64
## 33 20
## 34 61
## 35 44
## 36 57
## 37 29
## 38 45
## 39 &&
## 40 35
## 41 54
## 42 52
## 43 49
## 44 47
## 45 32
## 46 51
## 47 42
## 48 39
## 49 50
## 50 null
# List the distinct values present in `sexo`.
distinct(data, sexo)
## sexo
## 1 femenino
## 2 masculino
# List the distinct values present in `imc`.
distinct(data, imc)
## imc
## 1 27.9
## 2 33.77
## 3 33
## 4 22.705
## 5 28.88
## 6 25.74
## 7 33.44
## 8 27.74
## 9 29.83
## 10 25.84
## 11 26.22
## 12 26.29
## 13 34.4
## 14 39.82
## 15 42.13
## 16 24.6
## 17 30.78
## 18 23.845
## 19 40.3
## 20 35.3
## 21 36.005
## 22 32.4
## 23 34.1
## 24 31.92
## 25 28.025
## 26 27.72
## 27 23.085
## 28 32.775
## 29 17.385
## 30 36.3
## 31 35.6
## 32 26.315
## 33 28.6
## 34 28.31
## 35 36.4
## 36 20.425
## 37 32.965
## 38 20.8
## 39 36.67
## 40 39.9
## 41 26.6
## 42 36.63
## 43 21.78
## 44 30.8
## 45 37.05
## 46 37.3
## 47 38.665
## 48 34.77
## 49 24.53
## 50 35.2
## 51 35.625
## 52 33.63
## 53 28
## 54 34.43
## 55 28.69
## 56 36.955
## 57 31.825
## 58 31.68
## 59 22.88
## 60 37.335
## 61 27.36
## 62 33.66
## 63 24.7
## 64 25.935
## 65 22.42
## 66 28.9
## 67 $$
## 68 36.19
## 69 23.98
## 70 24.75
## 71 28.5
## 72 28.1
## 73 32.01
## 74 27.4
## 75 34.01
## 76 29.59
## 77 35.53
## 78 39.805
## 79 26.885
## 80 38.285
## 81 37.62
## 82 41.23
## 83 34.8
## 84 22.895
## 85 31.16
## 86 27.2
## 87 26.98
## 88 39.49
## 89 24.795
## 90 31.3
## 91 38.28
## 92 19.95
## 93 19.3
## 94 31.6
## 95 25.46
## 96 30.115
## 97 29.92
## 98 27.5
## 99 28.4
## 100 30.875
## 101 27.94
## 102 35.09
## 103 29.7
## 104 35.72
## 105 32.205
## 106 28.595
## 107 49.06
## 108 27.17
## 109 23.37
## 110 37.1
## 111 23.75
## 112 28.975
## 113 31.35
## 114 33.915
## 115 28.785
## 116 28.3
## 117 37.4
## 118 17.765
## 119 34.7
## 120 26.505
## 121 22.04
## 122 35.9
## 123 25.555
## 124 28.05
## 125 25.175
## 126 31.9
## 127 36
## 128 32.49
## 129 25.3
## 130 29.735
## 131 38.83
## 132 30.495
## 133 37.73
## 134 37.43
## 135 24.13
## 136 37.145
## 137 39.52
## 138 24.42
## 139 27.83
## 140 36.85
## 141 39.6
## 142 29.8
## 143 29.64
## 144 28.215
## 145 37
## 146 33.155
## 147 18.905
## 148 41.47
## 149 30.3
## 150 15.96
## 151 33.345
## 152 37.7
## 153 27.835
## 154 29.2
## 155 26.41
## 156 30.69
## 157 41.895
## 158 30.9
## 159 32.2
## 160 32.11
## 161 31.57
## 162 26.2
## 163 30.59
## 164 32.8
## 165 18.05
## 166 39.33
## 167 32.23
## 168 24.035
## 169 36.08
## 170 22.3
## 171 26.4
## 172 31.8
## 173 26.73
## 174 23.1
## 175 23.21
## 176 33.7
## 177 33.25
## 178 24.64
## 179 33.88
## 180 38.06
## 181 41.91
## 182 31.635
## 183 36.195
## 184 17.8
## 185 24.51
## 186 22.22
## 187 38.39
## 188 29.07
## 189 22.135
## 190 26.8
## 191 30.02
## 192 35.86
## 193 20.9
## 194 17.29
## 195 34.21
## 196 25.365
## 197 40.15
## 198 24.415
## 199 25.2
## 200 26.84
## 201 24.32
## 202 42.35
## 203 19.8
## 204 32.395
## 205 30.2
## 206
## 207 34.2
## 208 27.455
## 209 27.55
## 210 20.615
## 211 24.3
## 212 31.79
## 213 21.56
## 214 28.12
## 215 40.565
## 216 27.645
## 217 31.2
## 218 26.62
## 219 36.765
## 220 33.4
## 221 45.54
## 222 28.82
## 223 22.99
## 224 27.7
## 225 34.39
## 226 22.61
## 227 37.51
## 228 38
## 229 33.33
## 230 34.865
## 231 33.06
## 232 35.97
## 233 31.4
## 234 25.27
## 235 40.945
## 236 34.105
## 237 36.48
## 238 33.8
## 239 36.7
## 240 36.385
## 241 34.5
## 242 32.3
## 243 27.6
## 244 29.26
## 245 35.75
## 246 23.18
## 247 25.6
## 248 35.245
## 249 43.89
## 250 20.79
## 251 30.5
## 252 21.7
## 253 21.89
## 254 null
## 255 &&
## 256 32.015
## 257 30.4
## 258 21.09
## 259 22.23
## 260 32.9
## 261 24.89
## 262 31.46
## 263 17.955
## 264 30.685
## 265 43.34
## 266 39.05
## 267 31.445
## 268 19.855
## 269 31.02
## 270 20.6
## 271 47.52
## 272 20.4
## 273 38.38
## 274 24.31
## 275 23.6
## 276 21.12
## 277 30.03
## 278 17.48
## 279 20.235
## 280 17.195
## 281 23.9
## 282 35.15
## 283 35.64
## 284 22.6
## 285 39.16
## 286 27.265
## 287 29.165
## 288 16.815
## 289 33.1
## 290 26.9
## 291 33.11
## 292 31.73
## 293 46.75
## 294 32.68
## 295 33.5
## 296 43.01
## 297 36.52
## 298 26.695
## 299 25.65
## 300 38.6
## 301 29.6
## 302 23.4
## 303 46.53
## 304 30.14
## 305 30
## 306 38.095
## 307 28.38
## 308 28.7
## 309 33.82
## 310 24.09
## 311 32.67
## 312 25.1
## 313 32.56
## 314 41.325
## 315 34.3
## 316 31.065
## 317 21.47
## 318 25.08
## 319 43.4
## 320 25.7
## 321 27.93
## 322 39.2
## 323 26.03
## 324 30.25
## 325 28.93
## 326 35.7
## 327 35.31
## 328 31
## 329 44.22
## 330 26.07
## 331 25.8
## 332 39.425
## 333 40.48
## 334 38.9
## 335 47.41
## 336 30.21
## 337 35.435
## 338 46.7
## 339 46.2
## 340 21.4
## 341 23.8
## 342 44.77
## 343 32.12
## 344 29.1
## 345 37.29
## 346 43.12
## 347 36.86
## 348 34.295
## 349 23.465
## 350 45.43
## 351 23.65
## 352 28.27
## 353 35.91
## 354 29
## 355 19.57
## 356 21.85
## 357 40.26
## 358 33.725
## 359 29.48
## 360 32.6
## 361 37.525
## 362 23.655
## 363 37.8
## 364 29.37
## 365 19
## 366 21.3
## 367 42.46
## 368 38.95
## 369 36.1
## 370 38.19
## 371 42.4
## 372 34.96
## 373 42.68
## 374 31.13
## 375 31.54
## 376 29.81
## 377 21.375
## 378 40.81
## 379 17.4
## 380 20.3
## 381 26.125
## 382 41.69
## 383 24.1
## 384 36.2
## 385 40.185
## 386 39.27
## 387 34.87
## 388 44.745
## 389 29.545
## 390 23.54
## 391 40.66
## 392 36.6
## 393 35.4
## 394 27.075
## 395 21.755
## 396 40.28
## 397 23.7
## 398 35.5
## 399 29.15
## 400 27
## 401 37.905
## 402 22.77
## 403 22.8
## 404 38.17
## 405 34.58
## 406 27.1
## 407 39.7
## 408 19.475
## 409 26.7
## 410 34.32
## 411 24.4
## 412 41.14
## 413 22.515
## 414 41.8
## 415 26.18
## 416 42.24
## 417 26.51
## 418 35.815
## 419 41.42
## 420 36.575
## 421 42.94
## 422 21.01
## 423 24.225
## 424 17.67
## 425 31.5
## 426 31.1
## 427 32.78
## 428 32.45
## 429 50.38
## 430 47.6
## 431 33.535
## 432 25.4
## 433 29.9
## 434 43.7
## 435 24.86
## 436 30.1
## 437 28.8
## 438 29.5
## 439 39.5
## 440 29.04
## 441 38.94
## 442 44
## 443 20.045
## 444 40.92
## 445 35.1
## 446 29.355
## 447 32.585
## 448 32.34
## 449 39.8
## 450 24.605
## 451 33.99
## 452 28.2
## 453 25
## 454 33.2
## 455 23.2
## 456 20.1
## 457 32.5
## 458 37.18
## 459 46.09
## 460 39.93
## 461 35.8
## 462 31.255
## 463 18.335
## 464 42.9
## 465 28.405
## 466 26.79
## 467 39.615
## 468 25.9
## 469 25.745
## 470 28.16
## 471 23.56
## 472 40.5
## 473 35.42
## 474 39.995
## 475 34.675
## 476 20.52
## 477 23.275
## 478 36.29
## 479 32.7
## 480 19.19
## 481 24.985
## 482 20.13
## 483 23.32
## 484 45.32
## 485 34.6
## 486 18.715
## 487 21.565
## 488 23
## 489 37.07
## 490 52.58
## 491 42.655
## 492 21.66
## 493 32
## 494 18.3
## 495 47.74
## 496 22.1
## 497 19.095
## 498 31.24
## 499 29.925
## 500 20.35
## 501 25.85
## 502 42.75
## 503 18.6
## 504 23.87
## 505 45.9
## 506 21.5
## 507 30.305
## 508 44.88
## 509 41.1
## 510 40.37
## 511 28.49
## 512 33.55
## 513 40.375
## 514 27.28
## 515 17.86
## 516 33.3
## 517 39.14
## 518 21.945
## 519 24.97
## 520 23.94
## 521 34.485
## 522 21.8
## 523 23.3
## 524 36.96
## 525 21.28
## 526 29.4
## 527 27.3
## 528 29.3
## 529 37.9
## 530 37.715
## 531 23.76
## 532 25.52
## 533 27.61
## 534 27.06
## 535 39.4
## 536 34.9
## 537 22
## 538 30.36
## 539 27.8
## 540 53.13
## 541 39.71
## 542 32.87
## 543 44.7
## 544 30.97
# List the distinct values present in `hijos`.
distinct(data, hijos)
## hijos
## 1 0
## 2 1
## 3 3
## 4 2
## 5 5
## 6 4
# List the distinct values present in `fumador`.
distinct(data, fumador)
## fumador
## 1 yes
## 2 no
# List the distinct values present in `region`.
distinct(data, region)
## region
## 1 suroeste
## 2 sureste
## 3 noroeste
## 4 noreste
# List the distinct values present in `clm`.
distinct(data, clm)
## clm
## 1 16884.924
## 2 1725.5523
## 3 4449.462
## 4 21984.47061
## 5 3866.8552
## 6 3756.6216
## 7 8240.5896
## 8 7281.5056
## 9 6406.4107
## 10 28923.13692
## 11 2721.3208
## 12 27808.7251
## 13 1826.843
## 14 11090.7178
## 15 39611.7577
## 16 1837.237
## 17 10797.3362
## 18 2395.17155
## 19 10602.385
## 20 36837.467
## 21 13228.84695
## 22 4149.736
## 23 1137.011
## 24 37701.8768
## 25 6203.90175
## 26 14001.1338
## 27 14451.83515
## 28 12268.63225
## 29 2775.19215
## 30 38711
## 31 35585.576
## 32 2198.18985
## 33 4687.797
## 34 13770.0979
## 35 51194.55914
## 36 1625.43375
## 37 15612.19335
## 38 2302.3
## 39 39774.2763
## 40 48173.361
## 41 3046.062
## 42 4949.7587
## 43 6272.4772
## 44 6313.759
## 45 6079.6715
## 46 20630.28351
## 47 3393.35635
## 48 3556.9223
## 49 12629.8967
## 50 38709.176
## 51 2211.13075
## 52 3579.8287
## 53 23568.272
## 54 37742.5757
## 55 8059.6791
## 56 47496.49445
## 57 13607.36875
## 58 34303.1672
## 59 23244.7902
## 60 5989.52365
## 61 8606.2174
## 62 4504.6624
## 63 30166.61817
## 64 4133.64165
## 65 14711.7438
## 66 1743.214
## 67 14235.072
## 68 6389.37785
## 69 5920.1041
## 70 17663.1442
## 71 16577.7795
## 72 6799.458
## 73 11741.726
## 74 11946.6259
## 75 7726.854
## 76 11356.6609
## 77 3947.4131
## 78 1532.4697
## 79 2755.02095
## 80 6571.02435
## 81 4441.21315
## 82 7935.29115
## 83 37165.1638
## 84 11033.6617
## 85 39836.519
## 86 21098.55405
## 87 43578.9394
## 88 11073.176
## 89 8026.6666
## 90 11082.5772
## 91 2026.9741
## 92
## 93 30184.9367
## 94 5729.0053
## 95 47291.055
## 96 3766.8838
## 97 12105.32
## 98 10226.2842
## 99 22412.6485
## 100 15820.699
## 101 6186.127
## 102 3645.0894
## 103 21344.8467
## 104 5003.853
## 105 17560.37975
## 106 2331.519
## 107 3877.30425
## 108 2867.1196
## 109 47055.5321
## 110 10825.2537
## 111 11881.358
## 112 4646.759
## 113 2404.7338
## 114 11488.31695
## 115 30259.99556
## 116 11381.3254
## 117 6686.4313
## 118 7740.337
## 119 2257.47525
## 120 39556.4945
## 121 10115.00885
## 122 3385.39915
## 123 17081.08
## 124 9634.538
## 125 32734.1863
## 126 6082.405
## 127 12815.44495
## 128 13616.3586
## 129 11163.568
## 130 2457.21115
## 131 2155.6815
## 132 1261.442
## 133 2045.68525
## 134 27322.73386
## 135 2166.732
## 136 27375.90478
## 137 3490.5491
## 138 18972.495
## 139 18157.876
## 140 20745.9891
## 141 5138.2567
## 142 40720.55105
## 143 9877.6077
## 144 10959.6947
## 145 1842.519
## 146 5125.2157
## 147 7789.635
## 148 6334.34355
## 149 19964.7463
## 150 7077.1894
## 151 6948.7008
## 152 21223.6758
## 153 15518.18025
## 154 36950.2567
## 155 19749.38338
## 156 21348.706
## 157 36149.4835
## 158 10450.552
## 159 5152.134
## 160 5028.1466
## 161 10407.08585
## 162 4830.63
## 163 6128.79745
## 164 2719.27975
## 165 4827.90495
## 166 13405.3903
## 167 8116.68
## 168 1694.7964
## 169 5246.047
## 170 2855.43755
## 171 48824.45
## 172 6455.86265
## 173 10436.096
## 174 8823.279
## 175 8538.28845
## 176 11735.87905
## 177 1631.8212
## 178 4005.4225
## 179 7419.4779
## 180 7731.4271
## 181 43753.33705
## 182 3981.9768
## 183 5325.651
## 184 6775.961
## 185 4922.9159
## 186 12557.6053
## 187 4883.866
## 188 2137.6536
## 189 12044.342
## 190 1137.4697
## 191 1639.5631
## 192 5649.715
## 193 8516.829
## 194 9644.2525
## 195 14901.5167
## 196 2130.6759
## 197 8871.1517
## 198 13012.20865
## 199 37133.8982
## 200 7147.105
## 201 4337.7352
## 202 11743.299
## 203 20984.0936
## 204 13880.949
## 205 6610.1097
## 206 1980.07
## 207 8162.71625
## 208 3537.703
## 209 5002.7827
## 210 8520.026
## 211 7371.772
## 212 10355.641
## 213 2483.736
## 214 3392.9768
## 215 25081.76784
## 216 5012.471
## 217 10564.8845
## 218 5253.524
## 219 34779.615
## 220 19515.5416
## 221 11987.1682
## 222 2689.4954
## 223 24227.33724
## 224 7358.17565
## 225 9225.2564
## 226 7443.64305
## 227 14001.2867
## 228 1727.785
## 229 12333.828
## 230 6710.1919
## 231 19444.2658
## 232 1615.7667
## 233 4463.2051
## 234 17352.6803
## 235 7152.6714
## 236 38511.6283
## 237 5354.07465
## 238 35160.13457
## 239 7196.867
## 240 29523.1656
## 241 24476.47851
## 242 12648.7034
## 243 1986.9334
## 244 1832.094
## 245 4040.55825
## 246 12829.4551
## 247 47305.305
## 248 44260.7499
## 249 4260.744
## 250 41097.16175
## 251 13047.33235
## 252 43921.1837
## 253 5400.9805
## 254 11520.09985
## 255 33750.2918
## 256 11837.16
## 257 17085.2676
## 258 24869.8368
## 259 36219.40545
## 260 20462.99766
## 261 46151.1245
## 262 17179.522
## 263 14590.63205
## 264 7441.053
## 265 9282.4806
## 266 1719.4363
## 267 42856.838
## 268 7265.7025
## 269 9617.66245
## 270 2523.1695
## 271 9715.841
## 272 2803.69785
## 273 2150.469
## 274 12928.7911
## 275 9855.1314
## 276 22331.5668
## 277 48549.17835
## 278 4237.12655
## 279 11879.10405
## 280 9625.92
## 281 7742.1098
## 282 9432.9253
## 283 14256.1928
## 284 47896.79135
## 285 25992.82104
## 286 3172.018
## 287 20277.80751
## 288 42112.2356
## 289 2156.7518
## 290 3906.127
## 291 &&
## 292 16297.846
## 293 21978.6769
## 294 38746.3551
## 295 9249.4952
## 296 6746.7425
## 297 24873.3849
## 298 4349.462
## 299 12646.207
## 300 19442.3535
## 301 20177.67113
## 302 4151.0287
## 303 11944.59435
## 304 7749.1564
## 305 8444.474
## 306 1737.376
## 307 42124.5153
## 308 8124.4084
## 309 34838.873
## 310 9722.7695
## 311 8835.26495
## 312 10435.06525
## 313 7421.19455
## 314 4667.60765
## 315 4894.7533
## 316 24671.66334
## 317 35491.64
## 318 11566.30055
## 319 2866.091
## 320 6600.20595
## 321 3561.8889
## 322 42760.5022
## 323 47928.03
## 324 9144.565
## 325 48517.56315
## 326 24393.6224
## 327 13429.0354
## 328 11658.37915
## 329 19144.57652
## 330 13822.803
## 331 12142.5786
## 332 13937.6665
## 333 41919.097
## 334 8232.6388
## 335 18955.22017
## 336 13352.0998
## 337 13217.0945
## 338 13981.85035
## 339 10977.2063
## 340 6184.2994
## 341 4889.9995
## 342 8334.45755
## 343 5478.0368
## 344 1635.73365
## 345 11830.6072
## 346 8932.084
## 347 3554.203
## 348 12404.8791
## 349 14133.03775
## 350 24603.04837
## 351 8944.1151
## 352 9620.3307
## 353 1837.2819
## 354 1607.5101
## 355 10043.249
## 356 4751.07
## 357 2597.779
## 358 3180.5101
## 359 9778.3472
## 360 13430.265
## 361 8017.06115
## 362 8116.26885
## 363 3481.868
## 364 13415.0381
## 365 12029.2867
## 366 7639.41745
## 367 36085.219
## 368 1391.5287
## 369 18033.9679
## 370 21659.9301
## 371 38126.2465
## 372 16455.70785
## 373 27000.98473
## 374 15006.57945
## 375 42303.69215
## 376 20781.48892
## 377 5846.9176
## 378 8302.53565
## 379 1261.859
## 380 11856.4115
## 381 30284.64294
## 382 3176.8159
## 383 4618.0799
## 384 10736.87075
## 385 2138.0707
## 386 8964.06055
## 387 9290.1395
## 388 9411.005
## 389 7526.70645
## 390 8522.003
## 391 16586.49771
## 392 1631.6683
## 393 9264.797
## 394 8083.9198
## 395 14692.66935
## 396 10269.46
## 397 3260.199
## 398 11396.9002
## 399 4185.0979
## 400 8539.671
## 401 6652.5288
## 402 4074.4537
## 403 1621.3402
## 404 19594.80965
## 405 14455.64405
## 406 5080.096
## 407 2134.9015
## 408 7345.7266
## 409 9140.951
## 410 18608.262
## 411 14418.2804
## 412 28950.4692
## 413 46889.2612
## 414 46599.1084
## 415 39125.33225
## 416 2727.3951
## 417 8968.33
## 418 9788.8659
## 419 6555.07035
## 420 7323.734819
## 421 3167.45585
## 422 18804.7524
## 423 23082.95533
## 424 4906.40965
## 425 5969.723
## 426 12638.195
## 427 4243.59005
## 428 13919.8229
## 429 2254.7967
## 430 5926.846
## 431 12592.5345
## 432 2897.3235
## 433 4738.2682
## 434 37079.372
## 435 1149.3959
## 436 28287.89766
## 437 $$
## 438 7345.084
## 439 12730.9996
## 440 11454.0215
## 441 5910.944
## 442 4762.329
## 443 7512.267
## 444 4032.2407
## 445 1969.614
## 446 1769.53165
## 447 4686.3887
## 448 21797.0004
## 449 11840.77505
## 450 10601.412
## 451 7682.67
## 452 10381.4787
## 453 22144.032
## 454 15230.32405
## 455 11165.41765
## 456 1632.03625
## 457 19521.9682
## 458 13224.693
## 459 12643.3778
## 460 23288.9284
## 461 2201.0971
## 462 2497.0383
## 463 2203.47185
## 464 1744.465
## 465 20878.78443
## 466 25382.297
## 467 28868.6639
## 468 35147.52848
## 469 2534.39375
## 470 1534.3045
## 471 1824.2854
## 472 15555.18875
## 473 9304.7019
## 474 1622.1885
## 475 9880.068
## 476 9563.029
## 477 4347.02335
## 478 12475.3513
## 479 1253.936
## 480 48885.13561
## 481 10461.9794
## 482 1748.774
## 483 24513.09126
## 484 2196.4732
## 485 12574.049
## 486 17942.106
## 487 1967.0227
## 488 4931.647
## 489 8027.968
## 490 8211.1002
## 491 13470.86
## 492 36197.699
## 493 6837.3687
## 494 22218.1149
## 495 5974.3847
## 496 null
## 497 2643.2685
## 498 3077.0955
## 499 3044.2133
## 500 11455.28
## 501 11763.0009
## 502 2498.4144
## 503 9361.3268
## 504 1256.299
## 505 21082.16
## 506 11362.755
## 507 27724.28875
## 508 8413.46305
## 509 5240.765
## 510 25656.57526
## 511 3994.1778
## 512 9866.30485
## 513 5397.6167
## 514 38245.59327
## 515 11482.63485
## 516 24059.68019
## 517 9861.025
## 518 8342.90875
## 519 1708.0014
## 520 48675.5177
## 521 14043.4767
## 522 12925.886
## 523 19214.70553
## 524 13831.1152
## 525 6067.12675
## 526 5972.378
## 527 8825.086
## 528 8233.0975
## 529 27346.04207
## 530 6196.448
## 531 3056.3881
## 532 13887.204
## 533 63770.42801
## 534 10231.4999
## 535 23807.2406
## 536 3268.84665
## 537 11538.421
## 538 3213.62205
## 539 45863.205
## 540 3972.9247
## 541 11187.6567
## 542 17878.90068
## 543 3847.674
## 544 8334.5896
## 545 3935.1799
## 546 39983.42595
## 547 1646.4297
## 548 9193.8385
## 549 10923.9332
## 550 2494.022
## 551 9058.7303
## 552 2801.2588
## 553 2128.43105
## 554 6373.55735
## 555 7256.7231
## 556 11552.904
## 557 45702.02235
## 558 3761.292
## 559 4753.6368
## 560 31620.00106
## 561 13224.05705
## 562 12222.8983
## 563 1664.9996
## 564 58571.07448
## 565 9724.53
## 566 12913.9924
## 567 6356.2707
## 568 17626.23951
## 569 1242.816
## 570 4779.6023
## 571 43943.8761
## 572 13635.6379
## 573 5976.8311
## 574 11842.442
## 575 2566.4707
## 576 15359.1045
## 577 5709.1644
## 578 8823.98575
## 579 7640.3092
## 580 5594.8455
## 581 33471.97189
## 582 1633.0444
## 583 9174.13565
## 584 11070.535
## 585 16085.1275
## 586 17468.9839
## 587 9283.562
## 588 3558.62025
## 589 25678.77845
## 590 4435.0942
## 591 39241.442
## 592 8547.6913
## 593 6571.544
## 594 2207.69745
## 595 6753.038
## 596 1880.07
## 597 42969.8527
## 598 11658.11505
## 599 23306.547
## 600 34439.8559
## 601 10713.644
## 602 3659.346
## 603 40182.246
## 604 9182.17
## 605 34617.84065
## 606 12129.61415
## 607 3736.4647
## 608 6748.5912
## 609 11326.71487
## 610 11365.952
## 611 42983.4585
## 612 10085.846
## 613 1977.815
## 614 3366.6697
## 615 7173.35995
## 616 9391.346
## 617 14410.9321
## 618 24915.04626
## 619 20149.3229
## 620 12949.1554
## 621 6666.243
## 622 32787.45859
## 623 13143.86485
## 624 4466.6214
## 625 18806.14547
## 626 10141.1362
## 627 6123.5688
## 628 8252.2843
## 629 1712.227
## 630 12430.95335
## 631 9800.8882
## 632 10579.711
## 633 8280.6227
## 634 8527.532
## 635 12244.531
## 636 24667.419
## 637 3410.324
## 638 4058.71245
## 639 26392.26029
## 640 14394.39815
## 641 6435.6237
## 642 22192.43711
## 643 5148.5526
## 644 1136.3994
## 645 27037.9141
## 646 42560.4304
## 647 8703.456
## 648 45710.20785
## 649 6500.2359
## 650 4837.5823
## 651 3943.5954
## 652 4399.731
## 653 6185.3208
## 654 46200.9851
## 655 7222.78625
## 656 46130.5265
## 657 12363.547
## 658 10156.7832
## 659 2585.269
## 660 1242.26
## 661 40103.89
## 662 9863.4718
## 663 4766.022
## 664 11244.3769
## 665 7729.64575
## 666 5438.7491
## 667 26236.57997
## 668 34806.4677
## 669 2104.1134
## 670 8068.185
## 671 2362.22905
## 672 2352.96845
## 673 3577.999
## 674 3201.24515
## 675 29186.48236
## 676 40273.6455
## 677 10976.24575
## 678 3500.6123
## 679 2020.5523
## 680 9541.69555
## 681 9504.3103
## 682 5385.3379
## 683 8930.93455
## 684 5375.038
## 685 10264.4421
## 686 6113.23105
## 687 5469.0066
## 688 1727.54
## 689 10107.2206
## 690 8310.83915
## 691 1984.4533
## 692 2457.502
## 693 12146.971
## 694 9566.9909
## 695 13112.6048
## 696 10848.1343
## 697 12231.6136
## 698 9875.6804
## 699 11264.541
## 700 12979.358
## 701 1263.249
## 702 10106.13425
## 703 40932.4295
## 704 6664.68595
## 705 16657.71745
## 706 2217.6012
## 707 6781.3542
## 708 19361.9988
## 709 10065.413
## 710 4234.927
## 711 9447.25035
## 712 14007.222
## 713 40419.0191
## 714 3484.331
## 715 36189.1017
## 716 44585.45587
## 717 8604.48365
## 718 18246.4955
## 719 43254.41795
## 720 3757.8448
## 721 8827.2099
## 722 9910.35985
## 723 11737.84884
## 724 8556.907
## 725 3062.50825
## 726 19539.243
## 727 1906.35825
## 728 14210.53595
## 729 11833.7823
## 730 17128.42608
## 731 5031.26955
## 732 7985.815
## 733 23065.4207
## 734 5428.7277
## 735 36307.7983
## 736 3925.7582
## 737 2416.955
## 738 19040.876
## 739 3070.8087
## 740 11842.62375
## 741 8062.764
## 742 7050.642
## 743 14319.031
## 744 6933.24225
## 745 27941.28758
## 746 11150.78
## 747 12797.20962
## 748 17748.5062
## 749 7261.741
## 750 10560.4917
## 751 6986.697
## 752 7448.40395
## 753 5934.3798
## 754 9869.8102
## 755 18259.216
## 756 1146.7966
## 757 9386.1613
## 758 24520.264
## 759 4350.5144
## 760 6414.178
## 761 12741.16745
## 762 1917.3184
## 763 5209.57885
## 764 13457.9608
## 765 5662.225
## 766 1252.407
## 767 2731.9122
## 768 21195.818
## 769 7209.4918
## 770 18310.742
## 771 4266.1658
## 772 4719.52405
## 773 11848.141
## 774 17904.52705
## 775 7046.7222
## 776 14313.8463
## 777 2103.08
## 778 38792.6856
## 779 1815.8759
## 780 7731.85785
## 781 28476.73499
## 782 2136.88225
## 783 1131.5066
## 784 3309.7926
## 785 9414.92
## 786 6360.9936
## 787 11013.7119
## 788 4428.88785
## 789 5584.3057
## 790 1877.9294
## 791 2842.76075
## 792 3597.596
## 793 23401.30575
## 794 55135.40209
## 795 7445.918
## 796 2680.9493
## 797 1621.8827
## 798 8219.2039
## 799 12523.6048
## 800 16069.08475
## 801 43813.8661
## 802 20773.62775
## 803 39597.4072
## 804 6117.4945
## 805 13393.756
## 806 4719.73655
## 807 11743.9341
## 808 5377.4578
## 809 7160.3303
## 810 11657.7189
## 811 6402.29135
## 812 12622.1795
## 813 1526.312
## 814 12323.936
## 815 27533.9129
## 816 10072.05505
## 817 45008.9555
## 818 9872.701
## 819 2438.0552
## 820 2974.126
## 821 10601.63225
## 822 37270.1512
## 823 14119.62
## 824 42111.6647
## 825 11729.6795
## 826 24106.91255
## 827 1875.344
## 828 40974.1649
## 829 15817.9857
## 830 18218.16139
## 831 10965.446
## 832 46113.511
## 833 7151.092
## 834 12269.68865
## 835 5458.04645
## 836 8782.469
## 837 6600.361
## 838 1141.4451
## 839 11576.13
## 840 13129.60345
## 841 4391.652
## 842 8457.818
## 843 3392.3652
## 844 5966.8874
## 845 6849.026
## 846 8891.1395
## 847 2690.1138
## 848 26140.3603
## 849 6653.7886
## 850 6311.952
## 851 3443.064
## 852 2789.0574
## 853 2585.85065
## 854 46255.1125
## 855 4877.98105
## 856 19719.6947
## 857 27218.43725
## 858 5272.1758
## 859 1682.597
## 860 11945.1327
## 861 29330.98315
## 862 7243.8136
## 863 10422.91665
## 864 44202.6536
## 865 13555.0049
## 866 13063.883
## 867 19798.05455
## 868 2221.56445
## 869 1634.5734
## 870 2117.33885
## 871 8688.85885
## 872 48673.5588
## 873 4661.28635
## 874 8125.7845
## 875 12644.589
## 876 4564.19145
## 877 4846.92015
## 878 7633.7206
## 879 15170.069
## 880 17496.306
## 881 2639.0429
## 882 33732.6867
## 883 14382.70905
## 884 7626.993
## 885 5257.50795
## 886 2473.3341
## 887 21774.32215
## 888 35069.37452
## 889 13041.921
## 890 5245.2269
## 891 13451.122
## 892 13462.52
## 893 5488.262
## 894 4320.41085
## 895 6250.435
## 896 25333.33284
## 897 2913.569
## 898 12032.326
## 899 13470.8044
## 900 6289.7549
## 901 2927.0647
## 902 6238.298
## 903 10096.97
## 904 7348.142
## 905 4673.3922
## 906 12233.828
## 907 32108.66282
## 908 8965.79575
## 909 2304.0022
## 910 9487.6442
## 911 1121.8739
## 912 9549.5651
## 913 2217.46915
## 914 1628.4709
## 915 12982.8747
## 916 11674.13
## 917 7160.094
## 918 39047.285
## 919 6358.77645
## 920 19933.458
## 921 11534.87265
## 922 47462.894
## 923 4527.18295
## 924 38998.546
## 925 20009.63365
## 926 3875.7341
## 927 41999.52
## 928 12609.88702
## 929 41034.2214
## 930 28468.91901
## 931 2730.10785
## 932 3353.284
## 933 14474.675
## 934 9500.57305
## 935 26467.09737
## 936 4746.344
## 937 23967.38305
## 938 7518.02535
## 939 3279.86855
## 940 8596.8278
## 941 10702.6424
## 942 4992.3764
## 943 2527.81865
## 944 1759.338
## 945 2322.6218
## 946 16138.76205
## 947 7804.1605
## 948 2902.9065
## 949 9704.66805
## 950 4889.0368
## 951 25517.11363
## 952 4500.33925
## 953 19199.944
## 954 16796.41194
## 955 4915.05985
## 956 7624.63
## 957 8410.04685
## 958 28340.18885
## 959 4518.82625
## 960 14571.8908
## 961 3378.91
## 962 7144.86265
## 963 10118.424
## 964 5484.4673
## 965 16420.49455
## 966 7986.47525
## 967 7418.522
## 968 13887.9685
## 969 6551.7501
## 970 5267.81815
## 971 17361.7661
## 972 34472.841
## 973 1972.95
## 974 21232.18226
## 975 8627.5411
## 976 4433.3877
## 977 4438.2634
## 978 24915.22085
## 979 23241.47453
## 980 9957.7216
## 981 8269.044
## 982 18767.7377
## 983 36580.28216
## 984 8765.249
## 985 5383.536
## 986 12124.9924
## 987 2709.24395
## 988 3987.926
## 989 12495.29085
## 990 26018.95052
## 991 8798.593
## 992 35595.5898
## 993 42211.1382
## 994 1711.0268
## 995 8569.8618
## 996 2020.177
## 997 16450.8947
## 998 21595.38229
## 999 9850.432
## 1000 6877.9801
## 1001 21677.28345
## 1002 44423.803
## 1003 4137.5227
## 1004 13747.87235
## 1005 12950.0712
## 1006 12094.478
## 1007 37484.4493
## 1008 39725.51805
## 1009 2250.8352
## 1010 22493.65964
## 1011 20234.85475
## 1012 1704.70015
## 1013 33475.81715
## 1014 3161.454
## 1015 11394.06555
## 1016 21880.82
## 1017 7325.0482
## 1018 44501.3982
## 1019 3594.17085
## 1020 39727.614
## 1021 8023.13545
## 1022 14394.5579
## 1023 9288.0267
## 1024 25309.489
## 1025 3353.4703
## 1026 10594.50155
## 1027 8277.523
## 1028 17929.30337
## 1029 2480.9791
## 1030 4462.7218
## 1031 1981.5819
## 1032 11554.2236
## 1033 48970.2476
## 1034 6548.19505
## 1035 5708.867
## 1036 7045.499
## 1037 8978.1851
## 1038 5757.41345
## 1039 39871.7043
## 1040 13974.45555
## 1041 1909.52745
## 1042 12096.6512
## 1043 13204.28565
## 1044 4562.8421
## 1045 2102.2647
## 1046 34672.1472
## 1047 15161.5344
## 1048 11884.04858
## 1049 4454.40265
## 1050 5855.9025
## 1051 4076.497
## 1052 15019.76005
## 1053 19023.26
## 1054 10796.35025
## 1055 11353.2276
## 1056 9748.9106
## 1057 10577.087
## 1058 41676.0811
## 1059 11286.5387
## 1060 3591.48
## 1061 11299.343
## 1062 4561.1885
## 1063 44641.1974
## 1064 1674.6323
## 1065 23045.56616
## 1066 3227.1211
## 1067 16776.30405
## 1068 11253.421
## 1069 3471.4096
## 1070 11363.2832
## 1071 20420.60465
## 1072 8988.15875
## 1073 10493.9458
## 1074 2904.088
## 1075 8605.3615
## 1076 11512.405
## 1077 41949.2441
## 1078 24180.9335
## 1079 5312.16985
## 1080 2396.0959
## 1081 10807.4863
## 1082 9222.4026
## 1083 36124.5737
## 1084 38282.7495
## 1085 5693.4305
## 1086 34166.273
## 1087 8347.1643
## 1088 46661.4424
## 1089 18903.49141
## 1090 40904.1995
## 1091 14254.6082
## 1092 10214.636
## 1093 5836.5204
## 1094 14358.36437
## 1095 1728.897
## 1096 8582.3023
## 1097 3693.428
## 1098 20709.02034
## 1099 9991.03765
## 1100 19673.33573
## 1101 11085.5868
## 1102 7623.518
## 1103 3176.2877
## 1104 3704.3545
## 1105 36898.73308
## 1106 9048.0273
## 1107 7954.517
## 1108 27117.99378
## 1109 6338.0756
## 1110 9630.397
## 1111 11289.10925
## 1112 52590.82939
## 1113 2261.5688
## 1114 10791.96
## 1115 5979.731
## 1116 2203.73595
## 1117 12235.8392
## 1118 40941.2854
## 1119 5630.45785
## 1120 11015.1747
## 1121 7228.21565
## 1122 39722.7462
## 1123 14426.07385
## 1124 2459.7201
## 1125 3989.841
## 1126 7727.2532
## 1127 5124.1887
## 1128 18963.17192
## 1129 2200.83085
## 1130 7153.5539
## 1131 5227.98875
## 1132 10982.5013
## 1133 4529.477
## 1134 4670.64
## 1135 6112.35295
## 1136 17178.6824
## 1137 22478.6
## 1138 11093.6229
## 1139 6457.8434
## 1140 4433.9159
## 1141 2154.361
## 1142 23887.6627
## 1143 6496.886
## 1144 2899.48935
## 1145 19350.3689
## 1146 7650.77375
## 1147 2850.68375
## 1148 2632.992
## 1149 9447.3824
## 1150 18328.2381
## 1151 8603.8234
## 1152 37465.34375
## 1153 13844.7972
## 1154 21771.3423
## 1155 13126.67745
## 1156 5327.40025
## 1157 13725.47184
## 1158 13019.16105
## 1159 8671.19125
## 1160 4134.08245
## 1161 18838.70366
## 1162 33307.5508
## 1163 5699.8375
## 1164 6393.60345
## 1165 4934.705
## 1166 6198.7518
## 1167 8733.22925
## 1168 2055.3249
## 1169 9964.06
## 1170 18223.4512
## 1171 5116.5004
## 1172 36910.60803
## 1173 38415.474
## 1174 20296.86345
## 1175 12347.172
## 1176 5373.36425
## 1177 23563.01618
## 1178 1702.4553
## 1179 10806.839
## 1180 3956.07145
## 1181 12890.05765
## 1182 5415.6612
## 1183 4058.1161
## 1184 41661.602
## 1185 7537.1639
## 1186 4718.20355
## 1187 6593.5083
## 1188 8442.667
## 1189 26125.67477
## 1190 6858.4796
## 1191 4795.6568
## 1192 6640.54485
## 1193 7162.0122
## 1194 10594.2257
## 1195 11938.25595
## 1196 60021.39897
## 1197 20167.33603
## 1198 12479.70895
## 1199 11345.519
## 1200 8515.7587
## 1201 2699.56835
## 1202 14449.8544
## 1203 12224.35085
## 1204 6985.50695
## 1205 3238.4357
## 1206 47269.854
## 1207 49577.6624
## 1208 4296.2712
## 1209 1135.9407
## 1210 5615.369
## 1211 9101.798
## 1212 6059.173
## 1213 1633.9618
## 1214 37607.5277
## 1215 18648.4217
## 1216 1241.565
## 1217 16232.847
## 1218 15828.82173
## 1219 4415.1588
## 1220 6474.013
## 1221 11436.73815
## 1222 11305.93455
## 1223 10197.7722
## 1224 4544.2348
## 1225 3277.161
## 1226 6770.1925
## 1227 7337.748
## 1228 10370.91255
## 1229 26926.5144
## 1230 10704.47
## 1231 34254.05335
## 1232 1880.487
## 1233 8615.3
## 1234 3292.52985
## 1235 3021.80915
## 1236 14478.33015
## 1237 4747.0529
## 1238 17043.3414
## 1239 10959.33
## 1240 4357.04365
## 1241 22462.04375
## 1242 4189.1131
## 1243 8283.6807
## 1244 24535.69855
## 1245 14283.4594
## 1246 1720.3537
## 1247 47403.88
## 1248 8534.6718
## 1249 3732.6251
## 1250 5472.449
## 1251 38344.566
## 1252 7147.4728
## 1253 7133.9025
## 1254 34828.654
## 1255 1515.3449
## 1256 9301.89355
## 1257 11931.12525
## 1258 1964.78
## 1259 1708.92575
## 1260 4340.4409
## 1261 5261.46945
## 1262 2710.82855
## 1263 62592.87309
## 1264 46718.16325
## 1265 3208.787
## 1266 37829.7242
## 1267 21259.37795
## 1268 2464.6188
## 1269 16115.3045
## 1270 21472.4788
## 1271 33900.653
## 1272 6875.961
## 1273 6940.90985
## 1274 4571.41305
## 1275 4536.259
## 1276 36397.576
## 1277 18765.87545
## 1278 11272.33139
## 1279 1731.677
## 1280 1163.4627
## 1281 19496.71917
## 1282 7201.70085
## 1283 5425.02335
## 1284 28101.33305
## 1285 12981.3457
## 1286 43896.3763
## 1287 4239.89265
## 1288 13143.33665
## 1289 7050.0213
## 1290 9377.9047
## 1291 22395.74424
## 1292 10325.206
## 1293 12629.1656
## 1294 10795.93733
## 1295 11411.685
## 1296 10600.5483
## 1297 2205.9808
## 1298 1629.8335
## 1299 2007.945
## 1300 29141.3603
Se presentan valores como "", "$$", "&&" y "null". Vamos a aprovechar que los datos son de tipo carácter para sustituir estos valores por NA desde la importación de la base.
# Re-import the CSV, treating the placeholder strings "", "$$", "&&" and
# "null" as NA so that read.csv() can infer proper column types.
# Fix: `stringsAsFactors` used to be misspelled and nested inside c(), which
# silently appended the string "FALSE" to `na.strings` instead of passing the
# option to read.csv().
data <- read.csv(
  '/var/home/diegob/Documents/Computer Science/R code/data_prac_2 1.csv',
  na.strings = c("", "$$", "&&", "null"),
  stringsAsFactors = FALSE
)
# Drop the `X` column, then preview the first ten rows.
data <- data %>% select(-X)
head(data, 10)
## edad sexo imc hijos fumador region clm
## 1 19 femenino 27.900 0 yes suroeste 16884.924
## 2 18 masculino 33.770 1 no sureste 1725.552
## 3 28 masculino 33.000 3 no sureste 4449.462
## 4 33 masculino 22.705 0 no noroeste 21984.471
## 5 NA masculino 28.880 0 no noroeste 3866.855
## 6 31 femenino 25.740 0 no sureste 3756.622
## 7 46 femenino 33.440 1 no sureste 8240.590
## 8 37 femenino 27.740 3 no noroeste 7281.506
## 9 37 masculino 29.830 2 no noreste 6406.411
## 10 60 femenino 25.840 0 no noroeste 28923.137
Ahora vamos a verificar los tipos de datos
# Report the class of every column of `data`.
sapply(X = data, FUN = class)
## edad sexo imc hijos fumador region
## "integer" "character" "numeric" "integer" "character" "character"
## clm
## "numeric"
Listo, tenemos los tipos de datos adecuados
Verificando cuántos valores faltantes hay
# Count the missing values in each column of `data`.
data %>%
  is.na() %>%
  colSums()
## edad sexo imc hijos fumador region clm
## 72 0 39 0 0 0 41
Solamente nos ocuparemos de los valores faltantes de las variables numéricas, reemplazándolos con alguna técnica vista en clase.
# List the rows of `data` that are duplicated; with no columns named,
# get_dupes() compares on all columns.
janitor::get_dupes(data)
## No variable names specified - using all columns.
## edad sexo imc hijos fumador region clm dupe_count
## 1 19 masculino 30.59 0 no noroeste 1639.563 2
## 2 19 masculino 30.59 0 no noroeste 1639.563 2
Existen registros duplicados. Sin embargo, puede que las variables disponibles en la base no sirvan como identificador, porque puede haber relaciones uno a muchos. Propondría agregar una variable más (por ejemplo, un número de póliza) o, usando las variables existentes, una combinación de imc y clm como columna identificadora.
# Remove duplicated rows, keeping the first occurrence of each.
data <- data[!duplicated(data), , drop = FALSE]
head(data, n = 10)
## edad sexo imc hijos fumador region clm
## 1 19 femenino 27.900 0 yes suroeste 16884.924
## 2 18 masculino 33.770 1 no sureste 1725.552
## 3 28 masculino 33.000 3 no sureste 4449.462
## 4 33 masculino 22.705 0 no noroeste 21984.471
## 5 NA masculino 28.880 0 no noroeste 3866.855
## 6 31 femenino 25.740 0 no sureste 3756.622
## 7 46 femenino 33.440 1 no sureste 8240.590
## 8 37 femenino 27.740 3 no noroeste 7281.506
## 9 37 masculino 29.830 2 no noreste 6406.411
## 10 60 femenino 25.840 0 no noroeste 28923.137
# Keep only the numeric columns of `data` for the imputation steps.
data_numeric <- select(data, where(is.numeric))
head(data_numeric, n = 10)
## edad imc hijos clm
## 1 19 27.900 0 16884.924
## 2 18 33.770 1 1725.552
## 3 28 33.000 3 4449.462
## 4 33 22.705 0 21984.471
## 5 NA 28.880 0 3866.855
## 6 31 25.740 0 3756.622
## 7 46 33.440 1 8240.590
## 8 37 27.740 3 7281.506
## 9 37 29.830 2 6406.411
## 10 60 25.840 0 28923.137
# Impute each numeric column's NAs with that column's mean.
data_mean <- data.frame(
  lapply(data_numeric, function(column) {
    column_mean <- mean(column, na.rm = TRUE)
    ifelse(is.na(column), column_mean, column)
  })
)
summary(data_mean)
## edad imc hijos clm
## Min. :18.00 Min. :15.96 Min. :0.000 Min. : 1122
## 1st Qu.:27.00 1st Qu.:26.40 1st Qu.:0.000 1st Qu.: 4878
## Median :39.24 Median :30.59 Median :1.000 Median : 9705
## Mean :39.24 Mean :30.62 Mean :1.096 Mean :13287
## 3rd Qu.:51.00 3rd Qu.:34.40 3rd Qu.:2.000 3rd Qu.:16115
## Max. :64.00 Max. :53.13 Max. :5.000 Max. :63770
# Impute each numeric column's NAs with that column's median.
data_median <- data.frame(
  lapply(data_numeric, function(column) {
    column_median <- median(column, na.rm = TRUE)
    ifelse(is.na(column), column_median, column)
  })
)
summary(data_median)
## edad imc hijos clm
## Min. :18.00 Min. :15.96 Min. :0.000 Min. : 1122
## 1st Qu.:27.00 1st Qu.:26.40 1st Qu.:0.000 1st Qu.: 4878
## Median :39.00 Median :30.30 Median :1.000 Median : 9382
## Mean :39.23 Mean :30.61 Mean :1.096 Mean :13167
## 3rd Qu.:51.00 3rd Qu.:34.40 3rd Qu.:2.000 3rd Qu.:16115
## Max. :64.00 Max. :53.13 Max. :5.000 Max. :63770
# Impute each numeric column's NAs with that column's 20%-trimmed mean
# (despite the object's name, this is a trimmed mean, not a median).
data_median_cut <- data.frame(
  lapply(data_numeric, function(column) {
    trimmed_mean <- mean(column, na.rm = TRUE, trim = 0.2)
    ifelse(is.na(column), trimmed_mean, column)
  })
)
summary(data_median_cut)
## edad imc hijos clm
## Min. :18.00 Min. :15.96 Min. :0.000 Min. : 1122
## 1st Qu.:27.00 1st Qu.:26.40 1st Qu.:0.000 1st Qu.: 4878
## Median :39.08 Median :30.41 Median :1.000 Median : 9705
## Mean :39.23 Mean :30.61 Mean :1.096 Mean :13183
## 3rd Qu.:51.00 3rd Qu.:34.40 3rd Qu.:2.000 3rd Qu.:16115
## Max. :64.00 Max. :53.13 Max. :5.000 Max. :63770
# Impute each numeric column's NAs with that column's mode (most frequent
# value).
# mfv() returns every tied mode, and ifelse() recycles a multi-element `yes`
# vector positionally, which filled NAs with unrelated values for columns
# whose values are mostly unique. Only the first mode is used so every NA in a
# column receives the same value.
# The previous `trim = 0.1` argument was dropped.
# NOTE(review): `trim` is not a documented mfv() argument -- confirm.
data_mode <- data.frame(
  lapply(data_numeric, function(x) {
    first_mode <- mfv(x, na_rm = TRUE)[1]
    ifelse(is.na(x), first_mode, x)
  })
)
summary(data_mode)
## edad imc hijos clm
## Min. :18.00 Min. :15.96 Min. :0.000 Min. : 1122
## 1st Qu.:25.00 1st Qu.:26.40 1st Qu.:0.000 1st Qu.: 4762
## Median :38.00 Median :30.59 Median :1.000 Median : 9305
## Mean :38.13 Mean :30.66 Mean :1.096 Mean :13254
## 3rd Qu.:51.00 3rd Qu.:34.40 3rd Qu.:2.000 3rd Qu.:16586
## Max. :64.00 Max. :53.13 Max. :5.000 Max. :63770
# Impute each numeric column's NAs by linear interpolation between the
# neighbouring rows.
# na.rm = FALSE keeps the interpolated vector the same length as the column;
# with na.rm = TRUE leading/trailing NAs are dropped and ifelse() would
# recycle a shorter, misaligned vector. rule = 2 is forwarded to approx() so
# NAs at either end take the nearest observed value instead of staying NA.
data_interpol <- data.frame(
  lapply(data_numeric, function(x) {
    interpolated <- na.approx(x, na.rm = FALSE, rule = 2)
    ifelse(is.na(x), interpolated, x)
  })
)
summary(data_interpol)
## edad imc hijos clm
## Min. :18.00 Min. :15.96 Min. :0.000 Min. : 1122
## 1st Qu.:27.00 1st Qu.:26.22 1st Qu.:0.000 1st Qu.: 4796
## Median :39.00 Median :30.40 Median :1.000 Median : 9411
## Mean :39.29 Mean :30.62 Mean :1.096 Mean :13274
## 3rd Qu.:51.00 3rd Qu.:34.58 3rd Qu.:2.000 3rd Qu.:16819
## Max. :64.00 Max. :53.13 Max. :5.000 Max. :63770
La mediana podría ser candidata como técnica de reemplazo porque no difiere mucho de los datos originales
# Histogram of `edad` from the interpolated data.
ggplot(data = data_interpol, mapping = aes(x = edad)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(y = "Frecuencia", x = "Edad", title = "Edad de los asegurados") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of `imc` from the interpolated data.
ggplot(data = data_interpol, mapping = aes(x = imc)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(y = "Frecuencia", x = "IMC", title = "IMC de los asegurados") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of `hijos` from the interpolated data.
ggplot(data = data_interpol, mapping = aes(x = hijos)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(y = "Frecuencia", x = "Cantidad de hijos", title = "Número de hijos") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of `clm` from the interpolated data.
ggplot(data = data_interpol, mapping = aes(x = clm)) +
  geom_histogram(color = "black", fill = "blue") +
  labs(y = "Frecuencia", x = "cantidad de reclamo", title = "Reclamos ") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Re-attach the character columns to the interpolated numeric columns.
char_data <- select(data, sexo, fumador, region)
data_interpol_all <- cbind(data_interpol, char_data)
# Mean of `clm` for each value of `sexo`.
mean_sex <- summarise(
  group_by(data_interpol_all, sexo),
  claim_amount = mean(clm)
)
mean_sex
## # A tibble: 2 × 2
## sexo claim_amount
## <chr> <dbl>
## 1 femenino 12517.
## 2 masculino 14017.
# Mean of `clm` for each combination of `fumador` and `sexo`.
mean_sex_smk <- summarise(
  group_by(data_interpol_all, fumador, sexo),
  claim_amount = mean(clm)
)
## `summarise()` has grouped output by 'fumador'. You can override using the
## `.groups` argument.
mean_sex_smk
## # A tibble: 4 × 3
## # Groups: fumador [2]
## fumador sexo claim_amount
## <chr> <chr> <dbl>
## 1 no femenino 8935.
## 2 no masculino 8206.
## 3 yes femenino 29556.
## 4 yes masculino 32874.
# Mean of `clm` for each `region`, sorted from highest to lowest mean.
mean_region <- arrange(
  summarise(group_by(data_interpol_all, region), claim_amount = mean(clm)),
  desc(claim_amount)
)
mean_region
## # A tibble: 4 × 2
## region claim_amount
## <chr> <dbl>
## 1 sureste 14588.
## 2 noreste 13577.
## 3 noroeste 12418.
## 4 suroeste 12354.
# Label rows whose `imc` is above 30 as "obesidad"; every other row gets an
# empty label.
data_interpol_label <- mutate(
  data_interpol_all,
  clasificacion = ifelse(imc > 30, "obesidad", "")
)
head(data_interpol_label, n = 10)
## edad imc hijos clm sexo fumador region clasificacion
## 1 19 27.900 0 16884.924 femenino yes suroeste
## 2 18 33.770 1 1725.552 masculino no sureste obesidad
## 3 28 33.000 3 4449.462 masculino no sureste obesidad
## 4 33 22.705 0 21984.471 masculino no noroeste
## 5 32 28.880 0 3866.855 masculino no noroeste
## 6 31 25.740 0 3756.622 femenino no sureste
## 7 46 33.440 1 8240.590 femenino no sureste obesidad
## 8 37 27.740 3 7281.506 femenino no noroeste
## 9 37 29.830 2 6406.411 masculino no noreste
## 10 60 25.840 0 28923.137 femenino no noroeste
# Keep only the rows labelled "obesidad", ordered by `imc` from highest to
# lowest.
obesas <- data_interpol_label %>%
  filter(clasificacion == "obesidad") %>%
  arrange(desc(imc))
head(obesas, n = 10)
## edad imc hijos clm sexo fumador region clasificacion
## 1 18 53.13 0 1163.463 masculino no sureste obesidad
## 2 22 52.58 1 44501.398 masculino yes sureste obesidad
## 3 23 50.38 1 2438.055 masculino no sureste obesidad
## 4 58 49.06 0 11381.325 masculino no sureste obesidad
## 5 52 47.74 1 9748.911 masculino no sureste obesidad
## 6 37 47.60 2 46113.511 femenino yes suroeste obesidad
## 7 47 47.52 1 8083.920 masculino no sureste obesidad
## 8 54 47.41 0 63770.428 femenino yes sureste obesidad
## 9 52 46.75 5 12592.534 femenino no sureste obesidad
## 10 54 46.70 2 11538.421 femenino no suroeste obesidad