### Load the breast cancer workbook from disk into the `BreastCancer` object
library(readxl)
BreastCancer <- read_excel(
  path = "C:/Users/Dhruva/Desktop/GRAD 699/Assignments/BC.xlsx"
)
### Total number of missing cells in the whole dataset
### (build the NA indicator matrix first, then count its TRUE entries)
na_flags <- is.na(BreastCancer)
sum(na_flags)
## [1] 0
### Share of missing entries in each column, expressed as a percentage
100 * colMeans(is.na(BreastCancer))
## Age BMI Glucose Insulin HOMA
## 0 0 0 0 0
## Leptin Adiponectin Resistin MCP.1 Classification
## 0 0 0 0 0
There are no missing values in the breast cancer dataset.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
### Table of the number of missing values per variable (input for the plot)
### Steps: reshape to long format (one row per cell), flag NA cells, count
### rows per variable and flag, keep only the "missing" counts, and sort so
### the variable with the most missing values comes first.
missing.values <- BreastCancer %>%
  gather(key = "key", value = "val") %>%
  mutate(is.missing = is.na(val)) %>%
  group_by(key, is.missing) %>%
  summarise(num.missing = n()) %>%
  ### Filter on the logical column itself rather than comparing with `T`:
  ### `T` is an ordinary variable that can be reassigned, unlike `TRUE`.
  filter(is.missing) %>%
  select(-is.missing) %>%
  arrange(desc(num.missing))
### Print the table; it has zero rows when no variable has missing values
missing.values
## # A tibble: 0 x 2
## # Groups: key [0]
## # ... with 2 variables: key <chr>, num.missing <int>
### Bar chart of the missing-value count for each variable
### (x-axis labels are rotated 45 degrees so variable names do not overlap)
ggplot(missing.values, aes(x = key, y = num.missing)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Number of missing values",
    x = "variable",
    y = "number of missing values"
  ) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))
A plot of the number of missing values per variable was constructed for visualization, but it is empty because the dataset contains no missing values.
### Since there are no missing values in the dataset, no missingness patterns
### are observed. Little's MCAR test is still run below for completeness.
library(BaylorEdPsych)
library(mvnmle)
### Keep the test result in a variable, then print it in full
mcar_result <- LittleMCAR(BreastCancer)
mcar_result
## this could take a while
## $chi.square
## [1] 6.737668e-20
##
## $df
## [1] 0
##
## $p.value
## [1] 0
##
## $missing.patterns
## [1] 1
##
## $amount.missing
## Age BMI Glucose Insulin HOMA Leptin Adiponectin Resistin
## Number Missing 0 0 0 0 0 0 0 0
## Percent Missing 0 0 0 0 0 0 0 0
## MCP.1 Classification
## Number Missing 0 0
## Percent Missing 0 0
##
## $data
## $data$DataSet1
## Age BMI Glucose Insulin HOMA Leptin Adiponectin Resistin
## 1 48 23.50000 70 2.707 0.4674087 8.8071 9.702400 7.99585
## 2 83 20.69049 92 3.115 0.7068973 8.8438 5.429285 4.06405
## 3 82 23.12467 91 4.498 1.0096511 17.9393 22.432040 9.27715
## 4 68 21.36752 77 3.226 0.6127249 9.8827 7.169560 12.76600
## 5 86 21.11111 92 3.549 0.8053864 6.6994 4.819240 10.57635
## 6 49 22.85446 92 3.226 0.7320869 6.8317 13.679750 10.31760
## 7 89 22.70000 77 4.690 0.8907873 6.9640 5.589865 12.93610
## 8 76 23.80000 118 6.470 1.8832013 4.3110 13.251320 5.10420
## 9 73 22.00000 97 3.350 0.8015433 4.4700 10.358725 6.28445
## 10 75 23.00000 83 4.952 1.0138395 17.1270 11.578990 7.09130
## 11 34 21.47000 78 3.469 0.6674356 14.5700 13.110000 6.92000
## 12 29 23.01000 82 5.663 1.1454361 35.5900 26.720000 4.58000
## 13 25 22.86000 82 4.090 0.8272707 20.4500 23.670000 5.14000
## 14 24 18.67000 88 6.107 1.3300000 8.8800 36.060000 6.85000
## 15 38 23.34000 75 5.782 1.0696700 15.2600 17.950000 9.35000
## 16 44 20.76000 86 7.553 1.6000000 14.0900 20.320000 7.64000
## 17 47 22.03000 84 2.869 0.5900000 26.6500 38.040000 3.32000
## 18 61 32.03896 85 18.077 3.7901443 30.7729 7.780255 13.68392
## 19 64 34.52972 95 4.427 1.0373937 21.2117 5.462620 6.70188
## 20 32 36.51264 87 14.026 3.0099796 49.3727 5.100000 17.10223
## 21 36 28.57668 86 4.345 0.9217193 15.1248 8.600000 9.15390
## 22 34 31.97501 87 4.530 0.9721380 28.7502 7.642760 5.62592
## 23 29 32.27079 84 5.810 1.2038320 45.6196 6.209635 24.60330
## 24 35 30.27682 84 4.376 0.9067072 39.2134 9.048185 16.43706
## 25 54 30.48316 90 5.537 1.2292140 12.3310 9.731380 10.19299
## 26 45 37.03561 83 6.760 1.3839973 39.9802 4.617125 8.70448
## 27 50 38.57876 106 6.703 1.7526111 46.6401 4.667645 11.78388
## 28 66 31.44654 90 9.245 2.0523900 45.9624 10.355260 23.38190
## 29 35 35.25076 90 6.817 1.5133740 50.6094 6.966895 22.03703
## 30 36 34.17489 80 6.590 1.3004267 10.2809 5.065915 15.72187
## 31 66 36.21228 101 15.533 3.8697881 74.7069 7.539550 22.32024
## 32 53 36.79017 101 10.175 2.5349317 27.1841 20.030000 10.26309
## 33 28 35.85581 87 8.576 1.8404096 68.5102 4.794200 21.44366
## 34 43 34.42217 89 23.194 5.0918561 31.2128 8.300955 6.71026
## 35 51 27.68878 77 3.855 0.7321930 20.0920 3.192090 10.37518
## 36 67 29.60677 79 5.819 1.1339291 21.9033 2.194280 4.20750
## 37 66 31.23859 82 4.181 0.8456769 16.2247 4.267105 3.29175
## 38 69 35.09270 101 5.646 1.4066068 83.4821 6.796985 82.10000
## 39 60 26.34929 103 5.138 1.3053945 24.2998 2.194280 20.25350
## 40 77 35.58793 76 3.881 0.7275581 21.7863 8.125550 17.26150
## 41 76 29.21841 83 5.376 1.1006464 28.5620 7.369960 8.04375
## 42 76 27.20000 94 14.070 3.2623640 35.8910 9.346630 8.41560
## 43 75 27.30000 85 5.197 1.0896377 10.3900 9.000805 7.57670
## 44 69 32.50000 93 5.430 1.2456420 15.1450 11.787960 11.78796
## 45 71 30.30000 102 8.340 2.0983440 56.5020 8.130000 4.29890
## 46 66 27.70000 90 6.042 1.3413240 24.8460 7.652055 6.70520
## 47 75 25.70000 94 8.079 1.8732508 65.9260 3.741220 4.49685
## 48 78 25.30000 60 3.508 0.5191840 6.6330 10.567295 4.66380
## 49 69 29.40000 89 10.704 2.3498848 45.2720 8.286300 4.53000
## 50 85 26.60000 96 4.462 1.0566016 7.8500 7.931700 9.61350
## 51 76 27.10000 110 26.211 7.1119180 21.7780 4.935635 8.49395
## 52 77 25.90000 85 4.580 0.9602733 13.7400 9.753260 11.77400
## 53 45 21.30395 102 13.852 3.4851632 7.6476 21.056625 23.03408
## 54 45 20.83000 74 4.560 0.8323520 7.7529 8.237405 28.03230
## 55 49 20.95661 94 12.305 2.8531193 11.2406 8.412175 23.11770
## 56 34 24.24242 92 21.699 4.9242264 16.7353 21.823745 12.06534
## 57 42 21.35991 93 2.999 0.6879706 19.0826 8.462915 17.37615
## 58 68 21.08281 102 6.200 1.5599200 9.6994 8.574655 13.74244
## 59 51 19.13265 93 4.364 1.0011016 11.0816 5.807620 5.57055
## 60 62 22.65625 92 3.482 0.7901819 9.8648 11.236235 10.69548
## 61 38 22.49964 95 5.261 1.2328277 8.4380 4.771920 15.73606
## 62 69 21.51386 112 6.683 1.8462901 32.5800 4.138025 15.69876
## 63 49 21.36752 78 2.640 0.5079360 6.3339 3.886145 22.94254
## 64 51 22.89282 103 2.740 0.6961427 8.0163 9.349775 11.55492
## 65 59 22.83288 98 6.862 1.6587741 14.9037 4.230105 8.20490
## 66 45 23.14050 116 4.902 1.4026256 17.9973 4.294705 5.26330
## 67 54 24.21875 86 3.730 0.7912573 8.6874 3.705230 10.34455
## 68 64 22.22222 98 5.700 1.3778800 12.1905 4.783985 13.91245
## 69 46 20.83000 88 3.420 0.7423680 12.8700 18.550000 13.56000
## 70 44 19.56000 114 15.890 4.4682680 13.0800 20.370000 4.62000
## 71 45 20.26000 92 3.440 0.7806507 7.6500 16.670000 7.84000
## 72 44 24.74000 106 58.460 15.2853413 18.1600 16.100000 5.31000
## 73 51 18.37000 105 6.030 1.5617700 9.6200 12.760000 3.21000
## 74 72 23.62000 105 4.420 1.1447800 21.7800 17.860000 4.82000
## 75 46 22.21000 86 36.940 7.8362053 10.1600 9.760000 5.68000
## 76 43 26.56250 101 10.555 2.6296023 9.8000 6.420295 16.10000
## 77 55 31.97501 92 16.635 3.7750360 37.2234 11.018455 7.16514
## 78 43 31.25000 103 4.328 1.0996005 25.7816 12.718960 38.65310
## 79 86 26.66667 201 41.611 20.6307338 47.6470 5.357135 24.37010
## 80 41 26.67276 97 22.033 5.2717625 44.7059 13.494865 27.83250
## 81 59 28.67263 77 3.188 0.6055075 17.0220 16.440480 31.69040
## 82 81 31.64037 100 9.669 2.3850200 38.8066 10.636525 29.55830
## 83 48 32.46191 99 28.677 7.0029234 46.0760 21.570000 10.15726
## 84 71 25.51020 112 10.395 2.8717920 19.0653 5.486100 42.74470
## 85 42 29.29687 98 4.172 1.0085115 12.2617 6.695585 53.67170
## 86 65 29.66655 85 14.649 3.0714070 26.5166 7.282870 19.46324
## 87 48 28.12500 90 2.540 0.5638800 15.5325 10.222310 16.11032
## 88 85 27.68878 196 51.814 25.0503419 70.8824 7.901685 55.21530
## 89 48 31.25000 199 12.162 5.9699204 18.1314 4.104105 53.63080
## 90 58 29.15452 139 16.582 5.6854151 22.8884 10.262660 13.97399
## 91 40 30.83653 128 41.894 13.2273323 31.0385 6.160995 17.55503
## 92 82 31.21748 100 18.077 4.4589933 31.6453 9.923650 19.94687
## 93 52 30.80125 87 30.212 6.4834952 29.2739 6.268540 24.24591
## 94 49 32.46191 134 24.887 8.2259831 42.3914 10.793940 5.76800
## 95 60 31.23141 131 30.130 9.7360073 37.8430 8.404430 11.50005
## 96 49 29.77778 70 8.396 1.4497093 51.3387 10.731740 20.76801
## 97 44 27.88762 99 9.208 2.2485936 12.6757 5.478170 23.03306
## 98 40 27.63605 103 2.432 0.6178901 14.3224 6.783870 26.01360
## 99 71 27.91552 104 18.200 4.6689067 53.4997 1.656020 49.24184
## 100 69 28.44444 108 8.808 2.3464512 14.7485 5.288025 16.48508
## 101 74 28.65014 88 3.012 0.6538048 31.1233 7.652220 18.35574
## 102 66 26.56250 89 6.524 1.4322355 14.9084 8.429960 14.91922
## 103 65 30.91558 97 10.491 2.5101466 44.0217 3.710090 20.46850
## 104 72 29.13632 83 10.949 2.2416253 26.8081 2.784910 14.76966
## 105 57 34.83815 95 12.548 2.9404147 33.1612 2.364950 9.95420
## 106 73 37.10937 134 5.636 1.8628859 41.4064 3.335665 6.89235
## 107 45 29.38476 90 4.713 1.0462860 23.8479 6.644245 15.55625
## 108 46 33.18000 92 5.750 1.3048667 18.6900 9.160000 8.89000
## 109 68 35.56000 131 8.150 2.6335367 17.8700 11.900000 4.19000
## 110 75 30.48000 152 7.010 2.6282827 50.5300 10.060000 11.73000
## 111 54 36.05000 119 11.910 3.4959820 89.2700 8.010000 5.06000
## 112 45 26.85000 92 3.330 0.7556880 54.6800 12.100000 10.96000
## 113 62 26.84000 100 4.530 1.1174000 12.4500 21.420000 7.32000
## 114 65 32.05000 97 5.730 1.3709980 61.4800 22.540000 10.33000
## 115 72 25.59000 82 2.820 0.5703920 24.9600 33.750000 3.27000
## 116 86 27.18000 138 19.910 6.7773640 90.2800 14.110000 4.35000
## MCP.1 Classification
## 1 417.114 1
## 2 468.786 1
## 3 554.697 1
## 4 928.220 1
## 5 773.920 1
## 6 530.410 1
## 7 1256.083 1
## 8 280.694 1
## 9 136.855 1
## 10 318.302 1
## 11 354.600 1
## 12 174.800 1
## 13 313.730 1
## 14 632.220 1
## 15 165.020 1
## 16 63.610 1
## 17 191.720 1
## 18 444.395 1
## 19 252.449 1
## 20 588.460 1
## 21 534.224 1
## 22 572.783 1
## 23 904.981 1
## 24 733.797 1
## 25 1227.910 1
## 26 586.173 1
## 27 887.160 1
## 28 1102.110 1
## 29 667.928 1
## 30 581.313 1
## 31 864.968 1
## 32 695.754 1
## 33 358.624 1
## 34 960.246 1
## 35 473.859 1
## 36 585.307 1
## 37 634.602 1
## 38 263.499 1
## 39 378.996 1
## 40 618.272 1
## 41 698.789 1
## 42 377.227 1
## 43 335.393 1
## 44 270.142 1
## 45 200.976 1
## 46 225.880 1
## 47 206.802 1
## 48 209.749 1
## 49 215.769 1
## 50 232.006 1
## 51 45.843 1
## 52 488.829 1
## 53 552.444 2
## 54 382.955 2
## 55 573.630 2
## 56 481.949 2
## 57 321.919 2
## 58 448.799 2
## 59 90.600 2
## 60 703.973 2
## 61 199.055 2
## 62 713.239 2
## 63 737.672 2
## 64 359.232 2
## 65 355.310 2
## 66 518.586 2
## 67 635.049 2
## 68 395.976 2
## 69 301.210 2
## 70 220.660 2
## 71 193.870 2
## 72 244.750 2
## 73 513.660 2
## 74 195.940 2
## 75 312.000 2
## 76 806.724 2
## 77 483.377 2
## 78 775.322 2
## 79 1698.440 2
## 80 783.796 2
## 81 910.489 2
## 82 426.175 2
## 83 738.034 2
## 84 799.898 2
## 85 1041.843 2
## 86 1698.440 2
## 87 1698.440 2
## 88 1078.359 2
## 89 1698.440 2
## 90 923.886 2
## 91 638.261 2
## 92 994.316 2
## 93 764.667 2
## 94 656.393 2
## 95 396.021 2
## 96 602.486 2
## 97 407.206 2
## 98 293.123 2
## 99 256.001 2
## 100 353.568 2
## 101 572.401 2
## 102 269.487 2
## 103 396.648 2
## 104 232.018 2
## 105 655.834 2
## 106 788.902 2
## 107 621.273 2
## 108 209.190 2
## 109 198.400 2
## 110 99.450 2
## 111 218.280 2
## 112 268.230 2
## 113 330.160 2
## 114 314.050 2
## 115 392.460 2
## 116 90.090 2
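The data was tested with Little's MCAR test and the reported p-value is 0, which is less than 0.05. However, the dataset has no missing values: the test found a single missing-data pattern and has 0 degrees of freedom, so this p-value is a degenerate result and cannot be interpreted as evidence about the missingness mechanism. Therefore, the data cannot be classified as either Missing Completely At Random (MCAR) or Missing At Random (MAR); with complete data the question does not arise.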
### Since there are no missing values, no imputation was performed.
### Also, the outliers are kept in the dataset because of the small number of observations.