# Load the Pima training data.
# The file location is built explicitly instead of calling setwd(): changing
# the working directory is a session-wide side effect that silently redirects
# every later relative path and is not undone when the script finishes.
library(readr)
data_dir <- path.expand("~/Desktop/galaxy500/resources")
pima <- read_csv(file.path(data_dir, "Pima.tr2.csv"))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   npreg = col_double(),
##   glu = col_double(),
##   bp = col_double(),
##   skin = col_double(),
##   bmi = col_double(),
##   ped = col_double(),
##   age = col_double(),
##   type = col_character()
## )

# Start exploratory data analysis (EDA)

# Table shape as (rows, columns).
c(nrow(pima), ncol(pima))
## [1] 300   9
# Column labels, in file order.
colnames(pima)
## [1] "X1"    "npreg" "glu"   "bp"    "skin"  "bmi"   "ped"   "age"   "type"
# Compact per-column listing: storage type plus the first few values.
utils::str(pima)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 300 obs. of  9 variables:
##  $ X1   : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ npreg: num  5 7 5 0 0 5 3 1 3 2 ...
##  $ glu  : num  86 195 77 165 107 97 83 193 142 128 ...
##  $ bp   : num  68 70 82 76 60 76 58 50 80 78 ...
##  $ skin : num  28 33 41 43 25 27 31 16 15 37 ...
##  $ bmi  : num  30.2 25.1 35.8 47.9 26.4 35.6 34.3 25.9 32.4 43.3 ...
##  $ ped  : num  0.364 0.163 0.156 0.259 0.133 ...
##  $ age  : num  24 55 35 26 23 52 25 24 63 31 ...
##  $ type : chr  "No" "Yes" "No" "No" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   npreg = col_double(),
##   ..   glu = col_double(),
##   ..   bp = col_double(),
##   ..   skin = col_double(),
##   ..   bmi = col_double(),
##   ..   ped = col_double(),
##   ..   age = col_double(),
##   ..   type = col_character()
##   .. )
# List every attribute stored on the object; per the output below these are
# the column names, the class vector, the row names and the readr column spec.
attributes(pima)
## $names
## [1] "X1"    "npreg" "glu"   "bp"    "skin"  "bmi"   "ped"   "age"   "type" 
## 
## $class
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
## 
## $row.names
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
##  [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
##  [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
##  [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102
## [103] 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
## [120] 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
## [137] 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
## [154] 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
## [171] 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
## [188] 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
## [205] 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
## [222] 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
## [239] 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
## [256] 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
## [273] 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
## [290] 290 291 292 293 294 295 296 297 298 299 300
## 
## $spec
## cols(
##   X1 = col_double(),
##   npreg = col_double(),
##   glu = col_double(),
##   bp = col_double(),
##   skin = col_double(),
##   bmi = col_double(),
##   ped = col_double(),
##   age = col_double(),
##   type = col_character()
## )
# Per-column summary: quartiles and mean for the numeric columns (with the
# count of NA values where there are any), length/class/mode for `type`.
summary(pima)
##        X1             npreg             glu              bp        
##  Min.   :  1.00   Min.   : 0.000   Min.   : 56.0   Min.   : 38.00  
##  1st Qu.: 75.75   1st Qu.: 1.000   1st Qu.:101.0   1st Qu.: 64.00  
##  Median :150.50   Median : 3.000   Median :121.0   Median : 72.00  
##  Mean   :150.50   Mean   : 3.787   Mean   :123.7   Mean   : 72.32  
##  3rd Qu.:225.25   3rd Qu.: 6.000   3rd Qu.:142.0   3rd Qu.: 80.00  
##  Max.   :300.00   Max.   :14.000   Max.   :199.0   Max.   :114.00  
##                                                    NA's   :13      
##       skin            bmi             ped              age      
##  Min.   : 7.00   Min.   :18.20   Min.   :0.0780   Min.   :21.0  
##  1st Qu.:21.00   1st Qu.:27.10   1st Qu.:0.2367   1st Qu.:24.0  
##  Median :29.00   Median :32.00   Median :0.3360   Median :29.0  
##  Mean   :29.15   Mean   :32.05   Mean   :0.4357   Mean   :33.1  
##  3rd Qu.:36.00   3rd Qu.:36.50   3rd Qu.:0.5867   3rd Qu.:40.0  
##  Max.   :99.00   Max.   :52.90   Max.   :2.2880   Max.   :72.0  
##  NA's   :98      NA's   :3                                      
##      type          
##  Length:300        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Number of pregnancies
# Exact frequency of each pregnancy count.
table(pima$npreg)
## 
##  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 
## 44 51 41 26 35 22 19 17 15  9  8  2  6  3  2
# npreg is an integer count, so give every value its own unit-wide bin centred
# on the integer. With hist()'s default breaks the bin edges land on the
# integers and the first bin is closed on both sides, so 0 is pooled with the
# next value(s) and the bars no longer match the table above.
npreg_breaks <- seq(
  min(pima$npreg, na.rm = TRUE) - 0.5,
  max(pima$npreg, na.rm = TRUE) + 0.5,
  by = 1
)
hist(pima$npreg, breaks = npreg_breaks)

# Kernel density estimate of the pregnancy counts, then its curve.
npreg_density <- density(pima$npreg)
plot(npreg_density)

# Scatter of pregnancy count (x axis) against age (y axis).
plot(x = pima$npreg, y = pima$age)

# Scatterplot matrix of columns 4 to 8, picked by name instead of position.
pair_cols <- c("bp", "skin", "bmi", "ped", "age")
pairs(pima[, pair_cols])