Data Cleaning of patient-data dataset

Load Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

Read Nifty CSV

dfrPatient <- read.csv("D:\\R\\R-Programming\\patient-data.csv", header = T, stringsAsFactors = F)
dfrPatient2 <- read.csv("D:\\R\\R-Programming\\patient-data.csv",header = T, stringsAsFactors = F)
head(dfrPatient)  
##          ID      Name  Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/001 Demetrius White   Male  False      182.87       76.57
## 2 AC/AH/017   Rosario White   Male  False      179.12       80.43
## 3 AC/AH/020     Julio Black   Male  False      169.15       75.48
## 4 AC/AH/022      Lupe White   Male  False      175.66       94.54
## 5 AC/AH/029    Lavern White Female  False      164.47       71.78
## 6 AC/AH/033    Bernie   Dog Female   True      158.27       69.90
##    BirthDate        State  Pet HealthGrade  Died RecordDate
## 1 31-01-1972  Georgia,xxx  Dog           2 False 25-11-2015
## 2 09-06-1972     Missouri  Dog           2 False 25-11-2015
## 3 03-07-1972 Pennsylvania None           2 False 25-11-2015
## 4 11-08-1972      Florida  Cat           1 False 25-11-2015
## 5 06-06-1973         Iowa NULL           2  True 25-11-2015
## 6 25-06-1973     Maryland  Dog           2 False 25-11-2015

Detecting NAs

detectNas <- function(inp){
  return(sum(is.na(inp)))
}

lapply(dfrPatient, detectNas)
## $ID
## [1] 0
## 
## $Name
## [1] 0
## 
## $Race
## [1] 0
## 
## $Gender
## [1] 0
## 
## $Smokes
## [1] 0
## 
## $HeightInCms
## [1] 0
## 
## $WeightInKgs
## [1] 0
## 
## $BirthDate
## [1] 0
## 
## $State
## [1] 0
## 
## $Pet
## [1] 2
## 
## $HealthGrade
## [1] 0
## 
## $Died
## [1] 0
## 
## $RecordDate
## [1] 0

Detecting Zeros

detect0 <- function(inp){
  if (class(inp) != "numeric"){
      return ("Non Numeric data")}
  sum(inp==0)
}
lapply(dfrPatient,detect0)
## $ID
## [1] "Non Numeric data"
## 
## $Name
## [1] "Non Numeric data"
## 
## $Race
## [1] "Non Numeric data"
## 
## $Gender
## [1] "Non Numeric data"
## 
## $Smokes
## [1] "Non Numeric data"
## 
## $HeightInCms
## [1] 0
## 
## $WeightInKgs
## [1] 0
## 
## $BirthDate
## [1] "Non Numeric data"
## 
## $State
## [1] "Non Numeric data"
## 
## $Pet
## [1] "Non Numeric data"
## 
## $HealthGrade
## [1] "Non Numeric data"
## 
## $Died
## [1] "Non Numeric data"
## 
## $RecordDate
## [1] "Non Numeric data"

Detecting spaces

detectspace <- function(inp){  
  
  if (class (inp) !="character")
      return("Non Character Data")
    sum(trimws(inp) == "")
  }
 
lapply(dfrPatient, detectspace)   
## $ID
## [1] 0
## 
## $Name
## [1] 0
## 
## $Race
## [1] 0
## 
## $Gender
## [1] 0
## 
## $Smokes
## [1] 0
## 
## $HeightInCms
## [1] "Non Character Data"
## 
## $WeightInKgs
## [1] "Non Character Data"
## 
## $BirthDate
## [1] 0
## 
## $State
## [1] 0
## 
## $Pet
## [1] NA
## 
## $HealthGrade
## [1] "Non Character Data"
## 
## $Died
## [1] 0
## 
## $RecordDate
## [1] 0

Detecting Outliers

detectoutlier<- function(inp,na.rm=TRUE){
    if (class(inp) != "numeric"){
        return ("Non Numeric Column")
}
    i.qnt <- quantile(inp , probs= c(0.25,0.75), na.rm=na.rm)
    i.max <- 1.5 *IQR(inp, na.rm=na.rm)
    otp <- inp
    otp[inp < (i.qnt[1] - i.max)] <- NA
    otp[inp < (i.qnt[2] - i.max)] <- NA
    return (inp [is.na(otp)])
  }  
    
lapply(dfrPatient, detectoutlier)
## $ID
## [1] "Non Numeric Column"
## 
## $Name
## [1] "Non Numeric Column"
## 
## $Race
## [1] "Non Numeric Column"
## 
## $Gender
## [1] "Non Numeric Column"
## 
## $Smokes
## [1] "Non Numeric Column"
## 
## $HeightInCms
## numeric(0)
## 
## $WeightInKgs
## numeric(0)
## 
## $BirthDate
## [1] "Non Numeric Column"
## 
## $State
## [1] "Non Numeric Column"
## 
## $Pet
## [1] "Non Numeric Column"
## 
## $HealthGrade
## [1] "Non Numeric Column"
## 
## $Died
## [1] "Non Numeric Column"
## 
## $RecordDate
## [1] "Non Numeric Column"

Data Validation

summarise(group_by(dfrPatient, Gender) , n())
## # A tibble: 6 x 2
##    Gender `n()`
##     <chr> <int>
## 1  Female     6
## 2    Male     3
## 3  Female    45
## 4 Female      4
## 5    Male    40
## 6   Male      2
summarise(group_by(dfrPatient, Race) , n())
## # A tibble: 6 x 2
##        Race `n()`
##       <chr> <int>
## 1     Asian     5
## 2 Bi-Racial     1
## 3     Black     8
## 4       Dog     1
## 5  Hispanic    17
## 6     White    68
summarise(group_by(dfrPatient, Died) , n())
## # A tibble: 2 x 2
##    Died `n()`
##   <chr> <int>
## 1 False    46
## 2  True    54
summarise(group_by(dfrPatient, Pet) , n())
## # A tibble: 10 x 2
##      Pet `n()`
##    <chr> <int>
##  1  Bird     9
##  2   Cat    24
##  3   CAT     5
##  4   Dog    28
##  5   DOG     4
##  6 Horse     1
##  7  None    23
##  8  NONE     1
##  9  NULL     3
## 10  <NA>     2
summarise(group_by(dfrPatient, Smokes) , n())
## # A tibble: 4 x 2
##   Smokes `n()`
##    <chr> <int>
## 1  False    72
## 2     No     6
## 3   True    18
## 4    Yes     4
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
##   HealthGrade `n()`
##         <int> <int>
## 1           1    29
## 2           2    30
## 3           3    34
## 4          99     7
summarise(group_by(dfrPatient, State) , n())
## # A tibble: 34 x 2
##          State `n()`
##          <chr> <int>
##  1     Alabama     2
##  2     Arizona     2
##  3  California    13
##  4    Colorado     1
##  5 Connecticut     1
##  6     Florida     8
##  7     Georgia     3
##  8 Georgia,xxx     1
##  9      Hawaii     2
## 10    Illinois     4
## # ... with 24 more rows

Error Handling

#Error Handling in Gender
dfrPatient$Gender <- trimws(toupper(dfrPatient$Gender))
summarise(group_by(dfrPatient, Gender) , n())
## # A tibble: 2 x 2
##   Gender `n()`
##    <chr> <int>
## 1 FEMALE    55
## 2   MALE    45
#Error Handling in Race
dfrPatient$Race <- trimws(toupper(dfrPatient$Race))
dfrPatient$Race[dfrPatient$Race =="DOG"] <- NA
summarise(group_by(dfrPatient, Race) , n())
## # A tibble: 6 x 2
##        Race `n()`
##       <chr> <int>
## 1     ASIAN     5
## 2 BI-RACIAL     1
## 3     BLACK     8
## 4  HISPANIC    17
## 5     WHITE    68
## 6      <NA>     1
#Error Handling in Died
dfrPatient$Died <- as.logical(dfrPatient$Died)
summarise(group_by(dfrPatient, Died) , n())
## # A tibble: 2 x 2
##    Died `n()`
##   <lgl> <int>
## 1 FALSE    46
## 2  TRUE    54
#Error Handling in Pet
dfrPatient$Pet <- trimws(toupper(dfrPatient$Pet))
dfrPatient$Pet[dfrPatient$Pet == "NULL"] <- NA
summarise(group_by(dfrPatient, Pet) , n())
## # A tibble: 6 x 2
##     Pet `n()`
##   <chr> <int>
## 1  BIRD     9
## 2   CAT    29
## 3   DOG    32
## 4 HORSE     1
## 5  NONE    24
## 6  <NA>     5
#Error Handling in Smokes
dfrPatient$Smokes <- trimws(toupper(dfrPatient2$Smokes))
dfrPatient$Smokes[dfrPatient$Smokes == "NO"]<- FALSE
dfrPatient$Smokes[dfrPatient$Smokes == "YES"]<- TRUE
dfrPatient$Smokes <- as.logical(dfrPatient2$Smokes)
summarise(group_by(dfrPatient, Smokes) , n())
## # A tibble: 3 x 2
##   Smokes `n()`
##    <lgl> <int>
## 1  FALSE    72
## 2   TRUE    18
## 3     NA    10
#Error Handling in Healthgrade
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
##   HealthGrade `n()`
##         <int> <int>
## 1           1    29
## 2           2    30
## 3           3    34
## 4          99     7
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 1] <- "Good Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 2] <- " Average Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 3] <- "Bad Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 99] <- "Worse"
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
##       HealthGrade `n()`
##             <chr> <int>
## 1  Average Health    30
## 2      Bad Health    34
## 3     Good Health    29
## 4           Worse     7
#Error handling in state
summarise(group_by(dfrPatient , State) , n())
## # A tibble: 34 x 2
##          State `n()`
##          <chr> <int>
##  1     Alabama     2
##  2     Arizona     2
##  3  California    13
##  4    Colorado     1
##  5 Connecticut     1
##  6     Florida     8
##  7     Georgia     3
##  8 Georgia,xxx     1
##  9      Hawaii     2
## 10    Illinois     4
## # ... with 24 more rows
dfrPatient$State[dfrPatient$State == "Georgia,xxx"] <- "Georgia"
View(summarise(group_by(dfrPatient , State) , n()))

Remove NA rows using complete.cases

nrow(dfrPatient)
## [1] 100
vclComplete <- complete.cases(dfrPatient)
dfrPatient <- dfrPatient[vclComplete, ]
nrow(dfrPatient)
## [1] 86

Data Preparation

dfrPatient<-  mutate(dfrPatient, BMI =  (WeightInKgs)/(HeightInCms/100)^2  )
dfrPatient<-  mutate(dfrPatient, BMILabel = ifelse( BMI < 18.50,"Underweight",
                                       ifelse(BMI > 18.50 & BMI < 25.00,"Normal",
                                              ifelse(BMI > 25.00 & BMI< 30.00,"Overweight",
                                                     ifelse(BMI >30.00,"Obese",NA)))))

Viewing Data

#Top 10 records by BMI
head(arrange(dfrPatient,desc(BMI)),10)
##           ID     Name  Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/SG/009    Sammy WHITE   MALE  FALSE      166.84       88.25
## 2  AC/SG/064      Jon WHITE   MALE  FALSE      169.16       90.08
## 3  AC/AH/076   Albert WHITE   MALE  FALSE      176.22       97.67
## 4  AC/AH/104   Jeremy WHITE   MALE   TRUE      169.85       90.63
## 5  AC/AH/022     Lupe WHITE   MALE  FALSE      175.66       94.54
## 6  AC/AH/248   Andrea WHITE   MALE  FALSE      178.64       97.05
## 7  AC/SG/067   Thomas WHITE   MALE  FALSE      167.51       84.15
## 8  AC/AH/052 Courtney WHITE   MALE   TRUE      175.39       92.22
## 9  AC/AH/127     Jame WHITE   MALE  FALSE      167.75       82.06
## 10 AC/SG/107      Sol WHITE   MALE  FALSE      176.54       90.76
##     BirthDate        State  Pet     HealthGrade  Died RecordDate      BMI
## 1  04-03-1972      Vermont  DOG     Good Health FALSE 25-06-2016 31.70402
## 2  04-10-1972     Illinois  CAT  Average Health  TRUE 25-07-2016 31.47988
## 3  08-04-1973    Louisiana  CAT  Average Health FALSE 25-12-2015 31.45218
## 4  12-04-1972     Kentucky NONE     Good Health  TRUE 25-12-2015 31.41528
## 5  11-08-1972      Florida  CAT     Good Health FALSE 25-11-2015 30.63867
## 6  12-01-1973      Indiana  CAT     Good Health  TRUE 25-05-2016 30.41152
## 7  19-07-1972 Pennsylvania BIRD  Average Health  TRUE 25-07-2016 29.98974
## 8  16-03-1972      Indiana BIRD      Bad Health FALSE 25-12-2015 29.97888
## 9  29-10-1972        Texas  DOG     Good Health  TRUE 25-01-2016 29.16127
## 10 28-01-1973       Hawaii NONE      Bad Health FALSE 25-08-2016 29.12113
##      BMILabel
## 1       Obese
## 2       Obese
## 3       Obese
## 4       Obese
## 5       Obese
## 6       Obese
## 7  Overweight
## 8  Overweight
## 9  Overweight
## 10 Overweight
#Bottom 10 records by BMI 
head(arrange(dfrPatient,BMI),10)
##           ID      Name     Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/SG/193    Ronnie    WHITE   MALE   TRUE      185.43       73.63
## 2  AC/AH/061    Lester    BLACK   MALE  FALSE      181.13       72.33
## 3  AC/SG/099    Leslie    ASIAN   MALE  FALSE      172.72       67.62
## 4  AC/AH/001 Demetrius    WHITE   MALE  FALSE      182.87       76.57
## 5  AC/AH/210     Keith HISPANIC FEMALE   TRUE      170.03       66.68
## 6  AC/AH/086      Kyle    BLACK   MALE   TRUE      180.11       75.72
## 7  AC/AH/045   Shirley    WHITE   MALE  FALSE      181.32       76.90
## 8  AC/AH/089      Dong    WHITE   MALE  FALSE      179.24       75.54
## 9  AC/AH/164     Shane HISPANIC   MALE   TRUE      177.03       74.04
## 10 AC/AH/114      Kris HISPANIC   MALE  FALSE      177.75       74.84
##     BirthDate        State  Pet     HealthGrade  Died RecordDate      BMI
## 1  05-06-1973         Iowa  DOG      Bad Health FALSE 25-09-2016 21.41385
## 2  16-11-1972    Wisconsin  DOG           Worse  TRUE 25-12-2015 22.04640
## 3  04-02-1972         Ohio  CAT     Good Health FALSE 25-07-2016 22.66678
## 4  31-01-1972      Georgia  DOG  Average Health FALSE 25-11-2015 22.89674
## 5  28-08-1972     New York  DOG           Worse FALSE 25-03-2016 23.06452
## 6  12-05-1973      Georgia  CAT      Bad Health FALSE 25-12-2015 23.34183
## 7  25-12-1971    Louisiana  DOG     Good Health FALSE 25-11-2015 23.39025
## 8  11-03-1972   California NONE  Average Health  TRUE 25-12-2015 23.51295
## 9  18-02-1972      Florida NONE  Average Health FALSE 25-02-2016 23.62505
## 10 19-11-1972 Pennsylvania BIRD      Bad Health FALSE 25-01-2016 23.68725
##    BMILabel
## 1    Normal
## 2    Normal
## 3    Normal
## 4    Normal
## 5    Normal
## 6    Normal
## 7    Normal
## 8    Normal
## 9    Normal
## 10   Normal
#Gender > Race - Frequency / counts
summarise(group_by(dfrPatient, Gender,Race),n())
## # A tibble: 9 x 3
## # Groups:   Gender [?]
##   Gender      Race `n()`
##    <chr>     <chr> <int>
## 1 FEMALE     ASIAN     2
## 2 FEMALE     BLACK     1
## 3 FEMALE  HISPANIC     7
## 4 FEMALE     WHITE    35
## 5   MALE     ASIAN     2
## 6   MALE BI-RACIAL     1
## 7   MALE     BLACK     5
## 8   MALE  HISPANIC    10
## 9   MALE     WHITE    23
table(dfrPatient$Gender, dfrPatient$Race)
##         
##          ASIAN BI-RACIAL BLACK HISPANIC WHITE
##   FEMALE     2         0     1        7    35
##   MALE       2         1     5       10    23
#Count all the dead people 
filter(dfrPatient, Died ==TRUE)
##           ID        Name     Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/AH/049      Martin    WHITE FEMALE  FALSE      160.06       72.37
## 2  AC/AH/061      Lester    BLACK   MALE  FALSE      181.13       72.33
## 3  AC/AH/089        Dong    WHITE   MALE  FALSE      179.24       75.54
## 4  AC/AH/104      Jeremy    WHITE   MALE   TRUE      169.85       90.63
## 5  AC/AH/127        Jame    WHITE   MALE  FALSE      167.75       82.06
## 6  AC/AH/133       Clyde HISPANIC   MALE  FALSE      181.15       83.93
## 7  AC/AH/150       Brett    WHITE   MALE   TRUE      181.56       79.54
## 8  AC/AH/154        Tony    WHITE FEMALE  FALSE      160.03       64.30
## 9  AC/AH/156      George    WHITE   MALE  FALSE      165.62       76.72
## 10 AC/AH/160        Rory    ASIAN FEMALE  FALSE      159.67       71.88
## 11 AC/AH/176       Jerry    ASIAN   MALE  FALSE      175.21       83.65
## 12 AC/AH/180        Drew    WHITE FEMALE  FALSE      160.80       64.77
## 13 AC/AH/185      Ronald    WHITE   MALE  FALSE      166.46       76.83
## 14 AC/AH/186 Christopher    WHITE FEMALE  FALSE      157.95       67.41
## 15 AC/AH/192   Dominique    WHITE   MALE  FALSE      180.61       83.59
## 16 AC/AH/211         Son    WHITE FEMALE  FALSE      157.16       69.64
## 17 AC/AH/219         Jay    WHITE FEMALE  FALSE      163.47       72.89
## 18 AC/AH/221      Carlos    WHITE FEMALE  FALSE      165.34       70.84
## 19 AC/AH/233      Marion    WHITE FEMALE  FALSE      163.97       66.71
## 20 AC/AH/244        Sean    WHITE FEMALE  FALSE      160.09       65.93
## 21 AC/AH/248      Andrea    WHITE   MALE  FALSE      178.64       97.05
## 22 AC/AH/249       Jesus HISPANIC FEMALE   TRUE      159.78       68.31
## 23 AC/SG/010        Theo    ASIAN FEMALE  FALSE      159.32       64.92
## 24 AC/SG/016      Jimmie    BLACK FEMALE  FALSE      161.84       69.97
## 25 AC/SG/046        Carl HISPANIC   MALE  FALSE      171.41       81.70
## 26 AC/SG/055        Evan    WHITE   MALE  FALSE      166.75       79.06
## 27 AC/SG/064         Jon    WHITE   MALE  FALSE      169.16       90.08
## 28 AC/SG/065      Shayne    WHITE FEMALE  FALSE      157.01       66.56
## 29 AC/SG/067      Thomas    WHITE   MALE  FALSE      167.51       84.15
## 30 AC/SG/068   Valentine HISPANIC FEMALE  FALSE      160.47       68.20
## 31 AC/SG/084       Brian HISPANIC   MALE  FALSE      174.25       80.93
## 32 AC/SG/101       Jason    WHITE FEMALE  FALSE      159.23       69.96
## 33 AC/SG/116      Connie    BLACK   MALE  FALSE      184.34       90.41
## 34 AC/SG/123     Darnell    WHITE FEMALE   TRUE      162.32       72.72
## 35 AC/SG/134       Daryl    WHITE FEMALE   TRUE      162.59       69.76
## 36 AC/SG/155     Raymond    WHITE FEMALE  FALSE      158.35       69.72
## 37 AC/SG/165       Elmer    WHITE FEMALE  FALSE      162.18       67.81
## 38 AC/SG/167       Jimmy    WHITE FEMALE  FALSE      159.38       70.37
## 39 AC/SG/179       Logan    WHITE   MALE  FALSE      183.10       82.47
## 40 AC/SG/181       Terry HISPANIC   MALE  FALSE      177.14       88.70
## 41 AC/SG/182       Jamie HISPANIC   MALE   TRUE      171.08       72.51
## 42 AC/SG/191        Lacy HISPANIC FEMALE  FALSE      159.33       70.68
## 43 AC/SG/197       Stacy    WHITE FEMALE  FALSE      159.44       66.21
## 44 AC/SG/216        Alva    WHITE FEMALE  FALSE      159.13       66.96
## 45 AC/SG/217        Dean    WHITE FEMALE  FALSE      160.58       71.49
## 46 AC/SG/234        Luis HISPANIC FEMALE  FALSE      164.88       68.07
##     BirthDate          State   Pet     HealthGrade Died RecordDate
## 1  28-04-1972     California HORSE  Average Health TRUE 25-12-2015
## 2  16-11-1972      Wisconsin   DOG           Worse TRUE 25-12-2015
## 3  11-03-1972     California  NONE  Average Health TRUE 25-12-2015
## 4  12-04-1972       Kentucky  NONE     Good Health TRUE 25-12-2015
## 5  29-10-1972          Texas   DOG     Good Health TRUE 25-01-2016
## 6  13-10-1973     Washington   CAT      Bad Health TRUE 25-02-2016
## 7  03-05-1972       Kentucky   DOG     Good Health TRUE 25-02-2016
## 8  30-08-1973     California   DOG     Good Health TRUE 25-02-2016
## 9  09-07-1972     California   DOG     Good Health TRUE 25-02-2016
## 10 22-09-1973        Florida   CAT  Average Health TRUE 25-02-2016
## 11 01-05-1973       Virginia   DOG      Bad Health TRUE 25-03-2016
## 12 18-02-1973         Oregon   CAT     Good Health TRUE 25-03-2016
## 13 17-08-1972       Colorado  NONE           Worse TRUE 25-03-2016
## 14 06-05-1972     New Jersey   DOG      Bad Health TRUE 25-03-2016
## 15 24-03-1972       Michigan  NONE      Bad Health TRUE 25-03-2016
## 16 14-07-1973     California   CAT  Average Health TRUE 25-04-2016
## 17 07-04-1972 North Carolina  BIRD     Good Health TRUE 25-04-2016
## 18 01-02-1972       Michigan   DOG           Worse TRUE 25-04-2016
## 19 23-12-1971           Ohio   CAT      Bad Health TRUE 25-04-2016
## 20 25-01-1973       Maryland  NONE           Worse TRUE 25-05-2016
## 21 12-01-1973        Indiana   CAT     Good Health TRUE 25-05-2016
## 22 23-04-1972        Alabama   CAT  Average Health TRUE 25-05-2016
## 23 29-01-1973       New York   CAT  Average Health TRUE 25-06-2016
## 24 03-04-1972        Arizona   CAT      Bad Health TRUE 25-06-2016
## 25 05-08-1973    Mississippi  BIRD  Average Health TRUE 25-06-2016
## 26 24-02-1972       Illinois  BIRD      Bad Health TRUE 25-07-2016
## 27 04-10-1972       Illinois   CAT  Average Health TRUE 25-07-2016
## 28 05-04-1972     California   DOG      Bad Health TRUE 25-07-2016
## 29 19-07-1972   Pennsylvania  BIRD  Average Health TRUE 25-07-2016
## 30 15-04-1972      Tennessee   CAT      Bad Health TRUE 25-07-2016
## 31 06-03-1972       Virginia   DOG  Average Health TRUE 25-07-2016
## 32 28-09-1973       Michigan   DOG  Average Health TRUE 25-07-2016
## 33 05-06-1972        Florida  NONE      Bad Health TRUE 25-08-2016
## 34 03-09-1972 North Carolina  BIRD     Good Health TRUE 25-08-2016
## 35 28-05-1972          Texas   CAT  Average Health TRUE 25-08-2016
## 36 02-06-1972     California   CAT      Bad Health TRUE 25-08-2016
## 37 25-03-1972     Washington  BIRD     Good Health TRUE 25-08-2016
## 38 30-09-1973     Washington  NONE  Average Health TRUE 25-09-2016
## 39 24-10-1972           Ohio   DOG      Bad Health TRUE 25-09-2016
## 40 24-11-1971        Indiana   CAT      Bad Health TRUE 25-09-2016
## 41 25-03-1973      Louisiana  NONE      Bad Health TRUE 25-09-2016
## 42 21-06-1973          Texas  NONE      Bad Health TRUE 25-09-2016
## 43 08-11-1972       New York   CAT     Good Health TRUE 25-10-2016
## 44 19-06-1972        Alabama  NONE     Good Health TRUE 25-10-2016
## 45 11-11-1972           Ohio  NONE     Good Health TRUE 25-10-2016
## 46 10-11-1971   Pennsylvania   CAT      Bad Health TRUE 25-10-2016
##         BMI   BMILabel
## 1  28.24834 Overweight
## 2  22.04640     Normal
## 3  23.51295     Normal
## 4  31.41528      Obese
## 5  29.16127 Overweight
## 6  25.57647 Overweight
## 7  24.12933     Normal
## 8  25.10777 Overweight
## 9  27.96939 Overweight
## 10 28.19431 Overweight
## 11 27.24885 Overweight
## 12 25.04966 Overweight
## 13 27.72752 Overweight
## 14 27.01998 Overweight
## 15 25.62541 Overweight
## 16 28.19517 Overweight
## 17 27.27670 Overweight
## 18 25.91330 Overweight
## 19 24.81202     Normal
## 20 25.72496 Overweight
## 21 30.41152      Obese
## 22 26.75713 Overweight
## 23 25.57631 Overweight
## 24 26.71407 Overweight
## 25 27.80672 Overweight
## 26 28.43316 Overweight
## 27 31.47988      Obese
## 28 26.99968 Overweight
## 29 29.98974 Overweight
## 30 26.48480 Overweight
## 31 26.65410 Overweight
## 32 27.59307 Overweight
## 33 26.60586 Overweight
## 34 27.60005 Overweight
## 35 26.38875 Overweight
## 36 27.80489 Overweight
## 37 25.78096 Overweight
## 38 27.70256 Overweight
## 39 24.59910     Normal
## 40 28.26769 Overweight
## 41 24.77419     Normal
## 42 27.84206 Overweight
## 43 26.04528 Overweight
## 44 26.44304 Overweight
## 45 27.72441 Overweight
## 46 25.03916 Overweight
nrow(filter(dfrPatient, Died==TRUE))
## [1] 46
#Hispanic Females
filter(dfrPatient , Race =="HISPANIC" & Gender== "FEMALES")
##  [1] ID          Name        Race        Gender      Smokes     
##  [6] HeightInCms WeightInKgs BirthDate   State       Pet        
## [11] HealthGrade Died        RecordDate  BMI         BMILabel   
## <0 rows> (or 0-length row.names)
#Sample Finctions
set.seed(707)
sample_n(dfrPatient , 10)
##           ID    Name     Race Gender Smokes HeightInCms WeightInKgs
## 9  AC/AH/050 Frances    WHITE FEMALE  FALSE      166.48       67.34
## 38 AC/AH/210   Keith HISPANIC FEMALE   TRUE      170.03       66.68
## 23 AC/AH/127    Jame    WHITE   MALE  FALSE      167.75       82.06
## 45 AC/AH/241 Lindsay    WHITE FEMALE  FALSE      161.38       73.55
## 63 AC/SG/101   Jason    WHITE FEMALE  FALSE      159.23       69.96
## 57 AC/SG/067  Thomas    WHITE   MALE  FALSE      167.51       84.15
## 70 AC/SG/139  Jordan    WHITE   MALE  FALSE      171.94       82.11
## 5  AC/AH/037  Samuel    WHITE FEMALE  FALSE      161.69       68.85
## 17 AC/AH/086    Kyle    BLACK   MALE   TRUE      180.11       75.72
## 49 AC/SG/002     Jan    WHITE FEMALE   TRUE      161.57       67.92
##     BirthDate        State  Pet     HealthGrade  Died RecordDate      BMI
## 9  08-11-1971     Michigan NONE     Good Health FALSE 25-12-2015 24.29679
## 38 28-08-1972     New York  DOG           Worse FALSE 25-03-2016 23.06452
## 23 29-10-1972        Texas  DOG     Good Health  TRUE 25-01-2016 29.16127
## 45 08-02-1972      Florida  CAT      Bad Health FALSE 25-05-2016 28.24121
## 63 28-09-1973     Michigan  DOG  Average Health  TRUE 25-07-2016 27.59307
## 57 19-07-1972 Pennsylvania BIRD  Average Health  TRUE 25-07-2016 29.98974
## 70 06-10-1973     Michigan NONE     Good Health FALSE 25-08-2016 27.77424
## 5  20-03-1972 Pennsylvania NONE     Good Health FALSE 25-11-2015 26.33526
## 17 12-05-1973      Georgia  CAT      Bad Health FALSE 25-12-2015 23.34183
## 49 03-07-1973      Arizona  DOG      Bad Health FALSE 25-05-2016 26.01814
##      BMILabel
## 9      Normal
## 38     Normal
## 23 Overweight
## 45 Overweight
## 63 Overweight
## 57 Overweight
## 70 Overweight
## 5  Overweight
## 17     Normal
## 49 Overweight