Load Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
Read Nifty CSV
dfrPatient <- read.csv("D:\\R\\R-Programming\\patient-data.csv", header = T, stringsAsFactors = F)
dfrPatient2 <- read.csv("D:\\R\\R-Programming\\patient-data.csv",header = T, stringsAsFactors = F)
head(dfrPatient)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/001 Demetrius White Male False 182.87 76.57
## 2 AC/AH/017 Rosario White Male False 179.12 80.43
## 3 AC/AH/020 Julio Black Male False 169.15 75.48
## 4 AC/AH/022 Lupe White Male False 175.66 94.54
## 5 AC/AH/029 Lavern White Female False 164.47 71.78
## 6 AC/AH/033 Bernie Dog Female True 158.27 69.90
## BirthDate State Pet HealthGrade Died RecordDate
## 1 31-01-1972 Georgia,xxx Dog 2 False 25-11-2015
## 2 09-06-1972 Missouri Dog 2 False 25-11-2015
## 3 03-07-1972 Pennsylvania None 2 False 25-11-2015
## 4 11-08-1972 Florida Cat 1 False 25-11-2015
## 5 06-06-1973 Iowa NULL 2 True 25-11-2015
## 6 25-06-1973 Maryland Dog 2 False 25-11-2015
Detecting NAs
detectNas <- function(inp){
return(sum(is.na(inp)))
}
lapply(dfrPatient, detectNas)
## $ID
## [1] 0
##
## $Name
## [1] 0
##
## $Race
## [1] 0
##
## $Gender
## [1] 0
##
## $Smokes
## [1] 0
##
## $HeightInCms
## [1] 0
##
## $WeightInKgs
## [1] 0
##
## $BirthDate
## [1] 0
##
## $State
## [1] 0
##
## $Pet
## [1] 2
##
## $HealthGrade
## [1] 0
##
## $Died
## [1] 0
##
## $RecordDate
## [1] 0
Detecting Zeros
detect0 <- function(inp){
if (class(inp) != "numeric"){
return ("Non Numeric data")}
sum(inp==0)
}
lapply(dfrPatient,detect0)
## $ID
## [1] "Non Numeric data"
##
## $Name
## [1] "Non Numeric data"
##
## $Race
## [1] "Non Numeric data"
##
## $Gender
## [1] "Non Numeric data"
##
## $Smokes
## [1] "Non Numeric data"
##
## $HeightInCms
## [1] 0
##
## $WeightInKgs
## [1] 0
##
## $BirthDate
## [1] "Non Numeric data"
##
## $State
## [1] "Non Numeric data"
##
## $Pet
## [1] "Non Numeric data"
##
## $HealthGrade
## [1] "Non Numeric data"
##
## $Died
## [1] "Non Numeric data"
##
## $RecordDate
## [1] "Non Numeric data"
Detecting spaces
detectspace <- function(inp){
if (class (inp) !="character")
return("Non Character Data")
sum(trimws(inp) == "")
}
lapply(dfrPatient, detectspace)
## $ID
## [1] 0
##
## $Name
## [1] 0
##
## $Race
## [1] 0
##
## $Gender
## [1] 0
##
## $Smokes
## [1] 0
##
## $HeightInCms
## [1] "Non Character Data"
##
## $WeightInKgs
## [1] "Non Character Data"
##
## $BirthDate
## [1] 0
##
## $State
## [1] 0
##
## $Pet
## [1] NA
##
## $HealthGrade
## [1] "Non Character Data"
##
## $Died
## [1] 0
##
## $RecordDate
## [1] 0
Detecting Outliers
detectoutlier<- function(inp,na.rm=TRUE){
if (class(inp) != "numeric"){
return ("Non Numeric Column")
}
i.qnt <- quantile(inp , probs= c(0.25,0.75), na.rm=na.rm)
i.max <- 1.5 *IQR(inp, na.rm=na.rm)
otp <- inp
otp[inp < (i.qnt[1] - i.max)] <- NA
otp[inp < (i.qnt[2] - i.max)] <- NA
return (inp [is.na(otp)])
}
lapply(dfrPatient, detectoutlier)
## $ID
## [1] "Non Numeric Column"
##
## $Name
## [1] "Non Numeric Column"
##
## $Race
## [1] "Non Numeric Column"
##
## $Gender
## [1] "Non Numeric Column"
##
## $Smokes
## [1] "Non Numeric Column"
##
## $HeightInCms
## numeric(0)
##
## $WeightInKgs
## numeric(0)
##
## $BirthDate
## [1] "Non Numeric Column"
##
## $State
## [1] "Non Numeric Column"
##
## $Pet
## [1] "Non Numeric Column"
##
## $HealthGrade
## [1] "Non Numeric Column"
##
## $Died
## [1] "Non Numeric Column"
##
## $RecordDate
## [1] "Non Numeric Column"
Data Validation
summarise(group_by(dfrPatient, Gender) , n())
## # A tibble: 6 x 2
## Gender `n()`
## <chr> <int>
## 1 Female 6
## 2 Male 3
## 3 Female 45
## 4 Female 4
## 5 Male 40
## 6 Male 2
summarise(group_by(dfrPatient, Race) , n())
## # A tibble: 6 x 2
## Race `n()`
## <chr> <int>
## 1 Asian 5
## 2 Bi-Racial 1
## 3 Black 8
## 4 Dog 1
## 5 Hispanic 17
## 6 White 68
summarise(group_by(dfrPatient, Died) , n())
## # A tibble: 2 x 2
## Died `n()`
## <chr> <int>
## 1 False 46
## 2 True 54
summarise(group_by(dfrPatient, Pet) , n())
## # A tibble: 10 x 2
## Pet `n()`
## <chr> <int>
## 1 Bird 9
## 2 Cat 24
## 3 CAT 5
## 4 Dog 28
## 5 DOG 4
## 6 Horse 1
## 7 None 23
## 8 NONE 1
## 9 NULL 3
## 10 <NA> 2
summarise(group_by(dfrPatient, Smokes) , n())
## # A tibble: 4 x 2
## Smokes `n()`
## <chr> <int>
## 1 False 72
## 2 No 6
## 3 True 18
## 4 Yes 4
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
## HealthGrade `n()`
## <int> <int>
## 1 1 29
## 2 2 30
## 3 3 34
## 4 99 7
summarise(group_by(dfrPatient, State) , n())
## # A tibble: 34 x 2
## State `n()`
## <chr> <int>
## 1 Alabama 2
## 2 Arizona 2
## 3 California 13
## 4 Colorado 1
## 5 Connecticut 1
## 6 Florida 8
## 7 Georgia 3
## 8 Georgia,xxx 1
## 9 Hawaii 2
## 10 Illinois 4
## # ... with 24 more rows
Error Handling
#Error Handling in Gender
dfrPatient$Gender <- trimws(toupper(dfrPatient$Gender))
summarise(group_by(dfrPatient, Gender) , n())
## # A tibble: 2 x 2
## Gender `n()`
## <chr> <int>
## 1 FEMALE 55
## 2 MALE 45
#Error Handling in Race
dfrPatient$Race <- trimws(toupper(dfrPatient$Race))
dfrPatient$Race[dfrPatient$Race =="DOG"] <- NA
summarise(group_by(dfrPatient, Race) , n())
## # A tibble: 6 x 2
## Race `n()`
## <chr> <int>
## 1 ASIAN 5
## 2 BI-RACIAL 1
## 3 BLACK 8
## 4 HISPANIC 17
## 5 WHITE 68
## 6 <NA> 1
#Error Handling in Died
dfrPatient$Died <- as.logical(dfrPatient$Died)
summarise(group_by(dfrPatient, Died) , n())
## # A tibble: 2 x 2
## Died `n()`
## <lgl> <int>
## 1 FALSE 46
## 2 TRUE 54
#Error Handling in Pet
dfrPatient$Pet <- trimws(toupper(dfrPatient$Pet))
dfrPatient$Pet[dfrPatient$Pet == "NULL"] <- NA
summarise(group_by(dfrPatient, Pet) , n())
## # A tibble: 6 x 2
## Pet `n()`
## <chr> <int>
## 1 BIRD 9
## 2 CAT 29
## 3 DOG 32
## 4 HORSE 1
## 5 NONE 24
## 6 <NA> 5
#Error Handling in Smokes
dfrPatient$Smokes <- trimws(toupper(dfrPatient2$Smokes))
dfrPatient$Smokes[dfrPatient$Smokes == "NO"]<- FALSE
dfrPatient$Smokes[dfrPatient$Smokes == "YES"]<- TRUE
dfrPatient$Smokes <- as.logical(dfrPatient2$Smokes)
summarise(group_by(dfrPatient, Smokes) , n())
## # A tibble: 3 x 2
## Smokes `n()`
## <lgl> <int>
## 1 FALSE 72
## 2 TRUE 18
## 3 NA 10
#Error Handling in Healthgrade
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
## HealthGrade `n()`
## <int> <int>
## 1 1 29
## 2 2 30
## 3 3 34
## 4 99 7
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 1] <- "Good Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 2] <- " Average Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 3] <- "Bad Health"
dfrPatient$HealthGrade[dfrPatient$HealthGrade == 99] <- "Worse"
summarise(group_by(dfrPatient, HealthGrade) , n())
## # A tibble: 4 x 2
## HealthGrade `n()`
## <chr> <int>
## 1 Average Health 30
## 2 Bad Health 34
## 3 Good Health 29
## 4 Worse 7
#Error handling in state
summarise(group_by(dfrPatient , State) , n())
## # A tibble: 34 x 2
## State `n()`
## <chr> <int>
## 1 Alabama 2
## 2 Arizona 2
## 3 California 13
## 4 Colorado 1
## 5 Connecticut 1
## 6 Florida 8
## 7 Georgia 3
## 8 Georgia,xxx 1
## 9 Hawaii 2
## 10 Illinois 4
## # ... with 24 more rows
dfrPatient$State[dfrPatient$State == "Georgia,xxx"] <- "Georgia"
View(summarise(group_by(dfrPatient , State) , n()))
Remove NA rows using complete.cases
nrow(dfrPatient)
## [1] 100
vclComplete <- complete.cases(dfrPatient)
dfrPatient <- dfrPatient[vclComplete, ]
nrow(dfrPatient)
## [1] 86
Data Preparation
dfrPatient<- mutate(dfrPatient, BMI = (WeightInKgs)/(HeightInCms/100)^2 )
dfrPatient<- mutate(dfrPatient, BMILabel = ifelse( BMI < 18.50,"Underweight",
ifelse(BMI > 18.50 & BMI < 25.00,"Normal",
ifelse(BMI > 25.00 & BMI< 30.00,"Overweight",
ifelse(BMI >30.00,"Obese",NA)))))
Viewing Data
#Top 10 records by BMI
head(arrange(dfrPatient,desc(BMI)),10)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/SG/009 Sammy WHITE MALE FALSE 166.84 88.25
## 2 AC/SG/064 Jon WHITE MALE FALSE 169.16 90.08
## 3 AC/AH/076 Albert WHITE MALE FALSE 176.22 97.67
## 4 AC/AH/104 Jeremy WHITE MALE TRUE 169.85 90.63
## 5 AC/AH/022 Lupe WHITE MALE FALSE 175.66 94.54
## 6 AC/AH/248 Andrea WHITE MALE FALSE 178.64 97.05
## 7 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 8 AC/AH/052 Courtney WHITE MALE TRUE 175.39 92.22
## 9 AC/AH/127 Jame WHITE MALE FALSE 167.75 82.06
## 10 AC/SG/107 Sol WHITE MALE FALSE 176.54 90.76
## BirthDate State Pet HealthGrade Died RecordDate BMI
## 1 04-03-1972 Vermont DOG Good Health FALSE 25-06-2016 31.70402
## 2 04-10-1972 Illinois CAT Average Health TRUE 25-07-2016 31.47988
## 3 08-04-1973 Louisiana CAT Average Health FALSE 25-12-2015 31.45218
## 4 12-04-1972 Kentucky NONE Good Health TRUE 25-12-2015 31.41528
## 5 11-08-1972 Florida CAT Good Health FALSE 25-11-2015 30.63867
## 6 12-01-1973 Indiana CAT Good Health TRUE 25-05-2016 30.41152
## 7 19-07-1972 Pennsylvania BIRD Average Health TRUE 25-07-2016 29.98974
## 8 16-03-1972 Indiana BIRD Bad Health FALSE 25-12-2015 29.97888
## 9 29-10-1972 Texas DOG Good Health TRUE 25-01-2016 29.16127
## 10 28-01-1973 Hawaii NONE Bad Health FALSE 25-08-2016 29.12113
## BMILabel
## 1 Obese
## 2 Obese
## 3 Obese
## 4 Obese
## 5 Obese
## 6 Obese
## 7 Overweight
## 8 Overweight
## 9 Overweight
## 10 Overweight
#Bottom 10 records by BMI
head(arrange(dfrPatient,BMI),10)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/SG/193 Ronnie WHITE MALE TRUE 185.43 73.63
## 2 AC/AH/061 Lester BLACK MALE FALSE 181.13 72.33
## 3 AC/SG/099 Leslie ASIAN MALE FALSE 172.72 67.62
## 4 AC/AH/001 Demetrius WHITE MALE FALSE 182.87 76.57
## 5 AC/AH/210 Keith HISPANIC FEMALE TRUE 170.03 66.68
## 6 AC/AH/086 Kyle BLACK MALE TRUE 180.11 75.72
## 7 AC/AH/045 Shirley WHITE MALE FALSE 181.32 76.90
## 8 AC/AH/089 Dong WHITE MALE FALSE 179.24 75.54
## 9 AC/AH/164 Shane HISPANIC MALE TRUE 177.03 74.04
## 10 AC/AH/114 Kris HISPANIC MALE FALSE 177.75 74.84
## BirthDate State Pet HealthGrade Died RecordDate BMI
## 1 05-06-1973 Iowa DOG Bad Health FALSE 25-09-2016 21.41385
## 2 16-11-1972 Wisconsin DOG Worse TRUE 25-12-2015 22.04640
## 3 04-02-1972 Ohio CAT Good Health FALSE 25-07-2016 22.66678
## 4 31-01-1972 Georgia DOG Average Health FALSE 25-11-2015 22.89674
## 5 28-08-1972 New York DOG Worse FALSE 25-03-2016 23.06452
## 6 12-05-1973 Georgia CAT Bad Health FALSE 25-12-2015 23.34183
## 7 25-12-1971 Louisiana DOG Good Health FALSE 25-11-2015 23.39025
## 8 11-03-1972 California NONE Average Health TRUE 25-12-2015 23.51295
## 9 18-02-1972 Florida NONE Average Health FALSE 25-02-2016 23.62505
## 10 19-11-1972 Pennsylvania BIRD Bad Health FALSE 25-01-2016 23.68725
## BMILabel
## 1 Normal
## 2 Normal
## 3 Normal
## 4 Normal
## 5 Normal
## 6 Normal
## 7 Normal
## 8 Normal
## 9 Normal
## 10 Normal
#Gender > Race - Frequency / counts
summarise(group_by(dfrPatient, Gender,Race),n())
## # A tibble: 9 x 3
## # Groups: Gender [?]
## Gender Race `n()`
## <chr> <chr> <int>
## 1 FEMALE ASIAN 2
## 2 FEMALE BLACK 1
## 3 FEMALE HISPANIC 7
## 4 FEMALE WHITE 35
## 5 MALE ASIAN 2
## 6 MALE BI-RACIAL 1
## 7 MALE BLACK 5
## 8 MALE HISPANIC 10
## 9 MALE WHITE 23
table(dfrPatient$Gender, dfrPatient$Race)
##
## ASIAN BI-RACIAL BLACK HISPANIC WHITE
## FEMALE 2 0 1 7 35
## MALE 2 1 5 10 23
#Count all the dead people
filter(dfrPatient, Died ==TRUE)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/049 Martin WHITE FEMALE FALSE 160.06 72.37
## 2 AC/AH/061 Lester BLACK MALE FALSE 181.13 72.33
## 3 AC/AH/089 Dong WHITE MALE FALSE 179.24 75.54
## 4 AC/AH/104 Jeremy WHITE MALE TRUE 169.85 90.63
## 5 AC/AH/127 Jame WHITE MALE FALSE 167.75 82.06
## 6 AC/AH/133 Clyde HISPANIC MALE FALSE 181.15 83.93
## 7 AC/AH/150 Brett WHITE MALE TRUE 181.56 79.54
## 8 AC/AH/154 Tony WHITE FEMALE FALSE 160.03 64.30
## 9 AC/AH/156 George WHITE MALE FALSE 165.62 76.72
## 10 AC/AH/160 Rory ASIAN FEMALE FALSE 159.67 71.88
## 11 AC/AH/176 Jerry ASIAN MALE FALSE 175.21 83.65
## 12 AC/AH/180 Drew WHITE FEMALE FALSE 160.80 64.77
## 13 AC/AH/185 Ronald WHITE MALE FALSE 166.46 76.83
## 14 AC/AH/186 Christopher WHITE FEMALE FALSE 157.95 67.41
## 15 AC/AH/192 Dominique WHITE MALE FALSE 180.61 83.59
## 16 AC/AH/211 Son WHITE FEMALE FALSE 157.16 69.64
## 17 AC/AH/219 Jay WHITE FEMALE FALSE 163.47 72.89
## 18 AC/AH/221 Carlos WHITE FEMALE FALSE 165.34 70.84
## 19 AC/AH/233 Marion WHITE FEMALE FALSE 163.97 66.71
## 20 AC/AH/244 Sean WHITE FEMALE FALSE 160.09 65.93
## 21 AC/AH/248 Andrea WHITE MALE FALSE 178.64 97.05
## 22 AC/AH/249 Jesus HISPANIC FEMALE TRUE 159.78 68.31
## 23 AC/SG/010 Theo ASIAN FEMALE FALSE 159.32 64.92
## 24 AC/SG/016 Jimmie BLACK FEMALE FALSE 161.84 69.97
## 25 AC/SG/046 Carl HISPANIC MALE FALSE 171.41 81.70
## 26 AC/SG/055 Evan WHITE MALE FALSE 166.75 79.06
## 27 AC/SG/064 Jon WHITE MALE FALSE 169.16 90.08
## 28 AC/SG/065 Shayne WHITE FEMALE FALSE 157.01 66.56
## 29 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 30 AC/SG/068 Valentine HISPANIC FEMALE FALSE 160.47 68.20
## 31 AC/SG/084 Brian HISPANIC MALE FALSE 174.25 80.93
## 32 AC/SG/101 Jason WHITE FEMALE FALSE 159.23 69.96
## 33 AC/SG/116 Connie BLACK MALE FALSE 184.34 90.41
## 34 AC/SG/123 Darnell WHITE FEMALE TRUE 162.32 72.72
## 35 AC/SG/134 Daryl WHITE FEMALE TRUE 162.59 69.76
## 36 AC/SG/155 Raymond WHITE FEMALE FALSE 158.35 69.72
## 37 AC/SG/165 Elmer WHITE FEMALE FALSE 162.18 67.81
## 38 AC/SG/167 Jimmy WHITE FEMALE FALSE 159.38 70.37
## 39 AC/SG/179 Logan WHITE MALE FALSE 183.10 82.47
## 40 AC/SG/181 Terry HISPANIC MALE FALSE 177.14 88.70
## 41 AC/SG/182 Jamie HISPANIC MALE TRUE 171.08 72.51
## 42 AC/SG/191 Lacy HISPANIC FEMALE FALSE 159.33 70.68
## 43 AC/SG/197 Stacy WHITE FEMALE FALSE 159.44 66.21
## 44 AC/SG/216 Alva WHITE FEMALE FALSE 159.13 66.96
## 45 AC/SG/217 Dean WHITE FEMALE FALSE 160.58 71.49
## 46 AC/SG/234 Luis HISPANIC FEMALE FALSE 164.88 68.07
## BirthDate State Pet HealthGrade Died RecordDate
## 1 28-04-1972 California HORSE Average Health TRUE 25-12-2015
## 2 16-11-1972 Wisconsin DOG Worse TRUE 25-12-2015
## 3 11-03-1972 California NONE Average Health TRUE 25-12-2015
## 4 12-04-1972 Kentucky NONE Good Health TRUE 25-12-2015
## 5 29-10-1972 Texas DOG Good Health TRUE 25-01-2016
## 6 13-10-1973 Washington CAT Bad Health TRUE 25-02-2016
## 7 03-05-1972 Kentucky DOG Good Health TRUE 25-02-2016
## 8 30-08-1973 California DOG Good Health TRUE 25-02-2016
## 9 09-07-1972 California DOG Good Health TRUE 25-02-2016
## 10 22-09-1973 Florida CAT Average Health TRUE 25-02-2016
## 11 01-05-1973 Virginia DOG Bad Health TRUE 25-03-2016
## 12 18-02-1973 Oregon CAT Good Health TRUE 25-03-2016
## 13 17-08-1972 Colorado NONE Worse TRUE 25-03-2016
## 14 06-05-1972 New Jersey DOG Bad Health TRUE 25-03-2016
## 15 24-03-1972 Michigan NONE Bad Health TRUE 25-03-2016
## 16 14-07-1973 California CAT Average Health TRUE 25-04-2016
## 17 07-04-1972 North Carolina BIRD Good Health TRUE 25-04-2016
## 18 01-02-1972 Michigan DOG Worse TRUE 25-04-2016
## 19 23-12-1971 Ohio CAT Bad Health TRUE 25-04-2016
## 20 25-01-1973 Maryland NONE Worse TRUE 25-05-2016
## 21 12-01-1973 Indiana CAT Good Health TRUE 25-05-2016
## 22 23-04-1972 Alabama CAT Average Health TRUE 25-05-2016
## 23 29-01-1973 New York CAT Average Health TRUE 25-06-2016
## 24 03-04-1972 Arizona CAT Bad Health TRUE 25-06-2016
## 25 05-08-1973 Mississippi BIRD Average Health TRUE 25-06-2016
## 26 24-02-1972 Illinois BIRD Bad Health TRUE 25-07-2016
## 27 04-10-1972 Illinois CAT Average Health TRUE 25-07-2016
## 28 05-04-1972 California DOG Bad Health TRUE 25-07-2016
## 29 19-07-1972 Pennsylvania BIRD Average Health TRUE 25-07-2016
## 30 15-04-1972 Tennessee CAT Bad Health TRUE 25-07-2016
## 31 06-03-1972 Virginia DOG Average Health TRUE 25-07-2016
## 32 28-09-1973 Michigan DOG Average Health TRUE 25-07-2016
## 33 05-06-1972 Florida NONE Bad Health TRUE 25-08-2016
## 34 03-09-1972 North Carolina BIRD Good Health TRUE 25-08-2016
## 35 28-05-1972 Texas CAT Average Health TRUE 25-08-2016
## 36 02-06-1972 California CAT Bad Health TRUE 25-08-2016
## 37 25-03-1972 Washington BIRD Good Health TRUE 25-08-2016
## 38 30-09-1973 Washington NONE Average Health TRUE 25-09-2016
## 39 24-10-1972 Ohio DOG Bad Health TRUE 25-09-2016
## 40 24-11-1971 Indiana CAT Bad Health TRUE 25-09-2016
## 41 25-03-1973 Louisiana NONE Bad Health TRUE 25-09-2016
## 42 21-06-1973 Texas NONE Bad Health TRUE 25-09-2016
## 43 08-11-1972 New York CAT Good Health TRUE 25-10-2016
## 44 19-06-1972 Alabama NONE Good Health TRUE 25-10-2016
## 45 11-11-1972 Ohio NONE Good Health TRUE 25-10-2016
## 46 10-11-1971 Pennsylvania CAT Bad Health TRUE 25-10-2016
## BMI BMILabel
## 1 28.24834 Overweight
## 2 22.04640 Normal
## 3 23.51295 Normal
## 4 31.41528 Obese
## 5 29.16127 Overweight
## 6 25.57647 Overweight
## 7 24.12933 Normal
## 8 25.10777 Overweight
## 9 27.96939 Overweight
## 10 28.19431 Overweight
## 11 27.24885 Overweight
## 12 25.04966 Overweight
## 13 27.72752 Overweight
## 14 27.01998 Overweight
## 15 25.62541 Overweight
## 16 28.19517 Overweight
## 17 27.27670 Overweight
## 18 25.91330 Overweight
## 19 24.81202 Normal
## 20 25.72496 Overweight
## 21 30.41152 Obese
## 22 26.75713 Overweight
## 23 25.57631 Overweight
## 24 26.71407 Overweight
## 25 27.80672 Overweight
## 26 28.43316 Overweight
## 27 31.47988 Obese
## 28 26.99968 Overweight
## 29 29.98974 Overweight
## 30 26.48480 Overweight
## 31 26.65410 Overweight
## 32 27.59307 Overweight
## 33 26.60586 Overweight
## 34 27.60005 Overweight
## 35 26.38875 Overweight
## 36 27.80489 Overweight
## 37 25.78096 Overweight
## 38 27.70256 Overweight
## 39 24.59910 Normal
## 40 28.26769 Overweight
## 41 24.77419 Normal
## 42 27.84206 Overweight
## 43 26.04528 Overweight
## 44 26.44304 Overweight
## 45 27.72441 Overweight
## 46 25.03916 Overweight
nrow(filter(dfrPatient, Died==TRUE))
## [1] 46
#Hispanic Females
filter(dfrPatient , Race =="HISPANIC" & Gender== "FEMALES")
## [1] ID Name Race Gender Smokes
## [6] HeightInCms WeightInKgs BirthDate State Pet
## [11] HealthGrade Died RecordDate BMI BMILabel
## <0 rows> (or 0-length row.names)
#Sample Finctions
set.seed(707)
sample_n(dfrPatient , 10)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 9 AC/AH/050 Frances WHITE FEMALE FALSE 166.48 67.34
## 38 AC/AH/210 Keith HISPANIC FEMALE TRUE 170.03 66.68
## 23 AC/AH/127 Jame WHITE MALE FALSE 167.75 82.06
## 45 AC/AH/241 Lindsay WHITE FEMALE FALSE 161.38 73.55
## 63 AC/SG/101 Jason WHITE FEMALE FALSE 159.23 69.96
## 57 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 70 AC/SG/139 Jordan WHITE MALE FALSE 171.94 82.11
## 5 AC/AH/037 Samuel WHITE FEMALE FALSE 161.69 68.85
## 17 AC/AH/086 Kyle BLACK MALE TRUE 180.11 75.72
## 49 AC/SG/002 Jan WHITE FEMALE TRUE 161.57 67.92
## BirthDate State Pet HealthGrade Died RecordDate BMI
## 9 08-11-1971 Michigan NONE Good Health FALSE 25-12-2015 24.29679
## 38 28-08-1972 New York DOG Worse FALSE 25-03-2016 23.06452
## 23 29-10-1972 Texas DOG Good Health TRUE 25-01-2016 29.16127
## 45 08-02-1972 Florida CAT Bad Health FALSE 25-05-2016 28.24121
## 63 28-09-1973 Michigan DOG Average Health TRUE 25-07-2016 27.59307
## 57 19-07-1972 Pennsylvania BIRD Average Health TRUE 25-07-2016 29.98974
## 70 06-10-1973 Michigan NONE Good Health FALSE 25-08-2016 27.77424
## 5 20-03-1972 Pennsylvania NONE Good Health FALSE 25-11-2015 26.33526
## 17 12-05-1973 Georgia CAT Bad Health FALSE 25-12-2015 23.34183
## 49 03-07-1973 Arizona DOG Bad Health FALSE 25-05-2016 26.01814
## BMILabel
## 9 Normal
## 38 Normal
## 23 Overweight
## 45 Overweight
## 63 Overweight
## 57 Overweight
## 70 Overweight
## 5 Overweight
## 17 Normal
## 49 Overweight