library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Reading Dataset
# Load the raw patient records. Text columns are kept as character vectors
# (not factors) so they can be cleaned with string functions afterwards.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
csvpd <- read.csv(
  "patient-data.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
Viewing the data and checking its dimensions (rows and columns), column classes and variables
dim(csvpd)
## [1] 100 13
str(csvpd)
## 'data.frame': 100 obs. of 13 variables:
## $ ID : chr "AC/AH/001" "AC/AH/017" "AC/AH/020" "AC/AH/022" ...
## $ Name : chr "Demetrius" "Rosario" "Julio" "Lupe" ...
## $ Race : chr "White" "White" "Black" "White" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Smokes : chr "False" "False" "False" "False" ...
## $ HeightInCms: num 183 179 169 176 164 ...
## $ WeightInKgs: num 76.6 80.4 75.5 94.5 71.8 ...
## $ BirthDate : chr "31-01-1972" "09-06-1972" "03-07-1972" "11-08-1972" ...
## $ State : chr "Georgia,xxx" "Missouri" "Pennsylvania" "Florida" ...
## $ Pet : chr "Dog" "Dog" "None" "Cat" ...
## $ HealthGrade: int 2 2 2 1 2 2 1 1 1 2 ...
## $ Died : chr "False" "False" "False" "False" ...
## $ RecordDate : chr "25-11-2015" "25-11-2015" "25-11-2015" "25-11-2015" ...
head(csvpd)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/001 Demetrius White Male False 182.87 76.57
## 2 AC/AH/017 Rosario White Male False 179.12 80.43
## 3 AC/AH/020 Julio Black Male False 169.15 75.48
## 4 AC/AH/022 Lupe White Male False 175.66 94.54
## 5 AC/AH/029 Lavern White Female False 164.47 71.78
## 6 AC/AH/033 Bernie Dog Female True 158.27 69.90
## BirthDate State Pet HealthGrade Died RecordDate
## 1 31-01-1972 Georgia,xxx Dog 2 False 25-11-2015
## 2 09-06-1972 Missouri Dog 2 False 25-11-2015
## 3 03-07-1972 Pennsylvania None 2 False 25-11-2015
## 4 11-08-1972 Florida Cat 1 False 25-11-2015
## 5 06-06-1973 Iowa NULL 2 True 25-11-2015
## 6 25-06-1973 Maryland Dog 2 False 25-11-2015
View(csvpd)
Data Preparation (Cleaning): Detecting NAs
which(is.na(csvpd$Name))
## integer(0)
# Count the missing (NA) entries in a vector.
#
# inp: any vector, e.g. a single column of a data frame.
# Returns one integer: the number of elements of `inp` that are NA.
detectNAs <- function(inp) {
  na_flags <- is.na(inp)
  sum(na_flags)
}
lapply(csvpd, FUN=detectNAs)
## $ID
## [1] 0
##
## $Name
## [1] 0
##
## $Race
## [1] 0
##
## $Gender
## [1] 0
##
## $Smokes
## [1] 0
##
## $HeightInCms
## [1] 0
##
## $WeightInKgs
## [1] 0
##
## $BirthDate
## [1] 0
##
## $State
## [1] 0
##
## $Pet
## [1] 2
##
## $HealthGrade
## [1] 0
##
## $Died
## [1] 0
##
## $RecordDate
## [1] 0
Detecting Zeros
# Count how many entries of a numeric vector are exactly zero.
#
# inp: a vector, e.g. a single column of a data frame.
# Returns the string " Not Numeric" when `inp` is not numeric (double or
# integer); otherwise the number of elements equal to 0. Missing values are
# ignored, so a column containing NAs still yields a count.
detectZeros <- function(inp) {
  # is.numeric() accepts both double and integer vectors and always returns a
  # single TRUE/FALSE, unlike comparing class(inp) against a string.
  if (!is.numeric(inp)) {
    return(" Not Numeric")
  }
  # Compare with `==`; `sum(inp = 0)` would merely sum the constant 0.
  sum(inp == 0, na.rm = TRUE)
}
lapply(csvpd, FUN=detectZeros)
## $ID
## [1] " Not Numeric"
##
## $Name
## [1] " Not Numeric"
##
## $Race
## [1] " Not Numeric"
##
## $Gender
## [1] " Not Numeric"
##
## $Smokes
## [1] " Not Numeric"
##
## $HeightInCms
## [1] 0
##
## $WeightInKgs
## [1] 0
##
## $BirthDate
## [1] " Not Numeric"
##
## $State
## [1] " Not Numeric"
##
## $Pet
## [1] " Not Numeric"
##
## $HealthGrade
## [1] " Not Numeric"
##
## $Died
## [1] " Not Numeric"
##
## $RecordDate
## [1] " Not Numeric"
Detecting Spaces
# Count the entries of a character vector that are empty or contain only
# whitespace.
#
# inp: a vector, e.g. a single column of a data frame.
# Returns the string " Not character" when `inp` is not a character vector;
# otherwise the number of elements that are "" after trimming leading and
# trailing whitespace. Missing values are ignored, so a column containing NAs
# yields a count rather than NA.
detectSpaces <- function(inp) {
  if (!is.character(inp)) {
    return(" Not character")
  }
  blank <- trimws(inp) == ""
  # NA elements compare as NA; drop them instead of poisoning the total.
  sum(blank, na.rm = TRUE)
}
lapply(csvpd, FUN=detectSpaces)
## $ID
## [1] 0
##
## $Name
## [1] 0
##
## $Race
## [1] 0
##
## $Gender
## [1] 0
##
## $Smokes
## [1] 0
##
## $HeightInCms
## [1] " Not character"
##
## $WeightInKgs
## [1] " Not character"
##
## $BirthDate
## [1] 0
##
## $State
## [1] 0
##
## $Pet
## [1] NA
##
## $HealthGrade
## [1] " Not character"
##
## $Died
## [1] 0
##
## $RecordDate
## [1] 0
Detecting outliers
# Return the values of a numeric vector that lie outside the 1.5 * IQR fences.
#
# inp:   a vector, e.g. a single column of a data frame.
# na.rm: passed to quantile() and IQR(); with the default TRUE, missing values
#        are ignored when the fences are computed. With FALSE, quantile()
#        raises an error if `inp` contains NAs.
# Returns the string "Not a numeric column" when `inp` is not numeric (double
# or integer); otherwise the elements of `inp`, in their original order, that
# are below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR. Missing values in `inp`
# are never reported as outliers.
detectOutliers <- function(inp, na.rm = TRUE) {
  if (!is.numeric(inp)) {
    return("Not a numeric column")
  }
  quartiles <- quantile(inp, probs = c(0.25, 0.75), na.rm = na.rm)
  # Named `fence`, not `max`, so base::max is not shadowed.
  fence <- 1.5 * IQR(inp, na.rm = na.rm)
  lower <- quartiles[[1]] - fence
  upper <- quartiles[[2]] + fence
  # Build the mask directly so pre-existing NAs are not mistaken for outliers.
  is_outlier <- !is.na(inp) & (inp < lower | inp > upper)
  inp[is_outlier]
}
lapply(csvpd, FUN=detectOutliers)
## $ID
## [1] "Not a numeric column"
##
## $Name
## [1] "Not a numeric column"
##
## $Race
## [1] "Not a numeric column"
##
## $Gender
## [1] "Not a numeric column"
##
## $Smokes
## [1] "Not a numeric column"
##
## $HeightInCms
## numeric(0)
##
## $WeightInKgs
## numeric(0)
##
## $BirthDate
## [1] "Not a numeric column"
##
## $State
## [1] "Not a numeric column"
##
## $Pet
## [1] "Not a numeric column"
##
## $HealthGrade
## [1] "Not a numeric column"
##
## $Died
## [1] "Not a numeric column"
##
## $RecordDate
## [1] "Not a numeric column"
Summarizing and cleaning Data in column Gender
summarise(group_by(csvpd,Gender), n())
## # A tibble: 6 x 2
## Gender `n()`
## <chr> <int>
## 1 Female 6
## 2 Male 3
## 3 Female 45
## 4 Female 4
## 5 Male 40
## 6 Male 2
# Normalise Gender: upper-case every value, then strip surrounding
# whitespace, so spelling variants collapse into one label each.
csvpd$Gender <- csvpd$Gender %>%
  toupper() %>%
  trimws()
summarise(group_by(csvpd,Gender), n())
## # A tibble: 2 x 2
## Gender `n()`
## <chr> <int>
## 1 FEMALE 55
## 2 MALE 45
Summarising and cleaning data in col Race
summarise(group_by(csvpd,Race), n())
## # A tibble: 6 x 2
## Race `n()`
## <chr> <int>
## 1 Asian 5
## 2 Bi-Racial 1
## 3 Black 8
## 4 Dog 1
## 5 Hispanic 17
## 6 White 68
# Normalise Race: upper-case every value and strip surrounding whitespace.
csvpd$Race <- csvpd$Race %>%
  toupper() %>%
  trimws()
# "DOG" is not a valid race; treat that entry as missing.
csvpd$Race <- replace(csvpd$Race, which(csvpd$Race == "DOG"), NA)
summarise(group_by(csvpd,Race), n())
## # A tibble: 6 x 2
## Race `n()`
## <chr> <int>
## 1 ASIAN 5
## 2 BI-RACIAL 1
## 3 BLACK 8
## 4 HISPANIC 17
## 5 WHITE 68
## 6 <NA> 1
Summarising and cleaning data in col Died
summarise(group_by(csvpd,Died), n())
## # A tibble: 2 x 2
## Died `n()`
## <chr> <int>
## 1 False 46
## 2 True 54
class(csvpd$Died)
## [1] "character"
csvpd$Died <- as.logical(csvpd$Died)
class(csvpd$Died)
## [1] "logical"
summarise(group_by(csvpd,Died), n())
## # A tibble: 2 x 2
## Died `n()`
## <lgl> <int>
## 1 FALSE 46
## 2 TRUE 54
Summarising and cleaning data in col Pet
summarise(group_by(csvpd,Pet), n())
## # A tibble: 10 x 2
## Pet `n()`
## <chr> <int>
## 1 Bird 9
## 2 Cat 24
## 3 CAT 5
## 4 Dog 28
## 5 DOG 4
## 6 Horse 1
## 7 None 23
## 8 NONE 1
## 9 NULL 3
## 10 <NA> 2
# Normalise Pet: upper-case every value and strip surrounding whitespace.
csvpd$Pet <- csvpd$Pet %>%
  toupper() %>%
  trimws()
# The literal text "NULL" stands for a missing value; convert it to a real NA.
csvpd$Pet <- replace(csvpd$Pet, which(csvpd$Pet == "NULL"), NA)
summarise(group_by(csvpd,Pet), n())
## # A tibble: 6 x 2
## Pet `n()`
## <chr> <int>
## 1 BIRD 9
## 2 CAT 29
## 3 DOG 32
## 4 HORSE 1
## 5 NONE 24
## 6 <NA> 5
Summarising and cleaning data in col Smokes
summarise(group_by(csvpd,Smokes), n())
## # A tibble: 4 x 2
## Smokes `n()`
## <chr> <int>
## 1 False 72
## 2 No 6
## 3 True 18
## 4 Yes 4
class(csvpd$Smokes)
## [1] "character"
# Normalise Smokes to a logical column.
# Step 1: upper-case and trim so "Yes"/"No"/"True"/"False" variants line up.
csvpd$Smokes <- csvpd$Smokes %>%
  toupper() %>%
  trimws()
# Step 2: rewrite the NO/YES spellings as the strings as.logical() understands.
csvpd$Smokes <- replace(csvpd$Smokes, which(csvpd$Smokes == "NO"), "FALSE")
csvpd$Smokes <- replace(csvpd$Smokes, which(csvpd$Smokes == "YES"), "TRUE")
# Step 3: convert the cleaned text to logical.
csvpd$Smokes <- as.logical(csvpd$Smokes)
summarise(group_by(csvpd,Smokes), n())
## # A tibble: 2 x 2
## Smokes `n()`
## <lgl> <int>
## 1 FALSE 78
## 2 TRUE 22
Summarising and cleaning data in col HealthGrade
summarise(group_by(csvpd,HealthGrade), n())
## # A tibble: 4 x 2
## HealthGrade `n()`
## <int> <int>
## 1 1 29
## 2 2 30
## 3 3 34
## 4 99 7
class(csvpd$HealthGrade)
## [1] "integer"
# Recode the HealthGrade codes as text labels. The column is converted to
# character up front so every comparison below is string against string.
csvpd$HealthGrade <- as.character(csvpd$HealthGrade)
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, which(csvpd$HealthGrade == "1"), "Good"
)
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, which(csvpd$HealthGrade == "2"), "NORMAL"
)
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, which(csvpd$HealthGrade == "3"), "BAD"
)
# Grade 99 is treated as "no grade recorded" and becomes NA.
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, which(csvpd$HealthGrade == "99"), NA
)
class(csvpd$HealthGrade)
## [1] "character"
summarise(group_by(csvpd,HealthGrade), n())
## # A tibble: 4 x 2
## HealthGrade `n()`
## <chr> <int>
## 1 BAD 34
## 2 Good 29
## 3 NORMAL 30
## 4 <NA> 7
Summarising and cleaning data in col State
View(summarise(group_by(csvpd,State), n()))
csvpd$State[csvpd$State== "Georgia,xxx"] <- "Georgia"
View(summarise(group_by(csvpd,State), n()))
To remove missing values
nrow(csvpd)
## [1] 100
# Flag the rows that have no NA in any column, then keep only those rows.
vclComplete <- complete.cases(csvpd)
csvpd <- csvpd[which(vclComplete), , drop = FALSE]
nrow(csvpd)
## [1] 88
Calculating BMI Value
# Body-mass index: weight in kilograms divided by the square of the height in
# metres (HeightInCms is converted from centimetres by dividing by 100).
csvpd <- csvpd %>%
  mutate(BMI_Value = WeightInKgs / (HeightInCms / 100)^2)
Creating BMI Label
# Label each record with its BMI category:
#   BMI <  18.5        -> "Underweight"
#   18.5 <= BMI < 25   -> "Normal"
#   25   <= BMI < 30   -> "Overweight"
#   BMI >= 30          -> "Obese"
# The bins are left-closed (`right = FALSE`), so a BMI of exactly 18.5, 25 or
# 30 falls into the upper category instead of being left as NA.
# `include.lowest = TRUE` closes the last bin on the right so an infinite BMI
# is still labelled "Obese". A missing BMI_Value yields a missing label.
csvpd <- mutate(
  csvpd,
  BMILabel = as.character(cut(
    BMI_Value,
    breaks = c(-Inf, 18.5, 25, 30, Inf),
    labels = c("Underweight", "Normal", "Overweight", "Obese"),
    right = FALSE,
    include.lowest = TRUE
  ))
)
View(csvpd)
Reporting: Display the top 10 records based on BMI-Value
head(arrange(csvpd, desc(BMI_Value)), 10)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/SG/009 Sammy WHITE MALE FALSE 166.84 88.25
## 2 AC/SG/064 Jon WHITE MALE FALSE 169.16 90.08
## 3 AC/AH/076 Albert WHITE MALE FALSE 176.22 97.67
## 4 AC/AH/104 Jeremy WHITE MALE TRUE 169.85 90.63
## 5 AC/AH/022 Lupe WHITE MALE FALSE 175.66 94.54
## 6 AC/AH/248 Andrea WHITE MALE FALSE 178.64 97.05
## 7 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 8 AC/AH/052 Courtney WHITE MALE TRUE 175.39 92.22
## 9 AC/AH/159 Edward WHITE MALE FALSE 181.64 96.91
## 10 AC/AH/127 Jame WHITE MALE FALSE 167.75 82.06
## BirthDate State Pet HealthGrade Died RecordDate BMI_Value
## 1 04-03-1972 Vermont DOG Good FALSE 25-06-2016 31.70402
## 2 04-10-1972 Illinois CAT NORMAL TRUE 25-07-2016 31.47988
## 3 08-04-1973 Louisiana CAT NORMAL FALSE 25-12-2015 31.45218
## 4 12-04-1972 Kentucky NONE Good TRUE 25-12-2015 31.41528
## 5 11-08-1972 Florida CAT Good FALSE 25-11-2015 30.63867
## 6 12-01-1973 Indiana CAT Good TRUE 25-05-2016 30.41152
## 7 19-07-1972 Pennsylvania BIRD NORMAL TRUE 25-07-2016 29.98974
## 8 16-03-1972 Indiana BIRD BAD FALSE 25-12-2015 29.97888
## 9 04-12-1972 Connecticut CAT NORMAL FALSE 25-02-2016 29.37282
## 10 29-10-1972 Texas DOG Good TRUE 25-01-2016 29.16127
## BMILabel
## 1 Obese
## 2 Obese
## 3 Obese
## 4 Obese
## 5 Obese
## 6 Obese
## 7 Overweight
## 8 Overweight
## 9 Overweight
## 10 Overweight
Display bottom 10 records based on BMI-Value
head(arrange(csvpd, BMI_Value), 10)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/SG/193 Ronnie WHITE MALE TRUE 185.43 73.63
## 2 AC/SG/099 Leslie ASIAN MALE FALSE 172.72 67.62
## 3 AC/AH/001 Demetrius WHITE MALE FALSE 182.87 76.57
## 4 AC/AH/086 Kyle BLACK MALE TRUE 180.11 75.72
## 5 AC/AH/045 Shirley WHITE MALE FALSE 181.32 76.90
## 6 AC/AH/089 Dong WHITE MALE FALSE 179.24 75.54
## 7 AC/AH/164 Shane HISPANIC MALE TRUE 177.03 74.04
## 8 AC/AH/114 Kris HISPANIC MALE FALSE 177.75 74.84
## 9 AC/AH/077 Tommy BLACK MALE FALSE 174.09 72.20
## 10 AC/AH/150 Brett WHITE MALE TRUE 181.56 79.54
## BirthDate State Pet HealthGrade Died RecordDate BMI_Value
## 1 05-06-1973 Iowa DOG BAD FALSE 25-09-2016 21.41385
## 2 04-02-1972 Ohio CAT Good FALSE 25-07-2016 22.66678
## 3 31-01-1972 Georgia DOG NORMAL FALSE 25-11-2015 22.89674
## 4 12-05-1973 Georgia CAT BAD FALSE 25-12-2015 23.34183
## 5 25-12-1971 Louisiana DOG Good FALSE 25-11-2015 23.39025
## 6 11-03-1972 California NONE NORMAL TRUE 25-12-2015 23.51295
## 7 18-02-1972 Florida NONE NORMAL FALSE 25-02-2016 23.62505
## 8 19-11-1972 Pennsylvania BIRD BAD FALSE 25-01-2016 23.68725
## 9 01-02-1973 Washington CAT BAD FALSE 25-12-2015 23.82262
## 10 03-05-1972 Kentucky DOG Good TRUE 25-02-2016 24.12933
## BMILabel
## 1 Normal
## 2 Normal
## 3 Normal
## 4 Normal
## 5 Normal
## 6 Normal
## 7 Normal
## 8 Normal
## 9 Normal
## 10 Normal
Provide frequency / counts of Gender > Race
summarise(group_by(csvpd, Gender, Race), n())
## # A tibble: 9 x 3
## # Groups: Gender [?]
## Gender Race `n()`
## <chr> <chr> <int>
## 1 FEMALE ASIAN 2
## 2 FEMALE BLACK 1
## 3 FEMALE HISPANIC 6
## 4 FEMALE WHITE 37
## 5 MALE ASIAN 2
## 6 MALE BI-RACIAL 1
## 7 MALE BLACK 4
## 8 MALE HISPANIC 9
## 9 MALE WHITE 26
To view the same in table form
table(csvpd$Gender, csvpd$Race)
##
## ASIAN BI-RACIAL BLACK HISPANIC WHITE
## FEMALE 2 0 1 6 37
## MALE 2 1 4 9 26
table(csvpd$Race, csvpd$Gender)
##
## FEMALE MALE
## ASIAN 2 2
## BI-RACIAL 0 1
## BLACK 1 4
## HISPANIC 6 9
## WHITE 37 26
Provide the max, min and average BMI-Value for each Race > Gender group
summarise(group_by(csvpd, Gender, Race),min(BMI_Value), max(BMI_Value), mean(BMI_Value))
## # A tibble: 9 x 5
## # Groups: Gender [?]
## Gender Race `min(BMI_Value)` `max(BMI_Value)` `mean(BMI_Value)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 FEMALE ASIAN 25.57631 28.19431 26.88531
## 2 FEMALE BLACK 26.71407 26.71407 26.71407
## 3 FEMALE HISPANIC 25.03916 27.84206 26.52176
## 4 FEMALE WHITE 24.21459 28.24834 26.46648
## 5 MALE ASIAN 22.66678 27.24885 24.95782
## 6 MALE BI-RACIAL 24.83473 24.83473 24.83473
## 7 MALE BLACK 23.34183 26.60586 25.03778
## 8 MALE HISPANIC 23.62505 28.26769 26.02289
## 9 MALE WHITE 21.41385 31.70402 27.67114
**Display All Records for all people who are dead**
filter(csvpd, Died==TRUE)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/049 Martin WHITE FEMALE FALSE 160.06 72.37
## 2 AC/AH/089 Dong WHITE MALE FALSE 179.24 75.54
## 3 AC/AH/104 Jeremy WHITE MALE TRUE 169.85 90.63
## 4 AC/AH/127 Jame WHITE MALE FALSE 167.75 82.06
## 5 AC/AH/133 Clyde HISPANIC MALE FALSE 181.15 83.93
## 6 AC/AH/150 Brett WHITE MALE TRUE 181.56 79.54
## 7 AC/AH/154 Tony WHITE FEMALE FALSE 160.03 64.30
## 8 AC/AH/156 George WHITE MALE FALSE 165.62 76.72
## 9 AC/AH/160 Rory ASIAN FEMALE FALSE 159.67 71.88
## 10 AC/AH/171 Devin WHITE FEMALE FALSE 163.35 70.46
## 11 AC/AH/176 Jerry ASIAN MALE FALSE 175.21 83.65
## 12 AC/AH/180 Drew WHITE FEMALE FALSE 160.80 64.77
## 13 AC/AH/186 Christopher WHITE FEMALE FALSE 157.95 67.41
## 14 AC/AH/192 Dominique WHITE MALE FALSE 180.61 83.59
## 15 AC/AH/211 Son WHITE FEMALE FALSE 157.16 69.64
## 16 AC/AH/219 Jay WHITE FEMALE FALSE 163.47 72.89
## 17 AC/AH/233 Marion WHITE FEMALE FALSE 163.97 66.71
## 18 AC/AH/248 Andrea WHITE MALE FALSE 178.64 97.05
## 19 AC/AH/249 Jesus HISPANIC FEMALE TRUE 159.78 68.31
## 20 AC/SG/003 Walter WHITE FEMALE FALSE 161.83 66.03
## 21 AC/SG/008 Dana WHITE MALE TRUE 169.66 77.30
## 22 AC/SG/010 Theo ASIAN FEMALE FALSE 159.32 64.92
## 23 AC/SG/015 Shaun WHITE MALE TRUE 170.51 84.35
## 24 AC/SG/016 Jimmie BLACK FEMALE FALSE 161.84 69.97
## 25 AC/SG/046 Carl HISPANIC MALE FALSE 171.41 81.70
## 26 AC/SG/055 Evan WHITE MALE FALSE 166.75 79.06
## 27 AC/SG/064 Jon WHITE MALE FALSE 169.16 90.08
## 28 AC/SG/065 Shayne WHITE FEMALE FALSE 157.01 66.56
## 29 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 30 AC/SG/068 Valentine HISPANIC FEMALE FALSE 160.47 68.20
## 31 AC/SG/084 Brian HISPANIC MALE FALSE 174.25 80.93
## 32 AC/SG/101 Jason WHITE FEMALE FALSE 159.23 69.96
## 33 AC/SG/116 Connie BLACK MALE FALSE 184.34 90.41
## 34 AC/SG/123 Darnell WHITE FEMALE TRUE 162.32 72.72
## 35 AC/SG/134 Daryl WHITE FEMALE TRUE 162.59 69.76
## 36 AC/SG/155 Raymond WHITE FEMALE FALSE 158.35 69.72
## 37 AC/SG/165 Elmer WHITE FEMALE FALSE 162.18 67.81
## 38 AC/SG/167 Jimmy WHITE FEMALE FALSE 159.38 70.37
## 39 AC/SG/172 Whitney WHITE MALE FALSE 171.45 84.29
## 40 AC/SG/179 Logan WHITE MALE FALSE 183.10 82.47
## 41 AC/SG/181 Terry HISPANIC MALE FALSE 177.14 88.70
## 42 AC/SG/182 Jamie HISPANIC MALE TRUE 171.08 72.51
## 43 AC/SG/191 Lacy HISPANIC FEMALE FALSE 159.33 70.68
## 44 AC/SG/197 Stacy WHITE FEMALE FALSE 159.44 66.21
## 45 AC/SG/216 Alva WHITE FEMALE FALSE 159.13 66.96
## 46 AC/SG/217 Dean WHITE FEMALE FALSE 160.58 71.49
## 47 AC/SG/234 Luis HISPANIC FEMALE FALSE 164.88 68.07
## BirthDate State Pet HealthGrade Died RecordDate BMI_Value
## 1 28-04-1972 California HORSE NORMAL TRUE 25-12-2015 28.24834
## 2 11-03-1972 California NONE NORMAL TRUE 25-12-2015 23.51295
## 3 12-04-1972 Kentucky NONE Good TRUE 25-12-2015 31.41528
## 4 29-10-1972 Texas DOG Good TRUE 25-01-2016 29.16127
## 5 13-10-1973 Washington CAT BAD TRUE 25-02-2016 25.57647
## 6 03-05-1972 Kentucky DOG Good TRUE 25-02-2016 24.12933
## 7 30-08-1973 California DOG Good TRUE 25-02-2016 25.10777
## 8 09-07-1972 California DOG Good TRUE 25-02-2016 27.96939
## 9 22-09-1973 Florida CAT NORMAL TRUE 25-02-2016 28.19431
## 10 16-04-1973 California BIRD BAD TRUE 25-03-2016 26.40611
## 11 01-05-1973 Virginia DOG BAD TRUE 25-03-2016 27.24885
## 12 18-02-1973 Oregon CAT Good TRUE 25-03-2016 25.04966
## 13 06-05-1972 New Jersey DOG BAD TRUE 25-03-2016 27.01998
## 14 24-03-1972 Michigan NONE BAD TRUE 25-03-2016 25.62541
## 15 14-07-1973 California CAT NORMAL TRUE 25-04-2016 28.19517
## 16 07-04-1972 North Carolina BIRD Good TRUE 25-04-2016 27.27670
## 17 23-12-1971 Ohio CAT BAD TRUE 25-04-2016 24.81202
## 18 12-01-1973 Indiana CAT Good TRUE 25-05-2016 30.41152
## 19 23-04-1972 Alabama CAT NORMAL TRUE 25-05-2016 26.75713
## 20 11-07-1972 Oregon NONE NORMAL TRUE 25-05-2016 25.21292
## 21 26-05-1973 Nevada DOG Good TRUE 25-05-2016 26.85472
## 22 29-01-1973 New York CAT NORMAL TRUE 25-06-2016 25.57631
## 23 09-11-1972 New Jersey DOG BAD TRUE 25-06-2016 29.01252
## 24 03-04-1972 Arizona CAT BAD TRUE 25-06-2016 26.71407
## 25 05-08-1973 Mississippi BIRD NORMAL TRUE 25-06-2016 27.80672
## 26 24-02-1972 Illinois BIRD BAD TRUE 25-07-2016 28.43316
## 27 04-10-1972 Illinois CAT NORMAL TRUE 25-07-2016 31.47988
## 28 05-04-1972 California DOG BAD TRUE 25-07-2016 26.99968
## 29 19-07-1972 Pennsylvania BIRD NORMAL TRUE 25-07-2016 29.98974
## 30 15-04-1972 Tennessee CAT BAD TRUE 25-07-2016 26.48480
## 31 06-03-1972 Virginia DOG NORMAL TRUE 25-07-2016 26.65410
## 32 28-09-1973 Michigan DOG NORMAL TRUE 25-07-2016 27.59307
## 33 05-06-1972 Florida NONE BAD TRUE 25-08-2016 26.60586
## 34 03-09-1972 North Carolina BIRD Good TRUE 25-08-2016 27.60005
## 35 28-05-1972 Texas CAT NORMAL TRUE 25-08-2016 26.38875
## 36 02-06-1972 California CAT BAD TRUE 25-08-2016 27.80489
## 37 25-03-1972 Washington BIRD Good TRUE 25-08-2016 25.78096
## 38 30-09-1973 Washington NONE NORMAL TRUE 25-09-2016 27.70256
## 39 25-02-1972 Florida DOG NORMAL TRUE 25-09-2016 28.67484
## 40 24-10-1972 Ohio DOG BAD TRUE 25-09-2016 24.59910
## 41 24-11-1971 Indiana CAT BAD TRUE 25-09-2016 28.26769
## 42 25-03-1973 Louisiana NONE BAD TRUE 25-09-2016 24.77419
## 43 21-06-1973 Texas NONE BAD TRUE 25-09-2016 27.84206
## 44 08-11-1972 New York CAT Good TRUE 25-10-2016 26.04528
## 45 19-06-1972 Alabama NONE Good TRUE 25-10-2016 26.44304
## 46 11-11-1972 Ohio NONE Good TRUE 25-10-2016 27.72441
## 47 10-11-1971 Pennsylvania CAT BAD TRUE 25-10-2016 25.03916
## BMILabel
## 1 Overweight
## 2 Normal
## 3 Obese
## 4 Overweight
## 5 Overweight
## 6 Normal
## 7 Overweight
## 8 Overweight
## 9 Overweight
## 10 Overweight
## 11 Overweight
## 12 Overweight
## 13 Overweight
## 14 Overweight
## 15 Overweight
## 16 Overweight
## 17 Normal
## 18 Obese
## 19 Overweight
## 20 Overweight
## 21 Overweight
## 22 Overweight
## 23 Overweight
## 24 Overweight
## 25 Overweight
## 26 Overweight
## 27 Obese
## 28 Overweight
## 29 Overweight
## 30 Overweight
## 31 Overweight
## 32 Overweight
## 33 Overweight
## 34 Overweight
## 35 Overweight
## 36 Overweight
## 37 Overweight
## 38 Overweight
## 39 Overweight
## 40 Normal
## 41 Overweight
## 42 Normal
## 43 Overweight
## 44 Overweight
## 45 Overweight
## 46 Overweight
## 47 Overweight
nrow(filter(csvpd, Died==TRUE))
## [1] 47
Display All Records for “Hispanic Females”
filter(csvpd, Race=="HISPANIC" & Gender=="FEMALE")
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/208 Lawrence HISPANIC FEMALE FALSE 165.80 71.77
## 2 AC/AH/249 Jesus HISPANIC FEMALE TRUE 159.78 68.31
## 3 AC/SG/068 Valentine HISPANIC FEMALE FALSE 160.47 68.20
## 4 AC/SG/122 Michal HISPANIC FEMALE FALSE 160.09 68.94
## 5 AC/SG/191 Lacy HISPANIC FEMALE FALSE 159.33 70.68
## 6 AC/SG/234 Luis HISPANIC FEMALE FALSE 164.88 68.07
## BirthDate State Pet HealthGrade Died RecordDate BMI_Value
## 1 07-08-1973 Louisiana NONE Good FALSE 25-03-2016 26.10802
## 2 23-04-1972 Alabama CAT NORMAL TRUE 25-05-2016 26.75713
## 3 15-04-1972 Tennessee CAT BAD TRUE 25-07-2016 26.48480
## 4 16-12-1971 South Carolina DOG Good FALSE 25-08-2016 26.89942
## 5 21-06-1973 Texas NONE BAD TRUE 25-09-2016 27.84206
## 6 10-11-1971 Pennsylvania CAT BAD TRUE 25-10-2016 25.03916
## BMILabel
## 1 Overweight
## 2 Overweight
## 3 Overweight
## 4 Overweight
## 5 Overweight
## 6 Overweight
nrow(filter(csvpd, Race=="HISPANIC" & Gender=="FEMALE"))
## [1] 6
Provide 7 sample records from the Dataset. Use set.seed(707)
set.seed(707)
sample_n(csvpd, 7)
## ID Name Race Gender Smokes HeightInCms WeightInKgs
## 9 AC/AH/049 Martin WHITE FEMALE FALSE 160.06 72.37
## 39 AC/AH/211 Son WHITE FEMALE FALSE 157.16 69.64
## 24 AC/AH/133 Clyde HISPANIC MALE FALSE 181.15 83.93
## 46 AC/AH/248 Andrea WHITE MALE FALSE 178.64 97.05
## 64 AC/SG/101 Jason WHITE FEMALE FALSE 159.23 69.96
## 59 AC/SG/067 Thomas WHITE MALE FALSE 167.51 84.15
## 72 AC/SG/142 Kenneth WHITE FEMALE FALSE 158.07 69.80
## BirthDate State Pet HealthGrade Died RecordDate BMI_Value
## 9 28-04-1972 California HORSE NORMAL TRUE 25-12-2015 28.24834
## 39 14-07-1973 California CAT NORMAL TRUE 25-04-2016 28.19517
## 24 13-10-1973 Washington CAT BAD TRUE 25-02-2016 25.57647
## 46 12-01-1973 Indiana CAT Good TRUE 25-05-2016 30.41152
## 64 28-09-1973 Michigan DOG NORMAL TRUE 25-07-2016 27.59307
## 59 19-07-1972 Pennsylvania BIRD NORMAL TRUE 25-07-2016 29.98974
## 72 15-05-1972 Kansas DOG BAD FALSE 25-08-2016 27.93550
## BMILabel
## 9 Overweight
## 39 Overweight
## 24 Overweight
## 46 Obese
## 64 Overweight
## 59 Overweight
## 72 Overweight