The patient data set has the following variables:

ID, Name, Race, Gender, Smokes, HeightInCms, WeightInKgs, BirthDate, State, Pet, HealthGrade, Died, RecordDate

Loading the required libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Reading the Dataset

Loading the patient records from the CSV file

# Load the patient records. Text columns are kept as character (not factor)
# so they can be trimmed and re-cased during cleaning.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
csvpd <- read.csv("patient-data.csv", header = TRUE, sep = ",",
                  stringsAsFactors = FALSE)

Viewing the data and checking the number of rows and columns and the class of each variable

dim(csvpd)
## [1] 100  13
str(csvpd)
## 'data.frame':    100 obs. of  13 variables:
##  $ ID         : chr  "AC/AH/001" "AC/AH/017" "AC/AH/020" "AC/AH/022" ...
##  $ Name       : chr  "Demetrius" "Rosario" "Julio" "Lupe" ...
##  $ Race       : chr  "White" "White" "Black" "White" ...
##  $ Gender     : chr  "Male" "Male" "Male" "Male" ...
##  $ Smokes     : chr  "False" "False" "False" "False" ...
##  $ HeightInCms: num  183 179 169 176 164 ...
##  $ WeightInKgs: num  76.6 80.4 75.5 94.5 71.8 ...
##  $ BirthDate  : chr  "31-01-1972" "09-06-1972" "03-07-1972" "11-08-1972" ...
##  $ State      : chr  "Georgia,xxx" "Missouri" "Pennsylvania" "Florida" ...
##  $ Pet        : chr  "Dog" "Dog" "None" "Cat" ...
##  $ HealthGrade: int  2 2 2 1 2 2 1 1 1 2 ...
##  $ Died       : chr  "False" "False" "False" "False" ...
##  $ RecordDate : chr  "25-11-2015" "25-11-2015" "25-11-2015" "25-11-2015" ...
head(csvpd)
##          ID      Name  Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/001 Demetrius White   Male  False      182.87       76.57
## 2 AC/AH/017   Rosario White   Male  False      179.12       80.43
## 3 AC/AH/020     Julio Black   Male  False      169.15       75.48
## 4 AC/AH/022      Lupe White   Male  False      175.66       94.54
## 5 AC/AH/029    Lavern White Female  False      164.47       71.78
## 6 AC/AH/033    Bernie   Dog Female   True      158.27       69.90
##    BirthDate        State  Pet HealthGrade  Died RecordDate
## 1 31-01-1972  Georgia,xxx  Dog           2 False 25-11-2015
## 2 09-06-1972     Missouri  Dog           2 False 25-11-2015
## 3 03-07-1972 Pennsylvania None           2 False 25-11-2015
## 4 11-08-1972      Florida  Cat           1 False 25-11-2015
## 5 06-06-1973         Iowa NULL           2  True 25-11-2015
## 6 25-06-1973     Maryland  Dog           2 False 25-11-2015
View(csvpd)

Data Preparation (Cleaning): Detecting NAs

which(is.na(csvpd$Name)) 
## integer(0)
# Count the missing (NA) entries in `inp`.
#
# inp: a vector, typically one column of a data frame.
# Returns the number of elements for which is.na() is TRUE.
detectNAs <- function(inp) {
  na_flags <- is.na(inp)
  sum(na_flags)
}

lapply(csvpd, FUN=detectNAs)
## $ID
## [1] 0
## 
## $Name
## [1] 0
## 
## $Race
## [1] 0
## 
## $Gender
## [1] 0
## 
## $Smokes
## [1] 0
## 
## $HeightInCms
## [1] 0
## 
## $WeightInKgs
## [1] 0
## 
## $BirthDate
## [1] 0
## 
## $State
## [1] 0
## 
## $Pet
## [1] 2
## 
## $HealthGrade
## [1] 0
## 
## $Died
## [1] 0
## 
## $RecordDate
## [1] 0

Detecting Zeros

# Count the entries of a numeric column that are exactly zero.
#
# inp: a vector, typically one column of a data frame.
# Returns " Not Numeric" for non-numeric input; otherwise the number of
# elements equal to 0 (missing values are ignored).
detectZeros <- function(inp) {
  # is.numeric() accepts both double and integer columns and always yields a
  # single TRUE/FALSE, unlike comparing against class(), which can have
  # several entries.
  if (!is.numeric(inp)) {
    return(" Not Numeric")
  }
  # `==` compares each element with zero. The earlier `sum(inp=0)` passed 0 as
  # an argument named `inp`, so the result was always 0.
  sum(inp == 0, na.rm = TRUE)
}
lapply(csvpd, FUN=detectZeros) 
## $ID
## [1] " Not Numeric"
## 
## $Name
## [1] " Not Numeric"
## 
## $Race
## [1] " Not Numeric"
## 
## $Gender
## [1] " Not Numeric"
## 
## $Smokes
## [1] " Not Numeric"
## 
## $HeightInCms
## [1] 0
## 
## $WeightInKgs
## [1] 0
## 
## $BirthDate
## [1] " Not Numeric"
## 
## $State
## [1] " Not Numeric"
## 
## $Pet
## [1] " Not Numeric"
## 
## $HealthGrade
## [1] " Not Numeric"
## 
## $Died
## [1] " Not Numeric"
## 
## $RecordDate
## [1] " Not Numeric"

Detecting Spaces

# Count the entries of a character column that are empty or whitespace-only.
#
# inp: a vector, typically one column of a data frame.
# Returns " Not character" for non-character input; otherwise the number of
# elements that are "" after trimming surrounding whitespace.
detectSpaces <- function(inp) {
  if (!is.character(inp)) {
    return(" Not character")
  }
  # Missing values are not blank strings; without na.rm a single NA in the
  # column would turn the whole count into NA.
  sum(trimws(inp) == "", na.rm = TRUE)
}
lapply(csvpd, FUN=detectSpaces)  
## $ID
## [1] 0
## 
## $Name
## [1] 0
## 
## $Race
## [1] 0
## 
## $Gender
## [1] 0
## 
## $Smokes
## [1] 0
## 
## $HeightInCms
## [1] " Not character"
## 
## $WeightInKgs
## [1] " Not character"
## 
## $BirthDate
## [1] 0
## 
## $State
## [1] 0
## 
## $Pet
## [1] NA
## 
## $HealthGrade
## [1] " Not character"
## 
## $Died
## [1] 0
## 
## $RecordDate
## [1] 0

Detecting outliers

# Return the values of a numeric column that lie outside the 1.5 * IQR fences.
#
# inp:   a vector, typically one column of a data frame.
# na.rm: passed to quantile() and IQR(); with the default TRUE, missing values
#        are ignored when the quartiles are computed.
# Returns "Not a numeric column" for non-numeric input; otherwise the elements
# of `inp` below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR (possibly length 0).
detectOutliers <- function(inp, na.rm = TRUE) {
  # is.numeric() accepts double and integer columns and is always a scalar
  # test.
  if (!is.numeric(inp)) {
    return("Not a numeric column")
  }
  quartiles <- quantile(inp, probs = c(0.25, 0.75), na.rm = na.rm)
  # Named `fence` rather than `max` so base::max is not shadowed.
  fence <- 1.5 * IQR(inp, na.rm = na.rm)
  lower <- quartiles[[1]] - fence
  upper <- quartiles[[2]] + fence
  # which() drops NA comparisons, so missing inputs are not reported as
  # outliers.
  inp[which(inp < lower | inp > upper)]
}
     
lapply(csvpd, FUN=detectOutliers)     
## $ID
## [1] "Not a numeric column"
## 
## $Name
## [1] "Not a numeric column"
## 
## $Race
## [1] "Not a numeric column"
## 
## $Gender
## [1] "Not a numeric column"
## 
## $Smokes
## [1] "Not a numeric column"
## 
## $HeightInCms
## numeric(0)
## 
## $WeightInKgs
## numeric(0)
## 
## $BirthDate
## [1] "Not a numeric column"
## 
## $State
## [1] "Not a numeric column"
## 
## $Pet
## [1] "Not a numeric column"
## 
## $HealthGrade
## [1] "Not a numeric column"
## 
## $Died
## [1] "Not a numeric column"
## 
## $RecordDate
## [1] "Not a numeric column"

Summarizing and cleaning Data in column Gender

summarise(group_by(csvpd,Gender), n())     
## # A tibble: 6 x 2
##    Gender `n()`
##     <chr> <int>
## 1  Female     6
## 2    Male     3
## 3  Female    45
## 4 Female      4
## 5    Male    40
## 6   Male      2
# Normalise Gender: strip surrounding whitespace and upper-case the value so
# that spelling variants collapse into a single level each.
csvpd$Gender <- toupper(trimws(csvpd$Gender))
# Re-count the rows per Gender value after normalisation.
csvpd %>%
  group_by(Gender) %>%
  summarise(n())
## # A tibble: 2 x 2
##   Gender `n()`
##    <chr> <int>
## 1 FEMALE    55
## 2   MALE    45

Summarising and cleaning data in col Race

summarise(group_by(csvpd,Race), n())
## # A tibble: 6 x 2
##        Race `n()`
##       <chr> <int>
## 1     Asian     5
## 2 Bi-Racial     1
## 3     Black     8
## 4       Dog     1
## 5  Hispanic    17
## 6     White    68
# Normalise Race: strip surrounding whitespace and upper-case the value.
csvpd$Race <- toupper(trimws(csvpd$Race))
# "DOG" is not a valid race entry; mark it as missing.
csvpd$Race[csvpd$Race %in% "DOG"] <- NA
# Re-count the rows per Race value after cleaning.
csvpd %>%
  group_by(Race) %>%
  summarise(n())
## # A tibble: 6 x 2
##        Race `n()`
##       <chr> <int>
## 1     ASIAN     5
## 2 BI-RACIAL     1
## 3     BLACK     8
## 4  HISPANIC    17
## 5     WHITE    68
## 6      <NA>     1

Summarising and cleaning data in col Died

summarise(group_by(csvpd,Died), n())
## # A tibble: 2 x 2
##    Died `n()`
##   <chr> <int>
## 1 False    46
## 2  True    54
class(csvpd$Died) 
## [1] "character"
# Convert the "True"/"False" strings in Died to logical values.
csvpd[["Died"]] <- as.logical(csvpd[["Died"]])
# Confirm the column class after conversion.
class(csvpd[["Died"]])
## [1] "logical"
summarise(group_by(csvpd,Died), n())  
## # A tibble: 2 x 2
##    Died `n()`
##   <lgl> <int>
## 1 FALSE    46
## 2  TRUE    54

Summarising and cleaning data in col Pet

summarise(group_by(csvpd,Pet), n())  
## # A tibble: 10 x 2
##      Pet `n()`
##    <chr> <int>
##  1  Bird     9
##  2   Cat    24
##  3   CAT     5
##  4   Dog    28
##  5   DOG     4
##  6 Horse     1
##  7  None    23
##  8  NONE     1
##  9  NULL     3
## 10  <NA>     2
# Normalise Pet: strip surrounding whitespace and upper-case the value.
csvpd$Pet <- toupper(trimws(csvpd$Pet))
# The literal string "NULL" stands for a missing entry; mark it as NA.
csvpd$Pet[csvpd$Pet %in% "NULL"] <- NA
# Re-count the rows per Pet value after cleaning.
csvpd %>%
  group_by(Pet) %>%
  summarise(n())
## # A tibble: 6 x 2
##     Pet `n()`
##   <chr> <int>
## 1  BIRD     9
## 2   CAT    29
## 3   DOG    32
## 4 HORSE     1
## 5  NONE    24
## 6  <NA>     5

Summarising and cleaning data in col Smokes

summarise(group_by(csvpd,Smokes), n()) 
## # A tibble: 4 x 2
##   Smokes `n()`
##    <chr> <int>
## 1  False    72
## 2     No     6
## 3   True    18
## 4    Yes     4
class(csvpd$Smokes)
## [1] "character"
# Normalise Smokes: strip surrounding whitespace and upper-case the value.
csvpd$Smokes <- toupper(trimws(csvpd$Smokes))
# Map the "NO"/"YES" spellings onto the strings as.logical() understands.
csvpd$Smokes[csvpd$Smokes %in% "NO"] <- "FALSE"
csvpd$Smokes[csvpd$Smokes %in% "YES"] <- "TRUE"
# Convert the cleaned strings to a logical column.
csvpd$Smokes <- as.logical(csvpd$Smokes)
# Re-count the rows per Smokes value after cleaning.
csvpd %>%
  group_by(Smokes) %>%
  summarise(n())
## # A tibble: 2 x 2
##   Smokes `n()`
##    <lgl> <int>
## 1  FALSE    78
## 2   TRUE    22

Summarising and cleaning data in col HealthGrade

summarise(group_by(csvpd,HealthGrade), n())  
## # A tibble: 4 x 2
##   HealthGrade `n()`
##         <int> <int>
## 1           1    29
## 2           2    30
## 3           3    34
## 4          99     7
class(csvpd$HealthGrade)    
## [1] "integer"
# Recode the integer HealthGrade codes as labels. The first replacement turns
# the column into character, so the later comparisons work on strings.
# NOTE(review): "Good" is mixed-case while "NORMAL" and "BAD" are upper-case;
# confirm whether that is intended.
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, csvpd$HealthGrade == 1, "Good"
)
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, csvpd$HealthGrade == 2, "NORMAL"
)
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, csvpd$HealthGrade == 3, "BAD"
)
# Code 99 is treated as a missing grade.
csvpd$HealthGrade <- replace(
  csvpd$HealthGrade, csvpd$HealthGrade == "99", NA
)
# Confirm the column class after recoding.
class(csvpd$HealthGrade)
## [1] "character"
summarise(group_by(csvpd,HealthGrade), n()) 
## # A tibble: 4 x 2
##   HealthGrade `n()`
##         <chr> <int>
## 1         BAD    34
## 2        Good    29
## 3      NORMAL    30
## 4        <NA>     7

Summarising and cleaning data in col State

View(summarise(group_by(csvpd,State), n()))               
# Repair the malformed state entry "Georgia,xxx".
csvpd$State[csvpd$State %in% "Georgia,xxx"] <- "Georgia"
View(summarise(group_by(csvpd,State), n()))        

Removing rows with missing values

nrow(csvpd)  
## [1] 100
# Flag the rows that have no NA in any column ...
vclComplete <- complete.cases(csvpd)
# ... and keep only those rows.
csvpd <- csvpd[which(vclComplete), ]
nrow(csvpd) 
## [1] 88

Calculating BMI Value

# BMI = weight in kg divided by the square of the height in metres
# (HeightInCms is converted from centimetres by dividing by 100).
csvpd <- csvpd %>%
  mutate(BMI_Value = WeightInKgs / (HeightInCms / 100)^2)

Creating BMI Label

# Label each record by BMI band:
#   below 18.5 -> "Underweight", [18.5, 25) -> "Normal",
#   [25, 30)   -> "Overweight",  30 and above -> "Obese".
# The intervals are closed on the left (right = FALSE), so the boundary values
# 18.5, 25 and 30 each receive a label. The earlier strict `<` / `>`
# comparisons left those exact values as NA.
# as.character() keeps BMILabel a character column, as before; a missing
# BMI_Value still yields NA.
csvpd$BMILabel <- as.character(cut(
  csvpd$BMI_Value,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
))

View(csvpd)     

Reporting: Display the top 10 records based on BMI-Value

# Ten records with the highest BMI, largest first.
csvpd %>%
  arrange(desc(BMI_Value)) %>%
  head(10)
##           ID     Name  Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/SG/009    Sammy WHITE   MALE  FALSE      166.84       88.25
## 2  AC/SG/064      Jon WHITE   MALE  FALSE      169.16       90.08
## 3  AC/AH/076   Albert WHITE   MALE  FALSE      176.22       97.67
## 4  AC/AH/104   Jeremy WHITE   MALE   TRUE      169.85       90.63
## 5  AC/AH/022     Lupe WHITE   MALE  FALSE      175.66       94.54
## 6  AC/AH/248   Andrea WHITE   MALE  FALSE      178.64       97.05
## 7  AC/SG/067   Thomas WHITE   MALE  FALSE      167.51       84.15
## 8  AC/AH/052 Courtney WHITE   MALE   TRUE      175.39       92.22
## 9  AC/AH/159   Edward WHITE   MALE  FALSE      181.64       96.91
## 10 AC/AH/127     Jame WHITE   MALE  FALSE      167.75       82.06
##     BirthDate        State  Pet HealthGrade  Died RecordDate BMI_Value
## 1  04-03-1972      Vermont  DOG        Good FALSE 25-06-2016  31.70402
## 2  04-10-1972     Illinois  CAT      NORMAL  TRUE 25-07-2016  31.47988
## 3  08-04-1973    Louisiana  CAT      NORMAL FALSE 25-12-2015  31.45218
## 4  12-04-1972     Kentucky NONE        Good  TRUE 25-12-2015  31.41528
## 5  11-08-1972      Florida  CAT        Good FALSE 25-11-2015  30.63867
## 6  12-01-1973      Indiana  CAT        Good  TRUE 25-05-2016  30.41152
## 7  19-07-1972 Pennsylvania BIRD      NORMAL  TRUE 25-07-2016  29.98974
## 8  16-03-1972      Indiana BIRD         BAD FALSE 25-12-2015  29.97888
## 9  04-12-1972  Connecticut  CAT      NORMAL FALSE 25-02-2016  29.37282
## 10 29-10-1972        Texas  DOG        Good  TRUE 25-01-2016  29.16127
##      BMILabel
## 1       Obese
## 2       Obese
## 3       Obese
## 4       Obese
## 5       Obese
## 6       Obese
## 7  Overweight
## 8  Overweight
## 9  Overweight
## 10 Overweight

Display bottom 10 records based on BMI-Value

# Ten records with the lowest BMI, smallest first.
csvpd %>%
  arrange(BMI_Value) %>%
  head(10)
##           ID      Name     Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/SG/193    Ronnie    WHITE   MALE   TRUE      185.43       73.63
## 2  AC/SG/099    Leslie    ASIAN   MALE  FALSE      172.72       67.62
## 3  AC/AH/001 Demetrius    WHITE   MALE  FALSE      182.87       76.57
## 4  AC/AH/086      Kyle    BLACK   MALE   TRUE      180.11       75.72
## 5  AC/AH/045   Shirley    WHITE   MALE  FALSE      181.32       76.90
## 6  AC/AH/089      Dong    WHITE   MALE  FALSE      179.24       75.54
## 7  AC/AH/164     Shane HISPANIC   MALE   TRUE      177.03       74.04
## 8  AC/AH/114      Kris HISPANIC   MALE  FALSE      177.75       74.84
## 9  AC/AH/077     Tommy    BLACK   MALE  FALSE      174.09       72.20
## 10 AC/AH/150     Brett    WHITE   MALE   TRUE      181.56       79.54
##     BirthDate        State  Pet HealthGrade  Died RecordDate BMI_Value
## 1  05-06-1973         Iowa  DOG         BAD FALSE 25-09-2016  21.41385
## 2  04-02-1972         Ohio  CAT        Good FALSE 25-07-2016  22.66678
## 3  31-01-1972      Georgia  DOG      NORMAL FALSE 25-11-2015  22.89674
## 4  12-05-1973      Georgia  CAT         BAD FALSE 25-12-2015  23.34183
## 5  25-12-1971    Louisiana  DOG        Good FALSE 25-11-2015  23.39025
## 6  11-03-1972   California NONE      NORMAL  TRUE 25-12-2015  23.51295
## 7  18-02-1972      Florida NONE      NORMAL FALSE 25-02-2016  23.62505
## 8  19-11-1972 Pennsylvania BIRD         BAD FALSE 25-01-2016  23.68725
## 9  01-02-1973   Washington  CAT         BAD FALSE 25-12-2015  23.82262
## 10 03-05-1972     Kentucky  DOG        Good  TRUE 25-02-2016  24.12933
##    BMILabel
## 1    Normal
## 2    Normal
## 3    Normal
## 4    Normal
## 5    Normal
## 6    Normal
## 7    Normal
## 8    Normal
## 9    Normal
## 10   Normal

Provide frequency / counts of Gender > Race

# Number of records for every Gender / Race combination.
csvpd %>%
  group_by(Gender, Race) %>%
  summarise(n())
## # A tibble: 9 x 3
## # Groups:   Gender [?]
##   Gender      Race `n()`
##    <chr>     <chr> <int>
## 1 FEMALE     ASIAN     2
## 2 FEMALE     BLACK     1
## 3 FEMALE  HISPANIC     6
## 4 FEMALE     WHITE    37
## 5   MALE     ASIAN     2
## 6   MALE BI-RACIAL     1
## 7   MALE     BLACK     4
## 8   MALE  HISPANIC     9
## 9   MALE     WHITE    26

To view the same in table form

# Cross-tabulation with Gender as rows and Race as columns.
table(csvpd[["Gender"]], csvpd[["Race"]])
##         
##          ASIAN BI-RACIAL BLACK HISPANIC WHITE
##   FEMALE     2         0     1        6    37
##   MALE       2         1     4        9    26
# The same cross-tabulation transposed: Race as rows, Gender as columns.
table(csvpd[["Race"]], csvpd[["Gender"]])
##            
##             FEMALE MALE
##   ASIAN          2    2
##   BI-RACIAL      0    1
##   BLACK          1    4
##   HISPANIC       6    9
##   WHITE         37   26

Provide the max, min and average BMI values for each Race > Gender group

# Minimum, maximum and mean BMI for every Gender / Race combination.
csvpd %>%
  group_by(Gender, Race) %>%
  summarise(min(BMI_Value), max(BMI_Value), mean(BMI_Value))
## # A tibble: 9 x 5
## # Groups:   Gender [?]
##   Gender      Race `min(BMI_Value)` `max(BMI_Value)` `mean(BMI_Value)`
##    <chr>     <chr>            <dbl>            <dbl>             <dbl>
## 1 FEMALE     ASIAN         25.57631         28.19431          26.88531
## 2 FEMALE     BLACK         26.71407         26.71407          26.71407
## 3 FEMALE  HISPANIC         25.03916         27.84206          26.52176
## 4 FEMALE     WHITE         24.21459         28.24834          26.46648
## 5   MALE     ASIAN         22.66678         27.24885          24.95782
## 6   MALE BI-RACIAL         24.83473         24.83473          24.83473
## 7   MALE     BLACK         23.34183         26.60586          25.03778
## 8   MALE  HISPANIC         23.62505         28.26769          26.02289
## 9   MALE     WHITE         21.41385         31.70402          27.67114

**Display all records for all people who are dead**

# All records where Died is TRUE.
csvpd %>%
  filter(Died == TRUE)
##           ID        Name     Race Gender Smokes HeightInCms WeightInKgs
## 1  AC/AH/049      Martin    WHITE FEMALE  FALSE      160.06       72.37
## 2  AC/AH/089        Dong    WHITE   MALE  FALSE      179.24       75.54
## 3  AC/AH/104      Jeremy    WHITE   MALE   TRUE      169.85       90.63
## 4  AC/AH/127        Jame    WHITE   MALE  FALSE      167.75       82.06
## 5  AC/AH/133       Clyde HISPANIC   MALE  FALSE      181.15       83.93
## 6  AC/AH/150       Brett    WHITE   MALE   TRUE      181.56       79.54
## 7  AC/AH/154        Tony    WHITE FEMALE  FALSE      160.03       64.30
## 8  AC/AH/156      George    WHITE   MALE  FALSE      165.62       76.72
## 9  AC/AH/160        Rory    ASIAN FEMALE  FALSE      159.67       71.88
## 10 AC/AH/171       Devin    WHITE FEMALE  FALSE      163.35       70.46
## 11 AC/AH/176       Jerry    ASIAN   MALE  FALSE      175.21       83.65
## 12 AC/AH/180        Drew    WHITE FEMALE  FALSE      160.80       64.77
## 13 AC/AH/186 Christopher    WHITE FEMALE  FALSE      157.95       67.41
## 14 AC/AH/192   Dominique    WHITE   MALE  FALSE      180.61       83.59
## 15 AC/AH/211         Son    WHITE FEMALE  FALSE      157.16       69.64
## 16 AC/AH/219         Jay    WHITE FEMALE  FALSE      163.47       72.89
## 17 AC/AH/233      Marion    WHITE FEMALE  FALSE      163.97       66.71
## 18 AC/AH/248      Andrea    WHITE   MALE  FALSE      178.64       97.05
## 19 AC/AH/249       Jesus HISPANIC FEMALE   TRUE      159.78       68.31
## 20 AC/SG/003      Walter    WHITE FEMALE  FALSE      161.83       66.03
## 21 AC/SG/008        Dana    WHITE   MALE   TRUE      169.66       77.30
## 22 AC/SG/010        Theo    ASIAN FEMALE  FALSE      159.32       64.92
## 23 AC/SG/015       Shaun    WHITE   MALE   TRUE      170.51       84.35
## 24 AC/SG/016      Jimmie    BLACK FEMALE  FALSE      161.84       69.97
## 25 AC/SG/046        Carl HISPANIC   MALE  FALSE      171.41       81.70
## 26 AC/SG/055        Evan    WHITE   MALE  FALSE      166.75       79.06
## 27 AC/SG/064         Jon    WHITE   MALE  FALSE      169.16       90.08
## 28 AC/SG/065      Shayne    WHITE FEMALE  FALSE      157.01       66.56
## 29 AC/SG/067      Thomas    WHITE   MALE  FALSE      167.51       84.15
## 30 AC/SG/068   Valentine HISPANIC FEMALE  FALSE      160.47       68.20
## 31 AC/SG/084       Brian HISPANIC   MALE  FALSE      174.25       80.93
## 32 AC/SG/101       Jason    WHITE FEMALE  FALSE      159.23       69.96
## 33 AC/SG/116      Connie    BLACK   MALE  FALSE      184.34       90.41
## 34 AC/SG/123     Darnell    WHITE FEMALE   TRUE      162.32       72.72
## 35 AC/SG/134       Daryl    WHITE FEMALE   TRUE      162.59       69.76
## 36 AC/SG/155     Raymond    WHITE FEMALE  FALSE      158.35       69.72
## 37 AC/SG/165       Elmer    WHITE FEMALE  FALSE      162.18       67.81
## 38 AC/SG/167       Jimmy    WHITE FEMALE  FALSE      159.38       70.37
## 39 AC/SG/172     Whitney    WHITE   MALE  FALSE      171.45       84.29
## 40 AC/SG/179       Logan    WHITE   MALE  FALSE      183.10       82.47
## 41 AC/SG/181       Terry HISPANIC   MALE  FALSE      177.14       88.70
## 42 AC/SG/182       Jamie HISPANIC   MALE   TRUE      171.08       72.51
## 43 AC/SG/191        Lacy HISPANIC FEMALE  FALSE      159.33       70.68
## 44 AC/SG/197       Stacy    WHITE FEMALE  FALSE      159.44       66.21
## 45 AC/SG/216        Alva    WHITE FEMALE  FALSE      159.13       66.96
## 46 AC/SG/217        Dean    WHITE FEMALE  FALSE      160.58       71.49
## 47 AC/SG/234        Luis HISPANIC FEMALE  FALSE      164.88       68.07
##     BirthDate          State   Pet HealthGrade Died RecordDate BMI_Value
## 1  28-04-1972     California HORSE      NORMAL TRUE 25-12-2015  28.24834
## 2  11-03-1972     California  NONE      NORMAL TRUE 25-12-2015  23.51295
## 3  12-04-1972       Kentucky  NONE        Good TRUE 25-12-2015  31.41528
## 4  29-10-1972          Texas   DOG        Good TRUE 25-01-2016  29.16127
## 5  13-10-1973     Washington   CAT         BAD TRUE 25-02-2016  25.57647
## 6  03-05-1972       Kentucky   DOG        Good TRUE 25-02-2016  24.12933
## 7  30-08-1973     California   DOG        Good TRUE 25-02-2016  25.10777
## 8  09-07-1972     California   DOG        Good TRUE 25-02-2016  27.96939
## 9  22-09-1973        Florida   CAT      NORMAL TRUE 25-02-2016  28.19431
## 10 16-04-1973     California  BIRD         BAD TRUE 25-03-2016  26.40611
## 11 01-05-1973       Virginia   DOG         BAD TRUE 25-03-2016  27.24885
## 12 18-02-1973         Oregon   CAT        Good TRUE 25-03-2016  25.04966
## 13 06-05-1972     New Jersey   DOG         BAD TRUE 25-03-2016  27.01998
## 14 24-03-1972       Michigan  NONE         BAD TRUE 25-03-2016  25.62541
## 15 14-07-1973     California   CAT      NORMAL TRUE 25-04-2016  28.19517
## 16 07-04-1972 North Carolina  BIRD        Good TRUE 25-04-2016  27.27670
## 17 23-12-1971           Ohio   CAT         BAD TRUE 25-04-2016  24.81202
## 18 12-01-1973        Indiana   CAT        Good TRUE 25-05-2016  30.41152
## 19 23-04-1972        Alabama   CAT      NORMAL TRUE 25-05-2016  26.75713
## 20 11-07-1972         Oregon  NONE      NORMAL TRUE 25-05-2016  25.21292
## 21 26-05-1973         Nevada   DOG        Good TRUE 25-05-2016  26.85472
## 22 29-01-1973       New York   CAT      NORMAL TRUE 25-06-2016  25.57631
## 23 09-11-1972     New Jersey   DOG         BAD TRUE 25-06-2016  29.01252
## 24 03-04-1972        Arizona   CAT         BAD TRUE 25-06-2016  26.71407
## 25 05-08-1973    Mississippi  BIRD      NORMAL TRUE 25-06-2016  27.80672
## 26 24-02-1972       Illinois  BIRD         BAD TRUE 25-07-2016  28.43316
## 27 04-10-1972       Illinois   CAT      NORMAL TRUE 25-07-2016  31.47988
## 28 05-04-1972     California   DOG         BAD TRUE 25-07-2016  26.99968
## 29 19-07-1972   Pennsylvania  BIRD      NORMAL TRUE 25-07-2016  29.98974
## 30 15-04-1972      Tennessee   CAT         BAD TRUE 25-07-2016  26.48480
## 31 06-03-1972       Virginia   DOG      NORMAL TRUE 25-07-2016  26.65410
## 32 28-09-1973       Michigan   DOG      NORMAL TRUE 25-07-2016  27.59307
## 33 05-06-1972        Florida  NONE         BAD TRUE 25-08-2016  26.60586
## 34 03-09-1972 North Carolina  BIRD        Good TRUE 25-08-2016  27.60005
## 35 28-05-1972          Texas   CAT      NORMAL TRUE 25-08-2016  26.38875
## 36 02-06-1972     California   CAT         BAD TRUE 25-08-2016  27.80489
## 37 25-03-1972     Washington  BIRD        Good TRUE 25-08-2016  25.78096
## 38 30-09-1973     Washington  NONE      NORMAL TRUE 25-09-2016  27.70256
## 39 25-02-1972        Florida   DOG      NORMAL TRUE 25-09-2016  28.67484
## 40 24-10-1972           Ohio   DOG         BAD TRUE 25-09-2016  24.59910
## 41 24-11-1971        Indiana   CAT         BAD TRUE 25-09-2016  28.26769
## 42 25-03-1973      Louisiana  NONE         BAD TRUE 25-09-2016  24.77419
## 43 21-06-1973          Texas  NONE         BAD TRUE 25-09-2016  27.84206
## 44 08-11-1972       New York   CAT        Good TRUE 25-10-2016  26.04528
## 45 19-06-1972        Alabama  NONE        Good TRUE 25-10-2016  26.44304
## 46 11-11-1972           Ohio  NONE        Good TRUE 25-10-2016  27.72441
## 47 10-11-1971   Pennsylvania   CAT         BAD TRUE 25-10-2016  25.03916
##      BMILabel
## 1  Overweight
## 2      Normal
## 3       Obese
## 4  Overweight
## 5  Overweight
## 6      Normal
## 7  Overweight
## 8  Overweight
## 9  Overweight
## 10 Overweight
## 11 Overweight
## 12 Overweight
## 13 Overweight
## 14 Overweight
## 15 Overweight
## 16 Overweight
## 17     Normal
## 18      Obese
## 19 Overweight
## 20 Overweight
## 21 Overweight
## 22 Overweight
## 23 Overweight
## 24 Overweight
## 25 Overweight
## 26 Overweight
## 27      Obese
## 28 Overweight
## 29 Overweight
## 30 Overweight
## 31 Overweight
## 32 Overweight
## 33 Overweight
## 34 Overweight
## 35 Overweight
## 36 Overweight
## 37 Overweight
## 38 Overweight
## 39 Overweight
## 40     Normal
## 41 Overweight
## 42     Normal
## 43 Overweight
## 44 Overweight
## 45 Overweight
## 46 Overweight
## 47 Overweight
# Number of records where Died is TRUE.
csvpd %>%
  filter(Died == TRUE) %>%
  nrow()
## [1] 47

Display All Records for “Hispanic Females”

# All records with Race "HISPANIC" and Gender "FEMALE" (both conditions must
# hold).
csvpd %>%
  filter(Race == "HISPANIC", Gender == "FEMALE")
##          ID      Name     Race Gender Smokes HeightInCms WeightInKgs
## 1 AC/AH/208  Lawrence HISPANIC FEMALE  FALSE      165.80       71.77
## 2 AC/AH/249     Jesus HISPANIC FEMALE   TRUE      159.78       68.31
## 3 AC/SG/068 Valentine HISPANIC FEMALE  FALSE      160.47       68.20
## 4 AC/SG/122    Michal HISPANIC FEMALE  FALSE      160.09       68.94
## 5 AC/SG/191      Lacy HISPANIC FEMALE  FALSE      159.33       70.68
## 6 AC/SG/234      Luis HISPANIC FEMALE  FALSE      164.88       68.07
##    BirthDate          State  Pet HealthGrade  Died RecordDate BMI_Value
## 1 07-08-1973      Louisiana NONE        Good FALSE 25-03-2016  26.10802
## 2 23-04-1972        Alabama  CAT      NORMAL  TRUE 25-05-2016  26.75713
## 3 15-04-1972      Tennessee  CAT         BAD  TRUE 25-07-2016  26.48480
## 4 16-12-1971 South Carolina  DOG        Good FALSE 25-08-2016  26.89942
## 5 21-06-1973          Texas NONE         BAD  TRUE 25-09-2016  27.84206
## 6 10-11-1971   Pennsylvania  CAT         BAD  TRUE 25-10-2016  25.03916
##     BMILabel
## 1 Overweight
## 2 Overweight
## 3 Overweight
## 4 Overweight
## 5 Overweight
## 6 Overweight
# Number of records with Race "HISPANIC" and Gender "FEMALE".
csvpd %>%
  filter(Race == "HISPANIC", Gender == "FEMALE") %>%
  nrow()
## [1] 6

Provide 7 sample records from the dataset. Use set.seed(707)

# Fix the random seed so the same 7 rows are drawn on every run.
set.seed(707)
# Draw 7 rows at random from the cleaned data.
csvpd %>%
  sample_n(7)
##           ID    Name     Race Gender Smokes HeightInCms WeightInKgs
## 9  AC/AH/049  Martin    WHITE FEMALE  FALSE      160.06       72.37
## 39 AC/AH/211     Son    WHITE FEMALE  FALSE      157.16       69.64
## 24 AC/AH/133   Clyde HISPANIC   MALE  FALSE      181.15       83.93
## 46 AC/AH/248  Andrea    WHITE   MALE  FALSE      178.64       97.05
## 64 AC/SG/101   Jason    WHITE FEMALE  FALSE      159.23       69.96
## 59 AC/SG/067  Thomas    WHITE   MALE  FALSE      167.51       84.15
## 72 AC/SG/142 Kenneth    WHITE FEMALE  FALSE      158.07       69.80
##     BirthDate        State   Pet HealthGrade  Died RecordDate BMI_Value
## 9  28-04-1972   California HORSE      NORMAL  TRUE 25-12-2015  28.24834
## 39 14-07-1973   California   CAT      NORMAL  TRUE 25-04-2016  28.19517
## 24 13-10-1973   Washington   CAT         BAD  TRUE 25-02-2016  25.57647
## 46 12-01-1973      Indiana   CAT        Good  TRUE 25-05-2016  30.41152
## 64 28-09-1973     Michigan   DOG      NORMAL  TRUE 25-07-2016  27.59307
## 59 19-07-1972 Pennsylvania  BIRD      NORMAL  TRUE 25-07-2016  29.98974
## 72 15-05-1972       Kansas   DOG         BAD FALSE 25-08-2016  27.93550
##      BMILabel
## 9  Overweight
## 39 Overweight
## 24 Overweight
## 46      Obese
## 64 Overweight
## 59 Overweight
## 72 Overweight