Recap

  • R is a language; RStudio lets us use it

  • Markdown - Grey = code, white = notes

  • We set a working directory and read in data

  • R is sensitive (upper/lower case matters, and missing values (NAs) need care)

Functions learned

data$Gender     # format: name of data $ column name
setwd()         # sets working directory
read.csv()      # brings in data
head() tail()   # peek at the first or last 6 rows
View()          # capital V! opens spreadsheet
summary()       # min, max, mean, median, quartiles
str()           # structure of data
names()         # lists all the columns
table()         # counts of each value in a column

Class 2 : Cleaning data

Unclean data

 table(data$`Case Type`)
## 
##                        Brain Tumor                               CSDH 
##                                  1                                 66 
##                     CSDH, epidural                     CSDH, Epidural 
##                                  1                                  2 
## CSDH,other,Extensive brain hypoxia                           Epidural 
##                                  1                                  1 
##                          Infection              Other, Cervial injury 
##                                  1                                  1 
##           Other, Extradural lesion               Other, Scalp Abscess 
##                                  1                                  1 
##                  Other,Brain Tumor                             Others 
##                                  1                                  6 
##              Others skull fracture                     Others, Stroke 
##                                  1                                  1 
##                                SDH                              Spine 
##                                  3                                 19 
##                                TBI                              Tumor 
##                                626                                 41 
##                    Tumor(Thalamus)                   Tumor, Other,HCP 
##                                  1                                  1 
##                            unknown 
##                                  1

Clean data

table(data$`Case Type`)
## 
##  CSDH Other Spine   TBI Tumor 
##    70    12    20   630    45

Unclean data

names(data2)
##  [1] "Patien's Age"                "Admission Date"             
##  [3] "Total1"                      "Heart Rate/Min"             
##  [5] "Resp Rate/min"               "Blood Pressure"             
##  [7] "/"                           "Discharge GCS"              
##  [9] "Discharge GOS"               "Alcohol status"             
## [11] "Gender"                      "Pupil Reactive?"            
## [13] "Temp"                        "Spo2"                       
## [15] "Did planned operation occur" "Management"                 
## [17] "Case Type"
  • Spaces are bad

  • Symbols cause problems

Clean data

  • underscore instead of spaces

  • no symbols

names(data2)
##  [1] "Age"       "adm_date"  "GCS"       "HR"        "RR"       
##  [6] "Sys_BP"    "Dia_BP"    "dc_GCS"    "dc_GOS"    "etoh"     
## [11] "male"      "pupils"    "temp"      "spo2"      "tbi_surg" 
## [16] "tbi_surg2" "dx"

Time to code

# Build two vectors of equal length with c() ("combine").
# Note: naming a vector `letters` hides R's built-in `letters` constant
# for the rest of this session.
letters <- c("a", "b", "c")
numbers <- c(1, 2, 3)

# Put the two vectors side by side as the columns of a data frame.
mydata <- data.frame(letters, numbers)

# Assigning to a column name that does not exist yet creates that column.
mydata$chicken <- c("hard", "soft", "chewy")

# A new column can also be a copy of an existing column.
mydata$food <- mydata$chicken

Back to our data

# Load in the data if you have not already done so

# Make a new data frame with only columns 8, 9 and 3 of `data` (picked by position)
# NOTE(review): this comment used to say columns 13, 14, 8 - confirm which positions are intended
data2 <- data[ , c(8, 9, 3)]

or

# This version does not run: Case Type contains a space, so it must be quoted
#data2 <- select(data, "Patien's Age", Gender , Case Type)

# Working version: column names containing spaces or apostrophes are wrapped in quotes
data2 <- select(data, "Patien's Age", Gender , "Case Type")
# Look at the first rows to check that the right columns were kept
head(data2) 
## # A tibble: 6 x 3
##   `Patien's Age` Gender `Case Type`
##            <int> <chr>  <chr>      
## 1             28 Male   TBI        
## 2             26 Female CSDH       
## 3              0 <NA>   TBI        
## 4             24 Male   TBI        
## 5             19 Male   TBI        
## 6              0 Male   TBI

Change column names

# Give the three columns short names without spaces or symbols,
# listed in the same left-to-right order as the columns themselves.
colnames(data2) <- c("age", "gender", "case_type")
head(data2)
## # A tibble: 6 x 3
##     age gender case_type
##   <int> <chr>  <chr>    
## 1    28 Male   TBI      
## 2    26 Female CSDH     
## 3     0 <NA>   TBI      
## 4    24 Male   TBI      
## 5    19 Male   TBI      
## 6     0 Male   TBI
summary(data2$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   16.00   26.00   29.64   40.00  280.00       2

Unclean column

table(data2$case_type)
## 
##                        Brain Tumor                               CSDH 
##                                  1                                 66 
##                     CSDH, epidural                     CSDH, Epidural 
##                                  1                                  2 
## CSDH,other,Extensive brain hypoxia                           Epidural 
##                                  1                                  1 
##                          Infection              Other, Cervial injury 
##                                  1                                  1 
##           Other, Extradural lesion               Other, Scalp Abscess 
##                                  1                                  1 
##                  Other,Brain Tumor                             Others 
##                                  1                                  6 
##              Others skull fracture                     Others, Stroke 
##                                  1                                  1 
##                                SDH                              Spine 
##                                  3                                 19 
##                                TBI                              Tumor 
##                                626                                 41 
##                    Tumor(Thalamus)                   Tumor, Other,HCP 
##                                  1                                  1 
##                            unknown 
##                                  1

Recode function

# Merge the three tumor spellings into the single category 'Tumor'.
# The recode specification is one string of 'old value' = 'new value' pairs
# separated by semicolons; values that are not listed keep their current value.
data2$case_type <- car::recode(data2$case_type, "'Brain Tumor' = 'Tumor';
                                                   'Tumor(Thalamus)' = 'Tumor';
                                                   'Tumor, Other,HCP' = 'Tumor'")
# The assignment above prints nothing, so tabulate the column to see the result.
table(data2$case_type)
## 
##                               CSDH                     CSDH, epidural 
##                                 66                                  1 
##                     CSDH, Epidural CSDH,other,Extensive brain hypoxia 
##                                  2                                  1 
##                           Epidural                          Infection 
##                                  1                                  1 
##              Other, Cervial injury           Other, Extradural lesion 
##                                  1                                  1 
##               Other, Scalp Abscess                  Other,Brain Tumor 
##                                  1                                  1 
##                             Others              Others skull fracture 
##                                  6                                  1 
##                     Others, Stroke                                SDH 
##                                  1                                  3 
##                              Spine                                TBI 
##                                 19                                626 
##                              Tumor                            unknown 
##                                 44                                  1

Recode the rest of the column

Copy and paste my code

# Collapse every raw case_type value into one of five groups:
# CSDH, Other, Spine, TBI or Tumor.
# Each 'old value' = 'new value' pair must match the raw text exactly,
# including capitalisation, spacing and misspellings such as 'Cervial'.
# Values that are not listed (for example 'TBI' or 'Spine') are left as they are.
data2$case_type <- car::recode(data2$case_type, "'Brain Tumor' = 'Tumor';
                                                   'Tumor(Thalamus)' = 'Tumor';
                                                   'Tumor, Other,HCP' = 'Tumor';
                                                   'CSDH, epidural' = 'CSDH';
                                                   'CSDH, Epidural' = 'CSDH';
                                                   'CSDH,other,Extensive brain hypoxia' = 'CSDH';
                                                   'Other,Brain Tumor' = 'Tumor';
                                                   'Others skull fracture' = 'Other';
                                                   'Others, Stroke' = 'Other';
                                                   'Other, Scalp Abscess' = 'Other';
                                                   'Epidural' = 'TBI';
                                                   'Other, Cervial injury' = 'Spine';
                                                   'Other, Extradural lesion' = 'Other';
                                                   'Others' = 'Other';
                                                   'Infection' = 'Other';
                                                   'unknown' = 'Other';
                                                   'SDH' = 'TBI'")
# Tabulate the cleaned column to confirm only the five groups remain
table(data2$case_type)
## 
##  CSDH Other Spine   TBI Tumor 
##    70    12    20   630    45