Subsetting data in R with square brackets

This is part of my notes on R programming on my site, https://dataz4s.com

# Load the lung-capacity spreadsheet: the first three columns are read as
# numeric, the last three as text.
library(readxl)
LungCapData <- read_excel(
  "C:/Users/Usuario/Documents/dataZ4s/R/MarinLectures/LungCapData.xlsx",
  col_types = rep(c("numeric", "text"), each = 3)
)

# Convert the categorical text columns (Smoke, Gender, Caesarean) to factors
LungCapData[["Smoke"]] <- as.factor(LungCapData[["Smoke"]])
LungCapData[["Gender"]] <- as.factor(LungCapData[["Gender"]])
LungCapData[["Caesarean"]] <- as.factor(LungCapData[["Caesarean"]])

# Attach the data so its columns (Age, Gender, ...) can be referenced by
# bare name in the examples that follow
attach(LungCapData)

dim() and length()

# dim() returns the dimensions of the dataset: number of rows, then columns
dim(LungCapData)
## [1] 725   6
# length() returns the number of elements in a vector; Age is a column of
# the attached LungCapData
length(Age)
## [1] 725

Subsetting with brackets []

# Subsetting a vector: elements 11 to 14 of Age
Age[11:14]
## [1] 19 17 12 10
# Subsetting a matrix or data frame takes [rows, columns].
# Leaving the position after the comma blank selects all columns,
# so this returns rows 11 to 14 with every column.
LungCapData[11:14, ]
## # A tibble: 4 x 6
##   LungCap   Age Height Smoke Gender Caesarean
##     <dbl> <dbl>  <dbl> <fct> <fct>  <fct>    
## 1   11.5     19   76.4 no    male   yes      
## 2   10.9     17   71.7 no    male   no       
## 3    6.52    12   57.5 no    male   no       
## 4    6       10   61.1 no    female no

Subsetting a step further…

# The double equal sign (==) tests for equality and returns TRUE/FALSE for
# each element; the resulting logical vector picks which ages to keep.
# Mean age of the females
mean(Age[Gender == "female"])
## [1] 12.44972
# Mean age of the males
mean(Age[Gender == "male"])
## [1] 12.20708

Subset as per gender

# Save the rows for each gender into separate objects.
# The logical condition before the comma selects the rows; the blank after
# the comma keeps all columns.
FemData <- LungCapData[Gender == "female", ]
MaleData <- LungCapData[Gender == "male", ]

# Checking: the row counts of the two subsets should match the counts per
# level reported by summary(Gender)
dim(FemData)
## [1] 358   6
dim(MaleData)
## [1] 367   6
summary(Gender)
## female   male 
##    358    367
FemData[1:4, ]
## # A tibble: 4 x 6
##   LungCap   Age Height Smoke Gender Caesarean
##     <dbl> <dbl>  <dbl> <fct> <fct>  <fct>    
## 1   10.1     18   74.7 yes   female no       
## 2    9.55    16   69.7 no    female yes      
## 3    6.22    11   58.7 no    female no       
## 4    6       10   61.1 no    female no
# Subset for males over 15: & combines the two conditions element by element,
# so only rows where both are TRUE are kept
MaleOver15 <- LungCapData[Gender == "male" & Age > 15, ]

# Checking
dim(MaleOver15)
## [1] 89  6
MaleOver15[1:4, ]
## # A tibble: 4 x 6
##   LungCap   Age Height Smoke Gender Caesarean
##     <dbl> <dbl>  <dbl> <fct> <fct>  <fct>    
## 1    11.5    19   76.4 no    male   yes      
## 2    10.9    17   71.7 no    male   no       
## 3    10.0    16   72.4 no    male   no       
## 4    11.3    17   77.7 no    male   no

This page is inspired by Mike Marin's MarinStatsLectures video ‘Subsetting data in R…’. View this page on my site: https://dataz4s.com/r-statistical-programming/subsetting-data-square-brackets/