# This is part of my notes on R programming on my site: https://dataz4s.com
# Load the lung-capacity data set from the Excel workbook.
# The first three columns are read as numeric and the last three as text.
library(readxl)
LungCapData <- read_excel(
  "C:/Users/Usuario/Documents/dataZ4s/R/MarinLectures/LungCapData.xlsx",
  col_types = c(rep("numeric", 3), rep("text", 3))
)
# Convert the text columns Smoke, Gender and Caesarean to factors in one step
LungCapData[c("Smoke", "Gender", "Caesarean")] <-
  lapply(LungCapData[c("Smoke", "Gender", "Caesarean")], as.factor)
# Attach the data so that its columns (Age, Gender, ...) can be referenced
# by bare name in the rest of the script
attach(LungCapData)
# Preview the first rows of the data set
head(LungCapData)
## # A tibble: 6 x 6
## LungCap Age Height Smoke Gender Caesarean
## <dbl> <dbl> <dbl> <fct> <fct> <fct>
## 1 6.48 6 62.1 no male no
## 2 10.1 18 74.7 yes female no
## 3 9.55 16 69.7 no female yes
## 4 11.1 14 71 no male no
## 5 4.8 5 56.9 no male no
## 6 6.22 11 58.7 no female no
# Number of rows and columns in the data set
dim(LungCapData)
## [1] 725 6
# Number of elements in a single variable (a vector)
length(Age)
## [1] 725
# Square brackets on a vector: elements 11 through 14 of Age
Age[11:14]
## [1] 19 17 12 10
# Square brackets on a matrix or data frame take [rows, columns].
# Leaving the position after the comma empty keeps every column,
# so this returns rows 11 through 14 with all columns.
LungCapData[11:14, ]
## # A tibble: 4 x 6
## LungCap Age Height Smoke Gender Caesarean
## <dbl> <dbl> <dbl> <fct> <fct> <fct>
## 1 11.5 19 76.4 no male yes
## 2 10.9 17 71.7 no male no
## 3 6.52 12 57.5 no male no
## 4 6 10 61.1 no female no
# The double equals sign (==) tests for equality and returns a logical
# vector, which can be used inside the brackets to pick matching elements.
# Mean age where Gender is "female"
mean(Age[Gender == "female"])
## [1] 12.44972
# Mean age where Gender is "male"
mean(Age[Gender == "male"])
## [1] 12.20708
# Save the rows for each gender into separate objects.
# Gender is found by bare name because of the earlier attach(LungCapData).
FemData <- LungCapData[Gender == "female", ]
# Fixed: this previously filtered on "female", so MaleData duplicated FemData
MaleData <- LungCapData[Gender == "male", ]
# Check: the row counts of the two subsets should match the per-level
# counts reported by summary(Gender)
dim(FemData)
## [1] 358 6
dim(MaleData)
## [1] 367 6
summary(Gender)
## female male
## 358 367
# First four rows of the female subset
FemData[1:4, ]
## # A tibble: 4 x 6
## LungCap Age Height Smoke Gender Caesarean
## <dbl> <dbl> <dbl> <fct> <fct> <fct>
## 1 10.1 18 74.7 yes female no
## 2 9.55 16 69.7 no female yes
## 3 6.22 11 58.7 no female no
## 4 6 10 61.1 no female no
# Combine two conditions with the elementwise AND operator (&):
# keep only the rows where Age is above 15 and Gender is "male"
MaleOver15 <- LungCapData[Age > 15 & Gender == "male", ]
# Check the size of the subset and look at its first four rows
dim(MaleOver15)
## [1] 89 6
MaleOver15[1:4, ]
## # A tibble: 4 x 6
## LungCap Age Height Smoke Gender Caesarean
## <dbl> <dbl> <dbl> <fct> <fct> <fct>
## 1 11.5 19 76.4 no male yes
## 2 10.9 17 71.7 no male no
## 3 10.0 16 72.4 no male no
## 4 11.3 17 77.7 no male no
# This page is inspired by Mike Marin's MarinStatsLectures video ‘Subsetting data in R…’. View this page on my site: https://dataz4s.com/r-statistical-programming/subsetting-data-square-brackets/