# This page is part of My notes on R programming on my site: https://dataz4s.com/
# Categorical variables imported from an Excel document can end up registered
# as another type (e.g. "text") when they should be factors;
# as.factor() converts them. A small numeric vector shows the idea.
x <- c(1, 0, 0, 1, 1, 0, 1, 0)
# Freshly created, x is registered as "numeric"
class(x)
## [1] "numeric"
# After as.factor() the same values are stored as a factor
x <- as.factor(x)
class(x)
## [1] "factor"
# Import the LungCap data set, then turn its categorical columns into factors
library(readxl)
# The first three columns are read as numbers and the last three as text
LungCapData <- read_excel(
  "C:/Users/Usuario/Documents/dataZ4s/R/MarinLectures/LungCapData.xlsx",
  col_types = c(rep("numeric", 3), rep("text", 3))
)
# Smoke, Gender and Caesarean arrive as text; as.factor() converts each one
LungCapData[c("Smoke", "Gender", "Caesarean")] <- lapply(
  LungCapData[c("Smoke", "Gender", "Caesarean")],
  as.factor
)
# Open the data set in the data viewer and list its column names
View(LungCapData)
names(LungCapData)
## [1] "LungCap" "Age" "Height" "Smoke" "Gender" "Caesarean"
# `$` extracts a single column by name; here it feeds Age to mean() to get
# the mean age of the persons in the sample
mean(LungCapData$Age)
## [1] 12.3269
# Typing the column on its own prints the age of every person in the data set
LungCapData$Age
## [1] 6 18 16 14 5 11 8 11 15 11 19 17 12 10 10 13 15 8 11 14 6 8 16 11 11
## [26] 12 12 9 4 18 4 13 13 13 12 10 6 9 11 17 14 17 8 12 6 11 11 12 17 7
## [51] 15 15 11 10 18 6 13 19 9 12 12 14 9 13 13 13 11 11 11 12 14 11 11 13 13
## [76] 12 14 9 17 11 12 16 17 19 14 12 19 11 15 19 9 14 13 14 19 18 16 7 16 10
## [101] 13 14 13 16 16 5 16 12 7 7 15 18 8 3 15 15 7 18 8 17 16 12 17 16 19
## [126] 12 17 19 12 12 15 17 13 12 16 13 15 15 14 13 6 18 18 18 9 17 14 14 14 3
## [151] 11 8 9 8 16 7 11 5 16 11 16 10 12 19 7 8 13 17 9 8 15 6 14 10 17
## [176] 15 10 16 17 17 13 17 15 16 18 10 16 8 14 4 17 15 10 13 16 17 19 11 8 12
## [201] 13 16 15 18 8 5 10 8 13 7 11 15 10 15 8 7 10 18 15 19 14 3 6 12 14
## [226] 8 15 5 3 11 11 13 18 19 15 18 8 11 17 14 12 14 8 12 11 6 9 11 18 18
## [251] 19 9 18 9 7 8 18 11 12 11 14 14 5 12 19 9 17 10 9 9 10 14 14 12 17
## [276] 10 12 13 11 12 17 14 15 8 13 11 10 6 10 18 6 18 3 15 13 19 11 13 5 8
## [301] 18 13 10 18 12 9 15 14 8 19 7 13 14 19 9 12 10 3 13 16 13 10 15 11 11
## [326] 19 15 11 19 8 7 10 13 14 14 9 11 13 15 18 15 13 9 8 7 17 11 12 9 14
## [351] 8 16 17 9 12 19 17 15 12 7 8 15 18 11 10 9 11 16 16 18 16 10 13 16 18
## [376] 9 13 11 5 6 12 9 15 13 16 18 11 13 7 4 13 16 17 10 11 7 11 16 13 8
## [401] 3 12 15 8 4 13 7 15 15 18 7 13 18 19 10 6 13 14 15 5 12 17 8 12 12
## [426] 12 7 19 17 16 6 12 6 6 14 15 7 14 16 11 9 19 17 15 13 5 11 11 10 13
## [451] 10 9 15 13 17 5 14 10 13 8 17 13 10 10 16 5 15 11 12 19 10 18 13 15 16
## [476] 17 14 9 16 16 16 16 12 16 7 18 4 19 11 15 8 15 5 6 19 14 14 7 15 13
## [501] 9 13 18 5 14 7 18 17 14 13 11 12 18 3 14 9 10 9 15 14 16 12 9 13 15
## [526] 11 6 12 17 12 3 12 7 12 8 9 15 11 15 12 9 10 6 12 14 15 8 19 12 7
## [551] 17 7 12 13 15 15 6 13 19 12 9 17 19 10 12 14 12 18 5 19 17 13 9 7 16
## [576] 16 17 17 18 7 7 12 12 8 8 14 11 17 13 13 15 5 10 15 11 3 6 7 12 14
## [601] 13 19 15 14 7 16 16 16 7 7 16 15 12 6 11 13 15 13 18 15 3 16 12 12 5
## [626] 12 14 6 19 9 11 10 7 16 5 8 15 10 10 13 18 12 13 19 10 19 16 13 13 14
## [651] 9 8 10 13 12 6 14 18 14 3 16 10 12 10 16 10 8 10 5 8 8 18 6 10 14
## [676] 17 18 14 14 10 13 18 11 15 19 16 16 15 8 7 12 10 13 7 13 14 9 16 15 13
## [701] 10 14 12 7 15 19 15 12 15 10 17 5 3 14 16 19 11 16 17 9 9 18 11 15 10
# attach() puts the columns of LungCapData on the search path, so they can be
# referred to by bare name instead of LungCapData$<column>.
# NOTE: the attached columns are a copy; later changes to LungCapData are not
# reflected in them. with(LungCapData, mean(Age)) is a scoped alternative.
attach(LungCapData)
# Mean age of the persons in the sample, now written without the `$` prefix
mean(Age)
## [1] 12.3269
# The whole Age column is available under its bare name as well
Age
## [1] 6 18 16 14 5 11 8 11 15 11 19 17 12 10 10 13 15 8 11 14 6 8 16 11 11
## [26] 12 12 9 4 18 4 13 13 13 12 10 6 9 11 17 14 17 8 12 6 11 11 12 17 7
## [51] 15 15 11 10 18 6 13 19 9 12 12 14 9 13 13 13 11 11 11 12 14 11 11 13 13
## [76] 12 14 9 17 11 12 16 17 19 14 12 19 11 15 19 9 14 13 14 19 18 16 7 16 10
## [101] 13 14 13 16 16 5 16 12 7 7 15 18 8 3 15 15 7 18 8 17 16 12 17 16 19
## [126] 12 17 19 12 12 15 17 13 12 16 13 15 15 14 13 6 18 18 18 9 17 14 14 14 3
## [151] 11 8 9 8 16 7 11 5 16 11 16 10 12 19 7 8 13 17 9 8 15 6 14 10 17
## [176] 15 10 16 17 17 13 17 15 16 18 10 16 8 14 4 17 15 10 13 16 17 19 11 8 12
## [201] 13 16 15 18 8 5 10 8 13 7 11 15 10 15 8 7 10 18 15 19 14 3 6 12 14
## [226] 8 15 5 3 11 11 13 18 19 15 18 8 11 17 14 12 14 8 12 11 6 9 11 18 18
## [251] 19 9 18 9 7 8 18 11 12 11 14 14 5 12 19 9 17 10 9 9 10 14 14 12 17
## [276] 10 12 13 11 12 17 14 15 8 13 11 10 6 10 18 6 18 3 15 13 19 11 13 5 8
## [301] 18 13 10 18 12 9 15 14 8 19 7 13 14 19 9 12 10 3 13 16 13 10 15 11 11
## [326] 19 15 11 19 8 7 10 13 14 14 9 11 13 15 18 15 13 9 8 7 17 11 12 9 14
## [351] 8 16 17 9 12 19 17 15 12 7 8 15 18 11 10 9 11 16 16 18 16 10 13 16 18
## [376] 9 13 11 5 6 12 9 15 13 16 18 11 13 7 4 13 16 17 10 11 7 11 16 13 8
## [401] 3 12 15 8 4 13 7 15 15 18 7 13 18 19 10 6 13 14 15 5 12 17 8 12 12
## [426] 12 7 19 17 16 6 12 6 6 14 15 7 14 16 11 9 19 17 15 13 5 11 11 10 13
## [451] 10 9 15 13 17 5 14 10 13 8 17 13 10 10 16 5 15 11 12 19 10 18 13 15 16
## [476] 17 14 9 16 16 16 16 12 16 7 18 4 19 11 15 8 15 5 6 19 14 14 7 15 13
## [501] 9 13 18 5 14 7 18 17 14 13 11 12 18 3 14 9 10 9 15 14 16 12 9 13 15
## [526] 11 6 12 17 12 3 12 7 12 8 9 15 11 15 12 9 10 6 12 14 15 8 19 12 7
## [551] 17 7 12 13 15 15 6 13 19 12 9 17 19 10 12 14 12 18 5 19 17 13 9 7 16
## [576] 16 17 17 18 7 7 12 12 8 8 14 11 17 13 13 15 5 10 15 11 3 6 7 12 14
## [601] 13 19 15 14 7 16 16 16 7 7 16 15 12 6 11 13 15 13 18 15 3 16 12 12 5
## [626] 12 14 6 19 9 11 10 7 16 5 8 15 10 10 13 18 12 13 19 10 19 16 13 13 14
## [651] 9 8 10 13 12 6 14 18 14 3 16 10 12 10 16 10 8 10 5 8 8 18 6 10 14
## [676] 17 18 14 14 10 13 18 11 15 19 16 16 15 8 7 12 10 13 7 13 14 9 16 15 13
## [701] 10 14 12 7 15 19 15 12 15 10 17 5 3 14 16 19 11 16 17 9 9 18 11 15 10
# detach() removes the data set from the search path again
detach(LungCapData)
# Re-attach it, because the class() calls below use the bare column names
attach(LungCapData)
# First, recall the column names
names(LungCapData)
## [1] "LungCap" "Age" "Height" "Smoke" "Gender" "Caesarean"
# Then check the class of each variable: the three measurements are numeric
# and the three columns converted with as.factor() are factors
class(LungCap)
## [1] "numeric"
class(Age)
## [1] "numeric"
class(Height)
## [1] "numeric"
class(Smoke)
## [1] "factor"
class(Gender)
## [1] "factor"
class(Caesarean)
## [1] "factor"
# summary() reports quartiles and the mean for the numeric columns and the
# count per level for the factor columns
summary(LungCapData)
## LungCap Age Height Smoke Gender
## Min. : 0.507 Min. : 3.00 Min. :45.30 no :648 female:358
## 1st Qu.: 6.150 1st Qu.: 9.00 1st Qu.:59.90 yes: 77 male :367
## Median : 8.000 Median :13.00 Median :65.40
## Mean : 7.863 Mean :12.33 Mean :64.84
## 3rd Qu.: 9.800 3rd Qu.:15.00 3rd Qu.:70.30
## Max. :14.675 Max. :19.00 Max. :81.80
## Caesarean
## no :561
## yes:164
##
##
##
##
# This page is inspired by Mark Marin's Statslectures video, Working with Variables in R. View the page here: https://dataz4s.com/r-statistical-programming/variables-glance/