Variables at a glance

This page is part of My notes on R programming on my site: https://dataz4s.com/

Convert to factor with as.factor() command

# Imported columns are sometimes registered with the wrong type (for example
# "text") when they should be factors; as.factor() converts them.
# A small 0/1 vector is used here to illustrate the conversion.
x <- c(1, 0, 0, 1, 1, 0, 1, 0)

# Before the conversion, x is registered as "numeric"
class(x)
## [1] "numeric"
# After as.factor(), the same values are stored as a "factor"
x <- as.factor(x)
class(x)
## [1] "factor"
# Import the LungCap dataset: the first three columns are read as numeric,
# the last three as text, and the text columns are then converted with the
# as.factor() command

library(readxl)
LungCapData <- read_excel(
  path = "C:/Users/Usuario/Documents/dataZ4s/R/MarinLectures/LungCapData.xlsx",
  col_types = c(rep("numeric", 3), rep("text", 3))
)

# Smoke, Gender and Caesarean hold categories, so each one is replaced by
# its as.factor() version
LungCapData[["Smoke"]] <- as.factor(LungCapData[["Smoke"]])
LungCapData[["Gender"]] <- as.factor(LungCapData[["Gender"]])
LungCapData[["Caesarean"]] <- as.factor(LungCapData[["Caesarean"]])

# Open the imported data in the data viewer
View(LungCapData)

names()

# names() lists the variable (column) names of the dataset
names(LungCapData)
## [1] "LungCap"   "Age"       "Height"    "Smoke"     "Gender"    "Caesarean"

Extracting variables with $

# Using $ to extract the Age variable from the dataset and compute the mean
# age of the persons in the sample
mean(LungCapData$Age)
## [1] 12.3269
# Extracting the variable on its own prints the age of every person in the
# dataset
LungCapData$Age
##   [1]  6 18 16 14  5 11  8 11 15 11 19 17 12 10 10 13 15  8 11 14  6  8 16 11 11
##  [26] 12 12  9  4 18  4 13 13 13 12 10  6  9 11 17 14 17  8 12  6 11 11 12 17  7
##  [51] 15 15 11 10 18  6 13 19  9 12 12 14  9 13 13 13 11 11 11 12 14 11 11 13 13
##  [76] 12 14  9 17 11 12 16 17 19 14 12 19 11 15 19  9 14 13 14 19 18 16  7 16 10
## [101] 13 14 13 16 16  5 16 12  7  7 15 18  8  3 15 15  7 18  8 17 16 12 17 16 19
## [126] 12 17 19 12 12 15 17 13 12 16 13 15 15 14 13  6 18 18 18  9 17 14 14 14  3
## [151] 11  8  9  8 16  7 11  5 16 11 16 10 12 19  7  8 13 17  9  8 15  6 14 10 17
## [176] 15 10 16 17 17 13 17 15 16 18 10 16  8 14  4 17 15 10 13 16 17 19 11  8 12
## [201] 13 16 15 18  8  5 10  8 13  7 11 15 10 15  8  7 10 18 15 19 14  3  6 12 14
## [226]  8 15  5  3 11 11 13 18 19 15 18  8 11 17 14 12 14  8 12 11  6  9 11 18 18
## [251] 19  9 18  9  7  8 18 11 12 11 14 14  5 12 19  9 17 10  9  9 10 14 14 12 17
## [276] 10 12 13 11 12 17 14 15  8 13 11 10  6 10 18  6 18  3 15 13 19 11 13  5  8
## [301] 18 13 10 18 12  9 15 14  8 19  7 13 14 19  9 12 10  3 13 16 13 10 15 11 11
## [326] 19 15 11 19  8  7 10 13 14 14  9 11 13 15 18 15 13  9  8  7 17 11 12  9 14
## [351]  8 16 17  9 12 19 17 15 12  7  8 15 18 11 10  9 11 16 16 18 16 10 13 16 18
## [376]  9 13 11  5  6 12  9 15 13 16 18 11 13  7  4 13 16 17 10 11  7 11 16 13  8
## [401]  3 12 15  8  4 13  7 15 15 18  7 13 18 19 10  6 13 14 15  5 12 17  8 12 12
## [426] 12  7 19 17 16  6 12  6  6 14 15  7 14 16 11  9 19 17 15 13  5 11 11 10 13
## [451] 10  9 15 13 17  5 14 10 13  8 17 13 10 10 16  5 15 11 12 19 10 18 13 15 16
## [476] 17 14  9 16 16 16 16 12 16  7 18  4 19 11 15  8 15  5  6 19 14 14  7 15 13
## [501]  9 13 18  5 14  7 18 17 14 13 11 12 18  3 14  9 10  9 15 14 16 12  9 13 15
## [526] 11  6 12 17 12  3 12  7 12  8  9 15 11 15 12  9 10  6 12 14 15  8 19 12  7
## [551] 17  7 12 13 15 15  6 13 19 12  9 17 19 10 12 14 12 18  5 19 17 13  9  7 16
## [576] 16 17 17 18  7  7 12 12  8  8 14 11 17 13 13 15  5 10 15 11  3  6  7 12 14
## [601] 13 19 15 14  7 16 16 16  7  7 16 15 12  6 11 13 15 13 18 15  3 16 12 12  5
## [626] 12 14  6 19  9 11 10  7 16  5  8 15 10 10 13 18 12 13 19 10 19 16 13 13 14
## [651]  9  8 10 13 12  6 14 18 14  3 16 10 12 10 16 10  8 10  5  8  8 18  6 10 14
## [676] 17 18 14 14 10 13 18 11 15 19 16 16 15  8  7 12 10 13  7 13 14  9 16 15 13
## [701] 10 14 12  7 15 19 15 12 15 10 17  5  3 14 16 19 11 16 17  9  9 18 11 15 10

attach()

# Using attach() command to extract mean age of persons in the sample.
# After attaching, the variables can be referenced by their bare names
# (Age) instead of LungCapData$Age.
# NOTE(review): attach() is generally discouraged outside interactive
# exploration; LungCapData$Age is the safer form in scripts.
attach(LungCapData) 

# Attaching makes it easier to extract variables
mean(Age)
## [1] 12.3269
# Typing the bare variable name prints the whole variable
Age
##   [1]  6 18 16 14  5 11  8 11 15 11 19 17 12 10 10 13 15  8 11 14  6  8 16 11 11
##  [26] 12 12  9  4 18  4 13 13 13 12 10  6  9 11 17 14 17  8 12  6 11 11 12 17  7
##  [51] 15 15 11 10 18  6 13 19  9 12 12 14  9 13 13 13 11 11 11 12 14 11 11 13 13
##  [76] 12 14  9 17 11 12 16 17 19 14 12 19 11 15 19  9 14 13 14 19 18 16  7 16 10
## [101] 13 14 13 16 16  5 16 12  7  7 15 18  8  3 15 15  7 18  8 17 16 12 17 16 19
## [126] 12 17 19 12 12 15 17 13 12 16 13 15 15 14 13  6 18 18 18  9 17 14 14 14  3
## [151] 11  8  9  8 16  7 11  5 16 11 16 10 12 19  7  8 13 17  9  8 15  6 14 10 17
## [176] 15 10 16 17 17 13 17 15 16 18 10 16  8 14  4 17 15 10 13 16 17 19 11  8 12
## [201] 13 16 15 18  8  5 10  8 13  7 11 15 10 15  8  7 10 18 15 19 14  3  6 12 14
## [226]  8 15  5  3 11 11 13 18 19 15 18  8 11 17 14 12 14  8 12 11  6  9 11 18 18
## [251] 19  9 18  9  7  8 18 11 12 11 14 14  5 12 19  9 17 10  9  9 10 14 14 12 17
## [276] 10 12 13 11 12 17 14 15  8 13 11 10  6 10 18  6 18  3 15 13 19 11 13  5  8
## [301] 18 13 10 18 12  9 15 14  8 19  7 13 14 19  9 12 10  3 13 16 13 10 15 11 11
## [326] 19 15 11 19  8  7 10 13 14 14  9 11 13 15 18 15 13  9  8  7 17 11 12  9 14
## [351]  8 16 17  9 12 19 17 15 12  7  8 15 18 11 10  9 11 16 16 18 16 10 13 16 18
## [376]  9 13 11  5  6 12  9 15 13 16 18 11 13  7  4 13 16 17 10 11  7 11 16 13  8
## [401]  3 12 15  8  4 13  7 15 15 18  7 13 18 19 10  6 13 14 15  5 12 17  8 12 12
## [426] 12  7 19 17 16  6 12  6  6 14 15  7 14 16 11  9 19 17 15 13  5 11 11 10 13
## [451] 10  9 15 13 17  5 14 10 13  8 17 13 10 10 16  5 15 11 12 19 10 18 13 15 16
## [476] 17 14  9 16 16 16 16 12 16  7 18  4 19 11 15  8 15  5  6 19 14 14  7 15 13
## [501]  9 13 18  5 14  7 18 17 14 13 11 12 18  3 14  9 10  9 15 14 16 12  9 13 15
## [526] 11  6 12 17 12  3 12  7 12  8  9 15 11 15 12  9 10  6 12 14 15  8 19 12  7
## [551] 17  7 12 13 15 15  6 13 19 12  9 17 19 10 12 14 12 18  5 19 17 13  9  7 16
## [576] 16 17 17 18  7  7 12 12  8  8 14 11 17 13 13 15  5 10 15 11  3  6  7 12 14
## [601] 13 19 15 14  7 16 16 16  7  7 16 15 12  6 11 13 15 13 18 15  3 16 12 12  5
## [626] 12 14  6 19  9 11 10  7 16  5  8 15 10 10 13 18 12 13 19 10 19 16 13 13 14
## [651]  9  8 10 13 12  6 14 18 14  3 16 10 12 10 16 10  8 10  5  8  8 18  6 10 14
## [676] 17 18 14 14 10 13 18 11 15 19 16 16 15  8  7 12 10 13  7 13 14  9 16 15 13
## [701] 10 14 12  7 15 19 15 12 15 10 17  5  3 14 16 19 11 16 17  9  9 18 11 15 10
# We can detach the dataset again with detach(); this undoes the attach()
detach(LungCapData)

# We will keep working with the data attached, because the following
# sections refer to the variables by their bare names
attach(LungCapData)

class() command

# First, let's recall the names of the variables in the dataset
names(LungCapData)
## [1] "LungCap"   "Age"       "Height"    "Smoke"     "Gender"    "Caesarean"
# Then let's check the class of each variable; the bare names work because
# the dataset is attached
class(LungCap)
## [1] "numeric"
class(Age)
## [1] "numeric"
class(Height)
## [1] "numeric"
# Smoke, Gender and Caesarean report "factor" because they were converted
# with as.factor() after the import
class(Smoke)
## [1] "factor"
class(Gender)
## [1] "factor"
class(Caesarean)
## [1] "factor"

Generic summary with summary() command

# summary() gives a generic overview of every variable: minimum, quartiles,
# median, mean and maximum for the numeric variables, and the count per
# level for the factor variables
summary(LungCapData)
##     LungCap            Age            Height      Smoke        Gender   
##  Min.   : 0.507   Min.   : 3.00   Min.   :45.30   no :648   female:358  
##  1st Qu.: 6.150   1st Qu.: 9.00   1st Qu.:59.90   yes: 77   male  :367  
##  Median : 8.000   Median :13.00   Median :65.40                         
##  Mean   : 7.863   Mean   :12.33   Mean   :64.84                         
##  3rd Qu.: 9.800   3rd Qu.:15.00   3rd Qu.:70.30                         
##  Max.   :14.675   Max.   :19.00   Max.   :81.80                         
##  Caesarean
##  no :561  
##  yes:164  
##           
##           
##           
## 

This page is inspired by Mark Marin's Statslectures video, Working with Variables in R. View the page here: https://dataz4s.com/r-statistical-programming/variables-glance/