# Load the SmokeBan data set (AER package data, hosted as CSV on Rdatasets).
SmokeBan <- read.csv(
  file = "https://vincentarelbundock.github.io/Rdatasets/csv/AER/SmokeBan.csv",
  header = TRUE
)
# CREATE A NEW DATA FRAME WITH SELECTED COLUMNS AND ROWS
# Build "Smokers50AndUnder": the rows of SmokeBan for smokers aged 50 or
# younger, keeping only the gender, ban and age columns, then display the
# first rows.
# which() drops rows where the condition is NA instead of returning NA rows.
Smokers50AndUnder <- SmokeBan[
  which(SmokeBan[["smoker"]] == "yes" & SmokeBan[["age"]] <= 50),
  c("gender", "ban", "age"),
  drop = FALSE
]
head(Smokers50AndUnder)
## gender ban age
## 1 female yes 41
## 2 female yes 44
## 4 female no 29
## 7 female yes 47
## 8 male no 36
## 19 female yes 28
# ADD A NEW COLUMN TO THE DATA FRAME.
# Add the years_smoked column: the number of years an employee has been
# smoking. The values are simulated, drawn at random (with replacement) from
# 1 to 25 years, so they differ between runs unless a seed is set beforehand.
# One value is drawn per row; sizing from nrow() keeps the assignment valid
# even if the filtered row count changes.
Smokers50AndUnder$years_smoked <- sample(
  1:25,
  size = nrow(Smokers50AndUnder),
  replace = TRUE
)
head(Smokers50AndUnder)
## gender ban age years_smoked
## 1 female yes 41 7
## 2 female yes 44 5
## 4 female no 29 18
## 7 female yes 47 9
## 8 male no 36 3
## 19 female yes 28 10
# UPDATING VALUES FOR A SUBSET OF ROWS.
# For every smoker older than 40, set ban to "yes"; other rows keep their
# current value. Then display the first rows.
Smokers50AndUnder$ban <- replace(
  Smokers50AndUnder$ban,
  Smokers50AndUnder$age > 40,
  "yes"
)
head(Smokers50AndUnder)
## gender ban age years_smoked
## 1 female yes 41 7
## 2 female yes 44 5
## 4 female no 29 18
## 7 female yes 47 9
## 8 male no 36 3
## 19 female yes 28 10