# Load the SmokeBan data set (hosted CSV from the Rdatasets collection, AER
# package) into a data frame; the first line of the file holds the column names.
SmokeBan <- read.csv(
  file = "https://vincentarelbundock.github.io/Rdatasets/csv/AER/SmokeBan.csv",
  header = TRUE
)

# CREATE A NEW DATA FRAME WITH COLUMNS AND ROWS

# Build and preview "Smokers50AndUnder": the rows of SmokeBan for smokers aged
# 50 or younger, keeping only the gender, ban and age columns.
Smokers50AndUnder <- local({
  keep_row <- SmokeBan$smoker == "yes" & SmokeBan$age <= 50
  # Rows whose condition evaluates to NA are dropped, exactly as subset() does.
  keep_row <- keep_row & !is.na(keep_row)
  SmokeBan[keep_row, c("gender", "ban", "age"), drop = FALSE]
})
head(Smokers50AndUnder)
##    gender ban age
## 1  female yes  41
## 2  female yes  44
## 4  female  no  29
## 7  female yes  47
## 8    male  no  36
## 19 female yes  28
# ADD A NEW COLUMN TO THE DATA FRAME. 

# Add the years_smoked column: the number of years an employee has been
# smoking. The values are randomly generated integers from 1 to 25, drawn with
# replacement, one per row. The number of draws is taken from nrow() instead of
# a hard-coded row count, so the assignment keeps working if the filtered data
# frame changes size.
# NOTE: no random seed is set, so the generated values differ between runs.
Smokers50AndUnder$years_smoked <- sample(
  1:25,
  size = nrow(Smokers50AndUnder),
  replace = TRUE
)
head(Smokers50AndUnder)
##    gender ban age years_smoked
## 1  female yes  41            7
## 2  female yes  44            5
## 4  female  no  29           18
## 7  female yes  47            9
## 8    male  no  36            3
## 19 female yes  28           10
# CREATING A SUBSET OF THE DATA FRAME. 

# Set ban to "yes" for every smoker in the data frame who is older than 40,
# leaving the ban value of all other rows unchanged, then preview the result.
Smokers50AndUnder$ban <- with(
  Smokers50AndUnder,
  replace(ban, age > 40, "yes")
)
head(Smokers50AndUnder)
##    gender ban age years_smoked
## 1  female yes  41            7
## 2  female yes  44            5
## 4  female  no  29           18
## 7  female yes  47            9
## 8    male  no  36            3
## 19 female yes  28           10