Complete all exercises and submit your answers to VtopBeta.
Create a CSV file with at least 20 entries and the following attributes:
Name Nominal
Age 1…100
Experience 1…15
Remove some values of Experience and fill them with “NA”.
Replace a few entries of Age with values greater than 100.
Print the dataset using R
# Load the stored dataset from disk and print every row of it.
displaytable <- readRDS(file = "exercisetwo.rds")
print(displaytable)
## Name Age Experience
## 1 A1 2 NA
## 2 A2 75 3
## 3 A3 10 NA
## 4 A4 100 NA
## 5 A5 86 15
## 6 A6 41 4
## 7 A7 76 6
## 8 A8 23 5
## 9 A9 41 13
## 10 A10 85 4
## 11 A11 85 10
## 12 A12 145 NA
## 13 A13 5 NA
## 14 A14 41 8
## 15 A15 4 NA
## 16 A16 77 10
## 17 A17 14 9
## 18 A18 73 6
## 19 A19 32 12
## 20 A20 84 10
# Re-read the dataset and show only the rows that have no missing value in
# any column.
exercise <- readRDS(file = "exercisetwo.rds")
print(subset(exercise, subset = complete.cases(exercise)))
## Name Age Experience
## 2 A2 75 3
## 5 A5 86 15
## 6 A6 41 4
## 7 A7 76 6
## 8 A8 23 5
## 9 A9 41 13
## 10 A10 85 4
## 11 A11 85 10
## 14 A14 41 8
## 16 A16 77 10
## 17 A17 14 9
## 18 A18 73 6
## 19 A19 32 12
## 20 A20 84 10
# Average of the Age column, ignoring missing entries; reused later as the
# replacement value for Age.
# NOTE(review): this is taken over every row of `exercise`, so ages above 100
# are part of the average -- confirm that is intended.
data_mean <- mean(exercise[["Age"]], na.rm = TRUE)
print(data_mean)
## [1] 54.95
# Working copy of the dataset: every missing Experience value is replaced by
# the mean of the Experience values that are present.
exercisedata <- readRDS(file = "exercisetwo.rds")
exercisedata$Experience <- replace(
  exercisedata$Experience,
  is.na(exercisedata$Experience),
  mean(exercisedata$Experience, na.rm = TRUE)
)
print(exercisedata)
## Name Age Experience
## 1 A1 2 8.214286
## 2 A2 75 3.000000
## 3 A3 10 8.214286
## 4 A4 100 8.214286
## 5 A5 86 15.000000
## 6 A6 41 4.000000
## 7 A7 76 6.000000
## 8 A8 23 5.000000
## 9 A9 41 13.000000
## 10 A10 85 4.000000
## 11 A11 85 10.000000
## 12 A12 145 8.214286
## 13 A13 5 8.214286
## 14 A14 41 8.000000
## 15 A15 4 8.214286
## 16 A16 77 10.000000
## 17 A17 14 9.000000
## 18 A18 73 6.000000
## 19 A19 32 12.000000
## 20 A20 84 10.000000
# Mark every Age above 100 as missing.
is.na(exercisedata$Age) <- exercisedata$Age > 100
print(exercisedata)
## Name Age Experience
## 1 A1 2 8.214286
## 2 A2 75 3.000000
## 3 A3 10 8.214286
## 4 A4 100 8.214286
## 5 A5 86 15.000000
## 6 A6 41 4.000000
## 7 A7 76 6.000000
## 8 A8 23 5.000000
## 9 A9 41 13.000000
## 10 A10 85 4.000000
## 11 A11 85 10.000000
## 12 A12 NA 8.214286
## 13 A13 5 8.214286
## 14 A14 41 8.000000
## 15 A15 4 8.214286
## 16 A16 77 10.000000
## 17 A17 14 9.000000
## 18 A18 73 6.000000
## 19 A19 32 12.000000
## 20 A20 84 10.000000
# Fill the missing Age entries with the previously computed data_mean.
exercisedata[is.na(exercisedata$Age), "Age"] <- data_mean
print(exercisedata)
## Name Age Experience
## 1 A1 2.00 8.214286
## 2 A2 75.00 3.000000
## 3 A3 10.00 8.214286
## 4 A4 100.00 8.214286
## 5 A5 86.00 15.000000
## 6 A6 41.00 4.000000
## 7 A7 76.00 6.000000
## 8 A8 23.00 5.000000
## 9 A9 41.00 13.000000
## 10 A10 85.00 4.000000
## 11 A11 85.00 10.000000
## 12 A12 54.95 8.214286
## 13 A13 5.00 8.214286
## 14 A14 41.00 8.000000
## 15 A15 4.00 8.214286
## 16 A16 77.00 10.000000
## 17 A17 14.00 9.000000
## 18 A18 73.00 6.000000
## 19 A19 32.00 12.000000
## 20 A20 84.00 10.000000
# Write 145 back into the Age entries that currently hold data_mean.
#
# The earlier form of this statement compared the logical result of is.na()
# with data_mean. A logical value never equals data_mean, so the index was all
# FALSE and nothing was assigned. The Age values themselves are compared here.
# A small tolerance is used instead of `==` because the values are doubles;
# which() drops any NA from the comparison.
# NOTE(review): the tables and correlation recorded as `##` output below were
# captured before this fix (row 12 shows 54.95); re-run to refresh them.
exercisedata$Age[
  which(abs(exercisedata$Age - data_mean) < sqrt(.Machine$double.eps))
] <- 145
exercisedata
## Name Age Experience
## 1 A1 2.00 8.214286
## 2 A2 75.00 3.000000
## 3 A3 10.00 8.214286
## 4 A4 100.00 8.214286
## 5 A5 86.00 15.000000
## 6 A6 41.00 4.000000
## 7 A7 76.00 6.000000
## 8 A8 23.00 5.000000
## 9 A9 41.00 13.000000
## 10 A10 85.00 4.000000
## 11 A11 85.00 10.000000
## 12 A12 54.95 8.214286
## 13 A13 5.00 8.214286
## 14 A14 41.00 8.000000
## 15 A15 4.00 8.214286
## 16 A16 77.00 10.000000
## 17 A17 14.00 9.000000
## 18 A18 73.00 6.000000
## 19 A19 32.00 12.000000
## 20 A20 84.00 10.000000
# Plot the data, then add a column Exp that is a copy of Experience.
# Each call sits on its own line: several calls written back to back on one
# line without a separator do not parse in R.
boxplot(exercisedata$Age)   # box plot of Age
plot(exercisedata)          # plot of the whole data frame
plot(exercisedata$Age)      # Age values against their row index
exercisedata$Exp <- exercisedata$Experience
exercisedata
## Name Age Experience Exp
## 1 A1 2.00 8.214286 8.214286
## 2 A2 75.00 3.000000 3.000000
## 3 A3 10.00 8.214286 8.214286
## 4 A4 100.00 8.214286 8.214286
## 5 A5 86.00 15.000000 15.000000
## 6 A6 41.00 4.000000 4.000000
## 7 A7 76.00 6.000000 6.000000
## 8 A8 23.00 5.000000 5.000000
## 9 A9 41.00 13.000000 13.000000
## 10 A10 85.00 4.000000 4.000000
## 11 A11 85.00 10.000000 10.000000
## 12 A12 54.95 8.214286 8.214286
## 13 A13 5.00 8.214286 8.214286
## 14 A14 41.00 8.000000 8.000000
## 15 A15 4.00 8.214286 8.214286
## 16 A16 77.00 10.000000 10.000000
## 17 A17 14.00 9.000000 9.000000
## 18 A18 73.00 6.000000 6.000000
## 19 A19 32.00 12.000000 12.000000
## 20 A20 84.00 10.000000 10.000000
# Pairwise correlation matrix of Age, Experience and Exp. The columns are
# passed as `exercisedata$...` expressions so the matrix keeps the
# `exercisedata.<column>` row and column labels.
corf <- cor(
  x = data.frame(
    exercisedata$Age,
    exercisedata$Experience,
    exercisedata$Exp
  )
)
print(corf)
## exercisedata.Age exercisedata.Experience
## exercisedata.Age 1.000000000 0.008652845
## exercisedata.Experience 0.008652845 1.000000000
## exercisedata.Exp 0.008652845 1.000000000
## exercisedata.Exp
## exercisedata.Age 0.008652845
## exercisedata.Experience 1.000000000
## exercisedata.Exp 1.000000000