# Giving descriptive names to the columns
# Assign descriptive names to the five columns of `esoph` (by position),
# then preview the first rows of the renamed data frame.
names(esoph) <- c(
  "AgeGroup",
  "AlcoholConsumption",
  "TobaccoConsumption",
  "Numberofcases",
  "Numberofcontrols"
)
head(esoph)
## AgeGroup AlcoholConsumption TobaccoConsumption Numberofcases
## 1 25-34 0-39g/day 0-9g/day 0
## 2 25-34 0-39g/day 10-19 0
## 3 25-34 0-39g/day 20-29 0
## 4 25-34 0-39g/day 30+ 0
## 5 25-34 40-79 0-9g/day 0
## 6 25-34 40-79 10-19 0
## Numberofcontrols
## 1 40
## 2 10
## 3 6
## 4 5
## 5 27
## 6 7
# Exploratory Data Analysis Question 1: Which age group has the highest risk of esophageal cancer?
# Keep only the age group and case count columns, preview them, and plot
# the two columns against each other.
X <- esoph[, c("AgeGroup", "Numberofcases"), drop = FALSE]
head(X)
## AgeGroup Numberofcases
## 1 25-34 0
## 2 25-34 0
## 3 25-34 0
## 4 25-34 0
## 5 25-34 0
## 6 25-34 0
plot(
  X,
  main = "Esophageal cancer risks by age group"
)

# Exploratory Data Analysis Question 2: Which alcohol consumption range is associated with a higher risk of esophageal cancer?
# Keep the two consumption columns together with the case counts and
# preview the result.
Y <- esoph[, c("AlcoholConsumption", "TobaccoConsumption", "Numberofcases")]
head(Y)
## AlcoholConsumption TobaccoConsumption Numberofcases
## 1 0-39g/day 0-9g/day 0
## 2 0-39g/day 10-19 0
## 3 0-39g/day 20-29 0
## 4 0-39g/day 30+ 0
## 5 40-79 0-9g/day 0
## 6 40-79 10-19 0
# Average the case counts within each alcohol consumption group and draw
# the averages as bars, relabelled so every bar shows the g/day unit.
heights <- with(Y, tapply(Numberofcases, AlcoholConsumption, mean))
barplot(
  heights,
  names.arg = c("0-39 g/day", "40-79 g/day", "80-119 g/day", "120+ g/day"),
  ylab = "Number of cases",
  main = "Mean number of cases by alcohol consumption"
)

# Exploratory Data Analysis Question 3: Which tobacco consumption range is associated with a higher risk of esophageal cancer?
# Average the case counts in Y within each tobacco consumption group and
# draw the averages as bars, relabelled so every bar shows the g/day unit.
heights2 <- with(Y, tapply(Numberofcases, TobaccoConsumption, mean))
barplot(
  heights2,
  names.arg = c("0-9 g/day", "10-19 g/day", "20-29 g/day", "30+ g/day"),
  ylab = "Number of cases",
  main = "Mean number of cases by tobacco consumption"
)

# Conclusion 1: The 55-64 age group has the highest number of esophageal cancer cases.
# Conclusion 2: Alcohol consumption of 40-79 g/day has the highest average number of esophageal cancer cases.
# Conclusion 3: Tobacco consumption of 0-9 g/day has the highest average number of esophageal cancer cases.
# Note: these conclusions compare raw case counts only; the number of controls
# in each group is not taken into account, so they do not by themselves measure risk.