# Setup ----
# Install only the packages that are missing, rather than reinstalling on
# every run. knitr is installed but not attached anywhere in this script.
required_pkgs <- c("knitr", "ggplot2", "dplyr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# One library() call per line: several calls on one line without a separator
# do not parse. tidyverse is attached once; a second attach is a no-op.
library(dplyr)
library(ggplot2)
library(readxl)
library(descr)
library(tidyverse)

# Importing the dataset ----
# NOTE(review): absolute, machine-specific path; the script only runs where
# the CSV exists at exactly this location.
titanic <- read_csv("C:/Users/susha/Downloads/archive/titanic_data.csv")

# Calculate average fare by sex
# Missing fares are dropped (na.rm = TRUE) before averaging.
fare_by_sex <- titanic %>%
  group_by(Sex) %>%
  summarise(avg_fare = mean(Fare, na.rm = TRUE))

print(fare_by_sex)

# Calculate average fare by passenger class
# Missing fares are dropped (na.rm = TRUE) before averaging.
fare_by_class <- titanic %>%
  group_by(Pclass) %>%
  summarise(avg_fare = mean(Fare, na.rm = TRUE))

print(fare_by_class)

# Calculate survival rate by sex
# The rate is the mean of Survived within each group.
# NOTE(review): assumes Survived is coded 0/1 -- confirm against the CSV.
survival_by_sex <- titanic %>%
  group_by(Sex) %>%
  summarise(survival_rate = mean(Survived, na.rm = TRUE))

print(survival_by_sex)

# Calculate survival rate by passenger class
# The rate is the mean of Survived within each group.
# NOTE(review): assumes Survived is coded 0/1 -- confirm against the CSV.
survival_by_class <- titanic %>%
  group_by(Pclass) %>%
  summarise(survival_rate = mean(Survived, na.rm = TRUE))

print(survival_by_class)

# Visualisation ----

# Boxplot of Fare by Sex
# One box per Sex value, filled by the same variable.
ggplot(titanic, aes(x = Sex, y = Fare, fill = Sex)) +
  geom_boxplot() +
  labs(
    title = "Fare Distribution by Sex",
    x = "Sex",
    y = "Fare"
  ) +
  theme_minimal()

# Barplot of Survival Rate by Class
# Pclass is wrapped in factor() so it is treated as discrete on the x axis
# and in the fill scale.
ggplot(
  survival_by_class,
  aes(x = factor(Pclass), y = survival_rate, fill = factor(Pclass))
) +
  geom_col() +
  labs(
    title = "Survival Rate by Passenger Class",
    x = "Passenger Class",
    y = "Survival Rate"
  ) +
  theme_minimal()

# In this analysis, we observe significant differences in ticket prices and
# survival rates across gender and passenger class. The average fare for women
# was approximately $44.68, while men paid an average of $64.32, illustrating
# a marked disparity in ticket prices between genders. When examining fare by
# passenger class, we find the average fares for First Class, Second Class,
# and Third Class to be $84.15, $20.66, and $13.63, respectively. This
# reflects the clear stratification in pricing based on class, with
# First-Class passengers paying considerably more than those in lower classes.

# In terms of survival rates, women had a significantly higher chance of
# survival, with 74.2% of women surviving the disaster compared to only 18.9%
# of men. The survival rates by passenger class further demonstrate the effect
# of socio-economic status: First-Class passengers had a survival rate of
# 62.96%, Second-Class passengers had 47.28%, and Third-Class passengers had
# the lowest survival rate at 24.25%. These results are consistent with the
# historical "women and children first" protocol and highlight the critical
# role that both gender and class played in determining the likelihood of
# survival during the disaster.

# List the available datasets: first with the default search, then across
# every installed package.
data()
data(package = .packages(all.available = TRUE))

# Loading in-built dataset "esoph"
data("esoph")

# View the structure and summary of the dataset
str(esoph)
summary(esoph)

# Comparison of cancer cases by alcohol consumption group (alcgp):
# Average number of cancer cases by alcohol group
# The mean is taken over the rows of esoph within each alcgp level.
alc_cases <- esoph %>%
  group_by(alcgp) %>%
  summarise(avg_ncases = mean(ncases))

print(alc_cases)

# Comparison of cancer cases by tobacco consumption group (tobgp):
# Average number of cancer cases by tobacco group
# The mean is taken over the rows of esoph within each tobgp level.
tob_cases <- esoph %>%
  group_by(tobgp) %>%
  summarise(avg_ncases = mean(ncases))

print(tob_cases)

# Interaction between age, alcohol, and tobacco consumption:
# Average number of cancer cases by age, alcohol, and tobacco groups
# .groups = "drop" returns an ungrouped tibble; without it the result stays
# grouped by agegp and alcgp and dplyr prints a regrouping message.
age_alc_tob_cases <- esoph %>%
  group_by(agegp, alcgp, tobgp) %>%
  summarise(avg_ncases = mean(ncases), .groups = "drop")

# n = Inf prints every row instead of relying on a hard-coded row count.
print(age_alc_tob_cases, n = Inf)

# Visualisation ----
# Bar heights are the precomputed avg_ncases values, so geom_col() is used;
# it is equivalent to geom_bar(stat = "identity").
ggplot(alc_cases, aes(x = alcgp, y = avg_ncases)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Average Cancer Cases by Alcohol Consumption Group",
    x = "Alcohol Group",
    y = "Average Cancer Cases"
  )

# This dataset, which examines the relationship between alcohol and tobacco
# consumption and cancer incidence, reveals clear patterns in cancer cases
# across different consumption levels. For alcohol consumption, the
# 40-79 g/day group experienced an average of 89.2 cancer cases, while the
# 120+ g/day group saw an even higher average of 129.6 cancer cases.
# Similarly, tobacco consumption also shows a significant association with
# cancer cases, with the 30+ g/day tobacco group having an average of 106.4
# cancer cases.

# The interaction between age, alcohol, and tobacco consumption further
# emphasizes the compounding nature of these risks. For example, individuals
# in the 50-59 age group with high levels of both alcohol and tobacco
# consumption showed the highest average number of cancer cases, at 131.3
# cases. This pattern suggests that the cumulative impact of both lifestyle
# factors and age significantly increases the likelihood of developing cancer,
# underlining the importance of addressing these behaviors as part of a
# broader public health strategy.

# These analyses not only reinforce the role of socio-economic factors in
# historical survival outcomes but also underscore the impact of lifestyle
# choices on cancer risk, offering valuable insights into both public health
# and social history.