#Brief introduction
#The Steak-Risk Survey is designed to examine the connections between people's risk-taking
#behaviors and their lifestyle choices, particularly their steak preferences. It gathers data on
#habits such as smoking, drinking, gambling, and driving, along with more adventurous activities
#like skydiving. The survey also asks participants how they like their steak and whether they
#eat it. Demographic information, including gender, age, income, education, and regional location,
#is collected to better understand how these factors relate to individual behaviors and preferences.
# Set the file path.
# The location can be overridden with the STEAK_SURVEY_CSV environment
# variable so the script also runs on other machines; when the variable is
# not set, the original hard-coded location is used.
file_path <- Sys.getenv(
  "STEAK_SURVEY_CSV",
  unset = "C:/Users/Dell/Downloads/steak-risk-survey.csv"
)
# Stop early with a clear message when the file is missing
if (!file.exists(file_path)) {
  stop("Survey CSV not found: ", file_path, call. = FALSE)
}
# Read the CSV file.
# NOTE: the file carries a second header line, so the first row of `data`
# holds the literal text "Response" in every answer column (see the head()
# listing below); unanswered questions are read as empty strings.
data <- read.csv(file_path)
# View the first few rows of the data
head(data)
## RespondentID
## 1 NA
## 2 3237565956
## 3 3234982343
## 4 3234973379
## 5 3234972383
## 6 3234958833
## Consider.the.following.hypothetical.situations...br.In.Lottery.A..you.have.a.50..chance.of.success..with.a.payout.of..100...br.In.Lottery.B..you.have.a.90..chance.of.success..with.a.payout.of..20...br..br.Assuming.you.have..10.to.bet..would.you.play. ...
## 1 Response
## 2 Lottery B
## 3 Lottery A
## 4 Lottery A
## 5 Lottery B
## 6 Lottery B
## Do.you.ever.smoke.cigarettes. Do.you.ever.drink.alcohol. Do.you.ever.gamble.
## 1 Response Response Response
## 2
## 3 No Yes No
## 4 No Yes Yes
## 5 Yes Yes Yes
## 6 No Yes No
## Have.you.ever.been.skydiving. Do.you.ever.drive.above.the.speed.limit.
## 1 Response Response
## 2
## 3 No No
## 4 No Yes
## 5 No Yes
## 6 No Yes
## Have.you.ever.cheated.on.your.significant.other. Do.you.eat.steak.
## 1 Response Response
## 2
## 3 No Yes
## 4 Yes Yes
## 5 Yes Yes
## 6 Yes Yes
## How.do.you.like.your.steak.prepared. Gender Age Household.Income
## 1 Response Response Response Response
## 2
## 3 Medium rare Male > 60 $50,000 - $99,999
## 4 Rare Male > 60 $150,000+
## 5 Medium Male > 60 $50,000 - $99,999
## 6 Medium Male > 60 $50,000 - $99,999
## Education Location..Census.Region.
## 1 Response Response
## 2
## 3 Some college or Associate degree East North Central
## 4 Graduate degree South Atlantic
## 5 Bachelor degree New England
## 6 Graduate degree Middle Atlantic
# Give the long lottery-question column (column 2) a short, usable name
names(data)[2] <- "Lottery_Choice"
# List the column names to confirm the rename
names(data)
## [1] "RespondentID"
## [2] "Lottery_Choice"
## [3] "Do.you.ever.smoke.cigarettes."
## [4] "Do.you.ever.drink.alcohol."
## [5] "Do.you.ever.gamble."
## [6] "Have.you.ever.been.skydiving."
## [7] "Do.you.ever.drive.above.the.speed.limit."
## [8] "Have.you.ever.cheated.on.your.significant.other."
## [9] "Do.you.eat.steak."
## [10] "How.do.you.like.your.steak.prepared."
## [11] "Gender"
## [12] "Age"
## [13] "Household.Income"
## [14] "Education"
## [15] "Location..Census.Region."
# Inspect the column types and the first values of every column
utils::str(data)
## 'data.frame': 551 obs. of 15 variables:
## $ RespondentID : num NA 3.24e+09 3.23e+09 3.23e+09 3.23e+09 ...
## $ Lottery_Choice : chr "Response" "Lottery B" "Lottery A" "Lottery A" ...
## $ Do.you.ever.smoke.cigarettes. : chr "Response" "" "No" "No" ...
## $ Do.you.ever.drink.alcohol. : chr "Response" "" "Yes" "Yes" ...
## $ Do.you.ever.gamble. : chr "Response" "" "No" "Yes" ...
## $ Have.you.ever.been.skydiving. : chr "Response" "" "No" "No" ...
## $ Do.you.ever.drive.above.the.speed.limit. : chr "Response" "" "No" "Yes" ...
## $ Have.you.ever.cheated.on.your.significant.other.: chr "Response" "" "No" "Yes" ...
## $ Do.you.eat.steak. : chr "Response" "" "Yes" "Yes" ...
## $ How.do.you.like.your.steak.prepared. : chr "Response" "" "Medium rare" "Rare" ...
## $ Gender : chr "Response" "" "Male" "Male" ...
## $ Age : chr "Response" "" "> 60" "> 60" ...
## $ Household.Income : chr "Response" "" "$50,000 - $99,999" "$150,000+" ...
## $ Education : chr "Response" "" "Some college or Associate degree" "Graduate degree" ...
## $ Location..Census.Region. : chr "Response" "" "East North Central" "South Atlantic" ...
# Store the categorical Gender and Lottery_Choice answers as factors
data[c("Gender", "Lottery_Choice")] <- lapply(
  data[c("Gender", "Lottery_Choice")],
  as.factor
)
# Summary of the Age column.
# Age is read as character labels (for example "> 60"), so summary() can
# only report the length, class and mode of the column.
summary(data[["Age"]])
## Length Class Mode
## 551 character character
# List every column name to check how each column is currently called
names(data)
## [1] "RespondentID"
## [2] "Lottery_Choice"
## [3] "Do.you.ever.smoke.cigarettes."
## [4] "Do.you.ever.drink.alcohol."
## [5] "Do.you.ever.gamble."
## [6] "Have.you.ever.been.skydiving."
## [7] "Do.you.ever.drive.above.the.speed.limit."
## [8] "Have.you.ever.cheated.on.your.significant.other."
## [9] "Do.you.eat.steak."
## [10] "How.do.you.like.your.steak.prepared."
## [11] "Gender"
## [12] "Age"
## [13] "Household.Income"
## [14] "Education"
## [15] "Location..Census.Region."
# Pass the column names through make.names() so each one is a syntactically
# valid R name
names(data) <- make.names(names(data))
# List the column names again after cleaning
names(data)
## [1] "RespondentID"
## [2] "Lottery_Choice"
## [3] "Do.you.ever.smoke.cigarettes."
## [4] "Do.you.ever.drink.alcohol."
## [5] "Do.you.ever.gamble."
## [6] "Have.you.ever.been.skydiving."
## [7] "Do.you.ever.drive.above.the.speed.limit."
## [8] "Have.you.ever.cheated.on.your.significant.other."
## [9] "Do.you.eat.steak."
## [10] "How.do.you.like.your.steak.prepared."
## [11] "Gender"
## [12] "Age"
## [13] "Household.Income"
## [14] "Education"
## [15] "Location..Census.Region."
# Column 2 is already called "Lottery_Choice" in the listing above, so this
# assignment leaves the column names unchanged
names(data)[2] <- "Lottery_Choice"
# Make sure the categorical columns are stored as factors
data[c("Gender", "Lottery_Choice")] <- lapply(
  data[c("Gender", "Lottery_Choice")],
  as.factor
)
# Inspect the updated structure
utils::str(data)
## 'data.frame': 551 obs. of 15 variables:
## $ RespondentID : num NA 3.24e+09 3.23e+09 3.23e+09 3.23e+09 ...
## $ Lottery_Choice : Factor w/ 4 levels "","Lottery A",..: 4 3 2 2 3 3 2 2 2 2 ...
## $ Do.you.ever.smoke.cigarettes. : chr "Response" "" "No" "No" ...
## $ Do.you.ever.drink.alcohol. : chr "Response" "" "Yes" "Yes" ...
## $ Do.you.ever.gamble. : chr "Response" "" "No" "Yes" ...
## $ Have.you.ever.been.skydiving. : chr "Response" "" "No" "No" ...
## $ Do.you.ever.drive.above.the.speed.limit. : chr "Response" "" "No" "Yes" ...
## $ Have.you.ever.cheated.on.your.significant.other.: chr "Response" "" "No" "Yes" ...
## $ Do.you.eat.steak. : chr "Response" "" "Yes" "Yes" ...
## $ How.do.you.like.your.steak.prepared. : chr "Response" "" "Medium rare" "Rare" ...
## $ Gender : Factor w/ 4 levels "","Female","Male",..: 4 1 3 3 3 3 3 3 3 3 ...
## $ Age : chr "Response" "" "> 60" "> 60" ...
## $ Household.Income : chr "Response" "" "$50,000 - $99,999" "$150,000+" ...
## $ Education : chr "Response" "" "Some college or Associate degree" "Graduate degree" ...
## $ Location..Census.Region. : chr "Response" "" "East North Central" "South Atlantic" ...
# Visualizing Data
# Set CRAN mirror so install.packages() can run without prompting
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# Install ggplot2 only when it is missing. requireNamespace() checks for the
# package without attaching it, unlike require(), which is meant for loading
# and merely returns FALSE on failure.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Attach ggplot2; library() raises an error if the package is still
# unavailable, so a failed installation cannot go unnoticed
library(ggplot2)
# Count the real answers in one survey column.
#
# The first row of `data` is the survey's second header line (every answer
# column contains the text "Response") and unanswered questions are empty
# strings. Without filtering, both are drawn as categories of their own.
#
# x: a character vector or factor of answers.
# Returns a table with one count per remaining answer.
answer_counts <- function(x) {
  answers <- as.character(x)
  is_answer <- !is.na(answers) & nzchar(answers) & answers != "Response"
  table(answers[is_answer])
}

# Bar plot for Gender
barplot(
  answer_counts(data$Gender),
  main = "Distribution of Gender",
  xlab = "Gender",
  ylab = "Frequency",
  col = "lightblue"
)

# Bar plot for Lottery Choice
barplot(
  answer_counts(data$Lottery_Choice),
  main = "Lottery Choice Distribution",
  xlab = "Lottery Choice",
  ylab = "Frequency",
  col = "lightgreen"
)

# Pie chart for Gender distribution.
# After filtering, two categories remain, so each slice gets its own colour
# instead of the two colours being recycled over four slices.
gender_distribution <- answer_counts(data$Gender)
pie(
  gender_distribution,
  main = "Gender Distribution",
  col = c("lightblue", "pink")
)

# Pie chart for Lottery Choice distribution (same filtering as above)
lottery_distribution <- answer_counts(data$Lottery_Choice)
pie(
  lottery_distribution,
  main = "Lottery Choice Distribution",
  col = c("lightgreen", "lightcoral")
)

# Load ggplot2 library.
# The package is installed (if needed) earlier in the script; calling
# install.packages() again here only produced an "in use" warning.
library(ggplot2)

# Keep only rows with a real Gender answer: the first row is the survey's
# "Response" sub-header and unanswered questions are empty strings.
gender_answered <- data[!(data$Gender %in% c("", "Response", NA)), , drop = FALSE]
gender_answered$Gender <- droplevels(as.factor(gender_answered$Gender))

# Bar plot for Gender using ggplot2
ggplot(gender_answered, aes(x = Gender)) +
  geom_bar(fill = "lightblue") +
  ggtitle("Gender Distribution") +
  xlab("Gender") +
  ylab("Frequency")

# Age by Gender using ggplot2.
# Age is recorded as bracket labels (for example "> 60"), not as numbers, so
# a boxplot has no meaningful quartiles to draw. A grouped bar chart of the
# bracket counts per gender is used instead.
# NOTE: brackets appear in alphabetical order; set the factor levels of Age
# explicitly if a chronological order is wanted.
age_answered <- gender_answered[
  !(gender_answered$Age %in% c("", "Response", NA)), ,
  drop = FALSE
]
ggplot(age_answered, aes(x = Gender, fill = Age)) +
  geom_bar(position = "dodge") +
  ggtitle("Age by Gender") +
  xlab("Gender") +
  ylab("Frequency")

# ggplot2 is installed (if needed) earlier in the script; calling
# install.packages() again here only produced an "in use" warning.
library(ggplot2)
# Creating a sample dataset.
# These six rows are made-up illustration values with a numeric Age. They
# are stored under their own name so the survey data in `data` is not
# overwritten.
sample_data <- data.frame(
  Gender = c("Male", "Female", "Female", "Male", "Male", "Female"),
  Age = c(23, 25, 30, 22, 28, 26)
)
# Bar plot for Gender using ggplot2
ggplot(sample_data, aes(x = Gender)) +
  geom_bar(fill = "lightblue") +
  ggtitle("Gender Distribution") +
  xlab("Gender") +
  ylab("Frequency")

# Boxplot of Age by Gender using ggplot2
ggplot(sample_data, aes(x = Gender, y = Age)) +
  geom_boxplot(fill = "lightblue") +
  ggtitle("Age by Gender") +
  xlab("Gender") +
  ylab("Age")

#Discussion
#When the gender distribution is analyzed together with the Age versus Gender boxplot (drawn from
#the small sample dataset), it is clear that the female group is older, with ages of about 26 years
#and above, whereas the male individuals in this group are around 22 and a half years old. The
#gender distribution also shows that females and males are equal in number, meaning the same
#quantity for each, as expressed in the pie chart. On the other hand, the Lottery Choice
#distribution shows that slightly more of those interviewed chose Lottery B than Lottery A.
#Conclusion
#This dataset reveals connections between risk-taking behaviors
#and personal preferences, particularly steak consumption. It explores habits like smoking,
#drinking, and gambling, alongside demographic factors such as age, gender, and income.
#By analyzing these elements, the dataset helps identify patterns between lifestyle choices
#and behaviors, offering insights into how personal preferences may relate to risk tolerance
#across different groups.