library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
options(readr.show_col_types = FALSE)
# Load the candy data set from a local CSV file and preview the raw columns.
Candy <- "~/Data_607/candy-data.csv"
candy_data <- read_csv(file = Candy)
head(candy_data)

# Give the three columns this analysis uses CamelCase names, then preview.
candy_data <- rename(
  candy_data,
  CandyName = competitorname,
  Chocolate = chocolate,
  WinPercent = winpercent
)
head(candy_data)

# Recode the chocolate indicator as "Yes"/"No" labels (truthy -> "Yes").
# NOTE(review): presumably the column is a 0/1 flag -- confirm against the CSV.
candy_data <- candy_data %>%
  mutate(Chocolate = ifelse(Chocolate, "Yes", "No"))
head(candy_data)
# Keep only the columns needed for the chocolate vs. non-chocolate comparison.
candy_subset <- select(candy_data, CandyName, Chocolate, WinPercent)

# Boxplot of win percent for each chocolate group, filled by group.
ggplot(
  data = candy_subset,
  mapping = aes(x = Chocolate, y = WinPercent, fill = Chocolate)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Comparison of Chocolate vs. Non-Chocolate Candy Win Percent",
    x = "Contains Chocolate",
    y = "Win Percent"
  )
# Conclusion: Candies that contain chocolate have a higher win percentage
# than candies that do not.