Author: Navdeep Metchu
Date: 2023-10-25
/* Page-wide defaults: outer spacing, light-gray canvas, and base typeface. */
body {
  margin: 0;
  padding: 20px;
  background-color: #f4f4f4;
  font-family: 'Arial', sans-serif;
}
/* Page header: dark banner with centered white text. */
header {
  padding: 10px 0;
  text-align: center;
  color: #fff;
  background-color: #333;
}

/* Header title: large type, with the margin reset to 0. */
header h1 {
  font-size: 2.5em;
  margin: 0;
}

/* Header tagline: slightly enlarged text with a small gap above it. */
header p {
  font-size: 1.1em;
  margin-top: 5px;
}
# Load the Pokemon stats table from the CSV in the working directory.
pokemon_data <- read.csv(file = "PokemonStats.csv")

# Derive the binary response: 1 when a Pokemon's Total exceeds 500, else 0.
pokemon_data[["HighTotalStat"]] <- as.integer(pokemon_data[["Total"]] > 500)

# Preview the first rows, restricted to the columns relevant to the new flag.
preview_cols <- c("Name", "Total", "HighTotalStat")
head(pokemon_data[preview_cols])
## Name Total HighTotalStat
## 1 Bulbasaur 318 0
## 2 Ivysaur 405 0
## 3 Venusaur 525 1
## 4 Venusaur Mega Venusaur 625 1
## 5 Charmander 309 0
## 6 Charmeleon 405 0
# Correlate every candidate explanatory variable with the binary
# 'HighTotalStat' indicator and keep only the 'HighTotalStat' row of the
# resulting correlation matrix.
#
# use = "pairwise.complete.obs": with the default (use = "everything") a column
# that contains missing values yields an NA correlation, and sort() below then
# drops that entry without any warning. Columns without missing values produce
# exactly the same correlations as before.
correlations <- cor(
  pokemon_data[, c("HP", "Attack", "Defense", "SpAtk", "SpDef", "Speed",
                   "Height", "Weight", "HighTotalStat")],
  use = "pairwise.complete.obs"
)["HighTotalStat", ]

# Remove the correlation of 'HighTotalStat' with itself. Logical subsetting is
# used instead of x[-which(...)], which returns an empty vector when nothing
# matches.
correlations <- correlations[names(correlations) != "HighTotalStat"]

# Sort correlations in descending order (strongest positive association first).
sorted_correlations <- sort(correlations, decreasing = TRUE)
sorted_correlations
## SpAtk Attack SpDef HP Defense Speed Height
## 0.5825124 0.5254463 0.5151532 0.4693824 0.4465165 0.4003035 0.2315656
# Response vector and predictor table for the four variables that rank highest
# in the sorted correlations with 'HighTotalStat'.
y <- pokemon_data[["HighTotalStat"]]
X <- pokemon_data[c("SpAtk", "Attack", "SpDef", "HP")]

# Logistic regression of the binary response on the four predictors.
# The predictors are looked up in `pokemon_data`; `y` is resolved from the
# calling environment unless `pokemon_data` itself has a column named `y`.
logit_model <- glm(
  formula = y ~ SpAtk + Attack + SpDef + HP,
  family = binomial(link = "logit"),
  data = pokemon_data
)

# Coefficient table, deviances and AIC for the fitted model.
summary(logit_model)
##
## Call:
## glm(formula = y ~ SpAtk + Attack + SpDef + HP, family = binomial(link = "logit"),
## data = pokemon_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -21.587125 1.557261 -13.862 < 2e-16 ***
## SpAtk 0.063047 0.005516 11.430 < 2e-16 ***
## Attack 0.075208 0.006561 11.463 < 2e-16 ***
## SpDef 0.070693 0.007011 10.083 < 2e-16 ***
## HP 0.038298 0.005336 7.177 7.13e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1468.44 on 1193 degrees of freedom
## Residual deviance: 504.17 on 1189 degrees of freedom
## AIC: 514.17
##
## Number of Fisher Scoring iterations: 8
# Recompute the binary indicator (1 when Total exceeds 500, else 0) so the
# plots below work from a freshly derived column.
pokemon_data$HighTotalStat <- as.integer(pokemon_data$Total > 500)

# List of explanatory variables
explanatory_vars <- c("SpAtk", "Attack", "SpDef", "HP")

# Draw one box plot per explanatory variable, split by HighTotalStat.
#
# NOTE: the earlier par(mfrow = c(4, 1), mar = ...) call was removed. par()
# configures base graphics only; ggplot2 draws with grid, so the call never
# changed these plots and merely left global graphics state modified.
for (var in explanatory_vars) {
  # .data[[var]] looks the column up by name in the plot data and fails with a
  # clear error if it is absent, whereas get(var) would fall back to any
  # same-named object found in the enclosing environments.
  p <- ggplot(
    pokemon_data,
    aes(
      x = factor(HighTotalStat),
      y = .data[[var]],
      fill = factor(HighTotalStat)
    )
  ) +
    geom_boxplot() +
    scale_fill_manual(
      values = c("#98A886", "#735290"),
      name = "HighTotalStat",
      breaks = c(0, 1),
      labels = c("No", "Yes")
    ) +
    labs(
      title = paste("Box plot of", var, "vs. HighTotalStat"),
      x = "HighTotalStat",
      y = var
    ) +
    theme_minimal() +
    theme(
      legend.position = "top",
      panel.background = element_rect(
        fill = "lightgray", colour = "black", linewidth = 1
      ),
      panel.grid.major = element_line(colour = "white"),
      panel.grid.minor = element_line(colour = "white", linetype = "dashed")
    )
  # Explicit print() is needed: objects are not auto-printed inside a for loop.
  print(p)
}