| Name | DataType | DataUnits |
|---|---|---|
| ID | int | ID for each pokemon |
| Name | factor | Name of each pokemon |
| Type 1 | factor | Each pokemon has a type, this determines weakness/resistance to attacks |
| Type 2 | factor | Some pokemon are dual type and have 2 |
| Total | int | Sum of all stats that come after this, a general guide to how strong a pokemon is |
| HP | int | Hit points, or health, defines how much damage a pokemon can withstand before fainting |
| Attack | int | The base modifier for normal attacks(Scratch,Punch,…) |
| Defense | int | The base damage resistance against normal attacks |
| SP Attack | int | Special attack, the base modifier for special attacks(fire blast, bubble beam,…) |
| SP Defense | int | The base damage resistance against special attacks |
| Speed | int | Determines which pokemon attacks first each round |
| Generation | number | The generation that the specific Pokémon belongs to |
| Legendary | boolean | If the Pokémon is legendary or not |
# Fill colours for the two strength classes, in label order.
colours <- c("brown1", "springgreen3")
# Names become a factor whose levels follow ascending HP, so the flipped bar
# chart lists the Pokemon in HP order.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$HP)]),
  Health = myDf$HP
)
# Classify each Pokemon: HP <= 100 -> "Not Enough Strong", HP > 100 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    Health,
    breaks = c(-Inf, 100, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with more than 80 HP are drawn.
hpGraph <- ggplot(
  filter(statsDf, Health > 80),
  aes(x = names, y = Health, fill = concern)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Health Stats")
hpGraph
# Names as a factor ordered by ascending Attack, paired with the Attack value.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Attack)]),
  Attack = myDf$Attack
)
# Classify each Pokemon: Attack <= 110 -> "Not Enough Strong", Attack > 110 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    Attack,
    breaks = c(-Inf, 110, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with Attack above 100 are drawn.
attGraph <- ggplot(
  filter(statsDf, Attack > 100),
  aes(x = names, y = Attack, fill = concern)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Attack Stats")
attGraph
# Names as a factor ordered by ascending Defense, paired with the Defense value.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Defense)]),
  Defense = myDf$Defense
)
# Classify each Pokemon: Defense <= 110 -> "Not Enough Strong", Defense > 110 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    Defense,
    breaks = c(-Inf, 110, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with Defense above 100 are drawn.
defGraph <- ggplot(
  filter(statsDf, Defense > 100),
  aes(x = names, y = Defense, fill = concern)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Defense Stats")
defGraph
# Names as a factor ordered by ascending Sp..Atk, paired with that value as spAt.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Sp..Atk)]),
  spAt = myDf$Sp..Atk
)
# Classify each Pokemon: spAt <= 110 -> "Not Enough Strong", spAt > 110 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    spAt,
    breaks = c(-Inf, 110, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with special attack above 100 are drawn.
spAtGraph <- ggplot(
  filter(statsDf, spAt > 100),
  aes(x = names, y = spAt, fill = concern)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Special Attack Stats")
spAtGraph
# Names as a factor ordered by ascending Sp..Def, paired with that value as spDef.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Sp..Def)]),
  spDef = myDf$Sp..Def
)
# Classify each Pokemon: spDef <= 110 -> "Not Enough Strong", spDef > 110 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    spDef,
    breaks = c(-Inf, 110, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with special defense above 100 are drawn.
spDefGraph <- ggplot(
  filter(statsDf, spDef > 100),
  aes(x = names, y = spDef, fill = concern)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Special Defense Stats")
spDefGraph
# Names as a factor ordered by ascending Speed, paired with the Speed value.
statsDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Speed)]),
  Speed = myDf$Speed
)
# Classify each Pokemon: Speed <= 110 -> "Not Enough Strong", Speed > 110 -> "Very Strong".
statsDf <- mutate(
  statsDf,
  factor = as.factor(names),
  concern = cut(
    Speed,
    breaks = c(-Inf, 110, Inf),
    labels = c("Not Enough Strong", "Very Strong"),
    right = TRUE
  )
)
# Only Pokemon with Speed above 100 are drawn.
speedGraph <- ggplot(
  filter(statsDf, Speed > 100),
  aes(x = names, y = Speed, fill = concern)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Best Speed Stats")
speedGraph
# Palette of 18 fill colours used by the per-type bar charts.
colours <- c(
  "yellow4", "gray25", "slateblue", "yellow2", "hotpink1",
  "indianred2", "red2", "lightblue3", "darkslateblue", "limegreen",
  "tan3", "darkslategray1", "white", "blueviolet", "magenta1", "#8B76FF",
  "#8E6856", "royalblue1"
)
# Count Pokemon per non-empty Type.1 and draw a horizontal bar chart ordered by
# count, with the count printed next to each bar.
myDf %>%
  filter(Type.1 != "") %>%
  group_by(Type.1) %>%
  summarise(number = n()) %>%
  ggplot(aes(x = reorder(Type.1, number), y = number, fill = Type.1)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = number), hjust = -1.0) +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(
    x = "Type of Pokemon",
    y = "Number of Pokemon",
    title = "Number of Pokemon by Type 1"
  )
# Count Pokemon per non-empty Type.2 and draw a horizontal bar chart ordered by
# count, with the count printed next to each bar.
myDf %>%
  filter(Type.2 != "") %>%
  group_by(Type.2) %>%
  summarise(number = n()) %>%
  ggplot(aes(x = reorder(Type.2, number), y = number, fill = Type.2)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = number), hjust = -1.0) +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(
    x = "Type of Pokemon",
    y = "Number of Pokemon",
    title = "Number of Pokemon by Type 2"
  )
# Reshape the stat columns HP..Speed to long form, average each stat per Type.1
# (truncated to an integer) and show the result as a labelled heatmap.
myDf %>%
  gather(key, value, HP:Speed) %>%
  group_by(Type.1, key) %>%
  summarise(Stat = as.integer(mean(value))) %>%
  ggplot(aes(x = key, y = Type.1)) +
  geom_tile(aes(fill = Stat)) +
  theme_bw() +
  theme(legend.position = "bottom") +
  geom_text(aes(label = Stat), color = "white", size = 3) +
  labs(
    x = "Stat Category",
    y = "Pokemon Type",
    title = "Heatmap Distribution of Pokemon Stats by Type"
  )
# Names as a factor ordered by Generation, paired with the Generation value.
propDf <- data.frame(
  names = factor(myDf$Name, levels = myDf$Name[order(myDf$Generation)]),
  Generation = myDf$Generation
)
# One point per Pokemon. No fill aesthetic is mapped in this plot, so the
# manual fill scale is carried over without a visible effect.
genGraph <- propDf %>%
  ggplot(aes(x = names, y = Generation)) +
  geom_point(stat = "identity") +
  scale_fill_manual(values = colours) +
  coord_flip() +
  labs(title = "Generations")
genGraph
# Bar chart counting rows of myDf per value of Legendary.
legGraph <- ggplot(myDf, aes(x = Legendary)) +
  geom_bar() +
  labs(
    title = "Number of Legendary Pokémon",
    x = "IsLegendary",
    y = "Quantity"
  )
legGraph
We computed probabilities based on each Pokémon's Type 1 and its Legendary status, as Type 1 represents its basic element.
# Contingency table of counts: rows are Type.1 values, columns are Legendary values.
legendaryTable <- table(myDf$Type.1, myDf$Legendary)
# Marginal counts: per Type.1 (rows) and per Legendary value (columns).
legendaryYTable <- margin.table(legendaryTable, 1)
legendaryXTable <- margin.table(legendaryTable, 2)
# Joint probability of each (Type.1, Legendary) cell.
legendaryProp <- prop.table(legendaryTable)
# Marginal probabilities: marginal counts divided by the grand total.
legendaryMarginY <- legendaryYTable / margin.table(legendaryTable)
legendaryMarginX <- legendaryXTable / margin.table(legendaryTable)

# Long-format data frames (Var1[, Var2], Freq) for plotting.
legendaryTableDf <- as.data.frame(legendaryTable)
legendaryYTableDf <- as.data.frame(legendaryYTable)
legendaryXTableDf <- as.data.frame(legendaryXTable)
legendaryPropDf <- as.data.frame(legendaryProp)
legendaryMarginYDf <- as.data.frame(legendaryMarginY)
legendaryMarginXDf <- as.data.frame(legendaryMarginX)
We worked with a total population of 800 Pokémon, divided into 18 Type 1 categories and 2 Legendary statuses.
# All plots below map bare column names inside aes() so ggplot2 resolves them
# from the data frame given to ggplot(), instead of reaching back into the
# global data frame with `$`.

# Counts per Type 1, stacked by Legendary status.
ggplot(legendaryTableDf, aes(x = Var1, y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Pop', fill='Is Legendary?', title = 'Pokemon population between Type 1 & Legendary') +
  geom_text(aes(label = Freq))

# Marginal counts per Type 1.
ggplot(legendaryYTableDf, aes(x = Var1, y = Freq)) +
  geom_bar(position = "stack", stat = "identity", fill = "salmon") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Population', title = 'Marginal Population Y') +
  geom_text(aes(label = Freq), hjust = +1.5)

# Marginal counts per Legendary status.
ggplot(legendaryXTableDf, aes(x = Var1, y = Freq)) +
  geom_bar(position = "stack", stat = "identity", fill = "salmon") +
  coord_flip() +
  labs(x='Is Legendary?', y='Pokemon Population', title = 'Marginal Population X') +
  geom_text(aes(label = Freq), hjust = +1.5)

# Joint probability of (Type 1, Legendary), labelled as percentages.
ggplot(legendaryPropDf, aes(x = Var1, y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Joint probability between Type1 & Legendary Pokemons') +
  geom_text(aes(label = scales::percent(Freq)))

# Marginal probability per Type 1.
ggplot(legendaryMarginYDf, aes(x = Var1, y = Freq)) +
  geom_bar(position = "stack", stat = "identity", fill = "salmon") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', title = 'Marginal Probability Y') +
  geom_text(aes(label = scales::percent(Freq)), hjust = +1)

# Marginal probability per Legendary status.
ggplot(legendaryMarginXDf, aes(x = Var1, y = Freq)) +
  geom_bar(position = "stack", stat = "identity", fill = "salmon") +
  coord_flip() +
  labs(x='Is legendary?', y='Pokemon Proportion', title = 'Marginal Probability X') +
  geom_text(aes(label = scales::percent(Freq)), hjust = +1)
#Conditional probability Pokemons type1 if false legendary
# Keep the non-legendary rows. The filter uses the new frame's own column
# (myDf.Legendary) rather than myDf$Legendary, so it no longer relies on the
# two data frames staying row-aligned.
condFLegendaryDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Legendary == "False")
condFLegendaryTable <- table(condFLegendaryDf$myDf.Type.1, condFLegendaryDf$myDf.Legendary)
# Proportions over the filtered table: share of each Type 1 among the kept rows.
condFLegendary <- prop.table(condFLegendaryTable)
condFLegendaryDf <- as.data.frame(condFLegendary)
# Zero-frequency cells get an empty label instead of "0%".
ggplot(condFLegendaryDf, aes(x = Var1, y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Type 1 Pokemons if Legendary is False') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Conditional probability Pokemons type1 if true legendary
# Keep the legendary rows. The filter uses the new frame's own column
# (myDf.Legendary) rather than myDf$Legendary, so it no longer relies on the
# two data frames staying row-aligned.
condTLegendaryDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Legendary == "True")
condTLegendaryTable <- table(condTLegendaryDf$myDf.Type.1, condTLegendaryDf$myDf.Legendary)
# Proportions over the filtered table: share of each Type 1 among the kept rows.
condTLegendary <- prop.table(condTLegendaryTable)
condTLegendaryDf <- as.data.frame(condTLegendary)
# Zero-frequency cells get an empty label instead of "0%".
ggplot(condTLegendaryDf, aes(x = Var1, y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Type 1 Pokemons if Legendary is True') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Conditional probability Pokemons legendary if each pokemon type1
#Water
# Keep only the Water rows; the filter refers to the new frame's own column.
condWaterDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Water")
condWaterTable <- table(condWaterDf$myDf.Type.1, condWaterDf$myDf.Legendary)
# Proportions within the Water-only table: share of each Legendary status.
condWater <- prop.table(condWaterTable)
condWaterDf <- as.data.frame(condWater)
# The x position is the constant type name. The former Var1[18] lookup only hit
# "Water" when the table still carried every Type 1 level in the expected order.
ggplot(condWaterDf, aes(x = "Water", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Water') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Steel
# Keep only the Steel rows; the filter refers to the new frame's own column.
condSteelDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Steel")
condSteelTable <- table(condSteelDf$myDf.Type.1, condSteelDf$myDf.Legendary)
# Proportions within the Steel-only table: share of each Legendary status.
condSteel <- prop.table(condSteelTable)
condSteelDf <- as.data.frame(condSteel)
# The x position is the constant type name. The former Var1[17] lookup only hit
# "Steel" when the table still carried every Type 1 level in the expected order.
ggplot(condSteelDf, aes(x = "Steel", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Steel') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Rock
# Keep only the Rock rows; the filter refers to the new frame's own column.
condRockDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Rock")
condRockTable <- table(condRockDf$myDf.Type.1, condRockDf$myDf.Legendary)
# Proportions within the Rock-only table: share of each Legendary status.
condRock <- prop.table(condRockTable)
condRockDf <- as.data.frame(condRock)
# The x position is the constant type name. The former Var1[16] lookup only hit
# "Rock" when the table still carried every Type 1 level in the expected order.
ggplot(condRockDf, aes(x = "Rock", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Rock') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Psychic
# Keep only the Psychic rows; the filter refers to the new frame's own column.
condPsychicDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Psychic")
condPsychicTable <- table(condPsychicDf$myDf.Type.1, condPsychicDf$myDf.Legendary)
# Proportions within the Psychic-only table: share of each Legendary status.
condPsychic <- prop.table(condPsychicTable)
condPsychicDf <- as.data.frame(condPsychic)
# The x position is the constant type name. The former Var1[15] lookup only hit
# "Psychic" when the table still carried every Type 1 level in the expected order.
ggplot(condPsychicDf, aes(x = "Psychic", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Psychic') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Poison
# Keep only the Poison rows; the filter refers to the new frame's own column.
condPoisonDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Poison")
condPoisonTable <- table(condPoisonDf$myDf.Type.1, condPoisonDf$myDf.Legendary)
# Proportions within the Poison-only table: share of each Legendary status.
condPoison <- prop.table(condPoisonTable)
condPoisonDf <- as.data.frame(condPoison)
# The x position is the constant type name. The former Var1[14] lookup only hit
# "Poison" when the table still carried every Type 1 level in the expected order.
ggplot(condPoisonDf, aes(x = "Poison", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Poison') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Normal
# Keep only the Normal rows; the filter refers to the new frame's own column.
condNormalDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Normal")
condNormalTable <- table(condNormalDf$myDf.Type.1, condNormalDf$myDf.Legendary)
# Proportions within the Normal-only table: share of each Legendary status.
condNormal <- prop.table(condNormalTable)
condNormalDf <- as.data.frame(condNormal)
# The x position is the constant type name. The former Var1[13] lookup only hit
# "Normal" when the table still carried every Type 1 level in the expected order.
ggplot(condNormalDf, aes(x = "Normal", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Normal') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Ice
# Keep only the Ice rows; the filter refers to the new frame's own column.
condIceDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Ice")
condIceTable <- table(condIceDf$myDf.Type.1, condIceDf$myDf.Legendary)
# Proportions within the Ice-only table: share of each Legendary status.
condIce <- prop.table(condIceTable)
condIceDf <- as.data.frame(condIce)
# The x position is the constant type name. The former Var1[12] lookup only hit
# "Ice" when the table still carried every Type 1 level in the expected order.
ggplot(condIceDf, aes(x = "Ice", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Ice') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Ground
# Keep only the Ground rows; the filter refers to the new frame's own column.
condGroundDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Ground")
condGroundTable <- table(condGroundDf$myDf.Type.1, condGroundDf$myDf.Legendary)
# Proportions within the Ground-only table: share of each Legendary status.
condGround <- prop.table(condGroundTable)
condGroundDf <- as.data.frame(condGround)
# The x position is the constant type name. The former Var1[11] lookup only hit
# "Ground" when the table still carried every Type 1 level in the expected order.
ggplot(condGroundDf, aes(x = "Ground", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Ground') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Grass
# Keep only the Grass rows; the filter refers to the new frame's own column.
condGrassDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Grass")
condGrassTable <- table(condGrassDf$myDf.Type.1, condGrassDf$myDf.Legendary)
# Proportions within the Grass-only table: share of each Legendary status.
condGrass <- prop.table(condGrassTable)
condGrassDf <- as.data.frame(condGrass)
# The x position is the constant type name. The former Var1[10] lookup only hit
# "Grass" when the table still carried every Type 1 level in the expected order.
ggplot(condGrassDf, aes(x = "Grass", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Grass') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Ghost
# Keep only the Ghost rows; the filter refers to the new frame's own column.
condGhostDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Ghost")
condGhostTable <- table(condGhostDf$myDf.Type.1, condGhostDf$myDf.Legendary)
# Proportions within the Ghost-only table: share of each Legendary status.
condGhost <- prop.table(condGhostTable)
condGhostDf <- as.data.frame(condGhost)
# The x position is the constant type name. The former Var1[9] lookup only hit
# "Ghost" when the table still carried every Type 1 level in the expected order.
ggplot(condGhostDf, aes(x = "Ghost", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Ghost') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Flying
# Keep only the Flying rows; the filter refers to the new frame's own column.
condFlyingDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Flying")
condFlyingTable <- table(condFlyingDf$myDf.Type.1, condFlyingDf$myDf.Legendary)
# Proportions within the Flying-only table: share of each Legendary status.
condFlying <- prop.table(condFlyingTable)
condFlyingDf <- as.data.frame(condFlying)
# The x position is the constant type name. The former Var1[8] lookup only hit
# "Flying" when the table still carried every Type 1 level in the expected order.
ggplot(condFlyingDf, aes(x = "Flying", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Flying') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Fire
# Keep only the Fire rows; the filter refers to the new frame's own column.
condFireDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Fire")
condFireTable <- table(condFireDf$myDf.Type.1, condFireDf$myDf.Legendary)
# Proportions within the Fire-only table: share of each Legendary status.
condFire <- prop.table(condFireTable)
condFireDf <- as.data.frame(condFire)
# The x position is the constant type name. The former Var1[7] lookup only hit
# "Fire" when the table still carried every Type 1 level in the expected order.
ggplot(condFireDf, aes(x = "Fire", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Fire') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Fighting
# Keep only the Fighting rows; the filter refers to the new frame's own column.
condFightingDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Fighting")
condFightingTable <- table(condFightingDf$myDf.Type.1, condFightingDf$myDf.Legendary)
# Proportions within the Fighting-only table: share of each Legendary status.
condFighting <- prop.table(condFightingTable)
condFightingDf <- as.data.frame(condFighting)
# The x position is the constant type name. The former Var1[6] lookup only hit
# "Fighting" when the table still carried every Type 1 level in the expected order.
ggplot(condFightingDf, aes(x = "Fighting", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Fighting') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Fairy
# Keep only the Fairy rows; the filter refers to the new frame's own column.
condFairyDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Fairy")
condFairyTable <- table(condFairyDf$myDf.Type.1, condFairyDf$myDf.Legendary)
# Proportions within the Fairy-only table: share of each Legendary status.
condFairy <- prop.table(condFairyTable)
condFairyDf <- as.data.frame(condFairy)
# The x position is the constant type name. The former Var1[5] lookup only hit
# "Fairy" when the table still carried every Type 1 level in the expected order.
ggplot(condFairyDf, aes(x = "Fairy", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Fairy') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Electric
# Keep only the Electric rows; the filter refers to the new frame's own column.
condElectricDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Electric")
condElectricTable <- table(condElectricDf$myDf.Type.1, condElectricDf$myDf.Legendary)
# Proportions within the Electric-only table: share of each Legendary status.
condElectric <- prop.table(condElectricTable)
condElectricDf <- as.data.frame(condElectric)
# The x position is the constant type name. The former Var1[4] lookup only hit
# "Electric" when the table still carried every Type 1 level in the expected order.
ggplot(condElectricDf, aes(x = "Electric", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Electric') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Dragon
# Keep only the Dragon rows; the filter refers to the new frame's own column.
condDragonDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Dragon")
condDragonTable <- table(condDragonDf$myDf.Type.1, condDragonDf$myDf.Legendary)
# Proportions within the Dragon-only table: share of each Legendary status.
condDragon <- prop.table(condDragonTable)
condDragonDf <- as.data.frame(condDragon)
# The x position is the constant type name. The former Var1[3] lookup only hit
# "Dragon" when the table still carried every Type 1 level in the expected order.
ggplot(condDragonDf, aes(x = "Dragon", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Dragon') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Dark
# Keep only the Dark rows; the filter refers to the new frame's own column.
condDarkDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Dark")
condDarkTable <- table(condDarkDf$myDf.Type.1, condDarkDf$myDf.Legendary)
# Proportions within the Dark-only table: share of each Legendary status.
condDark <- prop.table(condDarkTable)
condDarkDf <- as.data.frame(condDark)
# The x position is the constant type name. The former Var1[2] lookup only hit
# "Dark" when the table still carried every Type 1 level in the expected order.
ggplot(condDarkDf, aes(x = "Dark", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Dark') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
#Bug
# Keep only the Bug rows; the filter refers to the new frame's own column.
condBugDf <- data.frame(myDf$Type.1, myDf$Legendary) %>% filter(myDf.Type.1 == "Bug")
condBugTable <- table(condBugDf$myDf.Type.1, condBugDf$myDf.Legendary)
# Proportions within the Bug-only table: share of each Legendary status.
condBug <- prop.table(condBugTable)
condBugDf <- as.data.frame(condBug)
# The x position is the constant type name. The former Var1[1] lookup only hit
# "Bug" when the table's first row happened to be the Bug level.
ggplot(condBugDf, aes(x = "Bug", y = Freq)) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  labs(x='Type 1', y='Pokemon Proportion', fill='Is Legendary?', title = 'Conditional Probability of Legendary if Pokemon Type1 is Bug') +
  geom_text(aes(label = ifelse(Freq > 0, scales::percent(Freq), "")))
# Count the Pokemon that are Ghost in either type slot.
GhostTypePkmn <- myDf %>% select(Type.1,Type.2) %>% filter(Type.1 %in% "Ghost" | Type.2 %in% "Ghost") %>% summarise(number = n())
totalNumberGhostTypePkmn <- GhostTypePkmn$number
nTrials <- 20 #20 times (because in a competition there are 20 combats)
# Population size comes from the data itself rather than a hard-coded 800, so
# the probability stays correct if the dataset changes.
totalPokemon <- nrow(myDf)
# Probability that a single Pokemon is a Ghost type.
prob <- totalNumberGhostTypePkmn/totalPokemon
# Support of the binomial variable: 0..nTrials successes.
x <- 0:nTrials #random variable
binomDistr <- dbinom(x, nTrials, prob)
auxdf <- data.frame(x=x, y=binomDistr)
# Lollipop plot of the probability mass function. The title now names the
# actual parameters of the distribution (n trials, probability p).
ggplot(auxdf)+geom_point(aes(x=x,y=binomDistr),col="red",size=4) +
  geom_segment(aes(x=x,y=binomDistr,xend=x,yend=0),col="blue") +
  scale_x_continuous(breaks=x) +ggtitle("X~Binom(n,p) | Probability Mass Function Plot")
# Count the Pokemon having any of the listed types in either type slot.
Weakness <- myDf %>% select(Type.1,Type.2) %>% filter(Type.1 %in% c("Fire","Bug","Ice","Flying","Poison") | Type.2 %in% c("Fire","Bug","Ice","Flying","Poison")) %>% summarise(number = n())
totalWeakness <- Weakness$number
# Possible number of such Pokemon in a draw of 6.
x <- 0:6
# Hypergeometric PMF: totalWeakness "successes" in the population, the rest are
# "failures", 6 drawn without replacement. The population size is taken from
# the data instead of a hard-coded 800.
hyper <- dhyper(x,totalWeakness ,nrow(myDf) - totalWeakness ,6)
auxdfH <- data.frame(x=x, y=hyper)
# Lollipop plot of the probability mass function.
ggplot(auxdfH)+geom_point(aes(x=x,y=hyper),col="red",size=4) +
  geom_segment(aes(x=x,y=hyper,xend=x,yend=0),col="blue") +
  scale_x_continuous(breaks=x) +ggtitle("Hypergeometric")
#P(X>= 50.0)
# Grid of possible damage values from 49.0 to 52.0 in steps of 0.1.
TotalD <- c(
  49.0, 49.1, 49.2, 49.3, 49.4, 49.5, 49.6, 49.7, 49.8, 49.9,
  50.0, 50.1, 50.2, 50.3, 50.4, 50.5, 50.6, 50.7, 50.8, 50.9,
  51.0, 51.1, 51.2, 51.3, 51.4, 51.5, 51.6, 51.7, 51.8, 51.9,
  52.0
)
# Mean and standard deviation of the grid parameterise the normal curve.
m <- mean(TotalD)
SD <- sd(TotalD)
Df <- data.frame(TotalD)
# Normal density evaluated at every grid value.
DN <- dnorm(TotalD, mean = m, sd = SD)
auxdfC <- data.frame(x = TotalD, y = DN)
# Lollipop plot of the density; xend = x resolves to the x column of auxdfC.
ggplot(auxdfC) +
  geom_point(aes(x = TotalD, y = DN), col = "red", size = 4) +
  geom_segment(aes(x = TotalD, y = DN, xend = x, yend = 0), col = "blue") +
  scale_x_continuous(breaks = TotalD) +
  labs(title = "Probability Density Function", x = "Possible Damage", y = "P(X)")
#Standard formation for a balanced team composition: Physical Sweeper, Physical Sweeper, Special Sweeper, Special Sweeper, Physical Tank and Wall.
#->Physical Sweeper: Attack + Speed ->Special Sweeper: SP Attack + Speed ->Physical Tank: Attack + Defense ->Wall: HP + Defense + SP Defense
#If we took 300 Pokémon at random, what would be the 95% confidence interval for the population mean for each of the team member types? (Physical Sweeper, Special Sweeper, Physical Tank and Wall)
#Confidence Interval (Physical Sweeper)
# Physical Sweeper score = Attack + Speed of each row; draw 300 scores at random.
PhSweepers <- sample(c(myDf$Attack+myDf$Speed), size = 300)
PhSmean <- mean(PhSweepers)
PhS_StDes <- sd(PhSweepers)
# Margin of error for a two-sided 95% interval: the critical value is the 97.5%
# normal quantile (about 1.96). qnorm(0.95) (about 1.645) would only give a
# 90% interval.
error <- qnorm(1 - 0.05/2)*(PhS_StDes/sqrt(300))
plus <- PhSmean + error
minus <- PhSmean - error
# NOTE(review): PhSmean is the sample mean, so this condition holds whenever
# error > 0; the else branch is only reached for a degenerate sample.
if(PhSmean<plus && minus<PhSmean){
cat("We can be 95% Confident that the population mean (",PhSmean,") lies in this interval{",plus,", ",minus,"}")
} else cat("We cann't be 95% Confident that the population mean (",PhSmean,")lies in this interval{",plus,", ",minus,"}")
## We can be 95% Confident that the population mean ( 146.7767 ) lies in this interval{ 151.6299 , 141.9234 }
#Confidence Interval (Special Sweeper)
# Special Sweeper score = Sp. Atk + Speed of each row; draw 300 scores at random.
SpSweepers <- sample(c(myDf$Sp..Atk+myDf$Speed), size = 300)
SpSmean <- mean(SpSweepers)
SpS_StDes <- sd(SpSweepers)
# Margin of error for a two-sided 95% interval: the critical value is the 97.5%
# normal quantile (about 1.96). qnorm(0.95) (about 1.645) would only give a
# 90% interval.
error <- qnorm(1 - 0.05/2)*(SpS_StDes/sqrt(300))
plus <- SpSmean + error
minus <- SpSmean - error
# NOTE(review): SpSmean is the sample mean, so this condition holds whenever
# error > 0; the else branch is only reached for a degenerate sample.
if(SpSmean<plus && minus<SpSmean){
cat("We can be 95% Confident that the population mean (",SpSmean,") lies in this interval{",plus,", ",minus,"}")
} else cat("We cann't be 95% Confident that the population mean (",SpSmean,") lies in this interval{",plus,", ",minus,"}")
## We can be 95% Confident that the population mean ( 144.7467 ) lies in this interval{ 150.0576 , 139.4357 }
#Confidence Interval (Physical Tank)
# Physical Tank score = Attack + Defense of each row; draw 300 scores at random.
PhTank<- sample(c(myDf$Attack+myDf$Defense), size = 300)
PhTmean <- mean(PhTank)
PhT_StDes <- sd(PhTank)
# Margin of error for a two-sided 95% interval: the critical value is the 97.5%
# normal quantile (about 1.96). qnorm(0.95) (about 1.645) would only give a
# 90% interval.
error <- qnorm(1 - 0.05/2)*(PhT_StDes/sqrt(300))
plus <- PhTmean + error
minus <- PhTmean - error
# NOTE(review): PhTmean is the sample mean, so this condition holds whenever
# error > 0; the else branch is only reached for a degenerate sample.
if(PhTmean<plus && minus<PhTmean){
cat("We can be 95% Confident that the population mean (",PhTmean,") lies in this interval{",plus,", ",minus,"}")
} else cat("We cann't be 95% Confident that the population mean (",PhTmean,") lies in this interval{",plus,", ",minus,"}")
## We can be 95% Confident that the population mean ( 150.4033 ) lies in this interval{ 155.4447 , 145.362 }
#Confidence Interval (Wall)
# Wall score = HP + Defense + Sp. Def of each row; draw 300 scores at random.
Wall <- sample(c(myDf$HP+myDf$Defense+myDf$Sp..Def), size = 300)
Wallmean <- mean(Wall)
Wall_StDes <- sd(Wall)
# Margin of error for a two-sided 95% interval: the critical value is the 97.5%
# normal quantile (about 1.96). qnorm(0.95) (about 1.645) would only give a
# 90% interval.
error <- qnorm(1 - 0.05/2)*(Wall_StDes/sqrt(300))
plus <- Wallmean + error
minus <- Wallmean - error
# NOTE(review): Wallmean is the sample mean, so this condition holds whenever
# error > 0; the else branch is only reached for a degenerate sample.
if(Wallmean<plus && minus<Wallmean){
cat("We can be 95% Confident that the population mean (",Wallmean,") lies in this interval{",plus,", ",minus,"}")
} else cat("We cann't be 95% Confident that the population mean (",Wallmean,") lies in this interval{",plus,", ",minus,"}")
## We can be 95% Confident that the population mean ( 214.76 ) lies in this interval{ 220.8962 , 208.6238 }
#Keeping in mind the last problem's data (300 random Pokémon), for each type in the team composition (Physical Sweeper, Special Sweeper, Physical Tank and Wall), is there enough evidence at the 5% level of significance to support the following hypotheses?
#Hypothesis Testing (Physical Sweeper) H0: μ <= 140 -> The mean is equal or less than 140 H1: μ > 140 -> The mean is greater than 140
PhSweepers <- sample(c(myDf$Attack+myDf$Speed), size = 300) #We add Attack+speed from every row in order to get the "Physical Sweeper" and then we get 300 Pokemons at random
PhSmean <- mean(PhSweepers)
PhS_StDes <- sd(PhSweepers)
# z statistic of the sample mean against the hypothesised mean of 140.
z <- (PhSmean - 140)/(PhS_StDes/sqrt(300))
# Right-tailed p-value, because H1 is "mean > 140": only the upper tail counts.
# The former pnorm(-abs(z)) reported a small p-value for strongly negative z
# as well, which would reject H0 when the sample mean lies far below 140.
pValue <- pnorm(z, lower.tail = FALSE)
cat("Using the Critical Values:")
## Using the Critical Values:
# Critical-value decision: reject H0 when z exceeds the 95% normal quantile.
if(z>qnorm(1-0.05)){
cat("We reject the null hypothesis (H0 μ <= 140) at 5% level of significance as z (",z,") is > than (",qnorm(1-0.05),").")
q<-TRUE
} else {
cat("We confirm the null hypothesis (H0 μ <= 140) at 5% level of significance as z (",z,") is < than (",qnorm(1-0.05),").")
q<-FALSE
}
## We reject the null hypothesis (H0 µ <= 140) at 5% level of significance as z ( 3.111225 ) is > than ( 1.644854 ).
cat("Using the P-Value:")
## Using the P-Value:
# P-value decision: reject H0 when the right-tailed p-value is below 0.05.
if(pValue<(0.05)){
cat("We reject de null hypothesis (H0 μ <= 140) at 5% level of significance as P-Value (",pValue,") is < than (",0.05,").")
p<-TRUE
} else {
cat("We confirm the null hypothesis (H0 μ <= 140) at 5% level of significance as P-Value (",pValue,") is > than (",0.05,").")
p<-FALSE
}
## We reject de null hypothesis (H0 µ <= 140) at 5% level of significance as P-Value ( 0.0009315637 ) is < than ( 0.05 ).
# NOTE(review): failing to reject H0 is not evidence in favour of H0, so the
# wording of the "We confirm" and final else messages overstates the result;
# the messages are left unchanged here.
if(p&&q){
cat("There is enough evidence at the 5% level of significance to suggest that the mean is greater than 140")
} else if(!p&&q ||p&&!q){
cat("There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 140")
} else {
cat("There is enough evidence at the 5% level of significance to suggest that the mean is minus or igual than 140")
}
## There is enough evidence at the 5% level of significance to suggest that the mean is greater than 140
#Hypothesis Testing (Special Sweeper) H0: μ <= 140 -> The mean is equal or less than 140 H1: μ > 140 -> The mean is greater than 140
SpSweepers <- sample(c(myDf$Sp..Atk+myDf$Speed), size = 300) #We add SPAttack+speed from every row in order to get the "Special Sweeper" and then we get 300 Pokemons at random
SpSmean <- mean(SpSweepers)
SpS_StDes <- sd(SpSweepers)
# z statistic of the sample mean against the hypothesised mean of 140.
z <- (SpSmean - 140)/(SpS_StDes/sqrt(300))
# Right-tailed p-value, because H1 is "mean > 140": only the upper tail counts.
# The former pnorm(-abs(z)) reported a small p-value for strongly negative z
# as well, which would reject H0 when the sample mean lies far below 140.
pValue <- pnorm(z, lower.tail = FALSE)
cat("Using the Critical Values:")
## Using the Critical Values:
# Critical-value decision: reject H0 when z exceeds the 95% normal quantile.
if(z>qnorm(1-0.05)){
cat("We reject the null hypothesis (H0 μ <= 140) at 5% level of significance as z (",z,") is > than (",qnorm(1-0.05),").")
q<-TRUE
} else {
cat("We confirm the null hypothesis (H0 μ <= 140) at 5% level of significance as z (",z,") is < than (",qnorm(1-0.05),").")
q<-FALSE
}
## We confirm the null hypothesis (H0 µ <= 140) at 5% level of significance as z ( 1.175979 ) is < than ( 1.644854 ).
cat("Using the P-Value:")
## Using the P-Value:
# P-value decision: reject H0 when the right-tailed p-value is below 0.05.
if(pValue<(0.05)){
cat("We reject de null hypothesis (H0 μ <= 140) at 5% level of significance as P-Value (",pValue,") is < than (",0.05,").")
p<-TRUE
} else {
cat("We confirm the null hypothesis (H0 μ <= 140) at 5% level of significance as P-Value (",pValue,") is > than (",0.05,").")
p<-FALSE
}
## We confirm the null hypothesis (H0 µ <= 140) at 5% level of significance as P-Value ( 0.1198017 ) is > than ( 0.05 ).
# NOTE(review): failing to reject H0 is not evidence in favour of H0, so the
# wording of the "We confirm" and final else messages overstates the result;
# the messages are left unchanged here.
if(p&&q){
cat("There is enough evidence at the 5% level of significance to suggest that the mean is greater than 140")
} else if(!p&&q ||p&&!q){
cat("There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 140")
} else {
cat("There is enough evidence at the 5% level of significance to suggest that the mean is minus or igual than 140")
}
## There is enough evidence at the 5% level of significance to suggest that the mean is minus or igual than 140
# Hypothesis Testing (Physical Tank): one-sample, right-tailed z-test.
#   H0: μ <= 150 -> the mean is less than or equal to 150
#   H1: μ >  150 -> the mean is greater than 150
# The "Physical Tank" score of a row is Attack + Defense; 300 of these scores
# are drawn at random from myDf.
PhTank <- sample(myDf$Attack + myDf$Defense, size = 300)
PhTmean <- mean(PhTank)
PhT_StDes <- sd(PhTank)
# Test statistic built from the sample standard deviation and the sample size.
z <- (PhTmean - 150) / (PhT_StDes / sqrt(length(PhTank)))
# Upper 5% point of the standard normal distribution.
zCritical <- qnorm(1 - 0.05)
cat("Using the Critical Values:")
## Using the Critical Values:
if (z > zCritical) {
  cat(
    "We reject the null hypothesis (H0 μ <= 150) at 5% level of significance as z (",
    z, ") is > than (", zCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ <= 150) at 5% level of significance as z (",
    z, ") is < than (", zCritical, ")."
  )
  q <- FALSE
}
## We fail to reject the null hypothesis (H0 µ <= 150) at 5% level of significance as z ( 0.3353322 ) is < than ( 1.644854 ).
cat("Using the P-Value:")
## Using the P-Value:
# H1 is right-tailed, so the p-value is the upper-tail probability of z itself
# (not of -|z|, which would also "reject" for strongly negative z).
pValue <- pnorm(z, lower.tail = FALSE)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ <= 150) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ <= 150) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
## We fail to reject the null hypothesis (H0 µ <= 150) at 5% level of significance as P-Value ( 0.3686872 ) is > than ( 0.05 ).
# The critical-value rule and the p-value rule are equivalent, so p and q
# agree. Failing to reject H0 is a lack of evidence for H1, not evidence for H0.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the mean is greater than 150")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 150")
}
## There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 150
# Hypothesis Testing (Wall): one-sample, right-tailed z-test.
#   H0: μ <= 210 -> the mean is less than or equal to 210
#   H1: μ >  210 -> the mean is greater than 210
# The "Wall" score of a row is HP + Defense + Sp..Def; 300 of these scores are
# drawn at random from myDf.
Wall <- sample(myDf$HP + myDf$Defense + myDf$Sp..Def, size = 300)
Wallmean <- mean(Wall)
Wall_StDes <- sd(Wall)
# Test statistic built from the sample standard deviation and the sample size.
z <- (Wallmean - 210) / (Wall_StDes / sqrt(length(Wall)))
# Upper 5% point of the standard normal distribution.
zCritical <- qnorm(1 - 0.05)
cat("Using the Critical Values:")
## Using the Critical Values:
if (z > zCritical) {
  cat(
    "We reject the null hypothesis (H0 μ <= 210) at 5% level of significance as z (",
    z, ") is > than (", zCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ <= 210) at 5% level of significance as z (",
    z, ") is < than (", zCritical, ")."
  )
  q <- FALSE
}
## We fail to reject the null hypothesis (H0 µ <= 210) at 5% level of significance as z ( 0.9543804 ) is < than ( 1.644854 ).
cat("Using the P-Value:")
## Using the P-Value:
# H1 is right-tailed, so the p-value is the upper-tail probability of z itself
# (not of -|z|, which would also "reject" for strongly negative z).
pValue <- pnorm(z, lower.tail = FALSE)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ <= 210) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ <= 210) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
## We fail to reject the null hypothesis (H0 µ <= 210) at 5% level of significance as P-Value ( 0.1699456 ) is > than ( 0.05 ).
# The critical-value rule and the p-value rule are equivalent, so p and q
# agree. Failing to reject H0 is a lack of evidence for H1, not evidence for H0.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the mean is greater than 210")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 210")
}
## There is not enough evidence at the 5% level of significance to suggest that the mean is greater than 210
# As before, the same problem is studied, but now with 2 populations:
# population A holds the Pokémon from generations 1 to 3 and population B those
# from generations 4 to 6. 300 Pokémon are sampled at random from each one.
# Confidence Interval (Physical Sweeper)
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Physical Sweeper" score of a row is Attack + Speed.
A_PhSweepers <- sample(ATable$Attack + ATable$Speed, size = 300)
B_PhSweepers <- sample(BTable$Attack + BTable$Speed, size = 300)
A_PhSmean <- mean(A_PhSweepers)
A_PhS_StDes <- sd(A_PhSweepers)
B_PhSmean <- mean(B_PhSweepers)
B_PhS_StDes <- sd(B_PhSweepers)
cat("Having in a count that the A_PhS_StDes(",A_PhS_StDes,"), B_PhS_StDes (",B_PhS_StDes,") and the Phs_StDes (",sd(myDf$Attack+myDf$Speed),") are not so far awey beetween them, we can say that they are iguals.")
## Having in a count that the A_PhS_StDes( 52.66067 ), B_PhS_StDes ( 47.27611 ) and the Phs_StDes ( 51.15841 ) are not so far awey beetween them, we can say that they are iguals.
# Pooled two-sample t interval; the equal-variance assumption is the one stated
# in the message printed just above.
nA <- length(A_PhSweepers)
nB <- length(B_PhSweepers)
pooledSd <- sqrt(
  ((nA - 1) * A_PhS_StDes^2 + (nB - 1) * B_PhS_StDes^2) / (nA + nB - 2)
)
# A two-sided 95% interval leaves 2.5% in each tail, hence the 0.975 quantile.
# The margin of error must be scaled by the pooled standard deviation.
error <- qt(0.975, df = nA + nB - 2) * pooledSd * sqrt(1 / nA + 1 / nB)
# The difference is reported as "larger sample mean minus smaller sample mean".
AB_PhSmean <- abs(A_PhSmean - B_PhSmean)
plus <- AB_PhSmean + error
minus <- AB_PhSmean - error
# The point estimate is the centre of the interval, so it always lies inside
# it; the interval is reported directly.
cat("We can be 95% Confident that the diference population mean (",AB_PhSmean,") lies in this interval{",plus,", ",minus,"}")
# Note: the printed difference and interval change from run to run because the
# samples are drawn at random.
# Confidence Interval (Special Sweeper)
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Special Sweeper" score of a row is Sp..Atk + Speed; 300 scores are drawn
# at random from each population.
A_SpSweepers <- sample(ATable$Sp..Atk + ATable$Speed, size = 300)
B_SpSweepers <- sample(BTable$Sp..Atk + BTable$Speed, size = 300)
A_SpSmean <- mean(A_SpSweepers)
A_SpS_StDes <- sd(A_SpSweepers)
B_SpSmean <- mean(B_SpSweepers)
B_SpS_StDes <- sd(B_SpSweepers)
cat("Having in a count that the A_SpS_StDes(",A_SpS_StDes,"), B_SpS_StDes (",B_SpS_StDes,") and the SpS_StDes (",sd(myDf$Sp..Atk+myDf$Speed),") are not so far awey beetween them, we can say that they are iguals.")
## Having in a count that the A_SpS_StDes( 54.26455 ), B_SpS_StDes ( 50.5973 ) and the SpS_StDes ( 53.05534 ) are not so far awey beetween them, we can say that they are iguals.
# Pooled two-sample t interval; the equal-variance assumption is the one stated
# in the message printed just above.
nA <- length(A_SpSweepers)
nB <- length(B_SpSweepers)
pooledSd <- sqrt(
  ((nA - 1) * A_SpS_StDes^2 + (nB - 1) * B_SpS_StDes^2) / (nA + nB - 2)
)
# A two-sided 95% interval leaves 2.5% in each tail, hence the 0.975 quantile.
# The margin of error must be scaled by the pooled standard deviation.
error <- qt(0.975, df = nA + nB - 2) * pooledSd * sqrt(1 / nA + 1 / nB)
# The difference is reported as "larger sample mean minus smaller sample mean".
AB_SpSmean <- abs(A_SpSmean - B_SpSmean)
plus <- AB_SpSmean + error
minus <- AB_SpSmean - error
# The point estimate is the centre of the interval, so it always lies inside
# it; the interval is reported directly.
cat("We can be 95% Confident that the diference population mean (",AB_SpSmean,") lies in this interval{",plus,", ",minus,"}")
# Note: the printed difference and interval change from run to run because the
# samples are drawn at random.
# Confidence Interval (Physical Tank)
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Physical Tank" score of a row is Attack + Defense; 300 scores are drawn
# at random from each population.
A_PhTank <- sample(ATable$Attack + ATable$Defense, size = 300)
B_PhTank <- sample(BTable$Attack + BTable$Defense, size = 300)
A_PhTmean <- mean(A_PhTank)
A_PhT_StDes <- sd(A_PhTank)
B_PhTmean <- mean(B_PhTank)
B_PhT_StDes <- sd(B_PhTank)
cat("Having in a count that the A_PhT_StDes(",A_PhT_StDes,"), B_PhT_StDes (",B_PhT_StDes,") and the PhT_StDes (",sd(myDf$Attack+myDf$Defense),") are not so far awey beetween them, we can say that they are iguals.")
## Having in a count that the A_PhT_StDes( 58.09939 ), B_PhT_StDes ( 48.2342 ) and the PhT_StDes ( 53.98066 ) are not so far awey beetween them, we can say that they are iguals.
# Pooled two-sample t interval; the equal-variance assumption is the one stated
# in the message printed just above.
nA <- length(A_PhTank)
nB <- length(B_PhTank)
pooledSd <- sqrt(
  ((nA - 1) * A_PhT_StDes^2 + (nB - 1) * B_PhT_StDes^2) / (nA + nB - 2)
)
# A two-sided 95% interval leaves 2.5% in each tail, hence the 0.975 quantile.
# The margin of error must be scaled by the pooled standard deviation.
error <- qt(0.975, df = nA + nB - 2) * pooledSd * sqrt(1 / nA + 1 / nB)
# The difference is reported as "larger sample mean minus smaller sample mean".
AB_PhTmean <- abs(A_PhTmean - B_PhTmean)
plus <- AB_PhTmean + error
minus <- AB_PhTmean - error
# The point estimate is the centre of the interval, so it always lies inside
# it; the interval is reported directly.
cat("We can be 95% Confident that the diference population mean (",AB_PhTmean,") lies in this interval{",plus,", ",minus,"}")
# Note: the printed difference and interval change from run to run because the
# samples are drawn at random.
# Confidence Interval (Wall)
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Wall" score of a row is HP + Defense + Sp..Def; 300 scores are drawn at
# random from each population.
A_Wall <- sample(ATable$HP + ATable$Defense + ATable$Sp..Def, size = 300)
B_Wall <- sample(BTable$HP + BTable$Defense + BTable$Sp..Def, size = 300)
A_Wmean <- mean(A_Wall)
A_W_StDes <- sd(A_Wall)
B_Wmean <- mean(B_Wall)
B_W_StDes <- sd(B_Wall)
cat("Having in a count that the A_W_StDes(",A_W_StDes,"), B_W_StDes (",B_W_StDes,") and the W_StDes (",sd(myDf$HP+myDf$Defense+myDf$Sp..Def),") are not so far awey beetween them, we can say that they are iguals.")
## Having in a count that the A_W_StDes( 70.43584 ), B_W_StDes ( 60.25467 ) and the W_StDes ( 64.8473 ) are not so far awey beetween them, we can say that they are iguals.
# Pooled two-sample t interval; the equal-variance assumption is the one stated
# in the message printed just above.
nA <- length(A_Wall)
nB <- length(B_Wall)
pooledSd <- sqrt(
  ((nA - 1) * A_W_StDes^2 + (nB - 1) * B_W_StDes^2) / (nA + nB - 2)
)
# A two-sided 95% interval leaves 2.5% in each tail, hence the 0.975 quantile.
# The margin of error must be scaled by the pooled standard deviation.
error <- qt(0.975, df = nA + nB - 2) * pooledSd * sqrt(1 / nA + 1 / nB)
# The difference is reported as "larger sample mean minus smaller sample mean".
AB_Wmean <- abs(A_Wmean - B_Wmean)
plus <- AB_Wmean + error
minus <- AB_Wmean - error
# The point estimate is the centre of the interval, so it always lies inside
# it; the interval is reported directly.
cat("We can be 95% Confident that the diference population mean (",AB_Wmean,") lies in this interval{",plus,", ",minus,"}")
# Note: the printed difference and interval change from run to run because the
# samples are drawn at random.
# Keeping in mind the previous data (population A and population B with 300
# random Pokémon each): for each role of the team composition (Physical
# Sweeper, Special Sweeper, Physical Tank and Wall), is there enough evidence
# at the 5% level of significance to conclude the following hypotheses?
# Hypothesis Testing (Physical Sweeper): two-sided, two-sample t-test.
#   H0: μ1 = μ2  -> mean1 is equal to mean2
#   H1: μ1 != μ2 -> mean1 is not equal to mean2
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Physical Sweeper" score of a row is Attack + Speed; 300 scores are drawn
# at random from each population.
A_PhSweepers <- sample(ATable$Attack + ATable$Speed, size = 300)
B_PhSweepers <- sample(BTable$Attack + BTable$Speed, size = 300)
A_PhSmean <- mean(A_PhSweepers)
A_PhS_StDes <- sd(A_PhSweepers)
B_PhSmean <- mean(B_PhSweepers)
B_PhS_StDes <- sd(B_PhSweepers)
nA <- length(A_PhSweepers)
nB <- length(B_PhSweepers)
degFreedom <- nA + nB - 2
# t holds the absolute value of the test statistic (larger mean minus smaller).
t <- abs(A_PhSmean - B_PhSmean) / sqrt(A_PhS_StDes^2 / nA + B_PhS_StDes^2 / nB)
# H1 is two-sided, so 5% is split between both tails: compare |t| with the
# 0.975 quantile.
tCritical <- qt(0.975, df = degFreedom)
cat("Using the Critical Values:")
## Using the Critical Values:
if (t > tCritical) {
  cat(
    "We reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is > than (", tCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is < than (", tCritical, ")."
  )
  q <- FALSE
}
cat("Using the P-Value:")
## Using the P-Value:
# Two-sided p-value: twice the tail probability beyond |t|.
pValue <- 2 * pt(-abs(t), df = degFreedom)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
# p and q are TRUE when H0 is rejected, i.e. when the data support μ1 != μ2.
# Failing to reject H0 is a lack of evidence for a difference, not proof of
# equality.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
}
# Note: the printed statistic, p-value and conclusion change from run to run
# because the samples are drawn at random.
# Hypothesis Testing (Special Sweeper): two-sided, two-sample t-test.
#   H0: μ1 = μ2  -> mean1 is equal to mean2
#   H1: μ1 != μ2 -> mean1 is not equal to mean2
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Special Sweeper" score of a row is Sp..Atk + Speed; 300 scores are drawn
# at random from each population.
A_SpSweepers <- sample(ATable$Sp..Atk + ATable$Speed, size = 300)
B_SpSweepers <- sample(BTable$Sp..Atk + BTable$Speed, size = 300)
A_SpSmean <- mean(A_SpSweepers)
A_SpS_StDes <- sd(A_SpSweepers)
B_SpSmean <- mean(B_SpSweepers)
B_SpS_StDes <- sd(B_SpSweepers)
nA <- length(A_SpSweepers)
nB <- length(B_SpSweepers)
degFreedom <- nA + nB - 2
# t holds the absolute value of the test statistic (larger mean minus smaller).
t <- abs(A_SpSmean - B_SpSmean) / sqrt(A_SpS_StDes^2 / nA + B_SpS_StDes^2 / nB)
# H1 is two-sided, so 5% is split between both tails: compare |t| with the
# 0.975 quantile.
tCritical <- qt(0.975, df = degFreedom)
cat("Using the Critical Values:")
## Using the Critical Values:
if (t > tCritical) {
  cat(
    "We reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is > than (", tCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is < than (", tCritical, ")."
  )
  q <- FALSE
}
cat("Using the P-Value:")
## Using the P-Value:
# Two-sided p-value: twice the tail probability beyond |t|.
pValue <- 2 * pt(-abs(t), df = degFreedom)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
# p and q are TRUE when H0 is rejected, i.e. when the data support μ1 != μ2.
# Failing to reject H0 is a lack of evidence for a difference, not proof of
# equality.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
}
# Note: the printed statistic, p-value and conclusion change from run to run
# because the samples are drawn at random.
# Hypothesis Testing (Physical Tank): two-sided, two-sample t-test.
#   H0: μ1 = μ2  -> mean1 is equal to mean2
#   H1: μ1 != μ2 -> mean1 is not equal to mean2
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Physical Tank" score of a row is Attack + Defense; 300 scores are drawn
# at random from each population.
A_PhTank <- sample(ATable$Attack + ATable$Defense, size = 300)
B_PhTank <- sample(BTable$Attack + BTable$Defense, size = 300)
A_PhTmean <- mean(A_PhTank)
A_PhT_StDes <- sd(A_PhTank)
B_PhTmean <- mean(B_PhTank)
B_PhT_StDes <- sd(B_PhTank)
nA <- length(A_PhTank)
nB <- length(B_PhTank)
degFreedom <- nA + nB - 2
# t holds the absolute value of the test statistic (larger mean minus smaller).
t <- abs(A_PhTmean - B_PhTmean) / sqrt(A_PhT_StDes^2 / nA + B_PhT_StDes^2 / nB)
# H1 is two-sided, so 5% is split between both tails: compare |t| with the
# 0.975 quantile.
tCritical <- qt(0.975, df = degFreedom)
cat("Using the Critical Values:")
## Using the Critical Values:
if (t > tCritical) {
  cat(
    "We reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is > than (", tCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is < than (", tCritical, ")."
  )
  q <- FALSE
}
cat("Using the P-Value:")
## Using the P-Value:
# Two-sided p-value: twice the tail probability beyond |t|.
pValue <- 2 * pt(-abs(t), df = degFreedom)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
# p and q are TRUE when H0 is rejected, i.e. when the data support μ1 != μ2.
# Failing to reject H0 is a lack of evidence for a difference, not proof of
# equality.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
}
# Note: the printed statistic, p-value and conclusion change from run to run
# because the samples are drawn at random.
# Hypothesis Testing (Wall): two-sided, two-sample t-test.
#   H0: μ1 = μ2  -> mean1 is equal to mean2
#   H1: μ1 != μ2 -> mean1 is not equal to mean2
# Population A: generations 1 to 3; population B: generations 4 to 6.
ATable <- subset(myDf, myDf$Generation <= 3)
BTable <- subset(myDf, myDf$Generation > 3)
# The "Wall" score of a row is HP + Defense + Sp..Def; 300 scores are drawn at
# random from each population.
A_Wall <- sample(ATable$HP + ATable$Defense + ATable$Sp..Def, size = 300)
B_Wall <- sample(BTable$HP + BTable$Defense + BTable$Sp..Def, size = 300)
A_Wmean <- mean(A_Wall)
A_W_StDes <- sd(A_Wall)
B_Wmean <- mean(B_Wall)
B_W_StDes <- sd(B_Wall)
nA <- length(A_Wall)
nB <- length(B_Wall)
degFreedom <- nA + nB - 2
# t holds the absolute value of the test statistic (larger mean minus smaller).
t <- abs(A_Wmean - B_Wmean) / sqrt(A_W_StDes^2 / nA + B_W_StDes^2 / nB)
# H1 is two-sided, so 5% is split between both tails: compare |t| with the
# 0.975 quantile.
tCritical <- qt(0.975, df = degFreedom)
cat("Using the Critical Values:")
## Using the Critical Values:
if (t > tCritical) {
  cat(
    "We reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is > than (", tCritical, ")."
  )
  q <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as t (",
    t, ") is < than (", tCritical, ")."
  )
  q <- FALSE
}
cat("Using the P-Value:")
## Using the P-Value:
# Two-sided p-value: twice the tail probability beyond |t|.
pValue <- 2 * pt(-abs(t), df = degFreedom)
if (pValue < 0.05) {
  cat(
    "We reject de null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is < than (", 0.05, ")."
  )
  p <- TRUE
} else {
  cat(
    "We fail to reject the null hypothesis (H0 μ1 = μ2) at 5% level of significance as P-Value (",
    pValue, ") is > than (", 0.05, ")."
  )
  p <- FALSE
}
# p and q are TRUE when H0 is rejected, i.e. when the data support μ1 != μ2.
# Failing to reject H0 is a lack of evidence for a difference, not proof of
# equality.
if (p && q) {
  cat("There is enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
} else {
  cat("There is not enough evidence at the 5% level of significance to suggest that the μ1 != μ2")
}
# Note: the printed statistic, p-value and conclusion change from run to run
# because the samples are drawn at random.
We have observed that the Pokémon with the best stats are the legendary ones and the mega evolutions; these are usually the most suitable Pokémon for every position in the game. They are usually Dragon/Flying types. From this we have identified three possible competitive teams: (1) A team made up entirely of legendary Pokémon: these have the best stats but, being somewhat predictable, they may run into a team built to counter them. (2) A team of legendary counters: although it has lower stats, this team does well against legendaries. (3) A team that counters the legendary counters: this team does well against the legendary counters but is weak against the legendaries themselves, because they have better stats. This study has ended up being a mini guide for players who want to reach a higher level in the game, giving them a simple way to see which Pokémon are the best for each playing position.
We can also conclude that the worst combination that can be chosen is a team of Grass-type Pokémon.