##Empirical method from lab 3
# Load the data set; the path is resolved relative to the working directory.
Pokemon <- read.csv("Pokemon.csv")
# One logical flag per row: TRUE when that Pokemon's generation is above 5.
Pokemonie <- Pokemon$Generation > 5
# Summing a logical vector counts its TRUE entries.
Gen_5 <- sum(Pokemonie)
# Divide by the actual number of rows instead of a hard-coded total, so the
# proportion stays correct if the data set changes size.
Probba <- Gen_5 / nrow(Pokemon)
Probba # Proportion of Pokemon in the data set whose generation is greater than 5
## [1] 0.1025
##By sampling with replacement from lab 3
# Simulation: draw generation labels with replacement and estimate the
# proportion of draws that land on generation 6.
# NOTE(review): sample() without `prob` picks the six labels with equal
# probability; it does not resample Pokemon$Generation. Confirm this is the
# model Lab 3 intends.
outcomes <- c("1", "2", "3", "4", "5", "6")
a <- sample(outcomes, size = 600, replace = TRUE)
# Compare against the string "6" explicitly; `outcomes` is a character vector.
Gen__replace <- sum(a == "6")
# Divide by the number of draws actually taken so the result is a proportion
# of the sample (the draw count and the denominator must agree).
probbii <- Gen__replace / length(a)
probbii # Proportion of draws, made with replacement, that came up as generation 6
## Output varies from run to run; with equally likely labels it should be near 1/6.
##Using the theoretical normal distribution (see Lab 4) – Produce a histogram and Normal Q-Q plot, and assess the normality of the numeric variable.
# Keep only the rows whose generation is greater than 5.
five_gen_up <- subset(Pokemon, Generation > 5)
# Normal Q-Q plot of Attack; the reference line drawn by qqline() makes
# departures from normality visible.
qqnorm(five_gen_up$Attack,
       main = "Normal Q-Q Plot: Attack of Generation > 5 Pokemon",
       ylab = "Attack Points")
qqline(five_gen_up$Attack)

five_gen_up$Attack
## [1] 61 78 107 45 59 69 56 63 95 36 56 50 73 81 35 22 52 50 68
## [20] 38 45 65 65 100 82 124 80 48 48 48 80 110 150 50 52 72 48 80
## [39] 54 92 52 105 60 75 53 73 38 55 89 121 59 77 65 92 58 50 50
## [58] 75 100 80 70 110 66 66 66 66 90 85 95 100 69 117 30 70 131 131
## [77] 100 100 160 110 160 110
# Histogram of the same Attack values to inspect the shape of the distribution.
hist(five_gen_up$Attack,
     breaks = 10,
     main = "Histogram: Attack of Generation > 5 Pokemon",
     xlab = "Attack Points")

# The empirical method is the most trustworthy because we know both the exact number of Pokemon in our data set whose generation is greater than 5 and the total number of Pokemon in the data set, so the probability is the same every time it is computed, whereas a sampling estimate changes from one run to the next.