##Empirical method from lab 3
# Empirical probability: the share of Pokemon in the data set whose
# generation is greater than 5.
Pokemon <- read.csv("Pokemon.csv")
# Logical flag per row: TRUE when that Pokemon's generation is above 5.
Pokemonie <- Pokemon$Generation > 5
# Summing a logical vector counts its TRUE values.
Gen_5 <- sum(Pokemonie)
# Divide by the actual number of rows rather than a hard-coded 800, so the
# ratio stays correct if the data file gains or loses rows.
Probba <- Gen_5 / nrow(Pokemon)
Probba # Ratio of Pokemon with generation greater than 5 to all Pokemon in the data set
## [1] 0.1025
##By sampling with replacement from lab 3

# Estimate the same probability by drawing Pokemon at random, with
# replacement, from the data set.
# The pool to draw from is the observed generation of every Pokemon, so each
# generation is picked in proportion to how often it occurs in the data
# (a pool of the six labels "1".."6" would treat all generations as equally
# likely).
outcomes <- Pokemon$Generation
a <- sample(outcomes, size = 600, replace = TRUE)
# Count the draws whose generation is greater than 5.
Gen__replace <- sum(a > 5)
# Divide by the number of draws actually made, not by the data set size.
probbii <- Gen__replace / length(a)
probbii # Estimated probability of drawing a Pokemon with generation greater than 5 when sampling with replacement
## The printed value changes from run to run because the draws are random
## (unless a seed is set beforehand).
##Using the theoretical normal distribution (see Lab 4) – Produce a histogram and Normal Q-Q plot, and assess the normality of the numeric variable.

# Keep only the Pokemon whose generation is greater than 5.
five_gen_up <- subset(Pokemon, Generation > 5)
# Normal Q-Q plot of the Attack values of that subset.
qqnorm(five_gen_up$Attack,
       main = "Normal Q-Q Plot of Attack (Generation > 5)",
       ylab = "Attack Points")
# Reference line through the first and third quartiles; points lying close to
# it indicate that the Attack values are approximately normal.
qqline(five_gen_up$Attack)

# Print the raw Attack values that both plots are based on.
five_gen_up$Attack
##  [1]  61  78 107  45  59  69  56  63  95  36  56  50  73  81  35  22  52  50  68
## [20]  38  45  65  65 100  82 124  80  48  48  48  80 110 150  50  52  72  48  80
## [39]  54  92  52 105  60  75  53  73  38  55  89 121  59  77  65  92  58  50  50
## [58]  75 100  80  70 110  66  66  66  66  90  85  95 100  69 117  30  70 131 131
## [77] 100 100 160 110 160 110
# Histogram of the same Attack values, to judge the shape of the distribution.
hist(five_gen_up$Attack,
     breaks = 10,
     main = "Histogram of Attack (Generation > 5)",
     xlab = "Attack Points")

# The empirical method is the most trustworthy because we know the exact
# number of Pokemon in our data set whose generation is greater than 5, and we
# also know the total number of Pokemon in the data set. The probability is
# therefore the same every time we compute it, unlike an estimate obtained from
# a random sample.