# Import the Pokemon data set from the working directory and show the
# structure (dimensions, column names and column types) of the result.
mydata <- read.csv(file = "./POKEMON.csv", header = TRUE)
utils::str(object = mydata)
## 'data.frame':    721 obs. of  23 variables:
##  $ Number          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name            : chr  "Bulbasaur" "Ivysaur" "Venusaur" "Charmander" ...
##  $ Type_1          : chr  "Grass" "Grass" "Grass" "Fire" ...
##  $ Type_2          : chr  "Poison" "Poison" "Poison" "" ...
##  $ Total           : int  318 405 525 309 405 534 314 405 530 195 ...
##  $ HP              : int  45 60 80 39 58 78 44 59 79 45 ...
##  $ Attack          : int  49 62 82 52 64 84 48 63 83 30 ...
##  $ Defense         : int  49 63 83 43 58 78 65 80 100 35 ...
##  $ Sp_Atk          : int  65 80 100 60 80 109 50 65 85 20 ...
##  $ Sp_Def          : int  65 80 100 50 65 85 64 80 105 20 ...
##  $ Speed           : int  45 60 80 65 80 100 43 58 78 45 ...
##  $ Generation      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ isLegendary     : chr  "False" "False" "False" "False" ...
##  $ Color           : chr  "Green" "Green" "Green" "Red" ...
##  $ hasGender       : chr  "True" "True" "True" "True" ...
##  $ Pr_Male         : num  0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.5 ...
##  $ Egg_Group_1     : chr  "Monster" "Monster" "Monster" "Monster" ...
##  $ Egg_Group_2     : chr  "Grass" "Grass" "Grass" "Dragon" ...
##  $ hasMegaEvolution: chr  "False" "False" "True" "False" ...
##  $ Height_m        : num  0.71 0.99 2.01 0.61 1.09 1.7 0.51 0.99 1.6 0.3 ...
##  $ Weight_kg       : num  6.9 13 100 8.5 19 90.5 9 22.5 85.5 2.9 ...
##  $ Catch_Rate      : int  45 45 45 45 45 45 45 45 45 255 ...
##  $ Body_Style      : chr  "quadruped" "quadruped" "quadruped" "bipedal_tailed" ...
# Preview the first six observations of the imported data.
head(x = mydata, n = 6L)
##   Number       Name Type_1 Type_2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 1      1  Bulbasaur  Grass Poison   318 45     49      49     65     65    45
## 2      2    Ivysaur  Grass Poison   405 60     62      63     80     80    60
## 3      3   Venusaur  Grass Poison   525 80     82      83    100    100    80
## 4      4 Charmander   Fire          309 39     52      43     60     50    65
## 5      5 Charmeleon   Fire          405 58     64      58     80     65    80
## 6      6  Charizard   Fire Flying   534 78     84      78    109     85   100
##   Generation isLegendary Color hasGender Pr_Male Egg_Group_1 Egg_Group_2
## 1          1       False Green      True   0.875     Monster       Grass
## 2          1       False Green      True   0.875     Monster       Grass
## 3          1       False Green      True   0.875     Monster       Grass
## 4          1       False   Red      True   0.875     Monster      Dragon
## 5          1       False   Red      True   0.875     Monster      Dragon
## 6          1       False   Red      True   0.875     Monster      Dragon
##   hasMegaEvolution Height_m Weight_kg Catch_Rate     Body_Style
## 1            False     0.71       6.9         45      quadruped
## 2            False     0.99      13.0         45      quadruped
## 3             True     2.01     100.0         45      quadruped
## 4            False     0.61       8.5         45 bipedal_tailed
## 5            False     1.09      19.0         45 bipedal_tailed
## 6             True     1.70      90.5         45 bipedal_tailed

The imported dataset has 721 units of observation (Pokemon) and 23 variables. The categorical variables used in this analysis are the name and the primary type (Type_1); the numerical variables are the total value, base health points, base attack, base defense, speed, height, weight and catch rate.

The dataset was obtained from Kaggle (author: alopez247). Retrieved January 5th, 2023, from: https://www.kaggle.com/datasets/alopez247/pokemon

The main goal of this analysis is to find out which type of Pokemon has, on average, the highest stats.

I have replaced the existing value of 64 in the 5th row and 7th column (the Attack of Charmeleon) with the value of 200.

# Overwrite the Attack value in row 5 (Charmeleon; column 7 of the raw data,
# originally 64) with 200. The column is addressed by name so the edit does
# not depend on the column order, and `<-` is used for assignment.
mydata[5, "Attack"] <- 200
head(mydata)
##   Number       Name Type_1 Type_2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 1      1  Bulbasaur  Grass Poison   318 45     49      49     65     65    45
## 2      2    Ivysaur  Grass Poison   405 60     62      63     80     80    60
## 3      3   Venusaur  Grass Poison   525 80     82      83    100    100    80
## 4      4 Charmander   Fire          309 39     52      43     60     50    65
## 5      5 Charmeleon   Fire          405 58    200      58     80     65    80
## 6      6  Charizard   Fire Flying   534 78     84      78    109     85   100
##   Generation isLegendary Color hasGender Pr_Male Egg_Group_1 Egg_Group_2
## 1          1       False Green      True   0.875     Monster       Grass
## 2          1       False Green      True   0.875     Monster       Grass
## 3          1       False Green      True   0.875     Monster       Grass
## 4          1       False   Red      True   0.875     Monster      Dragon
## 5          1       False   Red      True   0.875     Monster      Dragon
## 6          1       False   Red      True   0.875     Monster      Dragon
##   hasMegaEvolution Height_m Weight_kg Catch_Rate     Body_Style
## 1            False     0.71       6.9         45      quadruped
## 2            False     0.99      13.0         45      quadruped
## 3             True     2.01     100.0         45      quadruped
## 4            False     0.61       8.5         45 bipedal_tailed
## 5            False     1.09      19.0         45 bipedal_tailed
## 6             True     1.70      90.5         45 bipedal_tailed

I have extracted only the 12th row into a new data frame.

# Keep only the 12th observation as a one-row data frame and print it.
mydata1 <- mydata[12L, , drop = FALSE]
head(x = mydata1, n = 6L)
##    Number       Name Type_1 Type_2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 12     12 Butterfree    Bug Flying   395 60     45      50     90     80    70
##    Generation isLegendary Color hasGender Pr_Male Egg_Group_1 Egg_Group_2
## 12          1       False White      True     0.5         Bug            
##    hasMegaEvolution Height_m Weight_kg Catch_Rate Body_Style
## 12            False     1.09        32         45 four_wings

I have renamed the 3rd and 4th column to TYPE1 and TYPE2.

# Rename the 3rd and 4th columns to TYPE1 and TYPE2 in a single assignment,
# then preview the result.
names(mydata)[3:4] <- c("TYPE1", "TYPE2")
head(x = mydata, n = 6L)
##   Number       Name TYPE1  TYPE2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 1      1  Bulbasaur Grass Poison   318 45     49      49     65     65    45
## 2      2    Ivysaur Grass Poison   405 60     62      63     80     80    60
## 3      3   Venusaur Grass Poison   525 80     82      83    100    100    80
## 4      4 Charmander  Fire          309 39     52      43     60     50    65
## 5      5 Charmeleon  Fire          405 58    200      58     80     65    80
## 6      6  Charizard  Fire Flying   534 78     84      78    109     85   100
##   Generation isLegendary Color hasGender Pr_Male Egg_Group_1 Egg_Group_2
## 1          1       False Green      True   0.875     Monster       Grass
## 2          1       False Green      True   0.875     Monster       Grass
## 3          1       False Green      True   0.875     Monster       Grass
## 4          1       False   Red      True   0.875     Monster      Dragon
## 5          1       False   Red      True   0.875     Monster      Dragon
## 6          1       False   Red      True   0.875     Monster      Dragon
##   hasMegaEvolution Height_m Weight_kg Catch_Rate     Body_Style
## 1            False     0.71       6.9         45      quadruped
## 2            False     0.99      13.0         45      quadruped
## 3             True     2.01     100.0         45      quadruped
## 4            False     0.61       8.5         45 bipedal_tailed
## 5            False     1.09      19.0         45 bipedal_tailed
## 6             True     1.70      90.5         45 bipedal_tailed

I have sorted the Pokemon by their total value, from smallest to largest.

# Sort the observations by Total in ascending order and show the six
# Pokemon with the lowest totals.
mydata2 <- mydata[with(mydata, order(Total, decreasing = FALSE)), ]
head(x = mydata2, n = 6L)
##     Number      Name  TYPE1  TYPE2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 191    191   Sunkern  Grass          180 30     30      30     30     30    30
## 298    298   Azurill Normal  Fairy   190 50     20      40     20     40    20
## 401    401 Kricketot    Bug          194 37     25      41     25     41    25
## 10      10  Caterpie    Bug          195 45     30      35     20     20    45
## 13      13    Weedle    Bug Poison   195 40     35      30     20     20    50
## 265    265   Wurmple    Bug          195 45     45      35     20     30    20
##     Generation isLegendary  Color hasGender Pr_Male  Egg_Group_1 Egg_Group_2
## 191          2       False Yellow      True    0.50        Grass            
## 298          3       False   Blue      True    0.25 Undiscovered            
## 401          4       False    Red      True    0.50          Bug            
## 10           1       False  Green      True    0.50          Bug            
## 13           1       False  Brown      True    0.50          Bug            
## 265          3       False    Red      True    0.50          Bug            
##     hasMegaEvolution Height_m Weight_kg Catch_Rate       Body_Style
## 191            False      0.3       1.8        235        head_only
## 298            False      0.2       2.0        150        head_legs
## 401            False      0.3       2.2        255 bipedal_tailless
## 10             False      0.3       2.9        255        insectoid
## 13             False      0.3       3.2        255  serpentine_body
## 265            False      0.3       3.6        255  serpentine_body

I have sorted the Pokemon by their base health points (HP), from smallest to largest.

# Sort the observations by HP in ascending order and show the six Pokemon
# with the lowest base health points.
mydata3 <- mydata[with(mydata, order(HP, decreasing = FALSE)), ]
head(x = mydata3, n = 6L)
##     Number     Name    TYPE1 TYPE2 Total HP Attack Defense Sp_Atk Sp_Def Speed
## 292    292 Shedinja      Bug Ghost   236  1     90      45     30     30    40
## 50      50  Diglett   Ground         265 10     55      25     35     45    95
## 129    129 Magikarp    Water         200 20     10      55     15     20    80
## 172    172    Pichu Electric         205 20     40      15     35     35    60
## 213    213  Shuckle      Bug  Rock   505 20     10     230     10    230     5
## 349    349   Feebas    Water         200 20     15      20     10     55    80
##     Generation isLegendary  Color hasGender Pr_Male  Egg_Group_1 Egg_Group_2
## 292          3       False  Brown     False      NA      Mineral            
## 50           1       False  Brown      True     0.5        Field            
## 129          1       False    Red      True     0.5      Water_2      Dragon
## 172          2       False Yellow      True     0.5 Undiscovered            
## 213          2       False Yellow      True     0.5          Bug            
## 349          3       False  Brown      True     0.5      Water_1      Dragon
##     hasMegaEvolution Height_m Weight_kg Catch_Rate Body_Style
## 292            False     0.79       1.2         45  head_base
## 50             False     0.20       0.8        255  head_base
## 129            False     0.89      10.0        255  with_fins
## 172            False     0.30       2.0        190  quadruped
## 213            False     0.61      20.5        190  insectoid
## 349            False     0.61       7.4        255  with_fins

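I have removed the columns that are not needed for the analysis: Number, TYPE2, Sp_Atk, Sp_Def, Generation, isLegendary, Color, hasGender, Pr_Male, Egg_Group_1, Egg_Group_2, hasMegaEvolution and Body_Style.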

# Keep only the variables used in the analysis. The columns are selected by
# name rather than dropped by negative position, so the step does not depend
# on the column order and gives the same result if it is run again.
mydata <- mydata[c(
  "Name", "TYPE1", "Total", "HP", "Attack", "Defense", "Speed",
  "Height_m", "Weight_kg", "Catch_Rate"
)]

head(mydata)
##         Name TYPE1 Total HP Attack Defense Speed Height_m Weight_kg Catch_Rate
## 1  Bulbasaur Grass   318 45     49      49    45     0.71       6.9         45
## 2    Ivysaur Grass   405 60     62      63    60     0.99      13.0         45
## 3   Venusaur Grass   525 80     82      83    80     2.01     100.0         45
## 4 Charmander  Fire   309 39     52      43    65     0.61       8.5         45
## 5 Charmeleon  Fire   405 58    200      58    80     1.09      19.0         45
## 6  Charizard  Fire   534 78     84      78   100     1.70      90.5         45

VARIABLES

- Name: Name of the unit of observation
- TYPE1: Primary type of the Pokemon
- Total: Total value
- HP: Base health points
- Attack: Base attack
- Defense: Base defense
- Speed: Base speed
- Height_m: Height in meters
- Weight_kg: Weight in kilograms
- Catch_Rate: Catch rate of the Pokemon

# Descriptive statistics for every numeric variable, rounded to one decimal.
# describe() is qualified with psych:: because other packages (e.g. Hmisc)
# export a function of the same name that would mask it if attached later.
# Columns are chosen by type instead of by position, so the non-numeric
# columns are skipped wherever they sit in the data frame.
numeric_columns <- vapply(mydata, is.numeric, logical(1))
round(psych::describe(mydata[numeric_columns]), 1)
##            vars   n  mean    sd median trimmed   mad   min   max range skew
## Total         1 721 417.9 109.7    424   416.7 127.5 180.0 720.0 540.0  0.1
## HP            2 721  68.4  25.8     65    66.3  22.2   1.0 255.0 254.0  1.7
## Attack        3 721  75.2  29.4     75    74.1  29.7   5.0 200.0 195.0  0.4
## Defense       4 721  70.8  29.3     65    68.1  25.2   5.0 230.0 225.0  1.1
## Speed         5 721  65.7  27.3     65    64.9  29.7   5.0 160.0 155.0  0.3
## Height_m      6 721   1.1   1.0      1     1.0   0.6   0.1  14.5  14.4  5.5
## Weight_kg     7 721  56.8  89.1     28    36.9  31.9   0.1 950.0 949.9  4.0
## Catch_Rate    8 721 100.2  76.6     65    92.5  37.1   3.0 255.0 252.0  0.8
##            kurtosis  se
## Total          -0.7 4.1
## HP              7.7 1.0
## Attack          0.0 1.1
## Defense         2.4 1.1
## Speed          -0.5 1.0
## Height_m       49.6 0.0
## Weight_kg      23.6 3.3
## Catch_Rate     -0.7 2.9

MEAN

The average Total value of the Pokemon in the dataset is 417.9.

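RANGE

The range of Total is 540: the difference between the highest (720) and the lowest (180) Total value.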

# Boxplot of Total for each primary type. TYPE1 is mapped to the x-axis so
# that every box is labelled with its type; without an x aesthetic the boxes
# are only distinguishable through the fill legend. The legend is dropped
# because it would repeat the axis labels, and the labels are rotated so the
# type names do not overlap.
ggplot(mydata, aes(x = TYPE1, y = Total, fill = TYPE1)) +
  geom_boxplot(show.legend = FALSE) +
  xlab("Primary type") +
  ylab("Total") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The five-number summary consists of the minimum, first quartile, median, third quartile and maximum. The horizontal line in each boxplot represents the median value. In the plot we can see that the biggest range belongs to the Normal type of Pokemon, while the highest median belongs to the Flying type.

# Build a histogram (bin width 10) of a single rating column.
#
# data:   data frame that holds the column.
# column: name of the column to plot, given as a string.
# title:  plot title.
# Returns a ggplot object with the x-axis labelled "Rating".
rating_histogram <- function(data, column, title) {
  # .data[[column]] looks the column up by its string name inside aes().
  ggplot(data, aes(x = .data[[column]])) +
    geom_histogram(position = position_identity(), binwidth = 10) +
    ggtitle(title) +
    xlab("Rating")
}

ATTACK <- rating_histogram(mydata, "Attack", "Attack Rating")
DEFENSE <- rating_histogram(mydata, "Defense", "Defense Rating")
SPEED <- rating_histogram(mydata, "Speed", "Speed Rating")
TOTAL <- rating_histogram(mydata, "Total", "Total Rating")

# Arrange the four histograms in a 2 x 2 grid.
ggarrange(ATTACK, DEFENSE, SPEED, TOTAL,
          ncol = 2, nrow = 2)

The Attack rating histogram is approximately normally distributed, whereas the Defense rating and the Speed rating are both slightly skewed to the right.

# Pairwise scatterplots of Defense, Speed and Weight_kg without smoothers.
# The variables are selected by name so the call does not depend on which
# columns were removed earlier or on their order.
scatterplotMatrix(mydata[, c("Defense", "Speed", "Weight_kg")],
                  smooth = FALSE)

Looking at the scatterplots, we can see a correlation between defense and weight, whereas there is no apparent correlation between speed and weight.