INPUT DATA

options(repos = c(CRAN = "https://cloud.r-project.org"))
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
install.packages("lubridate")
## Installing package into 'C:/Users/Hafizh Fadhlah/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'lubridate' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'lubridate'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\Hafizh
## Fadhlah\AppData\Local\R\win-library\4.4\00LOCK\lubridate\libs\x64\lubridate.dll
## to C:\Users\Hafizh
## Fadhlah\AppData\Local\R\win-library\4.4\lubridate\libs\x64\lubridate.dll:
## Permission denied
## Warning: restored 'lubridate'
## 
## The downloaded binary packages are in
##  C:\Users\Hafizh Fadhlah\AppData\Local\Temp\RtmpIbBldh\downloaded_packages
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data <- read_xlsx("C:\\Users\\Hafizh Fadhlah\\OneDrive\\Documents\\Data AED Gol Pildun.xlsx")
data
## # A tibble: 2,720 × 27
##    key_id goal_id tournament_id tournament_name   match_id match_name match_date
##     <dbl> <chr>   <chr>         <chr>             <chr>    <chr>      <chr>     
##  1      1 G-0001  WC-1930       1930 FIFA World … M-1930-… France v … 7/13/1930 
##  2      2 G-0002  WC-1930       1930 FIFA World … M-1930-… France v … 7/13/1930 
##  3      3 G-0003  WC-1930       1930 FIFA World … M-1930-… France v … 7/13/1930 
##  4      4 G-0004  WC-1930       1930 FIFA World … M-1930-… France v … 7/13/1930 
##  5      5 G-0005  WC-1930       1930 FIFA World … M-1930-… France v … 7/13/1930 
##  6      6 G-0006  WC-1930       1930 FIFA World … M-1930-… United St… 7/13/1930 
##  7      7 G-0007  WC-1930       1930 FIFA World … M-1930-… United St… 7/13/1930 
##  8      8 G-0008  WC-1930       1930 FIFA World … M-1930-… United St… 7/13/1930 
##  9      9 G-0009  WC-1930       1930 FIFA World … M-1930-… Yugoslavi… 7/14/1930 
## 10     10 G-0010  WC-1930       1930 FIFA World … M-1930-… Yugoslavi… 7/14/1930 
## # ℹ 2,710 more rows
## # ℹ 20 more variables: stage_name <chr>, group_name <chr>, team_id <chr>,
## #   team_name <chr>, team_code <chr>, home_team <dbl>, away_team <dbl>,
## #   player_id <chr>, family_name <chr>, given_name <chr>, shirt_number <dbl>,
## #   player_team_id <chr>, player_team_name <chr>, player_team_code <chr>,
## #   minute_label <chr>, minute_regulation <dbl>, minute_stoppage <dbl>,
## #   match_period <chr>, own_goal <dbl>, penalty <dbl>

STRUKTUR DATA

str(data)
## tibble [2,720 × 27] (S3: tbl_df/tbl/data.frame)
##  $ key_id           : num [1:2720] 1 2 3 4 5 6 7 8 9 10 ...
##  $ goal_id          : chr [1:2720] "G-0001" "G-0002" "G-0003" "G-0004" ...
##  $ tournament_id    : chr [1:2720] "WC-1930" "WC-1930" "WC-1930" "WC-1930" ...
##  $ tournament_name  : chr [1:2720] "1930 FIFA World Cup" "1930 FIFA World Cup" "1930 FIFA World Cup" "1930 FIFA World Cup" ...
##  $ match_id         : chr [1:2720] "M-1930-01" "M-1930-01" "M-1930-01" "M-1930-01" ...
##  $ match_name       : chr [1:2720] "France v Mexico" "France v Mexico" "France v Mexico" "France v Mexico" ...
##  $ match_date       : chr [1:2720] "7/13/1930" "7/13/1930" "7/13/1930" "7/13/1930" ...
##  $ stage_name       : chr [1:2720] "group stage" "group stage" "group stage" "group stage" ...
##  $ group_name       : chr [1:2720] "Group 1" "Group 1" "Group 1" "Group 1" ...
##  $ team_id          : chr [1:2720] "T-28" "T-28" "T-28" "T-44" ...
##  $ team_name        : chr [1:2720] "France" "France" "France" "Mexico" ...
##  $ team_code        : chr [1:2720] "FRA" "FRA" "FRA" "MEX" ...
##  $ home_team        : num [1:2720] 1 1 1 0 1 1 1 1 1 1 ...
##  $ away_team        : num [1:2720] 0 0 0 1 0 0 0 0 0 0 ...
##  $ player_id        : chr [1:2720] "P-08962" "P-00280" "P-09030" "P-00780" ...
##  $ family_name      : chr [1:2720] "Laurent" "Langiller" "Maschinot" "Carreño" ...
##  $ given_name       : chr [1:2720] "Lucien" "Marcel" "André" "Juan" ...
##  $ shirt_number     : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
##  $ player_team_id   : chr [1:2720] "T-28" "T-28" "T-28" "T-44" ...
##  $ player_team_name : chr [1:2720] "France" "France" "France" "Mexico" ...
##  $ player_team_code : chr [1:2720] "FRA" "FRA" "FRA" "MEX" ...
##  $ minute_label     : chr [1:2720] "19'" "40'" "43'" "70'" ...
##  $ minute_regulation: num [1:2720] 19 40 43 70 87 23 45 69 21 30 ...
##  $ minute_stoppage  : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
##  $ match_period     : chr [1:2720] "first half" "first half" "first half" "second half" ...
##  $ own_goal         : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
##  $ penalty          : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...

NAMA KOLOM

colnames(data)
##  [1] "key_id"            "goal_id"           "tournament_id"    
##  [4] "tournament_name"   "match_id"          "match_name"       
##  [7] "match_date"        "stage_name"        "group_name"       
## [10] "team_id"           "team_name"         "team_code"        
## [13] "home_team"         "away_team"         "player_id"        
## [16] "family_name"       "given_name"        "shirt_number"     
## [19] "player_team_id"    "player_team_name"  "player_team_code" 
## [22] "minute_label"      "minute_regulation" "minute_stoppage"  
## [25] "match_period"      "own_goal"          "penalty"

TOTAL GOL DI PIALA DUNIA

total_goals <- data %>%
  summarise(total_goals = n())

print(total_goals)
## # A tibble: 1 × 1
##   total_goals
##         <int>
## 1        2720

Insight: 1. 2720 gol telah dicetak di piala dunia

VISUALISASI TOTAL GOL PER EDISI PIALA DUNIA

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
data %>%
  group_by(tournament_name) %>%
  summarise(total_goals = n()) %>%
  arrange(desc(total_goals)) %>%
  ggplot(aes(x = reorder(tournament_name, total_goals), y = total_goals)) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(title = "Total Gol per Edisi Piala Dunia", x = "Turnamen", y = "Total Gol")

Insight: 1. Dari grafik diatas, dapat dilihat bahwa Piala Dunia 2022 adalah turnamen dengan jumlah gol paling banyak dan Piala Dunia 1930 adalah turnamen dengan jumlah gol paling sedikit 2. Terjadi kenaikan jumlah gol dari edisi ke edisi di piala dunia

10 NEGARA DENGAN JUMLAH GOL TERBANYAK DI PIALA DUNIA

data %>%
  group_by(team_name) %>%
  summarise(total_goals = n()) %>%
  arrange(desc(total_goals)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(team_name, total_goals), y = total_goals)) +
  geom_col(fill = "green") +
  coord_flip() +
  labs(title = "10 Negara dengan Jumlah Gol Terbanyak", x = "Negara", y = "Total Gol")

Insight: 1. Brazil menjadi tim dengan total gol terbanyak di piala dunia diikuti oleh argentina 2. Dari daftar 10 tim diatas, didominasi oleh negara dari Benua Eropa

10 PEMAIN DENGAN JUMLAH GOL TERBANYAK DI PIALA DUNIA

data %>%
  group_by(family_name, given_name) %>%
  summarise(total_goals = n()) %>%
  arrange(desc(total_goals)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(family_name, total_goals), y = total_goals, fill = given_name)) +
  geom_col() +
  coord_flip() +
  labs(title = "10 Pemain dengan Jumlah Gol Terbanyak", x = "Pemain", y = "Total Gol")
## `summarise()` has grouped output by 'family_name'. You can override using the
## `.groups` argument.

Insight: 1. Miroslav Klose menjadi top skor piala dunia sepanjang sejarah sampai saat ini 2. 2 pemain yang masih aktif dan masuk ke dalam top skor adalah Lionel Messi dan Kylian Mbappe 3. Pemain asal Benua Amerika Selatan dan Eropa masih mendominasi untuk menjadi top skor di piala dunia

SETIAP GOL YANG DICETAK BERDASARKAN BABAK PERTANDINGAN

data %>%
  group_by(match_period) %>%
  summarise(total_goals = n()) %>%
  ggplot(aes(x = match_period, y = total_goals, fill = match_period)) +
  geom_col() +
  labs(title = "Gol Berdasarkan Babak Pertandingan", x = "Babak", y = "Total Gol")

Insight: 1. Babak pertama dan Babak kedua waktu normal (45 menit) merupakan babak dengan jumlah gol terbanyak, hal ini disebabkan rentang waktunya paling besar dibandingkan babak extra time dan stoppage time

SETIAP GOL YANG DICETAK BERDASARKAN MENIT PERTANDINGAN

data %>%
  ggplot(aes(x = minute_regulation)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black") +
  labs(title = "Distribusi Gol Berdasarkan Menit Pertandingan", x = "Menit", y = "Jumlah Gol")

PERBANDINGAN JUMLAH GOL PENALTI VS GOL MON-PENALTI

data %>%
  group_by(penalty) %>%
  summarise(total_goals = n()) %>%
  mutate(type = ifelse(penalty == 1, "Penalti", "Non-Penalti")) %>%
  ggplot(aes(x = type, y = total_goals, fill = type)) +
  geom_col() +
  labs(title = "Distribusi Gol Penalti vs Non-Penalti", x = "Jenis Gol", y = "Total Gol")

Insight: 1. Gol non-penalti masih jauh mendominasi dibandingkan gol penalti

PERBANDINGAN GOL BUNUH DIRI VS GOL NORMAL

data %>%
  group_by(own_goal) %>%
  summarise(total_goals = n()) %>%
  mutate(type = ifelse(own_goal == 1, "Bunuh Diri", "Gol Normal")) %>%
  ggplot(aes(x = type, y = total_goals, fill = type)) +
  geom_col() +
  labs(title = "Distribusi Gol Bunuh Diri", x = "Jenis Gol", y = "Total Gol")

Insight: 1. Gol bunuh diri jika dibandingkan dengan gol non-bunuh diri masih sangan sedikit terjadi di piala dunia

HEAD-TO-HEAD NEGARA DENGAN JUMLAH GOL YANG TERCIPTA PALING BANYAK

data %>%
  group_by(match_name) %>%
  summarise(total_goals = n()) %>%
  arrange(desc(total_goals)) %>%
  head(20)
## # A tibble: 20 × 2
##    match_name              total_goals
##    <chr>                         <int>
##  1 Brazil v Sweden                  28
##  2 Brazil v Poland                  20
##  3 Argentina v Mexico               18
##  4 Brazil v Chile                   16
##  5 Portugal v North Korea           15
##  6 Brazil v Czechoslovakia          14
##  7 France v Mexico                  14
##  8 West Germany v Turkey            14
##  9 Brazil v France                  13
## 10 Brazil v Mexico                  13
## 11 Austria v Switzerland            12
## 12 Germany v Belgium                12
## 13 Soviet Union v Belgium           12
## 14 France v Paraguay                11
## 15 France v West Germany            11
## 16 Hungary v Bulgaria               11
## 17 Hungary v El Salvador            11
## 18 Hungary v West Germany           11
## 19 Italy v France                   11
## 20 Italy v United States            11

Insight: 1. Brazil vs Sweden menjadi pertadingan dengan jumlah gol dicetak paling banyak 2. 6 dari 10 daftar pertandingan diatas, terdapat Negara Brazil. Hal ini linear dengan Brazil sebagai negara dengan jumlah gol terbanyak di piala dunia

RATA-RATA GOAL DI SETIAP PIALA DUNIA

data %>%
  group_by(match_id) %>%
  summarise(total_goals = n()) %>%
  summarise(mean_goals = mean(total_goals))
## # A tibble: 1 × 1
##   mean_goals
##        <dbl>
## 1       3.07

Insight: 1. Rata-rata jumlah gol di piala dunia adalah 3 gol

10 NEGARA DENGAN TINGKAT PARTISIPASI PALING BANYAK

data %>%
  distinct(team_name, tournament_name) %>%
  group_by(team_name) %>%
  summarise(participation_count = n()) %>%
  arrange(desc(participation_count)) %>%
  head(10)
## # A tibble: 10 × 2
##    team_name participation_count
##    <chr>                   <int>
##  1 Brazil                     22
##  2 Argentina                  18
##  3 Italy                      18
##  4 Mexico                     17
##  5 England                    16
##  6 Spain                      16
##  7 France                     15
##  8 Uruguay                    14
##  9 Belgium                    13
## 10 Sweden                     12

Insight: 1. Selain menjadi negara pencetak gol paling banyak, Brazil juga menjadi negara paling banyak berpartisipasi di piala dunia. Hal ini menjadikan Brazil sebagai tim yang selalu lolos ke piala dunia 2. Seperti sebelumnya, negara-negara Eropa dan Amerika masih mendominasi di tingkat partisipasi

JUMLAH GOL DI SETIAP FINAL PIALA DUNIA

data %>%
  filter(stage_name == "final") %>%  
  group_by(tournament_name) %>%  
  summarise(total_goals = n()) %>%
  ggplot(aes(x = tournament_name, y = total_goals, fill = total_goals)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Jumlah Gol di Setiap Final Piala Dunia",
       x = "Tahun Piala Dunia",
       y = "Total Gol") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Insight: 1. Piala Dunia 1958 menjadi piala dunia dengan gol terbanyak di final, yaitu 7 gol

PERBANDINGAN JUMLAH GOL YANG DICETAK OLEH TIM KANDANG DAN TANDANG

data %>%
  summarise(
    total_home_goals = sum(home_team, na.rm = TRUE),
    total_away_goals = sum(away_team, na.rm = TRUE)
  )
## # A tibble: 1 × 2
##   total_home_goals total_away_goals
##              <dbl>            <dbl>
## 1             1703             1017

Insight: 1. Gol yang dicetak oleh tim tuan rumah lebih banyak daripada tim yang bermain tandang # VISUALISASI PERBANDINGAN JUMLAH GOL YANG DICETAK OLEH TIM KANDANG DAN TANDANG

install.packages("tidyverse")
## Installing package into 'C:/Users/Hafizh Fadhlah/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Hafizh Fadhlah\AppData\Local\Temp\RtmpIbBldh\downloaded_packages
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.1
## ✔ purrr   1.0.4     ✔ tibble  3.2.1
## ✔ readr   2.1.5     ✔ tidyr   1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data %>%
  summarise(
    total_home_goals = sum(home_team, na.rm = TRUE),
    total_away_goals = sum(away_team, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = c(total_home_goals, total_away_goals), names_to = "team_type", values_to = "total_goals") %>%
  ggplot(aes(x = team_type, y = total_goals, fill = team_type)) +
  geom_col() +
  labs(title = "Perbandingan Total Gol Home Team vs Away Team di Piala Dunia",
       x = "Tipe Tim",
       y = "Total Gol") +
  theme_minimal()

TOTAL GOL YANG DICETAK PADA BABAK EXTRA TIME

data %>%
  filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
  summarise(total_extra_time_goals = n())
## # A tibble: 1 × 1
##   total_extra_time_goals
##                    <int>
## 1                     71

Insight: 1. Jumlah gol yang dicetak pada babak extra time adalah 71 gol dari total 2720 gol di piala dunia

JUMLAH GOL YANG DICETAK PADA BABAK EXTRA TIME DI SETIAP EDISI PIALA DUNIA

data %>%
  filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
  group_by(tournament_name) %>%  
  summarise(extra_time_goals = n()) %>%
  arrange(tournament_name)
## # A tibble: 18 × 2
##    tournament_name     extra_time_goals
##    <chr>                          <int>
##  1 1934 FIFA World Cup                4
##  2 1938 FIFA World Cup                9
##  3 1954 FIFA World Cup                4
##  4 1958 FIFA World Cup                1
##  5 1966 FIFA World Cup                2
##  6 1970 FIFA World Cup                7
##  7 1978 FIFA World Cup                2
##  8 1982 FIFA World Cup                4
##  9 1986 FIFA World Cup                5
## 10 1990 FIFA World Cup                6
## 11 1994 FIFA World Cup                3
## 12 1998 FIFA World Cup                1
## 13 2002 FIFA World Cup                3
## 14 2006 FIFA World Cup                3
## 15 2010 FIFA World Cup                2
## 16 2014 FIFA World Cup                8
## 17 2018 FIFA World Cup                3
## 18 2022 FIFA World Cup                4

Insight: 1. Piala DUnia 1938 menjadi edisi dengan jumlah gol tercetak di babak extra time paling banyak dengan total 9 gol

VISUALISASI JUMLAH GOL YANG DICETAK PADA BABAK EXTRA TIME DI SETIAP EDISI PIALA DUNIA

data %>%
  filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
  group_by(tournament_name) %>%
  summarise(extra_time_goals = n()) %>%
  ggplot(aes(x = tournament_name, y = extra_time_goals, fill = extra_time_goals)) +
  geom_col() +
  labs(title = "Jumlah Gol di Extra Time per Piala Dunia",
       x = "Tahun Piala Dunia",
       y = "Total Gol di Extra Time") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))