options(repos = c(CRAN = "https://cloud.r-project.org"))
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
install.packages("lubridate")
## Installing package into 'C:/Users/Hafizh Fadhlah/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'lubridate' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'lubridate'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\Hafizh
## Fadhlah\AppData\Local\R\win-library\4.4\00LOCK\lubridate\libs\x64\lubridate.dll
## to C:\Users\Hafizh
## Fadhlah\AppData\Local\R\win-library\4.4\lubridate\libs\x64\lubridate.dll:
## Permission denied
## Warning: restored 'lubridate'
##
## The downloaded binary packages are in
## C:\Users\Hafizh Fadhlah\AppData\Local\Temp\RtmpIbBldh\downloaded_packages
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data <- read_xlsx("C:\\Users\\Hafizh Fadhlah\\OneDrive\\Documents\\Data AED Gol Pildun.xlsx")
data
## # A tibble: 2,720 × 27
## key_id goal_id tournament_id tournament_name match_id match_name match_date
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 G-0001 WC-1930 1930 FIFA World … M-1930-… France v … 7/13/1930
## 2 2 G-0002 WC-1930 1930 FIFA World … M-1930-… France v … 7/13/1930
## 3 3 G-0003 WC-1930 1930 FIFA World … M-1930-… France v … 7/13/1930
## 4 4 G-0004 WC-1930 1930 FIFA World … M-1930-… France v … 7/13/1930
## 5 5 G-0005 WC-1930 1930 FIFA World … M-1930-… France v … 7/13/1930
## 6 6 G-0006 WC-1930 1930 FIFA World … M-1930-… United St… 7/13/1930
## 7 7 G-0007 WC-1930 1930 FIFA World … M-1930-… United St… 7/13/1930
## 8 8 G-0008 WC-1930 1930 FIFA World … M-1930-… United St… 7/13/1930
## 9 9 G-0009 WC-1930 1930 FIFA World … M-1930-… Yugoslavi… 7/14/1930
## 10 10 G-0010 WC-1930 1930 FIFA World … M-1930-… Yugoslavi… 7/14/1930
## # ℹ 2,710 more rows
## # ℹ 20 more variables: stage_name <chr>, group_name <chr>, team_id <chr>,
## # team_name <chr>, team_code <chr>, home_team <dbl>, away_team <dbl>,
## # player_id <chr>, family_name <chr>, given_name <chr>, shirt_number <dbl>,
## # player_team_id <chr>, player_team_name <chr>, player_team_code <chr>,
## # minute_label <chr>, minute_regulation <dbl>, minute_stoppage <dbl>,
## # match_period <chr>, own_goal <dbl>, penalty <dbl>
str(data)
## tibble [2,720 × 27] (S3: tbl_df/tbl/data.frame)
## $ key_id : num [1:2720] 1 2 3 4 5 6 7 8 9 10 ...
## $ goal_id : chr [1:2720] "G-0001" "G-0002" "G-0003" "G-0004" ...
## $ tournament_id : chr [1:2720] "WC-1930" "WC-1930" "WC-1930" "WC-1930" ...
## $ tournament_name : chr [1:2720] "1930 FIFA World Cup" "1930 FIFA World Cup" "1930 FIFA World Cup" "1930 FIFA World Cup" ...
## $ match_id : chr [1:2720] "M-1930-01" "M-1930-01" "M-1930-01" "M-1930-01" ...
## $ match_name : chr [1:2720] "France v Mexico" "France v Mexico" "France v Mexico" "France v Mexico" ...
## $ match_date : chr [1:2720] "7/13/1930" "7/13/1930" "7/13/1930" "7/13/1930" ...
## $ stage_name : chr [1:2720] "group stage" "group stage" "group stage" "group stage" ...
## $ group_name : chr [1:2720] "Group 1" "Group 1" "Group 1" "Group 1" ...
## $ team_id : chr [1:2720] "T-28" "T-28" "T-28" "T-44" ...
## $ team_name : chr [1:2720] "France" "France" "France" "Mexico" ...
## $ team_code : chr [1:2720] "FRA" "FRA" "FRA" "MEX" ...
## $ home_team : num [1:2720] 1 1 1 0 1 1 1 1 1 1 ...
## $ away_team : num [1:2720] 0 0 0 1 0 0 0 0 0 0 ...
## $ player_id : chr [1:2720] "P-08962" "P-00280" "P-09030" "P-00780" ...
## $ family_name : chr [1:2720] "Laurent" "Langiller" "Maschinot" "Carreño" ...
## $ given_name : chr [1:2720] "Lucien" "Marcel" "André" "Juan" ...
## $ shirt_number : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
## $ player_team_id : chr [1:2720] "T-28" "T-28" "T-28" "T-44" ...
## $ player_team_name : chr [1:2720] "France" "France" "France" "Mexico" ...
## $ player_team_code : chr [1:2720] "FRA" "FRA" "FRA" "MEX" ...
## $ minute_label : chr [1:2720] "19'" "40'" "43'" "70'" ...
## $ minute_regulation: num [1:2720] 19 40 43 70 87 23 45 69 21 30 ...
## $ minute_stoppage : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
## $ match_period : chr [1:2720] "first half" "first half" "first half" "second half" ...
## $ own_goal : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
## $ penalty : num [1:2720] 0 0 0 0 0 0 0 0 0 0 ...
colnames(data)
## [1] "key_id" "goal_id" "tournament_id"
## [4] "tournament_name" "match_id" "match_name"
## [7] "match_date" "stage_name" "group_name"
## [10] "team_id" "team_name" "team_code"
## [13] "home_team" "away_team" "player_id"
## [16] "family_name" "given_name" "shirt_number"
## [19] "player_team_id" "player_team_name" "player_team_code"
## [22] "minute_label" "minute_regulation" "minute_stoppage"
## [25] "match_period" "own_goal" "penalty"
total_goals <- data %>%
summarise(total_goals = n())
print(total_goals)
## # A tibble: 1 × 1
## total_goals
## <int>
## 1 2720
Insight: 1. 2720 gol telah dicetak di piala dunia
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
data %>%
group_by(tournament_name) %>%
summarise(total_goals = n()) %>%
arrange(desc(total_goals)) %>%
ggplot(aes(x = reorder(tournament_name, total_goals), y = total_goals)) +
geom_col(fill = "blue") +
coord_flip() +
labs(title = "Total Gol per Edisi Piala Dunia", x = "Turnamen", y = "Total Gol")
Insight: 1. Dari grafik diatas, dapat dilihat bahwa Piala Dunia 2022 adalah turnamen dengan jumlah gol paling banyak dan Piala Dunia 1930 adalah turnamen dengan jumlah gol paling sedikit 2. Terjadi kenaikan jumlah gol dari edisi ke edisi di piala dunia
data %>%
group_by(team_name) %>%
summarise(total_goals = n()) %>%
arrange(desc(total_goals)) %>%
head(10) %>%
ggplot(aes(x = reorder(team_name, total_goals), y = total_goals)) +
geom_col(fill = "green") +
coord_flip() +
labs(title = "10 Negara dengan Jumlah Gol Terbanyak", x = "Negara", y = "Total Gol")
Insight: 1. Brazil menjadi tim dengan total gol terbanyak di piala dunia diikuti oleh argentina 2. Dari daftar 10 tim diatas, didominasi oleh negara dari Benua Eropa
data %>%
group_by(family_name, given_name) %>%
summarise(total_goals = n()) %>%
arrange(desc(total_goals)) %>%
head(10) %>%
ggplot(aes(x = reorder(family_name, total_goals), y = total_goals, fill = given_name)) +
geom_col() +
coord_flip() +
labs(title = "10 Pemain dengan Jumlah Gol Terbanyak", x = "Pemain", y = "Total Gol")
## `summarise()` has grouped output by 'family_name'. You can override using the
## `.groups` argument.
Insight: 1. Miroslav Klose menjadi top skor piala dunia sepanjang sejarah sampai saat ini 2. 2 pemain yang masih aktif dan masuk ke dalam top skor adalah Lionel Messi dan Kylian Mbappe 3. Pemain asal Benua Amerika Selatan dan Eropa masih mendominasi untuk menjadi top skor di piala dunia
data %>%
group_by(match_period) %>%
summarise(total_goals = n()) %>%
ggplot(aes(x = match_period, y = total_goals, fill = match_period)) +
geom_col() +
labs(title = "Gol Berdasarkan Babak Pertandingan", x = "Babak", y = "Total Gol")
Insight: 1. Babak pertama dan Babak kedua waktu normal (45 menit) merupakan babak dengan jumlah gol terbanyak, hal ini disebabkan rentang waktunya paling besar dibandingkan babak extra time dan stoppage time
data %>%
ggplot(aes(x = minute_regulation)) +
geom_histogram(binwidth = 5, fill = "orange", color = "black") +
labs(title = "Distribusi Gol Berdasarkan Menit Pertandingan", x = "Menit", y = "Jumlah Gol")
data %>%
group_by(penalty) %>%
summarise(total_goals = n()) %>%
mutate(type = ifelse(penalty == 1, "Penalti", "Non-Penalti")) %>%
ggplot(aes(x = type, y = total_goals, fill = type)) +
geom_col() +
labs(title = "Distribusi Gol Penalti vs Non-Penalti", x = "Jenis Gol", y = "Total Gol")
Insight: 1. Gol non-penalti masih jauh mendominasi dibandingkan gol penalti
data %>%
group_by(own_goal) %>%
summarise(total_goals = n()) %>%
mutate(type = ifelse(own_goal == 1, "Bunuh Diri", "Gol Normal")) %>%
ggplot(aes(x = type, y = total_goals, fill = type)) +
geom_col() +
labs(title = "Distribusi Gol Bunuh Diri", x = "Jenis Gol", y = "Total Gol")
Insight: 1. Gol bunuh diri jika dibandingkan dengan gol non-bunuh diri masih sangan sedikit terjadi di piala dunia
data %>%
group_by(match_name) %>%
summarise(total_goals = n()) %>%
arrange(desc(total_goals)) %>%
head(20)
## # A tibble: 20 × 2
## match_name total_goals
## <chr> <int>
## 1 Brazil v Sweden 28
## 2 Brazil v Poland 20
## 3 Argentina v Mexico 18
## 4 Brazil v Chile 16
## 5 Portugal v North Korea 15
## 6 Brazil v Czechoslovakia 14
## 7 France v Mexico 14
## 8 West Germany v Turkey 14
## 9 Brazil v France 13
## 10 Brazil v Mexico 13
## 11 Austria v Switzerland 12
## 12 Germany v Belgium 12
## 13 Soviet Union v Belgium 12
## 14 France v Paraguay 11
## 15 France v West Germany 11
## 16 Hungary v Bulgaria 11
## 17 Hungary v El Salvador 11
## 18 Hungary v West Germany 11
## 19 Italy v France 11
## 20 Italy v United States 11
Insight: 1. Brazil vs Sweden menjadi pertadingan dengan jumlah gol dicetak paling banyak 2. 6 dari 10 daftar pertandingan diatas, terdapat Negara Brazil. Hal ini linear dengan Brazil sebagai negara dengan jumlah gol terbanyak di piala dunia
data %>%
group_by(match_id) %>%
summarise(total_goals = n()) %>%
summarise(mean_goals = mean(total_goals))
## # A tibble: 1 × 1
## mean_goals
## <dbl>
## 1 3.07
Insight: 1. Rata-rata jumlah gol di piala dunia adalah 3 gol
data %>%
distinct(team_name, tournament_name) %>%
group_by(team_name) %>%
summarise(participation_count = n()) %>%
arrange(desc(participation_count)) %>%
head(10)
## # A tibble: 10 × 2
## team_name participation_count
## <chr> <int>
## 1 Brazil 22
## 2 Argentina 18
## 3 Italy 18
## 4 Mexico 17
## 5 England 16
## 6 Spain 16
## 7 France 15
## 8 Uruguay 14
## 9 Belgium 13
## 10 Sweden 12
Insight: 1. Selain menjadi negara pencetak gol paling banyak, Brazil juga menjadi negara paling banyak berpartisipasi di piala dunia. Hal ini menjadikan Brazil sebagai tim yang selalu lolos ke piala dunia 2. Seperti sebelumnya, negara-negara Eropa dan Amerika masih mendominasi di tingkat partisipasi
data %>%
filter(stage_name == "final") %>%
group_by(tournament_name) %>%
summarise(total_goals = n()) %>%
ggplot(aes(x = tournament_name, y = total_goals, fill = total_goals)) +
geom_col() +
theme_minimal() +
labs(title = "Jumlah Gol di Setiap Final Piala Dunia",
x = "Tahun Piala Dunia",
y = "Total Gol") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Insight: 1. Piala Dunia 1958 menjadi piala dunia dengan gol terbanyak di final, yaitu 7 gol
data %>%
summarise(
total_home_goals = sum(home_team, na.rm = TRUE),
total_away_goals = sum(away_team, na.rm = TRUE)
)
## # A tibble: 1 × 2
## total_home_goals total_away_goals
## <dbl> <dbl>
## 1 1703 1017
Insight: 1. Gol yang dicetak oleh tim tuan rumah lebih banyak daripada tim yang bermain tandang # VISUALISASI PERBANDINGAN JUMLAH GOL YANG DICETAK OLEH TIM KANDANG DAN TANDANG
install.packages("tidyverse")
## Installing package into 'C:/Users/Hafizh Fadhlah/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Hafizh Fadhlah\AppData\Local\Temp\RtmpIbBldh\downloaded_packages
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ purrr 1.0.4 ✔ tibble 3.2.1
## ✔ readr 2.1.5 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data %>%
summarise(
total_home_goals = sum(home_team, na.rm = TRUE),
total_away_goals = sum(away_team, na.rm = TRUE)
) %>%
pivot_longer(cols = c(total_home_goals, total_away_goals), names_to = "team_type", values_to = "total_goals") %>%
ggplot(aes(x = team_type, y = total_goals, fill = team_type)) +
geom_col() +
labs(title = "Perbandingan Total Gol Home Team vs Away Team di Piala Dunia",
x = "Tipe Tim",
y = "Total Gol") +
theme_minimal()
data %>%
filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
summarise(total_extra_time_goals = n())
## # A tibble: 1 × 1
## total_extra_time_goals
## <int>
## 1 71
Insight: 1. Jumlah gol yang dicetak pada babak extra time adalah 71 gol dari total 2720 gol di piala dunia
data %>%
filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
group_by(tournament_name) %>%
summarise(extra_time_goals = n()) %>%
arrange(tournament_name)
## # A tibble: 18 × 2
## tournament_name extra_time_goals
## <chr> <int>
## 1 1934 FIFA World Cup 4
## 2 1938 FIFA World Cup 9
## 3 1954 FIFA World Cup 4
## 4 1958 FIFA World Cup 1
## 5 1966 FIFA World Cup 2
## 6 1970 FIFA World Cup 7
## 7 1978 FIFA World Cup 2
## 8 1982 FIFA World Cup 4
## 9 1986 FIFA World Cup 5
## 10 1990 FIFA World Cup 6
## 11 1994 FIFA World Cup 3
## 12 1998 FIFA World Cup 1
## 13 2002 FIFA World Cup 3
## 14 2006 FIFA World Cup 3
## 15 2010 FIFA World Cup 2
## 16 2014 FIFA World Cup 8
## 17 2018 FIFA World Cup 3
## 18 2022 FIFA World Cup 4
Insight: 1. Piala DUnia 1938 menjadi edisi dengan jumlah gol tercetak di babak extra time paling banyak dengan total 9 gol
data %>%
filter(minute_regulation >= 91 & minute_regulation <= 120) %>%
group_by(tournament_name) %>%
summarise(extra_time_goals = n()) %>%
ggplot(aes(x = tournament_name, y = extra_time_goals, fill = extra_time_goals)) +
geom_col() +
labs(title = "Jumlah Gol di Extra Time per Piala Dunia",
x = "Tahun Piala Dunia",
y = "Total Gol di Extra Time") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))