FIFADataAnalysis

Loading libraries and getting the data

  library(tidyverse)
  library(ggthemes)
  # Download the audience data set and store the confederation label as a
  # factor, so that summary() reports a per-level count for it.
  data <- read.csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-06-12/week11_fifa_audience.csv"
  ) %>%
    mutate(confederation = factor(confederation))
  summary(data)

##        X           country           confederation population_share 
##  Min.   :  1.0   Length:191         AFC     :43    Min.   : 0.0000  
##  1st Qu.: 48.5   Class :character   CAF     :50    1st Qu.: 0.0000  
##  Median : 96.0   Mode  :character   CONCACAF:30    Median : 0.1000  
##  Mean   : 96.0                      CONMEBOL:10    Mean   : 0.5225  
##  3rd Qu.:143.5                      OFC     :12    3rd Qu.: 0.3500  
##  Max.   :191.0                      UEFA    :46    Max.   :19.5000  
##  tv_audience_share gdp_weighted_share
##  Min.   : 0.000    Min.   : 0.0000   
##  1st Qu.: 0.000    1st Qu.: 0.0000   
##  Median : 0.100    Median : 0.0000   
##  Mean   : 0.523    Mean   : 0.5204   
##  3rd Qu.: 0.300    3rd Qu.: 0.3000   
##  Max.   :14.800    Max.   :11.3000

Summarizing the data based on gdp_weighted_share and confederation

# Bar chart of the summed gdp_weighted_share of each confederation.
# The share is aggregated per confederation first so that the bar height
# reflects GDP-weighted share rather than the number of countries.
data %>%
  group_by(confederation) %>%
  summarise(gdp_weighted_share = sum(gdp_weighted_share)) %>%
  ggplot(aes(confederation, gdp_weighted_share, fill = confederation)) +
  geom_col()

Finding which countries have the highest and lowest viewership

# Tabulate how many countries report each distinct tv_audience_share value.
data %>%
  arrange(tv_audience_share) %>%
  group_by(tv_audience_share) %>%
  count()

## # A tibble: 29 x 2
## # Groups:   tv_audience_share [29]
##    tv_audience_share     n
##                <dbl> <int>
##  1               0      67
##  2               0.1    46
##  3               0.2    19
##  4               0.3    15
##  5               0.4     6
##  6               0.5     7
##  7               0.6     2
##  8               0.7     3
##  9               0.8     2
## 10               0.9     1
## # ... with 19 more rows

# List the countries from the largest tv_audience_share downwards.
# No summary follows the group_by(), so the result is simply printed as a
# grouped tibble.
data %>%
  arrange(desc(tv_audience_share)) %>%
  group_by(tv_audience_share)

## # A tibble: 191 x 6
## # Groups:   tv_audience_share [29]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     3 China   AFC                       19.5             14.8
##  2     5 Brazil  CONMEBOL                   2.8              7.1
##  3    12 Indone~ AFC                        3.5              6.7
##  4     2 Japan   AFC                        1.9              4.9
##  5     1 United~ CONCACAF                   4.5              4.3
##  6    13 Mexico  CONCACAF                   1.7              3.2
##  7     9 Russia  UEFA                       2.1              3.1
##  8     4 Germany UEFA                       1.2              2.9
##  9    27 Nigeria CAF                        2.3              2.6
## 10    30 Vietnam AFC                        1.3              2.6
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>

Finding which countries have the highest and lowest population

# Tabulate how many countries report each distinct population_share value.
data %>%
  arrange(population_share) %>%
  group_by(population_share) %>%
  count()

## # A tibble: 26 x 2
## # Groups:   population_share [26]
##    population_share     n
##               <dbl> <int>
##  1              0      61
##  2              0.1    49
##  3              0.2    21
##  4              0.3    12
##  5              0.4     9
##  6              0.5     6
##  7              0.6     3
##  8              0.7     6
##  9              0.8     1
## 10              0.9     4
## # ... with 16 more rows

# List the countries from the largest population_share downwards.
# No summary follows the group_by(), so the result is simply printed as a
# grouped tibble.
data %>%
  arrange(desc(population_share)) %>%
  group_by(population_share)

## # A tibble: 191 x 6
## # Groups:   population_share [26]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     3 China   AFC                       19.5             14.8
##  2    39 India   AFC                       17.6              2  
##  3     1 United~ CONCACAF                   4.5              4.3
##  4    12 Indone~ AFC                        3.5              6.7
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6    75 Pakist~ AFC                        2.5              0.4
##  7    27 Nigeria CAF                        2.3              2.6
##  8   114 Bangla~ AFC                        2.2              0.1
##  9     9 Russia  UEFA                       2.1              3.1
## 10     2 Japan   AFC                        1.9              4.9
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>

Finding which countries have the highest and lowest GDP share

# Tabulate how many countries report each distinct gdp_weighted_share value.
data %>%
  arrange(gdp_weighted_share) %>%
  group_by(gdp_weighted_share) %>%
  count()

## # A tibble: 28 x 2
## # Groups:   gdp_weighted_share [28]
##    gdp_weighted_share     n
##                 <dbl> <int>
##  1                0     101
##  2                0.1    30
##  3                0.2     6
##  4                0.3     7
##  5                0.4     5
##  6                0.5     7
##  7                0.6     6
##  8                0.7     5
##  9                0.8     1
## 10                0.9     2
## # ... with 18 more rows

# List the countries from the largest gdp_weighted_share downwards.
# No summary follows the group_by(), so the result is simply printed as a
# grouped tibble.
data %>%
  arrange(desc(gdp_weighted_share)) %>%
  group_by(gdp_weighted_share)

## # A tibble: 191 x 6
## # Groups:   gdp_weighted_share [28]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     1 United~ CONCACAF                   4.5              4.3
##  2     2 Japan   AFC                        1.9              4.9
##  3     3 China   AFC                       19.5             14.8
##  4     4 Germany UEFA                       1.2              2.9
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6     6 United~ UEFA                       0.9              2.1
##  7     7 Italy   UEFA                       0.9              2.1
##  8     8 France  UEFA                       0.9              2  
##  9     9 Russia  UEFA                       2.1              3.1
## 10    10 Spain   UEFA                       0.7              1.8
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>

# Density of gdp_weighted_share drawn as one frequency polygon per
# confederation, using bins of width 1.
# after_stat(density) is the supported spelling of the computed-variable
# reference; the older ..density.. notation is deprecated in ggplot2.
ggplot(data, aes(x = gdp_weighted_share, y = after_stat(density))) +
  geom_freqpoly(mapping = aes(colour = confederation), binwidth = 1) +
  theme_tufte() +
  ggtitle("Distribution of GDP shares based on confederations")

Finding the relationship between multiple different values

# Correlation between population share and TV audience share
# (cor() defaults to Pearson).
with(data, cor(population_share, tv_audience_share))

## [1] 0.7313239

# Scatter plot of TV audience share against population share, with points
# coloured by confederation.
# The colour mapping has to be inside aes(): passed as a plain argument to
# ggplot() it is silently ignored and every point is drawn in the same colour.
ggplot(
  data,
  aes(x = population_share, y = tv_audience_share, colour = confederation)
) +
  geom_point()

# Correlation between population share and GDP-weighted share
# (cor() defaults to Pearson).
with(data, cor(population_share, gdp_weighted_share))

## [1] 0.4472681

Countries that contribute the most to the values collected and other summaries

# Keep only the countries that sit above the overall mean on all three share
# measures. Conditions listed separately in filter() are combined with AND.
majorCountries <- data %>%
  filter(
    population_share > mean(population_share),
    gdp_weighted_share > mean(gdp_weighted_share),
    tv_audience_share > mean(tv_audience_share)
  )
as_tibble(majorCountries)

## # A tibble: 22 x 6
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     1 United~ CONCACAF                   4.5              4.3
##  2     2 Japan   AFC                        1.9              4.9
##  3     3 China   AFC                       19.5             14.8
##  4     4 Germany UEFA                       1.2              2.9
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6     6 United~ UEFA                       0.9              2.1
##  7     7 Italy   UEFA                       0.9              2.1
##  8     8 France  UEFA                       0.9              2  
##  9     9 Russia  UEFA                       2.1              3.1
## 10    10 Spain   UEFA                       0.7              1.8
## # ... with 12 more rows, and 1 more variable: gdp_weighted_share <dbl>

# Average of each share measure per confederation, computed over the
# above-average countries only.
majorCountries %>%
  group_by(confederation) %>%
  summarise(
    avgPopulation = mean(population_share),
    avgTV = mean(tv_audience_share),
    avgGDP = mean(gdp_weighted_share)
  )

## # A tibble: 5 x 4
##   confederation avgPopulation avgTV avgGDP
##   <fct>                 <dbl> <dbl>  <dbl>
## 1 AFC                    4.14  4.84   3.59
## 2 CAF                    1.5   1.95   0.75
## 3 CONCACAF               3.1   3.75   6.95
## 4 CONMEBOL               1.37  3.4    2.63
## 5 UEFA                   1.05  2.19   3.55

# geom_col(position = position_dodge2(width = 0.9, preserve = "single"))

FIFADataAnalysis

Amar Agrawal

6/19/2020

Loading libraries and getting the data

Finding which countries have the highest and lowest viewership

Finding which countries have the highest and lowest population

Finding the relationship between multiple different values

Countries that contribute the most to the values collected and other summaries