Loading libraries and getting the data

  library(tidyverse)
  library(ggthemes)
  # Download the FIFA audience data set (TidyTuesday, 2018-06-12) from GitHub.
  data <- read.csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-06-12/week11_fifa_audience.csv"
  )
  # Store confederation as a factor so summary() tabulates it per level.
  data$confederation <- factor(data$confederation)
  summary(data)
##        X           country           confederation population_share 
##  Min.   :  1.0   Length:191         AFC     :43    Min.   : 0.0000  
##  1st Qu.: 48.5   Class :character   CAF     :50    1st Qu.: 0.0000  
##  Median : 96.0   Mode  :character   CONCACAF:30    Median : 0.1000  
##  Mean   : 96.0                      CONMEBOL:10    Mean   : 0.5225  
##  3rd Qu.:143.5                      OFC     :12    3rd Qu.: 0.3500  
##  Max.   :191.0                      UEFA    :46    Max.   :19.5000  
##  tv_audience_share gdp_weighted_share
##  Min.   : 0.000    Min.   : 0.0000   
##  1st Qu.: 0.000    1st Qu.: 0.0000   
##  Median : 0.100    Median : 0.0000   
##  Mean   : 0.523    Mean   : 0.5204   
##  3rd Qu.: 0.300    3rd Qu.: 0.3000   
##  Max.   :14.800    Max.   :11.3000

Summarizing the data by confederation and gdp_weighted_share

# Bar chart of the total GDP-weighted share held by each confederation.
# The shares are summed explicitly and drawn with geom_col(); a plain
# geom_bar() on confederation would only count the countries in each
# confederation and ignore gdp_weighted_share entirely.
data %>%
  group_by(confederation) %>%
  summarise(gdp_weighted_share = sum(gdp_weighted_share)) %>%
  ggplot(aes(confederation, gdp_weighted_share, fill = confederation)) +
  geom_col()

Finding which countries have the highest and lowest viewership

# Count how many countries share each distinct tv_audience_share value,
# listed from the smallest share upwards.
data %>%
  arrange(tv_audience_share) %>%
  group_by(tv_audience_share) %>%
  count()
## # A tibble: 29 x 2
## # Groups:   tv_audience_share [29]
##    tv_audience_share     n
##                <dbl> <int>
##  1               0      67
##  2               0.1    46
##  3               0.2    19
##  4               0.3    15
##  5               0.4     6
##  6               0.5     7
##  7               0.6     2
##  8               0.7     3
##  9               0.8     2
## 10               0.9     1
## # ... with 19 more rows
# List the countries from the largest tv_audience_share downwards; the
# result stays grouped by tv_audience_share.
data %>%
  arrange(desc(tv_audience_share)) %>%
  group_by(tv_audience_share)
## # A tibble: 191 x 6
## # Groups:   tv_audience_share [29]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     3 China   AFC                       19.5             14.8
##  2     5 Brazil  CONMEBOL                   2.8              7.1
##  3    12 Indone~ AFC                        3.5              6.7
##  4     2 Japan   AFC                        1.9              4.9
##  5     1 United~ CONCACAF                   4.5              4.3
##  6    13 Mexico  CONCACAF                   1.7              3.2
##  7     9 Russia  UEFA                       2.1              3.1
##  8     4 Germany UEFA                       1.2              2.9
##  9    27 Nigeria CAF                        2.3              2.6
## 10    30 Vietnam AFC                        1.3              2.6
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>

Finding which countries have the highest and lowest population share

# Count how many countries share each distinct population_share value,
# listed from the smallest share upwards.
data %>%
  arrange(population_share) %>%
  group_by(population_share) %>%
  count()
## # A tibble: 26 x 2
## # Groups:   population_share [26]
##    population_share     n
##               <dbl> <int>
##  1              0      61
##  2              0.1    49
##  3              0.2    21
##  4              0.3    12
##  5              0.4     9
##  6              0.5     6
##  7              0.6     3
##  8              0.7     6
##  9              0.8     1
## 10              0.9     4
## # ... with 16 more rows
# List the countries from the largest population_share downwards; the
# result stays grouped by population_share.
data %>%
  arrange(desc(population_share)) %>%
  group_by(population_share)
## # A tibble: 191 x 6
## # Groups:   population_share [26]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     3 China   AFC                       19.5             14.8
##  2    39 India   AFC                       17.6              2  
##  3     1 United~ CONCACAF                   4.5              4.3
##  4    12 Indone~ AFC                        3.5              6.7
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6    75 Pakist~ AFC                        2.5              0.4
##  7    27 Nigeria CAF                        2.3              2.6
##  8   114 Bangla~ AFC                        2.2              0.1
##  9     9 Russia  UEFA                       2.1              3.1
## 10     2 Japan   AFC                        1.9              4.9
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>

Finding which countries have the highest and lowest GDP-weighted share

# Count how many countries share each distinct gdp_weighted_share value,
# listed from the smallest share upwards.
data %>%
  arrange(gdp_weighted_share) %>%
  group_by(gdp_weighted_share) %>%
  count()
## # A tibble: 28 x 2
## # Groups:   gdp_weighted_share [28]
##    gdp_weighted_share     n
##                 <dbl> <int>
##  1                0     101
##  2                0.1    30
##  3                0.2     6
##  4                0.3     7
##  5                0.4     5
##  6                0.5     7
##  7                0.6     6
##  8                0.7     5
##  9                0.8     1
## 10                0.9     2
## # ... with 18 more rows
# List the countries from the largest gdp_weighted_share downwards; the
# result stays grouped by gdp_weighted_share.
data %>%
  arrange(desc(gdp_weighted_share)) %>%
  group_by(gdp_weighted_share)
## # A tibble: 191 x 6
## # Groups:   gdp_weighted_share [28]
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     1 United~ CONCACAF                   4.5              4.3
##  2     2 Japan   AFC                        1.9              4.9
##  3     3 China   AFC                       19.5             14.8
##  4     4 Germany UEFA                       1.2              2.9
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6     6 United~ UEFA                       0.9              2.1
##  7     7 Italy   UEFA                       0.9              2.1
##  8     8 France  UEFA                       0.9              2  
##  9     9 Russia  UEFA                       2.1              3.1
## 10    10 Spain   UEFA                       0.7              1.8
## # ... with 181 more rows, and 1 more variable: gdp_weighted_share <dbl>
# Density-scaled frequency polygons of gdp_weighted_share, one line per
# confederation, using bins of width 1.
ggplot(data, aes(x = gdp_weighted_share, y = ..density..)) +
  geom_freqpoly(aes(colour = confederation), binwidth = 1) +
  theme_tufte() +
  ggtitle("Distribution of GDP shares based on confederations")

Finding the relationships between population share, TV audience share, and GDP-weighted share

# Correlation between population share and TV audience share.
with(data, cor(population_share, tv_audience_share))
## [1] 0.7313239
# Scatter plot of TV audience share against population share, coloured by
# confederation. The colour mapping has to sit inside aes(): supplied as an
# extra argument to ggplot() it is silently ignored and every point is drawn
# in the default colour.
ggplot(
  data,
  aes(x = population_share, y = tv_audience_share, colour = confederation)
) +
  geom_point()

# Correlation between population share and GDP-weighted share.
with(data, cor(population_share, gdp_weighted_share))
## [1] 0.4472681

Countries that are above the mean on all three shares (population, TV audience, GDP-weighted), and per-confederation averages for those countries

# Keep only the countries that exceed the overall mean on every one of the
# three share measures; filter() combines comma-separated conditions with AND.
majorCountries <- filter(
  data,
  population_share > mean(population_share),
  gdp_weighted_share > mean(gdp_weighted_share),
  tv_audience_share > mean(tv_audience_share)
)
as_tibble(majorCountries)
## # A tibble: 22 x 6
##        X country confederation population_share tv_audience_sha~
##    <int> <chr>   <fct>                    <dbl>            <dbl>
##  1     1 United~ CONCACAF                   4.5              4.3
##  2     2 Japan   AFC                        1.9              4.9
##  3     3 China   AFC                       19.5             14.8
##  4     4 Germany UEFA                       1.2              2.9
##  5     5 Brazil  CONMEBOL                   2.8              7.1
##  6     6 United~ UEFA                       0.9              2.1
##  7     7 Italy   UEFA                       0.9              2.1
##  8     8 France  UEFA                       0.9              2  
##  9     9 Russia  UEFA                       2.1              3.1
## 10    10 Spain   UEFA                       0.7              1.8
## # ... with 12 more rows, and 1 more variable: gdp_weighted_share <dbl>
# Mean of each share measure per confederation, computed over the rows of
# majorCountries only.
majorCountries %>%
  group_by(confederation) %>%
  summarise(
    avgPopulation = mean(population_share),
    avgTV = mean(tv_audience_share),
    avgGDP = mean(gdp_weighted_share)
  )
## # A tibble: 5 x 4
##   confederation avgPopulation avgTV avgGDP
##   <fct>                 <dbl> <dbl>  <dbl>
## 1 AFC                    4.14  4.84   3.59
## 2 CAF                    1.5   1.95   0.75
## 3 CONCACAF               3.1   3.75   6.95
## 4 CONMEBOL               1.37  3.4    2.63
## 5 UEFA                   1.05  2.19   3.55
# geom_col(position = position_dodge2(width = 0.9, preserve = "single"))