Install packages

##install.packages("dplyr")
##install.packages("tidyr")
##install.packages("ggplot2")

Load packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

Dataset 1

Engilsh Premier League Data, Source Provided by Ravi Kothari

Source: http://www.soccerstats.com/widetable.asp?league=england

objective: To find if there is any home advantage during a match by analyzing home and away goals

##.csv was created and uploaded to Github

Load data

# Download the league table CSV from GitHub. Text columns are kept as
# character vectors rather than being converted to factors.
epl <- read.csv(
  file = "https://raw.githubusercontent.com/choudhury1023/Data-607/gh-pages/epl_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
epl
##                 Team P W D L GF GA  GD Pts  PPG Wh Dh Lh GFh GAh Wa Da La
## 1   Manchester City  7 6 0 1 18  7  11  18 2.57  3  0  0   9   2  3  0  1
## 2         Tottenham  7 5 2 0 12  3   9  17 2.43  3  1  0   5   1  2  1  0
## 3           Arsenal  7 5 1 1 16  7   9  16 2.29  2  0  1   8   5  3  1  0
## 4         Liverpool  7 5 1 1 18 10   8  16 2.29  2  0  0   9   2  3  1  1
## 5           Everton  7 4 2 1 11  5   6  14 2.00  2  2  0   6   3  2  0  1
## 6    Manchester Utd  7 4 1 2 13  8   5  13 1.86  2  1  1   8   4  2  0  1
## 7           Chelsea  7 4 1 2 12  9   3  13 1.86  2  0  1   6   3  2  1  1
## 8    Crystal Palace  7 3 2 2 11  8   3  11 1.57  1  1  1   5   3  2  1  1
## 9     West Bromwich  7 2 3 2  8  7   1   9 1.29  1  1  1   5   4  1  2  1
## 10      Southampton  7 2 3 2  7  6   1   9 1.29  1  2  0   3   2  1  1  2
## 11          Watford  7 2 2 3 12 13  -1   8 1.14  1  1  2   7   8  1  1  1
## 12   Leicester City  7 2 2 3  8 11  -3   8 1.14  2  2  0   5   1  0  0  3
## 13      Bournemouth  7 2 2 3  6 11  -5   8 1.14  2  0  1   3   3  0  2  2
## 14          Burnley  7 2 1 4  5  9  -4   7 1.00  2  1  2   5   3  0  0  2
## 15        Hull City  7 2 1 4  7 14  -7   7 1.00  1  0  3   3   8  1  1  1
## 16    Middlesbrough  7 1 3 3  7 10  -3   6 0.86  0  1  2   3   5  1  2  1
## 17     Swansea City  7 1 1 5  6 12  -6   4 0.57  0  1  3   4   9  1  0  2
## 18     West Ham Utd  7 1 1 5  8 17  -9   4 0.57  1  1  2   4   8  0  0  3
## 19       Stoke City  7 0 3 4  5 16 -11   3 0.43  0  1  2   2   9  0  2  2
## 20       Sunderland  7 0 2 5  6 13  -7   2 0.29  0  1  3   4   9  0  1  2
##    GFa GAa  X
## 1    9   5 NA
## 2    7   2 NA
## 3    8   2 NA
## 4    9   8 NA
## 5    5   2 NA
## 6    5   4 NA
## 7    6   6 NA
## 8    6   5 NA
## 9    3   3 NA
## 10   4   4 NA
## 11   5   5 NA
## 12   3  10 NA
## 13   3   8 NA
## 14   0   6 NA
## 15   4   6 NA
## 16   4   5 NA
## 17   2   3 NA
## 18   4   9 NA
## 19   3   7 NA
## 20   2   4 NA

Find Column Names

# List the column names of the raw table (for a data frame, colnames() and
# names() return the same vector).
colnames(epl)
##  [1] "Team" "P"    "W"    "D"    "L"    "GF"   "GA"   "GD"   "Pts"  "PPG" 
## [11] "Wh"   "Dh"   "Lh"   "GFh"  "GAh"  "Wa"   "Da"   "La"   "GFa"  "GAa" 
## [21] "X"

Select required columns and rename

# Keep the team name plus the four home/away goal columns and give each a
# descriptive snake_case name in the same step (select() accepts new = old).
epl1 <- epl %>%
  select(
    team = Team,
    goal_for_home = GFh,
    goal_against_home = GAh,
    goal_for_away = GFa,
    goal_against_away = GAa
  )
epl1
##                 team goal_for_home goal_against_home goal_for_away
## 1   Manchester City              9                 2             9
## 2         Tottenham              5                 1             7
## 3           Arsenal              8                 5             8
## 4         Liverpool              9                 2             9
## 5           Everton              6                 3             5
## 6    Manchester Utd              8                 4             5
## 7           Chelsea              6                 3             6
## 8    Crystal Palace              5                 3             6
## 9     West Bromwich              5                 4             3
## 10      Southampton              3                 2             4
## 11          Watford              7                 8             5
## 12   Leicester City              5                 1             3
## 13      Bournemouth              3                 3             3
## 14          Burnley              5                 3             0
## 15        Hull City              3                 8             4
## 16    Middlesbrough              3                 5             4
## 17     Swansea City              4                 9             2
## 18     West Ham Utd              4                 8             4
## 19       Stoke City              2                 9             3
## 20       Sunderland              4                 9             2
##    goal_against_away
## 1                  5
## 2                  2
## 3                  2
## 4                  8
## 5                  2
## 6                  4
## 7                  6
## 8                  5
## 9                  3
## 10                 4
## 11                 5
## 12                10
## 13                 8
## 14                 6
## 15                 6
## 16                 5
## 17                 3
## 18                 9
## 19                 7
## 20                 4

Summary analysis

# Five-number summary (plus mean) of every goal column.
epl1 %>%
  summary()
##      team           goal_for_home  goal_against_home goal_for_away
##  Length:20          Min.   :2.00   Min.   :1.00      Min.   :0.0  
##  Class :character   1st Qu.:3.75   1st Qu.:2.75      1st Qu.:3.0  
##  Mode  :character   Median :5.00   Median :3.50      Median :4.0  
##                     Mean   :5.20   Mean   :4.60      Mean   :4.6  
##                     3rd Qu.:6.25   3rd Qu.:8.00      3rd Qu.:6.0  
##                     Max.   :9.00   Max.   :9.00      Max.   :9.0  
##  goal_against_away
##  Min.   : 2.00    
##  1st Qu.: 3.75    
##  Median : 5.00    
##  Mean   : 5.20    
##  3rd Qu.: 6.25    
##  Max.   :10.00

Tidy data, convert form wide to long

# Reshape wide -> long: every column except `team` is stacked into a
# (type, goals) pair, giving one row per team and goal category.
epl_tidy <- epl1 %>%
  gather(key = "type", value = "goals", -team)
epl_tidy
##                 team              type goals
## 1   Manchester City      goal_for_home     9
## 2         Tottenham      goal_for_home     5
## 3           Arsenal      goal_for_home     8
## 4         Liverpool      goal_for_home     9
## 5           Everton      goal_for_home     6
## 6    Manchester Utd      goal_for_home     8
## 7           Chelsea      goal_for_home     6
## 8    Crystal Palace      goal_for_home     5
## 9     West Bromwich      goal_for_home     5
## 10      Southampton      goal_for_home     3
## 11          Watford      goal_for_home     7
## 12   Leicester City      goal_for_home     5
## 13      Bournemouth      goal_for_home     3
## 14          Burnley      goal_for_home     5
## 15        Hull City      goal_for_home     3
## 16    Middlesbrough      goal_for_home     3
## 17     Swansea City      goal_for_home     4
## 18     West Ham Utd      goal_for_home     4
## 19       Stoke City      goal_for_home     2
## 20       Sunderland      goal_for_home     4
## 21  Manchester City  goal_against_home     2
## 22        Tottenham  goal_against_home     1
## 23          Arsenal  goal_against_home     5
## 24        Liverpool  goal_against_home     2
## 25          Everton  goal_against_home     3
## 26   Manchester Utd  goal_against_home     4
## 27          Chelsea  goal_against_home     3
## 28   Crystal Palace  goal_against_home     3
## 29    West Bromwich  goal_against_home     4
## 30      Southampton  goal_against_home     2
## 31          Watford  goal_against_home     8
## 32   Leicester City  goal_against_home     1
## 33      Bournemouth  goal_against_home     3
## 34          Burnley  goal_against_home     3
## 35        Hull City  goal_against_home     8
## 36    Middlesbrough  goal_against_home     5
## 37     Swansea City  goal_against_home     9
## 38     West Ham Utd  goal_against_home     8
## 39       Stoke City  goal_against_home     9
## 40       Sunderland  goal_against_home     9
## 41  Manchester City      goal_for_away     9
## 42        Tottenham      goal_for_away     7
## 43          Arsenal      goal_for_away     8
## 44        Liverpool      goal_for_away     9
## 45          Everton      goal_for_away     5
## 46   Manchester Utd      goal_for_away     5
## 47          Chelsea      goal_for_away     6
## 48   Crystal Palace      goal_for_away     6
## 49    West Bromwich      goal_for_away     3
## 50      Southampton      goal_for_away     4
## 51          Watford      goal_for_away     5
## 52   Leicester City      goal_for_away     3
## 53      Bournemouth      goal_for_away     3
## 54          Burnley      goal_for_away     0
## 55        Hull City      goal_for_away     4
## 56    Middlesbrough      goal_for_away     4
## 57     Swansea City      goal_for_away     2
## 58     West Ham Utd      goal_for_away     4
## 59       Stoke City      goal_for_away     3
## 60       Sunderland      goal_for_away     2
## 61  Manchester City  goal_against_away     5
## 62        Tottenham  goal_against_away     2
## 63          Arsenal  goal_against_away     2
## 64        Liverpool  goal_against_away     8
## 65          Everton  goal_against_away     2
## 66   Manchester Utd  goal_against_away     4
## 67          Chelsea  goal_against_away     6
## 68   Crystal Palace  goal_against_away     5
## 69    West Bromwich  goal_against_away     3
## 70      Southampton  goal_against_away     4
## 71          Watford  goal_against_away     5
## 72   Leicester City  goal_against_away    10
## 73      Bournemouth  goal_against_away     8
## 74          Burnley  goal_against_away     6
## 75        Hull City  goal_against_away     6
## 76    Middlesbrough  goal_against_away     5
## 77     Swansea City  goal_against_away     3
## 78     West Ham Utd  goal_against_away     9
## 79       Stoke City  goal_against_away     7
## 80       Sunderland  goal_against_away     4

Plot all “for” and “against” goal for both home and away match

# Horizontal dodged bars: one bar per goal category for every team.
ggplot(epl_tidy, aes(x = team, y = goals, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Home and Away Goals", y = "Goals") +
  coord_flip()

##unable to reach a conclusion from the plot, further analysis required

Seperate “for” goals from “against” and create new tables

“for” table for both home and away match with percent scored at home

# Per-team "for" goals at home and away, plus the share scored at home.
#
# The goal columns are renamed directly inside select(). The earlier
# `total_for_home = sum(goal_for_home)` form appears to have worked only
# because older dplyr resolved a column name to its position inside select();
# current tidyselect does not evaluate arbitrary function calls against the
# data's columns, so that form fails there.
#
# Note: a team with no goals at home or away would get NaN (0 / 0).
epl_pct_home_for <- epl1 %>%
  select(team, total_for_home = goal_for_home, total_for_away = goal_for_away) %>%
  mutate(
    pct_for_home = round((total_for_home / (total_for_home + total_for_away)) * 100)
  )
epl_pct_home_for
##                 team total_for_home total_for_away pct_for_home
## 1   Manchester City               9              9           50
## 2         Tottenham               5              7           42
## 3           Arsenal               8              8           50
## 4         Liverpool               9              9           50
## 5           Everton               6              5           55
## 6    Manchester Utd               8              5           62
## 7           Chelsea               6              6           50
## 8    Crystal Palace               5              6           45
## 9     West Bromwich               5              3           62
## 10      Southampton               3              4           43
## 11          Watford               7              5           58
## 12   Leicester City               5              3           62
## 13      Bournemouth               3              3           50
## 14          Burnley               5              0          100
## 15        Hull City               3              4           43
## 16    Middlesbrough               3              4           43
## 17     Swansea City               4              2           67
## 18     West Ham Utd               4              4           50
## 19       Stoke City               2              3           40
## 20       Sunderland               4              2           67

Summary of the “for” table

# Distribution of home/away "for" goals and of the home share across teams.
epl_pct_home_for %>%
  summary()
##      team           total_for_home total_for_away  pct_for_home   
##  Length:20          Min.   :2.00   Min.   :0.0    Min.   : 40.00  
##  Class :character   1st Qu.:3.75   1st Qu.:3.0    1st Qu.: 44.50  
##  Mode  :character   Median :5.00   Median :4.0    Median : 50.00  
##                     Mean   :5.20   Mean   :4.6    Mean   : 54.45  
##                     3rd Qu.:6.25   3rd Qu.:6.0    3rd Qu.: 62.00  
##                     Max.   :9.00   Max.   :9.0    Max.   :100.00
# Number of teams that scored strictly more than half of their goals at home.
epl_pct_home_for %>%
  filter(pct_for_home > 50) %>%
  nrow()
## [1] 8
##8 teams out of 20 scores more than 50% of the goals at home with one team scoring all their goals at home

Plot percentage of “for”" scored goal at home

# Horizontal bars of each team's home share of goals scored; the fill colour
# encodes the same percentage.
ggplot(
  epl_pct_home_for,
  aes(x = team, y = pct_for_home, fill = pct_for_home)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "For Home Goal Percentage", y = "Percent") +
  coord_flip()

percentage of total “for” goal scored at home by all team

# League-wide totals of goals scored at home and away, and the home share.
# The percentage is computed inside the same summarise() call, which may refer
# to the totals defined just before it.
epl_total_pct_home_for <- epl1 %>%
  summarise(
    total_for_home = sum(goal_for_home),
    total_for_away = sum(goal_for_away),
    pct_for_home = round((total_for_home / (total_for_home + total_for_away)) * 100)
  )
epl_total_pct_home_for
##   total_for_home total_for_away pct_for_home
## 1            104             92           53
##53% of the total "for" goals were scored at home

“against” table for both home and away match with percent conceded at home

# Per-team "against" goals at home and away, plus the share conceded at home.
#
# The goal columns are renamed directly inside select(). The earlier
# `total_against_home = sum(goal_against_home)` form appears to have worked
# only because older dplyr resolved a column name to its position inside
# select(); current tidyselect does not evaluate arbitrary function calls
# against the data's columns, so that form fails there.
#
# Note: a team that conceded no goals at all would get NaN (0 / 0).
epl_pct_home_against <- epl1 %>%
  select(
    team,
    total_against_home = goal_against_home,
    total_against_away = goal_against_away
  ) %>%
  mutate(
    pct_against_home = round(
      (total_against_home / (total_against_home + total_against_away)) * 100
    )
  )
epl_pct_home_against
##                 team total_against_home total_against_away
## 1   Manchester City                   2                  5
## 2         Tottenham                   1                  2
## 3           Arsenal                   5                  2
## 4         Liverpool                   2                  8
## 5           Everton                   3                  2
## 6    Manchester Utd                   4                  4
## 7           Chelsea                   3                  6
## 8    Crystal Palace                   3                  5
## 9     West Bromwich                   4                  3
## 10      Southampton                   2                  4
## 11          Watford                   8                  5
## 12   Leicester City                   1                 10
## 13      Bournemouth                   3                  8
## 14          Burnley                   3                  6
## 15        Hull City                   8                  6
## 16    Middlesbrough                   5                  5
## 17     Swansea City                   9                  3
## 18     West Ham Utd                   8                  9
## 19       Stoke City                   9                  7
## 20       Sunderland                   9                  4
##    pct_against_home
## 1                29
## 2                33
## 3                71
## 4                20
## 5                60
## 6                50
## 7                33
## 8                38
## 9                57
## 10               33
## 11               62
## 12                9
## 13               27
## 14               33
## 15               57
## 16               50
## 17               75
## 18               47
## 19               56
## 20               69

Summary of the “against” table

# Distribution of home/away "against" goals and of the home share across teams.
epl_pct_home_against %>%
  summary()
##      team           total_against_home total_against_away pct_against_home
##  Length:20          Min.   :1.00       Min.   : 2.00      Min.   : 9.00   
##  Class :character   1st Qu.:2.75       1st Qu.: 3.75      1st Qu.:33.00   
##  Mode  :character   Median :3.50       Median : 5.00      Median :48.50   
##                     Mean   :4.60       Mean   : 5.20      Mean   :45.45   
##                     3rd Qu.:8.00       3rd Qu.: 6.25      3rd Qu.:57.75   
##                     Max.   :9.00       Max.   :10.00      Max.   :75.00
# Number of teams that conceded strictly less than half of their goals at home.
epl_pct_home_against %>%
  filter(pct_against_home < 50) %>%
  nrow()
## [1] 10
##10 out of 20 or 50% of the teams conceded less than 50% of the goals at home

Plot percentage of “against” goal conceded at home

# Horizontal bars of each team's home share of goals conceded; the fill colour
# encodes the same percentage.
# The title previously read "For Home Goal Percentage" (copied from the "for"
# chart); it now names the quantity actually plotted.
ggplot(
  data = epl_pct_home_against,
  aes(x = team, y = pct_against_home, fill = pct_against_home)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Against Home Goal Percentage") +
  ylab("Percent") +
  coord_flip()

# League-wide totals of goals conceded at home and away, and the home share.
# The percentage is computed inside the same summarise() call, which may refer
# to the totals defined just before it.
epl_total_pct_home_against <- epl1 %>%
  summarise(
    total_against_home = sum(goal_against_home),
    total_against_away = sum(goal_against_away),
    pct_against_home = round(
      (total_against_home / (total_against_home + total_against_away)) * 100
    )
  )
epl_total_pct_home_against
##   total_against_home total_against_away pct_against_home
## 1                 92                104               47
##47% of the against goals were conceded at home

conclusion: From the data set we can not reach to a strong conclusion that playing at home has significant advatage for the home team

Dataset 2

Citizenship provided by regions, source provided by Jose Zuniga

[Source: Tips for Simplifying Crosstab Query Statements, Rob Gravelle, Database Journal, 2010}

Objective: to compare monthly citizenship for the given regions

##.csv was created and uploaded to Github

Load Data

# Download the monthly citizenship crosstab from GitHub. Text columns are kept
# as character vectors rather than being converted to factors.
citizenship <- read.csv(
  file = "https://raw.githubusercontent.com/choudhury1023/Data-607/gh-pages/monthly_citizenship.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
citizenship
##        Month REGION.1 REGION.2 REGION.3 REGION.4 REGION.5 TOTAL
## 1      April       13       33       76        2       47   171
## 2        May       17       55      209        1      143   425
## 3       June        8       63      221        1      127   420
## 4       July       13      104      240        6      123   486
## 5     August       18      121      274        9      111   533
## 6  September       25      160      239        2       88   514
## 7    October        9       88      295        2      127   521
## 8   November        2       86      292        2      120   502
## 9   December        1      128      232        6      155   522
## 10     TOTAL      106      838     2078       31     1041  4094

Tidy wide format data and convert to long format

# Reshape wide -> long: the five REGION.* columns are stacked into
# (region, month_total) pairs. The per-month TOTAL column is carried along and
# moved to the end; the "TOTAL" row of the source table is kept as well.
tidy_citizenship <- citizenship %>%
  gather(key = "region", value = "month_total", REGION.1:REGION.5) %>%
  select(Month, region, month_total, TOTAL)
tidy_citizenship
##        Month   region month_total TOTAL
## 1      April REGION.1          13   171
## 2        May REGION.1          17   425
## 3       June REGION.1           8   420
## 4       July REGION.1          13   486
## 5     August REGION.1          18   533
## 6  September REGION.1          25   514
## 7    October REGION.1           9   521
## 8   November REGION.1           2   502
## 9   December REGION.1           1   522
## 10     TOTAL REGION.1         106  4094
## 11     April REGION.2          33   171
## 12       May REGION.2          55   425
## 13      June REGION.2          63   420
## 14      July REGION.2         104   486
## 15    August REGION.2         121   533
## 16 September REGION.2         160   514
## 17   October REGION.2          88   521
## 18  November REGION.2          86   502
## 19  December REGION.2         128   522
## 20     TOTAL REGION.2         838  4094
## 21     April REGION.3          76   171
## 22       May REGION.3         209   425
## 23      June REGION.3         221   420
## 24      July REGION.3         240   486
## 25    August REGION.3         274   533
## 26 September REGION.3         239   514
## 27   October REGION.3         295   521
## 28  November REGION.3         292   502
## 29  December REGION.3         232   522
## 30     TOTAL REGION.3        2078  4094
## 31     April REGION.4           2   171
## 32       May REGION.4           1   425
## 33      June REGION.4           1   420
## 34      July REGION.4           6   486
## 35    August REGION.4           9   533
## 36 September REGION.4           2   514
## 37   October REGION.4           2   521
## 38  November REGION.4           2   502
## 39  December REGION.4           6   522
## 40     TOTAL REGION.4          31  4094
## 41     April REGION.5          47   171
## 42       May REGION.5         143   425
## 43      June REGION.5         127   420
## 44      July REGION.5         123   486
## 45    August REGION.5         111   533
## 46 September REGION.5          88   514
## 47   October REGION.5         127   521
## 48  November REGION.5         120   502
## 49  December REGION.5         155   522
## 50     TOTAL REGION.5        1041  4094

Making Month an ordered factor so that ggplot does not rearrnge

# Turn Month into a factor whose level order is the order of first appearance
# in the data (April ... December, TOTAL), so ggplot keeps that order instead
# of sorting alphabetically.
# unique() is required: each month occurs once per region, and passing the
# raw column as `levels` supplies duplicated levels, which is deprecated in
# older R versions and rejected as an error by current ones.
tidy_citizenship$Month <- factor(
  tidy_citizenship$Month,
  levels = unique(tidy_citizenship$Month)
)
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Plot data

# Dodged bars per region, one bar for each Month level (the "TOTAL" row of the
# source table is part of the data and is drawn too).
ggplot(tidy_citizenship, aes(x = region, y = month_total, fill = Month)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Citizenship by Month", y = "Citizenship")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Region 3 issues the most citizenzip by far and region 4 issues the least.

Another plot

# Bars of the per-month TOTAL column, one per region row within each month,
# with the fill colour mapped to that region's monthly count.
ggplot(tidy_citizenship, aes(x = Month, y = TOTAL, fill = month_total)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Citizenship by Month", y = "Citizenship")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Citizenship issuance is consitant in all five region troughout the given month hovering aroud 420 to 533 with the exception of month of April where citizenship issuance was the least (171).

Dataset 3

Income by Religion, source provided by Marco Siqueira Campos

Source: http://www.pewforum.org/religious?landscape?study/incomedistribution/

Objective: Analyse the income by religion

##.csv was created and uploaded to Github

Load Data

# Download the income-by-religion table from GitHub. Text columns are kept as
# character vectors rather than being converted to factors.
income <- read.csv(
  file = "https://raw.githubusercontent.com/choudhury1023/Data-607/gh-pages/Income_distribution_by_religious_group.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
income
##                 Religious.tradition Less.than..30.000 X.30.000..49.999
## 1                          Buddhist               36%              18%
## 2                          Catholic               36%              19%
## 3            Evangelical Protestant               35%              22%
## 4                             Hindu               17%              13%
## 5     Historically Black Protestant               53%              22%
## 6                 Jehovah's Witness               48%              25%
## 7                            Jewish               16%              15%
## 8               Mainline Protestant               29%              20%
## 9                            Mormon               27%              20%
## 10                           Muslim               34%              17%
## 11               Orthodox Christian               18%              17%
## 12 Unaffiliated (religious "nones")               33%              20%
##    X.50.000..99.999 X.100.000.or.more Sample.Size
## 1               32%               13%         233
## 2               26%               19%       6,137
## 3               28%               14%       7,462
## 4               34%               36%         172
## 5               17%                8%       1,704
## 6               22%                4%         208
## 7               24%               44%         708
## 8               28%               23%       5,208
## 9               33%               20%         594
## 10              29%               20%         205
## 11              36%               29%         155
## 12              26%               21%       6,790

Get names of the column

# List the column names as mangled by read.csv (for a data frame, colnames()
# and names() return the same vector).
colnames(income)
## [1] "Religious.tradition" "Less.than..30.000"   "X.30.000..49.999"   
## [4] "X.50.000..99.999"    "X.100.000.or.more"   "Sample.Size"

Rename Columns

# Give the first and last columns short snake_case names; the four income
# bracket columns keep their original names at this point.
income1 <- income %>%
  rename(
    religion = Religious.tradition,
    smple_size = Sample.Size
  )
income1
##                            religion Less.than..30.000 X.30.000..49.999
## 1                          Buddhist               36%              18%
## 2                          Catholic               36%              19%
## 3            Evangelical Protestant               35%              22%
## 4                             Hindu               17%              13%
## 5     Historically Black Protestant               53%              22%
## 6                 Jehovah's Witness               48%              25%
## 7                            Jewish               16%              15%
## 8               Mainline Protestant               29%              20%
## 9                            Mormon               27%              20%
## 10                           Muslim               34%              17%
## 11               Orthodox Christian               18%              17%
## 12 Unaffiliated (religious "nones")               33%              20%
##    X.50.000..99.999 X.100.000.or.more smple_size
## 1               32%               13%        233
## 2               26%               19%      6,137
## 3               28%               14%      7,462
## 4               34%               36%        172
## 5               17%                8%      1,704
## 6               22%                4%        208
## 7               24%               44%        708
## 8               28%               23%      5,208
## 9               33%               20%        594
## 10              29%               20%        205
## 11              36%               29%        155
## 12              26%               21%      6,790
# Relabel the four income-bracket columns (positions 2 to 5) in one vectorised
# assignment; the labels are applied in column order.
names(income1)[2:5] <- c("<30k", "30k-49,999", "50k-99.999", "100k+")
income1
##                            religion <30k 30k-49,999 50k-99.999 100k+
## 1                          Buddhist  36%        18%        32%   13%
## 2                          Catholic  36%        19%        26%   19%
## 3            Evangelical Protestant  35%        22%        28%   14%
## 4                             Hindu  17%        13%        34%   36%
## 5     Historically Black Protestant  53%        22%        17%    8%
## 6                 Jehovah's Witness  48%        25%        22%    4%
## 7                            Jewish  16%        15%        24%   44%
## 8               Mainline Protestant  29%        20%        28%   23%
## 9                            Mormon  27%        20%        33%   20%
## 10                           Muslim  34%        17%        29%   20%
## 11               Orthodox Christian  18%        17%        36%   29%
## 12 Unaffiliated (religious "nones")  33%        20%        26%   21%
##    smple_size
## 1         233
## 2       6,137
## 3       7,462
## 4         172
## 5       1,704
## 6         208
## 7         708
## 8       5,208
## 9         594
## 10        205
## 11        155
## 12      6,790
##I was having problem renaming the column using dplyr, so had use basic r function for part of the renaming

Tidy data, convert from long format and getting rid of “%” symbol

# Reshape wide -> long: the four bracket columns are stacked into
# (income_bracket, percentage_raw) pairs. transmute() then keeps only the
# three wanted columns and converts strings such as "36%" to the number 36 by
# removing the literal percent sign.
tidy_income <- income1 %>%
  gather(key = "income_bracket", value = "percentage_raw", 2:5) %>%
  transmute(
    religion,
    income_bracket,
    percentage = as.numeric(gsub("%", "", percentage_raw, fixed = TRUE))
  )
tidy_income
##                            religion income_bracket percentage
## 1                          Buddhist           <30k         36
## 2                          Catholic           <30k         36
## 3            Evangelical Protestant           <30k         35
## 4                             Hindu           <30k         17
## 5     Historically Black Protestant           <30k         53
## 6                 Jehovah's Witness           <30k         48
## 7                            Jewish           <30k         16
## 8               Mainline Protestant           <30k         29
## 9                            Mormon           <30k         27
## 10                           Muslim           <30k         34
## 11               Orthodox Christian           <30k         18
## 12 Unaffiliated (religious "nones")           <30k         33
## 13                         Buddhist     30k-49,999         18
## 14                         Catholic     30k-49,999         19
## 15           Evangelical Protestant     30k-49,999         22
## 16                            Hindu     30k-49,999         13
## 17    Historically Black Protestant     30k-49,999         22
## 18                Jehovah's Witness     30k-49,999         25
## 19                           Jewish     30k-49,999         15
## 20              Mainline Protestant     30k-49,999         20
## 21                           Mormon     30k-49,999         20
## 22                           Muslim     30k-49,999         17
## 23               Orthodox Christian     30k-49,999         17
## 24 Unaffiliated (religious "nones")     30k-49,999         20
## 25                         Buddhist     50k-99.999         32
## 26                         Catholic     50k-99.999         26
## 27           Evangelical Protestant     50k-99.999         28
## 28                            Hindu     50k-99.999         34
## 29    Historically Black Protestant     50k-99.999         17
## 30                Jehovah's Witness     50k-99.999         22
## 31                           Jewish     50k-99.999         24
## 32              Mainline Protestant     50k-99.999         28
## 33                           Mormon     50k-99.999         33
## 34                           Muslim     50k-99.999         29
## 35               Orthodox Christian     50k-99.999         36
## 36 Unaffiliated (religious "nones")     50k-99.999         26
## 37                         Buddhist          100k+         13
## 38                         Catholic          100k+         19
## 39           Evangelical Protestant          100k+         14
## 40                            Hindu          100k+         36
## 41    Historically Black Protestant          100k+          8
## 42                Jehovah's Witness          100k+          4
## 43                           Jewish          100k+         44
## 44              Mainline Protestant          100k+         23
## 45                           Mormon          100k+         20
## 46                           Muslim          100k+         20
## 47               Orthodox Christian          100k+         29
## 48 Unaffiliated (religious "nones")          100k+         21

Making income_bracket an ordered factor so that ggplot does not rearrnge

# Turn income_bracket into a factor whose level order is the order of first
# appearance in the data (lowest to highest bracket), so ggplot keeps that
# order instead of sorting the labels alphabetically.
# unique() is required: each bracket occurs once per religion, and passing the
# raw column as `levels` supplies duplicated levels, which is deprecated in
# older R versions and rejected as an error by current ones.
tidy_income$income_bracket <- factor(
  tidy_income$income_bracket,
  levels = unique(tidy_income$income_bracket)
)
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

plot income data by religion

# Dodged bars per income bracket, one bar for each religious tradition.
ggplot(tidy_income, aes(x = income_bracket, y = percentage, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income by religion", y = "percentage")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

plot income data by religion <30k

# Same chart restricted to the "<30k" bracket.
tidy_income %>%
  filter(income_bracket %in% "<30k") %>%
  ggplot(aes(x = income_bracket, y = percentage, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income by religion", y = "percentage")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Historically Black Protestant has the most representation and Jewish has the least rpresentation in under 30k bracket

plot income data by religion 30k-49,999

# Same chart restricted to the "30k-49,999" bracket.
tidy_income %>%
  filter(income_bracket %in% "30k-49,999") %>%
  ggplot(aes(x = income_bracket, y = percentage, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income by religion", y = "percentage")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Jehovah’s Witness has the most representation and Hindu has the least representaton in 30k-49,999 bracket

plot income data by religion 50k-99.999

# Same chart restricted to the bracket labelled "50k-99.999" (the label is
# matched exactly as it was assigned to the column, period included).
tidy_income %>%
  filter(income_bracket %in% "50k-99.999") %>%
  ggplot(aes(x = income_bracket, y = percentage, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income by religion", y = "percentage")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Orthodox Christian has the most representation and Historically Black Protestant has the most representation in 50k-99,99 bracket

plot income data by religion 100k+

# Same chart restricted to the "100k+" bracket.
tidy_income %>%
  filter(income_bracket %in% "100k+") %>%
  ggplot(aes(x = income_bracket, y = percentage, fill = religion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Income by religion", y = "percentage")
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

Jewish has the most representation and Jehovah’s Witness has the least representation in 100k+ bracket

Conclusion:In the population survayed Historically Black Protestant religious tradition followers has the least income and Jewish religious tradition follower has the most income