Description of the Project

Choose any three of the “wide” datasets identified in the Week 6 Discussion items.

Dataset Used: Religion Income

Suggested analysis: Analyze the income by religion

Steps

Attach libraries, load and show raw data

library(tidyr)
library(dplyr)
library(stringr)
library(knitr)
library(DT)
# Read the Religion/Income CSV from GitHub, keeping text columns as character
# vectors instead of converting them to factors.
rawdata <- read.csv(
  file = "https://raw.githubusercontent.com/L-Velasco/Fall16_IS607/master/Project%202_Religion_Income.csv",
  stringsAsFactors = FALSE
)

Prefix each income bracket with a number so that the brackets keep their low-to-high order when sorted

# Rename all six columns in one step. Each income bracket carries a numeric
# prefix ("1." to "4.") so that sorting the names as text keeps the brackets
# in ascending income order.
rawdata <- setNames(
  rawdata,
  c(
    "Religion",
    "1. Less than $30,000",
    "2. $30,000-$49,999",
    "3. $50,000-$99,999",
    "4. $100,000 or more",
    "Sample_Size"
  )
)

# Display the renamed table.
datatable(rawdata)

Tidy the data

Use select() to drop the Sample_Size variable, gather() to turn the four income-bracket columns into observations (one row per religion and income bracket), and arrange() to sort the observations by income bracket and then by percentage in descending order.

# Reshape the wide table into one row per (Religion, Income) pair:
#   * drop the Sample_Size column,
#   * stack every remaining column except Religion into an Income key and a
#     pct value,
#   * order rows by income bracket, then by percentage from high to low.
# pct stays a character value such as "53%"; the "%" is stripped and the
# value converted to a number only to compute the sort order.
tidydata <- rawdata %>%
  select(-Sample_Size) %>%
  gather(key = "Income", value = "pct", -Religion) %>%
  arrange(Income, desc(as.numeric(gsub("%", "", pct, fixed = TRUE))))
## Warning in combine_vars(vars, ind_list): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored
datatable(tidydata)

Transform To Analyze the data


#1. Grouped by income type, which religions have the highest and lowest percentages?

# tidydata is ordered by Income and then by descending pct, so within each
# income bracket the first row holds the highest percentage and the last row
# the lowest. Keep those two rows per bracket, label them, and spread the
# labels into "Highest" and "Lowest" columns containing "<Religion> <pct>".
income_data <- tidydata %>%
  group_by(Income) %>%
  slice(c(1, n())) %>%
  # transmute() keeps the grouping column (Income) plus the two new columns.
  transmute(
    Extreme = c("Highest", "Lowest"),
    Label = str_c(Religion, pct, sep = " ")
  ) %>%
  spread(key = Extreme, value = Label)
kable(income_data)
Income Highest Lowest
1. Less than $30,000 Historically Black Protestant 53% Jewish 16%
2. $30,000-$49,999 Jehovah’s Witness 25% Hindu 13%
3. $50,000-$99,999 Orthodox Christian 36% Historically Black Protestant 17%
4. $100,000 or more Jewish 44% Jehovah’s Witness 4%


#2. Which religion(s) have the highest and lowest percentages overall?

# Find the overall highest and lowest percentages across every religion and
# income bracket.
#
# The extremes are computed from the numeric value of pct rather than from
# the first and last rows of tidydata: tidydata is sorted by Income first, so
# its first/last rows are only the top of the first bracket and the bottom of
# the last bracket, which need not be the global extremes. Filtering on the
# max/min also keeps every religion that ties for an extreme.
income_data_overall <- tidydata %>%
  mutate(pct_num = as.numeric(str_replace_all(pct, "%", ""))) %>%
  filter(
    pct_num == max(pct_num, na.rm = TRUE) |
      pct_num == min(pct_num, na.rm = TRUE)
  ) %>%
  # Highest row(s) first, lowest row(s) last.
  arrange(desc(pct_num)) %>%
  select(Religion, pct, Income)

# Strip the numeric ordering prefix (e.g. "1. ") from the bracket label. The
# pattern is anchored and escapes the dot, so it only removes a leading
# "<digits>. " and works for prefixes of any width.
income_data_overall$Income <- str_replace(
  income_data_overall$Income,
  "^\\d+\\.\\s*",
  ""
)
kable(income_data_overall)
Religion pct Income
Historically Black Protestant 53% Less than $30,000
Jehovah’s Witness 4% $100,000 or more


#3. Grouped by religion, which income types have the highest and lowest percentages?

# For each religion, report the income bracket with the highest and the
# lowest percentage as "<pct> in <bracket>" text.
#
# Rows are sorted within each religion by descending percentage; ties are
# broken by descending Income label, so a tie for highest resolves to the
# higher bracket and a tie for lowest resolves to the lower bracket. The
# first and last rows of each religion are then the two extremes.
religion_data <- tidydata %>%
  arrange(
    Religion,
    desc(as.numeric(str_replace_all(pct, "%", ""))),
    desc(Income)
  ) %>%
  group_by(Religion) %>%
  slice(c(1, n())) %>%
  mutate(
    High_Low = c("Highest", "Lowest"),
    # Remove the numeric ordering prefix (e.g. "1. ") with an anchored regex
    # that escapes the dot, instead of using the first three characters of
    # each label as an unescaped pattern.
    Inc = str_c(
      pct,
      "in",
      str_replace(Income, "^\\d+\\.\\s*", ""),
      sep = " "
    )
  ) %>%
  select(Religion, Inc, High_Low) %>%
  spread(High_Low, Inc) %>%
  # Drop the grouping so later operations on the result are not per-religion.
  ungroup()

kable(religion_data)
Religion Highest Lowest
Buddhist 36% in Less than $30,000 13% in $100,000 or more
Catholic 36% in Less than $30,000 19% in $30,000-$49,999
Evangelical Protestant 35% in Less than $30,000 14% in $100,000 or more
Hindu 36% in $100,000 or more 13% in $30,000-$49,999
Historically Black Protestant 53% in Less than $30,000 8% in $100,000 or more
Jehovah’s Witness 48% in Less than $30,000 4% in $100,000 or more
Jewish 44% in $100,000 or more 15% in $30,000-$49,999
Mainline Protestant 29% in Less than $30,000 20% in $30,000-$49,999
Mormon 33% in $50,000-$99,999 20% in $30,000-$49,999
Muslim 34% in Less than $30,000 17% in $30,000-$49,999
Orthodox Christian 36% in $50,000-$99,999 17% in $30,000-$49,999
Unaffiliated (religious “nones”) 33% in Less than $30,000 20% in $30,000-$49,999