Choose any three of the “wide” datasets identified in the Week 6 Discussion items.
Dataset Used: Religion Income
Suggested analysis: Analyze the income by religion
library(tidyr)
library(dplyr)
library(stringr)
library(knitr)
library(DT)
# Load the wide-format religion/income table from GitHub. Strings are kept as
# character (not factor) so the percent values can be parsed as text later.
rawdata <- read.csv(
  "https://raw.githubusercontent.com/L-Velasco/Fall16_IS607/master/Project%202_Religion_Income.csv",
  stringsAsFactors = FALSE
)
Prefix each income bracket with a number so that the brackets keep their low-to-high order when sorted.
# Replace the CSV headers with readable labels. Each income bracket carries a
# numeric prefix ("1. " to "4. ") so the labels sort in bracket order.
colnames(rawdata) <- c(
  "Religion",
  "1. Less than $30,000",
  "2. $30,000-$49,999",
  "3. $50,000-$99,999",
  "4. $100,000 or more",
  "Sample_Size"
)

# Show the relabelled wide table.
datatable(rawdata)
Used select() to exclude the Sample Size variable, gather() to turn the income-bracket columns into observations (one row per religion and income bracket), and arrange() to sort the observations within each income bracket in descending order of percentage.
# Reshape the wide table into long form: one row per religion and income
# bracket, with the share kept as the original percent string in `pct`.
tidydata <- rawdata %>%
  select(-Sample_Size) %>%
  gather(key = "Income", value = "pct", -Religion) %>%
  # Temporary numeric key so the percent strings sort by value, not as text;
  # it is dropped again once the rows are ordered.
  mutate(pct_num = as.numeric(str_replace_all(pct, "%", ""))) %>%
  arrange(Income, desc(pct_num)) %>%
  select(-pct_num)
## Warning in combine_vars(vars, ind_list): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored
datatable(tidydata)
#1. Grouping by income type, which religions have the highest and lowest percentages in each income bracket?
# For each income bracket, report the religion with the largest and the
# smallest share, formatted as "<Religion> <pct>".
income_data <- tidydata %>%
  # Sort inside this chunk so the first/last row of each group really is the
  # highest/lowest share, regardless of how tidydata happens to be ordered.
  arrange(Income, desc(as.numeric(str_replace_all(pct, "%", "")))) %>%
  group_by(Income) %>%
  # First row = highest share, last row = lowest share within the bracket.
  slice(c(1, n())) %>%
  mutate(
    High_Low = c("Highest", "Lowest"),
    Rel = str_c(Religion, pct, sep = " ")
  ) %>%
  # Drop the grouping so the result is a plain table for later use.
  ungroup() %>%
  select(Income, Rel, High_Low) %>%
  spread(High_Low, Rel)
kable(income_data)
| Income | Highest | Lowest |
|---|---|---|
| 1. Less than $30,000 | Historically Black Protestant 53% | Jewish 16% |
| 2. $30,000-$49,999 | Jehovah’s Witness 25% | Hindu 13% |
| 3. $50,000-$99,999 | Orthodox Christian 36% | Historically Black Protestant 17% |
| 4. $100,000 or more | Jewish 44% | Jehovah’s Witness 4% |
#2. What Religion/s have the highest and lowest percentage overall?
# Find the religion/income combination(s) with the overall highest and overall
# lowest percentage across every income bracket.
#
# tidydata is sorted by Income first, so its first and last rows are only the
# top of the first bracket and the bottom of the last bracket. The extremes
# are therefore computed over the whole pct column; ties are all kept.
income_data_overall <- tidydata %>%
  mutate(pct_num = as.numeric(str_replace_all(pct, "%", ""))) %>%
  filter(
    pct_num == max(pct_num, na.rm = TRUE) |
      pct_num == min(pct_num, na.rm = TRUE)
  ) %>%
  # Highest row(s) first, lowest row(s) last.
  arrange(desc(pct_num)) %>%
  select(Religion, pct, Income)

# Strip the leading "<n>. " ordering prefix from the bracket label. The
# pattern is anchored and escaped so "." is matched literally rather than as
# a regex wildcard.
income_data_overall$Income <- str_replace(
  income_data_overall$Income,
  "^\\d+\\.\\s*",
  ""
)
kable(income_data_overall)
| Religion | pct | Income |
|---|---|---|
| Historically Black Protestant | 53% | Less than $30,000 |
| Jehovah’s Witness | 4% | $100,000 or more |
#3. Grouping by religion, which income types have the highest and lowest percentages for each religion?
# For each religion, report the income bracket with the largest and the
# smallest share, formatted as "<pct> in <bracket>".
religion_data <- tidydata %>%
  # Within a religion: highest share first; ties are ordered by bracket label
  # descending, so the sort still uses the prefixed Income value.
  arrange(
    Religion,
    desc(as.numeric(str_replace_all(pct, "%", ""))),
    desc(Income)
  ) %>%
  group_by(Religion) %>%
  # First row = highest share, last row = lowest share for the religion.
  slice(c(1, n())) %>%
  mutate(
    High_Low = c("Highest", "Lowest"),
    # Strip the leading "<n>. " ordering prefix with an anchored, escaped
    # pattern instead of reusing data text as a regular expression.
    Inc = str_c(
      pct, "in", str_replace(Income, "^\\d+\\.\\s*", ""),
      sep = " "
    )
  ) %>%
  # Drop the grouping so the result is a plain table for later use.
  ungroup() %>%
  select(Religion, Inc, High_Low) %>%
  spread(High_Low, Inc)
kable(religion_data)
| Religion | Highest | Lowest |
|---|---|---|
| Buddhist | 36% in Less than $30,000 | 13% in $100,000 or more |
| Catholic | 36% in Less than $30,000 | 19% in $30,000-$49,999 |
| Evangelical Protestant | 35% in Less than $30,000 | 14% in $100,000 or more |
| Hindu | 36% in $100,000 or more | 13% in $30,000-$49,999 |
| Historically Black Protestant | 53% in Less than $30,000 | 8% in $100,000 or more |
| Jehovah’s Witness | 48% in Less than $30,000 | 4% in $100,000 or more |
| Jewish | 44% in $100,000 or more | 15% in $30,000-$49,999 |
| Mainline Protestant | 29% in Less than $30,000 | 20% in $30,000-$49,999 |
| Mormon | 33% in $50,000-$99,999 | 20% in $30,000-$49,999 |
| Muslim | 34% in Less than $30,000 | 17% in $30,000-$49,999 |
| Orthodox Christian | 36% in $50,000-$99,999 | 17% in $30,000-$49,999 |
| Unaffiliated (religious “nones”) | 33% in Less than $30,000 | 20% in $30,000-$49,999 |