library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.1     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## -- Conflicts ---------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidyr)
library(dplyr)
# Pull in the remote crosstab.r script (presumably the definition of the
# crosstab() function called further down).
# NOTE(review): this executes code fetched over plain HTTP on every run;
# consider keeping a reviewed local copy instead.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")

# Read the raw survey export.
`Sustainability Center-dirty` <- read.csv(
  file = "C:/LocalFiles/Documents/Freshman TSU/STAT-220/HW 4/Sustainability Center-dirty.csv"
)

# All cleaning is done on a copy so the raw data frame stays untouched.
`Sustainability Center-clean` <- `Sustainability Center-dirty`

# Open both data frames in the data viewer for inspection.
View(`Sustainability Center-dirty`)
View(`Sustainability Center-clean`)

Turn all ordinal variables into numbers and all Binary variables into zeroes and ones.

Ordinal: Turned into numbers

# Collapse the raw school answer (column X) into a single numeric code:
# a recognised single school maps to 1-6, any other non-missing answer
# maps to 9, and a missing answer stays NA.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate(
    School = case_when(
      is.na(X)   ~ NA_real_,
      X == "HSE" ~ 1,
      X == "SAM" ~ 2,
      X == "BUS" ~ 3,
      X == "SCS" ~ 4,
      X == "SAL" ~ 5,
      X == "IDS" ~ 6,
      TRUE       ~ 9
    )
  )

HSE – 1; SAM – 2; BUS – 3; SCS – 4; SAL – 5; IDS – 6; other – 9

# Recode the grade-level answer in place as an ordinal 1-5 score.
# Any other non-missing answer becomes 9; a missing answer stays NA.
# NOTE(review): "First-year " carries a trailing space -- presumably that is
# how the value appears in the raw export; confirm against the data.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(`What.grade.level.are.you...Answers.should.be.based.on.number.of.years.at.Truman..not.credit.hours.completed..`),
    function(answer) {
      case_when(
        is.na(answer)                           ~ NA_real_,
        answer == "First-year "                 ~ 1,
        answer == "Second-year"                 ~ 2,
        answer == "Third-year"                  ~ 3,
        answer == "Fourth-year"                 ~ 4,
        answer == "more than 4 years at Truman" ~ 5,
        TRUE                                    ~ 9
      )
    }
  )

First-year – 1; Second-year – 2; Third-year – 3; Fourth-year – 4; more than 4 years at Truman – 5; any other answer – 9

# Recode the self-rated confidence answer in place as an ordinal 1-5 score.
# Any other non-missing answer becomes 9; a missing answer stays NA.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(`Rate.how.confident.you.are.in.your.understanding.of.sustainability.Ã.Â.`),
    function(answer) {
      case_when(
        is.na(answer)                    ~ NA_real_,
        answer == "Not Confident at All" ~ 1,
        answer == "Slightly Confident"   ~ 2,
        answer == "Neutral"              ~ 3,
        answer == "Confident"            ~ 4,
        answer == "Extremely Confident"  ~ 5,
        TRUE                             ~ 9
      )
    }
  )

1 – Not Confident at All; 2 – Slightly Confident; 3 – Neutral; 4 – Confident; 5 – Extremely Confident; 9 – any other answer

# Recode the activity-effectiveness answer in place as an ordinal 1-5 score.
# The misspelled "Neutrale" is folded into the same code as "Neutral".
# Any other non-missing answer becomes 0 (note: 0 here, not 9 as in the
# recodes above); a missing answer stays NA.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(`If.you.checked.any.activities.on.the.previous.question..How.effective.were.these.activities.in.raising.your.awareness.of.sustainability.`),
    function(answer) {
      case_when(
        is.na(answer)                         ~ NA_real_,
        answer == "Completely Ineffective"    ~ 1,
        answer == "Ineffective"               ~ 2,
        answer %in% c("Neutral", "Neutrale")  ~ 3,
        answer == "Effective"                 ~ 4,
        answer == "Completely Effective"      ~ 5,
        TRUE                                  ~ 0
      )
    }
  )

1 – Completely Ineffective; 2 – Ineffective; 3 – Neutral (including the misspelling “Neutrale”); 4 – Effective; 5 – Completely Effective; 0 – any other answer

# Recode the tuition-increase answer in place as an ordinal 1-5 score.
# Any other non-missing answer becomes 0; a missing answer stays NA.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(`How.much.would.you.be.willing.to.increase.tuition.rate..per.semester..in.order.to.pay.for.a.physical.location.for.a.Student.Sustainability.Office.`),
    function(answer) {
      case_when(
        is.na(answer)                  ~ NA_real_,
        answer == "Not willing at all" ~ 1,
        answer == "$1-$10"             ~ 2,
        answer == "$11-$20"            ~ 3,
        answer == "$21-$30"            ~ 4,
        answer == "anything above $30" ~ 5,
        TRUE                           ~ 0
      )
    }
  )

1 – Not willing at all; 2 – $1-$10; 3 – $11-$20; 4 – $21-$30; 5 – anything above $30; 0 – any other answer

Binary: Make into zeros and ones

# Replace each gender answer with its factor level code: factor() orders the
# distinct answers by sorting them, and as.numeric() returns each answer's
# 1-based position in that ordering.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(What.is.your.gender.),
    function(answer) as.numeric(factor(answer))
  )

1 – Female; 2 – Male; 3 – other

# Replace each answer with its 1-based factor level code (levels are the
# sorted distinct answers). The result is coded from 1 upward, not 0/1.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(Would.you.be.interested.in.working.in.a.student.sustainability.office.),
    function(answer) as.numeric(factor(answer))
  )

1 – no; 2 – yes

# Replace each office-preference answer with its 1-based factor level code
# (levels are the sorted distinct answers).
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(Would.you.prefer.a.physical.office.or.an.online.office.),
    function(answer) as.numeric(factor(answer))
  )

1 – no preference; 2 – online office; 3 – physical office

# Turn the seven "Check all activities ..." columns into 0/1 indicators.
#
# as.numeric(factor(x)) returns 1-based level codes (1, 2), so the previous
# version never produced the documented 0/1 coding. Subtracting 1 shifts the
# codes to 0 (first level) and 1 (second level). Missing values stay NA.
# NOTE(review): this assumes the "not heard of" value sorts before the
# "heard of" value in each raw column (e.g. "" before a label, or 0 before 1),
# and that both values occur in every column -- confirm against the raw data.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate_at(
    vars(
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Sustainability.week.sustainability.day`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Earth.week`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Green.thumb.project..garden.to.table.project.`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Bike.co.op`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Environmental.studies.conference`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Recyclemania`,
      `Check.all.activities.that.youÃ.â..â..ve.heard.of.occurring.at.Truman_Environmental.buddies`
    ),
    function(answer) as.numeric(factor(answer)) - 1
  )

0 means they have not heard of the item in question; 1 means they have heard of it

Make a single column for BirthMonth/Year by combining the two columns you have in a meaningful way.

# Paste birth month and birth year together as "month/year" in a new
# Birthday column. unite() drops the two input columns by default.
`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  unite(col = "Birthday", `Bday.Month`, `BDay.Year`, sep = "/")

School is a categorical variable, but since students could check more than one, it is even harder to work with. Clean this up in two directions. Make a single column (“School”) that uses a 1-6 scale for those who reported a single school, and a 9 for those who answered with multiple schools.

See above

Make a column for each school (six in all, each named “HSE-School” or whatever), each as a binary variable (1=yes, 0 = no).

# Build one 0/1 indicator column per school.
#
# The indicators are derived from the raw school answer (column X) rather
# than from the collapsed School code. Respondents who listed several schools
# are coded School == 9, so comparing against School left them at 0 for every
# school and the per-school columns carried no more information than School.
# NOTE(review): this assumes a multi-school answer in X contains each school's
# three-letter code as text (e.g. "HSE, SAM") -- confirm against the raw data.

# Returns 1 where `answer` contains the literal school `code`, 0 where it does
# not, and NA where the answer itself is missing.
has_school_code <- function(answer, code) {
  ifelse(
    is.na(answer),
    NA,
    as.numeric(grepl(code, as.character(answer), fixed = TRUE))
  )
}

`Sustainability Center-clean` <- `Sustainability Center-clean` %>%
  mutate(
    HSESchool = has_school_code(X, "HSE"),
    SAMSchool = has_school_code(X, "SAM"),
    BUSSchool = has_school_code(X, "BUS"),
    SCSSchool = has_school_code(X, "SCS"),
    SALSchool = has_school_code(X, "SAL"),
    IDSSchool = has_school_code(X, "IDS"),
    # Unchanged: flags every respondent whose collapsed School code is 9.
    OtherSchool = ifelse(School == 9, 1, 0)
  )

# One viewer call after all columns exist replaces the seven intermediate ones.
View(`Sustainability Center-clean`)

Make an RMarkdown file that shows these visualizations: Using your new “School” column, make a chart of the seven categories. Using your multiple columns for school, make a simple frequency chart of the percent who said that they were enrolled in each school.

# Frequency table of the collapsed School code using crosstab().
crosstab(`Sustainability Center-clean`, row.vars = "School", type = "f")
##  School Count
##       1    36
##       2    43
##       3    22
##       4    29
##       5    22
##       6     5
##       9    25
##     Sum   182
# --OR--

# The same summary with base R tables.
# Tab1: counts per School code.
Tab1 <- table(`Sustainability Center-clean`[["School"]])
# Tab2: each count as a share of the total.
Tab2 <- prop.table(Tab1)
# Tab3: margin over the table's only dimension, which for a one-way table
# gives the per-code counts again.
Tab3 <- margin.table(Tab1, margin = 1)

View(Tab1)
View(Tab2)
View(Tab3)

1 – HSE; 2 – SAM; 3 – BUS; 4 – SCS; 5 – SAL; 6 – IDS; 9 – Other/Combination

Export Clean Dataset

#write.csv(`Sustainability Center-clean`, file="STAT220hw4-tidy.csv")
#write.csv(`Sustainability Center-dirty`, file="STAT220hw4-raw.csv")

Summary

There were 182 respondents to the question about school; the two largest groups were SAM (43) and HSE (36), which together account for 79 of the 182 responses (about 43%). This dataset shows other details about the respondents more clearly because it has been cleaned into mostly binary and ordinal variables. Consequently, it is easier to follow and read.

PART II QUESTIONS

How many individuals reported being both Buddhist and making less than $30,000 per year (assume they round in the usual way)?

10,292 people reported being both Buddhist (233 people) and making less than $30,000 per year (10,059 people)

What percent of the sample is “Unaffiliated”?

6,790 (number of all unaffiliated) / 29,574 (total of all people sampled) = 22.96%

If you wanted to work with this data in a tidy way, you would need to “melt” the chart according to the principle of “one row = one respondent.” Assuming we were only melting this information (the actual survey is quite long, so they probably have it in a huge relational database).

How many rows would your tidy dataset have?

29,576 (one for each respondent)

How many columns would your tidy dataset have?

16 (each religion and each income bracket)

Write a few sentences explaining how you would make such a thing in R. Do NOT actually do it (I’m not sure you even can).

You would create this in R by first separating all the responses by individual, using some form of mutate. Then you would find each individual's response under each of the categories by using mutate and ifelse together. The counts of the responses would then be shown.