#1. Write down 3 questions that you might want to answer based on this data.

##1. Is cullen skink preferred over Partan bree?
##2. Which age group supports cullen skink more?
##3. Which city does not support cullen skink?
#2. Create an R data frame with 2 observations to store this data in its current "messy" state. 
#Use whatever method you want to re-create and/or load the data.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.3
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.3
#a. loaded from CSV file
# messy.df<-read.csv("ScotlandPoll.csv",header=TRUE,sep=",")
# messy.df
#b. Loaded directly into a data frame
# Re-create the poll results in their original "messy" wide layout:
# one row per city, one column per age-group/answer combination.
# stringsAsFactors is set explicitly so that `city` is a character column on
# every R version (before R 4.0 the default would turn it into a factor).
messy <- data.frame(
  city = c("Edinburgh", "Glasgow"),
  age16_24Yes = c(80000, 99400),
  age25_plYes = c(143000, 150400),
  age16_24No = c(35900, 43000),
  age25_plNo = c(214800, 207000),
  stringsAsFactors = FALSE
)
messy
##        city age16_24Yes age25_plYes age16_24No age25_plNo
## 1 Edinburgh       80000      143000      35900     214800
## 2   Glasgow       99400      150400      43000     207000
#3. Use the functionality in the tidyr package to convert the data frame to be "tidy data."
# Stack every vote-count column (everything except `city`) into key/value
# pairs: `key` holds the original column name, `value` the vote count.
tidy <- gather(messy, key, value, -city)
tidy
##        city         key  value
## 1 Edinburgh age16_24Yes  80000
## 2   Glasgow age16_24Yes  99400
## 3 Edinburgh age25_plYes 143000
## 4   Glasgow age25_plYes 150400
## 5 Edinburgh  age16_24No  35900
## 6   Glasgow  age16_24No  43000
## 7 Edinburgh  age25_plNo 214800
## 8   Glasgow  age25_plNo 207000
# Split each stacked column name into the age group and the answer.
# The answer is matched as a literal "Yes"/"No" suffix instead of cutting at a
# fixed character offset, so age labels of any length are split correctly.
tidier <- tidyr::extract(
  tidy, key,
  into = c("age", "pollval"),
  regex = "^(.+)(Yes|No)$"
)
tidier
##        city      age pollval  value
## 1 Edinburgh age16_24     Yes  80000
## 2   Glasgow age16_24     Yes  99400
## 3 Edinburgh age25_pl     Yes 143000
## 4   Glasgow age25_pl     Yes 150400
## 5 Edinburgh age16_24      No  35900
## 6   Glasgow age16_24      No  43000
## 7 Edinburgh age25_pl      No 214800
## 8   Glasgow age25_pl      No 207000
#4. Use the functionality in the dplyr package to answer the questions that you asked in step 1.
## 1. Is cullen skink preferred over Partan bree?

# Total votes for each answer, summed over both cities and both age groups.
summarise(group_by(tidier, pollval), n = sum(value))
## Source: local data frame [2 x 2]
## 
##   pollval      n
## 1      No 500700
## 2     Yes 472800
## 2. Which age group supports cullen skink more?
# Rank age groups by the share of their own votes that were "Yes".
# Raw "Yes" totals alone are misleading here because the two age groups cast
# very different numbers of votes.
tidier %>%
  group_by(age) %>%
  summarise(
    n = sum(value[pollval == "Yes"]),  # "Yes" votes in the group
    total = sum(value)                 # all votes cast by the group
  ) %>%
  mutate(share = n / total) %>%
  arrange(desc(share))
## Computed from the figures above:
##   age16_24: n = 179400, total = 258300, share ~ 0.695
##   age25_pl: n = 293400, total = 715200, share ~ 0.410
##3. Which city does not support cullen skink?
# A city "does not support" the option when fewer than half of its votes were
# "Yes", so compare each city's "Yes" votes with its own total rather than
# ranking raw "Yes" counts. Least supportive city is listed first.
tidier %>%
  group_by(city) %>%
  summarise(
    n = sum(value[pollval == "Yes"]),  # "Yes" votes in the city
    total = sum(value)                 # all votes cast in the city
  ) %>%
  mutate(share = n / total) %>%
  arrange(share)
## Computed from the figures above:
##   Edinburgh: n = 223000, total = 473700, share ~ 0.4708
##   Glasgow:   n = 249800, total = 499800, share ~ 0.4998
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
# Draw a dodged bar chart of vote counts by age group, coloured by answer,
# for a single city's rows; `title` is shown as the plot title.
plot_city_votes <- function(votes, title) {
  bars <- geom_bar(
    stat = "identity",
    position = position_dodge(),
    colour = "black"
  )
  ggplot(data = votes, aes(x = age, y = value, fill = pollval)) +
    bars +
    ggtitle(title)
}

# One subset and one plot per city.
tidier1 <- filter(tidier, city == "Edinburgh")
tidier2 <- filter(tidier, city == "Glasgow")

p1 <- plot_city_votes(tidier1, "Edinburgh")
p2 <- plot_city_votes(tidier2, "Glasgow")
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.1.3
## Loading required package: grid
# Show both city plots together in one figure.
grid.arrange(p1, p2)

 #5. Having gone through the process, 
 # would you ask different questions and/or change the way that you structured your data frame? 
 #   1. In each age group, what is the percentage difference in support for cullen skink between the two cities?