# One-time setup, run manually if dslabs is missing:
# install.packages("dslabs")
library(dslabs)

# Show the datasets shipped with dslabs, then the names of the scripts in the
# package's "script" directory.
data(package = "dslabs")
list.files(
  system.file("script", package = "dslabs")
)
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the `admissions` dataset into the workspace so it can be inspected and
# modified below.
data("admissions")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)
library(ggrepel)
library(RColorBrewer)
# Print the column names, types and first few values of `admissions` before
# plotting it.
str(admissions)
## 'data.frame':    12 obs. of  4 variables:
##  $ major     : chr  "A" "B" "C" "D" ...
##  $ gender    : chr  "men" "men" "men" "men" ...
##  $ admitted  : num  62 63 37 33 28 6 82 68 34 35 ...
##  $ applicants: num  825 560 325 417 191 373 108 25 593 375 ...
#install.packages("highcharter")
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
## 
##     stars
# Capitalise the gender labels so the chart legends read "Men" / "Women".
# Only the gender column is compared and rewritten, rather than testing every
# cell of the data frame against a string.
admissions$gender[admissions$gender == "men"] <- "Men"
admissions$gender[admissions$gender == "women"] <- "Women"

# Stacked area chart of admitted students per major, one series per gender.
# `admitted` cannot be a head count (the str() output shows 68 "admitted" out
# of 25 applicants), so it is treated as a percentage and converted to a
# number of students, which is what the axis and chart titles promise.
# NOTE(review): confirm against the dslabs documentation for `admissions`.
highchart() %>%
  hc_add_series(
    data = mutate(admissions, admitted_n = round(admitted * applicants / 100)),
    type = "area",
    hcaes(x = major, y = admitted_n, group = gender)
  ) %>%
  hc_plotOptions(series = list(stacking = "normal")) %>%
  # Every major occurs once per gender, so de-duplicate the axis labels.
  hc_xAxis(
    categories = unique(admissions$major),
    title = list(text = "Major")
  ) %>%
  hc_yAxis(title = list(text = "Number of Admitted Applicants")) %>%
  hc_legend(align = "right", verticalAlign = "top") %>%
  # brewer.pal() needs n >= 3; asking for 2 only warned and returned the same
  # 3-colour palette. The two series use the first two colours.
  hc_colors(brewer.pal(3, "PuRd")) %>%
  hc_title(text = "Number of Admitted Students per Major vs. Gender")
# Line chart of the number of applicants per major, one line per gender.
highchart() %>%
  hc_add_series(
    data = admissions,
    type = "line",
    hcaes(x = major, y = applicants, group = gender)
  ) %>%
  # Configure the x axis in a single call; every major occurs once per gender,
  # so the labels are de-duplicated.
  hc_xAxis(
    categories = unique(admissions$major),
    title = list(text = "Major")
  ) %>%
  hc_yAxis(title = list(text = "Number of Applicants")) %>%
  # brewer.pal() needs n >= 3; asking for 2 only warned and returned the same
  # 3-colour palette. The two series use the first two colours.
  hc_colors(brewer.pal(3, "Pastel2")) %>%
  hc_title(text = "Number of Applicants per Major vs. Gender")
# Awards per research discipline: men drawn as a line, women as columns, both
# on the same (first) y axis.
highchart() %>%
  hc_add_series(
    data = research_funding_rates$awards_men,
    type = "line",
    yAxis = 0,
    name = "Men"
  ) %>%
  hc_add_series(
    data = research_funding_rates$awards_women,
    type = "column",
    yAxis = 0,
    name = "Women"
  ) %>%
  hc_xAxis(
    categories = research_funding_rates$discipline,
    title = list(text = "Discipline")
  ) %>%
  hc_yAxis(title = list(text = "Number of Awards Given")) %>%
  # brewer.pal() needs n >= 3; asking for 2 only warned and returned the same
  # 3-colour palette. The two series use the first two colours.
  hc_colors(brewer.pal(3, "Dark2")) %>%
  hc_title(text = "Awards Given Per Research Discipline vs. Gender")
# Percent-stacked area chart: each gas's share of the summed concentration
# per year, one series per gas.
# NOTE(review): the shares are only comparable if every gas in
# `greenhouse_gases$concentration` is recorded in the same units - confirm.
highchart() %>%
  hc_add_series(
    data = greenhouse_gases,
    type = "area",
    hcaes(x = year, y = concentration, group = gas)
  ) %>%
  hc_plotOptions(series = list(stacking = "percent")) %>%
  # The x values are numeric years, so no category labels are supplied (the
  # admissions majors do not belong on this axis).
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Percentage of Greenhouse Gases")) %>%
  hc_legend(align = "right", verticalAlign = "top") %>%
  # brewer.pal() needs n >= 3; asking for 2 only warned and returned this same
  # 3-colour palette.
  hc_colors(brewer.pal(3, "PuRd")) %>%
  hc_title(text = "Percentage of Greenhouse Gases from Year 20-2000")
# Share of each year's summed concentration contributed by each gas.
# summarise() drops only the `gas` grouping ("drop_last"), so the sum(n) in
# mutate() is taken within each year; the result is then ungrouped so later
# steps are not silently computed per year.
# NOTE(review): the shares are only comparable if every gas in
# `greenhouse_gases$concentration` is recorded in the same units - confirm.
ghgas <- greenhouse_gases %>%
  group_by(year, gas) %>%
  summarise(n = sum(concentration), .groups = "drop_last") %>%
  mutate(percentage = n / sum(n)) %>%
  ungroup()

# Stacked area chart of per-gas shares over time.
#   data  - data frame with `year`, `percentage` and `gas` columns
#   title - plot title
# Returns the ggplot object, which prints when called at top level.
plot_gas_share <- function(data, title) {
  ggplot(data, aes(x = year, y = percentage, fill = gas)) +
    geom_area(alpha = 0.5, colour = "black") +
    theme_minimal(base_size = 14, base_family = "Georgia") +
    scale_fill_brewer(name = "Gas", palette = "PuRd") +
    xlab("Year") +
    ylab("Percentage of Gas") +
    ggtitle(title)
}

plot_gas_share(ghgas, "Percentage of Greenhouse Gases from Year 20-2000")

# Zoom in on the years after 1600, where the recent change is easier to see.
ghgasNEW <- ghgas %>%
  filter(year > 1600)

plot_gas_share(ghgasNEW, "Percentage of Greenhouse Gases from Year 1600-2000")

I started this homework assignment after skimming the instructions quickly, and that is when I realized the importance of reading carefully. The first 3 graphs I did using the admissions and research_funding_rates datasets had only two variables, and I noticed that the requirements called for three variables. I then created an area chart using the greenhouse_gases dataset, and then I read that I had to change the theme, which is awesome because I used highcharter. Despite all of these mess-ups, it was not difficult to replicate my greenhouse_gases highcharter area chart as a ggplot area chart. What I found from creating this area chart is that CH4, or methane, is the most abundant greenhouse gas and has been for thousands of years. I wanted to zoom into the past 400 years to see how much methane has increased, since it was harder to see in my original graph. After filtering the years to be after 1600, I noticed that methane accounted for almost 75% of our greenhouse gases in 2000 and that it was nearly half 400 years ago. There has been a dramatic increase starting at around 1900.