#install.packages("dslabs")
library("dslabs")
# Show every dataset that ships with the dslabs package.
data(package = "dslabs")
# Show the scripts bundled in the package's "script" directory.
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the admissions dataset into the workspace.
data("admissions")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
library(RColorBrewer)
# Print a compact summary of the columns and their types in admissions.
str(admissions)
## 'data.frame': 12 obs. of 4 variables:
## $ major : chr "A" "B" "C" "D" ...
## $ gender : chr "men" "men" "men" "men" ...
## $ admitted : num 62 63 37 33 28 6 82 68 34 35 ...
## $ applicants: num 825 560 325 417 191 373 108 25 593 375 ...
#install.packages("highcharter")
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
# Stacked area chart: admitted applicants per major, one series per gender.

# Capitalise the gender labels so the legend reads "Men" / "Women".
# Only the `gender` column is recoded instead of matching the string
# against every column of the data frame.
admissions$gender[admissions$gender == "men"] <- "Men"
admissions$gender[admissions$gender == "women"] <- "Women"

highchart() %>%
  hc_add_series(
    data = admissions,
    type = "area",
    mapping = hcaes(x = major, y = admitted, group = gender)
  ) %>%
  hc_plotOptions(series = list(stacking = "normal")) %>%
  # Each major appears once per gender, so de-duplicate the axis labels.
  hc_xAxis(
    categories = unique(admissions$major),
    title = list(text = "Major")
  ) %>%
  hc_yAxis(title = list(text = "Number of Admitted Applicants")) %>%
  hc_legend(align = "right", verticalAlign = "top") %>%
  # brewer.pal() requires n >= 3; requesting 2 only raised a warning and
  # returned the 3-level palette anyway, so the colours are unchanged.
  hc_colors(brewer.pal(3, "PuRd")) %>%
  hc_title(text = "Number of Admitted Students per Major vs. Gender")
# Line chart: number of applicants per major, one line per gender.
highchart() %>%
  hc_add_series(
    data = admissions,
    type = "line",
    mapping = hcaes(x = major, y = applicants, group = gender)
  ) %>%
  # brewer.pal() requires n >= 3; requesting 2 only raised a warning and
  # returned the 3-level palette anyway, so the colours are unchanged.
  hc_colors(brewer.pal(3, "Pastel2")) %>%
  hc_title(text = "Number of Applicants per Major vs. Gender") %>%
  # Single x-axis configuration (the original set the categories twice).
  # Each major appears once per gender, so de-duplicate the axis labels.
  hc_xAxis(
    categories = unique(admissions$major),
    title = list(text = "Major")
  ) %>%
  hc_yAxis(title = list(text = "Number of Applicants"))
# Combined chart of awards per research discipline: men drawn as a line,
# women drawn as columns, both on the same (first) y-axis.
highchart() %>%
  hc_add_series(
    data = research_funding_rates$awards_men,
    type = "line",
    yAxis = 0,
    name = "Men"
  ) %>%
  hc_add_series(
    data = research_funding_rates$awards_women,
    type = "column",
    yAxis = 0,
    name = "Women"
  ) %>%
  hc_xAxis(
    categories = research_funding_rates$discipline,
    title = list(text = "Discipline")
  ) %>%
  hc_yAxis(title = list(text = "Number of Awards Given")) %>%
  # brewer.pal() requires n >= 3; requesting 2 only raised a warning and
  # returned the 3-level palette anyway, so the colours are unchanged.
  hc_colors(brewer.pal(3, "Dark2")) %>%
  hc_title(text = "Awards Given Per Research Discipline vs. Gender")
# Percent-stacked area chart: share of each greenhouse gas per year.
highchart() %>%
  hc_add_series(
    data = greenhouse_gases,
    type = "area",
    mapping = hcaes(x = year, y = concentration, group = gas)
  ) %>%
  hc_plotOptions(series = list(stacking = "percent")) %>%
  # The x-axis is the numeric `year`; the admissions majors that were
  # previously passed as categories do not belong on this chart.
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Percentage of Greenhouse Gases")) %>%
  hc_legend(align = "right", verticalAlign = "top") %>%
  # brewer.pal() requires n >= 3; requesting 2 only raised a warning and
  # returned the 3-level palette anyway, so the colours are unchanged.
  hc_colors(brewer.pal(3, "PuRd")) %>%
  hc_title(text = "Percentage of Greenhouse Gases from Year 20-2000")
# Per-year share of each gas: total concentration per (year, gas), divided
# by the sum over all gases in that year.
ghgas <- greenhouse_gases %>%
  group_by(year, gas) %>%
  # Keep the `year` grouping explicitly so the share below is computed
  # within each year (and the summarise() grouping message is not emitted).
  summarise(n = sum(concentration), .groups = "drop_last") %>%
  mutate(percentage = n / sum(n)) %>%
  # Return a plain, ungrouped tibble so later steps are not silently grouped.
  ungroup()
# ggplot2 stacked area chart of the per-year gas shares held in `ghgas`.
ggplot(data = ghgas, mapping = aes(x = year, y = percentage, fill = gas)) +
  geom_area(colour = "black", alpha = 0.5) +
  scale_fill_brewer(name = "Gas", palette = "PuRd") +
  labs(
    x = "Year",
    y = "Percentage of Gas",
    title = "Percentage of Greenhouse Gases from Year 20-2000"
  ) +
  theme_minimal(base_size = 14, base_family = "Georgia")
# Zoom in on the most recent centuries of the per-year gas shares.
# `>=` keeps the year 1600 itself, matching the "1600-2000" range in the
# plot title (the previous `>` dropped that first year).
ghgasNEW <- ghgas %>%
  filter(year >= 1600)

# Same stacked area chart as for the full series, restricted to 1600 onward.
ggplot(ghgasNEW, aes(x = year, y = percentage, fill = gas)) +
  geom_area(alpha = 0.5, colour = "black") +
  theme_minimal(base_size = 14, base_family = "Georgia") +
  scale_fill_brewer(name = "Gas", palette = "PuRd") +
  xlab("Year") +
  ylab("Percentage of Gas") +
  ggtitle("Percentage of Greenhouse Gases from Year 1600-2000")
I started this homework assignment after only skimming the instructions, and that is when I realized the importance of reading carefully. The first three graphs I made, using the admissions and research_funding_rates datasets, had only two variables, and I then noticed that the requirements called for three variables. I then created an area chart using the greenhouse_gases dataset, and then I read that I had to change the theme, which is awesome because I used highcharter. Despite all of these mess-ups, it was not difficult to replicate my greenhouse_gases highcharter area chart as a ggplot area chart. What I found from creating this area chart is that CH4, or methane, is the most abundant greenhouse gas and has been for thousands of years. I wanted to zoom in on the past 400 years to see how much methane has increased, since that was harder to see in my original graph. After filtering the years to be 1600 and above, I noticed that methane accounted for almost 75% of our greenhouse gases in 2000, whereas it was nearly half 400 years ago. There has been a dramatic increase starting at around 1900.