#install.packages("tidyverse")
#install.packages("dslabs") # these are data science labs
#install.packages("treemap")
#install.packages("RColorBrewer")
#install.packages("ggplot2")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dslabs)
library(treemap)
library(RColorBrewer)
library(ggplot2)
# Show the catalogue of datasets bundled with the dslabs package.
data(package = "dslabs")
# List the files in the package's installed "script" directory.
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the research_funding_rates dataset into the workspace.
data(list = "research_funding_rates")
# Export the dataset as a CSV file in the current working directory;
# missing values are written as empty strings.
write_csv(
  x = research_funding_rates,
  "research_funding_rates.csv",
  na = ""
)
# Preview the first six rows.
head(research_funding_rates, n = 6L)
## discipline applications_total applications_men applications_women
## 1 Chemical sciences 122 83 39
## 2 Physical sciences 174 135 39
## 3 Physics 76 67 9
## 4 Humanities 396 230 166
## 5 Technical sciences 251 189 62
## 6 Interdisciplinary 183 105 78
## awards_total awards_men awards_women success_rates_total success_rates_men
## 1 32 22 10 26.2 26.5
## 2 35 26 9 20.1 19.3
## 3 20 18 2 26.3 26.9
## 4 65 33 32 16.4 14.3
## 5 43 30 13 17.1 15.9
## 6 29 12 17 15.8 11.4
## success_rates_women
## 1 25.6
## 2 23.1
## 3 22.2
## 4 19.3
## 5 21.0
## 6 21.8
# Treemap of research funding: one rectangle per discipline, sized by the
# total number of applications and coloured by the overall success rate.
treemap(
  research_funding_rates,
  index = "discipline",            # one tile per discipline
  vSize = "applications_total",    # tile area
  vColor = "success_rates_total",  # tile colour
  type = "value",                  # map vColor values directly onto the palette
  palette = "Pastel1",
  title = "Funding rates by discipline"  # fixed typo ("disiciple")
)
As the treemap shows, social science has the largest section because it received the most applications. Physics had the highest funding rate but the lowest number of applicants.
# Scatter plot of success rate against number of applications,
# with one coloured point per discipline.
funding_plot <- ggplot(research_funding_rates) +
  geom_point(
    aes(
      x = applications_total,
      y = success_rates_total,
      group = discipline,
      color = discipline
    )
  ) +
  labs(
    x = "Number of applications",
    y = " Application success rate ",
    title = "Funding rate by discipline"
  )

# Render the plot with the dark theme.
funding_plot + theme_dark()
This scatter plot shows each discipline's application success rate against its number of applications, colored by discipline. It makes sense that the more applications a discipline receives, the lower its success rate.