The dslabs package includes a number of datasets that can be used to practice creating visualizations.
# install.packages("dslabs") # these are data science labs
library("dslabs")
# List the datasets that ship with the dslabs package.
data(package="dslabs")
# List the files in the package's installed "script" directory
# (the make-*.R scripts shown in the output below).
list.files(system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
library(treemap)
library(dplyr)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
library(RColorBrewer)
library(readr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(viridisLite)
# Load the gapminder data frame into the workspace.
data(gapminder)
# Per-column overview: numeric ranges, factor level counts and NA counts.
summary(gapminder)
## country year infant_mortality life_expectancy
## Albania : 57 Min. :1960 Min. : 1.50 Min. :13.20
## Algeria : 57 1st Qu.:1974 1st Qu.: 16.00 1st Qu.:57.50
## Angola : 57 Median :1988 Median : 41.50 Median :67.54
## Antigua and Barbuda: 57 Mean :1988 Mean : 55.31 Mean :64.81
## Argentina : 57 3rd Qu.:2002 3rd Qu.: 85.10 3rd Qu.:73.00
## Armenia : 57 Max. :2016 Max. :276.90 Max. :83.90
## (Other) :10203 NA's :1453
## fertility population gdp continent
## Min. :0.840 Min. :3.124e+04 Min. :4.040e+07 Africa :2907
## 1st Qu.:2.200 1st Qu.:1.333e+06 1st Qu.:1.846e+09 Americas:2052
## Median :3.750 Median :5.009e+06 Median :7.794e+09 Asia :2679
## Mean :4.084 Mean :2.701e+07 Mean :1.480e+11 Europe :2223
## 3rd Qu.:6.000 3rd Qu.:1.523e+07 3rd Qu.:5.540e+10 Oceania : 684
## Max. :9.220 Max. :1.376e+09 Max. :1.174e+13
## NA's :187 NA's :185 NA's :2972
## region
## Western Asia :1026
## Eastern Africa : 912
## Western Africa : 912
## Caribbean : 741
## South America : 684
## Southern Europe: 684
## (Other) :5586
# Data for the continent chart: keep only the columns the chart needs, add
# population expressed in millions, and order the rows by year.
test <- gapminder %>%
  dplyr::transmute(
    continent,
    year,
    population,
    life_expectancy,
    population_in_millions = population / 1e6
  ) %>%
  dplyr::arrange(year)
# Eight-colour "Accent" palette from RColorBrewer, applied to the series below.
cols <- brewer.pal(8, "Accent")

# Interactive column chart: life expectancy against year, one series per
# continent, with the legend placed at the top right.
highchart() %>%
  hc_add_series(
    data = test,
    type = "column",
    mapping = hcaes(x = year, y = life_expectancy, group = continent)
  ) %>%
  hc_colors(cols) %>%
  hc_title(text = "Life Expectancy by Continent 1960 - 2016 ") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Life Expectancy")) %>%
  hc_legend(align = "right", verticalAlign = "top")
#library(RColorBrewer)
#n <- 60
#qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
#col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
#pie(rep(1,n), col=sample(col_vector, n))
# Total population per region and year, expressed in millions so the values
# match the y-axis title. gapminder holds one row per country and year, so the
# rows are summed per region; rows with a missing population are dropped first
# so they cannot turn a region's total into NA.
# NOTE(review): if a country lacks population for only some years, that
# region's total will dip in those years -- confirm against the data.
region_population <- gapminder %>%
  dplyr::filter(!is.na(population)) %>%
  dplyr::group_by(region, year) %>%
  dplyr::summarise(
    population_in_millions = sum(population) / 10^6,
    .groups = "drop"
  )

# "Paired" tops out at 12 colours; interpolate it so every region gets its own
# colour instead of the palette being recycled. The palette is no longer stored
# in a variable called `c`, which shadowed base::c().
region_palette <- grDevices::colorRampPalette(brewer.pal(12, "Paired"))(
  dplyr::n_distinct(region_population$region)
)

# Interactive bar chart: regional population (millions) against year, one
# series per region.
highchart() %>%
  hc_add_series(
    data = region_population,
    type = "bar",
    mapping = hcaes(x = year, y = population_in_millions, group = region)
  ) %>%
  hc_colors(region_palette) %>%
  hc_title(text = "Population by Region From 1960 - 2015 ") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = " Population in Millions "))
# Rows for the first and last year of the data, restricted to six
# hand-picked regions.
region6 <- gapminder %>%
  dplyr::filter(
    year %in% c(1960, 2016),
    region %in% c(
      "Eastern Europe",
      "Australia and New Zealand",
      "Western Africa",
      "South America",
      "South-Eastern Asia",
      "Northern America"
    )
  )
#region6 <- gapminder %>%
# filter(region == "Eastern Europe" | region == "Australia and New Zealand" | region == "Western Africa" | region == "South America" | region== "South-Eastern Asia" #|region=="Northern America" ) %>%
# arrange(year)
# Interactive bar chart: life expectancy in 1960 and 2016 for the regions kept
# in region6, one series per region, legend at the top right.
# NOTE(review): region6 is filtered but not aggregated, so each region series
# appears to carry one point per country and year -- confirm this is intended.
highchart() %>%
  hc_add_series(
    data = region6,
    type = "bar",
    mapping = hcaes(x = year, y = life_expectancy, group = region)
  ) %>%
  hc_title(text = "Life Expectancy by Select Region 1960 vs 2016 ") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Life Expectancy")) %>%
  hc_legend(align = "right", verticalAlign = "top")
My goal was to explore this dataset using highcharter. I found highcharter to be a great way to create visualizations because of its interactive features. The first graph shows life expectancy by year and continent; grouped by continent, life expectancy has generally increased since the earliest available data. The second graph shows the population change by region. I chose this chart because I found it visually pleasing, but there are so many regions that selecting a few to compare would be a better use of it. I wanted to try the same graph with fewer regions, so the third chart only uses data from 1960 and 2016 for a handful of arbitrarily selected regions. It shows that life expectancy has increased since the earliest available data.