# Load libraries ----
library(ggplot2)
library(ggthemes)
# Apply the solarized theme to every plot below, using a base font size of 20
theme_set(theme_solarized(base_size = 20))
# Load data into a data frame ----
# Read the CSV into `deaths`; the first row of the file supplies the column names
deaths <- read.csv(file = 'childdeathnumbers.csv', header = TRUE)
# Create histograms ----
# Histogram of the X2008 column of `deaths`: the x axis is the number of deaths
# in 2008, the y axis is how many countries fall into each 35000-wide bin.
ggplot(deaths, aes(x = X2008)) +
  geom_histogram(
    color = 'black', fill = 'red',
    # Anchor the bin edges at 0 so the bars line up with the axis breaks below
    # and no bar straddles the lower axis limit.
    binwidth = 35000, boundary = 0
  ) +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching of `limit`.
  scale_x_continuous(limits = c(0, 950000), breaks = seq(0, 950000, 35000)) +
  xlab('Number of Deaths in 2008') +
  ylab('Counts of Countries with that many Deaths')

# Write the plot drawn above (ggsave defaults to the last plot) to a JPEG file
ggsave('deathshistogram.jpg')
## Saving 7 x 5 in image
# Print summary statistics for the X2008 column
summary(deaths[['X2008']])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 238.8 1928.0 29320.0 15540.0 826100.0
# Keep only the countries with at least 10000 deaths in 2008 (the threshold is
# inclusive). Column names are resolved inside `deaths` by subset().
deathssub <- subset(deaths, X2008 >= 10000)
# Histogram of the X2008 column of `deathssub`: the x axis is the number of
# deaths in 2008, the y axis is how many countries fall into each 35000-wide bin.
ggplot(deathssub, aes(x = X2008)) +
  geom_histogram(
    color = 'black', fill = 'red',
    # Anchor the bin edges at 0 so the bars line up with the axis breaks below
    # and no bar straddles the lower axis limit.
    binwidth = 35000, boundary = 0
  ) +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching of `limit`.
  scale_x_continuous(limits = c(0, 950000), breaks = seq(0, 950000, 35000)) +
  xlab('Number of Deaths in 2008') +
  ylab('Counts of Countries with that many Deaths')

# Write the plot drawn above (ggsave defaults to the last plot) to a JPEG file
ggsave('deathshistogramsubset.jpg')
## Saving 7 x 5 in image
# Create frequency plots ----
# Frequency polygon of the X2008 column of `deaths`, binned in steps of 35000
ggplot(data = deaths, mapping = aes(x = X2008)) +
  geom_freqpoly(binwidth = 35000, color = 'purple') +
  labs(
    x = 'Number of Deaths in 2008',
    y = 'Counts of Countries with that many Deaths'
  )

# Write the plot drawn above to a JPEG file
ggsave(filename = 'deathsfrequency.jpg')
## Saving 7 x 5 in image
# Frequency polygon of the X2008 column of `deathssub`, binned in steps of
# 18000; axis breaks are placed every 36000, i.e. every second bin width.
ggplot(deathssub, aes(x = X2008)) +
  geom_freqpoly(color = 'purple', binwidth = 18000) +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching of `limit`.
  scale_x_continuous(limits = c(0, 950000), breaks = seq(0, 950000, 36000)) +
  xlab('Number of Deaths in 2008') +
  ylab('Counts of Countries with that many Deaths')
## Warning: Removed 2 rows containing missing values (geom_path).

# Write the plot drawn above (ggsave defaults to the last plot) to a JPEG file
ggsave('deathsfrequencysubset.jpg')
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (geom_path).