library(RSQLite)
library(DBI)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
getwd()
## [1] "C:/Users/Toby/Desktop"
setwd("C:/Users/Toby/Desktop")
connect <- dbConnect(SQLite(), "factbook.db" )
dbListTables(connect)
## [1] "facts" "sqlite_sequence"
SELECT * from facts LIMIT 5
| id | code | name | area | area_land | area_water | population | population_growth | birth_rate | death_rate | migration_rate | created_at | updated_at |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | af | Afghanistan | 652230 | 652230 | 0 | 32564342 | 2.32 | 38.57 | 13.89 | 1.51 | 2015-11-01 13:19:49.461734 | 2015-11-01 13:19:49.461734 |
| 2 | al | Albania | 28748 | 27398 | 1350 | 3029278 | 0.30 | 12.92 | 6.58 | 3.30 | 2015-11-01 13:19:54.431082 | 2015-11-01 13:19:54.431082 |
| 3 | ag | Algeria | 2381741 | 2381741 | 0 | 39542166 | 1.84 | 23.67 | 4.31 | 0.92 | 2015-11-01 13:19:59.961286 | 2015-11-01 13:19:59.961286 |
| 4 | an | Andorra | 468 | 468 | 0 | 85580 | 0.12 | 8.13 | 6.96 | 0.00 | 2015-11-01 13:20:03.659945 | 2015-11-01 13:20:03.659945 |
| 5 | ao | Angola | 1246700 | 1246700 | 0 | 19625353 | 2.78 | 38.78 | 11.49 | 0.46 | 2015-11-01 13:20:08.625072 | 2015-11-01 13:20:08.625072 |
SELECT Name, MIN(Population) from facts
| name | MIN(Population) |
|---|---|
| Antarctica | 0 |
SELECT Name, MAX(Population) from facts
| name | MAX(Population) |
|---|---|
| World | 7256490011 |
The results show ‘World’ as a country and its is considered the max population within the facts table. The population of ‘World’ is in the negative. It would be best to remove this record within the table so overall table is not affected by this outlier.
SELECT Name, MIN(population_growth) from facts
| name | MIN(population_growth) |
|---|---|
| Holy See (Vatican City) | 0 |
SELECT Name, MAX(population_growth) from facts
| name | MAX(population_growth) |
|---|---|
| South Sudan | 4.02 |
SELECT Name, population from facts
| name | population |
|---|---|
| Afghanistan | 32564342 |
| Albania | 3029278 |
| Algeria | 39542166 |
| Andorra | 85580 |
| Angola | 19625353 |
| Antigua and Barbuda | 92436 |
| Argentina | 43431886 |
| Armenia | 3056382 |
| Australia | 22751014 |
| Austria | 8665550 |
SELECT Name, MAX(Population) from facts WHERE Name IS NOT 'World'
| name | MAX(Population) |
|---|---|
| China | 1367485388 |
China becomes the country with the most people. This is most accurate than when the ‘World’ was considered the population max.
SELECT name, population from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population DESC LIMIT 5
ggplot(aes(x= name, y= population), data = mydataframe) + geom_bar(aes(fill = name),stat = 'identity') + ggtitle("Top 5 Countries' Populations of the World") + xlab("Highest Populated Countries") + ylab("Population Number") + scale_fill_discrete(name = "Countries")
This bar graph shows that China and India has the most populations in the world. The European Union, United States and Indonesia trail behind the top two.
SELECT name, population, population_growth from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population DESC LIMIT 5
ggplot(aes(x= name, y = population_growth), data = plot3) + geom_bar(aes(fill = name), stat = 'identity') + ggtitle("Top 5 Growth Rates") + xlab("Highest Populated Countries") + ylab("Population Growth Rate") + scale_fill_discrete(name = "Countries")
India, Indonesia and the United States have the highest growth rates among the five largest populations.
SELECT name, birth_rate, population from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population DESC LIMIT 5
ggplot(aes(x= name, y = birth_rate), data = plot5) + geom_bar(aes(fill = name), stat = 'identity') + ggtitle("Birth Rate of Countries") + xlab("Highest Populated Countries") + ylab("Birth Rate") + scale_fill_discrete(name = "Countries")
Like the first two graphs, India has the highest birth rates. Indonesia, United States and China also have high birth rates. The European Union has the lowest birth rate among the five of them.
SELECT name, death_rate, population from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population DESC LIMIT 5
ggplot(aes(x= name, y = death_rate), data = plot6) + geom_bar(aes(fill = name), stat = 'identity') + ggtitle("Death Rate of Countries") + xlab("Highest Populated Countries") + ylab("Death Rate") + scale_fill_discrete(name = "Countries")
The European Union has the highest death rate. United States, China and India have death rates trailing behind the EU. Indonesia is has the lowest death rates.
~India’s population will keep increasing due to high growth rate, high birth rates and low death rates
~Indonesia’s population will keep increasing due to high growth rates, high birth rates and low death rates. Currently, it is the fifth largest population in the world. In the future, may reach a higher ranking.
~The European Union’s population will decrease due to low growth rates, low birth rates and high death rates. Perhaps it would be better to examine the individual countries within the EU instead of grouping them as a whole.
SELECT name, population from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population LIMIT 5
ggplot(aes(x= name, y= population), data = plot2) + geom_bar(aes(fill = name), stat = 'identity') + ggtitle("Lowest 5 Countries' Population of the World") + xlab("Lowest Populated Countries") + ylab("Population Number") + theme(axis.text.x = element_blank()) + scale_x_discrete(breaks=NULL) + scale_fill_discrete(name = "Countries")
THe Pitcairn Islands and the Cocos (Keeling) Islands have the smallest populations in the world.
SELECT name, population, population_growth from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population LIMIT 5
ggplot(aes(x= name, y= population_growth), data = plot4) + geom_bar(aes(fill = name), stat = 'identity') + ggtitle("Lowest 5 Growth Rates") + xlab("Lowest Populated Countries") + ylab("Population Growth Rate") + theme(axis.text.x = element_blank()) + scale_x_discrete(breaks=NULL) + scale_fill_discrete(name = "Countries")
Only Niue and Tokelau have data about it growth rates. Holy See (Vatican City), Cocos (Keeling) Islands and Pitcairn Islands do not have data on growth rates.
SELECT name, birth_rate, population from facts WHERE NAME IS NOT 'World' AND population != 0 ORDER BY population LIMIT 5
| name | birth_rate | population |
|---|---|---|
| Pitcairn Islands | NA | 48 |
| Cocos (Keeling) Islands | NA | 596 |
| Holy See (Vatican City) | NA | 842 |
| Niue | NA | 1190 |
| Tokelau | NA | 1337 |
None of the five smallest populations have data on birth rates.
SELECT name, death_rate, population from facts WHERE Name IS NOT 'World' AND population != 0 ORDER BY population LIMIT 5
| name | death_rate | population |
|---|---|---|
| Pitcairn Islands | NA | 48 |
| Cocos (Keeling) Islands | NA | 596 |
| Holy See (Vatican City) | NA | 842 |
| Niue | NA | 1190 |
| Tokelau | NA | 1337 |
Similarly, none of the populations have data on death rates.
SELECT name, population, birth_rate, death_rate, population_growth from facts WHERE Name IS NOT 'World' AND population != 0 AND birth_rate != 'NA' AND death_Rate != 'NA' AND population_growth != 'NA'ORDER BY population LIMIT 5
| name | population | birth_rate | death_rate | population_growth |
|---|---|---|---|---|
| Falkland Islands (Islas Malvinas) | 3361 | 10.90 | 4.90 | 0.01 |
| Montserrat | 5241 | 11.26 | 6.30 | 0.50 |
| Saint Pierre and Miquelon | 5657 | 7.42 | 9.72 | 1.08 |
| Saint Helena, Ascension, and Tristan da Cunha | 7795 | 9.88 | 7.44 | 0.24 |
| Nauru | 9540 | 24.95 | 5.87 | 0.55 |
SELECT name, population, birth_rate, death_rate, population_growth from facts WHERE Name IS NOT 'World' AND population != 0 AND birth_rate != 'NA' AND death_Rate != 'NA' AND population_growth != 'NA'ORDER BY population LIMIT 5
Since the previous five populations had missing data or no data on growth rate, birth rate or death rate, a new subset of five low populations were queried. These five populations do not have any missing data. These five populations will be considered as the lowest five populations.
ggplot(aes(x = name, y = population), data = plot7) +geom_bar(aes(fill = name), stat = 'identity') +ggtitle("New Lowest 5 Populations") + xlab("Lowest Populated Countries") + ylab("Population Number") +
theme(axis.text.x = element_blank()) + scale_x_discrete(breaks = NULL) + scale_fill_discrete(name = "Countries")
Among the new subset, Nauru and Saint Helena, Ascension, and Tristan de Cunha has the highest population. Falkland Islands (Islas Malvinas) has the smallest population.
ggplot(aes(x = name, y = population_growth), data = plot7) +geom_bar(aes(fill = name),stat = 'identity') +ggtitle("New Population Growth") + xlab("Lowest Populated Countries") + ylab("Population Growth Rate") +
theme(axis.text.x = element_blank()) + scale_x_discrete(breaks = NULL) + scale_fill_discrete(name = "Countries")
Represented in graph above, Falkland Islands (Islas Malvina) has the lowest growth rate of 0.01. Saint Helena, Ascension, and Tristan da Cunda is the second lowest population growth rate. Saint Pierre and Miquelon, Nauru are the highest two population growths.
ggplot(aes(x = name, y = birth_rate), data = plot7) +geom_bar(aes(fill = name),stat = 'identity') + ggtitle("Birth Rate of Lowest 5 Countries") + xlab("Lowest Populated Countries") + ylab("Birth Rate") + theme(axis.text.x = element_blank()) + scale_x_discrete(breaks=NULL) + scale_fill_discrete(name = "Countries")
Interestingly, Saint Pierre and Miquelon has the lowest birth rate. Saint Helena, Ascension, and Tristan da Cunha has the second lowest birth rate. The Falkland Islands (Islas Malvina) does not have the lowest birth rate. Naura has the highest birth rate.
ggplot(aes(x = name, y = death_rate), data = plot7) +geom_bar(aes(fill = name),stat = 'identity') +ggtitle("New Lowest 5 Death Rates") + xlab("Lowest Populated Countries") + ylab("Death Rate") + theme(axis.text.x = element_blank()) + scale_x_discrete(breaks=NULL) + scale_fill_discrete(name = "Countries")
In the graph above, Saint Pierre and Miquelon and Saint Helena, Ascension, and Tristan da Cunhahas the two highest death rates. Falkland Islands (Islas Malvinas) has the lowest death rate. Naura has the death rate second to last.
~Naura has the highest population among the five populations. It has the second highest population growth, highest birth rate and second lowest death rate. Naura will most likely keep growing as a population.
~Saint Helena, Ascension, and Tristan da Cunha has the second highest population among the five. But its population growth is the second lowest, the birth rate is also second lowest and has the second highest death rate. It is most likely that Saint Helena, Ascension, and Tristan de Cunha will decrease in population as years past if these rates are constant.
~Saint Pierre and Miquelon has interesting rates. It has the lowest birth rates and the highest death rates. But it has the highest population growths. Its current population is in the middle. Even when examining the numbers, its death rate (9.72) is higher than its birth rate (7.42) and its population growth is (1.08). Other factors must be involved with these observations.
~Similarly, Falkland Islands (Islas Malvinas) is an interesting statistic. Its birth rate is in the middle (10.90) and death rate is the lowest (4.90). Yet its population growth is (0.01). There must be other factors to explain these observations.