Install the dslabs package (if it is not installed yet) and load the libraries.
# install.packages("dslabs") # these are data science labs
library("dslabs")
# tidyr is needed for reshaping (gather / pivot_longer); dplyr does not provide these functions.
library(tidyr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# data(package = "dslabs")  # uncomment to list the datasets shipped with dslabs
# Show the files in the "script" directory of the installed dslabs package.
system.file("script", package = "dslabs") %>%
  list.files()
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
Load the `temp_carbon` dataset and inspect its structure.
# Load the temp_carbon data frame and print a compact summary of its columns.
data(list = "temp_carbon")
str(object = temp_carbon)
## 'data.frame': 268 obs. of 5 variables:
## $ year : num 1880 1881 1882 1883 1884 ...
## $ temp_anomaly : num -0.11 -0.08 -0.1 -0.18 -0.26 -0.25 -0.24 -0.28 -0.13 -0.09 ...
## $ land_anomaly : num -0.48 -0.4 -0.48 -0.66 -0.69 -0.56 -0.51 -0.47 -0.41 -0.31 ...
## $ ocean_anomaly : num -0.01 0.01 0 -0.04 -0.14 -0.17 -0.17 -0.23 -0.05 -0.02 ...
## $ carbon_emissions: num 236 243 256 272 275 277 281 295 327 327 ...
List the years present in the data to decide the year range used in the graph title.
# Count how many rows exist for each year.
table(temp_carbon[["year"]])
##
## 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 1 1 1 1 1 1 1 1 1 1 1 1
Create a scatterplot of Annual Temperature Anomaly, 1751-2018.
# Plot the three temperature-anomaly series against year, one colour per
# series, as semi-transparent points with a loess smoother on top.
data(temp_carbon)
temp_carbon %>%
  # carbon_emissions is left out: its values are far larger than the
  # anomalies and would flatten them on a shared y axis.
  select(Year = year, Air = temp_anomaly, Land = land_anomaly,
         Ocean = ocean_anomaly) %>%
  # Long format: one row per (Year, Region) pair with its anomaly value.
  # This replaces the earlier gather() call, whose `Air:Land:Ocean` range was
  # invalid and only worked by accident (it raised an `x:y` warning).
  pivot_longer(c(Air, Land, Ocean), names_to = "Region",
               values_to = "Anomaly") %>%
  # Fix the legend order explicitly.
  mutate(Region = factor(Region, levels = c("Air", "Land", "Ocean"))) %>%
  ggplot(aes(Year, Anomaly, color = Region)) +
  # Legend entries come from the smoother only, so hide the point legend.
  geom_point(show.legend = FALSE, alpha = 0.4, size = 1) +
  geom_smooth(method = "loess", span = 0.15, size = 1) +
  ylab("Temperature Anomaly") +
  ggtitle("Annual Temperature Anomaly, 1751-2018") +
  theme(legend.position = "top")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 387 rows containing non-finite values (stat_smooth).
## Warning: Removed 387 rows containing missing values (geom_point).

It seems there is no temperature data from 1751 until around 1880.
Let’s create another scatterplot of Annual Temperature Anomaly, restricted to 1880-2018.
# Same temperature-anomaly plot as before, restricted to 1880-2018.
data(temp_carbon)
temp_carbon %>%
  # Keep only the requested years up front. Restricting the range through
  # scale limits instead would discard the out-of-range rows silently at
  # plot time and emit "Removed ... rows" warnings.
  filter(year >= 1880, year <= 2018) %>%
  select(Year = year, Air = temp_anomaly, Land = land_anomaly,
         Ocean = ocean_anomaly) %>%
  # Long format: one row per (Year, Region) pair with its anomaly value.
  # This replaces the earlier gather() call, whose `Air:Land:Ocean` range was
  # invalid and only worked by accident (it raised an `x:y` warning).
  pivot_longer(c(Air, Land, Ocean), names_to = "Region",
               values_to = "Anomaly") %>%
  # Fix the legend order explicitly.
  mutate(Region = factor(Region, levels = c("Air", "Land", "Ocean"))) %>%
  ggplot(aes(Year, Anomaly, color = Region)) +
  # Legend entries come from the smoother only, so hide the point legend.
  geom_point(show.legend = FALSE, alpha = 0.4, size = 1) +
  geom_smooth(method = "loess", span = 0.15, size = 1) +
  ylab("Temperature Anomaly") +
  ggtitle("Annual Temperature Anomaly, 1880-2018") +
  theme(legend.position = "top")
## `geom_smooth()` using formula 'y ~ x'

Now let’s tabulate the `carbon_emissions` values before plotting the annual carbon emissions.
# Count how often each carbon_emissions value occurs.
table(temp_carbon[["carbon_emissions"]])
##
## 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 20 10 10 6 3 2 3 6 4 1 1 5 1 2 2 3
## 23 24 25 29 30 31 33 34 36 37 39 43 46 47 50 54
## 2 3 1 2 1 1 1 1 1 1 1 2 1 1 1 2
## 57 59 69 71 76 77 78 83 91 95 97 104 112 119 122 130
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 142 147 156 173 174 184 188 191 194 196 210 236 243 256 272
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 275 277 281 295 327 356 370 372 374 383 406 419 440 465 507 534
## 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1
## 552 566 617 624 663 707 750 784 785 803 806 819 836 838 845 847
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 850 879 893 901 932 936 940 943 955 963 970 973 975 983 1027 1053
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1062 1065 1130 1142 1145 1160 1192 1209 1238 1299 1334 1342 1383 1391 1392 1419
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1469 1630 1767 1795 1841 1865 2042 2177 2270 2330 2454 2569 2580 2686 2833 2995
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3130 3288 3393 3566 3780 4053 4208 4376 4596 4614 4623 4864 5016 5074 5075 5094
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 5138 5258 5301 5357 5417 5583 5725 5936 6066 6070 6074 6078 6142 6174 6305 6448
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 6556 6561 6576 6733 6893 6994 7376 7743 8042 8336 8503 8697 8776 9128 9503 9673
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9773 9855
## 1 1
Create a scatterplot of Annual Carbon Emissions, 1751-2018.
# Scatterplot of yearly carbon emissions with a loess trend line.
data(temp_carbon)
ggplot(
  # Keep just the two plotted columns, renamed so the x axis reads "Year".
  select(temp_carbon, Year = year, Carbon = carbon_emissions),
  aes(x = Year, y = Carbon)
) +
  geom_point(alpha = 0.4, show.legend = FALSE) +
  geom_smooth(span = 0.15, method = "loess") +
  labs(y = "Carbon Emissions", title = "Annual Carbon Emissions, 1751-2018")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).

Thank you :)