Problem Set # 3

Your Name: Rebekah Jones

# Print the current date and time as a timestamp for this report.
date()
## [1] "Mon Oct 03 22:59:49 2016"

Due Date: October 4, 2016

Total Points: 32

library(ggplot2)
library(ggmap)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(UsingR)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## 
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
## 
##     cancer
## The following object is masked from 'package:ggmap':
## 
##     crime
library(reshape2)
library(data.table)
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, last

1 Consider the SSN.txt file from http://myweb.fsu.edu/jelsner/temp/data/SSN.txt. The file contains monthly sunspot numbers since 1851.

  1. Read the data into R. (4)
# Read the sunspot table directly from its URL; the first line of the file
# supplies the column names.
Sunsp <- "http://myweb.fsu.edu/jelsner/temp/data/SSN.txt"
Suns <- read.table(file = Sunsp, header = TRUE)
# Inspect the dimensions and column types of the imported data frame.
str(Suns)
## 'data.frame':    160 obs. of  13 variables:
##  $ Year: int  1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 ...
##  $ Jan : num  75.5 68.4 41.1 15.4 12.3 0.5 13.7 39 83.7 82.4 ...
##  $ Feb : num  105.4 66.4 42.9 20 11.4 ...
##  $ Mar : num  64.6 61.2 37.7 20.7 17.4 0.4 5.2 57.5 90.3 98.9 ...
##  $ Apr : num  56.5 65.4 47.6 26.5 4.4 6.5 11.1 38.3 85.7 71.4 ...
##  $ May : num  62.6 54.9 34.7 24 9.1 ...
##  $ Jun : num  63.2 46.9 40 21.1 5.3 ...
##  $ Jul : num  36.1 42.1 45.9 18.7 0.4 ...
##  $ Aug : num  57.4 39.7 50.4 15.8 3.1 ...
##  $ Sep : num  67.9 37.5 33.5 22.4 0 ...
##  $ Oct : num  62.5 67.3 42.3 12.6 9.6 ...
##  $ Nov : num  51 54.3 28.8 28.2 4.2 7.7 31.4 51.9 97.2 97.9 ...
##  $ Dec : num  71.4 45.4 23.4 21.6 3.1 7.2 37.2 66.9 81 95.6 ...
  1. Create a histogram of the September sunspot numbers. (2)
# Histogram of the September sunspot numbers, one count per year.
ggplot(data = Suns, mapping = aes(x = Sep)) +
  geom_histogram(bins = 24, fill = "gold", colour = "black") +
  labs(
    title = "Frequency of September Sunspots",
    x = "Number of Sunspots",
    y = "Number of Years"
  )

  1. Create a box plot of the June sunspot numbers. Label the axis. (4)

I tried this a few ways to see how the changes would affect the graphics.

# Variant 1: Year is mapped to x, but the constant `group` puts every row in
# the same group, so all June values share one box.
ggplot(data = Suns, mapping = aes(x = Year, y = Jun, group = TRUE)) +
  geom_boxplot() +
  labs(title = "June Sunspots", x = "Year", y = "Sunspots")

# Variant 2: a constant empty string on x yields a single box for all
# June values.
ggplot(data = Suns, mapping = aes(x = "", y = Jun)) +
  geom_boxplot() +
  labs(title = "June Sunspots", y = "Sunspots")

# Variant 3: base-graphics box plot of the June values, annotated with the
# five-number summary.
boxplot(Suns$Jun, xlab = "June", ylab = "Sunspots")
f <- fivenum(Suns$Jun)
# Write one label beside the box at the height of each summary statistic.
text(
  x = rep(1.3, 5),
  y = f,
  labels = c("Minimum", "1st Quartile", "Median", "3rd Quartile", "Maximum")
)

  1. Create a scatter plot placing the June sunspot numbers on the horizontal axis and September sunspot numbers on the vertical axis. Label the axes. (4)
# Scatter plot: June sunspot numbers on x against September on y,
# one point per year.
ggplot(data = Suns, mapping = aes(x = Jun, y = Sep)) +
  geom_point() +
  labs(
    title = "June and September Sunspots",
    x = "June",
    y = "September"
  )

2 The babyboom dataset (UsingR) contains the time of birth, sex, and birth weight for 44 babies born in one 24-hour period at a hospital in Brisbane, Australia.

Create side-by-side box plots of birth weight (grams) by gender. Place the birth weight on the vertical axis and gender on the horizontal axis. (3)

# Preview the first rows of babyboom to see which columns are available.
head(babyboom)
##   clock.time gender   wt running.time
## 1          5   girl 3837            5
## 2        104   girl 3334           64
## 3        118    boy 3554           78
## 4        155    boy 3838          115
## 5        257    boy 3625          177
## 6        405   girl 2208          245
# Reshape babyboom to long form: gender is kept as the identifier and every
# other column is stacked into (variable, value) pairs.
# data.table is attached after reshape2 and masks melt(), so call the
# reshape2 version explicitly instead of relying on data.table's redirect.
boom1 <- reshape2::melt(babyboom, id.vars = "gender")
head(boom1)
##   gender   variable value
## 1   girl clock.time     5
## 2   girl clock.time   104
## 3    boy clock.time   118
## 4    boy clock.time   155
## 5    boy clock.time   257
## 6   girl clock.time   405
# Side-by-side box plots of birth weight (grams) for each gender.
# Plot the wt column of babyboom directly: the melted frame boom1 stacks
# clock.time, wt and running.time into a single `value` column, so plotting
# `value` would pool clock and running times with the weights.
ggplot(babyboom, aes(x = gender, y = wt)) +
  geom_boxplot(fill = "#DE980C") +
  labs(
    title = "Birth Weight by Gender",
    x = "Gender",
    y = "Birth Weight (grams)"
  )

3 The data set diamond (UsingR) contains data about the price of 48 diamond rings. The variable price records the price in Singapore dollars, and the variable carat records the size of the diamond. You are interested in predicting price from carat size.

Make a scatter plot with carat on the horizontal axis and price on the vertical axis. (3)

# Preview the first rows of diamond (carat and price columns).
head(diamond)
##   carat price
## 1  0.17   355
## 2  0.16   328
## 3  0.17   350
## 4  0.18   325
## 5  0.25   642
## 6  0.16   342
# Scatter plot of ring price (Singapore dollars) against diamond size.
ggplot(data = diamond, mapping = aes(x = carat, y = price)) +
  geom_point() +
  labs(
    title = "Price (SGD) by Diamond Carat",
    x = "Carat",
    y = "Price (Singapore Dollars)"
  )

To convert to USD:

# Build a copy of diamond with an extra USd column holding the price
# converted from Singapore dollars.
# NOTE(review): the SGD -> USD rate is hard-coded; presumably the rate at the
# time of writing -- confirm before reusing it.
sgd_to_usd <- 0.731291
elf <- data.table(diamond)
# Refer to the column by its bare name inside mutate() rather than elf$price,
# so the calculation always uses the rows of the frame being piped in.
elf2 <- elf %>%
  mutate(USd = price * sgd_to_usd)
# Scatter plot of the converted USD price against diamond size.
ggplot(data = elf2, mapping = aes(x = carat, y = USd)) +
  geom_point() +
  labs(
    title = "Price (USD) by Diamond Carat",
    x = "Carat",
    y = "Price (USD)"
  )

4 The data frame homework (UsingR) contains the weekly average number of hours spent on homework for 15 private and 15 public schools.

  1. Use the function melt() from the reshape2 package to create a long data frame. (2)
# Preview the wide-format homework data (one column per school type).
head(homework)
##   Private Public
## 1    21.3   15.3
## 2    16.8   17.4
## 3     8.5   12.3
## 4    12.6   10.7
## 5    15.8   16.4
## 6    19.3   11.3
# Stack the Private and Public columns into a long data frame with the
# default `variable` / `value` column names.
# data.table is attached after reshape2 and masks melt(); call the reshape2
# version explicitly, which is the function the exercise asks for.
work1 <- reshape2::melt(homework)
## No id variables; using all as measure variables
head(work1)
##   variable value
## 1  Private  21.3
## 2  Private  16.8
## 3  Private   8.5
## 4  Private  12.6
## 5  Private  15.8
## 6  Private  19.3
  1. Use the long data frame and create side-by-side box plots of the hours spent on homework. (3)
# Side-by-side box plots of weekly homework hours, one box per school type,
# with the outline colour also keyed to school type.
ggplot(
  data = work1,
  mapping = aes(x = variable, y = value, colour = variable)
) +
  geom_boxplot(fill = "beige") +
  labs(
    title = "Average Number of Hours Spent Studying per Week",
    x = "School",
    y = "Hours Spent on Homework per Week"
  )

5 Download and plot a road map of Sofia, Bulgaria. Use a zoom of 13. (7)

# Download a Google road map centred on Sofia, Bulgaria at zoom level 13.
Sofia_Map <- get_map(
  location = "Sofia Bulgaria",
  zoom = 13,
  maptype = "roadmap",
  source = "google"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Sofia+Bulgaria&zoom=13&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Sofia%20Bulgaria&sensor=false
# Look up the longitude/latitude for this query string; the result is only
# printed, not stored or used by the map below.
geocode("Roads, Sofia, Bulgaria")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Roads,%20Sofia,%20Bulgaria&sensor=false
##        lon      lat
## 1 23.29303 42.69265
# Inspect the structure and attributes of the downloaded map object.
str(Sofia_Map)
##  chr [1:1280, 1:1280] "#EAEAEA" "#EAEAEA" "#EAEAEA" "#EAEAEA" ...
##  - attr(*, "class")= chr [1:2] "ggmap" "raster"
##  - attr(*, "bb")='data.frame':   1 obs. of  4 variables:
##   ..$ ll.lat: num 42.7
##   ..$ ll.lon: num 23.3
##   ..$ ur.lat: num 42.7
##   ..$ ur.lon: num 23.4
##  - attr(*, "source")= chr "google"
##  - attr(*, "maptype")= chr "roadmap"
##  - attr(*, "zoom")= num 13
# Draw the downloaded map with labelled coordinate axes.
ggmap(Sofia_Map) +
  labs(x = "Longitude", y = "Latitude")