# Timestamp the report so the rendered output shows when it was run.
print(date())
## [1] "Mon Oct 03 22:59:49 2016"
Due Date: October 4, 2016
Total Points: 32
library(ggplot2)
library(ggmap)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(UsingR)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
##
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
##
## cancer
## The following object is masked from 'package:ggmap':
##
## crime
library(reshape2)
library(data.table)
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, last
1 Consider the SSN.txt file from http://myweb.fsu.edu/jelsner/temp/data/SSN.txt. The file contains monthly sunspot numbers since 1851.
# Read the sunspot table (first row holds the column names) and inspect
# its structure.
Sunsp <- "http://myweb.fsu.edu/jelsner/temp/data/SSN.txt"
Suns <- read.table(file = Sunsp, header = TRUE)
str(Suns)
## 'data.frame': 160 obs. of 13 variables:
## $ Year: int 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 ...
## $ Jan : num 75.5 68.4 41.1 15.4 12.3 0.5 13.7 39 83.7 82.4 ...
## $ Feb : num 105.4 66.4 42.9 20 11.4 ...
## $ Mar : num 64.6 61.2 37.7 20.7 17.4 0.4 5.2 57.5 90.3 98.9 ...
## $ Apr : num 56.5 65.4 47.6 26.5 4.4 6.5 11.1 38.3 85.7 71.4 ...
## $ May : num 62.6 54.9 34.7 24 9.1 ...
## $ Jun : num 63.2 46.9 40 21.1 5.3 ...
## $ Jul : num 36.1 42.1 45.9 18.7 0.4 ...
## $ Aug : num 57.4 39.7 50.4 15.8 3.1 ...
## $ Sep : num 67.9 37.5 33.5 22.4 0 ...
## $ Oct : num 62.5 67.3 42.3 12.6 9.6 ...
## $ Nov : num 51 54.3 28.8 28.2 4.2 7.7 31.4 51.9 97.2 97.9 ...
## $ Dec : num 71.4 45.4 23.4 21.6 3.1 7.2 37.2 66.9 81 95.6 ...
# Histogram of the September sunspot numbers across years, using 24 bins.
ggplot(data = Suns, mapping = aes(x = Sep)) +
  geom_histogram(bins = 24, fill = "gold", colour = "black") +
  labs(
    title = "Frequency of September Sunspots",
    x = "Number of Sunspots",
    y = "Number of Years"
  )
# Box plot of the June sunspot numbers drawn against the Year axis.
# The constant `group` puts every year into one group, i.e. a single box.
ggplot(data = Suns, mapping = aes(x = Year, y = Jun, group = TRUE)) +
  geom_boxplot() +
  labs(title = "June Sunspots", x = "Year", y = "Sunspots")
# Same June box plot, but with an empty-string x value so the box sits on a
# single unlabeled category instead of the Year axis.
ggplot(data = Suns, mapping = aes(x = "", y = Jun)) +
  geom_boxplot() +
  labs(title = "June Sunspots", y = "Sunspots")
# Base-graphics box plot of the June sunspot numbers.
boxplot(Suns[["Jun"]], xlab = "June", ylab = "Sunspots")

# Annotate the plot with Tukey's five-number summary, placing each label to
# the right of the box (x = 1.3) at the height of the matching statistic.
f <- fivenum(Suns[["Jun"]])
summary_labels <- c(
  "Minimum", "1st Quartile", "Median", "3rd Quartile", "Maximum"
)
text(x = rep(1.3, length(f)), y = f, labels = summary_labels)
# Scatter plot comparing June and September sunspot numbers, one point per
# row of Suns.
ggplot(data = Suns, mapping = aes(x = Jun, y = Sep)) +
  geom_point() +
  labs(title = "June and September Sunspots", x = "June", y = "September")
2 The babyboom dataset (UsingR) contains the time of birth, sex, and birth weight for 44 babies born in one 24-hour period at a hospital in Brisbane, Australia.
Create side-by-side box plots of birth weight (grams) by gender. Place the birth weight on the vertical axis and gender on the horizontal axis. (3)
# Preview the first six rows of the babyboom data.
head(babyboom, n = 6L)
## clock.time gender wt running.time
## 1 5 girl 3837 5
## 2 104 girl 3334 64
## 3 118 boy 3554 78
## 4 155 boy 3838 115
## 5 257 boy 3625 177
## 6 405 girl 2208 245
# Reshape babyboom to long format: `gender` stays as the identifier and every
# other column is stacked into variable/value pairs. Then preview the result.
boom1 <- melt(data = babyboom, id.vars = "gender")
head(boom1, n = 6L)
## gender variable value
## 1 girl clock.time 5
## 2 girl clock.time 104
## 3 boy clock.time 118
## 4 boy clock.time 155
## 5 boy clock.time 257
## 6 girl clock.time 405
# Side-by-side box plots of birth weight (grams) by gender.
# boom1 is the long-format melt of babyboom, so `value` stacks clock.time, wt
# and running.time together. Keep only the "wt" rows; otherwise the boxes
# would summarise a mixture of times and weights rather than birth weight.
boom_wt <- boom1[boom1$variable == "wt", ]
ggplot(data = boom_wt, mapping = aes(x = gender, y = value)) +
  geom_boxplot(fill = "#DE980C") +
  labs(
    title = "Birth Weight by Gender",
    x = "Gender",
    y = "Birth Weight (grams)"
  )
3 The data set diamond (UsingR) contains data about the price of 48 diamond rings. The variable price records the price in Singapore dollars, and the variable carat records the size of the diamond. You are interested in predicting price from carat size.
Make a scatter plot with carat on the horizontal axis and price on the vertical axis. (3)
# Preview the first six rows of the diamond data (carat and price).
head(diamond, n = 6L)
## carat price
## 1 0.17 355
## 2 0.16 328
## 3 0.17 350
## 4 0.18 325
## 5 0.25 642
## 6 0.16 342
# Scatter plot of ring price (Singapore dollars) against carat size.
ggplot(data = diamond, mapping = aes(x = carat, y = price)) +
  geom_point() +
  labs(
    title = "Price (SGD) by Diamond Carat",
    x = "Carat",
    y = "Price (Singapore Dollars)"
  )
To convert to USD:
# Convert the ring prices from Singapore dollars to US dollars and plot the
# converted price against carat size.
sgd_to_usd <- 0.731291 # fixed SGD -> USD rate applied to every price
elf <- data.table(diamond)
# Refer to `price` through mutate()'s data mask instead of `elf$price`, so the
# new column is always computed from the rows flowing through the pipe.
elf2 <- elf %>%
  mutate(USd = price * sgd_to_usd)
ggplot(data = elf2, mapping = aes(x = carat, y = USd)) +
  geom_point() +
  labs(title = "Price (USD) by Diamond Carat", x = "Carat", y = "Price (USD)")
4 The data frame homework (UsingR) contains the weekly average number of hours spent on homework for 15 private and 15 public schools.
# Preview the first six rows of the homework data (Private and Public columns).
head(homework, n = 6L)
## Private Public
## 1 21.3 15.3
## 2 16.8 17.4
## 3 8.5 12.3
## 4 12.6 10.7
## 5 15.8 16.4
## 6 19.3 11.3
# Reshape homework to long format. No id variables are given, so every column
# is treated as a measure and stacked into variable/value pairs.
work1 <- melt(data = homework)
## No id variables; using all as measure variables
# Preview the first six rows of the long-format homework data.
head(work1, n = 6L)
## variable value
## 1 Private 21.3
## 2 Private 16.8
## 3 Private 8.5
## 4 Private 12.6
## 5 Private 15.8
## 6 Private 19.3
# Side-by-side box plots of weekly homework hours, one box per school type.
# Outlines are coloured by school type; the boxes are filled beige.
ggplot(
  data = work1,
  mapping = aes(x = variable, y = value, colour = variable)
) +
  geom_boxplot(fill = "beige") +
  labs(
    title = "Average Number of Hours Spent Studying per Week",
    x = "School",
    y = "Hours Spent on Homework per Week"
  )
5 Download and plot a road map of Sofia, Bulgaria. Use a zoom of 13. (7)
# Download a Google road map centred on Sofia, Bulgaria at zoom level 13.
Sofia_Map <- get_map(
  location = "Sofia Bulgaria",
  zoom = 13,
  maptype = "roadmap",
  source = "google"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Sofia+Bulgaria&zoom=13&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Sofia%20Bulgaria&sensor=false
# Look up longitude/latitude for the query string; the result is only printed.
geocode(location = "Roads, Sofia, Bulgaria")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Roads,%20Sofia,%20Bulgaria&sensor=false
## lon lat
## 1 23.29303 42.69265
# Inspect the structure of the downloaded map object.
str(object = Sofia_Map)
## chr [1:1280, 1:1280] "#EAEAEA" "#EAEAEA" "#EAEAEA" "#EAEAEA" ...
## - attr(*, "class")= chr [1:2] "ggmap" "raster"
## - attr(*, "bb")='data.frame': 1 obs. of 4 variables:
## ..$ ll.lat: num 42.7
## ..$ ll.lon: num 23.3
## ..$ ur.lat: num 42.7
## ..$ ur.lon: num 23.4
## - attr(*, "source")= chr "google"
## - attr(*, "maptype")= chr "roadmap"
## - attr(*, "zoom")= num 13
# Draw the downloaded Sofia map with labelled coordinate axes.
ggmap(Sofia_Map) +
  labs(x = "Longitude", y = "Latitude")