Homework 3

download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', 'purchase.csv')

getwd()
## [1] "D:/OS DATA/Desktop"
library(readr)
## Warning: package 'readr' was built under R version 3.3.3
purchase <- read_csv("~/purchase.csv", col_types = cols(Time = col_datetime(format = "%Y-%m-%d %H:%M:%S")))
## Warning: Missing column names filled in: 'X1' [1]
View(purchase)

str(purchase)
## Classes 'tbl_df', 'tbl' and 'data.frame':    54772 obs. of  7 variables:
##  $ X1      : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Time    : POSIXct, format: "2015-07-01 00:00:01" "2015-07-01 00:00:03" ...
##  $ Action  : chr  "order" "order" "order" "order" ...
##  $ User    : chr  "U312622727" "U239012343" "U10007697373" "U296328517" ...
##  $ Product : chr  "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
##  $ Quantity: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Price   : num  1069 1680 285 550 249 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 7
##   .. ..$ X1      : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Time    :List of 1
##   .. .. ..$ format: chr "%Y-%m-%d %H:%M:%S"
##   .. .. ..- attr(*, "class")= chr  "collector_datetime" "collector"
##   .. ..$ Action  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ User    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Product : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Quantity: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Price   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# SELECT hour(Time), sum(Quantity) FROM purchase GROUP BY hour(Time)

# Question1 
qty_byH <- purchase %>% mutate(hour = format(Time, '%H')) %>% select(Quantity, hour) %>% group_by(hour) %>% summarise(purchase_sum = sum(Quantity)) 
## Warning: package 'bindrcpp' was built under R version 3.3.3
# Question2
plot(qty_byH, type = 'b')

# Question3
View(purchase)

## SELECT user, sum(quantity * price) FROM purchase GROUP BY user ORDER BY sum(quantity * price) DESC LIMIT 3

purchase %>% select(User, Quantity, Price) %>% mutate(purchase_amount = Quantity * Price) %>% group_by(User) %>% summarise(purchase_sum = sum(purchase_amount)) %>% arrange(desc(purchase_sum)) %>% head(3)
## # A tibble: 3 x 2
##          User purchase_sum
##         <chr>        <dbl>
## 1  U166708333      2942744
## 2  U142809250       747550
## 3 U1006283751       515688
# Question 4
UserRank <- purchase %>% select(User, Quantity, Price) %>% mutate(purchase_amount = Quantity * Price) %>% group_by(User) %>% summarise(purchase_sum = sum(purchase_amount)) %>% arrange(desc(purchase_sum)) %>% head(10)

barplot(height = UserRank$purchase_sum, names.arg = UserRank$User, col = 'red')

Anscombe Dataset

data("anscombe")
anscombe
##    x1 x2 x3 x4    y1   y2    y3    y4
## 1  10 10 10  8  8.04 9.14  7.46  6.58
## 2   8  8  8  8  6.95 8.14  6.77  5.76
## 3  13 13 13  8  7.58 8.74 12.74  7.71
## 4   9  9  9  8  8.81 8.77  7.11  8.84
## 5  11 11 11  8  8.33 9.26  7.81  8.47
## 6  14 14 14  8  9.96 8.10  8.84  7.04
## 7   6  6  6  8  7.24 6.13  6.08  5.25
## 8   4  4  4 19  4.26 3.10  5.39 12.50
## 9  12 12 12  8 10.84 9.13  8.15  5.56
## 10  7  7  7  8  4.82 7.26  6.42  7.91
## 11  5  5  5  8  5.68 4.74  5.73  6.89
plot(y1 ~ x1, data = anscombe)

Line Chart

x <- seq(1,6)
y <- x

# chart1 
plot(x, y, type = 'l')

# chart2
plot(x, y, type = 'n')
lines(x,y, type= 'l', col="blue")
lines(x,y, type= 'p', col="red")

x <- seq(1,6)
y <- x
par(mfrow=c(2,4))

types <- c("p","l","o","b","c","s", "h", "n")
for(i in 1:length(types)){
  title <- paste("type: ",types[i])
  plot(x, y, type="n", main=title)
  lines(x, y, type=types[i])
}

taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220), 
     xlab="Month", ylab="Rainfall", main = 'rainfall within different month')
lines(tainan , type="o", pch=22, lty=2, col="red")

Barchart

download.file('https://github.com/ywchiu/rtibame/raw/master/data/house-prices.csv', 'house-prices.csv')
house <- read.csv('house-prices.csv')
View(house)

bedroomsTable <- table(house$Bedrooms)

#?barplot

barplot(height = bedroomsTable, xlab = 'bedroom type', ylab = 'count', main = 'bedroom by type', col = 'blue')

Histogram

load('cdc.RData')
hist(cdc$weight, breaks = 500)

head(sort(table(cdc$weight), decreasing = TRUE))
## 
## 160 150 180 170 200 140 
## 992 970 933 922 805 794
table(cdc$weight %% 10)
## 
##    0    1    2    3    4    5    6    7    8    9 
## 9421  207  919  545  525 5865  481  543 1159  335
par(mfrow=c(2,1))
hist(cdc$weight, breaks = 50)
barplot(height = table(cdc$weight))

pie chart

house <- read.csv('house-prices.csv')

bedroomsTable <- table(house$Bedrooms)

labels <- c('2 unit', '3 unit', '4 unit', '5 unit')

pie(bedroomsTable, xlab = 'bedroom type', labels =labels, main = 'bedroom by type', col =rainbow(length(labels)))

sorted_table <- sort(bedroomsTable, decreasing = TRUE)

pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)

## Scatter plot

plot(cdc$weight, cdc$wtdesire)

data("anscombe")
View(anscombe)

fit <- lm(y3 ~ x1, data = anscombe)
plot(y3 ~ x1, data = anscombe)
abline(fit, col = "red")

data(iris)
x <- iris[,1]
y <- iris[,3]

species <- iris[,5]

plot(x, y, col = species)

setosa    <- which(iris$Species == 'setosa')
virginica <- which(iris$Species == 'virginica')


plot(x, y, type = 'n')
points(x[setosa], y[setosa], col = "green")
points(x[virginica], y[virginica], col = "red")

fit <- lm(wtdesire ~ weight, data = cdc)
fit
## 
## Call:
## lm(formula = wtdesire ~ weight, data = cdc)
## 
## Coefficients:
## (Intercept)       weight  
##      46.664        0.639
plot(cdc$weight, cdc$wtdesire)
abline(fit, col="red")

Mosaic Plot

smoker_gender <- table(cdc$gender, cdc$smoke100)
colnames(smoker_gender) <- c('no', 'yes')
mosaicplot(smoker_gender, col = rainbow(length(colnames(smoker_gender))))

Boxplot

hist(cdc$height)

boxplot(cdc$height)

boxplot(cdc$height ~ cdc$gender)

night_club <- c(100,200,150,160,170,110,120)
mean(night_club)
## [1] 144.2857
sort(night_club)
## [1] 100 110 120 150 160 170 200
median(night_club)
## [1] 150
night_club2 <- c(night_club, 20000, 30000)
mean(night_club2)
## [1] 5667.778
median(night_club2)
## [1] 160
temp <- sample.int(40 , 100, replace = TRUE)
mean(temp)
## [1] 22.07
temp2<- c(temp, 999, 999)
mean(temp2)
## [1] 41.22549
boxplot(temp2)

temp3 <- temp2[temp2 < 100]
boxplot(temp3)

Legend

taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220), 
     xlab="Month", ylab="Rainfall", main = 'rainfall within different month')
lines(tainan , type="o", pch=22, lty=2, col="red")
legend(x=1, y=200, c('taipei', 'tainan'), lwd = 2, col = c('blue', 'red'), title = 'rainfall')

bedroomsTable <- table(house$Bedrooms)

labels <- c('2 unit', '3 unit', '4 unit', '5 unit')

pie(bedroomsTable, xlab = 'bedroom type', labels =labels, main = 'bedroom by type', col =rainbow(length(labels)))

sorted_table <- sort(bedroomsTable, decreasing = TRUE)

pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)

legend("topright", labels,
    fill=rainbow(length(labels)), title = "units", cex=0.8)

Par

showLayout = function(n){
for(i in 1:n){
plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
text(1, 1, labels=i, cex=10)
    }
}

par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)

par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)

par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)

Output Image

jpeg('bedroom.jpg')
pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)

legend("topright", labels,
    fill=rainbow(length(labels)), title = "units", cex=0.8)

dev.off()
## png 
##   2

Plotly

library(plotly)
## Warning: package 'plotly' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ds <- data.frame(label = c('A', 'B', 'C'), values = c(10,20,30) )
ds
##   label values
## 1     A     10
## 2     B     20
## 3     C     30
plot_ly(ds, labels = ds$label, values = ds$values, type = 'pie')
plot_ly(ds, labels = ds$label, values = ds$values, type = 'pie', hole = 0.6) %>% layout(title = "Donut Chart")

Area Chart

library(plotly)
month<- c(1,2,3,4,5)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = tainan, fill = "tozeroy",name="tainan") %>%
layout(yaxis = list(title = 'rainfall') )
plot_ly(x = month, y = taipei, name="taipei",type='scatter', mode= 'line', color = I("black"))
## A line object has been specified, but lines is not in the mode
## Adding lines to the mode...
library(plotly)
month<- c(1,2,3,4,5)
taipei<- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
y <- list(title="Rainfall")
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = total, fill = "tonexty", name="tainan") %>%
layout(yaxis = y)

Bubble Chart

data("diamonds")
diamonds
## # A tibble: 53,940 x 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
##  2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
##  3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
##  4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
##  5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
##  9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows
d <- diamonds[sample(nrow(diamonds), 1000), ]

plot_ly(d, x = d$carat, y= d$price, color = d$clarity, type='scatter', mode= 'markers', size = d$carat, text= paste("Clarity", d$clarity))

Multiple Plots

data("economics")
#economics

p <- subplot(
plot_ly(economics, x = economics$date, y = economics$unemploy, type='scatter', mode = 'lines')
,
plot_ly(economics, x = economics$date, y = economics$uempmed, type='scatter', mode = 'lines')
,
margin=0.05
)
p %>% layout(showlegend=FALSE)
p <- subplot(
plot_ly(economics, x = economics$date, y = economics$unemploy, type='scatter', mode = 'lines')
,
plot_ly(economics, x = economics$date, y = economics$uempmed, type='scatter', mode = 'lines')
,
margin=0.05, 
nrows = 2
)
p %>% layout(showlegend=FALSE)

Calling Google Geolocation API

library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.3.3
jd <- fromJSON('https://maps.googleapis.com/maps/api/geocode/json?address=忠孝敦化')
jd$results$formatted_address
## [1] "Taiwan, Taipei City, Da’an District, 10686Zhongxiao E. RdNo. 182, Sec. 4 號"
jd$results$geometry$location$lat
## [1] 25.04162
jd$results$geometry$location$lng
## [1] 121.5517
jd <- fromJSON('https://maps.googleapis.com/maps/api/geocode/json?address=臺北市大安區和平東路三段1巷72弄1~30號')
jd$results$formatted_address[1]
## [1] "No. 1, Lane 1, Section 3, Heping East Road, Da’an District, Taipei City, Taiwan 106"
jd$results$geometry$location$lat[1]
## [1] 25.02538
jd$results$geometry$location$lng[1]
## [1] 121.5473