download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', 'purchase.csv')
getwd()
## [1] "D:/OS DATA/Desktop"
library(readr)
## Warning: package 'readr' was built under R version 3.3.3
purchase <- read_csv("~/purchase.csv", col_types = cols(Time = col_datetime(format = "%Y-%m-%d %H:%M:%S")))
## Warning: Missing column names filled in: 'X1' [1]
View(purchase)
str(purchase)
## Classes 'tbl_df', 'tbl' and 'data.frame': 54772 obs. of 7 variables:
## $ X1 : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Time : POSIXct, format: "2015-07-01 00:00:01" "2015-07-01 00:00:03" ...
## $ Action : chr "order" "order" "order" "order" ...
## $ User : chr "U312622727" "U239012343" "U10007697373" "U296328517" ...
## $ Product : chr "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
## $ Quantity: int 1 1 1 1 1 1 1 1 1 1 ...
## $ Price : num 1069 1680 285 550 249 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 7
## .. ..$ X1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Time :List of 1
## .. .. ..$ format: chr "%Y-%m-%d %H:%M:%S"
## .. .. ..- attr(*, "class")= chr "collector_datetime" "collector"
## .. ..$ Action : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ User : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Product : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Quantity: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Price : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# SELECT hour(Time), sum(Quantity) FROM purchase GROUP BY hour(Time)
# Question1
qty_byH <- purchase %>% mutate(hour = format(Time, '%H')) %>% select(Quantity, hour) %>% group_by(hour) %>% summarise(purchase_sum = sum(Quantity))
## Warning: package 'bindrcpp' was built under R version 3.3.3
# Question2
plot(qty_byH, type = 'b')
# Question3
View(purchase)
## SELECT user, sum(quantity * price) FROM purchase GROUP BY user ORDER BY sum(quantity * price) DESC LIMIT 3
purchase %>% select(User, Quantity, Price) %>% mutate(purchase_amount = Quantity * Price) %>% group_by(User) %>% summarise(purchase_sum = sum(purchase_amount)) %>% arrange(desc(purchase_sum)) %>% head(3)
## # A tibble: 3 x 2
## User purchase_sum
## <chr> <dbl>
## 1 U166708333 2942744
## 2 U142809250 747550
## 3 U1006283751 515688
# Question 4
UserRank <- purchase %>% select(User, Quantity, Price) %>% mutate(purchase_amount = Quantity * Price) %>% group_by(User) %>% summarise(purchase_sum = sum(purchase_amount)) %>% arrange(desc(purchase_sum)) %>% head(10)
barplot(height = UserRank$purchase_sum, names.arg = UserRank$User, col = 'red')
data("anscombe")
anscombe
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
plot(y1 ~ x1, data = anscombe)
x <- seq(1,6)
y <- x
# chart1
plot(x, y, type = 'l')
# chart2
plot(x, y, type = 'n')
lines(x,y, type= 'l', col="blue")
lines(x,y, type= 'p', col="red")
x <- seq(1,6)
y <- x
par(mfrow=c(2,4))
types <- c("p","l","o","b","c","s", "h", "n")
for(i in 1:length(types)){
title <- paste("type: ",types[i])
plot(x, y, type="n", main=title)
lines(x, y, type=types[i])
}
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall", main = 'rainfall within different month')
lines(tainan , type="o", pch=22, lty=2, col="red")
download.file('https://github.com/ywchiu/rtibame/raw/master/data/house-prices.csv', 'house-prices.csv')
house <- read.csv('house-prices.csv')
View(house)
bedroomsTable <- table(house$Bedrooms)
#?barplot
barplot(height = bedroomsTable, xlab = 'bedroom type', ylab = 'count', main = 'bedroom by type', col = 'blue')
load('cdc.RData')
hist(cdc$weight, breaks = 500)
head(sort(table(cdc$weight), decreasing = TRUE))
##
## 160 150 180 170 200 140
## 992 970 933 922 805 794
table(cdc$weight %% 10)
##
## 0 1 2 3 4 5 6 7 8 9
## 9421 207 919 545 525 5865 481 543 1159 335
par(mfrow=c(2,1))
hist(cdc$weight, breaks = 50)
barplot(height = table(cdc$weight))
house <- read.csv('house-prices.csv')
bedroomsTable <- table(house$Bedrooms)
labels <- c('2 unit', '3 unit', '4 unit', '5 unit')
pie(bedroomsTable, xlab = 'bedroom type', labels =labels, main = 'bedroom by type', col =rainbow(length(labels)))
sorted_table <- sort(bedroomsTable, decreasing = TRUE)
pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)
## Scatter plot
plot(cdc$weight, cdc$wtdesire)
data("anscombe")
View(anscombe)
fit <- lm(y3 ~ x1, data = anscombe)
plot(y3 ~ x1, data = anscombe)
abline(fit, col = "red")
data(iris)
x <- iris[,1]
y <- iris[,3]
species <- iris[,5]
plot(x, y, col = species)
setosa <- which(iris$Species == 'setosa')
virginica <- which(iris$Species == 'virginica')
plot(x, y, type = 'n')
points(x[setosa], y[setosa], col = "green")
points(x[virginica], y[virginica], col = "red")
fit <- lm(wtdesire ~ weight, data = cdc)
fit
##
## Call:
## lm(formula = wtdesire ~ weight, data = cdc)
##
## Coefficients:
## (Intercept) weight
## 46.664 0.639
plot(cdc$weight, cdc$wtdesire)
abline(fit, col="red")
smoker_gender <- table(cdc$gender, cdc$smoke100)
colnames(smoker_gender) <- c('no', 'yes')
mosaicplot(smoker_gender, col = rainbow(length(colnames(smoker_gender))))
hist(cdc$height)
boxplot(cdc$height)
boxplot(cdc$height ~ cdc$gender)
night_club <- c(100,200,150,160,170,110,120)
mean(night_club)
## [1] 144.2857
sort(night_club)
## [1] 100 110 120 150 160 170 200
median(night_club)
## [1] 150
night_club2 <- c(night_club, 20000, 30000)
mean(night_club2)
## [1] 5667.778
median(night_club2)
## [1] 160
temp <- sample.int(40 , 100, replace = TRUE)
mean(temp)
## [1] 22.07
temp2<- c(temp, 999, 999)
mean(temp2)
## [1] 41.22549
boxplot(temp2)
temp3 <- temp2[temp2 < 100]
boxplot(temp3)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall", main = 'rainfall within different month')
lines(tainan , type="o", pch=22, lty=2, col="red")
legend(x=1, y=200, c('taipei', 'tainan'), lwd = 2, col = c('blue', 'red'), title = 'rainfall')
bedroomsTable <- table(house$Bedrooms)
labels <- c('2 unit', '3 unit', '4 unit', '5 unit')
pie(bedroomsTable, xlab = 'bedroom type', labels =labels, main = 'bedroom by type', col =rainbow(length(labels)))
sorted_table <- sort(bedroomsTable, decreasing = TRUE)
pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)
legend("topright", labels,
fill=rainbow(length(labels)), title = "units", cex=0.8)
showLayout = function(n){
for(i in 1:n){
plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
text(1, 1, labels=i, cex=10)
}
}
par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)
jpeg('bedroom.jpg')
pie(sorted_table, xlab = 'bedroom type', labels =names(sorted_table), main = 'bedroom by type', col =rainbow(length(names(sorted_table))), init.angle = 90, clockwise = TRUE)
legend("topright", labels,
fill=rainbow(length(labels)), title = "units", cex=0.8)
dev.off()
## png
## 2
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ds <- data.frame(label = c('A', 'B', 'C'), values = c(10,20,30) )
ds
## label values
## 1 A 10
## 2 B 20
## 3 C 30
plot_ly(ds, labels = ds$label, values = ds$values, type = 'pie')
plot_ly(ds, labels = ds$label, values = ds$values, type = 'pie', hole = 0.6) %>% layout(title = "Donut Chart")
library(plotly)
month<- c(1,2,3,4,5)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = tainan, fill = "tozeroy",name="tainan") %>%
layout(yaxis = list(title = 'rainfall') )
plot_ly(x = month, y = taipei, name="taipei",type='scatter', mode= 'line', color = I("black"))
## A line object has been specified, but lines is not in the mode
## Adding lines to the mode...
library(plotly)
month<- c(1,2,3,4,5)
taipei<- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
y <- list(title="Rainfall")
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = total, fill = "tonexty", name="tainan") %>%
layout(yaxis = y)
data("diamonds")
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(d, x = d$carat, y= d$price, color = d$clarity, type='scatter', mode= 'markers', size = d$carat, text= paste("Clarity", d$clarity))
data("economics")
#economics
p <- subplot(
plot_ly(economics, x = economics$date, y = economics$unemploy, type='scatter', mode = 'lines')
,
plot_ly(economics, x = economics$date, y = economics$uempmed, type='scatter', mode = 'lines')
,
margin=0.05
)
p %>% layout(showlegend=FALSE)
p <- subplot(
plot_ly(economics, x = economics$date, y = economics$unemploy, type='scatter', mode = 'lines')
,
plot_ly(economics, x = economics$date, y = economics$uempmed, type='scatter', mode = 'lines')
,
margin=0.05,
nrows = 2
)
p %>% layout(showlegend=FALSE)
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.3.3
jd <- fromJSON('https://maps.googleapis.com/maps/api/geocode/json?address=忠孝敦化')
jd$results$formatted_address
## [1] "Taiwan, Taipei City, Da’an District, 10686Zhongxiao E. RdNo. 182, Sec. 4 號"
jd$results$geometry$location$lat
## [1] 25.04162
jd$results$geometry$location$lng
## [1] 121.5517
jd <- fromJSON('https://maps.googleapis.com/maps/api/geocode/json?address=臺北市大安區和平東路三段1巷72弄1~30號')
jd$results$formatted_address[1]
## [1] "No. 1, Lane 1, Section 3, Heping East Road, Da’an District, Taipei City, Taiwan 106"
jd$results$geometry$location$lat[1]
## [1] 25.02538
jd$results$geometry$location$lng[1]
## [1] 121.5473