# Download the purchase log into the working directory and parse it with readr.
# The file is read back from the same relative path it was saved to; reading
# from "~/purchase.csv" only worked when the working directory was the home
# directory.
download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', 'purchase.csv')
library(readr)
purchase <- read_csv(
  "purchase.csv",
  col_types = cols(
    Product = col_character(),
    Time = col_datetime(format = "%Y-%m-%d %H:%M:%S")
  )
)
## Warning: Missing column names filled in: 'X1' [1]
View(purchase)
str(purchase)
## Classes 'tbl_df', 'tbl' and 'data.frame': 54772 obs. of 7 variables:
## $ X1 : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Time : POSIXct, format: "2015-07-01 00:00:01" "2015-07-01 00:00:03" ...
## $ Action : chr "order" "order" "order" "order" ...
## $ User : chr "U312622727" "U239012343" "U10007697373" "U296328517" ...
## $ Product : chr "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
## $ Quantity: int 1 1 1 1 1 1 1 1 1 1 ...
## $ Price : num 1069 1680 285 550 249 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 7
## .. ..$ X1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Time :List of 1
## .. .. ..$ format: chr "%Y-%m-%d %H:%M:%S"
## .. .. ..- attr(*, "class")= chr "collector_datetime" "collector"
## .. ..$ Action : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ User : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Product : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Quantity: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Price : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Add the hour-of-day of each purchase timestamp as a numeric column.
purchase$Hour <- as.numeric(format(purchase$Time, '%H'))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Problems 1 and 2: total quantity purchased per hour of day, drawn as a
# points-and-lines chart.
purchase_by_hour <- purchase %>%
  group_by(Hour) %>%
  summarize(sum(Quantity))
plot(
  purchase_by_hour$Hour,
  purchase_by_hour$`sum(Quantity)`,
  type = 'b'
)
# Problem 3: the three users with the largest total spend (Quantity * Price).
purchase %>%
  group_by(User) %>%
  summarise(buy_sum = sum(Quantity * Price)) %>%
  arrange(desc(buy_sum)) %>%
  head(3)
## # A tibble: 3 × 2
## User buy_sum
## <chr> <dbl>
## 1 U166708333 2942744
## 2 U142809250 747550
## 3 U1006283751 515688
# Show product, price and quantity for every purchase row of user U166708333.
purchase %>%
  filter(User == 'U166708333') %>%
  select(Product, Price, Quantity)
## # A tibble: 24 × 3
## Product Price Quantity
## <chr> <dbl> <int>
## 1 P0007258134 2990 1
## 2 P0000025675 2980 1
## 3 P0000025675 2980 1
## 4 P0005860480006 300 1
## 5 P0003259373 1899 1
## 6 P0005860480006 300 1
## 7 P0003177856 10980 1
## 8 P0006584093 320000 3
## 9 P0006584093 320000 3
## 10 P0006584093 320000 3
## # ... with 14 more rows
# Problem 4: bar chart of the ten users with the largest total spend.
purchase_by_user <- purchase %>%
  group_by(User) %>%
  summarise(buy_sum = sum(Quantity * Price)) %>%
  arrange(desc(buy_sum)) %>%
  head(10)
barplot(
  purchase_by_user$buy_sum,
  names.arg = as.factor(purchase_by_user$User),
  col = "red"
)
# Scatter plots of the four Anscombe x/y pairs. Each y column is plotted
# against its own x column (y4 was previously plotted against x1, which is
# not the x series that belongs to the fourth set).
data(anscombe)
View(anscombe)
plot(y1 ~ x1, data = anscombe)
plot(y2 ~ x2, data = anscombe)
plot(y3 ~ x3, data = anscombe)
plot(y4 ~ x4, data = anscombe)
## Line Plot
# Draw the same six points once for each base-graphics plot `type`, on a
# 2 x 4 grid, then restore a single-panel layout.
x <- seq(1, 6)
y <- x
par(mfrow = c(2, 4))
types <- c("p", "l", "o", "b", "c", "s", "h", "n")
# seq_along() stays correct even if `types` were empty, unlike 1:length(types).
for (i in seq_along(types)) {
  title <- paste("type: ", types[i])
  # Set up empty axes first, then add the data in the requested type.
  plot(x, y, type = "n", main = title)
  lines(x, y, type = types[i])
}
par(mfrow = c(1, 1))
# Rainfall series for two cities drawn on shared axes, with a legend and a
# free-text annotation.
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o",
  col = "blue",
  ylim = c(0, 220),
  xlim = c(0, 8),
  xlab = "Month",
  ylab = "Rainfall",
  main = "Rainfall in Taipei v.s. Tainan"
)
#?legend
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")
legend(
  "topright",
  legend = c("Taipei", 'Tainan'),
  pch = c(1, 22),
  col = c("blue", "red"),
  inset = .03
)
text(6, 2, "hello")
# Fetch the house-price CSV, load it, and chart how many rows fall under each
# Bedrooms value: first a bare bar chart, then one with a title and axis labels.
download.file(
  'https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv',
  'house-prices.csv'
)
housePrice <- read.csv('house-prices.csv')
View(housePrice)
bedroomsTable <- table(housePrice$Bedrooms)
barplot(bedroomsTable)
barplot(
  bedroomsTable,
  main = "Bedroom Type Calculate",
  xlab = "bedroom type",
  ylab = "count"
)
# Restore the objects saved in cdc.Rdata into the workspace; the code below
# expects a data frame named `cdc` to be among them.
# NOTE(review): the path is an absolute, machine-specific Windows path — adjust
# it to wherever cdc.Rdata lives before running.
load("C:/Users/USER/Desktop/cdc.Rdata")
# Histogram of weight with the default binning.
hist(cdc$weight)
# Same histogram with a much finer binning request (breaks = 500).
hist(cdc$weight, breaks = 500)
# Count of each distinct weight value, sorted from least to most frequent.
sort(table(cdc$weight))
##
## 68 70 79 83 86 221 229 244 271 273 294 296 297 308 309 313 318 319
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 324 327 328 344 348 364 370 371 390 405 495 500 78 80 82 231 247 249
## 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2
## 254 276 279 282 283 287 292 360 362 385 84 94 96 272 274 286 298 325
## 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
## 85 88 233 239 241 243 253 256 263 267 268 305 380 99 237 258 400 93
## 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 6
## 97 101 246 262 278 330 340 92 111 211 223 315 201 224 226 234 257 191
## 6 6 6 6 6 6 6 7 7 7 7 7 8 8 8 8 8 9
## 213 232 252 209 227 242 181 217 219 236 295 90 238 310 320 109 214 104
## 9 9 9 10 10 10 11 11 11 11 11 12 12 12 12 14 14 17
## 102 199 216 222 248 161 206 207 285 350 95 177 228 106 121 202 98 151
## 18 18 18 18 18 19 19 21 21 21 22 22 22 24 24 24 25 25
## 131 166 171 255 159 169 196 103 139 141 193 194 119 203 107 116 189 208
## 26 26 26 27 28 28 29 30 30 30 31 31 32 32 33 33 33 33
## 113 197 218 204 265 129 179 114 149 176 184 275 290 144 186 146 167 183
## 34 34 34 36 36 37 37 39 40 40 40 40 40 41 42 43 45 45
## 212 133 164 153 154 187 117 157 156 188 173 174 108 270 280 124 126 136
## 45 46 46 47 47 47 48 48 49 49 50 53 55 56 57 58 59 60
## 163 137 143 147 182 192 123 112 245 198 300 127 134 122 152 100 172 162
## 60 62 62 62 64 64 65 69 69 70 70 71 71 74 80 94 95 96
## 118 158 260 178 132 142 148 105 168 128 235 138 225 250 240 215 205 110
## 102 102 104 106 110 110 111 112 122 125 137 144 196 202 204 206 230 235
## 115 230 220 195 210 120 125 155 185 135 145 175 130 165 190 140 200 170
## 244 268 376 393 431 440 473 527 577 589 615 626 627 692 715 794 805 922
## 180 150 160
## 933 970 992
# Tally weight modulo 10, i.e. the last digit of whole-number weights.
table(cdc$weight %% 10)
##
## 0 1 2 3 4 5 6 7 8 9
## 9421 207 919 545 525 5865 481 543 1159 335
# Stack two views of weight in one column: a histogram requested with 50
# breaks on top, a per-value bar chart below; then restore a single panel.
par(mfrow = c(2, 1))
hist(cdc$weight, breaks = 50, xlim = c(70, 380))
barplot(table(cdc$weight), xlab = "weight", ylab = "Frequency")
par(mfrow = c(1, 1))
# Pie chart of how many rows of housePrice have each bedroom count.
bedrooms <- housePrice$Bedrooms
bedroomsTable <- table(bedrooms)
bedroomsTable
## bedrooms
## 2 3 4 5
## 30 67 29 2
# Derive one label per observed bedroom count so the labels always line up
# with the table (yields "2 unit", "3 unit", "4 unit", "5 unit" for the
# counts shown above) instead of hard-coding them.
labels <- paste(names(bedroomsTable), "unit")
pie(
  bedroomsTable,
  labels = labels,
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom"
)
bedroomsTable
## bedrooms
## 2 3 4 5
## 30 67 29 2
# Same pie with slices (and their labels) reordered from largest to smallest
# count and laid out clockwise.
pie(
  bedroomsTable[order(bedroomsTable, decreasing = TRUE)],
  labels = labels[order(bedroomsTable, decreasing = TRUE)],
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)
?pie
## starting httpd help server ...
## done
# Scatter of cdc weight (x) against desired weight (y).
plot(cdc$weight, cdc$wtdesire)
# Scatter of the first iris column against the third, with points coloured by
# whether the third column exceeds its median, plus a dashed line at that median.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[[1]]
y <- iris[[3]]
plot(
  x, y,
  xlab = xlab,
  ylab = ylab,
  col = ifelse(y > median(y), "red", "blue")
)
abline(h = median(y), col = "orange", lty = 2)
# Scatter of the first iris column against the third, coloured by species.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[, 1]
y <- iris[, 3]
# One colour per species; looking the colour up by species name replaces the
# three separate which()/points() passes with a single vectorised call.
species_colours <- c(setosa = "green", versicolor = "red", virginica = "orange")
plot(x, y, xlab = xlab, ylab = ylab, type = "n")
points(x, y, col = unname(species_colours[as.character(iris$Species)]))
# Scatter of weight against desired weight with a fitted regression line.
# The models below regress weight (response) on wtdesire (predictor), so
# wtdesire goes on the x axis and weight on the y axis; with the axes the
# other way round abline() would draw the line in the wrong coordinate system.
plot(cdc$wtdesire, cdc$weight,
     xlab = "weight desire", ylab = "weight",
     main = "Scatter of Weight")
# Fit on all rows.
fit2 <- lm(weight~wtdesire, data = cdc)
fit2
##
## Call:
## lm(formula = weight ~ wtdesire, data = cdc)
##
## Coefficients:
## (Intercept) wtdesire
## 14.330 1.002
# Refit using only rows whose desired weight is below 500.
fit <- lm(weight~wtdesire, data = cdc[cdc$wtdesire < 500,])
fit
##
## Call:
## lm(formula = weight ~ wtdesire, data = cdc[cdc$wtdesire < 500,
## ])
##
## Coefficients:
## (Intercept) wtdesire
## 11.149 1.022
# Overlay the filtered fit on the scatter.
abline(fit, col = "red")
# Cross-tabulate gender against the smoke100 flag, relabel the 0/1 columns as
# no/yes, and draw the table as a mosaic plot with one colour per column.
smokers_gender <- table(cdc$gender, cdc$smoke100)
smokers_gender
##
## 0 1
## m 4547 5022
## f 6012 4419
colnames(smokers_gender) <- c("no", "yes")
mosaicplot(smokers_gender, col = rainbow(ncol(smokers_gender)))
## Box Plot
# Box plot of height across all rows of cdc.
boxplot(cdc$height, ylab = "Height", main = "Box Plot of Height")
?sample
# 100 random draws from 1..30 plus three extreme values (999), to show how
# outliers pull the mean and dominate a box plot.
temp <- c(sample(1:30, 100, replace = TRUE), rep(999, 3))
mean(temp)
## [1] 45.03883
boxplot(temp)
# The same data with the extreme values filtered out.
boxplot(temp[temp < 50])
# Height split by gender.
boxplot(
  height ~ gender,
  data = cdc,
  ylab = "Height",
  xlab = "Gender",
  main = "Height vs Gender"
)
# Rainfall chart for the two cities again, this time with a titled legend
# placed at data coordinates (1, 200).
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o",
  col = "blue",
  ylim = c(0, 220),
  xlab = "Month",
  ylab = "Rainfall",
  main = "Rainfall in Taipei v.s. Tainan"
)
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")
legend(
  x = 1, y = 200,
  legend = c("taipei", "tainan"),
  lwd = c(2.5, 2.5),
  col = c("blue", "red"),
  title = "Rainfall"
)
# Largest-first, clockwise bedroom pie with a colour-keyed legend in the
# top-right corner.
pie(
  bedroomsTable[order(bedroomsTable, decreasing = TRUE)],
  labels = labels[order(bedroomsTable, decreasing = TRUE)],
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)
legend(
  "topright",
  legend = labels[order(bedroomsTable, decreasing = TRUE)],
  fill = rainbow(length(labels[order(bedroomsTable, decreasing = TRUE)])),
  title = "units",
  cex = 0.8
)
# Draw `n` empty panels, each showing its draw order (1, 2, ..., n) in large
# text, to visualise how the current multi-panel layout is filled.
#
# Args:
#   n: number of panels to draw; 0 draws nothing.
showLayout <- function(n) {
  # seq_len() is empty for n = 0, whereas 1:n would iterate over c(1, 0) and
  # draw two unwanted panels.
  for (i in seq_len(n)) {
    # Blank panel: no points, no axes, no axis titles.
    plot(1, type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    text(1, 1, labels = i, cex = 10)
  }
}
# Number six panels under three layout settings: narrow margins filled by row,
# wider margins filled by row, and wider margins filled by column.
par(mar = rep(1, 4), mfrow = c(3, 2))
showLayout(6)
par(mar = rep(3, 4), mfrow = c(3, 2))
showLayout(6)
par(mar = rep(3, 4), mfcol = c(3, 2))
showLayout(6)
# Render the ordered bedroom pie and its legend into pie.png, then close the
# PNG device so the file is written out.
png('pie.png')
pie(
  bedroomsTable[order(bedroomsTable, decreasing = TRUE)],
  labels = labels[order(bedroomsTable, decreasing = TRUE)],
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)
legend(
  "topright",
  legend = labels[order(bedroomsTable, decreasing = TRUE)],
  fill = rainbow(length(labels[order(bedroomsTable, decreasing = TRUE)])),
  title = "units",
  cex = 0.8
)
dev.off()
## png
## 2
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Three-slice example data, drawn as an interactive pie chart and then as a
# donut chart (the same pie with a hole of 0.6).
ds <- data.frame(
  labels = c("A", "B", "C"),
  values = c(10, 20, 30)
)
ds
## labels values
## 1 A 10
## 2 B 20
## 3 C 30
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie") %>%
  layout(title = "Pie Chart")
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie", hole = 0.6) %>%
  layout(title = "Donut Chart")
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
y <- list(title = "Rainfall")
# The trace type and mode are stated explicitly so plotly does not have to
# guess them (and print its "No trace type specified" / "No scatter mode
# specifed" notices); the values are the ones plotly previously inferred.
# line chart
plot_ly(x = month, y = taipei, name = "taipei", type = 'scatter', mode = 'lines')
# scatter chart
plot_ly(x = month, y = taipei, name = "taipei", type = 'scatter', mode = 'markers')
#area chart
# Two overlapping filled traces, each filled down to zero.
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
y <- list(title = "Rainfall")
plot_ly(
  x = month, y = taipei,
  fill = "tozeroy", name = "taipei",
  type = 'scatter', mode = 'markers'
) %>%
  add_trace(x = month, y = tainan, fill = "tozeroy", name = "tainan") %>%
  layout(yaxis = y)
# Stacked variant: the second trace plots the running total and fills down to
# the previous trace instead of to zero.
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
plot_ly(
  x = month, y = taipei,
  fill = "tozeroy", name = "taipei",
  type = 'scatter', mode = 'markers'
) %>%
  add_trace(x = month, y = total, fill = "tonexty", name = "tainan") %>%
  layout(yaxis = y)
View(diamonds)
library(plotly)
# Random sample of 1000 diamonds: price against carat, with marker colour
# taken from clarity and marker size from carat.
d <- diamonds[sample(nrow(diamonds), 1000), ]
# type = "scatter" is given explicitly (it is what plotly inferred before) so
# the "No trace type specified" notice is no longer printed.
plot_ly(d, x = d$carat, y = d$price, text = paste("Clarity: ", d$clarity),
        type = "scatter", mode = "markers", color = d$clarity, size = d$carat)
View(economics)
# "scatter" is a trace type, not a scatter mode, so it is passed as `type`
# and the mode is given as "lines".
# NOTE(review): "lines" is assumed to match what plotly drew when it fell back
# to its default mode for this series — confirm the rendering is unchanged.
plot_ly(economics, x = economics$date, y = economics$uempmed,
        type = "scatter", mode = "lines")
# Build each series plot once and reuse it in both subplot arrangements.
uempmed_plot <- plot_ly(economics, x = economics$date, y = economics$uempmed,
                        type = "scatter", mode = "lines")
unemploy_plot <- plot_ly(economics, x = economics$date, y = economics$unemploy,
                         type = "scatter", mode = "lines")
# Both series in the default subplot arrangement.
p <- subplot(
  uempmed_plot,
  unemploy_plot,
  margin = 0.05
) %>% layout(showlegend = FALSE)
p
# Both series arranged in two rows.
p <- subplot(
  uempmed_plot,
  unemploy_plot,
  margin = 0.05,
  nrows = 2
) %>% layout(showlegend = FALSE)
p
# Look up the latitude/longitude of an address through the Google geocoding
# JSON endpoint.
# NOTE(review): the request carries no API key — confirm the service still
# answers unauthenticated requests.
url <- 'https://maps.googleapis.com/maps/api/geocode/json?address='
# Percent-encode the address (as UTF-8 bytes) so the non-ASCII characters form
# a valid query-string value.
url <- paste0(url, URLencode(enc2utf8('臺北市大安區和平東路三段1巷72弄1~30號'), reserved = TRUE))
library(jsonlite)
res <- fromJSON(url)
# Location (lat/lng) of the first result.
res$results[1,"geometry"][1,"location"]
## lat lng
## 1 25.02531 121.5462