作業三

# Download the purchase log into the working directory and parse it from that
# same file, so the script does not depend on a copy sitting in the home folder.
purchase_file <- "purchase.csv"
download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', purchase_file)

library(readr)
# Product is forced to character and Time is parsed as a timestamp; the
# remaining column types are guessed by readr.
purchase <- read_csv(purchase_file, col_types = cols(Product = col_character(),
    Time = col_datetime(format = "%Y-%m-%d %H:%M:%S")))
## Warning: Missing column names filled in: 'X1' [1]
View(purchase)

str(purchase)
## Classes 'tbl_df', 'tbl' and 'data.frame':    54772 obs. of  7 variables:
##  $ X1      : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Time    : POSIXct, format: "2015-07-01 00:00:01" "2015-07-01 00:00:03" ...
##  $ Action  : chr  "order" "order" "order" "order" ...
##  $ User    : chr  "U312622727" "U239012343" "U10007697373" "U296328517" ...
##  $ Product : chr  "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
##  $ Quantity: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Price   : num  1069 1680 285 550 249 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 7
##   .. ..$ X1      : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Time    :List of 1
##   .. .. ..$ format: chr "%Y-%m-%d %H:%M:%S"
##   .. .. ..- attr(*, "class")= chr  "collector_datetime" "collector"
##   .. ..$ Action  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ User    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Product : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Quantity: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Price   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
# Hour of day of each purchase, extracted from the timestamp and stored as a
# number.
purchase$Hour <- as.numeric(format(purchase$Time, "%H"))

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# problem 1 and 2
# Total quantity purchased in each hour of the day, drawn as a point-and-line
# chart.  The summary column keeps the name `sum(Quantity)`.
purchase_by_hour <- purchase %>%
  group_by(Hour) %>%
  summarise(`sum(Quantity)` = sum(Quantity))

plot(purchase_by_hour$Hour, purchase_by_hour$`sum(Quantity)`, type = "b")

# problem 3
# Three users with the highest total of Quantity * Price.
purchase %>%
  transmute(User, spend = Quantity * Price) %>%
  group_by(User) %>%
  summarise(buy_sum = sum(spend)) %>%
  arrange(desc(buy_sum)) %>%
  head(n = 3)
## # A tibble: 3 × 2
##          User buy_sum
##         <chr>   <dbl>
## 1  U166708333 2942744
## 2  U142809250  747550
## 3 U1006283751  515688
# List the product, price and quantity of every row recorded for user
# U166708333.
purchase %>%
  filter(User == "U166708333") %>%
  select(Product, Price, Quantity)
## # A tibble: 24 × 3
##           Product  Price Quantity
##             <chr>  <dbl>    <int>
## 1     P0007258134   2990        1
## 2     P0000025675   2980        1
## 3     P0000025675   2980        1
## 4  P0005860480006    300        1
## 5     P0003259373   1899        1
## 6  P0005860480006    300        1
## 7     P0003177856  10980        1
## 8     P0006584093 320000        3
## 9     P0006584093 320000        3
## 10    P0006584093 320000        3
## # ... with 14 more rows
# Problem 4
# Ten users with the highest total of Quantity * Price, shown as a bar chart
# with one red bar per user.
purchase_by_user <- purchase %>%
  transmute(User, spend = Quantity * Price) %>%
  group_by(User) %>%
  summarise(buy_sum = sum(spend)) %>%
  arrange(desc(buy_sum)) %>%
  head(n = 10)

barplot(
  purchase_by_user$buy_sum,
  names.arg = as.factor(purchase_by_user$User),
  col = "red"
)

Anscombe Quartet

data(anscombe)
View(anscombe)
# Draw the four Anscombe data sets, pairing every y column with its own x
# column (x4 differs from x1, so y4 must not be plotted against x1).
plot(y1 ~ x1, data = anscombe)

plot(y2 ~ x2, data = anscombe)

plot(y3 ~ x3, data = anscombe)

plot(y4 ~ x4, data = anscombe)

## Line Plot

x <- seq(1, 6)
y <- x

# Show every value of the `type` argument listed below in a 2 x 4 grid of
# panels, one panel per type.
par(mfrow = c(2, 4))
types <- c("p", "l", "o", "b", "c", "s", "h", "n")
# seq_along() stays correct even if `types` were empty, unlike 1:length(types).
for (i in seq_along(types)) {
  # Named panel_title so the graphics function title() is not masked.
  panel_title <- paste("type: ", types[i])
  # Empty frame first, then the data drawn in the requested style.
  plot(x, y, type = "n", main = panel_title)
  lines(x, y, type = types[i])
}

# Back to a single panel: rainfall of the two cities drawn on shared axes.
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o", col = "blue",
  xlim = c(0, 8), ylim = c(0, 220),
  xlab = "Month", ylab = "Rainfall",
  main = "Rainfall in Taipei v.s. Tainan"
)
# ?legend

# Second series: dashed red line with square markers.
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")

legend(
  "topright",
  legend = c("Taipei", "Tainan"),
  pch = c(1, 22),
  col = c("blue", "red"),
  inset = .03
)


# Free-form annotation placed at data coordinates (6, 2).
text(6, 2, "hello")

barplot

# Fetch the house price data set and count the rows per Bedrooms value.
download.file(
  "https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv",
  "house-prices.csv"
)
housePrice <- read.csv("house-prices.csv")
View(housePrice)

bedroomsTable <- table(housePrice$Bedrooms)
# Plain bar chart first, then the same chart with a title and axis labels.
barplot(bedroomsTable)

barplot(
  bedroomsTable,
  main = "Bedroom Type Calculate",
  xlab = "bedroom type",
  ylab = "count"
)

histogram

# Restore the objects saved in cdc.Rdata; presumably this defines the `cdc`
# data frame used below.
# NOTE(review): the path is an absolute location on one specific machine, so
# the script will not run elsewhere without editing it.
load("C:/Users/USER/Desktop/cdc.Rdata")
# Histogram of the weight column with the default binning.
hist(cdc$weight)

# Same histogram with many more bins requested (breaks = 500).
hist(cdc$weight, breaks = 500)

# Count of each distinct weight value, sorted from rarest to most frequent.
sort(table(cdc$weight))
## 
##  68  70  79  83  86 221 229 244 271 273 294 296 297 308 309 313 318 319 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 324 327 328 344 348 364 370 371 390 405 495 500  78  80  82 231 247 249 
##   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2 
## 254 276 279 282 283 287 292 360 362 385  84  94  96 272 274 286 298 325 
##   2   2   2   2   2   2   2   2   2   2   3   3   3   3   3   3   3   3 
##  85  88 233 239 241 243 253 256 263 267 268 305 380  99 237 258 400  93 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   5   5   5   5   6 
##  97 101 246 262 278 330 340  92 111 211 223 315 201 224 226 234 257 191 
##   6   6   6   6   6   6   6   7   7   7   7   7   8   8   8   8   8   9 
## 213 232 252 209 227 242 181 217 219 236 295  90 238 310 320 109 214 104 
##   9   9   9  10  10  10  11  11  11  11  11  12  12  12  12  14  14  17 
## 102 199 216 222 248 161 206 207 285 350  95 177 228 106 121 202  98 151 
##  18  18  18  18  18  19  19  21  21  21  22  22  22  24  24  24  25  25 
## 131 166 171 255 159 169 196 103 139 141 193 194 119 203 107 116 189 208 
##  26  26  26  27  28  28  29  30  30  30  31  31  32  32  33  33  33  33 
## 113 197 218 204 265 129 179 114 149 176 184 275 290 144 186 146 167 183 
##  34  34  34  36  36  37  37  39  40  40  40  40  40  41  42  43  45  45 
## 212 133 164 153 154 187 117 157 156 188 173 174 108 270 280 124 126 136 
##  45  46  46  47  47  47  48  48  49  49  50  53  55  56  57  58  59  60 
## 163 137 143 147 182 192 123 112 245 198 300 127 134 122 152 100 172 162 
##  60  62  62  62  64  64  65  69  69  70  70  71  71  74  80  94  95  96 
## 118 158 260 178 132 142 148 105 168 128 235 138 225 250 240 215 205 110 
## 102 102 104 106 110 110 111 112 122 125 137 144 196 202 204 206 230 235 
## 115 230 220 195 210 120 125 155 185 135 145 175 130 165 190 140 200 170 
## 244 268 376 393 431 440 473 527 577 589 615 626 627 692 715 794 805 922 
## 180 150 160 
## 933 970 992
table(cdc$weight %% 10)
## 
##    0    1    2    3    4    5    6    7    8    9 
## 9421  207  919  545  525 5865  481  543 1159  335
# Stack two panels: a histogram of weight above a bar chart of the counts of
# each distinct weight value, then restore the single-panel layout.
par(mfrow = c(2, 1))
hist(cdc$weight, breaks = 50, xlim = c(70, 380))
barplot(table(cdc$weight), xlab = "weight", ylab = "Frequency")

par(mfrow = c(1, 1))

Pie Chart

bedrooms <- housePrice$Bedrooms
bedroomsTable <- table(bedrooms)
bedroomsTable
## bedrooms
##  2  3  4  5 
## 30 67 29  2
# Build one label per table category ("2 unit", "3 unit", ...) from the table
# itself, so the labels always line up with the bedroom counts actually
# present in the data instead of a hard-coded list.
labels <- paste(names(bedroomsTable), "unit")
pie(bedroomsTable, labels = labels,
    col = rainbow(length(labels)),
    main = "Pie Chart of Bedroom")

bedroomsTable
## bedrooms
##  2  3  4  5 
## 30 67 29  2
# Same pie with the slices sorted from the largest count to the smallest and
# drawn clockwise; the labels are reordered with the same index.
by_count <- order(bedroomsTable, decreasing = TRUE)
pie(
  bedroomsTable[by_count],
  labels[by_count],
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

?pie
## starting httpd help server ...
##  done

Scatter Plot

plot(cdc$weight, cdc$wtdesire)

# Scatter of iris column 1 against column 3.  Points whose column-3 value is
# above the median are red, the rest blue; a dashed orange line marks the
# median itself.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[[1]]
y <- iris[[3]]
y_median <- median(y)
point_col <- ifelse(y > y_median, "red", "blue")
plot(x, y, xlab = xlab, ylab = ylab, col = point_col)

abline(h = y_median, col = "orange", lty = 2)

# Same two iris columns, but coloured by species: an empty frame is drawn
# first and each species is then added with its own colour.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[[1]]
y <- iris[[3]]
plot(x, y, xlab = xlab, ylab = ylab, type = "n")

# Row indices of each species.
setosa <- which(iris$Species == "setosa")
versicolor <- which(iris$Species == "versicolor")
virginica <- which(iris$Species == "virginica")

points(x[setosa], y[setosa], col = "green")
points(x[versicolor], y[versicolor], col = "red")
points(x[virginica], y[virginica], col = "orange")

# Desired weight goes on the x axis and actual weight on the y axis so the
# panel matches the model fitted below (weight ~ wtdesire); the line added by
# abline() on that fit is only correct when the model's response is the
# plot's y variable.
plot(cdc$wtdesire, cdc$weight,
     xlab="weight desire",ylab="weight",
     main="Scatter of Weight")

# Linear model of weight on wtdesire using every row of cdc.
fit2 <- lm(weight~wtdesire, data = cdc)
fit2
## 
## Call:
## lm(formula = weight ~ wtdesire, data = cdc)
## 
## Coefficients:
## (Intercept)     wtdesire  
##      14.330        1.002
# Same model, restricted to rows whose wtdesire is below 500.
fit <- lm(weight~wtdesire, data = cdc[cdc$wtdesire < 500,])
fit
## 
## Call:
## lm(formula = weight ~ wtdesire, data = cdc[cdc$wtdesire < 500, 
##     ])
## 
## Coefficients:
## (Intercept)     wtdesire  
##      11.149        1.022
# Add the restricted model's fitted line, in red, to the current plot.
abline(fit,col="red")

Mosaic Chart

smokers_gender <- table(cdc$gender, cdc$smoke100)
smokers_gender
##    
##        0    1
##   m 4547 5022
##   f 6012 4419
# Rename the two smoke100 columns to "no" / "yes" and draw the gender by
# smoking table as a mosaic plot, one colour per column.
colnames(smokers_gender) <- c("no", "yes")
mosaicplot(smokers_gender, col = rainbow(ncol(smokers_gender)))

## Box Plot

# Single box plot of the height column.
boxplot(
  cdc$height,
  ylab = "Height",
  main = "Box Plot of Height"
)

?sample
# 100 random draws from 1..30 followed by three extreme values (999), used to
# show the effect of outliers on the mean and on the box plot.
temp <- c(sample(1:30, 100, replace = TRUE), rep(999, 3))
mean(temp)
## [1] 45.03883
boxplot(temp)

# Same data with the extreme values filtered out.
boxplot(temp[temp < 50])

# Height split by gender, one box per group.
boxplot(
  height ~ gender,
  data = cdc,
  ylab = "Height", xlab = "Gender",
  main = "Height vs Gender"
)

# Rainfall of both cities in one panel, with a titled legend anchored at data
# coordinates (1, 200).
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o", col = "blue",
  ylim = c(0, 220),
  xlab = "Month", ylab = "Rainfall",
  main = "Rainfall in Taipei v.s. Tainan"
)
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")

legend(
  x = 1, y = 200,
  legend = c("taipei", "tainan"),
  lwd = c(2.5, 2.5),
  col = c("blue", "red"),
  title = "Rainfall"
)

# Sorted pie chart (largest slice first, clockwise) with a legend that lists
# the labels in the same sorted order.
slice_order <- order(bedroomsTable, decreasing = TRUE)
sorted_labels <- labels[slice_order]
pie(
  bedroomsTable[slice_order],
  sorted_labels,
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

legend(
  "topright",
  sorted_labels,
  fill = rainbow(length(sorted_labels)),
  title = "units",
  cex = 0.8
)

par

# Draw `n` empty panels, each showing its own index in large type, to
# visualise how the current par() layout is filled.
#
# n: number of panels to draw; 0 draws nothing.
# Returns NULL invisibly (the value of the for loop).
showLayout <- function(n) {
  # seq_len() yields an empty sequence for n = 0, whereas 1:n would run the
  # loop for 1 and then 0.
  for (i in seq_len(n)) {
    # Blank frame without axes or axis labels, then the panel number.
    plot(1, type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    text(1, 1, labels = i, cex = 10)
  }
}

# Margins of 1 on every side, 3 x 2 grid set through mfrow (panels are
# filled row by row).
par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)

# Same mfrow grid with margins of 3 on every side.
par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)

# Same margins, but the grid is set through mfcol (panels are filled column
# by column).
par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)

Save Image

# Render the sorted pie chart and its legend into pie.png, then close the
# device so the file is written.
png("pie.png")

slice_order <- order(bedroomsTable, decreasing = TRUE)
sorted_labels <- labels[slice_order]
pie(
  bedroomsTable[slice_order],
  sorted_labels,
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

legend(
  "topright",
  sorted_labels,
  fill = rainbow(length(sorted_labels)),
  title = "units",
  cex = 0.8
)


dev.off()
## png 
##   2

Plotly

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ds <- data.frame(labels = c("A", "B", "C"),
        values = c(10, 20, 30))
ds
##   labels values
## 1      A     10
## 2      B     20
## 3      C     30
# Pie chart of the three values, then the same data as a donut (hole = 0.6).
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie") %>%
  layout(title = "Pie Chart")
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie", hole = 0.6) %>%
  layout(title = "Donut Chart")

Area Chart

library(plotly)

month<- c(1,2,3,4,5)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
y <- list(title="Rainfall")

# line chart
plot_ly(x = month, y = taipei, name="taipei",mode = 'lines')
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# scatter chart
plot_ly(x = month, y = taipei, name="taipei")
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
#area chart
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

# Both series are filled down to zero, so the two areas overlap.
y <- list(title = "Rainfall")
plot_ly(
  x = month, y = taipei,
  fill = "tozeroy", name = "taipei",
  type = "scatter", mode = "markers"
) %>%
  add_trace(x = month, y = tainan, fill = "tozeroy", name = "tainan") %>%
  layout(yaxis = y)
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

# The second trace plots the running total and fills back to the previous
# trace ("tonexty"), so the band labelled "tainan" sits on top of taipei.
total <- taipei + tainan
plot_ly(
  x = month, y = taipei,
  fill = "tozeroy", name = "taipei",
  type = "scatter", mode = "markers"
) %>%
  add_trace(x = month, y = total, fill = "tonexty", name = "tainan") %>%
  layout(yaxis = y)

Bubble Chart

View(diamonds)

library(plotly)
# Random sample of 1000 rows; marker size follows carat and colour follows
# clarity, with the clarity shown in the marker text.
d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(
  d,
  x = d$carat, y = d$price,
  text = paste("Clarity: ", d$clarity),
  mode = "markers",
  color = d$clarity, size = d$carat
)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter

Multiple plots

View(economics)

plot_ly(economics, x = economics$date, y = economics$uempmed, mode = "scatter")
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Two plots over the same dates (uempmed, then unemploy), placed next to
# each other with the legend hidden.
uempmed_plot <- plot_ly(economics, x = economics$date, y = economics$uempmed, mode = "scatter")
unemploy_plot <- plot_ly(economics, x = economics$date, y = economics$unemploy, mode = "scatter")
p <- subplot(uempmed_plot, unemploy_plot, margin = 0.05) %>%
  layout(showlegend = FALSE)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
p
# The same two plots arranged in two rows instead of side by side.
uempmed_plot <- plot_ly(economics, x = economics$date, y = economics$uempmed, mode = "scatter")
unemploy_plot <- plot_ly(economics, x = economics$date, y = economics$unemploy, mode = "scatter")
p <- subplot(uempmed_plot, unemploy_plot, margin = 0.05, nrows = 2) %>%
  layout(showlegend = FALSE)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
p

用Tableau 做視覺化

Get Geocoding

# Build the geocoding request.  The address contains non-ASCII characters, so
# it is converted to UTF-8 and percent-encoded before being appended to the
# query string instead of being pasted in raw.
# NOTE(review): the service may also require a `key=` parameter — confirm
# against the current API documentation.
url <- 'https://maps.googleapis.com/maps/api/geocode/json?address='

url <- paste0(url, URLencode(enc2utf8('臺北市大安區和平東路三段1巷72弄1~30號'), reserved = TRUE))

library(jsonlite)
res <- fromJSON(url)
# Location (lat / lng) of the first result.
res$results[1,"geometry"][1,"location"]
##        lat      lng
## 1 25.02531 121.5462