Demo20170415

作業三

# Fetch the purchase log and read it back from the very same path.
# The download used to land in the working directory while the read looked
# in "~/", which only worked when the two happened to be the same folder.
purchase_file <- 'purchase.csv'
download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', purchase_file)

library(readr)
# Product is read as character and Time is parsed with an explicit
# date-time format; the remaining columns are left to readr's type guessing.
purchase <- read_csv(purchase_file, col_types = cols(Product = col_character(), 
    Time = col_datetime(format = "%Y-%m-%d %H:%M:%S")))

## Warning: Missing column names filled in: 'X1' [1]

# Open the data in the spreadsheet-style viewer (meant for interactive use).
View(purchase)

# Print the structure of the data: one line per column with its type.
str(purchase)

## Classes 'tbl_df', 'tbl' and 'data.frame':    54772 obs. of  7 variables:
##  $ X1      : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Time    : POSIXct, format: "2015-07-01 00:00:01" "2015-07-01 00:00:03" ...
##  $ Action  : chr  "order" "order" "order" "order" ...
##  $ User    : chr  "U312622727" "U239012343" "U10007697373" "U296328517" ...
##  $ Product : chr  "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
##  $ Quantity: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Price   : num  1069 1680 285 550 249 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 7
##   .. ..$ X1      : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Time    :List of 1
##   .. .. ..$ format: chr "%Y-%m-%d %H:%M:%S"
##   .. .. ..- attr(*, "class")= chr  "collector_datetime" "collector"
##   .. ..$ Action  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ User    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Product : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Quantity: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Price   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

# Hour-of-day of every purchase: format the timestamp as its two-digit hour
# and convert that text straight to a number.
purchase$Hour <- as.numeric(format(purchase$Time, '%H'))

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# problem 1 and 2
# Total quantity sold in each hour of the day. The summary column is given
# an explicit name so it can be referenced without backticks.
purchase_by_hour <- purchase %>%
  group_by(Hour) %>%
  summarise(total_quantity = sum(Quantity))

# Points joined by lines; axis labels are set explicitly rather than being
# derived from the R expressions passed in.
plot(purchase_by_hour$Hour, purchase_by_hour$total_quantity, type = 'b',
     xlab = 'Hour', ylab = 'Quantity')

# problem 3
# Three users with the largest total spend, where the spend of one row is
# Quantity * Price.
purchase %>%
  mutate(amount = Quantity * Price) %>%
  group_by(User) %>%
  summarise(buy_sum = sum(amount)) %>%
  arrange(desc(buy_sum)) %>%
  head(3)

## # A tibble: 3 × 2
##          User buy_sum
##         <chr>   <dbl>
## 1  U166708333 2942744
## 2  U142809250  747550
## 3 U1006283751  515688

# Every line item bought by user U166708333: product, unit price, quantity.
purchase %>%
  filter(User == 'U166708333') %>%
  select(Product, Price, Quantity)

## # A tibble: 24 × 3
##           Product  Price Quantity
##             <chr>  <dbl>    <int>
## 1     P0007258134   2990        1
## 2     P0000025675   2980        1
## 3     P0000025675   2980        1
## 4  P0005860480006    300        1
## 5     P0003259373   1899        1
## 6  P0005860480006    300        1
## 7     P0003177856  10980        1
## 8     P0006584093 320000        3
## 9     P0006584093 320000        3
## 10    P0006584093 320000        3
## # ... with 14 more rows

# Problem 4
# Ten biggest spenders by total purchase amount (Quantity * Price per row).
# group_by()/summarise() only touch the columns they name, so no select()
# is needed beforehand.
purchase_by_user <- purchase %>%
  group_by(User) %>%
  summarise(buy_sum = sum(Quantity * Price)) %>%
  arrange(desc(buy_sum)) %>%
  head(10)

# One bar per user; the user ids are passed as plain names, no factor
# conversion required.
barplot(purchase_by_user$buy_sum, names.arg = purchase_by_user$User, col = "red")

Anscombe Quartet

# Anscombe's quartet ships as paired columns x1..x4 / y1..y4.
data(anscombe)
View(anscombe)

# Each y series is plotted against its own x series. Using x1 for every
# panel pairs y4 with the wrong predictor, because x4 holds different values.
plot(y1 ~ x1, data = anscombe)

plot(y2 ~ x2, data = anscombe)

plot(y3 ~ x3, data = anscombe)

plot(y4 ~ x4, data = anscombe)

## Line Plot

x <- seq(1, 6)
y <- x

# Draw the same six points once per plot type on a 2 x 4 grid of panels.
par(mfrow = c(2, 4))
types <- c("p", "l", "o", "b", "c", "s", "h", "n")
# seq_along() stays empty for an empty vector, whereas 1:length() would
# iterate over 1 and 0.
for (i in seq_along(types)) {
  title <- paste("type: ", types[i])
  # Empty frame with the title first, then the data in the requested style.
  plot(x, y, type = "n", main = title)
  lines(x, y, type = types[i])
}

# Reset to a single panel, then compare the rainfall of the two cities.
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

# Taipei is drawn first and fixes the axes; the x range is wider than the
# five data points.
plot(
  taipei,
  type = "o", col = "blue",
  xlim = c(0, 8), ylim = c(0, 220),
  main = "Rainfall in Taipei v.s. Tainan",
  xlab = "Month", ylab = "Rainfall"
)
#?legend

# Tainan overlaid in red with a dashed line and square markers.
lines(tainan, col = "red", lty = 2, pch = 22, type = "o")

legend("topright", legend = c("Taipei", 'Tainan'), col = c("blue", "red"),
       pch = c(1, 22), inset = .03)


# Free-form annotation placed at data coordinates (6, 2).
text(x = 6, y = 2, labels = "hello")

barplot

# Download the house price sample into the working directory and load it.
download.file(
  url = 'https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv',
  destfile = 'house-prices.csv'
)
housePrice <- read.csv(file = 'house-prices.csv')
View(housePrice)

# Number of houses for each bedroom count, first as a bare bar chart ...
bedroomsTable <- table(housePrice$Bedrooms)
barplot(bedroomsTable)

# ... then again with a title and axis labels.
barplot(
  bedroomsTable,
  xlab = "bedroom type",
  ylab = "count",
  main = "Bedroom Type Calculate"
)

histogram

# Restore saved objects from a machine-specific absolute path.
# NOTE(review): presumably this file defines `cdc`, which the code below
# uses; the path will not exist on other machines — confirm and relocate.
load("C:/Users/USER/Desktop/cdc.Rdata")
# Histogram of weight with the default binning.
hist(cdc$weight)

# Same histogram with a request for about 500 bins.
hist(cdc$weight, breaks = 500)

# How often each distinct weight occurs, ordered from rarest to most common.
sort(table(cdc$weight))

## 
##  68  70  79  83  86 221 229 244 271 273 294 296 297 308 309 313 318 319 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 324 327 328 344 348 364 370 371 390 405 495 500  78  80  82 231 247 249 
##   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2 
## 254 276 279 282 283 287 292 360 362 385  84  94  96 272 274 286 298 325 
##   2   2   2   2   2   2   2   2   2   2   3   3   3   3   3   3   3   3 
##  85  88 233 239 241 243 253 256 263 267 268 305 380  99 237 258 400  93 
##   4   4   4   4   4   4   4   4   4   4   4   4   4   5   5   5   5   6 
##  97 101 246 262 278 330 340  92 111 211 223 315 201 224 226 234 257 191 
##   6   6   6   6   6   6   6   7   7   7   7   7   8   8   8   8   8   9 
## 213 232 252 209 227 242 181 217 219 236 295  90 238 310 320 109 214 104 
##   9   9   9  10  10  10  11  11  11  11  11  12  12  12  12  14  14  17 
## 102 199 216 222 248 161 206 207 285 350  95 177 228 106 121 202  98 151 
##  18  18  18  18  18  19  19  21  21  21  22  22  22  24  24  24  25  25 
## 131 166 171 255 159 169 196 103 139 141 193 194 119 203 107 116 189 208 
##  26  26  26  27  28  28  29  30  30  30  31  31  32  32  33  33  33  33 
## 113 197 218 204 265 129 179 114 149 176 184 275 290 144 186 146 167 183 
##  34  34  34  36  36  37  37  39  40  40  40  40  40  41  42  43  45  45 
## 212 133 164 153 154 187 117 157 156 188 173 174 108 270 280 124 126 136 
##  45  46  46  47  47  47  48  48  49  49  50  53  55  56  57  58  59  60 
## 163 137 143 147 182 192 123 112 245 198 300 127 134 122 152 100 172 162 
##  60  62  62  62  64  64  65  69  69  70  70  71  71  74  80  94  95  96 
## 118 158 260 178 132 142 148 105 168 128 235 138 225 250 240 215 205 110 
## 102 102 104 106 110 110 111 112 122 125 137 144 196 202 204 206 230 235 
## 115 230 220 195 210 120 125 155 185 135 145 175 130 165 190 140 200 170 
## 244 268 376 393 431 440 473 527 577 589 615 626 627 692 715 794 805 922 
## 180 150 160 
## 933 970 992

# Count the weights by their remainder modulo 10.
table(cdc$weight %% 10)

## 
##    0    1    2    3    4    5    6    7    8    9 
## 9421  207  919  545  525 5865  481  543 1159  335

# Two stacked panels: a 50-bin histogram on top and a bar per distinct
# weight value underneath.
par(mfrow = c(2, 1))
hist(cdc$weight, breaks = 50, xlim = c(70, 380))
barplot(table(cdc$weight), xlab = "weight", ylab = "Frequency")

# Back to a single panel for the plots that follow.
par(mfrow = c(1, 1))

Pie Chart

# Tabulate the bedroom counts; tabulating the plain variable `bedrooms`
# makes that name the header of the printed table.
bedrooms <- housePrice$Bedrooms
bedroomsTable <- table(bedrooms)
# Print the counts.
bedroomsTable

## bedrooms
##  2  3  4  5 
## 30 67 29  2

# Slice labels are built from the table's own category names ("2 unit",
# "3 unit", ...) so they cannot drift out of step with the data.
labels <- paste(names(bedroomsTable), "unit")
pie(bedroomsTable, labels = labels,
    col = rainbow(length(labels)),
    main = "Pie Chart of Bedroom")

# Print the counts again for reference before reordering the slices.
bedroomsTable

## bedrooms
##  2  3  4  5 
## 30 67 29  2

# Same pie with the slices sorted from largest to smallest count and laid
# out clockwise; the labels are reordered with the same permutation.
bedroom_order <- order(bedroomsTable, decreasing = TRUE)
pie(
  bedroomsTable[bedroom_order],
  labels = labels[bedroom_order],
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

# Open the help page for pie().
?pie

## starting httpd help server ...

##  done

Scatter Plot

# Scatter plot with weight on the x axis and wtdesire on the y axis.
plot(cdc$weight, cdc$wtdesire)

# Scatter of iris column 1 against column 3, with the axis labels taken
# from the column names.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[[1]]
y <- iris[[3]]
# Points above the median of column 3 are red, all others blue.
plot(x, y,
     col = ifelse(y > median(y), "red", "blue"),
     xlab = xlab, ylab = ylab)

# Dashed horizontal guide at that median.
abline(h = median(y), lty = 2, col = "orange")

# Scatter of iris column 1 against column 3, one colour per species.
data(iris)
xlab <- names(iris)[1]
ylab <- names(iris)[3]
x <- iris[, 1]
y <- iris[, 3]
# Empty frame sized to the data; the points are added afterwards.
plot(x, y, xlab = xlab, ylab = ylab, type = "n")

# Colour lookup keyed by species name. Indexing it with each row's species
# colours all points in one vectorised call instead of three copy-pasted
# which()/points() pairs.
species_col <- c(setosa = "green", versicolor = "red", virginica = "orange")
points(x, y, col = unname(species_col[as.character(iris$Species)]))

# Weight (x) against desired weight (y), with readable axis labels.
# The x label was misspelled "weigth".
plot(cdc$weight, cdc$wtdesire,
     xlab = "weight", ylab = "weight desire",
     main = "Scatter of Weight")

# Linear model with weight as response and wtdesire as predictor, fitted on
# every row of cdc; printing it shows the coefficients.
fit2 <- lm(weight~wtdesire, data = cdc)
fit2

## 
## Call:
## lm(formula = weight ~ wtdesire, data = cdc)
## 
## Coefficients:
## (Intercept)     wtdesire  
##      14.330        1.002

# Same model, fitted only on rows whose wtdesire is below 500.
fit <- lm(weight~wtdesire, data = cdc[cdc$wtdesire < 500,])
fit

## 
## Call:
## lm(formula = weight ~ wtdesire, data = cdc[cdc$wtdesire < 500, 
##     ])
## 
## Coefficients:
## (Intercept)     wtdesire  
##      11.149        1.022

# The scatter plot has weight on x and wtdesire on y, so the overlaid line
# has to come from wtdesire ~ weight. `fit` models the opposite direction
# (weight ~ wtdesire) and would be drawn against swapped axes. The same
# wtdesire < 500 filter is applied here.
abline(lm(wtdesire ~ weight, data = cdc[cdc$wtdesire < 500, ]), col = "red")

Mosaic Chart

# Cross-tabulate gender (rows) against smoke100 (columns) and print it.
smokers_gender <- table(cdc$gender, cdc$smoke100)
smokers_gender

##    
##        0    1
##   m 4547 5022
##   f 6012 4419

# Give the two columns readable names, then draw the mosaic with one
# rainbow colour per column.
colnames(smokers_gender) <- c("no", "yes")
mosaicplot(smokers_gender, col = rainbow(ncol(smokers_gender)))

## Box Plot

# Single box plot of all heights, with a y-axis label and a title.
boxplot(cdc$height,
     ylab="Height",
     main="Box Plot of Height")

# Open the help page for sample().
?sample
# 100 draws with replacement from 1..30, followed by three copies of 999
# that act as extreme values and pull the mean upwards.
temp <- c(sample(1:30, 100, replace = TRUE), 999, 999, 999)
mean(temp)

## [1] 45.03883

# Box plot of all values, including the three 999 entries.
boxplot(temp)

# Box plot restricted to values below 50, which leaves the 999 entries out.
boxplot(temp[temp < 50])

# One box of heights per gender level (formula interface: value ~ group).
boxplot(cdc$height ~ cdc$gender
            ,ylab="Height",xlab="Gender"
            ,main="Height vs Gender")

# Rainfall comparison on a single panel, this time with a titled legend
# anchored at data coordinates.
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o", col = "blue", ylim = c(0, 220),
  main = "Rainfall in Taipei v.s. Tainan",
  xlab = "Month", ylab = "Rainfall"
)
# Second city overlaid in red with a dashed line and square markers.
lines(tainan, col = "red", lty = 2, pch = 22, type = "o")

# Legend box positioned at x = 1, y = 200 in plot coordinates.
legend(x = 1, y = 200, legend = c("taipei", "tainan"),
       col = c("blue", "red"), lwd = c(2.5, 2.5), title = "Rainfall")

# Sorted pie (largest slice first, clockwise) plus a legend listing the
# labels in the same sorted order.
bedroom_order <- order(bedroomsTable, decreasing = TRUE)
sorted_labels <- labels[bedroom_order]
pie(
  bedroomsTable[bedroom_order],
  labels = sorted_labels,
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

legend("topright", legend = sorted_labels,
       fill = rainbow(length(sorted_labels)),
       title = "units", cex = 0.8)

par

# Draw `n` empty panels, each showing its own sequence number in large
# type, to make the fill order of a multi-panel layout visible.
#
# n: number of panels to draw; 0 draws nothing.
# Returns NULL invisibly; the function is called for its plotting side effect.
showLayout <- function(n) {
  # seq_len() gives an empty sequence for n = 0, whereas 1:n would count
  # down through 1 and 0 and draw two unwanted panels.
  for (i in seq_len(n)) {
    # Blank frame without axes or axis labels.
    plot(1, type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    # Panel number centred on the single plotted coordinate (1, 1).
    text(1, 1, labels = i, cex = 10)
  }
}

# 3 x 2 grid of panels with narrow margins (1 line on every side).
par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)

# Same grid with wider margins (3 lines on every side).
par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)

# mfcol instead of mfrow: the grid is filled column by column rather than
# row by row.
par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)

Save Image

# Open a PNG device: everything drawn until dev.off() goes into pie.png.
png('pie.png')

# Sorted pie chart with a matching legend.
bedroom_order <- order(bedroomsTable, decreasing = TRUE)
sorted_labels <- labels[bedroom_order]
pie(
  bedroomsTable[bedroom_order],
  labels = sorted_labels,
  col = rainbow(length(labels)),
  main = "Pie Chart of Bedroom",
  clockwise = TRUE
)

legend("topright", legend = sorted_labels,
       fill = rainbow(length(sorted_labels)),
       title = "units", cex = 0.8)


# Close the device so the image file is completed.
dev.off()

## png 
##   2

Plotly

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Small example data set: three categories with their values.
ds <- data.frame(labels = c("A", "B", "C"),
        values = c(10, 20, 30))
# Print it.
ds

##   labels values
## 1      A     10
## 2      B     20
## 3      C     30

# Pie trace built from the two columns of ds, with a chart title.
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie") %>% layout(title = "Pie Chart")

# Same trace with hole = 0.6, which turns the pie into a donut.
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie", hole=0.6) %>% layout(title = "Donut Chart")

Area Chart

library(plotly)

month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
# Y-axis settings reused by the charts further down.
y <- list(title = "Rainfall")

# line chart
# The trace type is given explicitly, so plotly no longer has to guess it
# and no longer prints its "No trace type specified" message.
plot_ly(x = month, y = taipei, name = "taipei", type = 'scatter', mode = 'lines')

# scatter chart
# Trace type and mode are given explicitly; these are the values plotly
# used to fall back to (with informational messages) when they were omitted.
plot_ly(x = month, y = taipei, name = "taipei", type = 'scatter', mode = 'markers')

#area chart
library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

# Y-axis title shared by both traces.
y <- list(title = "Rainfall")
# Two overlapping areas: each city is filled from its values down to zero.
plot_ly(x = month, y = taipei, fill = "tozeroy", name = "taipei", type = 'scatter', mode = 'markers') %>%
  add_trace(x = month, y = tainan, fill = "tozeroy", name = "tainan") %>%
  layout(yaxis = y)

library(plotly)
month <- c(1, 2, 3, 4, 5)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)

# Stacked variant: the second trace plots the combined total and is filled
# with "tonexty" rather than down to zero; it is still named "tainan".
total <- taipei + tainan
plot_ly(x = month, y = taipei, fill = "tozeroy", name = "taipei", type = 'scatter', mode = 'markers') %>%
  add_trace(x = month, y = total, fill = "tonexty", name = "tainan") %>%
  layout(yaxis = y)

Bubble Chart

View(diamonds)

library(plotly)
# Random sample of 1000 rows to keep the chart light.
d <- diamonds[sample(nrow(diamonds), 1000), ]
# Bubble chart: carat on x, price on y, colour by clarity, marker size by
# carat. The trace type is stated explicitly so plotly does not have to
# guess it and print a message.
plot_ly(d, x = d$carat, y = d$price, text = paste("Clarity: ", d$clarity),
    type = "scatter", mode = "markers", color = d$clarity, size = d$carat)

Multiple plots

View(economics)

# "scatter" is a trace type, not a drawing mode; it is moved to `type` and
# the series is drawn with mode = "lines".
plot_ly(economics, x = economics$date, y = economics$uempmed, type = "scatter", mode = "lines")

# Two series combined into one figure with a shared legend switched off.
# "scatter" is a trace type, not a drawing mode, so it is passed as `type`
# and the lines are requested through `mode`.
p <- subplot(
    plot_ly(economics, x = economics$date, y = economics$uempmed, type = "scatter", mode = "lines"),
    plot_ly(economics, x = economics$date, y = economics$unemploy, type = "scatter", mode = "lines"),
    margin = 0.05
) %>% layout(showlegend = FALSE)

# Same two series arranged in two rows (nrows = 2).
# "scatter" is a trace type, not a drawing mode, so it is passed as `type`
# and the lines are requested through `mode`.
p <- subplot(
    plot_ly(economics, x = economics$date, y = economics$uempmed, type = "scatter", mode = "lines"),
    plot_ly(economics, x = economics$date, y = economics$unemploy, type = "scatter", mode = "lines"),
    margin = 0.05,
    nrows = 2
) %>% layout(showlegend = FALSE)

用Tableau 做視覺化

Get Geocoding

# Geocoding endpoint; the address is appended as the query value.
url <- 'https://maps.googleapis.com/maps/api/geocode/json?address='

# The address contains non-ASCII characters, so it is percent-encoded
# (from its UTF-8 bytes) before being placed in the URL.
# NOTE(review): the service may also require an API key parameter — confirm.
address <- '臺北市大安區和平東路三段1巷72弄1~30號'
url <- paste0(url, URLencode(enc2utf8(address), reserved = TRUE))

library(jsonlite)
res <- fromJSON(url)
# Latitude / longitude of the first result.
res$results[1,"geometry"][1,"location"]

##        lat      lng
## 1 25.02531 121.5462

Demo20170415

David Chiu

2017年4月15日

作業三

Anscombe Quartet

barplot

histogram

Pie Chart

Scatter Plot

Mosaic Chart

par

Save Image

Plotly

Area Chart

Bubble Chart

Multiple plots

用Tableau 做視覺化

Get Geocoding