download.file('https://raw.githubusercontent.com/ywchiu/rtibame/master/Data/purchase.csv', 'purchase.csv')
purchase <- read.csv('purchase.csv', header = TRUE)
head(purchase)
## X Time Action User Product Quantity Price
## 1 0 2015-07-01 00:00:01 order U312622727 P0006944501 1 1069
## 2 1 2015-07-01 00:00:03 order U239012343 P0006018073 1 1680
## 3 2 2015-07-01 00:00:19 order U10007697373 P0002267974 1 285
## 4 3 2015-07-01 00:01:10 order U296328517 P0016144236 1 550
## 5 4 2015-07-01 00:01:36 order U300884570 P0014516980122 1 249
## 6 5 2015-07-01 00:01:48 order U451050374 P0004134266 1 1780
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
purchase$Time <- as.POSIXct(purchase$Time)
# Answer1
purchase %>%
select(Time, Quantity) %>%
group_by(dt = format(purchase$Time, '%H')) %>%
summarise(total_count = sum(Quantity)) %>%
arrange(desc(total_count)) %>% head(1)
## Warning: package 'bindrcpp' was built under R version 3.4.2
## # A tibble: 1 x 2
## dt total_count
## <chr> <int>
## 1 23 6931
# Answer2
buy_stat <- purchase %>%
select(Time, Quantity) %>%
group_by(dt = format(purchase$Time, '%H')) %>%
summarise(total_count = sum(Quantity))
buy_stat$dt <- as.integer(buy_stat$dt)
plot(total_count ~ dt, data = buy_stat, type = 'b')
head(purchase)
## X Time Action User Product Quantity Price
## 1 0 2015-07-01 00:00:01 order U312622727 P0006944501 1 1069
## 2 1 2015-07-01 00:00:03 order U239012343 P0006018073 1 1680
## 3 2 2015-07-01 00:00:19 order U10007697373 P0002267974 1 285
## 4 3 2015-07-01 00:01:10 order U296328517 P0016144236 1 550
## 5 4 2015-07-01 00:01:36 order U300884570 P0014516980122 1 249
## 6 5 2015-07-01 00:01:48 order U451050374 P0004134266 1 1780
# Answer3
purchase_stat <- purchase %>%
select(User, Quantity, Price) %>%
mutate(buy_amount = Quantity * Price) %>%
group_by(User) %>%
summarise(total_buy_amount = sum(buy_amount)) %>%
arrange(desc(total_buy_amount)) %>%
head(3)
purchase_stat
## # A tibble: 3 x 2
## User total_buy_amount
## <fctr> <dbl>
## 1 U166708333 2942744
## 2 U142809250 747550
## 3 U1006283751 515688
# Answer4
purchase_stat <- purchase %>%
select(User, Quantity, Price) %>%
mutate(buy_amount = Quantity * Price) %>%
group_by(User) %>%
summarise(total_buy_amount = sum(buy_amount)) %>%
arrange(desc(total_buy_amount)) %>%
head(10)
purchase_stat
## # A tibble: 10 x 2
## User total_buy_amount
## <fctr> <dbl>
## 1 U166708333 2942744
## 2 U142809250 747550
## 3 U1006283751 515688
## 4 U10114715330 456782
## 5 U10062834851 439657
## 6 U32775842 422396
## 7 U330237576 335390
## 8 U300915168 330322
## 9 U32775853 319535
## 10 U331652356 257210
barplot(purchase_stat$total_buy_amount, names.arg = purchase_stat$User, col="blue")
data("anscombe")
#anscombe
plot(y1 ~ x1, data = anscombe)
fit <- lm(y1 ~ x1, data = anscombe)
plot(y1 ~ x1, data = anscombe)
abline(fit, col = "red")
plot(y2 ~ x2, data = anscombe)
plot(y3 ~ x3, data = anscombe)
plot(y4 ~ x4, data = anscombe)
x <- seq(1,6)
y <- x
plot(y ~ x, type = 'l')
types <- c('p', 'l', 'o', 'b', 'c', 's', 'h' ,'n')
par(mfrow = c(2,4))
for (i in types){
#print(i)
plot(y ~ x, type = i, main = i)
}
plot(y ~ x, type = 'n')
lines(y ~ x, col = "blue")
types <- c('p', 'l', 'o', 'b', 'c', 's', 'h' ,'n')
par(mfrow = c(2,4))
for (i in types){
plot(y ~ x, type = 'n', main = i)
lines(y ~ x, type = i)
}
par(mfrow =c(1,1))
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type = 'o', main = 'Rainfall by Region', ylab = 'Rainfall', xlab = 'Month', ylim = c(0,220), col= 'blue', lwd=3)
lines(tainan, type = 'o', col ='red', lwd = 3, pch = 22, lty=2)
#barplot(c(80,82,86,88), ylim = c(80,100))
plot(c(80,82,86,88), ylim = c(80,100), type = 'b')
plot(c(80,82,86,88), ylim = c(0,100), type = 'b')
plot(taipei, type = 'o', main = 'Rainfall by Region', ylab = 'Rainfall', xlab = 'Month', ylim = c(0,220), xlim = c(1,6),col= 'blue', lwd=3)
lines(tainan, type = 'o', col ='red', lwd = 3, pch = 22, lty=2)
text(x = 5.5, y= 210, 'Taipei', col='blue', cex = 1.1)
text(x = 5.5, y= 180, 'Tainan', col='red', cex = 1.1)
?text
## starting httpd help server ... done
download.file('https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv', 'house-price.csv')
housePrice <- read.csv('house-price.csv', header = TRUE)
bedroomsTable <- table(housePrice$Bedrooms)
barplot(bedroomsTable, main = 'Bedrooms Stat', xlab = 'Bedroom Number', ylab= 'Bedroom Count', col = 'blue')
bedroomsTable
##
## 2 3 4 5
## 30 67 29 2
roomType <- as.factor(names(bedroomsTable))
barplot(bedroomsTable, main = 'Bedrooms Stat', xlab = 'Bedroom Number', ylab= 'Bedroom Count', col = roomType)
load('cdc.RData')
head(cdc)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
hist(cdc$weight)
hist(cdc$weight, breaks = 2)
hist(cdc$weight, breaks = 50)
hist(cdc$weight, breaks = 200)
weight_tb <- table(cdc$weight)
head(sort(weight_tb, decreasing = TRUE))
##
## 160 150 180 170 200 140
## 992 970 933 922 805 794
weight_tb2 <- table(cdc$weight %% 10)
barplot(weight_tb2)
par(mfrow=c(2,1))
hist(cdc$weight,breaks=50,xlim=c(70,380))
barplot(table(cdc$weight),xlab="weight",ylab="Frequency")
par(mfrow=c(1,1))
housePrice <- read.csv('house-price.csv', header = TRUE)
bedrooms <- housePrice$Bedrooms
bedroomsTable=table(bedrooms)
bedroomsTable
## bedrooms
## 2 3 4 5
## 30 67 29 2
barplot(bedroomsTable)
labels <- c('2 Units', '3 Units', '4 Units', '5 Units')
pie(bedroomsTable, labels = labels, col = rainbow(length(labels)), main = 'Pie Chart By Bedrooms')
bedroomsTable <- sort(bedroomsTable, decreasing = TRUE)
pie(bedroomsTable, init.angle = 90, clockwise = TRUE)
#?pie
plot(cdc$weight, cdc$wtdesire)
data(iris)
plot(Petal.Length ~ Petal.Width, data = iris, col = iris$Species)
versicolor <- iris[iris$Species == 'versicolor', ]
virginica <- iris[iris$Species == 'virginica', ]
plot(Petal.Length ~ Petal.Width, data = versicolor, col = 'red', xlim = c(0,3), ylim = c(0,7))
points(Petal.Length ~ Petal.Width, data = virginica, col = 'blue')
points(mean(versicolor$Petal.Width), mean(versicolor$Petal.Length), col = 'orange', pch= 16)
points(mean(virginica$Petal.Width), mean(virginica$Petal.Length), col = 'orange', pch= 16)
abline(v = 1.75, col='green')
fit <- lm(wtdesire ~ weight, data = cdc)
filter.cond <- cdc$wtdesire - cdc$weight < 200
cdc_new <- cdc[filter.cond, ]
fit2 <- lm(wtdesire ~ weight, data = cdc_new)
fit2
##
## Call:
## lm(formula = wtdesire ~ weight, data = cdc_new)
##
## Coefficients:
## (Intercept) weight
## 46.8825 0.6375
plot(cdc$weight, cdc$wtdesire)
# wtdesire = weight * 0.639 + 46.664
abline(fit, col = 'red')
# wtdesire = weight * 0.6375 + 46.8825
abline(fit2, col = 'blue')
## Mosaic Plot
smoker_gender <- table(cdc$smoke100, cdc$gender)
colnames(smoker_gender)
## [1] "m" "f"
as.factor(colnames(smoker_gender))
## [1] m f
## Levels: f m
mosaicplot(smoker_gender, col = as.factor(colnames(smoker_gender)))
mosaicplot(smoker_gender, col = c('red', 'green'))
hist(cdc$weight)
boxplot(cdc$weight, ylab = 'Wight')
boxplot(cdc$weight~ cdc$gender, ylab = 'Wight')
boxplot(cdc$height~ cdc$gender, ylab = 'height')
temp <- c(20,40,30,25,28,30,999)
# mean temp
sum(temp) / length(temp)
## [1] 167.4286
mean(temp)
## [1] 167.4286
#median temp
sort(temp, decreasing = FALSE)
## [1] 20 25 28 30 30 40 999
median(temp)
## [1] 30
quantile(temp, 0.25)
## 25%
## 26.5
quantile(temp, 0.75)
## 75%
## 35
# IQR
quantile(temp, 0.75) - quantile(temp, 0.25)
## 75%
## 8.5
IQR(temp)
## [1] 8.5
max(min(temp), median(temp) - 1.5 * IQR(temp) )
## [1] 20
min(max(temp), median(temp) + 1.5 * IQR(temp) )
## [1] 42.75
boxplot(temp)
boxplot(temp[temp<100])
mean(temp[temp<100])
## [1] 28.83333
# House Price
#download.file('https://raw.githubusercontent.com/ywchiu/cathayr/master/data/lvr_prices.csv', 'lvr_prices.csv')
library(readr)
lvr_prices <- read_csv("C:/Users/Administrator/Desktop/lvr_prices.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_integer(),
## land_sqmeter = col_double(),
## trading_ymd = col_date(format = ""),
## finish_ymd = col_date(format = ""),
## building_sqmeter = col_double(),
## room = col_integer(),
## living_room = col_integer(),
## bath = col_integer(),
## total_price = col_integer(),
## price_per_sqmeter = col_double(),
## parking_sqmeter = col_double(),
## parking_price = col_integer()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 32 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual expected <int> <chr> <chr> <chr> actual 1 1282 total_price an integer 6700000000 file 2 2243 total_price an integer 3882685600 row 3 2244 total_price an integer 3373314400 col 4 4629 total_price an integer 3050000000 expected 5 5890 total_price an integer 3133800000 actual # ... with 1 more variables: file <chr>
## ... ................. ... ......................................... ........ ......................................... ...... ......................................... .... ......................................... ... ......................................... ... ......................................... ........ ......................................... ...... .......................................
## See problems(...) for more details.
head(lvr_prices)
## # A tibble: 6 x 29
## X1 area trading_target address
## <int> <chr> <chr> <chr>
## 1 0 大安區 房地(土地+建物) 臺北市大安區和平東路三段1巷72弄1~30號
## 2 1 中正區 房地(土地+建物) 臺北市中正區忠孝東路二段121~150號
## 3 2 大同區 土地 橋北段二小段601~630地號
## 4 3 大同區 房地(土地+建物) 臺北市大同區重慶北路一段61~90號
## 5 4 內湖區 房地(土地+建物) 臺北市內湖區民權東路六段90巷6弄1~30號
## 6 5 信義區 土地 福德段一小段661~690地號
## # ... with 25 more variables: land_sqmeter <dbl>, city_land_type <chr>,
## # non_city_land_type <chr>, non_city_code <chr>, trading_ymd <date>,
## # trading_num <chr>, floor <chr>, total_floor <chr>,
## # building_type <chr>, main_purpose <chr>, built_with <chr>,
## # finish_ymd <date>, building_sqmeter <dbl>, room <int>,
## # living_room <int>, bath <int>, compartment <chr>, management <chr>,
## # total_price <int>, price_per_sqmeter <dbl>, parking_type <chr>,
## # parking_sqmeter <dbl>, parking_price <int>, comments <chr>,
## # numbers <chr>
library(dplyr)
area_price_stat <- lvr_prices %>%
select(area, price_per_sqmeter, trading_target, city_land_type) %>%
filter(city_land_type == '住' & trading_target == '房地(土地+建物)') %>%
group_by(area ) %>%
summarise(area_price = mean(price_per_sqmeter, na.rm=TRUE)) %>%
arrange(desc(area_price))
barplot(area_price_stat$area_price/0.3025, names.arg = area_price_stat$area, cex.axis = 0.6, cex.names = 0.6)
living_area <- lvr_prices %>%
select(area, price_per_sqmeter, trading_target, city_land_type) %>%
filter(city_land_type == '住' & trading_target == '房地(土地+建物)' & price_per_sqmeter < 5000000)
living_area %>%
filter(area == '文山區' & price_per_sqmeter < 10000000) %>%
arrange(desc(price_per_sqmeter))
## # A tibble: 3,812 x 4
## area price_per_sqmeter trading_target city_land_type
## <chr> <dbl> <chr> <chr>
## 1 文山區 1459922 房地(土地+建物) 住
## 2 文山區 1001558 房地(土地+建物) 住
## 3 文山區 602410 房地(土地+建物) 住
## 4 文山區 598097 房地(土地+建物) 住
## 5 文山區 522987 房地(土地+建物) 住
## 6 文山區 518110 房地(土地+建物) 住
## 7 文山區 483793 房地(土地+建物) 住
## 8 文山區 449000 房地(土地+建物) 住
## 9 文山區 418060 房地(土地+建物) 住
## 10 文山區 416181 房地(土地+建物) 住
## # ... with 3,802 more rows
boxplot(price_per_sqmeter ~ area, data = living_area, cex.axis = 0.5, cex.names = 0.5)
boxplot(log(price_per_sqmeter) ~ area, data = living_area, cex.axis = 0.5, cex.names = 0.5)
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 1 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 3 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 5 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 6 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 9 is not drawn
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type = 'o', main = 'Rainfall by Region', ylab = 'Rainfall', xlab = 'Month', ylim = c(0,220), col= 'blue', lwd=3)
lines(tainan, type = 'o', col ='red', lwd = 3, pch = 22, lty=2)
legend(1,200, c("taipei","tainan"), lwd=c(2.5,2.5),col=c("blue","red"), title="Rainfall")
legend(4,80, c("taipei","tainan"), lwd=c(2.5,2.5),col=c("blue","red"), title="Rainfall")
bedrooms <- housePrice$Bedrooms
bedroomsTable=table(bedrooms)
bedroomsTable <- sort(bedroomsTable, decreasing = TRUE)
pie(bedroomsTable, init.angle = 90, clockwise = TRUE, col = rainbow(length(names(bedroomsTable))))
legend('bottomright', names(bedroomsTable), fill=rainbow(length(names(bedroomsTable))))
showLayout <- function(n){
for(i in 1:n){
plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
text(1, 1, labels=i, cex=10)
}
}
par(mfrow = c(3,2))
showLayout(6)
par(mfrow = c(2,3))
showLayout(6)
par(mfcol = c(3,2))
showLayout(6)
par(mfcol = c(2,3))
showLayout(6)
par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)
## Export Plot
png('rainfall.png')
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type = 'o', main = 'Rainfall by Region', ylab = 'Rainfall', xlab = 'Month', ylim = c(0,220), col= 'blue', lwd=3)
lines(tainan, type = 'o', col ='red', lwd = 3, pch = 22, lty=2)
legend(1,200, c("taipei","tainan"), lwd=c(2.5,2.5),col=c("blue","red"), title="Rainfall")
dev.off()
## png
## 2
#install.packages('quantmod')
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
getSymbols('AAPL')
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "AAPL"
chartSeries(AAPL)
getMetals('gold')
## [1] "XAUUSD"
chartSeries(XAUUSD)
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
XAUUSD %>%
ets() %>%
forecast(h=30) %>%
autoplot()
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
x <- seq(1,100)
random_y <- rnorm(100, mean = 0)
data <- data.frame(x, random_y)
p <- plot_ly(data, x = ~x, y = ~random_y, type = 'scatter', mode = 'lines')
p
## Donut Chart
ds <-data.frame(labels=c("A", "B", "C"),values =c(10, 20, 30))
plot_ly(ds, labels=ds$labels, values =ds$values, type ="pie") %>% layout(title="Pie Chart Example")
plot_ly(ds, labels=ds$labels, values =ds$values, type ="pie", hole=0.6) %>% layout(title="Donut Chart Example")
library(plotly)
month <- c(1,2,3,4,5)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
data <- data.frame(month, taipei, tainan)
p <- plot_ly(data, x = ~month, y = ~taipei, type = 'scatter', mode = 'lines') %>%
add_trace(data, x = ~month, y = ~tainan, type = 'scatter', mode = 'lines')
p <- plot_ly(data, x = ~month, y = ~taipei, type = 'scatter', mode = 'lines', fill= 'tozeroy')
p <- plot_ly(data, x = ~month, y = ~taipei, type = 'scatter', mode = 'markers', fill= 'tozeroy', name= 'Taipei') %>% add_trace(data, x = ~month, y = ~tainan, type = 'scatter', mode = 'markers', name = 'Tainan') %>% layout(yaxis=list(title='rainfall'))
p
total <-taipei+tainan
data <- data.frame(month, taipei, tainan, total)
y <- list(title="Rainfall")
p <- plot_ly(data, x = ~month, y = ~total, type = 'scatter', mode = 'markers', fill= 'tozeroy', name= 'Taipei') %>% add_trace(data, x = ~month, y = ~tainan, type = 'scatter', mode = 'markers', name = 'Tainan', fill='tonexty') %>% layout(yaxis=list(title='rainfall'))
p
## Bubble Chart
data("diamonds")
#diamonds
d <-diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(d, x =d$carat, y=d$price, color =d$clarity, type='scatter', mode='markers', size =d$carat, text=paste("Clarity", d$clarity))
data('economics')
p <-subplot(
plot_ly(economics, x =economics$date, y =economics$unemploy, type='scatter', mode='lines'),
plot_ly(economics, x =economics$date, y =economics$uempmed, type='scatter', mode='lines'
),margin=0.05)
p %>%layout(showlegend=FALSE)
p <-subplot(
plot_ly(economics, x =economics$date, y =economics$unemploy, type='scatter', mode='lines'),
plot_ly(economics, x =economics$date, y =economics$uempmed, type='scatter', mode='lines'
),margin=0.05, nrows = 2)
p %>%layout(showlegend=FALSE)
## Map
Sys.setenv('MAPBOX_TOKEN' = '')
df = read.csv('https://raw.githubusercontent.com/bcdunbar/datasets/master/meteorites_subset.csv')
p <- df %>% head(100) %>%
plot_mapbox(lat = ~reclat, lon = ~reclong,
split = ~class, size=2,
mode = 'scattermapbox', hoverinfo='name') %>%
layout(title = 'Meteorites by Class',
font = list(color='white'),
legend = list(orientation = 'h',
font = list(size = 8)),
margin = list(l = 25, r = 25,
b = 25, t = 25,
pad = 2))
p