# Download the raw purchase log from GitHub into the working directory.
download.file(
  url = 'https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv',
  destfile = 'purchase.csv'
)
trying URL 'https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv'
Content type 'text/plain; charset=utf-8' length 3497968 bytes (3.3 MB)
downloaded 3.3 MB
# Load the purchase log, keeping text columns as character vectors.
purchase <- read.csv(
  file = 'purchase.csv',
  header = TRUE,
  stringsAsFactors = FALSE
)
# View(purchase)
# Inspect the column types before converting anything.
str(object = purchase)
'data.frame': 54772 obs. of 7 variables:
$ X : int 0 1 2 3 4 5 6 7 8 9 ...
$ Time : chr "2015-07-01 00:00:01" "2015-07-01 00:00:03" "2015-07-01 00:00:19" "2015-07-01 00:01:10" ...
$ Action : chr "order" "order" "order" "order" ...
$ User : chr "U312622727" "U239012343" "U10007697373" "U296328517" ...
$ Product : chr "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
$ Quantity: int 1 1 1 1 1 1 1 1 1 1 ...
$ Price : num 1069 1680 285 550 249 ...
# Turn the timestamp strings into POSIXct. No tz is passed, so the session's
# local time zone is used.
purchase[["Time"]] <- as.POSIXct(purchase[["Time"]])
head(purchase[["Time"]])
[1] "2015-07-01 00:00:01 CST" "2015-07-01 00:00:03 CST"
[3] "2015-07-01 00:00:19 CST" "2015-07-01 00:01:10 CST"
[5] "2015-07-01 00:01:36 CST" "2015-07-01 00:01:48 CST"
# Open the help page for strftime to look up the available format codes.
?strftime
# Disabled example: abbreviated weekday, full weekday and abbreviated month.
#strftime(purchase$Time, '%a %A %b')
## Question 1: how are the records spread over the hours of the day?
# Reduce every timestamp to its two-digit hour.
buyhour <- strftime(x = purchase$Time, format = "%H")
# Count the records per hour and draw the counts as a line.
buyhourtrend <- table(buyhour)
plot(x = buyhourtrend, type = "l")
## Question 2: which three users spent the most in total?
library(dplyr)
vip <- purchase %>%
  select(User, Quantity, Price) %>%
  # Rows without a price cannot be valued; keeping them would turn the whole
  # total of the affected user into NA and push that user out of the ranking.
  filter(!is.na(Price)) %>%
  mutate(total_price = Quantity * Price) %>%
  group_by(User) %>%
  summarise(final_price = sum(total_price)) %>%
  arrange(desc(final_price)) %>%
  head(3) %>%
  select(User)
vip
## Question 3: bar chart of the ten highest-spending users.
vip <- purchase %>%
  select(User, Quantity, Price) %>%
  # Drop rows with a missing price so that one NA does not turn a user's
  # whole total into NA.
  filter(!is.na(Price)) %>%
  mutate(total_price = Quantity * Price) %>%
  group_by(User) %>%
  summarise(final_price = sum(total_price)) %>%
  arrange(desc(final_price)) %>%
  head(10)
?barplot
vip
# One bar per user; the factor codes of the user ids index the colour palette.
barplot(
  height = vip$final_price,
  names.arg = vip$User,
  col = factor(vip$User)
)
# Toy vector with one missing value.
a <- c(1, 2, 3, 4, 5, NA)
?sum
# na.rm = TRUE drops the NA before summing.
sum(a, na.rm = TRUE)
[1] 15
# install.packages('Amelia')
library(Amelia)
# AmeliaView()
# Row positions whose Price is missing.
which(is.na(purchase[["Price"]]))
[1] 109 1207 1751 2427 2489 2925 3338 3350
[9] 3411 3507 3624 3672 3978 4278 4343 4624
[17] 4819 7034 7185 9479 9973 10921 14387 15008
[25] 15216 15452 18566 20291 20490 20687 22680 25090
[33] 27972 28036 28056 30810 31004 31016 31049 33704
[41] 34226 37989 40096 42762 42831 45121 46596 47345
[49] 47737 51506 52224
# Share of rows whose Price is missing (the mean of a logical vector is the
# proportion of TRUE values). The original divided by nrow(pruchase), a
# misspelling of `purchase`.
mean(is.na(purchase$Price))
[1] 0.0009311327
# Product codes on the rows that have no price.
purchase$Product[is.na(purchase$Price)]
[1] "P0012242731" "P0012242760003" "P0013365715"
[4] "P0012242820026" "P0013293660004" "P0012242731"
[7] "P0012242731" "P0012242731" "P0013254695"
[10] "P0012242820026" "P0022457780004" "P0012242731"
[13] "P0012242820015" "P0012242731" "P0013365715"
[16] "P0021903670003" "P0012242820004" "P0021903460003"
[19] "P0012242753" "P0012242820026" "P0013293660004"
[22] "P0012242716" "P0022827125" "P0013034600014"
[25] "P0000096850014" "P0012242760003" "P0022822973"
[28] "P0022780330005" "P0022822984" "P0013898776"
[31] "P0013365693" "P0022822984" "P0013365715"
[34] "P0004629950010" "P0013036790001" "P0022822973"
[37] "P0013898791" "P0022457770005" "P0000387100000"
[40] "P0022457770016" "P0022822984" "P0013898791"
[43] "P0023532655" "P0001238112" "P0024243450"
[46] "P0022457770016" "P0013034500006" "P0012242700002"
[49] "P0005664850004" "P0022822973" "P0025213134"
# List every record of one product that shows up among the missing prices.
purchase[purchase[["Product"]] == 'P0012242731', ]
# Store a copy without incomplete rows; `purchase` itself is left unchanged.
purchase2 <- na.omit(object = purchase)
str(object = purchase)
'data.frame': 54772 obs. of 7 variables:
$ X : int 0 1 2 3 4 5 6 7 8 9 ...
$ Time : POSIXct, format: "2015-07-01 00:00:01" ...
$ Action : chr "order" "order" "order" "order" ...
$ User : chr "U312622727" "U239012343" "U10007697373" "U296328517" ...
$ Product : chr "P0006944501" "P0006018073" "P0002267974" "P0016144236" ...
$ Quantity: int 1 1 1 1 1 1 1 1 1 1 ...
$ Price : num 1069 1680 285 550 249 ...
# Draw the missingness map of the purchase data.
missmap(purchase)
library(dplyr)
# Top three spenders, computed only from rows that have a price.
vip <- purchase %>%
  filter(!is.na(Price)) %>%
  group_by(User) %>%
  summarise(final_price = sum(Quantity * Price)) %>%
  arrange(desc(final_price)) %>%
  head(3) %>%
  select(User)
# Six points on the diagonal, used by all plot-type demos.
x <- seq(from = 1, to = 6)
y <- x
plot(x, y, type = "l", col = "red")
# The values of `type` that are demonstrated below.
types <- c("p", "l", "o", "b", "c", "s", "h", "n")
types[3]
[1] "o"
# A single plot using the third type.
plot(x, y, type = types[3], col = "red")
# Switch to a 2 x 4 grid and draw the first three types into it.
par(mfrow = c(2, 4))
invisible(lapply(types[1:3], function(kind) {
  plot(x, y, type = kind, col = "red")
}))
# Draw every plot type on a 2 x 4 grid, one panel per type.
par(mfrow = c(2, 4))
for (i in seq_along(types)) {
  plot(x, y, type = types[i], col = "red")
}

# Same grid, but set up an empty frame first and add the data with lines().
par(mfrow = c(2, 4))
for (i in seq_along(types)) {
  plot(x, y, type = 'n')
  lines(x, y, type = types[i], col = "red")
}

# Same again, with the type shown as the panel title.
par(mfrow = c(2, 4))
for (i in seq_along(types)) {
  title <- paste('type:', types[i])
  plot(x, y, type = 'n', main = title)
  lines(x, y, type = types[i], col = "red")
}
# Back to a single panel: a red line with blue points drawn on top.
par(mfrow = c(1, 1))
plot(x, y, type = "l", col = "red")
points(x, y, col = "blue")
# Two rainfall series on one single-panel plot.
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
# The fixed y range leaves room for both series.
plot(
  taipei,
  type = "o",
  col = "blue",
  ylim = c(0, 220),
  xlab = "Month",
  ylab = "Rainfall"
)
# Second series: dashed red line with square markers.
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")
# Download the house price table; note the local file name is 'house-price.csv'.
download.file(
  url = 'https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv',
  destfile = 'house-price.csv'
)
trying URL 'https://raw.githubusercontent.com/ywchiu/rtibame/master/data/house-prices.csv'
Content type 'text/plain; charset=utf-8' length 3867 bytes
downloaded 3867 bytes
housePrice <- read.csv(file = 'house-price.csv', header = TRUE)
# View(housePrice)
# Count the houses per number of bedrooms.
bedrooms <- housePrice$Bedrooms
bedroomsTable <- table(bedrooms)
?barplot
# Plain bar chart first, then three variants with explicit colours and labels.
barplot(bedroomsTable)
barplot(
  height = bedroomsTable,
  names.arg = names(bedroomsTable),
  col = c("blue", "orange", "yellow", "red")
)
barplot(
  height = bedroomsTable,
  names.arg = names(bedroomsTable),
  col = factor(names(bedroomsTable))
)
barplot(
  height = bedroomsTable,
  names.arg = names(bedroomsTable),
  col = factor(names(bedroomsTable)),
  main = "Bedroom Type Calculate",
  xlab = "bedroom type",
  ylab = "count"
)
# Show the structure of the cdc data frame.
# NOTE(review): `cdc` is not created anywhere in the visible script; it must
# already exist in the session. Confirm how it is loaded.
str(cdc)
'data.frame': 20000 obs. of 9 variables:
$ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
$ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
$ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
$ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
$ height : num 70 64 60 66 61 64 71 67 65 70 ...
$ weight : int 175 125 105 132 150 114 194 170 150 180 ...
$ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
$ age : int 77 33 49 42 55 55 31 45 27 44 ...
$ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
print(cdc)
# NOTE(review): `weigths` is a misspelling of "weights"; the name is kept
# because later statements refer to it.
weigths <- cdc$weight
# Many narrow bins, to make spikes at particular weights visible.
hist(weigths, breaks = 500)
# Distribution of the last digit of the recorded weights.
table(weigths %% 10)
0 1 2 3 4 5 6 7 8
9421 207 919 545 525 5865 481 543 1159
9
335
# Two stacked panels: histogram of weights, then a bar per distinct weight.
par(mfrow = c(2, 1))
hist(weigths, breaks = 500, xlim = c(70, 380))
barplot(table(cdc$weight), xlab = "weight", ylab = "Frequency")
# Actual weight against desired weight.
plot(cdc$weight, cdc$wtdesire)
Warning message:
In strsplit(code, "\n", fixed = TRUE) :
input string 1 is invalid in this locale
# Re-check the column types of cdc before using genhlth as a colour.
str(cdc)
'data.frame': 20000 obs. of 9 variables:
$ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
$ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
$ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
$ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
$ height : num 70 64 60 66 61 64 71 67 65 70 ...
$ weight : int 175 125 105 132 150 114 194 170 150 180 ...
$ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
$ age : int 77 33 49 42 55 55 31 45 27 44 ...
$ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# Weight against desired weight, coloured by the general-health factor.
plot(cdc$weight, cdc$wtdesire, col = cdc$genhlth)
data("iris")
print(iris)
# Petal width against petal length, coloured by species.
plot(iris$Petal.Width, iris$Petal.Length, col = iris$Species)
# Empty frame first, then add one species at a time with points().
plot(iris$Petal.Width, iris$Petal.Length, type = "n")
setosa <- subset(iris, Species == 'setosa')
versicolor <- subset(iris, Species == 'versicolor')
points(x = setosa$Petal.Width, y = setosa$Petal.Length, col = "blue")
points(x = versicolor$Petal.Width, y = versicolor$Petal.Length, col = "green")
# Scatter of actual against desired weight, with the least-squares line.
# The x-axis label previously read "weigth".
plot(
  cdc$weight, cdc$wtdesire,
  xlab = "weight",
  ylab = "weight desire",
  main = "Scatter of Weight"
)
# Regress desired weight on actual weight; `data =` keeps the coefficient
# names free of the `cdc$` prefix.
fit <- lm(wtdesire ~ weight, data = cdc)
abline(fit, col = "red")
# Keep transactions with a positive total price whose trading target is
# "house and land (land + building)".
# NOTE(review): the string literal was mis-encoded in the original line and
# would have matched no rows. It is reconstructed here from the byte remnants;
# confirm it against unique(lvr_prices_mac$trading_target).
# NOTE(review): `lvr_prices_mac` is not created in the visible script.
lvr_prices <- lvr_prices_mac[
  (lvr_prices_mac$total_price > 0) &
    (lvr_prices_mac$trading_target == '房地(土地+建物)'),
]
# Log-log scatter of total price against building area, with the fitted line.
plot(log(lvr_prices$total_price) ~ log(lvr_prices$building_sqmeter))
fit <- lm(log(total_price) ~ log(building_sqmeter), data = lvr_prices)
abline(fit, col = "red")
# Untransformed fit: the slope is the price change per unit of building_sqmeter.
fit2 <- lm(total_price ~ building_sqmeter, data = lvr_prices)
fit2
Call:
lm(formula = total_price ~ building_sqmeter, data = lvr_prices)
Coefficients:
(Intercept) building_sqmeter
859604 176640
# Rescale the building_sqmeter slope of fit2 (176640) by 0.3025.
# NOTE(review): presumably converts a per-square-metre price to a per-ping
# price (1 square metre is about 0.3025 ping); confirm the intended unit.
176640 / 0.3025
[1] 583933.9
# Look up the gender and smoke100 columns used for the mosaic plot.
str(cdc)
'data.frame': 20000 obs. of 9 variables:
$ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
$ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
$ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
$ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
$ height : num 70 64 60 66 61 64 71 67 65 70 ...
$ weight : int 175 125 105 132 150 114 194 170 150 180 ...
$ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
$ age : int 77 33 49 42 55 55 31 45 27 44 ...
$ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# Cross-tabulate gender (rows) against the smoke100 flag (columns).
smokers_gender <- table(cdc$gender, cdc$smoke100)
colnames(smokers_gender) <- c('no', 'yes')
# One rainbow colour per column of the table.
mosaicplot(smokers_gender, col = rainbow(ncol(smokers_gender)))
# Height overall, then split by gender.
boxplot(cdc$height, ylab = "Height", main = "Box Plot of Height")
boxplot(cdc$height ~ cdc$gender, ylab = "Height", main = "Box Plot of Height")
?sample.int
# Fix the seed so the sample is reproducible.
set.seed(2)
# 100 draws with replacement from the integers 1..40.
temp <- sample.int(n = 40, size = 100, replace = TRUE)
mean(temp)
[1] 20.18
# Append three extreme values to show how outliers pull the mean.
temp <- append(temp, rep(999, 3))
mean(temp)
[1] 48.68932
# Histogram and box plot with the outliers included.
hist(temp)
boxplot(temp)
# Box plot of the values below 100 only.
boxplot(temp[temp < 100])
# Rainfall plot again, this time with legends.
par(mfrow = c(1, 1))
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(
  taipei,
  type = "o",
  col = "blue",
  ylim = c(0, 220),
  xlab = "Month",
  ylab = "Rainfall"
)
lines(tainan, type = "o", pch = 22, lty = 2, col = "red")
# Legend placed by coordinates (x = 1, y = 200).
legend(
  1, 200,
  c("taipei", "tainan"),
  lwd = rep(2.5, 2),
  col = c("blue", "red"),
  title = "Rainfall"
)
# Legend placed by keyword.
legend(
  "center",
  c("taipei", "tainan"),
  lwd = rep(2.5, 2),
  col = c("blue", "red"),
  title = "Rainfall"
)
# Print the bedroom counts used for the pie chart.
# NOTE(review): `bedroomsTable2` is not created anywhere in the visible script.
# The printed counts are in decreasing order, so it is presumably
# sort(bedroomsTable, decreasing = TRUE); confirm.
bedroomsTable2
bedrooms
3 2 4 5
67 30 29 2
# Legend entries, in the same order as the slices.
label <- c('3 unit', '2 unit', '4 unit', '5 unit')
# Start at 12 o'clock and go clockwise, one rainbow colour per slice.
pie(
  bedroomsTable2,
  col = rainbow(length(label)),
  init.angle = 90,
  clockwise = TRUE
)
legend(
  "bottomleft",
  label,
  fill = rainbow(length(label)),
  title = "units",
  cex = 0.8
)
# Draw `n` empty panels, each showing its sequence number in large type, to
# reveal the order in which a multi-panel layout is filled.
#
# n: number of panels to draw; 0 draws nothing (the previous `1:n` loop
#    iterated over c(1, 0) in that case and drew two panels).
# Returns NULL invisibly.
showLayout <- function(n) {
  for (i in seq_len(n)) {
    # Blank frame without axes or axis titles.
    plot(1, type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    # Panel number at the centre of the frame.
    text(1, 1, labels = i, cex = 10)
  }
}
# 3 x 2 grid filled row by row, with narrow margins.
par(mar = rep(1, 4), mfrow = c(3, 2))
showLayout(6)
# Same grid with wider margins.
par(mar = rep(3, 4), mfrow = c(3, 2))
showLayout(6)
# mfcol fills the grid column by column instead.
par(mar = rep(3, 4), mfcol = c(3, 2))
showLayout(6)
# NOTE(review): `bedroomsTable2` is not created in the visible script.
print(bedroomsTable2)
bedrooms
3 2 4 5
67 30 29 2
label <- c('3 unit', '2 unit', '4 unit', '5 unit')
# Send the pie chart to a PNG file in the working directory.
png(filename = 'pie.png')
pie(
  bedroomsTable2,
  col = rainbow(length(label)),
  init.angle = 90,
  clockwise = TRUE
)
legend(
  "bottomleft",
  label,
  fill = rainbow(length(label)),
  title = "units",
  cex = 0.8
)
# Close the device so the file is written.
dev.off()
null device
1
# Print the working directory, i.e. the folder pie.png was written to.
getwd()
[1] "C:/Users/USER/Desktop"
library(plotly)
taipei <- c(92.5, 132.6, 168.8, 159.1, 218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
# `month` was used as the x value below but never defined in this script;
# use the observation index 1..5.
month <- seq_along(taipei)

# Line chart with one trace per city.
plot_ly(
  x = month, y = taipei,
  type = 'scatter', mode = 'lines', name = "taipei"
) %>%
  add_trace(x = month, y = tainan, name = "tainan")

# Axis settings passed to layout() for the y axis.
y <- list(title = "Rainfall")

# Area chart: both series are filled down to zero, so they overlap.
plot_ly(
  x = month, y = taipei, fill = "tozeroy",
  name = "taipei", type = 'scatter', mode = 'markers'
) %>%
  add_trace(x = month, y = tainan, fill = "tozeroy", name = "tainan") %>%
  layout(yaxis = y)

# Stacked area chart: the second trace is drawn at taipei + tainan and filled
# down to the previous trace.
total <- taipei + tainan
plot_ly(
  x = month, y = taipei, fill = "tozeroy",
  name = "taipei", type = 'scatter', mode = 'markers'
) %>%
  add_trace(x = month, y = total, fill = "tonexty", name = "tainan") %>%
  layout(yaxis = y)
library(plotly)
# Random sample of 1000 rows from the diamonds data set.
d <- diamonds[sample(nrow(diamonds), 1000), ]
# Bubble chart of price against carat. The trace type is given explicitly so
# plotly no longer has to guess it and print "No trace type specified".
plot_ly(
  d,
  x = d$carat,
  y = d$price,
  text = paste("Clarity: ", d$clarity),
  type = "scatter",
  mode = "markers",
  color = d$clarity,
  size = d$carat
)
# 3 x 3 matrix of standard-normal draws for the heatmap demo.
m <- matrix(data = rnorm(n = 9), nrow = 3, ncol = 3)
print(m)
[,1] [,2] [,3]
[1,] 0.2518793 -1.4133258 -0.3171303
[2,] 0.6016700 0.6394737 -1.4489802
[3,] -2.4642367 -0.1220622 -1.5154701
# Heatmap of the random matrix with hand-written axis labels.
plot_ly(
  z = m,
  x = c("a", "b", "c"),
  y = c("d", "e", "f"),
  type = "heatmap"
)
# Correlation matrix of columns 2 to 6 of the house data, as a heatmap.
m <- cor(housePrice[, 2:6])
plot_ly(z = m, x = colnames(m), y = colnames(m), type = "heatmap")
# The built-in volcano matrix with the "Hot" colour scale.
plot_ly(z = volcano, colorscale = "Hot", type = "heatmap")
df <- read.csv(
  "https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv"
)
# Hover text: the state name followed by three lines of commodity figures.
df$hover <- paste(
  df$state, '<br>', "Beef", df$beef, "Dairy", df$dairy, "<br>",
  "Fruits", df$total.fruits, "Veggies", df$total.veggies,
  "<br>", "Wheat", df$wheat, "Corn", df$corn
)
# White border settings for state boundaries.
# NOTE(review): `l` is not referenced by the plot call below.
l <- list(color = toRGB("white"), width = 2)
# Map projection and lake options.
g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showlakes = TRUE,
  lakecolor = toRGB('white')
)
# Choropleth of total exports, located by the `code` column.
plot_geo(df, locationmode = 'USA-states') %>%
  add_trace(
    z = ~total.exports,
    text = ~hover,
    locations = ~code,
    color = ~total.exports,
    colors = 'Purples'
  ) %>%
  colorbar(title = "Millions USD") %>%
  layout(
    title = '2011 US Agriculture Exports by State<br>(Hover for breakdown)',
    geo = g
  )
data("economics")
# Two time series side by side in one row. The valid scatter mode is 'lines';
# the original 'line' made plotly print "lines is not in the mode" and patch
# the mode itself.
p <- subplot(
  plot_ly(
    economics, x = economics$date, y = economics$uempmed,
    type = 'scatter', mode = 'lines'
  ),
  plot_ly(
    economics, x = economics$date, y = economics$unemploy,
    type = 'scatter', mode = 'lines'
  ),
  margin = 0.05, nrows = 1
) %>%
  layout(showlegend = FALSE)
p
# The same two series stacked in two rows. The valid scatter mode is 'lines';
# the original 'line' made plotly print "lines is not in the mode" and patch
# the mode itself.
p <- subplot(
  plot_ly(
    economics, x = economics$date, y = economics$uempmed,
    type = 'scatter', mode = 'lines'
  ),
  plot_ly(
    economics, x = economics$date, y = economics$unemploy,
    type = 'scatter', mode = 'lines'
  ),
  margin = 0.05, nrows = 2
) %>%
  layout(showlegend = FALSE)
p