x <- 1:6
y <- 1:6
plot(x,y, type='l' )
plot(x,y, type='o' )
x1 <- 2:4
y1 <- c(3,3,3)
plot(x,y,type='n')
lines(x, y, col="blue")
lines(x1, y1, col="red")
types <- c('p', 'l', 'o', 'b', 'c' , 's', 'h', 'n')
par(mfrow=c(2,4))
for(t in types){
title <- paste('types:', t)
plot(x,y, type = 'n', main = title)
lines(x,y,type=t)
}
par(mfrow=c(1,1))
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall")
lines(tainan , type="o", pch=22, lty=2, col="red")
download.file('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv', 'house-prices.csv')
house <- read.csv('house-prices.csv')
bedroomsTable <- table(house$Bedrooms)
barplot(bedroomsTable, main='Bedrooms Type', xlab = 'Bedroom Type', ylab = 'count', col = "orange", ylim = c(0,80))
barplot(bedroomsTable, main='Bedrooms Type', xlab = 'Bedroom Type', ylab = 'count', col = c(1,2,3,4), ylim = c(0,80))
load("D:/OS DATA/Desktop/cdc.Rdata")
hist(cdc$weight)
hist(cdc$height)
hist(cdc$weight, breaks= 500)
head(sort(table(cdc$weight), decreasing = TRUE))
##
## 160 150 180 170 200 140
## 992 970 933 922 805 794
table(cdc$weight %% 10)
##
## 0 1 2 3 4 5 6 7 8 9
## 9421 207 919 545 525 5865 481 543 1159 335
par(mfrow=c(2,1))
hist(cdc$weight, breaks = 50)
barplot(table(cdc$weight), xlab = 'weight', ylab = 'frequency')
#barplot(ta)
bedroomsTable
##
## 2 3 4 5
## 30 67 29 2
labels <- c('2 unit', '3 unit', '4 unit', '5 unit')
rainbow(length(labels))
## [1] "#FF0000FF" "#80FF00FF" "#00FFFFFF" "#8000FFFF"
tb <- sort(bedroomsTable, decreasing = TRUE)
pie(tb, labels = labels, col = c('#3eb2ad', '#de6000', '#ff6257', '#517d7d'), main='pie chart of bedrooms', init.angle = 90, clockwise = TRUE)
plot(cdc$weight, cdc$wtdesire)
plot(cdc$weight, cdc$height)
data(iris)
xlab <- 'Sepal.Length'
ylab <- 'Petal.Length'
class(iris$Species)
## [1] "factor"
plot(iris$Sepal.Length, iris$Petal.Length,col = iris$Species)
abline(h = 4.7, col="orange")
abline(h = 2.4, col="blue")
versicolor <- iris[iris$Species == 'versicolor',]
virginica <- iris[iris$Species == 'virginica',]
plot(versicolor$Petal.Length, versicolor$Petal.Width, col="red", xlim= c(0,6), ylim= c(0,2))
points(virginica$Petal.Length, virginica$Petal.Width, col="orange")
plot(cdc$weight, cdc$wtdesire, xlab = 'Weight', ylab = 'Desire Weight')
fit <- lm(wtdesire ~ weight, data = cdc)
fit
##
## Call:
## lm(formula = wtdesire ~ weight, data = cdc)
##
## Coefficients:
## (Intercept) weight
## 46.664 0.639
plot(cdc$weight, cdc$wtdesire, xlab = 'Weight', ylab = 'Desire Weight')
abline(fit, col="red")
barplot(c(80, 82 ,84, 88 ))
## Mosaic Plot
table(cdc$gender)
##
## m f
## 9569 10431
table(cdc$smoke100)
##
## 0 1
## 10559 9441
tb <- table(cdc$gender, cdc$smoke100)
colnames(tb) <- c('Non-Smokers', 'Smokers')
rownames(tb) <- c('Male', 'Female')
mosaicplot(tb, col= c(2,3))
hist(cdc$height, breaks = 50)
boxplot(cdc$height)
boxplot(cdc$height~ cdc$gender)
salary <- c(80,90,80,85,100,120,150,130,110)
sum(salary) / length(salary)
## [1] 105
mean(salary)
## [1] 105
salary2 <- c(80,90,80,85,100,120,150,130,110,1000)
mean(salary2)
## [1] 194.5
sort(salary2)
## [1] 80 80 85 90 100 110 120 130 150 1000
median(salary2)
## [1] 105
quantile(salary2, 0.25)
## 25%
## 86.25
quantile(salary2, 0.75)
## 75%
## 127.5
quantile(salary2, 0.75) - quantile(salary2, 0.25)
## 75%
## 41.25
IQR(salary2)
## [1] 41.25
max(min(salary2), median(salary2) - 1.5 * IQR(salary2))
## [1] 80
min(max(salary2), median(salary2) + 1.5 * IQR(salary2))
## [1] 166.875
boxplot(salary2)
boxplot(salary2[salary2 < 200])
## Legend
par(mfrow=c(1,1))
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall")
lines(tainan , type="o", pch=22, lty=2, col="red")
legend(1, 200, c('taipei', 'tainan'), col = c('blue', 'red'), lwd=c(2.5,2.5) )
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall")
lines(tainan , type="o", pch=22, lty=2, col="red")
legend('topleft', c('taipei', 'tainan'), col = c('blue', 'red'), lwd=c(2.5,2.5) )
plot(taipei, type="o", col="blue", ylim=c(0,220),
xlab="Month", ylab="Rainfall")
lines(tainan , type="o", pch=22, lty=2, col="red")
text(1.5,200, 'Let the rain falls')
plot(taipei, type="o", col="blue", ylim=c(0,250), xlim=c(1,6),
xlab="Month", ylab="Rainfall")
lines(tainan , type="o", pch=22, lty=2, col="red")
text(5.2, 230, 'Taipei', col="blue")
text(5.2, 170, 'Tainan', col="red")
tb <- sort(bedroomsTable, decreasing = TRUE)
pie(tb, labels = labels, col = c('#3eb2ad', '#de6000', '#ff6257', '#517d7d'), main='pie chart of bedrooms', init.angle = 90, clockwise = TRUE)
legend("bottomright", labels,
fill=c('#3eb2ad', '#de6000', '#ff6257', '#517d7d'), title = "units", cex=0.8)
showLayout = function(n){
for(i in 1:n){
plot(1,type="n",xaxt="n",yaxt="n",xlab="",ylab="")
text(1, 1, labels=i, cex=10)
}
}
par(mar=c(1,1,1,1),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfrow=c(3,2))
showLayout(6)
par(mar=c(3,3,3,3),mfcol=c(3,2))
showLayout(6)
jpeg('cdc.jpg')
plot(cdc$weight, cdc$wtdesire)
dev.off()
## png
## 2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(Titanic)
titanic <- data.frame(Titanic)
titanic[ (titanic$Sex == 'Male') & (titanic$Age == 'Adult') , ]
## Class Sex Age Survived Freq
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
filter(titanic, Sex == 'Male', Age == 'Adult')
## Class Sex Age Survived Freq
## 1 1st Male Adult No 118
## 2 2nd Male Adult No 154
## 3 3rd Male Adult No 387
## 4 Crew Male Adult No 670
## 5 1st Male Adult Yes 57
## 6 2nd Male Adult Yes 14
## 7 3rd Male Adult Yes 75
## 8 Crew Male Adult Yes 192
filter(titanic, Sex == 'Male' | Age == 'Adult')
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Male Adult No 118
## 6 2nd Male Adult No 154
## 7 3rd Male Adult No 387
## 8 Crew Male Adult No 670
## 9 1st Female Adult No 4
## 10 2nd Female Adult No 13
## 11 3rd Female Adult No 89
## 12 Crew Female Adult No 3
## 13 1st Male Child Yes 5
## 14 2nd Male Child Yes 11
## 15 3rd Male Child Yes 13
## 16 Crew Male Child Yes 0
## 17 1st Male Adult Yes 57
## 18 2nd Male Adult Yes 14
## 19 3rd Male Adult Yes 75
## 20 Crew Male Adult Yes 192
## 21 1st Female Adult Yes 140
## 22 2nd Female Adult Yes 80
## 23 3rd Female Adult Yes 76
## 24 Crew Female Adult Yes 20
filter(titanic, Sex == 'Male' & Age == 'Adult')
## Class Sex Age Survived Freq
## 1 1st Male Adult No 118
## 2 2nd Male Adult No 154
## 3 3rd Male Adult No 387
## 4 Crew Male Adult No 670
## 5 1st Male Adult Yes 57
## 6 2nd Male Adult Yes 14
## 7 3rd Male Adult Yes 75
## 8 Crew Male Adult Yes 192
filter(titanic, Class %in% c('1st', 'Crew') )
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 Crew Male Child No 0
## 3 1st Female Child No 0
## 4 Crew Female Child No 0
## 5 1st Male Adult No 118
## 6 Crew Male Adult No 670
## 7 1st Female Adult No 4
## 8 Crew Female Adult No 3
## 9 1st Male Child Yes 5
## 10 Crew Male Child Yes 0
## 11 1st Female Child Yes 1
## 12 Crew Female Child Yes 0
## 13 1st Male Adult Yes 57
## 14 Crew Male Adult Yes 192
## 15 1st Female Adult Yes 140
## 16 Crew Female Adult Yes 20
titanic[, c('Sex', 'Age')]
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Adult
## 10 Male Adult
## 11 Male Adult
## 12 Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17 Male Child
## 18 Male Child
## 19 Male Child
## 20 Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25 Male Adult
## 26 Male Adult
## 27 Male Adult
## 28 Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
select(titanic, Sex, Age)
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Adult
## 10 Male Adult
## 11 Male Adult
## 12 Male Adult
## 13 Female Adult
## 14 Female Adult
## 15 Female Adult
## 16 Female Adult
## 17 Male Child
## 18 Male Child
## 19 Male Child
## 20 Male Child
## 21 Female Child
## 22 Female Child
## 23 Female Child
## 24 Female Child
## 25 Male Adult
## 26 Male Adult
## 27 Male Adult
## 28 Male Adult
## 29 Female Adult
## 30 Female Adult
## 31 Female Adult
## 32 Female Adult
select(titanic, Sex:Survived)
## Sex Age Survived
## 1 Male Child No
## 2 Male Child No
## 3 Male Child No
## 4 Male Child No
## 5 Female Child No
## 6 Female Child No
## 7 Female Child No
## 8 Female Child No
## 9 Male Adult No
## 10 Male Adult No
## 11 Male Adult No
## 12 Male Adult No
## 13 Female Adult No
## 14 Female Adult No
## 15 Female Adult No
## 16 Female Adult No
## 17 Male Child Yes
## 18 Male Child Yes
## 19 Male Child Yes
## 20 Male Child Yes
## 21 Female Child Yes
## 22 Female Child Yes
## 23 Female Child Yes
## 24 Female Child Yes
## 25 Male Adult Yes
## 26 Male Adult Yes
## 27 Male Adult Yes
## 28 Male Adult Yes
## 29 Female Adult Yes
## 30 Female Adult Yes
## 31 Female Adult Yes
## 32 Female Adult Yes
select(titanic, contains('S'))
## Class Sex Survived
## 1 1st Male No
## 2 2nd Male No
## 3 3rd Male No
## 4 Crew Male No
## 5 1st Female No
## 6 2nd Female No
## 7 3rd Female No
## 8 Crew Female No
## 9 1st Male No
## 10 2nd Male No
## 11 3rd Male No
## 12 Crew Male No
## 13 1st Female No
## 14 2nd Female No
## 15 3rd Female No
## 16 Crew Female No
## 17 1st Male Yes
## 18 2nd Male Yes
## 19 3rd Male Yes
## 20 Crew Male Yes
## 21 1st Female Yes
## 22 2nd Female Yes
## 23 3rd Female Yes
## 24 Crew Female Yes
## 25 1st Male Yes
## 26 2nd Male Yes
## 27 3rd Male Yes
## 28 Crew Male Yes
## 29 1st Female Yes
## 30 2nd Female Yes
## 31 3rd Female Yes
## 32 Crew Female Yes
sum(tail(head(iris[iris$Sepal.Length> 5, ]), 3)$Sepal.Length)
## [1] 16.9
library(magrittr)
iris %>% .[iris$Sepal.Length > 5, ] %>% head() %>% tail(3) %>% .$Sepal.Length %>% sum()
## [1] 16.9
filter(select(titanic, Sex, Age), Age == 'Child')
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Child
## 10 Male Child
## 11 Male Child
## 12 Male Child
## 13 Female Child
## 14 Female Child
## 15 Female Child
## 16 Female Child
titanic %>%
select(Sex, Age) %>%
filter(Age == 'Child')
## Sex Age
## 1 Male Child
## 2 Male Child
## 3 Male Child
## 4 Male Child
## 5 Female Child
## 6 Female Child
## 7 Female Child
## 8 Female Child
## 9 Male Child
## 10 Male Child
## 11 Male Child
## 12 Male Child
## 13 Female Child
## 14 Female Child
## 15 Female Child
## 16 Female Child
titanic %>%
select(Sex, Class, Age, Freq) %>%
filter(Age == 'Child') %>%
arrange(Freq)
## Sex Class Age Freq
## 1 Male 1st Child 0
## 2 Male 2nd Child 0
## 3 Male Crew Child 0
## 4 Female 1st Child 0
## 5 Female 2nd Child 0
## 6 Female Crew Child 0
## 7 Male Crew Child 0
## 8 Female Crew Child 0
## 9 Female 1st Child 1
## 10 Male 1st Child 5
## 11 Male 2nd Child 11
## 12 Male 3rd Child 13
## 13 Female 2nd Child 13
## 14 Female 3rd Child 14
## 15 Female 3rd Child 17
## 16 Male 3rd Child 35
titanic %>%
select(Sex, Class, Age, Freq) %>%
filter(Age == 'Child') %>%
arrange(desc(Freq) )
## Sex Class Age Freq
## 1 Male 3rd Child 35
## 2 Female 3rd Child 17
## 3 Female 3rd Child 14
## 4 Male 3rd Child 13
## 5 Female 2nd Child 13
## 6 Male 2nd Child 11
## 7 Male 1st Child 5
## 8 Female 1st Child 1
## 9 Male 1st Child 0
## 10 Male 2nd Child 0
## 11 Male Crew Child 0
## 12 Female 1st Child 0
## 13 Female 2nd Child 0
## 14 Female Crew Child 0
## 15 Male Crew Child 0
## 16 Female Crew Child 0
freqsum <- titanic$Freq %>% sum()
titanic <- titanic %>%
select(Sex, Age, Freq) %>%
mutate(portion = Freq/ freqsum)
head(titanic)
## Sex Age Freq portion
## 1 Male Child 0 0.00000000
## 2 Male Child 0 0.00000000
## 3 Male Child 35 0.01590186
## 4 Male Child 0 0.00000000
## 5 Female Child 0 0.00000000
## 6 Female Child 0 0.00000000
# select sex, sum(freq) from titanic group by sex
sex_ratio <- titanic %>%
select(Sex, Freq) %>%
group_by(Sex) %>%
summarise(Sexsum = sum(Freq, na.rm=TRUE))
pie(sex_ratio$Sexsum, labels = sex_ratio$Sex, init.angle = 90, clockwise = TRUE, col = c('blue', 'red'))
titanic %>%
group_by(Sex) %>%
summarise_each(funs(sum), Freq, portion)
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over a selection of variables, use `summarise_at()`
## # A tibble: 2 x 3
## Sex Freq portion
## <fctr> <dbl> <dbl>
## 1 Male 1731 0.7864607
## 2 Female 470 0.2135393
titanic %>%
group_by(Sex) %>%
summarise_each(funs(min(., na.rm=TRUE), max(., na.rm=TRUE)), Freq)
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over a selection of variables, use `summarise_at()`
## # A tibble: 2 x 3
## Sex Freq_min Freq_max
## <fctr> <dbl> <dbl>
## 1 Male 0 670
## 2 Female 0 140
titanic %>%
group_by(Sex) %>%
summarise_each(funs(min(., na.rm=TRUE), max(., na.rm=TRUE)), matches('Freq') )
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over a selection of variables, use `summarise_at()`
## # A tibble: 2 x 3
## Sex Freq_min Freq_max
## <fctr> <dbl> <dbl>
## 1 Male 0 670
## 2 Female 0 140
titanic %>%
select(Sex) %>%
summarise_each(funs(n()))
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over all variables, use `summarise_all()`
## Sex
## 1 32
titanic %>%
select(Sex) %>%
summarise_each(funs(n_distinct(Sex)))
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over all variables, use `summarise_all()`
## Sex
## 1 2
# select age, sex, sum(freq) as frequency_sum from titanic
# group by age, sex order by frequency_sum desc
titanic %>%
group_by(Age, Sex) %>%
summarise(frequency_sum = sum(Freq)) %>%
arrange(desc(frequency_sum) )
## # A tibble: 4 x 3
## # Groups: Age [2]
## Age Sex frequency_sum
## <fctr> <fctr> <dbl>
## 1 Adult Male 1667
## 2 Adult Female 425
## 3 Child Male 64
## 4 Child Female 45
# selec sex, sum(Freq) from titanic group by Sex
sex_stat <- titanic %>%
group_by(Sex) %>%
summarise(sexsum = sum(Freq))
pie(sex_stat$sexsum, labels = sex_stat$Sex, init.angle = 90, clockwise = TRUE)
barplot(height = sex_stat$sexsum,names.arg= sex_stat$Sex, ylim = c(0,2000), col = c('blue', 'red'), main = 'Male v.s. Female', ylab = 'Frequency' )
# Stat by Age
age_stat <- titanic %>%
group_by(Age) %>%
summarise(sexsum = sum(Freq))
age_stat
## # A tibble: 2 x 2
## Age sexsum
## <fctr> <dbl>
## 1 Child 109
## 2 Adult 2092
#age_stat
data(Titanic)
titanic <- data.frame(Titanic)
survived_stat <- titanic %>%
group_by(Sex, Survived) %>%
summarise_each(funs(sum), Freq)
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over a selection of variables, use `summarise_at()`
library(reshape2)
survived_tb <- dcast(survived_stat, Survived ~ Sex, value.var = 'Freq')
#survived_tb[2:3]
mosaicplot(survived_tb[2:3], main = 'Survived by Sex')
m <- as.matrix(survived_tb[2:3])
m
## Male Female
## [1,] 1364 126
## [2,] 367 344
barplot(m, legend.text = c('Perished', 'Survived'))
download.file('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/WA.TXT', 'WA.txt')
babyname = read.csv("WA.txt", header=FALSE)
head(babyname)
## V1 V2 V3 V4 V5
## 1 WA F 1910 Helen 156
## 2 WA F 1910 Dorothy 122
## 3 WA F 1910 Mary 112
## 4 WA F 1910 Margaret 104
## 5 WA F 1910 Ruth 94
## 6 WA F 1910 Alice 67
colnames(babyname) = c("state", "sex", "year", "name", "freq")
library(dplyr)
babyname %>%
group_by(name) %>%
summarise(name_freq = sum(freq)) %>%
arrange(desc(name_freq)) %>%
head(10)
## # A tibble: 10 x 2
## name name_freq
## <fctr> <int>
## 1 Michael 69037
## 2 Robert 65322
## 3 David 59728
## 4 John 59124
## 5 James 58379
## 6 William 44416
## 7 Richard 39892
## 8 Mary 33980
## 9 Daniel 32935
## 10 Christopher 30595
babyname %>%
filter(sex == 'F') %>%
group_by(name) %>%
summarise(name_freq = sum(freq)) %>%
arrange(desc(name_freq)) %>%
head(10)
## # A tibble: 10 x 2
## name name_freq
## <fctr> <int>
## 1 Mary 33970
## 2 Jennifer 26280
## 3 Linda 22702
## 4 Patricia 21886
## 5 Elizabeth 20097
## 6 Susan 19565
## 7 Jessica 19227
## 8 Barbara 19137
## 9 Sarah 18528
## 10 Karen 16991
babyname %>%
filter(sex == 'M') %>%
group_by(name) %>%
summarise(name_freq = sum(freq)) %>%
arrange(desc(name_freq)) %>%
head(10)
## # A tibble: 10 x 2
## name name_freq
## <fctr> <int>
## 1 Michael 68741
## 2 Robert 65219
## 3 David 59617
## 4 John 59092
## 5 James 58315
## 6 William 44401
## 7 Richard 39892
## 8 Daniel 32884
## 9 Christopher 30504
## 10 Thomas 26549
max(babyname$year)
## [1] 2013
top10_male <- babyname %>%
filter(sex == 'M', year== 2013) %>%
group_by(name) %>%
summarise(name_freq = sum(freq)) %>%
arrange(desc(name_freq)) %>%
head(10)
top10_female <- babyname %>%
filter(sex == 'F', year== 2013) %>%
group_by(name) %>%
summarise(name_freq = sum(freq)) %>%
arrange(desc(name_freq)) %>%
head(10)
top10_female_history <- babyname %>%
select(year, name, freq, sex) %>%
filter(name %in% top10_female$name, sex == 'F')
top10_names <- top10_female$name
emma <- top10_female_history %>% filter(name == 'Emma')
plot(emma$year, emma$freq, type = 'n', xlab = 'year', ylab = 'frequency', main = 'top10 female name trend in 2013')
for (n in seq_along(top10_names)){
print(n)
data <- top10_female_history %>% filter(name == top10_names[n])
lines(data$year, data$freq, type='l', col=n )
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
legend(1910, 600, legend = top10_names, col=1:10, lwd=2.5)
david <- babyname %>%
filter(name == 'David', sex == 'M')
plot(david$year, david$freq, type = 'l')
## 實價登錄分析
download.file('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/lvr_prices.csv', 'lvr_price.csv')
library(readr)
lvr_price <- read_csv("D:/OS DATA/Desktop/lvr_price.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_integer(),
## land_sqmeter = col_double(),
## trading_ymd = col_date(format = ""),
## finish_ymd = col_date(format = ""),
## building_sqmeter = col_double(),
## room = col_integer(),
## living_room = col_integer(),
## bath = col_integer(),
## total_price = col_integer(),
## price_per_sqmeter = col_double(),
## parking_sqmeter = col_double(),
## parking_price = col_integer()
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 32 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual expected <int> <chr> <chr> <chr> actual 1 1282 total_price an integer 6700000000 file 2 2243 total_price an integer 3882685600 row 3 2244 total_price an integer 3373314400 col 4 4629 total_price an integer 3050000000 expected 5 5890 total_price an integer 3133800000 actual # ... with 1 more variables: file <chr>
## ... ................. ... ......................................... ........ ......................................... ...... ......................................... .... ......................................... ... ......................................... ... ......................................... ........ ......................................... ...... .......................................
## See problems(...) for more details.
#View(lvr_price)
head(lvr_price)
## # A tibble: 6 x 29
## X1 area trading_target address
## <int> <chr> <chr> <chr>
## 1 0 大安區 房地(土地+建物) 臺北市大安區和平東路三段1巷72弄1~30號
## 2 1 中正區 房地(土地+建物) 臺北市中正區忠孝東路二段121~150號
## 3 2 大同區 土地 橋北段二小段601~630地號
## 4 3 大同區 房地(土地+建物) 臺北市大同區重慶北路一段61~90號
## 5 4 內湖區 房地(土地+建物) 臺北市內湖區民權東路六段90巷6弄1~30號
## 6 5 信義區 土地 福德段一小段661~690地號
## # ... with 25 more variables: land_sqmeter <dbl>, city_land_type <chr>,
## # non_city_land_type <chr>, non_city_code <chr>, trading_ymd <date>,
## # trading_num <chr>, floor <chr>, total_floor <chr>,
## # building_type <chr>, main_purpose <chr>, built_with <chr>,
## # finish_ymd <date>, building_sqmeter <dbl>, room <int>,
## # living_room <int>, bath <int>, compartment <chr>, management <chr>,
## # total_price <int>, price_per_sqmeter <dbl>, parking_type <chr>,
## # parking_sqmeter <dbl>, parking_price <int>, comments <chr>,
## # numbers <chr>
daan <- lvr_price[lvr_price$area == '大安區', ]
sum(as.numeric(daan$total_price), na.rm=TRUE)
## [1] 2.79477e+11
mean(as.numeric(daan$total_price), na.rm=TRUE)
## [1] 29798170
median(as.numeric(daan$total_price), na.rm=TRUE)
## [1] 2e+07
zhongshan <- lvr_price[lvr_price$area == '中山區', c('address','total_price')]
idx <- order(zhongshan$total_price, decreasing = TRUE)
res <- zhongshan[idx,]
res
## # A tibble: 15,020 x 2
## address total_price
## <chr> <int>
## 1 臺北市中山區建國北路一段138巷1~30號 1850000000
## 2 臺北市中山區南京東路三段1~30號 1400000000
## 3 中山段二小段31~60地號 1084948034
## 4 中山段三小段301~330地號 1011136500
## 5 金泰段61~90地號 952875000
## 6 中山段一小段361~390地號 903865500
## 7 正義段三小段331~360地號 850093721
## 8 臺北市中山區中山北路二段181~210號 760000000
## 9 臺北市中山區民權東路三段1~30號 600000000
## 10 臺北市中山區一江街31~60號 500000000
## # ... with 15,010 more rows
getTopThree <- function(area){
zhongshan <- lvr_price[lvr_price$area ==area, ]
idx <- order(zhongshan$total_price, decreasing = TRUE)
res <- zhongshan[idx, c('address', 'total_price', 'area')]
return(res[1:3,])
}
getTopThree('中山區')
## # A tibble: 3 x 3
## address total_price area
## <chr> <int> <chr>
## 1 臺北市中山區建國北路一段138巷1~30號 1850000000 中山區
## 2 臺北市中山區南京東路三段1~30號 1400000000 中山區
## 3 中山段二小段31~60地號 1084948034 中山區
getTopThree('大安區')
## # A tibble: 3 x 3
## address total_price area
## <chr> <int> <chr>
## 1 臺北市大安區羅斯福路三段283巷4弄1~30號 1869781219 大安區
## 2 臺北市大安區忠孝東路四段241~270號 971340000 大安區
## 3 學府段三小段31~60地號 966660000 大安區
getTopThree('文山區')
## # A tibble: 3 x 3
## address total_price area
## <chr> <int> <chr>
## 1 景美段四小段601~630地號 1550000000 文山區
## 2 萬芳段二小段241~270地號 836350000 文山區
## 3 興安段四小段271~300地號 581500000 文山區
price_per_sec <- tapply(lvr_price$total_price, lvr_price$area, function(e)mean(e, na.rm=TRUE))
barplot(sort(price_per_sec, decreasing = TRUE), col='blue', cex.axis = 0.6, cex.names = 0.6)
price_per_sec2 <- tapply(lvr_price$total_price, lvr_price$area, function(e)median(e, na.rm=TRUE))
barplot(sort(price_per_sec2, decreasing = TRUE), col='blue', cex.axis = 0.6, cex.names = 0.6)
boxplot(lvr_price$total_price ~ lvr_price$area)
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
## Warning in x[floor(d)] + x[ceiling(d)]: 整數向上溢位產生了 NA
boxplot(log(lvr_price$total_price) ~ lvr_price$area, cex.axis = 0.6, cex.names = 0.6)
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 1 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 2 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 3 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 4 is not drawn
## Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z
## $group == : Outlier (-Inf) in boxplot 6 is not drawn
lvr_price %>%
select(area, address,total_price) %>%
filter(area == '中山區') %>%
arrange(desc(total_price)) %>%
head(3)
## # A tibble: 3 x 3
## area address total_price
## <chr> <chr> <int>
## 1 中山區 臺北市中山區建國北路一段138巷1~30號 1850000000
## 2 中山區 臺北市中山區南京東路三段1~30號 1400000000
## 3 中山區 中山段二小段31~60地號 1084948034
lvr_price$trading_ym <- as.Date(format(lvr_price$trading_ymd, '%Y-%m-01'))
lvr_stat <- lvr_price %>%
select(trading_ym, area, total_price) %>%
filter(trading_ym >= '2012-01-01') %>%
group_by(trading_ym, area) %>%
summarise(overall_price = sum(as.numeric(total_price), na.rm=TRUE))
daan <- lvr_stat[lvr_stat$area == '大安區',]
#daan
plot(daan$trading_ym, daan$overall_price, type = 'l')
lvr_stat$area <- as.factor(lvr_stat$area)
par(mfrow=c(3,4))
for(a in levels(lvr_stat$area)){
area_data <- lvr_stat[lvr_stat$area == a,]
#daan
plot(area_data$trading_ym, area_data$overall_price, type = 'l', ,main = a)
}
#install.packages('tidyr')
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.2
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:magrittr':
##
## extract
price_pivot <- spread(lvr_stat, trading_ym, overall_price, fill = 0)
#price_pivot
write.csv(price_pivot, 'taipei_house_price.csv')