read data
library(readr)
## Warning: package 'readr' was built under R version 3.2.5
lvr_price <- read_csv("D:/OS DATA/Desktop/lvr_price.csv",
col_types = cols(finish_ymd = col_date(format = "%Y-%m-%d"),
parking_price = col_number(), total_price = col_number(),
trading_ymd = col_date(format = "%Y-%m-%d")))
## Warning: Missing column names filled in: 'X1' [1]
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
lvr_price$trading_ym <- as.Date(format(lvr_price$trading_ymd, '%Y-%m-01'))
lvr_stat <- lvr_price %>% select(trading_ym, total_price) %>% filter(trading_ym >= '2012-01-01') %>% group_by(trading_ym) %>% summarise(overall_price = sum(as.numeric(total_price), na.rm=TRUE))
plot(overall_price ~ trading_ym,lvr_stat, type='l')

library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.5
a <- lvr_price %>% select(total_price, trading_ym, area) %>% group_by(area, trading_ym) %>% summarise(p = median(total_price)) %>% arrange(trading_ym) %>% spread(trading_ym, p, fill=0)
a
## Source: local data frame [12 x 114]
## Groups: area [12]
##
## area 1973-08-01 1975-10-01 1989-03-01 1989-06-01 1994-06-01
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 士林區 0 0 0 0 0
## 2 大同區 0 173000 0 145200 0
## 3 大安區 0 0 0 0 0
## 4 中山區 0 0 0 0 0
## 5 中正區 0 0 124117 0 0
## 6 內湖區 0 0 0 0 0
## 7 文山區 0 0 0 0 0
## 8 北投區 24833 0 0 0 0
## 9 松山區 0 0 0 0 0
## 10 信義區 0 0 0 0 0
## 11 南港區 0 0 0 0 0
## 12 萬華區 0 0 0 0 60511
## # ... with 108 more variables: 1998-01-01 <dbl>, 2001-12-01 <dbl>,
## # 2003-04-01 <dbl>, 2003-10-01 <dbl>, 2003-12-01 <dbl>,
## # 2004-04-01 <dbl>, 2004-12-01 <dbl>, 2005-03-01 <dbl>,
## # 2005-04-01 <dbl>, 2006-05-01 <dbl>, 2007-06-01 <dbl>,
## # 2007-11-01 <dbl>, 2007-12-01 <dbl>, 2008-03-01 <dbl>,
## # 2008-04-01 <dbl>, 2008-07-01 <dbl>, 2008-11-01 <dbl>,
## # 2008-12-01 <dbl>, 2009-01-01 <dbl>, 2009-02-01 <dbl>,
## # 2009-03-01 <dbl>, 2009-04-01 <dbl>, 2009-05-01 <dbl>,
## # 2009-06-01 <dbl>, 2009-07-01 <dbl>, 2009-08-01 <dbl>,
## # 2009-09-01 <dbl>, 2009-10-01 <dbl>, 2009-11-01 <dbl>,
## # 2009-12-01 <dbl>, 2010-01-01 <dbl>, 2010-02-01 <dbl>,
## # 2010-03-01 <dbl>, 2010-04-01 <dbl>, 2010-05-01 <dbl>,
## # 2010-06-01 <dbl>, 2010-07-01 <dbl>, 2010-08-01 <dbl>,
## # 2010-09-01 <dbl>, 2010-10-01 <dbl>, 2010-11-01 <dbl>,
## # 2010-12-01 <dbl>, 2011-01-01 <dbl>, 2011-02-01 <dbl>,
## # 2011-03-01 <dbl>, 2011-04-01 <dbl>, 2011-05-01 <dbl>,
## # 2011-06-01 <dbl>, 2011-07-01 <dbl>, 2011-08-01 <dbl>,
## # 2011-09-01 <dbl>, 2011-10-01 <dbl>, 2011-11-01 <dbl>,
## # 2011-12-01 <dbl>, 2012-01-01 <dbl>, 2012-02-01 <dbl>,
## # 2012-03-01 <dbl>, 2012-04-01 <dbl>, 2012-05-01 <dbl>,
## # 2012-06-01 <dbl>, 2012-07-01 <dbl>, 2012-08-01 <dbl>,
## # 2012-09-01 <dbl>, 2012-10-01 <dbl>, 2012-11-01 <dbl>,
## # 2012-12-01 <dbl>, 2013-01-01 <dbl>, 2013-02-01 <dbl>,
## # 2013-03-01 <dbl>, 2013-04-01 <dbl>, 2013-05-01 <dbl>,
## # 2013-06-01 <dbl>, 2013-07-01 <dbl>, 2013-08-01 <dbl>,
## # 2013-09-01 <dbl>, 2013-10-01 <dbl>, 2013-11-01 <dbl>,
## # 2013-12-01 <dbl>, 2014-01-01 <dbl>, 2014-02-01 <dbl>,
## # 2014-03-01 <dbl>, 2014-04-01 <dbl>, 2014-05-01 <dbl>,
## # 2014-06-01 <dbl>, 2014-07-01 <dbl>, 2014-08-01 <dbl>,
## # 2014-09-01 <dbl>, 2014-10-01 <dbl>, 2014-11-01 <dbl>,
## # 2014-12-01 <dbl>, 2015-01-01 <dbl>, 2015-02-01 <dbl>,
## # 2015-03-01 <dbl>, 2015-04-01 <dbl>, 2015-05-01 <dbl>,
## # 2015-06-01 <dbl>, 2015-07-01 <dbl>, 2015-08-01 <dbl>,
## # 2015-09-01 <dbl>, 2015-10-01 <dbl>, ...
lvr_new <- lvr_price[
lvr_price$total_price < 100000000 &
lvr_price$building_sqmeter< 1000 &
lvr_price$area == '大安區' &
lvr_price$building_sqmeter > 0,]
head(lvr_new)
## # A tibble: 6 x 26
## X1 area trading_target address
## <int> <chr> <chr> <chr>
## 1 0 大安區 房地(土地+建物) 臺北市大安區和平東路三段1巷72弄1~30號
## 2 9 大安區 房地(土地+建物) 臺北市大安區敦化南路一段270巷1~30號
## 3 10 大安區 車位 臺北市大安區永康街7巷1~30號
## 4 11 大安區 車位 臺北市大安區永康街7巷1~30號
## 5 12 大安區 車位 臺北市大安區永康街7巷1~30號
## 6 47 大安區 房地(土地+建物) 臺北市大安區辛亥路二段31~60號
## # ... with 22 more variables: land_sqmeter <dbl>, city_land_type <chr>,
## # trading_ymd <date>, trading_num <chr>, floor <chr>, total_floor <chr>,
## # building_type <chr>, main_purpose <chr>, built_with <chr>,
## # finish_ymd <date>, building_sqmeter <dbl>, room <int>,
## # living_room <int>, bath <int>, compartment <chr>, management <chr>,
## # total_price <dbl>, price_per_sqmeter <dbl>, parking_type <chr>,
## # parking_sqmeter <dbl>, parking_price <dbl>, trading_ym <date>
plot(total_price ~ building_sqmeter, data = lvr_new)
fit <- lm(total_price ~ building_sqmeter, data = lvr_new)
fit
##
## Call:
## lm(formula = total_price ~ building_sqmeter, data = lvr_new)
##
## Coefficients:
## (Intercept) building_sqmeter
## 2241224 222169
abline(fit, col="red")

fit2 <- lm(total_price ~ parking_sqmeter + building_sqmeter, data = lvr_price)
summary(fit2)
##
## Call:
## lm(formula = total_price ~ parking_sqmeter + building_sqmeter,
## data = lvr_price)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.977e+09 -7.273e+06 -1.647e+06 2.886e+06 1.512e+10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.046e+06 2.705e+05 -7.564 3.93e-14 ***
## parking_sqmeter -2.483e+01 5.724e+01 -0.434 0.665
## building_sqmeter 2.208e+05 6.048e+02 365.076 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 83010000 on 102051 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5663
## F-statistic: 6.664e+04 on 2 and 102051 DF, p-value: < 2.2e-16
#install.packages('RJDBC')
library(RJDBC)
## Warning: package 'RJDBC' was built under R version 3.2.5
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.2.5
## Loading required package: rJava
drv <- JDBC("com.mysql.jdbc.Driver",
"C:\\Program Files (x86)\\MySQL\\Connector.J 5.1\\mysql-connector-java-5.1.38-bin.jar", "`")
conn <- dbConnect(drv, "jdbc:mysql://localhost/lvr", "root", "test")
dbListTables(conn)
## [1] "iris" "lvr_data"
data(iris)
dbWriteTable(conn, "iris", iris)
## [1] TRUE
dbGetQuery(conn, "select count(*) from iris")
## count(*)
## 1 150
d <- dbReadTable(conn, "iris")
class(d)
## [1] "data.frame"
dbDisconnect(conn)
## [1] TRUE
iris$Species
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa setosa setosa setosa setosa setosa
## [19] setosa setosa setosa setosa setosa setosa
## [25] setosa setosa setosa setosa setosa setosa
## [31] setosa setosa setosa setosa setosa setosa
## [37] setosa setosa setosa setosa setosa setosa
## [43] setosa setosa setosa setosa setosa setosa
## [49] setosa setosa versicolor versicolor versicolor versicolor
## [55] versicolor versicolor versicolor versicolor versicolor versicolor
## [61] versicolor versicolor versicolor versicolor versicolor versicolor
## [67] versicolor versicolor versicolor versicolor versicolor versicolor
## [73] versicolor versicolor versicolor versicolor versicolor versicolor
## [79] versicolor versicolor versicolor versicolor versicolor versicolor
## [85] versicolor versicolor versicolor versicolor versicolor versicolor
## [91] versicolor versicolor versicolor versicolor versicolor versicolor
## [97] versicolor versicolor versicolor versicolor virginica virginica
## [103] virginica virginica virginica virginica virginica virginica
## [109] virginica virginica virginica virginica virginica virginica
## [115] virginica virginica virginica virginica virginica virginica
## [121] virginica virginica virginica virginica virginica virginica
## [127] virginica virginica virginica virginica virginica virginica
## [133] virginica virginica virginica virginica virginica virginica
## [139] virginica virginica virginica virginica virginica virginica
## [145] virginica virginica virginica virginica virginica virginica
## Levels: setosa versicolor virginica
library(rpart)
## Warning: package 'rpart' was built under R version 3.2.5
fit <- rpart(Species~Sepal.Length + Sepal.Width + Petal.Width + Petal.Length, data = iris)
plot(fit, margin=0.1)
text(fit)

fit <- rpart(Species~ . , data = iris)
ni <- names(iris)
ni[- (ni %in% c('Sepal.Width'))]
## [1] "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
features <- paste(ni[!(ni %in% c('Petal.Width', 'Species'))], collapse = '+')
as.formula(paste('Species ~',features))
## Species ~ Sepal.Length + Sepal.Width + Petal.Length
fit2 <- rpart(paste('Species ~',features),data = iris)
fit2
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)
## 2) Petal.Length< 2.45 50 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 100 50 versicolor (0.00000000 0.50000000 0.50000000)
## 6) Petal.Length< 4.75 45 1 versicolor (0.00000000 0.97777778 0.02222222) *
## 7) Petal.Length>=4.75 55 6 virginica (0.00000000 0.10909091 0.89090909) *