read data

library(readr)
## Warning: package 'readr' was built under R version 3.2.5
lvr_price <- read_csv("D:/OS DATA/Desktop/lvr_price.csv", 
    col_types = cols(finish_ymd = col_date(format = "%Y-%m-%d"), 
        parking_price = col_number(), total_price = col_number(), 
        trading_ymd = col_date(format = "%Y-%m-%d")))
## Warning: Missing column names filled in: 'X1' [1]
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
lvr_price$trading_ym <- as.Date(format(lvr_price$trading_ymd, '%Y-%m-01'))

lvr_stat <- lvr_price %>% select(trading_ym, total_price) %>% filter(trading_ym >= '2012-01-01') %>% group_by(trading_ym) %>% summarise(overall_price = sum(as.numeric(total_price), na.rm=TRUE))

plot(overall_price ~ trading_ym,lvr_stat, type='l')

library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.5
a <- lvr_price %>% select(total_price, trading_ym, area) %>% group_by(area, trading_ym) %>% summarise(p = median(total_price)) %>% arrange(trading_ym) %>% spread(trading_ym, p, fill=0)
a
## Source: local data frame [12 x 114]
## Groups: area [12]
## 
##      area 1973-08-01 1975-10-01 1989-03-01 1989-06-01 1994-06-01
## *   <chr>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
## 1  士林區          0          0          0          0          0
## 2  大同區          0     173000          0     145200          0
## 3  大安區          0          0          0          0          0
## 4  中山區          0          0          0          0          0
## 5  中正區          0          0     124117          0          0
## 6  內湖區          0          0          0          0          0
## 7  文山區          0          0          0          0          0
## 8  北投區      24833          0          0          0          0
## 9  松山區          0          0          0          0          0
## 10 信義區          0          0          0          0          0
## 11 南港區          0          0          0          0          0
## 12 萬華區          0          0          0          0      60511
## # ... with 108 more variables: 1998-01-01 <dbl>, 2001-12-01 <dbl>,
## #   2003-04-01 <dbl>, 2003-10-01 <dbl>, 2003-12-01 <dbl>,
## #   2004-04-01 <dbl>, 2004-12-01 <dbl>, 2005-03-01 <dbl>,
## #   2005-04-01 <dbl>, 2006-05-01 <dbl>, 2007-06-01 <dbl>,
## #   2007-11-01 <dbl>, 2007-12-01 <dbl>, 2008-03-01 <dbl>,
## #   2008-04-01 <dbl>, 2008-07-01 <dbl>, 2008-11-01 <dbl>,
## #   2008-12-01 <dbl>, 2009-01-01 <dbl>, 2009-02-01 <dbl>,
## #   2009-03-01 <dbl>, 2009-04-01 <dbl>, 2009-05-01 <dbl>,
## #   2009-06-01 <dbl>, 2009-07-01 <dbl>, 2009-08-01 <dbl>,
## #   2009-09-01 <dbl>, 2009-10-01 <dbl>, 2009-11-01 <dbl>,
## #   2009-12-01 <dbl>, 2010-01-01 <dbl>, 2010-02-01 <dbl>,
## #   2010-03-01 <dbl>, 2010-04-01 <dbl>, 2010-05-01 <dbl>,
## #   2010-06-01 <dbl>, 2010-07-01 <dbl>, 2010-08-01 <dbl>,
## #   2010-09-01 <dbl>, 2010-10-01 <dbl>, 2010-11-01 <dbl>,
## #   2010-12-01 <dbl>, 2011-01-01 <dbl>, 2011-02-01 <dbl>,
## #   2011-03-01 <dbl>, 2011-04-01 <dbl>, 2011-05-01 <dbl>,
## #   2011-06-01 <dbl>, 2011-07-01 <dbl>, 2011-08-01 <dbl>,
## #   2011-09-01 <dbl>, 2011-10-01 <dbl>, 2011-11-01 <dbl>,
## #   2011-12-01 <dbl>, 2012-01-01 <dbl>, 2012-02-01 <dbl>,
## #   2012-03-01 <dbl>, 2012-04-01 <dbl>, 2012-05-01 <dbl>,
## #   2012-06-01 <dbl>, 2012-07-01 <dbl>, 2012-08-01 <dbl>,
## #   2012-09-01 <dbl>, 2012-10-01 <dbl>, 2012-11-01 <dbl>,
## #   2012-12-01 <dbl>, 2013-01-01 <dbl>, 2013-02-01 <dbl>,
## #   2013-03-01 <dbl>, 2013-04-01 <dbl>, 2013-05-01 <dbl>,
## #   2013-06-01 <dbl>, 2013-07-01 <dbl>, 2013-08-01 <dbl>,
## #   2013-09-01 <dbl>, 2013-10-01 <dbl>, 2013-11-01 <dbl>,
## #   2013-12-01 <dbl>, 2014-01-01 <dbl>, 2014-02-01 <dbl>,
## #   2014-03-01 <dbl>, 2014-04-01 <dbl>, 2014-05-01 <dbl>,
## #   2014-06-01 <dbl>, 2014-07-01 <dbl>, 2014-08-01 <dbl>,
## #   2014-09-01 <dbl>, 2014-10-01 <dbl>, 2014-11-01 <dbl>,
## #   2014-12-01 <dbl>, 2015-01-01 <dbl>, 2015-02-01 <dbl>,
## #   2015-03-01 <dbl>, 2015-04-01 <dbl>, 2015-05-01 <dbl>,
## #   2015-06-01 <dbl>, 2015-07-01 <dbl>, 2015-08-01 <dbl>,
## #   2015-09-01 <dbl>, 2015-10-01 <dbl>, ...
lvr_new <- lvr_price[
     lvr_price$total_price < 100000000 &     
     lvr_price$building_sqmeter< 1000 & 
     lvr_price$area == '大安區' &
     lvr_price$building_sqmeter > 0,]
head(lvr_new)
## # A tibble: 6 x 26
##      X1   area  trading_target                               address
##   <int>  <chr>           <chr>                                 <chr>
## 1     0 大安區 房地(土地+建物) 臺北市大安區和平東路三段1巷72弄1~30號
## 2     9 大安區 房地(土地+建物)   臺北市大安區敦化南路一段270巷1~30號
## 3    10 大安區            車位           臺北市大安區永康街7巷1~30號
## 4    11 大安區            車位           臺北市大安區永康街7巷1~30號
## 5    12 大安區            車位           臺北市大安區永康街7巷1~30號
## 6    47 大安區 房地(土地+建物)         臺北市大安區辛亥路二段31~60號
## # ... with 22 more variables: land_sqmeter <dbl>, city_land_type <chr>,
## #   trading_ymd <date>, trading_num <chr>, floor <chr>, total_floor <chr>,
## #   building_type <chr>, main_purpose <chr>, built_with <chr>,
## #   finish_ymd <date>, building_sqmeter <dbl>, room <int>,
## #   living_room <int>, bath <int>, compartment <chr>, management <chr>,
## #   total_price <dbl>, price_per_sqmeter <dbl>, parking_type <chr>,
## #   parking_sqmeter <dbl>, parking_price <dbl>, trading_ym <date>
plot(total_price ~ building_sqmeter, data = lvr_new)
fit <- lm(total_price ~ building_sqmeter, data = lvr_new)
fit
## 
## Call:
## lm(formula = total_price ~ building_sqmeter, data = lvr_new)
## 
## Coefficients:
##      (Intercept)  building_sqmeter  
##          2241224            222169
abline(fit, col="red")

fit2 <- lm(total_price ~  parking_sqmeter + building_sqmeter, data = lvr_price)

summary(fit2)
## 
## Call:
## lm(formula = total_price ~ parking_sqmeter + building_sqmeter, 
##     data = lvr_price)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.977e+09 -7.273e+06 -1.647e+06  2.886e+06  1.512e+10 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2.046e+06  2.705e+05  -7.564 3.93e-14 ***
## parking_sqmeter  -2.483e+01  5.724e+01  -0.434    0.665    
## building_sqmeter  2.208e+05  6.048e+02 365.076  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 83010000 on 102051 degrees of freedom
## Multiple R-squared:  0.5664, Adjusted R-squared:  0.5663 
## F-statistic: 6.664e+04 on 2 and 102051 DF,  p-value: < 2.2e-16
#install.packages('RJDBC')
library(RJDBC)
## Warning: package 'RJDBC' was built under R version 3.2.5
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.2.5
## Loading required package: rJava
drv <- JDBC("com.mysql.jdbc.Driver",
  "C:\\Program Files (x86)\\MySQL\\Connector.J 5.1\\mysql-connector-java-5.1.38-bin.jar", "`")
conn <- dbConnect(drv, "jdbc:mysql://localhost/lvr", "root", "test")

dbListTables(conn)
## [1] "iris"     "lvr_data"
data(iris)
dbWriteTable(conn, "iris", iris)
## [1] TRUE
dbGetQuery(conn, "select count(*) from iris")
##   count(*)
## 1      150
d <- dbReadTable(conn, "iris")
class(d)
## [1] "data.frame"
dbDisconnect(conn)
## [1] TRUE
iris$Species
##   [1] setosa     setosa     setosa     setosa     setosa     setosa    
##   [7] setosa     setosa     setosa     setosa     setosa     setosa    
##  [13] setosa     setosa     setosa     setosa     setosa     setosa    
##  [19] setosa     setosa     setosa     setosa     setosa     setosa    
##  [25] setosa     setosa     setosa     setosa     setosa     setosa    
##  [31] setosa     setosa     setosa     setosa     setosa     setosa    
##  [37] setosa     setosa     setosa     setosa     setosa     setosa    
##  [43] setosa     setosa     setosa     setosa     setosa     setosa    
##  [49] setosa     setosa     versicolor versicolor versicolor versicolor
##  [55] versicolor versicolor versicolor versicolor versicolor versicolor
##  [61] versicolor versicolor versicolor versicolor versicolor versicolor
##  [67] versicolor versicolor versicolor versicolor versicolor versicolor
##  [73] versicolor versicolor versicolor versicolor versicolor versicolor
##  [79] versicolor versicolor versicolor versicolor versicolor versicolor
##  [85] versicolor versicolor versicolor versicolor versicolor versicolor
##  [91] versicolor versicolor versicolor versicolor versicolor versicolor
##  [97] versicolor versicolor versicolor versicolor virginica  virginica 
## [103] virginica  virginica  virginica  virginica  virginica  virginica 
## [109] virginica  virginica  virginica  virginica  virginica  virginica 
## [115] virginica  virginica  virginica  virginica  virginica  virginica 
## [121] virginica  virginica  virginica  virginica  virginica  virginica 
## [127] virginica  virginica  virginica  virginica  virginica  virginica 
## [133] virginica  virginica  virginica  virginica  virginica  virginica 
## [139] virginica  virginica  virginica  virginica  virginica  virginica 
## [145] virginica  virginica  virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica
library(rpart)
## Warning: package 'rpart' was built under R version 3.2.5
fit <- rpart(Species~Sepal.Length + Sepal.Width + Petal.Width + Petal.Length, data = iris)
plot(fit, margin=0.1)
text(fit)

fit <- rpart(Species~ . , data = iris)
ni <- names(iris) 
ni[- (ni %in% c('Sepal.Width'))]
## [1] "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
features <- paste(ni[!(ni %in% c('Petal.Width', 'Species'))], collapse = '+')
as.formula(paste('Species ~',features))
## Species ~ Sepal.Length + Sepal.Width + Petal.Length
fit2 <- rpart(paste('Species ~',features),data = iris)

fit2
## n= 150 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)  
##   2) Petal.Length< 2.45 50   0 setosa (1.00000000 0.00000000 0.00000000) *
##   3) Petal.Length>=2.45 100  50 versicolor (0.00000000 0.50000000 0.50000000)  
##     6) Petal.Length< 4.75 45   1 versicolor (0.00000000 0.97777778 0.02222222) *
##     7) Petal.Length>=4.75 55   6 virginica (0.00000000 0.10909091 0.89090909) *