1 Introduction to the data and data cleaning

We use Taipei City (year 108, third quarter) as our dataset. Our response variable Y is the house price, and we have 27 features. The column names are shown below.

# Load the raw transaction records. Every column is parsed as character
# (see the column specification echoed below).
taipei <- read_csv(file = "~/house/A_lvr_land_A.csv")
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
# Show the original column names as read from the file.
names(taipei)
##  [1] "鄉鎮市區"                 "交易標的"                
##  [3] "土地區段位置建物區段門牌" "土地移轉總面積平方公尺"  
##  [5] "都市土地使用分區"         "非都市土地使用分區"      
##  [7] "非都市土地使用編定"       "交易年月日"              
##  [9] "交易筆棟數"               "移轉層次"                
## [11] "總樓層數"                 "建物型態"                
## [13] "主要用途"                 "主要建材"                
## [15] "建築完成年月"             "建物移轉總面積平方公尺"  
## [17] "建物現況格局-房"          "建物現況格局-廳"         
## [19] "建物現況格局-衛"          "建物現況格局-隔間"       
## [21] "有無管理組織"             "總價元"                  
## [23] "單價元平方公尺"           "車位類別"                
## [25] "車位移轉總面積平方公尺"   "車位總價元"              
## [27] "備註"                     "編號"

Change the column names to English

# The first data row carries the English column names: use it as the header
# and then remove it from the data.
featurename <- as.character(taipei[1, ])
names(taipei) <- featurename
taipei <- taipei[-1, ]

# Response variable: total price, converted from text to numeric.
Y <- as.numeric(taipei$`total price NTD`)

2 Data visualization

# Assemble the columns used for the visualisations. The unit price is not
# taken from the raw column: it is derived below as total price divided by
# building area, so the raw column is no longer parsed only to be overwritten.
taipeiuse <- data.frame(
  type = as.factor(taipei$`transaction sign`),
  price = as.numeric(taipei$`total price NTD`),
  district = taipei$`The villages and towns urban district`,
  building_area = as.numeric(taipei$`building shifting total area`)
)

# NOTE(review): rows with building_area == 0 yield Inf/NaN here; they are
# only excluded insofar as the transaction-type filter below removes them.
taipeiuse$unit_price <- taipeiuse$price / taipeiuse$building_area

# Keep the original column order (unit_price before building_area).
taipeiuse <- taipeiuse[
  c("type", "price", "district", "unit_price", "building_area")
]

taipei1 <- arrange(taipeiuse, type, district, unit_price)

# Keep only the two transaction types that include a building.
taipei1 <- filter(
  taipei1,
  type %in% c("房地(土地+建物)+車位", "房地(土地+建物)")
)

Compute the average price per ping for each district and show it on a map

# Mean unit price per district. unit_price is in NTD per square metre;
# multiplying by 3.305785 (square metres per ping) gives NTD per ping.
taipei2 <- aggregate(
  x = 3.305785 * taipei1$unit_price,
  by = list(taipei1$district),
  FUN = mean
)
print(taipei2)
##    Group.1        x
## 1   士林區 599045.2
## 2   大同區 601327.3
## 3   大安區 839837.3
## 4   中山區 672512.1
## 5   中正區 737896.0
## 6   內湖區 536891.8
## 7   文山區 446789.4
## 8   北投區 466152.1
## 9   松山區 709970.8
## 10  信義區 748504.6
## 11  南港區 553427.3
## 12  萬華區 497452.3
# Attach ggplot2 with library() so a missing package stops the script here
# instead of require() returning FALSE and failing later at the first plot.
library(ggplot2)
# Read the township boundary shapefile (summary echoed below).
taiwan.town.map <- st_read("TOWN_MOI_1080617.shp")
## Reading layer `TOWN_MOI_1080617' from data source `C:\Users\User\Documents\house\TOWN_MOI_1080617.shp' using driver `ESRI Shapefile'
## Simple feature collection with 368 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 114.3593 ymin: 10.37135 xmax: 124.5611 ymax: 26.38528
## epsg (SRID):    NA
## proj4string:    +proj=longlat +ellps=GRS80 +no_defs
# Keep only the Taipei City rows of the township layer.
taipei.map <- taiwan.town.map[taiwan.town.map$COUNTYNAME == "臺北市", ]

# g1: plain outline map of the districts.
g1 <- ggplot(data = taipei.map) +
  geom_sf() +
  labs(title = "台北市行政區圖")

# g3: districts filled by name and labelled; the legend is suppressed.
# FALSE is spelled out because F is an ordinary, reassignable variable.
g3 <- ggplot(data = taipei.map) +
  geom_sf(aes(fill = TOWNNAME), show.legend = FALSE) +
  geom_sf_text(aes(label = TOWNNAME), size = 3) +
  labs(title = "台北市行政區圖")

# Reduce the map to district name + geometry, with the name as character
# so it can serve as a join key.
my.taipei.map <- taipei.map[c("TOWNNAME", "geometry")]
my.taipei.map$TOWNNAME <- as.character(my.taipei.map$TOWNNAME)

# Merge the per-district mean price into the map data with a left join.

# The key in taipei2 (Group.1) is converted to character up front so both
# join keys have the same type and the join no longer has to coerce a factor.
my.taipei.map.data <- left_join(
  my.taipei.map,
  mutate(taipei2, Group.1 = as.character(Group.1)),
  by = c("TOWNNAME" = "Group.1")
)
g3
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

# geom_sf makes it more intuitive to work with map data directly.

# Choropleth of the mean price per district; the fill value is x / 10000.
# Palettes tried earlier and left out: "Spectral" (distiller),
# tim.colors(22) (gradientn), viridis, and "YlOrRd" with default direction.
g4 <- ggplot(my.taipei.map.data) +
  geom_sf(mapping = aes(fill = x / 10000)) +
  geom_sf_text(mapping = aes(label = TOWNNAME), size = 3) +
  scale_fill_distiller(name = "坪(萬)", palette = "YlOrRd", direction = 1) +
  labs(title = "台北市各行政區房價分佈圖", x = "經度", y = "緯度")
print(g4)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

# direction = 1 maps prices from low to high onto colours from light to dark; direction = -1 reverses this.