We use Taipei City data (third quarter of year 108) as our dataset. Our response Y is the house price, and we have 27 features. The column names are shown below.
# Load the Taipei transaction records from the raw CSV export.
taipei <- read_csv(file = "~/house/A_lvr_land_A.csv")
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
# List the column headers exactly as they were read from the file.
names(taipei)
## [1] "鄉鎮市區" "交易標的"
## [3] "土地區段位置建物區段門牌" "土地移轉總面積平方公尺"
## [5] "都市土地使用分區" "非都市土地使用分區"
## [7] "非都市土地使用編定" "交易年月日"
## [9] "交易筆棟數" "移轉層次"
## [11] "總樓層數" "建物型態"
## [13] "主要用途" "主要建材"
## [15] "建築完成年月" "建物移轉總面積平方公尺"
## [17] "建物現況格局-房" "建物現況格局-廳"
## [19] "建物現況格局-衛" "建物現況格局-隔間"
## [21] "有無管理組織" "總價元"
## [23] "單價元平方公尺" "車位類別"
## [25] "車位移轉總面積平方公尺" "車位總價元"
## [27] "備註" "編號"
Change the column names to English.
# The first data row of the raw file carries the English column names:
# promote it to the header, then drop it from the data.
featurename <- as.character(taipei[1, ])
colnames(taipei) <- featurename
taipei <- taipei[-1, ]

# Response variable: total transaction price, parsed from text to numeric.
Y <- as.numeric(taipei$`total price NTD`)

# Working table for the per-district price analysis. unit_price is derived
# from price / building_area; the file's own unit-price column is not parsed
# because its values were always overwritten by this derived figure. The
# column order (type, price, district, unit_price, building_area) is kept.
taipeiuse <- data.frame(
  type = as.factor(taipei$`transaction sign`),
  price = Y,
  district = taipei$`The villages and towns urban district`,
  unit_price = rep(NA_real_, nrow(taipei)),
  building_area = as.numeric(taipei$`building shifting total area`)
)
taipeiuse$unit_price <- taipeiuse$price / taipeiuse$building_area

# Sort by type, district and unit price, then keep only the transactions that
# include a building (land + building, with or without a parking space).
taipei1 <- arrange(taipeiuse, type, district, unit_price)
taipei1 <- filter(
  taipei1,
  type %in% c("房地(土地+建物)+車位", "房地(土地+建物)")
)
# Uncomment to inspect the transaction types:
# taipeiuse$type
Compute the average unit price for each district and show it on a map.
# Mean unit price per district, converted from NTD per square metre to NTD
# per ping (1 ping = 3.305785 square metres).
# Rows whose unit price is NA, NaN or infinite (missing price or area, or a
# zero building area) are left out, so that a single bad record cannot turn a
# whole district's mean into NA or Inf.
sqm_per_ping <- 3.305785
has_unit_price <- is.finite(taipei1$unit_price)
taipei2 <- aggregate(
  sqm_per_ping * taipei1$unit_price[has_unit_price],
  list(taipei1$district[has_unit_price]),
  mean
)
taipei2
## Group.1 x
## 1 士林區 599045.2
## 2 大同區 601327.3
## 3 大安區 839837.3
## 4 中山區 672512.1
## 5 中正區 737896.0
## 6 內湖區 536891.8
## 7 文山區 446789.4
## 8 北投區 466152.1
## 9 松山區 709970.8
## 10 信義區 748504.6
## 11 南港區 553427.3
## 12 萬華區 497452.3
require(ggplot2)
## Loading required package: ggplot2
# Read the township-level boundary shapefile.
taiwan.town.map <- st_read(dsn = "TOWN_MOI_1080617.shp")
## Reading layer `TOWN_MOI_1080617' from data source `C:\Users\User\Documents\house\TOWN_MOI_1080617.shp' using driver `ESRI Shapefile'
## Simple feature collection with 368 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 114.3593 ymin: 10.37135 xmax: 124.5611 ymax: 26.38528
## epsg (SRID): NA
## proj4string: +proj=longlat +ellps=GRS80 +no_defs
# Keep only the features that belong to Taipei City.
taipei.map <- taiwan.town.map[taiwan.town.map$COUNTYNAME == "臺北市", ]

# Plain outline map of the districts.
g1 <- ggplot(data = taipei.map) +
  geom_sf() +
  labs(title = "台北市行政區圖")

# District map with one fill colour per district and the district name
# printed on each polygon; the legend is suppressed.
g3 <- ggplot(data = taipei.map) +
  geom_sf(aes(fill = TOWNNAME), show.legend = FALSE) +
  geom_sf_text(aes(label = TOWNNAME), size = 3) +
  labs(title = "台北市行政區圖")

# Reduce the map to the district name and its geometry.
my.taipei.map <- taipei.map[c("TOWNNAME", "geometry")]
my.taipei.map$TOWNNAME <- as.character(my.taipei.map$TOWNNAME)

# Merge the per-district mean price onto the map with a left join. The key in
# taipei2 is converted to character first so that both join keys have the
# same type and the join does not have to coerce a factor.
my.taipei.map.data <- left_join(
  my.taipei.map,
  transform(taipei2, Group.1 = as.character(Group.1)),
  by = c("TOWNNAME" = "Group.1")
)
## Warning: Column `TOWNNAME`/`Group.1` joining character vector and factor,
## coercing into character vector
# Display the coloured district map.
g3
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# The advantage of geom_sf is that map data can be used more intuitively.
# Choropleth of the joined per-district value x; it is divided by 10000 so
# that the legend reads in units of 10,000 NTD.
g4 <- ggplot(data = my.taipei.map.data) +
  geom_sf(aes(fill = x / 10000)) +
  geom_sf_text(aes(label = TOWNNAME), size = 3) +
  # Other fill scales that were tried here: the "Spectral" distiller palette,
  # scale_fill_gradientn(colours = tim.colors(22)) and scale_fill_viridis().
  # The legend title names the plotted quantity (average price per ping, in
  # units of 10,000 NTD) rather than only the area unit.
  scale_fill_distiller(
    palette = "YlOrRd",
    direction = 1,
    name = "每坪均價(萬元)"
  ) +
  labs(title = "台北市各行政區房價分佈圖", x = "經度", y = "緯度")
g4
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# direction = 1: house prices map from light (low) to dark (high); direction = -1 reverses the order