library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(DT)
## Warning: package 'DT' was built under R version 4.3.1
# Read the listings workbook into a tibble. The path is an absolute,
# machine-specific location.
EDA <- read_excel(path = "D:/data/EDA.xlsx")
## Warning: Expecting numeric in G3081 / R3081C7: got a date
## New names:
## • `` -> `...1`
# Print a compact overview of the columns and their types.
str(object = EDA)
## tibble [153,430 × 15] (S3: tbl_df/tbl/data.frame)
##  $ ...1         : num [1:153430] 0 1 2 3 4 5 6 7 8 9 ...
##  $ property_type: chr [1:153430] "Flat" "Flat" "House" "House" ...
##  $ price        : num [1:153430] 10000000 6900000 16500000 43500000 7000000 34500000 27000000 7800000 50000000 40000000 ...
##  $ location     : chr [1:153430] "G-10" "E-11" "G-15" "Bani Gala" ...
##  $ city         : chr [1:153430] "Islamabad" "Islamabad" "Islamabad" "Islamabad" ...
##  $ province_name: chr [1:153430] "Islamabad Capital" "Islamabad Capital" "Islamabad Capital" "Islamabad Capital" ...
##  $ latitude     : num [1:153430] 3.37e+06 3.37e+07 3.36e+16 3.37e+13 3.35e+07 ...
##  $ longitude    : num [1:153430] 7.30e+06 7.30e+07 7.29e+07 7.32e+12 7.33e+07 ...
##  $ baths        : num [1:153430] 2 3 6 4 3 8 8 2 7 5 ...
##  $ purpose      : chr [1:153430] "For Sale" "For Sale" "For Sale" "For Sale" ...
##  $ bedrooms     : num [1:153430] 2 3 5 4 3 8 8 2 7 5 ...
##  $ date_added   : POSIXct[1:153430], format: "2019-02-04" "2019-05-04" ...
##  $ agency       : chr [1:153430] "Self" "Self" "Self" "Self" ...
##  $ agent        : chr [1:153430] "Self" "Self" "Self" "Self" ...
##  $ Area_in_Marla: num [1:153430] 4 5.6 8 40 8 32 20 6.2 20 20 ...

1. Thống kê mô tả biến `purpose`

# Frequency count of listings for each value of the `purpose` column.
table(EDA[["purpose"]])
## 
## For Rent For Sale 
##    43183   110247

Có 43.183 bất động sản cho thuê và 110.247 bất động sản để bán.

# Share of listings for each value of `purpose`. Both the numerator and the
# denominator come from the same `purpose` column, so the shares sum to 1.
prop.table(table(EDA$purpose))
# With the counts 43183 (For Rent) and 110247 (For Sale) out of 153430 rows,
# the shares are about 0.2815 and 0.7185 respectively.

Tỷ lệ bất động sản cho thuê chiếm 28,15%, để bán chiếm 71,85%.

# Bar chart of the number of listings per purpose. Each bar is annotated
# with its share of all listings, computed from the bar counts.
ggplot(data = EDA, mapping = aes(x = purpose)) +
  geom_bar(colour = "black", fill = "pink") +
  geom_text(
    mapping = aes(label = scales::percent(after_stat(count / sum(count)))),
    stat = "count",
    colour = "black",
    vjust = -0.5 # place the label just above the top of the bar
  ) +
  labs(x = "purpose", y = "soluong")