library(DescTools)
## Warning: package 'DescTools' was built under R version 4.3.1
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.3.1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(epitools)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Dữ liệu
# Work from the project folder so the relative file name below resolves.
setwd("F:/PTDLĐT")
# Read the raw listings file into a data frame.
data <- read.csv(file = "EDAdataset.csv")
# Show the column types. NOTE(review): latitude, longitude and Area_in_Marla
# are read as character, so they presumably need cleaning before any numeric
# use -- confirm against the raw file.
data |> str()
## 'data.frame': 153430 obs. of 15 variables:
## $ N : int 0 1 2 3 4 5 6 7 8 9 ...
## $ property_type: chr "Flat" "Flat" "House" "House" ...
## $ price : int 10000000 6900000 16500000 43500000 7000000 34500000 27000000 7800000 50000000 40000000 ...
## $ location : chr "G-10" "E-11" "G-15" "Bani Gala" ...
## $ city : chr "Islamabad" "Islamabad" "Islamabad" "Islamabad" ...
## $ province_name: chr "Islamabad Capital" "Islamabad Capital" "Islamabad Capital" "Islamabad Capital" ...
## $ latitude : chr "3,367,989" "33,700,993" "33,631,485,999,999,900" "33,707,572,937,012" ...
## $ longitude : chr "7,301,264" "72,971,492" "72,926,559" "7,315,119,934,082" ...
## $ baths : int 2 3 6 4 3 8 8 2 7 5 ...
## $ purpose : chr "For Sale" "For Sale" "For Sale" "For Sale" ...
## $ bedrooms : int 2 3 5 4 3 8 8 2 7 5 ...
## $ date_added : chr "04-02-19" "04-05-19" "17-07-19" "05-04-19" ...
## $ agency : chr "Self" "Self" "Self" "Self" ...
## $ agent : chr "Self" "Self" "Self" "Self" ...
## $ Area_in_Marla: chr "4" "5.6" "8" "40" ...
Thống kê mô tả các biến
# Frequency table of listings by purpose.
pur1 <- table(data[["purpose"]])
print(pur1)
##
## For Rent For Sale
## 43183 110247
# Share of listings in each purpose category (counts divided by their total).
pur1a <- proportions(pur1)
print(pur1a)
##
## For Rent For Sale
## 0.2814508 0.7185492
# Same frequency table with a "Sum" total appended.
pur1 |> addmargins()
##
## For Rent For Sale Sum
## 43183 110247 153430
library(ggplot2)
# Bar chart of listing counts by purpose, each bar labelled with its share of
# all listings. The column is referenced by its bare name inside aes():
# writing `data$purpose` there bypasses ggplot2's data masking and raises the
# "Use of `data$purpose` is discouraged" warning.
data |>
  ggplot(aes(x = purpose, y = after_stat(count))) +
  geom_bar(fill = "blue") +
  geom_text(
    # Label = this bar's count as a percentage of the total count.
    aes(label = scales::percent(after_stat(count / sum(count)))),
    stat = "count",
    color = "black",
    vjust = -0.5 # nudge the label just above the top of the bar
  ) +
  theme_classic() +
  labs(x = "Purpose", y = "Frequency")

Mô hình hồi quy
Hồi quy logit