# title: "gg2"; output: html_document
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(htmlwidgets)
# Build the input path from a base folder and a relative file name.
sFolder <- "C:/Users/AlexBer127/"
sInFile <- "Downloads/SPb_dwellings_for_rent_EMLS_sample_8.csv"
# Echo the assembled path so it appears in the rendered output.
paste0(sFolder, sInFile)
## [1] "C:/Users/AlexBer127/Downloads/SPb_dwellings_for_rent_EMLS_sample_8.csv"
# The file is semicolon-separated with a decimal comma; read.csv2() is
# read.csv() with exactly those defaults (sep = ";", dec = ",").
mytable <- read.csv2(paste0(sFolder, sInFile))
# Show the freshly loaded table in the data viewer.
view(mytable)
# Frequency table of the Rooms column.
# Listings with an empty Rooms field are counted under "no answer".
mytable$Rooms[mytable$Rooms == ""] <- "no answer"
rooms <- data.frame(table(mytable$Rooms))
# data.frame() on a one-way table yields the columns Var1 (value) and
# Freq (count); rename them to the Russian display headers
# ("number of rooms", "frequency").
names(rooms)[names(rooms) == "Var1"] <- "количество комнат"
names(rooms)[names(rooms) == "Freq"] <- "частота"
view(rooms)
# Frequency table of the No_agents flag.
# An empty field is treated as "0".
mytable$No_agents[mytable$No_agents == ""] <- "0"
agents <- data.frame(table(mytable$No_agents))
# table() hands the categories back as a factor; as.character() is already
# vectorised and keeps the result a plain character vector for any input.
agents$Var1 <- as.character(agents$Var1)
# Translate the flag into Russian labels: "0" -> "нет" (no), "1" -> "да" (yes).
# NOTE(review): the column is called No_agents but is shown under the header
# "через посредника" ("via an intermediary") - confirm the labels are not
# inverted.
agents$Var1[agents$Var1 == "0"] <- "нет"
agents$Var1[agents$Var1 == "1"] <- "да"
names(agents)[names(agents) == "Var1"] <- "через посредника"
names(agents)[names(agents) == "Freq"] <- "частота"
view(agents)
# Frequency table of the minimum rental duration.
# Strip the literal " mes." suffix. fixed() makes the dot match only a dot
# instead of acting as a regex wildcard.
mytable$Minimum_duration <- str_replace_all(
  mytable$Minimum_duration, fixed(" mes."), ""
)
# as.numeric() is vectorised and returns an unnamed vector; values that are
# not numbers become NA (with a coercion warning).
mytable$Minimum_duration <- as.numeric(mytable$Minimum_duration)
# Missing or unparseable durations are recorded as 0.
mytable$Minimum_duration[is.na(mytable$Minimum_duration)] <- 0
min_dur <- data.frame(table(mytable$Minimum_duration))
# Russian display headers: "months", "frequency".
names(min_dur)[names(min_dur) == "Var1"] <- "месяцев"
names(min_dur)[names(min_dur) == "Freq"] <- "частота"
view(min_dur)
# Frequency table of the Lift column.
# Recode the text answers: "Est'" -> "1", "Net" -> "0"; an empty field
# becomes "no answer".
mytable$Lift <- str_replace_all(mytable$Lift, "Est'", "1")
mytable$Lift <- str_replace_all(mytable$Lift, "Net", "0")
mytable$Lift[mytable$Lift == ""] <- "no answer"
lift <- data.frame(table(mytable$Lift))
# Russian display headers: "lift available", "frequency".
names(lift)[names(lift) == "Var1"] <- "наличие лифта"
names(lift)[names(lift) == "Freq"] <- "частота"
# View only after renaming, so the viewer shows the final headers like the
# other frequency tables in this script.
view(lift)
# Convert NFloor to a numeric column.
# Any substring matching the pattern (up to 5 arbitrary characters, a
# lower-case letter, any character, a lower-case letter, then 1-7 arbitrary
# characters) is replaced by "0".
# NOTE(review): presumably this targets textual floor descriptions - verify
# against the raw data.
mytable$NFloor <- str_replace_all(mytable$NFloor, ".{0,5}[a-z].[a-z].{1,7}", "0")
# Empty fields count as 0 as well.
mytable$NFloor[mytable$NFloor == ""] <- "0"
# as.numeric() is vectorised and returns an unnamed vector; anything still
# non-numeric becomes NA (with a coercion warning).
mytable$NFloor <- as.numeric(mytable$NFloor)
# Hand-made corrections: overwrite column 8 in five specific rows.
# Row numbers and values are paired by position.
# NOTE(review): rows and column are addressed by position, so this depends on
# the exact row order and column layout of the input file.
fix_rows <- c(2388, 88, 838, 1283, 2840)
fix_values <- c(20000, 16000, 18000, 19000, 30000)
mytable[fix_rows, 8] <- fix_values
# Fill in missing values column by column:
#   - text columns: an empty string becomes the marker "no answer";
#   - numeric columns: NA becomes 0.
mytable$Metro[mytable$Metro == ""] <- "no answer"
mytable$Dist_metro_ad[is.na(mytable$Dist_metro_ad)] <- 0
mytable$Building[mytable$Building == ""] <- "no answer"
mytable$Area_total[is.na(mytable$Area_total)] <- 0
mytable$Area_living[is.na(mytable$Area_living)] <- 0
mytable$Area_kitchen[is.na(mytable$Area_kitchen)] <- 0
mytable$Floor[is.na(mytable$Floor)] <- 0
mytable$Furnished[mytable$Furnished == ""] <- "no answer"
mytable$Bath[mytable$Bath == ""] <- "no answer"
mytable$Refurbished[mytable$Refurbished == ""] <- "no answer"
mytable$Balcony[mytable$Balcony == ""] <- "no answer"
# Assigning a string into the vector coerces a numeric year column to
# character, so Year_construction holds text from here on.
mytable$Year_construction[is.na(mytable$Year_construction)] <- "no answer"
# Keep one decimal place for the kitchen and living areas.
mytable$Area_kitchen <- round(mytable$Area_kitchen, 1)
mytable$Area_living <- round(mytable$Area_living, 1)
# Relabel two Balcony answers: "Net" -> "net", "est'" -> "Balkon".
# NOTE(review): "est'" is matched in lower case here, while the Lift column
# is matched with "Est'" - confirm the spelling against the raw data.
mytable$Balcony <- replace(mytable$Balcony, mytable$Balcony == "Net", "net")
mytable$Balcony <- replace(mytable$Balcony, mytable$Balcony == "est'", "Balkon")
# Missing coordinates are stored as 0.
mytable$Longitude <- replace(mytable$Longitude, is.na(mytable$Longitude), 0)
mytable$Latitude <- replace(mytable$Latitude, is.na(mytable$Latitude), 0)
# Where Refurbished holds one of the labels below, copy that label into Bath
# and mark the Refurbished entry as "wrong data".
bath_labels <- c(
  "Dush", "Na kuhne", "Otdel'naya", "Sidyachaya", "Sovmeshchennaya"
)
# %in% yields FALSE for NA, so rows with a missing Refurbished are left alone.
is_misfiled <- mytable$Refurbished %in% bath_labels
mytable$Bath[is_misfiled] <- mytable$Refurbished[is_misfiled]
mytable$Refurbished[is_misfiled] <- "wrong data"
# Remove fifteen rows by position. The order of negative indices is
# irrelevant, so they are listed in ascending order.
# NOTE(review): the positions refer to the table as it stands at this point in
# the script; they break if rows are added or removed earlier.
rows_to_drop <- c(
  193, 416, 536, 564, 711, 745, 1059, 1300, 1415,
  2076, 2154, 2178, 2359, 2459, 2529
)
mytable <- mytable[-rows_to_drop, ]