# Part 3, Example 1
# Start from an empty workspace: remove every object in the current
# environment, then list what is left.
# NOTE(review): wiping the workspace from inside a script is generally
# discouraged; kept here because the example output below depends on it.
rm(list = ls())
ls()
## character(0)
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the housing data set from the current working directory.
house <- read.csv("housing.csv")
# View() opens a spreadsheet-style data viewer, which only makes sense when a
# person is at the console; skip it when the script is run non-interactively.
if (interactive()) {
  View(house)
}
# Print a compact per-column overview (type and leading values).
glimpse(house)
## Rows: 20,640
## Columns: 10
## $ longitude <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
# Number of observations in the full data set.
nrow(house)
## [1] 20640
# Row count corresponding to the leading 80% of the data.
rownum <- nrow(house) * 0.8
# Keep the first 80% of the rows.
# - floor() makes the truncation of a fractional row count explicit; the tiny
#   tolerance guards against a floating-point product that lands a hair under
#   a whole number and would otherwise lose its last row.
# - seq_len() yields an empty index for a zero count, whereas 1:0 counts down
#   and would select row 1.
house1 <- house[seq_len(floor(rownum + 1e-7)), ]
# Overview of the 80% subset.
glimpse(house1)
## Rows: 16,512
## Columns: 10
## $ longitude <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
# Same overview of the subset, this time written with the pipe operator.
house1 %>% glimpse()
## Rows: 16,512
## Columns: 10
## $ longitude <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
# Count non-missing (FALSE) and missing (TRUE) cells over the whole data frame.
table(is.na(house1))
##
## FALSE TRUE
## 164961 159
# Standard deviation of total_bedrooms before imputation, ignoring NAs.
# The column name is written out in full rather than relying on `$` partial
# matching, which is easy to break (e.g. if another column starting with the
# same prefix is added, or the data is converted to a tibble).
df1 <- sd(house1$total_bedrooms, na.rm = TRUE)
df1
## [1] 435.9006
# Median of the observed total_bedrooms values; used as the fill-in value.
df2 <- median(house1[["total_bedrooms"]], na.rm = TRUE)
df2
## [1] 436
# Replace each missing total_bedrooms entry with that median.
house1 <- house1 %>%
  mutate(total_bedrooms = ifelse(is.na(total_bedrooms), df2, total_bedrooms))
# Per-column NA counts after the imputation.
house1 %>% is.na() %>% colSums()
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 0 0 0 0
## median_house_value ocean_proximity
## 0 0
# Standard deviation of total_bedrooms after the median imputation.
# The exact column name is used instead of depending on `$` partial matching.
df3 <- sd(house1$total_bedrooms)
df3
## [1] 433.9254
# Change in standard deviation: value before imputation minus value after.
df4 <- df1 - df3
df4
## [1] 1.975147
# Example 2
# Clear the workspace again so this example starts without leftover objects,
# then list what remains.
# NOTE(review): wiping the workspace from inside a script is generally
# discouraged; kept here because the example output below depends on it.
rm(list = ls())
ls()
## character(0)
# Reload the raw housing data for this example.
house <- read.csv(file = "housing.csv")
# Row count of the full data set.
nrow(house)
## [1] 20640
# Number of missing values in each column.
house %>% is.na() %>% colSums()
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 207 0 0 0
## median_house_value ocean_proximity
## 0 0
# Drop every row whose total_bedrooms value is missing.
house <- filter(house, !is.na(total_bedrooms))
# Per-column NA counts after the rows were removed.
house %>% is.na() %>% colSums()
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 0 0 0 0
## median_house_value ocean_proximity
## 0 0
# Row count corresponding to the leading 70% of the remaining data; this is
# generally not a whole number.
rownum <- nrow(house) * 0.7
rownum
## [1] 14303.1
# Keep the first 70% of the rows.
# - floor() makes the truncation of the fractional row count explicit; the
#   tiny tolerance guards against a floating-point product that lands a hair
#   under a whole number and would otherwise lose its last row.
# - seq_len() yields an empty index for a zero count, whereas 1:0 counts down
#   and would select row 1.
house1 <- house[seq_len(floor(rownum + 1e-7)), ]
# Five-number summary (min, quartiles, max) of housing_median_age.
quantile(house1$housing_median_age)
## 0% 25% 50% 75% 100%
## 1 19 30 38 52
# First quartile (25th percentile) of housing_median_age in the subset,
# requested directly and returned as a bare number without the "25%" label.
df <- quantile(house1$housing_median_age, probs = 0.25, names = FALSE)
print(df)
## [1] 19