# exam2) From the California housing data, remove the missing values, then take
# the first 70% of the rows in order and find the first quartile (Q1) of the
# 'housing_median_age' variable.
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.1.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw housing data from the working directory.
housing <- read.csv(file = "housing1.csv")
# Per-column summary statistics (the recorded output shows 207 NA's in total_bedrooms).
housing %>% summary()
## longitude latitude housing_median_age total_rooms
## Min. :-124.3 Min. :32.54 Min. : 1.00 Min. : 2
## 1st Qu.:-121.8 1st Qu.:33.93 1st Qu.:18.00 1st Qu.: 1448
## Median :-118.5 Median :34.26 Median :29.00 Median : 2127
## Mean :-119.6 Mean :35.63 Mean :28.64 Mean : 2636
## 3rd Qu.:-118.0 3rd Qu.:37.71 3rd Qu.:37.00 3rd Qu.: 3148
## Max. :-114.3 Max. :41.95 Max. :52.00 Max. :39320
##
## total_bedrooms population households median_income
## Min. : 1.0 Min. : 3 Min. : 1.0 Min. : 0.4999
## 1st Qu.: 296.0 1st Qu.: 787 1st Qu.: 280.0 1st Qu.: 2.5634
## Median : 435.0 Median : 1166 Median : 409.0 Median : 3.5348
## Mean : 537.9 Mean : 1425 Mean : 499.5 Mean : 3.8707
## 3rd Qu.: 647.0 3rd Qu.: 1725 3rd Qu.: 605.0 3rd Qu.: 4.7432
## Max. :6445.0 Max. :35682 Max. :6082.0 Max. :15.0001
## NA's :207
## median_house_value ocean_proximity
## Min. : 14999 Length:20640
## 1st Qu.:119600 Class :character
## Median :179700 Mode :character
## Mean :206856
## 3rd Qu.:264725
## Max. :500001
##
# Count the missing values in each column.
housing %>%
  is.na() %>%
  colSums()
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 207 0 0 0
## median_house_value ocean_proximity
## 0 0
# Step 1: remove missing values.
# Keep only the rows that have no NA in any column, instead of testing a single
# hard-coded column; this stays correct if another column ever contains NAs.
house <- filter(housing, complete.cases(housing))
# Verify that no column has missing values left.
colSums(is.na(house))
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 0 0 0 0
## median_house_value ocean_proximity
## 0 0
# Step 2: take the first 70% of the rows, in their existing order.
# The row count is based on `house` (the data after missing-value removal), not
# on the raw `housing` frame, so the slice really is 70% of the cleaned data.
# floor() makes the count a whole number explicitly instead of relying on `:`
# to truncate a fractional upper bound.
rownum <- floor(nrow(house) * 0.7)
rownum
## [1] 14303
# Rounding demonstration only; these values are not used by the extraction
# below (`df` is reassigned later in the script).
df <- 16777.5
df1 <- 3.3333
round(df)
## [1] 16778
round(df1, 2)
## [1] 3.33
# seq_len() gives an empty index when rownum is 0, whereas 1:0 would count down.
house1 <- house[seq_len(rownum), ]
# Step 3: first quartile of housing_median_age in the extracted subset.
# Display the five default quantiles first.
with(house1, quantile(housing_median_age))
## 0% 25% 50% 75% 100%
## 1 19 30 37 52
# Request the 25% point directly; unname() drops the "25%" label so that a bare
# number is stored.
df <- unname(quantile(house1$housing_median_age, probs = 0.25))
print(df)
## [1] 19