Loading Data

Menampilkan 5 data teratas

# Load the housing dataset from a comma-separated file whose first row holds
# the column names.
# NOTE(review): the path below is absolute and machine-specific; the script
# only runs where that exact file exists.
data <- read.csv(
  file = "C:/Users/Friza Chintia Putri/Documents/KULIAH SAINS DATA\\SAINS DATA SEM 4\\ANALISIS MULTIVARIAT\\Housing.csv",
  header = TRUE,
  sep = ","
)
# Keep a data-frame copy under its own name and preview the first five rows.
data_df <- as.data.frame(data)
head(data_df, n = 5)
##      price area bedrooms bathrooms stories mainroad guestroom basement
## 1 13300000 7420        4         2       3      yes        no       no
## 2 12250000 8960        4         4       4      yes        no       no
## 3 12250000 9960        3         2       2      yes        no      yes
## 4 12215000 7500        4         2       2      yes        no      yes
## 5 11410000 7420        4         1       2      yes       yes      yes
##   hotwaterheating airconditioning parking prefarea furnishingstatus
## 1              no             yes       2      yes        furnished
## 2              no             yes       3       no        furnished
## 3              no              no       2      yes   semi-furnished
## 4              no             yes       3      yes        furnished
## 5              no             yes       2       no        furnished

Mengecek tipe data tiap fitur

# Print a compact summary of the dataset: one line per column showing its
# type and leading values.
str(object = data)
## 'data.frame':    545 obs. of  13 variables:
##  $ price           : int  13300000 12250000 12250000 12215000 11410000 10850000 10150000 10150000 9870000 9800000 ...
##  $ area            : int  7420 8960 9960 7500 7420 7500 8580 16200 8100 5750 ...
##  $ bedrooms        : int  4 4 3 4 4 3 4 5 4 3 ...
##  $ bathrooms       : int  2 4 2 2 1 3 3 3 1 2 ...
##  $ stories         : int  3 4 2 2 2 1 4 2 2 4 ...
##  $ mainroad        : chr  "yes" "yes" "yes" "yes" ...
##  $ guestroom       : chr  "no" "no" "no" "no" ...
##  $ basement        : chr  "no" "no" "yes" "yes" ...
##  $ hotwaterheating : chr  "no" "no" "no" "no" ...
##  $ airconditioning : chr  "yes" "yes" "no" "yes" ...
##  $ parking         : int  2 3 2 3 2 2 2 0 2 1 ...
##  $ prefarea        : chr  "yes" "no" "yes" "yes" ...
##  $ furnishingstatus: chr  "furnished" "furnished" "semi-furnished" "furnished" ...

Mencari nilai variance tiap fitur numerik

# Sample variance of each integer-typed column of the dataset.
# `[[` extracts the column by its exact name.
var_price <- var(data[["price"]])
var_area <- var(data[["area"]])
var_bedrooms <- var(data[["bedrooms"]])
var_bathrooms <- var(data[["bathrooms"]])
var_stories <- var(data[["stories"]])
var_parking <- var(data[["parking"]])

# Display each variance.
var_price
## [1] 3.498544e+12
var_area
## [1] 4709512
var_bedrooms
## [1] 0.5447383
var_bathrooms
## [1] 0.2524757
var_stories
## [1] 0.7525432
var_parking
## [1] 0.74233

Mencari nilai covariance antar pasangan fitur numerik

# Sample covariance for every distinct pair of the six numeric columns
# (price, area, bedrooms, bathrooms, stories, parking): 15 pairs in total.

# Pairs involving price.
cov_PriceArea <- cov(data[["price"]], data[["area"]])
cov_PriceBedrooms <- cov(data[["price"]], data[["bedrooms"]])
cov_PriceBathrooms <- cov(data[["price"]], data[["bathrooms"]])
cov_PriceStories <- cov(data[["price"]], data[["stories"]])
cov_PriceParking <- cov(data[["price"]], data[["parking"]])
# Remaining pairs among area, bedrooms, bathrooms, stories and parking.
cov_AreaParking <- cov(data[["area"]], data[["parking"]])
cov_BedroomsParking <- cov(data[["bedrooms"]], data[["parking"]])
cov_BedroomsArea <- cov(data[["bedrooms"]], data[["area"]])
cov_BathroomsParking <- cov(data[["bathrooms"]], data[["parking"]])
cov_BathroomsArea <- cov(data[["bathrooms"]], data[["area"]])
cov_BathroomsBedrooms <- cov(data[["bathrooms"]], data[["bedrooms"]])
cov_StoriesParking <- cov(data[["stories"]], data[["parking"]])
cov_StoriesArea <- cov(data[["stories"]], data[["area"]])
cov_StoriesBedrooms <- cov(data[["stories"]], data[["bedrooms"]])
cov_StoriesBathrooms <- cov(data[["stories"]], data[["bathrooms"]])
# Display each covariance.
cov_PriceArea
## [1] 2175676335
cov_PriceBedrooms
## [1] 505946.4
cov_PriceBathrooms
## [1] 486409.3
cov_PriceStories
## [1] 682644.6
cov_PriceParking
## [1] 619467.3
cov_AreaParking
## [1] 659.9897
cov_BedroomsParking
## [1] 0.08856247
cov_BedroomsArea
## [1] 243.2321
cov_BathroomsParking
## [1] 0.07684161
cov_BathroomsArea
## [1] 211.3466
cov_BathroomsBedrooms
## [1] 0.1386738
cov_StoriesParking
## [1] 0.03404277
cov_StoriesArea
## [1] 158.1294
cov_StoriesBedrooms
## [1] 0.2615893
cov_StoriesBathrooms
## [1] 0.1421715

Mencari nilai correlation antar pasangan fitur numerik (covariance dibagi akar dari hasil kali kedua variance)

# Correlation of each pair, computed by hand from the values obtained earlier:
#   corr(x, y) = cov(x, y) / sqrt(var(x) * var(y))
# NOTE(review): three results use the singular spelling (`corr_Bathroom...`,
# `...Bedroom`) while their covariance inputs use the plural; the names are
# left unchanged so any existing references keep working.

# Pairs involving price.
corr_PriceArea <- cov_PriceArea / sqrt(var_price * var_area)
corr_PriceBedrooms <- cov_PriceBedrooms / sqrt(var_price * var_bedrooms)
corr_PriceBathrooms <- cov_PriceBathrooms / sqrt(var_price * var_bathrooms)
corr_PriceStories <- cov_PriceStories / sqrt(var_price * var_stories)
corr_PriceParking <- cov_PriceParking / sqrt(var_price * var_parking)
# Remaining pairs among area, bedrooms, bathrooms, stories and parking.
corr_AreaParking <- cov_AreaParking / sqrt(var_area * var_parking)
corr_BedroomsParking <- cov_BedroomsParking / sqrt(var_bedrooms * var_parking)
corr_BedroomsArea <- cov_BedroomsArea / sqrt(var_bedrooms * var_area)
corr_BathroomParking <- cov_BathroomsParking / sqrt(var_bathrooms * var_parking)
corr_BathroomArea <- cov_BathroomsArea / sqrt(var_bathrooms * var_area)
corr_BathroomBedroom <- cov_BathroomsBedrooms / sqrt(var_bathrooms * var_bedrooms)
corr_StoriesParking <- cov_StoriesParking / sqrt(var_stories * var_parking)
corr_StoriesArea <- cov_StoriesArea / sqrt(var_stories * var_area)
corr_StoriesBedrooms <- cov_StoriesBedrooms / sqrt(var_stories * var_bedrooms)
corr_StoriesBathrooms <- cov_StoriesBathrooms / sqrt(var_stories * var_bathrooms)
# Display each correlation.
corr_PriceArea
## [1] 0.5359973
corr_PriceBedrooms
## [1] 0.366494
corr_PriceBathrooms
## [1] 0.5175453
corr_PriceStories
## [1] 0.4207124
corr_PriceParking
## [1] 0.3843936
corr_AreaParking
## [1] 0.3529805
corr_BedroomsParking
## [1] 0.1392699
corr_BedroomsArea
## [1] 0.1518585
corr_BathroomParking
## [1] 0.1774958
corr_BathroomArea
## [1] 0.1938195
corr_BathroomBedroom
## [1] 0.3739302
corr_StoriesParking
## [1] 0.04554709
corr_StoriesArea
## [1] 0.08399605
corr_StoriesBedrooms
## [1] 0.4085642
corr_StoriesBathrooms
## [1] 0.3261647

Menentukan Variance dan Covariance Matrix

# Build the variance-covariance matrix of the numeric columns.
# vapply() is used instead of sapply() so the column mask is always a logical
# vector (sapply() returns an empty list for a zero-column data frame), and
# drop = FALSE keeps `data_numerik` a data frame even if only one column is
# numeric, so cov() still receives a two-dimensional input.
data_numerik <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
cov_matriks <- cov(data_numerik)

# Convert to a data frame for display: variances on the diagonal,
# covariances off the diagonal.
cov_matriks_df <- as.data.frame(cov_matriks)
cov_matriks_df
##                  price         area     bedrooms    bathrooms      stories
## price     3.498544e+12 2.175676e+09 5.059464e+05 4.864093e+05 6.826446e+05
## area      2.175676e+09 4.709512e+06 2.432321e+02 2.113466e+02 1.581294e+02
## bedrooms  5.059464e+05 2.432321e+02 5.447383e-01 1.386738e-01 2.615893e-01
## bathrooms 4.864093e+05 2.113466e+02 1.386738e-01 2.524757e-01 1.421715e-01
## stories   6.826446e+05 1.581294e+02 2.615893e-01 1.421715e-01 7.525432e-01
## parking   6.194673e+05 6.599897e+02 8.856247e-02 7.684161e-02 3.404277e-02
##                parking
## price     6.194673e+05
## area      6.599897e+02
## bedrooms  8.856247e-02
## bathrooms 7.684161e-02
## stories   3.404277e-02
## parking   7.423300e-01

Menghitung Correlation Matrix

# Correlation matrix of the numeric columns, computed in a single call.
corr_matriks <- cor(x = data_numerik)

# Convert to a data frame for display.
corr_matriks_df <- as.data.frame(corr_matriks)
corr_matriks_df
##               price       area  bedrooms bathrooms    stories    parking
## price     1.0000000 0.53599735 0.3664940 0.5175453 0.42071237 0.38439365
## area      0.5359973 1.00000000 0.1518585 0.1938195 0.08399605 0.35298048
## bedrooms  0.3664940 0.15185849 1.0000000 0.3739302 0.40856424 0.13926990
## bathrooms 0.5175453 0.19381953 0.3739302 1.0000000 0.32616471 0.17749582
## stories   0.4207124 0.08399605 0.4085642 0.3261647 1.00000000 0.04554709
## parking   0.3843936 0.35298048 0.1392699 0.1774958 0.04554709 1.00000000

Mencari eigenvalue dan eigenvector berdasarkan correlation matrix

# Eigen-decomposition of the correlation matrix; the result carries both the
# eigenvalues (`values`) and the eigenvectors (`vectors`).
hasil_eigen <- eigen(x = corr_matriks)

Hasil Eigen Value

# Eigenvalues of the correlation matrix.
hasil_eigen[["values"]]
## [1] 2.5561051 1.2171486 0.6771415 0.6566698 0.5908395 0.3020955

Hasil Eigen Vector

# Eigenvectors of the correlation matrix, one per column.
hasil_eigen[["vectors"]]
##            [,1]       [,2]        [,3]        [,4]        [,5]        [,6]
## [1,] -0.5395439  0.1203486  0.24279104  0.04162299 -0.14135501  0.78342034
## [2,] -0.3685384  0.5178529  0.51575760 -0.22455991  0.31542421 -0.42436114
## [3,] -0.3915181 -0.3822393 -0.38949056 -0.24634216  0.69703170  0.04864311
## [4,] -0.4322131 -0.2116685  0.01788812  0.81460710 -0.03855905 -0.32093105
## [5,] -0.3682862 -0.4917297  0.12475034 -0.46684594 -0.55146176 -0.29146070
## [6,] -0.3119977  0.5335130 -0.71236303 -0.07500705 -0.29845646 -0.12592842