Projek BMBG

Import Data

library(readxl)
library(readr)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the tabular dataset from an Excel workbook.
# NOTE(review): absolute, machine-specific path - presumably this should become
# a project-relative path; confirm where the file is meant to live.
dataset<-read_excel("C:/Users/User/Downloads/DATMIN.xlsx")
library(terra)
## Warning: package 'terra' was built under R version 4.4.3
## terra 1.8.42
## 
## Attaching package: 'terra'
## The following object is masked from 'package:janitor':
## 
##     crosstab
library(exactextractr)
## Warning: package 'exactextractr' was built under R version 4.4.3
library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
# Load the polygon layer (vector) and the NTL raster
v <- vect("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/Jawa_Kab.shp")
r <- rast("C:/Users/User/Downloads/NTL_Jawa.tif")


# Make sure both layers use the same CRS before extracting raster values.
# same.crs() compares the coordinate reference systems themselves; comparing
# the crs() strings with != can report a difference when two equivalent CRSs
# are merely written with different WKT text.
if (!same.crs(v, r)) {
  v <- project(v, crs(r))
}


# exactextractr needs an sf object, so convert the terra vector to sf
v_sf <- st_as_sf(v)

# List the column names; the ID column 'idkab' must be among them
colnames(v_sf)
## [1] "fid"      "idkab"    "nmprov"   "nmkab"    "kdprov"   "kdkab"    "sumber"  
## [8] "periode"  "geometry"
# Compute the mean NTL value for each polygon in v_sf.
# progress = FALSE keeps the text progress bar out of the rendered report.
mean_ntl <- exact_extract(r, v_sf, "mean", progress = FALSE)
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========           
                                                   |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  18%  |                                                                              |==============                                                        |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                            
                                  |================                                                      |  23%  |                                                                              |================                                                      |  24%  |                                                                              |=================                                                     |  24%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                              
 |  33%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |==========================                                            |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              
|===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  55%  |                        
                                                      |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |==========================================                            |  61%  |                                                                              |===========================================                           |  61%  |                                                                              |============================================                          |  62%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================  
                     |  67%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |=================================================                     |  71%  |                                                                              |==================================================                    |  71%  |                                                                              |===================================================                   |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  76%  |                                                                              |======================================================                |  77%  |                                                                          
    |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |========================================================              |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  84%  |                                                                              |===========================================================           |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                    
                                                          |===============================================================       |  90%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%
# Store the per-polygon means as a new column of the sf object
v_sf[["mean_NTL"]] <- mean_ntl
# Continue working with the enriched layer under the name `bigdata`
bigdata <- v_sf

Data Cleaning

Cek Struktur Data

class(dataset)
## [1] "tbl_df"     "tbl"        "data.frame"
str(dataset)
## tibble [119 × 6] (S3: tbl_df/tbl/data.frame)
##  $ id       : num [1:119] 3672 3673 3671 3674 3602 ...
##  $ kode_prov: num [1:119] 36 36 36 36 36 36 36 36 34 34 ...
##  $ Y        : num [1:119] 8.44 4.51 9.18 6.55 3.75 ...
##  $ X1       : num [1:119] 6.08 9.18 5.92 5.09 6.23 8.09 9.18 6.06 3.62 2.16 ...
##  $ X2       : num [1:119] 6.61 7.88 7.16 9.06 10.39 ...
##  $ X3       : num [1:119] 14294 14603 15641 17028 9631 ...

Terdapat kesalahan tipe data untuk id dan kode_prov, di mana id dan kode_prov yang seharusnya berbentuk string/character teridentifikasi sebagai numeric.

class(bigdata)
## [1] "sf"         "data.frame"
str(bigdata)
## Classes 'sf' and 'data.frame':   119 obs. of  10 variables:
##  $ fid     : num  25624 25630 25695 25760 25804 ...
##  $ idkab   : chr  "3101" "3171" "3172" "3173" ...
##  $ nmprov  : chr  "DKI JAKARTA" "DKI JAKARTA" "DKI JAKARTA" "DKI JAKARTA" ...
##  $ nmkab   : chr  "KEPULAUAN SERIBU" "JAKARTA SELATAN" "JAKARTA TIMUR" "JAKARTA PUSAT" ...
##  $ kdprov  : chr  "31" "31" "31" "31" ...
##  $ kdkab   : chr  "01" "71" "72" "73" ...
##  $ sumber  : chr  "BPS" "BPS" "BPS" "BPS" ...
##  $ periode : chr  "2020_1" "2020_1" "2020_1" "2020_1" ...
##  $ geometry:sfc_GEOMETRY of length 119; first list element: List of 27
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 106 106 106 106 106 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.49 106.49 106.49 106.49 -5.42 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.53 106.53 106.53 106.53 -5.57 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.5 106.5 106.5 106.5 -5.8 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:8, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:8, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.58 106.58 106.58 106.58 -5.63 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ mean_NTL: num  0.633 35.111 29.941 51.794 35.456 ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:9] "fid" "idkab" "nmprov" "nmkab" ...

Menyamakan bentuk kategori penyimpanan kedua data ke tibble. Selain itu, terdapat kesalahan tipe data untuk id dan kode_prov, di mana keduanya yang seharusnya berbentuk string/character teridentifikasi sebagai numeric, sehingga dikonversi ke character.

# Tabular dataset: store as a tibble and keep the identifier codes as
# character strings instead of numbers
dataset <- as_tibble(dataset)
dataset[["id"]] <- as.character(dataset[["id"]])
dataset[["kode_prov"]] <- as.character(dataset[["kode_prov"]])

# Spatial table: store as a tibble and rename the mean NTL column to X4
bigdata <- as_tibble(bigdata)
names(bigdata) <- replace(names(bigdata), names(bigdata) == "mean_NTL", "X4")

Cek Missing Value

colSums(is.na(dataset))
##        id kode_prov         Y        X1        X2        X3 
##         0         0         0         0         0         0
colSums(is.na(bigdata))
##      fid    idkab   nmprov    nmkab   kdprov    kdkab   sumber  periode 
##        0        0        0        0        0        0        0        0 
## geometry       X4 
##        0        0
library(skimr)
## Warning: package 'skimr' was built under R version 4.4.3
skim(dataset)
Data summary
Name dataset
Number of rows 119
Number of columns 6
_______________________
Column type frequency:
character 2
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id 0 1 4 4 0 119 0
kode_prov 0 1 2 2 0 6 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Y 0 1 9.07 3.72 2.34 6.38 8.98 11.43 20.83 ▃▇▅▂▁
X1 0 1 5.04 1.83 1.56 3.70 4.91 6.26 9.18 ▂▇▅▅▂
X2 0 1 8.77 1.66 5.08 7.53 8.24 9.97 12.12 ▁▇▆▂▅
X3 0 1 12977.18 2937.19 8965.00 11000.00 12252.00 14152.50 25573.00 ▇▅▂▁▁
skim(bigdata)
## Warning: Couldn't find skimmers for class: sfc_GEOMETRY, sfc; No user-defined
## `sfl` provided. Falling back to `character`.
Data summary
Name bigdata
Number of rows 119
Number of columns 10
_______________________
Column type frequency:
character 8
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
idkab 0 1 4 4 0 119 0
nmprov 0 1 6 13 0 6 0
nmkab 0 1 4 17 0 100 0
kdprov 0 1 2 2 0 6 0
kdkab 0 1 2 2 0 38 0
sumber 0 1 3 3 0 1 0
periode 0 1 6 6 0 1 0
geometry 0 1 1360 55924 0 119 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
fid 0 1 38558.03 7661.93 25624.00 31713.50 39293.00 45010.00 50938.00 ▆▇▇▆▇
X4 0 1 7.13 9.40 0.63 1.73 2.72 9.09 51.79 ▇▁▁▁▁

Tidak terdapat missing value pada kedua data.

Cek Duplikasi data

nrow(get_dupes(dataset))
## No variable names specified - using all columns.
## No duplicate combinations found of: id, kode_prov, Y, X1, X2, X3
## [1] 0
nrow(get_dupes(bigdata))
## No variable names specified - using all columns.
## No duplicate combinations found of: fid, idkab, nmprov, nmkab, kdprov, kdkab, sumber, periode, geometry, X4
## [1] 0

Selain itu, dengan asumsi variabel id adalah primary key, akan dilihat duplikasi berdasarkan variabel id.

nrow(get_dupes(dataset,id))
## No duplicate combinations found of: id
## [1] 0
nrow(get_dupes(bigdata,idkab))
## No duplicate combinations found of: idkab
## [1] 0

tidak terdapat duplikasi

Cek Konsistensi Data

head(data.frame(dataset$id,dataset$kode_prov))
##   dataset.id dataset.kode_prov
## 1       3672                36
## 2       3673                36
## 3       3671                36
## 4       3674                36
## 5       3602                36
## 6       3601                36

Di mana kode_prov merupakan dua digit pertama pada id.

# Flag which columns of the tabular dataset are numeric
# (the range checks below are driven by `rules`, not by this vector)
num_vars <- vapply(dataset, is.numeric, logical(1))

# Print, for every rule, the values of `data` that fall outside the allowed
# range.
#
# data  : data frame / tibble holding the variables to check.
# rules : named list; each element is c(lower, upper). With a finite upper
#         bound a value is consistent when lower <= value <= upper. With an
#         infinite upper bound a value is consistent when value > lower.
# NA values are ignored. Variables named in `rules` but absent from `data`
# are reported and skipped. Returns NULL invisibly; the result is the
# printed report.
check_ranges <- function(data, rules) {
  for (var in names(rules)) {
    if (!var %in% names(data)) {
      cat("Variable", var, "not found in dataset.\n\n")
      next
    }

    cat("Checking", var, ":\n")
    lower <- rules[[var]][1]
    upper <- rules[[var]][2]
    x <- data[[var]]

    # An infinite upper bound encodes "strictly greater than lower"
    out_of_range <- if (is.infinite(upper)) {
      x <= lower
    } else {
      x < lower | x > upper
    }
    vals <- x[!is.na(x) & out_of_range]

    if (length(vals) > 0) {
      cat("  Inconsistent values:\n")
      print(unique(vals))
      cat("  Count:", length(vals), "\n\n")
    } else {
      cat("  All values are consistent.\n\n")
    }
  }
  invisible(NULL)
}

# Range rules for the tabular dataset
rules <- list(
  "Y" = c(0, 100),
  "X1" = c(0, 100),
  "X2" = c(0, 15),
  "X3" = c(0, Inf) # must be strictly greater than 0
)

check_ranges(dataset, rules)
## Checking Y :
##   All values are consistent.
## 
## Checking X1 :
##   All values are consistent.
## 
## Checking X2 :
##   All values are consistent.
## 
## Checking X3 :
##   All values are consistent.
# Flag which columns of the spatial table are numeric
num_vars <- vapply(bigdata, is.numeric, logical(1))

# Range rule for X4
# NOTE(review): the upper bound here is 532, while the validator rule later in
# this file uses X4 <= 52 - confirm which bound is intended.
rules <- list(
  "X4" = c(0, 532)
)

check_ranges(bigdata, rules)
## Checking X4 :
##   All values are consistent.

Data Validation

library(validate)
## 
## Attaching package: 'validate'
## The following objects are masked from 'package:terra':
## 
##     cells, compare, meta, origin, origin<-, values
## The following object is masked from 'package:dplyr':
## 
##     expr
# Validation rules for the tabular dataset:
#   id        - every id value occurs only once
#   kode_prov - the first two characters of id equal kode_prov
#   X1, X2    - inside their closed ranges
#   X3        - strictly positive
# NOTE(review): the range check earlier in this file also covers Y (0 to 100),
# but no Y rule is defined here - confirm whether one should be added.
val_dataset <- validator(
  id = is_unique(id),
  kode_prov = substr(id, 1, 2) == kode_prov,
  X1 = X1 >= 0 & X1 <= 100,
  X2 = X2 >= 0 & X2 <= 15,
  X3 = X3 > 0
)

# Validation rules for the spatial table:
#   idkab - every idkab value occurs only once
#   X4    - inside its closed range
# NOTE(review): the upper bound here is 52, while the range check earlier in
# this file uses 532 for X4 - confirm which bound is intended.
val_bigdata <- validator(
  idkab = is_unique(idkab),
  X4 = X4 >= 0 & X4 <= 52
)

# Confront each table with its rules, using its ID column as the record key
data_val_dataset <- confront(dataset, val_dataset, key = "id")
data_val_bigdata <- confront(bigdata, val_bigdata, key = "idkab")
summary(data_val_dataset)
##        name items passes fails nNA error warning
## 1        id   119    119     0   0 FALSE   FALSE
## 2 kode_prov   119    119     0   0 FALSE   FALSE
## 3        X1   119    119     0   0 FALSE   FALSE
## 4        X2   119    119     0   0 FALSE   FALSE
## 5        X3   119    119     0   0 FALSE   FALSE
##                             expression
## 1                   is_unique(id) == T
## 2        substr(id, 1, 2) == kode_prov
## 3 X1 - 0 >= -1e-08 & X1 - 100 <= 1e-08
## 4  X2 - 0 >= -1e-08 & X2 - 15 <= 1e-08
## 5                               X3 > 0
summary(data_val_bigdata)
##    name items passes fails nNA error warning
## 1 idkab   119    119     0   0 FALSE   FALSE
## 2    X4   119    119     0   0 FALSE   FALSE
##                            expression
## 1               is_unique(idkab) == T
## 2 X4 - 0 >= -1e-08 & X4 - 52 <= 1e-08
plot(data_val_dataset)

plot(data_val_bigdata)

Tidak ada yang terlanggar pada data cleaning

Data Integration

# bigdata carries the columns "idkab" and "X4";
# dataset carries the column "id"

# Join the two tables on their ID columns (dataset$id matched to bigdata$idkab).
# all.x = TRUE keeps every row of dataset even when no bigdata row matches,
# in which case the bigdata columns are filled with NA.
int_data <- merge(dataset, bigdata, by.x = "id", by.y = "idkab", all.x = TRUE)

# Cek hasil
head(int_data)
##     id kode_prov     Y   X1    X2    X3   fid      nmprov            nmkab
## 1 3101        31 13.03 7.93  9.26 14110 25624 DKI JAKARTA KEPULAUAN SERIBU
## 2 3171        31  3.03 5.22 11.95 25573 25630 DKI JAKARTA  JAKARTA SELATAN
## 3 3172        31  4.09 6.95 11.99 19193 25695 DKI JAKARTA    JAKARTA TIMUR
## 4 3173        31  4.63 6.24 11.61 18661 25760 DKI JAKARTA    JAKARTA PUSAT
## 5 3174        31  3.94 6.18 11.24 22119 25804 DKI JAKARTA    JAKARTA BARAT
## 6 3175        31  6.44 6.18 10.85 20032 25860 DKI JAKARTA    JAKARTA UTARA
##   kdprov kdkab sumber periode                       geometry         X4
## 1     31    01    BPS  2020_1 MULTIPOLYGON (((106.4881 -5...  0.6328887
## 2     31    71    BPS  2020_1 POLYGON ((106.7374 -6.22393... 35.1112022
## 3     31    72    BPS  2020_1 POLYGON ((106.849 -6.207551... 29.9406319
## 4     31    73    BPS  2020_1 POLYGON ((106.8213 -6.13687... 51.7937012
## 5     31    74    BPS  2020_1 POLYGON ((106.6873 -6.09743... 35.4558067
## 6     31    75    BPS  2020_1 POLYGON ((106.9698 -6.09262... 33.4573936
str(int_data)
## 'data.frame':    119 obs. of  15 variables:
##  $ id       : chr  "3101" "3171" "3172" "3173" ...
##  $ kode_prov: chr  "31" "31" "31" "31" ...
##  $ Y        : num  13.03 3.03 4.09 4.63 3.94 ...
##  $ X1       : num  7.93 5.22 6.95 6.24 6.18 6.18 7.34 7.11 5.99 6.36 ...
##  $ X2       : num  9.26 11.95 11.99 11.61 11.24 ...
##  $ X3       : num  14110 25573 19193 18661 22119 ...
##  $ fid      : num  25624 25630 25695 25760 25804 ...
##  $ nmprov   : chr  "DKI JAKARTA" "DKI JAKARTA" "DKI JAKARTA" "DKI JAKARTA" ...
##  $ nmkab    : chr  "KEPULAUAN SERIBU" "JAKARTA SELATAN" "JAKARTA TIMUR" "JAKARTA PUSAT" ...
##  $ kdprov   : chr  "31" "31" "31" "31" ...
##  $ kdkab    : chr  "01" "71" "72" "73" ...
##  $ sumber   : chr  "BPS" "BPS" "BPS" "BPS" ...
##  $ periode  : chr  "2020_1" "2020_1" "2020_1" "2020_1" ...
##  $ geometry :sfc_GEOMETRY of length 119; first list element: List of 27
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 106 106 106 106 106 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.49 106.49 106.49 106.49 -5.42 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.53 106.53 106.53 106.53 -5.57 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.5 106.5 106.5 106.5 -5.8 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:8, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:8, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] 106.58 106.58 106.58 106.58 -5.63 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:5, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..$ :List of 1
##   .. ..$ : num [1:6, 1:2] 107 107 107 107 107 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ X4       : num  0.633 35.111 29.941 51.794 35.456 ...

Data Reduction

# Keep only the response Y and the predictors X1 to X4
reduc_data <- int_data[c("Y", "X1", "X2", "X3", "X4")]
head(reduc_data)
##       Y   X1    X2    X3         X4
## 1 13.03 7.93  9.26 14110  0.6328887
## 2  3.03 5.22 11.95 25573 35.1112022
## 3  4.09 6.95 11.99 19193 29.9406319
## 4  4.63 6.24 11.61 18661 51.7937012
## 5  3.94 6.18 11.24 22119 35.4558067
## 6  6.44 6.18 10.85 20032 33.4573936
str(reduc_data)
## 'data.frame':    119 obs. of  5 variables:
##  $ Y : num  13.03 3.03 4.09 4.63 3.94 ...
##  $ X1: num  7.93 5.22 6.95 6.24 6.18 6.18 7.34 7.11 5.99 6.36 ...
##  $ X2: num  9.26 11.95 11.99 11.61 11.24 ...
##  $ X3: num  14110 25573 19193 18661 22119 ...
##  $ X4: num  0.633 35.111 29.941 51.794 35.456 ...

Data Transformation

# Replace X3 with its natural logarithm; all other columns are carried over
# unchanged
trans_data <- reduc_data
trans_data$X3 <- log(trans_data$X3)
head(trans_data)
##       Y   X1    X2        X3         X4
## 1 13.03 7.93  9.26  9.554639  0.6328887
## 2  3.03 5.22 11.95 10.149292 35.1112022
## 3  4.09 6.95 11.99  9.862301 29.9406319
## 4  4.63 6.24 11.61  9.834191 51.7937012
## 5  3.94 6.18 11.24 10.004192 35.4558067
## 6  6.44 6.18 10.85  9.905086 33.4573936