Summary of the lesson

  1. R is case-sensitive.
  2. You will need to use install.packages() to install any packages that are not already installed on your machine. You then load the package into your workspace using the library() function.

R Programming Basics

  1. Vectors. Speaking of objects, R vectors can take on one of five basic classes:
  • character <- c("andi", "albert", "alb3r7")
  • numeric <- c(-1, 1, 2, 3/4, 0.5)
  • integer <- c(1L, 2L)
  • complex <- c(1+3i, (1+3i)*2)
  • logical <- c(TRUE, TRUE, FALSE)
  • E.g.:
# Example: build a character vector whose elements carry name labels
vector1 <- c(Nama1 = "andi", Nama2 = "albert", Nama3 = "alb3r7")
# Print / inspect the vector
print(vector1)
##    Nama1    Nama2    Nama3 
##   "andi" "albert" "alb3r7"
# Inspect the vector's class and its length (number of elements)
class(vector1)
## [1] "character"
length(vector1)
## [1] 3
# To change a vector's class use the "as.xxxx" family: as.logical, as.character, etc.
  2. Matrix

When we create a vector and give it a dimension attribute, we end up with a matrix:

# Example: create a 3 x 2 matrix (filled column by column), display it,
# and check its dimensions
matrix1 <- matrix(data = 11:16, nrow = 3, ncol = 2)
matrix1
##      [,1] [,2]
## [1,]   11   14
## [2,]   12   15
## [3,]   13   16
dim(matrix1)
## [1] 3 2
# matrix1[1, ] selects row 1 across all columns
matrix1[1, ]
## [1] 11 14
# matrix1[, 1] selects column 1 across all rows
matrix1[, 1]
## [1] 11 12 13
accounts <- c("alfa", "indo", "asia")
sales <- c(400, 320, 380)
returns <- c(0, 0, 480)
netsales <- sales - returns
# cbind = bind as columns
# rbind = bind as rows
# rbind(accounts, sales, returns)

# Binding a character vector together with numeric vectors coerces every cell
# to character, which is why the numbers are printed in quotes below.
sales_records <- cbind(accounts, sales, netsales)
sales_records
##      accounts sales netsales
## [1,] "alfa"   "400" "400"   
## [2,] "indo"   "320" "320"   
## [3,] "asia"   "380" "-100"
sales_records <- rbind(accounts, sales, netsales)
sales_records
##          [,1]   [,2]   [,3]  
## accounts "alfa" "indo" "asia"
## sales    "400"  "320"  "380" 
## netsales "400"  "320"  "-100"
  3. List
# A list can hold vectors of different classes side by side
our.list <- list(
  TRUE,
  "TRUE",
  c(1, 6, 12),
  1 + 5i
)
our.list
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] "TRUE"
## 
## [[3]]
## [1]  1  6 12
## 
## [[4]]
## [1] 1+5i
  4. Factor: suatu kelas data yang memiliki pengaruh atau menentukan keputusan dalam suatu rangkaian data, contohnya pada data kredit (kolek, jenis usaha, lama usaha, dll.).

  5. Data frame: Data frames can be thought of as a special case of lists where every element of the list has to have the same length. Each element of the list can be thought of as a column in the data frame.

# Build a data frame with a category-name column and an integer id column
categories_df <- data.frame(
  categories = c("OfficeSupplies", "Computers", "Packaging", "Machinery", "Building"),
  category_id = 111:115
)
categories_df
##       categories category_id
## 1 OfficeSupplies         111
## 2      Computers         112
## 3      Packaging         113
## 4      Machinery         114
## 5       Building         115
# Arithmetic on a column is applied to every element of that column
categories_df$category_id + 1
## [1] 112 113 114 115 116

Programming with Data Sets (Dive Deeper)

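  1. Analisa data awal
  • First make sure you know where the data you'd like to work with is stored (ideally in your current working directory), then use a reader function, such as read.csv() for CSV files or read_xlsx() from the readxl package for Excel files, to read it into your global environment. For example: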
library(readxl)
# Sample BCA debtor data in which names, CIS, plafond amounts, etc. have been disguised
debitur <- read_xlsx(path = "C:/Users/User/Desktop/Albert Analitical/data tugas 1.xlsx")
names(debitur) # show the column names (headers) of the data
##  [1] "INT_RATE_FINAL"     "OS_AMOUNT_IDR"      "PLAFOND_AMOUNT_IDR"
##  [4] "FAC_DSC"            "CIS"                "BCA_COM_SECTOR_CD" 
##  [7] "CORE_BUSINESS"      "MARITAL_STATUS_CD"  "KAT"               
## [10] "BIZ_CITY_DESC"      "BIZ_ZIP"            "BCA_DEBTOR_SINCE"  
## [13] "BCA_CUST_SINCE"     "TGL_MEMO"           "BRANCH_CD"         
## [16] "TGL_KEPUTUSAN"      "IS_OWNER_GUARANTEE" "APP_TYPE_CD"       
## [19] "JENIS_MEMO"
str(debitur) # show the structure of the data (class and sample values per column)
## Classes 'tbl_df', 'tbl' and 'data.frame':    999 obs. of  19 variables:
##  $ INT_RATE_FINAL    : num  12.5 13 11 12.2 12 ...
##  $ OS_AMOUNT_IDR     : num  6.96e+08 5.65e+08 9.30e+08 0.00 1.80e+09 ...
##  $ PLAFOND_AMOUNT_IDR: num  7.0e+08 8.0e+08 9.5e+08 1.0e+09 4.5e+09 ...
##  $ FAC_DSC           : chr  "Kredit Lokal" "Kredit Lokal" "Kredit Modal Kerja" "Kredit Modal Kerja" ...
##  $ CIS               : num  2.13e+10 2.13e+10 2.13e+10 2.14e+10 2.14e+10 ...
##  $ BCA_COM_SECTOR_CD : chr  "33" "33" "32" "17" ...
##  $ CORE_BUSINESS     : chr  "Perdagangan Keramik" "Perdagangan Keramik" "Jasa konstruksi" "Industri Sparepart Mobil (Produk PVC)" ...
##  $ MARITAL_STATUS_CD : chr  "001" "001" "X" "X" ...
##  $ KAT               : chr  "S" "S" "S" "M" ...
##  $ BIZ_CITY_DESC     : chr  "KAB. SUKOHARJO" "KAB. SUKOHARJO" "KAB. JEMBER" "KAB. TANGERANG" ...
##  $ BIZ_ZIP           : chr  "57552" "57552" "68135" "15560" ...
##  $ BCA_DEBTOR_SINCE  : chr  "0" "0" "0" "0" ...
##  $ BCA_CUST_SINCE    : chr  "08/02/2012" "08/02/2012" "15/12/2016" "13/09/1991" ...
##  $ TGL_MEMO          : POSIXct, format: "2017-09-06" "2017-09-06" ...
##  $ BRANCH_CD         : chr  "7850" "7850" "0024" "0971" ...
##  $ TGL_KEPUTUSAN     : POSIXct, format: "2017-10-25" "2017-10-25" ...
##  $ IS_OWNER_GUARANTEE: chr  "N" "N" "N" "N" ...
##  $ APP_TYPE_CD       : chr  "SME" "SME" "SME" "KOMERSIAL" ...
##  $ JENIS_MEMO        : chr  "Baru" "Baru" "Baru" "Baru" ...
nrow(debitur) # number of rows
## [1] 999
ncol(debitur) # number of columns
## [1] 19
head(debitur) # show the first 6 rows
## # A tibble: 6 x 19
##   INT_RATE_FINAL OS_AMOUNT_IDR PLAFOND_AMOUNT_~ FAC_DSC     CIS
##            <dbl>         <dbl>            <dbl> <chr>     <dbl>
## 1           12.5     695980423        700000000 Kredit~ 2.13e10
## 2           13       564533594        800000000 Kredit~ 2.13e10
## 3           11       930399221        950000000 Kredit~ 2.13e10
## 4           12.2             0       1000000000 Kredit~ 2.14e10
## 5           12      1802685958       4500000000 Kredit~ 2.14e10
## 6           12      2550000000       2550000000 Kredit~ 2.14e10
## # ... with 14 more variables: BCA_COM_SECTOR_CD <chr>,
## #   CORE_BUSINESS <chr>, MARITAL_STATUS_CD <chr>, KAT <chr>,
## #   BIZ_CITY_DESC <chr>, BIZ_ZIP <chr>, BCA_DEBTOR_SINCE <chr>,
## #   BCA_CUST_SINCE <chr>, TGL_MEMO <dttm>, BRANCH_CD <chr>,
## #   TGL_KEPUTUSAN <dttm>, IS_OWNER_GUARANTEE <chr>, APP_TYPE_CD <chr>,
## #   JENIS_MEMO <chr>
tail(debitur) # show the last 6 rows
## # A tibble: 6 x 19
##   INT_RATE_FINAL OS_AMOUNT_IDR PLAFOND_AMOUNT_~ FAC_DSC     CIS
##            <dbl>         <dbl>            <dbl> <chr>     <dbl>
## 1           13.8    882999999         890000000 Kredit~ 2.13e10
## 2           13.5   1374558701        2000000000 Kredit~ 2.14e10
## 3            9       13888269          13888269 KUR - ~ 2.13e10
## 4           12     2304716610        3000000000 Kredit~ 2.14e10
## 5           12.8   1561858776        1561858776 Instal~ 2.14e10
## 6           13      926044238.        930000000 Kredit~ 2.14e10
## # ... with 14 more variables: BCA_COM_SECTOR_CD <chr>,
## #   CORE_BUSINESS <chr>, MARITAL_STATUS_CD <chr>, KAT <chr>,
## #   BIZ_CITY_DESC <chr>, BIZ_ZIP <chr>, BCA_DEBTOR_SINCE <chr>,
## #   BCA_CUST_SINCE <chr>, TGL_MEMO <dttm>, BRANCH_CD <chr>,
## #   TGL_KEPUTUSAN <dttm>, IS_OWNER_GUARANTEE <chr>, APP_TYPE_CD <chr>,
## #   JENIS_MEMO <chr>
head(debitur, n = 10) # show the first rows; n is the number of rows wanted
## # A tibble: 10 x 19
##    INT_RATE_FINAL OS_AMOUNT_IDR PLAFOND_AMOUNT_~ FAC_DSC     CIS
##             <dbl>         <dbl>            <dbl> <chr>     <dbl>
##  1           12.5     695980423        700000000 Kredit~ 2.13e10
##  2           13       564533594        800000000 Kredit~ 2.13e10
##  3           11       930399221        950000000 Kredit~ 2.13e10
##  4           12.2             0       1000000000 Kredit~ 2.14e10
##  5           12      1802685958       4500000000 Kredit~ 2.14e10
##  6           12      2550000000       2550000000 Kredit~ 2.14e10
##  7           12      3765177768       3765177768 Kredit~ 2.14e10
##  8           12      1275000000       1275000000 Kredit~ 2.14e10
##  9           13.5    2972665707       3000000000 Kredit~ 2.13e10
## 10           13      1248946207       1248946207 Kredit~ 2.13e10
## # ... with 14 more variables: BCA_COM_SECTOR_CD <chr>,
## #   CORE_BUSINESS <chr>, MARITAL_STATUS_CD <chr>, KAT <chr>,
## #   BIZ_CITY_DESC <chr>, BIZ_ZIP <chr>, BCA_DEBTOR_SINCE <chr>,
## #   BCA_CUST_SINCE <chr>, TGL_MEMO <dttm>, BRANCH_CD <chr>,
## #   TGL_KEPUTUSAN <dttm>, IS_OWNER_GUARANTEE <chr>, APP_TYPE_CD <chr>,
## #   JENIS_MEMO <chr>
  2. Pengolahan data lanjutan: untuk mengubah kelas dari sebuah data; jika data berupa tanggal, format tanggalnya harus disesuaikan.
  3. Konversi kolom dengan as.Date(). Kode format: %d = day of month (number), %m = month (number), %b = month (abbreviated), %B = month (full name), %y = year (2 digit), %Y = year (4 digit).

contoh:

# TGL_MEMO was read in as a date-time (POSIXct) column, so it is converted to
# Date without a format string. For POSIXct input the second positional
# argument of as.Date() is the time zone, not a format; passing "%m/%d/%Y"
# there raised an "unknown timezone '%m/%d/%Y'" warning.
# Format strings are only for character input, e.g. a text column holding
# values such as "08/02/2012" (day/month/year):
#   as.Date(debitur$BCA_CUST_SINCE, format = "%d/%m/%Y")
debitur$TGL_MEMO.baru <- as.Date(debitur$TGL_MEMO)
# Extract only the month name from the date-time column
debitur$TGL_MEMO.bulan <- months(debitur$TGL_MEMO)
head(debitur)
## # A tibble: 6 x 21
##   INT_RATE_FINAL OS_AMOUNT_IDR PLAFOND_AMOUNT_~ FAC_DSC     CIS
##            <dbl>         <dbl>            <dbl> <chr>     <dbl>
## 1           12.5     695980423        700000000 Kredit~ 2.13e10
## 2           13       564533594        800000000 Kredit~ 2.13e10
## 3           11       930399221        950000000 Kredit~ 2.13e10
## 4           12.2             0       1000000000 Kredit~ 2.14e10
## 5           12      1802685958       4500000000 Kredit~ 2.14e10
## 6           12      2550000000       2550000000 Kredit~ 2.14e10
## # ... with 16 more variables: BCA_COM_SECTOR_CD <chr>,
## #   CORE_BUSINESS <chr>, MARITAL_STATUS_CD <chr>, KAT <chr>,
## #   BIZ_CITY_DESC <chr>, BIZ_ZIP <chr>, BCA_DEBTOR_SINCE <chr>,
## #   BCA_CUST_SINCE <chr>, TGL_MEMO <dttm>, BRANCH_CD <chr>,
## #   TGL_KEPUTUSAN <dttm>, IS_OWNER_GUARANTEE <chr>, APP_TYPE_CD <chr>,
## #   JENIS_MEMO <chr>, TGL_MEMO.baru <date>, TGL_MEMO.bulan <chr>

Programming with Data Sets (using table, xtabs, aggregate)

# table() cross-tabulates counts; example: count the memo types (JENIS_MEMO)
# among the debtors in category "K"
debitur.K <- debitur[debitur$KAT == "K", ]
table(debitur.K$KAT, debitur.K$JENIS_MEMO)
##    
##     Baru Penambahan Pengurangan Penukaran Perpanjangan
##   K   82          2           1         2            1
# Percentage of each memo type (e.g. "Baru") within category K
prop.table(table(debitur.K$KAT, debitur.K$JENIS_MEMO)) * 100
##    
##          Baru Penambahan Pengurangan Penukaran Perpanjangan
##   K 93.181818   2.272727    1.136364  2.272727     1.136364
# xtabs() builds a cross-tabulation from a formula; example: outstanding
# amount per debtor category (KAT) and memo type (JENIS_MEMO)
xtabs(OS_AMOUNT_IDR ~ KAT + JENIS_MEMO, data = debitur)
##    JENIS_MEMO
## KAT         Baru    Lain-lain   Penambahan  Pengurangan    Penukaran
##   K 3.131867e+10 0.000000e+00 7.182619e+08 1.687500e+08 1.106548e+09
##   M 2.883192e+11 0.000000e+00 1.011126e+11 0.000000e+00 0.000000e+00
##   S 1.242954e+12 1.012833e+09 1.543201e+11 4.995590e+08 3.500000e+08
##    JENIS_MEMO
## KAT Perpanjangan Perpanjangan - BS
##   K 2.500000e+08      0.000000e+00
##   M 1.434326e+09      0.000000e+00
##   S 2.222660e+10      1.221180e+09
xtabs(
  OS_AMOUNT_IDR ~ KAT + MARITAL_STATUS_CD,
  data = debitur[debitur$JENIS_MEMO == "Baru", ]
)
##    MARITAL_STATUS_CD
## KAT          001          002          003          005            X
##   K  22285235361    852107087    887410344            0   7293921096
##   M  65593125146  33601167874            0   6558922602 182565987740
##   S 412823089215  22949583045  19940032915  29075041306 758166002255
# aggregate() summarises by group and accepts any summary function (here: sum);
# it returns one row per combination that occurs in the data
aggregate(OS_AMOUNT_IDR ~ KAT + JENIS_MEMO, data = debitur, FUN = sum)
##    KAT        JENIS_MEMO OS_AMOUNT_IDR
## 1    K              Baru  3.131867e+10
## 2    M              Baru  2.883192e+11
## 3    S              Baru  1.242954e+12
## 4    S         Lain-lain  1.012833e+09
## 5    K        Penambahan  7.182619e+08
## 6    M        Penambahan  1.011126e+11
## 7    S        Penambahan  1.543201e+11
## 8    K       Pengurangan  1.687500e+08
## 9    S       Pengurangan  4.995590e+08
## 10   K         Penukaran  1.106548e+09
## 11   S         Penukaran  3.500000e+08
## 12   K      Perpanjangan  2.500000e+08
## 13   M      Perpanjangan  1.434326e+09
## 14   S      Perpanjangan  2.222660e+10
## 15   S Perpanjangan - BS  1.221180e+09
# Plot the cross-tabulation (plot() on a two-way table draws a mosaic plot)
plot(xtabs(OS_AMOUNT_IDR ~ KAT + JENIS_MEMO, data = debitur))

# Heat map of the cross-tabulation: Rowv / Colv = NA turn off the row / column
# dendrograms, cexRow / cexCol set the size of the row / column labels
heatmap(
  xtabs(OS_AMOUNT_IDR ~ KAT + JENIS_MEMO, data = debitur),
  Rowv = NA,
  Colv = NA,
  cexRow = 1,
  cexCol = 1,
  scale = "column"
)