setwd("D:/SEMESTER 3/Regresi Terapan/UAS")
df <- read.csv("mobil.csv", sep = ",")
head(df)
## jenis_bahan_bakar banyak_pintu body_mobil panjang_mobil lebar_mobil
## 1 bensin dua convertible 168.8 64.1
## 2 bensin dua convertible 168.8 64.1
## 3 bensin dua hatchback 171.2 65.5
## 4 bensin empat sedan 176.6 66.2
## 5 bensin empat sedan 176.6 66.4
## 6 bensin dua sedan 177.3 66.3
## tinggi_mobil berat_moil compressionratio horsepower highwaympg harga
## 1 48.8 2548 9.0 111 27 13495
## 2 48.8 2548 9.0 111 27 16500
## 3 52.4 2823 9.0 154 26 16500
## 4 54.3 2337 10.0 102 30 13950
## 5 54.3 2824 8.0 115 22 17450
## 6 53.1 2507 8.5 110 25 15250
data <- data.frame(
jenis_bahan_bakar = df$jenis_bahan_bakar,
body_mobil = df$body_mobil,
banyak_pintu = df$banyak_pintu,
panjang_mobil = df$panjang_mobil,
lebar_mobil = df$lebar_mobil,
tinggi_mobil = df$tinggi_mobil,
berat_mobil = df$berat_moil,
compras= df$compressionratio,
horsepower= df$horsepower,
mpg= df$highwaympg,
harga = df$harga
)
data_dummies <- cbind(
model.matrix(~ jenis_bahan_bakar + body_mobil +banyak_pintu - 1, data),
data[, c("panjang_mobil", "lebar_mobil", "tinggi_mobil", "berat_mobil","compras","horsepower","mpg", "harga"), drop = FALSE]
)
data_dummies
## jenis_bahan_bakarbensin jenis_bahan_bakarsolar body_mobilhardtop
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## 7 1 0 0
## 8 1 0 0
## 9 1 0 0
## 10 1 0 0
## 11 1 0 0
## 12 1 0 0
## 13 1 0 0
## 14 1 0 0
## 15 1 0 0
## 16 1 0 0
## 17 1 0 0
## 18 1 0 0
## 19 1 0 0
## 20 1 0 0
## 21 1 0 0
## 22 1 0 0
## 23 1 0 0
## 24 1 0 0
## 25 1 0 0
## 26 1 0 0
## 27 1 0 0
## 28 1 0 0
## 29 1 0 0
## 30 1 0 0
## 31 1 0 0
## 32 1 0 0
## 33 1 0 0
## 34 1 0 0
## 35 1 0 0
## 36 1 0 0
## 37 1 0 0
## 38 1 0 0
## 39 1 0 0
## 40 1 0 0
## 41 1 0 0
## 42 1 0 0
## 43 1 0 0
## 44 1 0 0
## 45 1 0 0
## 46 1 0 0
## 47 1 0 0
## 48 1 0 0
## 49 1 0 0
## 50 1 0 0
## 51 1 0 0
## 52 1 0 0
## 53 1 0 0
## 54 1 0 0
## 55 1 0 0
## 56 1 0 0
## 57 1 0 0
## 58 1 0 0
## 59 1 0 0
## 60 1 0 0
## 61 1 0 0
## 62 1 0 0
## 63 1 0 0
## 64 0 1 0
## 65 1 0 0
## 66 1 0 0
## 67 0 1 0
## 68 0 1 0
## 69 0 1 0
## 70 0 1 1
## 71 0 1 0
## 72 1 0 0
## 73 1 0 0
## 74 1 0 0
## 75 1 0 1
## 76 1 0 0
## 77 1 0 0
## 78 1 0 0
## 79 1 0 0
## 80 1 0 0
## 81 1 0 0
## 82 1 0 0
## 83 1 0 0
## 84 1 0 0
## 85 1 0 0
## 86 1 0 0
## 87 1 0 0
## 88 1 0 0
## 89 1 0 0
## 90 1 0 0
## 91 0 1 0
## 92 1 0 0
## 93 1 0 0
## 94 1 0 0
## 95 1 0 0
## 96 1 0 0
## 97 1 0 0
## 98 1 0 0
## 99 1 0 1
## 100 1 0 0
## 101 1 0 0
## 102 1 0 0
## 103 1 0 0
## 104 1 0 0
## 105 1 0 0
## 106 1 0 0
## 107 1 0 0
## 108 1 0 0
## 109 0 1 0
## 110 1 0 0
## 111 0 1 0
## 112 1 0 0
## 113 0 1 0
## 114 1 0 0
## 115 0 1 0
## 116 1 0 0
## 117 0 1 0
## 118 1 0 0
## 119 1 0 0
## 120 1 0 0
## 121 1 0 0
## 122 1 0 0
## 123 1 0 0
## 124 1 0 0
## 125 1 0 0
## 126 1 0 0
## 127 1 0 1
## 128 1 0 1
## 129 1 0 0
## 130 1 0 0
## 131 1 0 0
## 132 1 0 0
## 133 1 0 0
## 134 1 0 0
## 135 1 0 0
## 136 1 0 0
## 137 1 0 0
## 138 1 0 0
## 139 1 0 0
## 140 1 0 0
## 141 1 0 0
## 142 1 0 0
## 143 1 0 0
## 144 1 0 0
## 145 1 0 0
## 146 1 0 0
## 147 1 0 0
## 148 1 0 0
## 149 1 0 0
## 150 1 0 0
## 151 1 0 0
## 152 1 0 0
## 153 1 0 0
## 154 1 0 0
## 155 1 0 0
## 156 1 0 0
## 157 1 0 0
## 158 1 0 0
## 159 0 1 0
## 160 0 1 0
## 161 1 0 0
## 162 1 0 0
## 163 1 0 0
## 164 1 0 0
## 165 1 0 0
## 166 1 0 0
## 167 1 0 0
## 168 1 0 1
## 169 1 0 1
## 170 1 0 0
## 171 1 0 1
## 172 1 0 0
## 173 1 0 0
## 174 1 0 0
## 175 0 1 0
## 176 1 0 0
## 177 1 0 0
## 178 1 0 0
## 179 1 0 0
## 180 1 0 0
## 181 1 0 0
## 182 1 0 0
## 183 0 1 0
## 184 1 0 0
## 185 0 1 0
## 186 1 0 0
## 187 1 0 0
## 188 0 1 0
## 189 1 0 0
## 190 1 0 0
## 191 1 0 0
## 192 1 0 0
## 193 0 1 0
## 194 1 0 0
## 195 1 0 0
## 196 1 0 0
## 197 1 0 0
## 198 1 0 0
## 199 1 0 0
## 200 1 0 0
## 201 1 0 0
## 202 1 0 0
## 203 1 0 0
## 204 0 1 0
## 205 1 0 0
## body_mobilhatchback body_mobilsedan body_mobilwagon banyak_pintuempat
## 1 0 0 0 0
## 2 0 0 0 0
## 3 1 0 0 0
## 4 0 1 0 1
## 5 0 1 0 1
## 6 0 1 0 0
## 7 0 1 0 1
## 8 0 0 1 1
## 9 0 1 0 1
## 10 1 0 0 0
## 11 0 1 0 0
## 12 0 1 0 1
## 13 0 1 0 0
## 14 0 1 0 1
## 15 0 1 0 1
## 16 0 1 0 1
## 17 0 1 0 0
## 18 0 1 0 1
## 19 1 0 0 0
## 20 1 0 0 0
## 21 0 1 0 1
## 22 1 0 0 0
## 23 1 0 0 0
## 24 1 0 0 0
## 25 1 0 0 1
## 26 0 1 0 1
## 27 0 1 0 1
## 28 0 1 0 0
## 29 0 0 1 1
## 30 1 0 0 0
## 31 1 0 0 0
## 32 1 0 0 0
## 33 1 0 0 0
## 34 1 0 0 0
## 35 1 0 0 0
## 36 0 1 0 1
## 37 0 0 1 1
## 38 1 0 0 0
## 39 1 0 0 0
## 40 0 1 0 1
## 41 0 1 0 1
## 42 0 1 0 1
## 43 0 1 0 0
## 44 0 1 0 1
## 45 0 1 0 0
## 46 0 1 0 1
## 47 1 0 0 0
## 48 0 1 0 1
## 49 0 1 0 1
## 50 0 1 0 0
## 51 1 0 0 0
## 52 1 0 0 0
## 53 1 0 0 0
## 54 0 1 0 1
## 55 0 1 0 1
## 56 1 0 0 0
## 57 1 0 0 0
## 58 1 0 0 0
## 59 1 0 0 0
## 60 1 0 0 0
## 61 0 1 0 1
## 62 1 0 0 0
## 63 0 1 0 1
## 64 0 1 0 1
## 65 1 0 0 1
## 66 0 1 0 1
## 67 0 1 0 1
## 68 0 1 0 1
## 69 0 0 1 1
## 70 0 0 0 0
## 71 0 1 0 1
## 72 0 1 0 1
## 73 0 0 0 0
## 74 0 1 0 1
## 75 0 0 0 0
## 76 1 0 0 0
## 77 1 0 0 0
## 78 1 0 0 0
## 79 1 0 0 0
## 80 1 0 0 0
## 81 1 0 0 0
## 82 1 0 0 0
## 83 1 0 0 0
## 84 1 0 0 0
## 85 1 0 0 0
## 86 0 1 0 1
## 87 0 1 0 1
## 88 0 1 0 1
## 89 0 1 0 1
## 90 0 1 0 0
## 91 0 1 0 0
## 92 0 1 0 0
## 93 0 1 0 1
## 94 0 0 1 1
## 95 0 1 0 0
## 96 1 0 0 0
## 97 0 1 0 1
## 98 0 0 1 1
## 99 0 0 0 0
## 100 1 0 0 1
## 101 0 1 0 1
## 102 0 1 0 1
## 103 0 0 1 1
## 104 0 1 0 1
## 105 1 0 0 0
## 106 1 0 0 0
## 107 1 0 0 0
## 108 0 1 0 1
## 109 0 1 0 1
## 110 0 0 1 1
## 111 0 0 1 1
## 112 0 1 0 1
## 113 0 1 0 1
## 114 0 0 1 1
## 115 0 0 1 1
## 116 0 1 0 1
## 117 0 1 0 1
## 118 0 1 0 1
## 119 1 0 0 0
## 120 1 0 0 0
## 121 1 0 0 1
## 122 0 1 0 1
## 123 0 1 0 1
## 124 0 0 1 1
## 125 1 0 0 0
## 126 1 0 0 0
## 127 0 0 0 0
## 128 0 0 0 0
## 129 0 0 0 0
## 130 1 0 0 0
## 131 0 0 1 1
## 132 1 0 0 0
## 133 1 0 0 0
## 134 0 1 0 1
## 135 1 0 0 0
## 136 0 1 0 1
## 137 1 0 0 0
## 138 0 1 0 1
## 139 1 0 0 0
## 140 1 0 0 0
## 141 1 0 0 0
## 142 0 1 0 1
## 143 0 1 0 1
## 144 0 1 0 1
## 145 0 1 0 1
## 146 0 1 0 1
## 147 0 0 1 1
## 148 0 0 1 1
## 149 0 0 1 1
## 150 0 0 1 1
## 151 1 0 0 0
## 152 1 0 0 0
## 153 1 0 0 1
## 154 0 0 1 1
## 155 0 0 1 1
## 156 0 0 1 1
## 157 0 1 0 1
## 158 1 0 0 1
## 159 0 1 0 1
## 160 1 0 0 1
## 161 0 1 0 1
## 162 1 0 0 1
## 163 0 1 0 1
## 164 0 1 0 0
## 165 1 0 0 0
## 166 0 1 0 0
## 167 1 0 0 0
## 168 0 0 0 0
## 169 0 0 0 0
## 170 1 0 0 0
## 171 0 0 0 0
## 172 1 0 0 0
## 173 0 0 0 0
## 174 0 1 0 1
## 175 0 1 0 1
## 176 1 0 0 1
## 177 0 1 0 1
## 178 1 0 0 1
## 179 1 0 0 0
## 180 1 0 0 0
## 181 0 1 0 1
## 182 0 0 1 1
## 183 0 1 0 0
## 184 0 1 0 0
## 185 0 1 0 1
## 186 0 1 0 1
## 187 0 1 0 1
## 188 0 1 0 1
## 189 0 1 0 1
## 190 0 0 0 0
## 191 1 0 0 0
## 192 0 1 0 1
## 193 0 1 0 1
## 194 0 0 1 1
## 195 0 1 0 1
## 196 0 0 1 1
## 197 0 1 0 1
## 198 0 0 1 1
## 199 0 1 0 1
## 200 0 0 1 1
## 201 0 1 0 1
## 202 0 1 0 1
## 203 0 1 0 1
## 204 0 1 0 1
## 205 0 1 0 1
## panjang_mobil lebar_mobil tinggi_mobil berat_mobil compras horsepower mpg
## 1 168.8 64.1 48.8 2548 9.00 111 27
## 2 168.8 64.1 48.8 2548 9.00 111 27
## 3 171.2 65.5 52.4 2823 9.00 154 26
## 4 176.6 66.2 54.3 2337 10.00 102 30
## 5 176.6 66.4 54.3 2824 8.00 115 22
## 6 177.3 66.3 53.1 2507 8.50 110 25
## 7 192.7 71.4 55.7 2844 8.50 110 25
## 8 192.7 71.4 55.7 2954 8.50 110 25
## 9 192.7 71.4 55.9 3086 8.30 140 20
## 10 178.2 67.9 52.0 3053 7.00 160 22
## 11 176.8 64.8 54.3 2395 8.80 101 29
## 12 176.8 64.8 54.3 2395 8.80 101 29
## 13 176.8 64.8 54.3 2710 9.00 121 28
## 14 176.8 64.8 54.3 2765 9.00 121 28
## 15 189.0 66.9 55.7 3055 9.00 121 25
## 16 189.0 66.9 55.7 3230 8.00 182 22
## 17 193.8 67.9 53.7 3380 8.00 182 22
## 18 197.0 70.9 56.3 3505 8.00 182 20
## 19 141.1 60.3 53.2 1488 9.50 48 53
## 20 155.9 63.6 52.0 1874 9.60 70 43
## 21 158.8 63.6 52.0 1909 9.60 70 43
## 22 157.3 63.8 50.8 1876 9.41 68 41
## 23 157.3 63.8 50.8 1876 9.40 68 38
## 24 157.3 63.8 50.8 2128 7.60 102 30
## 25 157.3 63.8 50.6 1967 9.40 68 38
## 26 157.3 63.8 50.6 1989 9.40 68 38
## 27 157.3 63.8 50.6 1989 9.40 68 38
## 28 157.3 63.8 50.6 2191 7.60 102 30
## 29 174.6 64.6 59.8 2535 8.50 88 30
## 30 173.2 66.3 50.2 2811 7.00 145 24
## 31 144.6 63.9 50.8 1713 9.60 58 54
## 32 144.6 63.9 50.8 1819 9.20 76 38
## 33 150.0 64.0 52.6 1837 10.10 60 42
## 34 150.0 64.0 52.6 1940 9.20 76 34
## 35 150.0 64.0 52.6 1956 9.20 76 34
## 36 163.4 64.0 54.5 2010 9.20 76 34
## 37 157.1 63.9 58.3 2024 9.20 76 34
## 38 167.5 65.2 53.3 2236 9.00 86 33
## 39 167.5 65.2 53.3 2289 9.00 86 33
## 40 175.4 65.2 54.1 2304 9.00 86 33
## 41 175.4 62.5 54.1 2372 9.00 86 33
## 42 175.4 65.2 54.1 2465 9.00 101 28
## 43 169.1 66.0 51.0 2293 9.10 100 31
## 44 170.7 61.8 53.5 2337 8.50 78 29
## 45 155.9 63.6 52.0 1874 9.60 70 43
## 46 155.9 63.6 52.0 1909 9.60 70 43
## 47 172.6 65.2 51.4 2734 9.20 90 29
## 48 199.6 69.6 52.8 4066 8.10 176 19
## 49 199.6 69.6 52.8 4066 8.10 176 19
## 50 191.7 70.6 47.8 3950 11.50 262 17
## 51 159.1 64.2 54.1 1890 9.00 68 31
## 52 159.1 64.2 54.1 1900 9.00 68 38
## 53 159.1 64.2 54.1 1905 9.00 68 38
## 54 166.8 64.2 54.1 1945 9.00 68 38
## 55 166.8 64.2 54.1 1950 9.00 68 38
## 56 169.0 65.7 49.6 2380 9.40 101 23
## 57 169.0 65.7 49.6 2380 9.40 101 23
## 58 169.0 65.7 49.6 2385 9.40 101 23
## 59 169.0 65.7 49.6 2500 9.40 135 23
## 60 177.8 66.5 53.7 2385 8.60 84 32
## 61 177.8 66.5 55.5 2410 8.60 84 32
## 62 177.8 66.5 53.7 2385 8.60 84 32
## 63 177.8 66.5 55.5 2410 8.60 84 32
## 64 177.8 66.5 55.5 2443 22.70 64 42
## 65 177.8 66.5 55.5 2425 8.60 84 32
## 66 175.0 66.1 54.4 2670 8.00 120 27
## 67 175.0 66.1 54.4 2700 22.00 72 39
## 68 190.9 70.3 56.5 3515 21.50 123 25
## 69 190.9 70.3 58.7 3750 21.50 123 25
## 70 187.5 70.3 54.9 3495 21.50 123 25
## 71 202.6 71.7 56.3 3770 21.50 123 25
## 72 202.6 71.7 56.5 3740 8.30 155 18
## 73 180.3 70.5 50.8 3685 8.30 155 18
## 74 208.1 71.7 56.7 3900 8.00 184 16
## 75 199.2 72.0 55.4 3715 8.00 184 16
## 76 178.4 68.0 54.8 2910 8.00 175 24
## 77 157.3 64.4 50.8 1918 9.40 68 41
## 78 157.3 64.4 50.8 1944 9.40 68 38
## 79 157.3 64.4 50.8 2004 9.40 68 38
## 80 157.3 63.8 50.8 2145 7.60 102 30
## 81 173.0 65.4 49.4 2370 7.50 116 30
## 82 173.0 65.4 49.4 2328 8.50 88 32
## 83 173.2 66.3 50.2 2833 7.00 145 24
## 84 173.2 66.3 50.2 2921 7.00 145 24
## 85 173.2 66.3 50.2 2926 7.00 145 24
## 86 172.4 65.4 51.6 2365 8.50 88 32
## 87 172.4 65.4 51.6 2405 8.50 88 32
## 88 172.4 65.4 51.6 2403 7.50 116 30
## 89 172.4 65.4 51.6 2403 7.50 116 30
## 90 165.3 63.8 54.5 1889 9.40 69 37
## 91 165.3 63.8 54.5 2017 21.90 55 50
## 92 165.3 63.8 54.5 1918 9.40 69 37
## 93 165.3 63.8 54.5 1938 9.40 69 37
## 94 170.2 63.8 53.5 2024 9.40 69 37
## 95 165.3 63.8 54.5 1951 9.40 69 37
## 96 165.6 63.8 53.3 2028 9.40 69 37
## 97 165.3 63.8 54.5 1971 9.40 69 37
## 98 170.2 63.8 53.5 2037 9.40 69 37
## 99 162.4 63.8 53.3 2008 9.40 69 37
## 100 173.4 65.2 54.7 2324 8.50 97 34
## 101 173.4 65.2 54.7 2302 8.50 97 34
## 102 181.7 66.5 55.1 3095 9.00 152 22
## 103 184.6 66.5 56.1 3296 9.00 152 22
## 104 184.6 66.5 55.1 3060 9.00 152 25
## 105 170.7 67.9 49.7 3071 9.00 160 25
## 106 170.7 67.9 49.7 3139 7.80 200 23
## 107 178.5 67.9 49.7 3139 9.00 160 25
## 108 186.7 68.4 56.7 3020 8.40 97 24
## 109 186.7 68.4 56.7 3197 21.00 95 33
## 110 198.9 68.4 58.7 3230 8.40 97 24
## 111 198.9 68.4 58.7 3430 21.00 95 25
## 112 186.7 68.4 56.7 3075 8.40 95 24
## 113 186.7 68.4 56.7 3252 21.00 95 33
## 114 198.9 68.4 56.7 3285 8.40 95 24
## 115 198.9 68.4 58.7 3485 21.00 95 25
## 116 186.7 68.4 56.7 3075 8.40 97 24
## 117 186.7 68.4 56.7 3252 21.00 95 33
## 118 186.7 68.3 56.0 3130 7.00 142 24
## 119 157.3 63.8 50.8 1918 9.40 68 41
## 120 157.3 63.8 50.8 2128 7.60 102 30
## 121 157.3 63.8 50.6 1967 9.40 68 38
## 122 167.3 63.8 50.8 1989 9.40 68 38
## 123 167.3 63.8 50.8 2191 9.40 68 38
## 124 174.6 64.6 59.8 2535 8.50 88 30
## 125 173.2 66.3 50.2 2818 7.00 145 24
## 126 168.9 68.3 50.2 2778 9.50 143 27
## 127 168.9 65.0 51.6 2756 9.50 207 25
## 128 168.9 65.0 51.6 2756 9.50 207 25
## 129 168.9 65.0 51.6 2800 9.50 207 25
## 130 175.7 72.3 50.5 3366 10.00 288 28
## 131 181.5 66.5 55.2 2579 8.70 90 31
## 132 176.8 66.6 50.5 2460 8.70 90 31
## 133 186.6 66.5 56.1 2658 9.31 110 28
## 134 186.6 66.5 56.1 2695 9.30 110 28
## 135 186.6 66.5 56.1 2707 9.30 110 28
## 136 186.6 66.5 56.1 2758 9.30 110 28
## 137 186.6 66.5 56.1 2808 9.00 160 26
## 138 186.6 66.5 56.1 2847 9.00 160 26
## 139 156.9 63.4 53.7 2050 9.00 69 36
## 140 157.9 63.6 53.7 2120 8.70 73 31
## 141 157.3 63.8 55.7 2240 8.70 73 31
## 142 172.0 65.4 52.5 2145 9.50 82 37
## 143 172.0 65.4 52.5 2190 9.50 82 33
## 144 172.0 65.4 52.5 2340 9.00 94 32
## 145 172.0 65.4 54.3 2385 9.00 82 25
## 146 172.0 65.4 54.3 2510 7.70 111 29
## 147 173.5 65.4 53.0 2290 9.00 82 32
## 148 173.5 65.4 53.0 2455 9.00 94 31
## 149 173.6 65.4 54.9 2420 9.00 82 29
## 150 173.6 65.4 54.9 2650 7.70 111 23
## 151 158.7 63.6 54.5 1985 9.00 62 39
## 152 158.7 63.6 54.5 2040 9.00 62 38
## 153 158.7 63.6 54.5 2015 9.00 62 38
## 154 169.7 63.6 59.1 2280 9.00 62 37
## 155 169.7 63.6 59.1 2290 9.00 62 32
## 156 169.7 63.6 59.1 3110 9.00 62 32
## 157 166.3 64.4 53.0 2081 9.00 70 37
## 158 166.3 64.4 52.8 2109 9.00 70 37
## 159 166.3 64.4 53.0 2275 22.50 56 36
## 160 166.3 64.4 52.8 2275 22.50 56 47
## 161 166.3 64.4 53.0 2094 9.00 70 47
## 162 166.3 64.4 52.8 2122 9.00 70 34
## 163 166.3 64.4 52.8 2140 9.00 70 34
## 164 168.7 64.0 52.6 2169 9.00 70 34
## 165 168.7 64.0 52.6 2204 9.00 70 34
## 166 168.7 64.0 52.6 2265 9.40 112 29
## 167 168.7 64.0 52.6 2300 9.40 112 29
## 168 176.2 65.6 52.0 2540 9.30 116 30
## 169 176.2 65.6 52.0 2536 9.30 116 30
## 170 176.2 65.6 52.0 2551 9.30 116 30
## 171 176.2 65.6 52.0 2679 9.30 116 30
## 172 176.2 65.6 52.0 2714 9.30 116 30
## 173 176.2 65.6 53.0 2975 9.30 116 30
## 174 175.6 66.5 54.9 2326 8.70 92 34
## 175 175.6 66.5 54.9 2480 22.50 73 33
## 176 175.6 66.5 53.9 2414 8.70 92 32
## 177 175.6 66.5 54.9 2414 8.70 92 32
## 178 175.6 66.5 53.9 2458 8.70 92 32
## 179 183.5 67.7 52.0 2976 9.30 161 24
## 180 183.5 67.7 52.0 3016 9.30 161 24
## 181 187.8 66.5 54.1 3131 9.20 156 24
## 182 187.8 66.5 54.1 3151 9.20 156 24
## 183 171.7 65.5 55.7 2261 23.00 52 46
## 184 171.7 65.5 55.7 2209 9.00 85 34
## 185 171.7 65.5 55.7 2264 23.00 52 46
## 186 171.7 65.5 55.7 2212 9.00 85 34
## 187 171.7 65.5 55.7 2275 9.00 85 34
## 188 171.7 65.5 55.7 2319 23.00 68 42
## 189 171.7 65.5 55.7 2300 10.00 100 32
## 190 159.3 64.2 55.6 2254 8.50 90 29
## 191 165.7 64.0 51.4 2221 8.50 90 29
## 192 180.2 66.9 55.1 2661 8.50 110 24
## 193 180.2 66.9 55.1 2579 23.00 68 38
## 194 183.1 66.9 55.1 2563 9.00 88 31
## 195 188.8 67.2 56.2 2912 9.50 114 28
## 196 188.8 67.2 57.5 3034 9.50 114 28
## 197 188.8 67.2 56.2 2935 9.50 114 28
## 198 188.8 67.2 57.5 3042 9.50 114 28
## 199 188.8 67.2 56.2 3045 7.50 162 22
## 200 188.8 67.2 57.5 3157 7.50 162 22
## 201 188.8 68.9 55.5 2952 9.50 114 28
## 202 188.8 68.8 55.5 3049 8.70 160 25
## 203 188.8 68.9 55.5 3012 8.80 134 23
## 204 188.8 68.9 55.5 3217 23.00 106 27
## 205 188.8 68.9 55.5 3062 9.50 114 25
## harga
## 1 13495.00
## 2 16500.00
## 3 16500.00
## 4 13950.00
## 5 17450.00
## 6 15250.00
## 7 17710.00
## 8 18920.00
## 9 23875.00
## 10 17859.17
## 11 16430.00
## 12 16925.00
## 13 20970.00
## 14 21105.00
## 15 24565.00
## 16 30760.00
## 17 41315.00
## 18 36880.00
## 19 5151.00
## 20 6295.00
## 21 6575.00
## 22 5572.00
## 23 6377.00
## 24 7957.00
## 25 6229.00
## 26 6692.00
## 27 7609.00
## 28 8558.00
## 29 8921.00
## 30 12964.00
## 31 6479.00
## 32 6855.00
## 33 5399.00
## 34 6529.00
## 35 7129.00
## 36 7295.00
## 37 7295.00
## 38 7895.00
## 39 9095.00
## 40 8845.00
## 41 10295.00
## 42 12945.00
## 43 10345.00
## 44 6785.00
## 45 8916.50
## 46 8916.50
## 47 11048.00
## 48 32250.00
## 49 35550.00
## 50 36000.00
## 51 5195.00
## 52 6095.00
## 53 6795.00
## 54 6695.00
## 55 7395.00
## 56 10945.00
## 57 11845.00
## 58 13645.00
## 59 15645.00
## 60 8845.00
## 61 8495.00
## 62 10595.00
## 63 10245.00
## 64 10795.00
## 65 11245.00
## 66 18280.00
## 67 18344.00
## 68 25552.00
## 69 28248.00
## 70 28176.00
## 71 31600.00
## 72 34184.00
## 73 35056.00
## 74 40960.00
## 75 45400.00
## 76 16503.00
## 77 5389.00
## 78 6189.00
## 79 6669.00
## 80 7689.00
## 81 9959.00
## 82 8499.00
## 83 12629.00
## 84 14869.00
## 85 14489.00
## 86 6989.00
## 87 8189.00
## 88 9279.00
## 89 9279.00
## 90 5499.00
## 91 7099.00
## 92 6649.00
## 93 6849.00
## 94 7349.00
## 95 7299.00
## 96 7799.00
## 97 7499.00
## 98 7999.00
## 99 8249.00
## 100 8949.00
## 101 9549.00
## 102 13499.00
## 103 14399.00
## 104 13499.00
## 105 17199.00
## 106 19699.00
## 107 18399.00
## 108 11900.00
## 109 13200.00
## 110 12440.00
## 111 13860.00
## 112 15580.00
## 113 16900.00
## 114 16695.00
## 115 17075.00
## 116 16630.00
## 117 17950.00
## 118 18150.00
## 119 5572.00
## 120 7957.00
## 121 6229.00
## 122 6692.00
## 123 7609.00
## 124 8921.00
## 125 12764.00
## 126 22018.00
## 127 32528.00
## 128 34028.00
## 129 37028.00
## 130 31400.50
## 131 9295.00
## 132 9895.00
## 133 11850.00
## 134 12170.00
## 135 15040.00
## 136 15510.00
## 137 18150.00
## 138 18620.00
## 139 5118.00
## 140 7053.00
## 141 7603.00
## 142 7126.00
## 143 7775.00
## 144 9960.00
## 145 9233.00
## 146 11259.00
## 147 7463.00
## 148 10198.00
## 149 8013.00
## 150 11694.00
## 151 5348.00
## 152 6338.00
## 153 6488.00
## 154 6918.00
## 155 7898.00
## 156 8778.00
## 157 6938.00
## 158 7198.00
## 159 7898.00
## 160 7788.00
## 161 7738.00
## 162 8358.00
## 163 9258.00
## 164 8058.00
## 165 8238.00
## 166 9298.00
## 167 9538.00
## 168 8449.00
## 169 9639.00
## 170 9989.00
## 171 11199.00
## 172 11549.00
## 173 17669.00
## 174 8948.00
## 175 10698.00
## 176 9988.00
## 177 10898.00
## 178 11248.00
## 179 16558.00
## 180 15998.00
## 181 15690.00
## 182 15750.00
## 183 7775.00
## 184 7975.00
## 185 7995.00
## 186 8195.00
## 187 8495.00
## 188 9495.00
## 189 9995.00
## 190 11595.00
## 191 9980.00
## 192 13295.00
## 193 13845.00
## 194 12290.00
## 195 12940.00
## 196 13415.00
## 197 15985.00
## 198 16515.00
## 199 18420.00
## 200 18950.00
## 201 16845.00
## 202 19045.00
## 203 21485.00
## 204 22470.00
## 205 22625.00
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
statistika_deskriptif <- function(data) {
# Mendapatkan hanya kolom-kolom numerik
kolom_numerik <- data[, sapply(data, is.numeric)]
# Inisialisasi vektor kosong untuk menyimpan hasil perhitungan
variables <- names(kolom_numerik)
means <- medians <- q1s <- q3s <- variances <- sds <- ranges <- sums <- mins <- maxs <- numeric(length(variables))
# Loop untuk setiap variabel numerik
for (i in seq_along(variables)) {
variable <- variables[i]
values <- kolom_numerik[[variable]]
# Hitung statistika deskriptif
means[i] <- mean(values)
medians[i] <- median(values)
q1s[i] <- quantile(values, 0.25)
q3s[i] <- quantile(values, 0.75)
variances[i] <- var(values)
sds[i] <- sd(values)
ranges[i] <- max(values) - min(values)
sums[i] <- sum(values)
mins[i] <- min(values)
maxs[i] <- max(values)
}
# Buat data frame dari hasil perhitungan
hasil <- data.frame(Variabel = variables,
Mean = means,
Median = medians,
Q1 = q1s,
Q3 = q3s,
Variance = variances,
SD = sds,
Range = ranges,
Sum = sums,
Min = mins,
Max = maxs,
stringsAsFactors = FALSE)
return(hasil)
}
statistika_deskriptif(data_dummies)
## Variabel Mean Median Q1 Q3 Variance
## 1 jenis_bahan_bakarbensin 9.024390e-01 1.0 1.0 1.0 8.847441e-02
## 2 jenis_bahan_bakarsolar 9.756098e-02 0.0 0.0 0.0 8.847441e-02
## 3 body_mobilhardtop 3.902439e-02 0.0 0.0 0.0 3.768532e-02
## 4 body_mobilhatchback 3.414634e-01 0.0 0.0 1.0 2.259684e-01
## 5 body_mobilsedan 4.682927e-01 0.0 0.0 1.0 2.502152e-01
## 6 body_mobilwagon 1.219512e-01 0.0 0.0 0.0 1.076040e-01
## 7 banyak_pintuempat 5.609756e-01 1.0 0.0 1.0 2.474892e-01
## 8 panjang_mobil 1.740493e+02 173.2 166.3 183.1 1.522087e+02
## 9 lebar_mobil 6.590780e+01 65.5 64.1 66.9 4.601900e+00
## 10 tinggi_mobil 5.372488e+01 54.1 52.0 55.5 5.970800e+00
## 11 berat_mobil 2.555566e+03 2414.0 2145.0 2935.0 2.711079e+05
## 12 compras 1.014254e+01 9.0 8.6 9.4 1.577710e+01
## 13 horsepower 1.041171e+02 95.0 70.0 116.0 1.563741e+03
## 14 mpg 3.075122e+01 30.0 25.0 34.0 4.742310e+01
## 15 harga 1.327671e+04 10295.0 7788.0 16503.0 6.382176e+07
## SD Range Sum Min Max
## 1 0.2974465 1 185.00 0.0 1.0
## 2 0.2974465 1 20.00 0.0 1.0
## 3 0.1941271 1 8.00 0.0 1.0
## 4 0.4753614 1 70.00 0.0 1.0
## 5 0.5002152 1 96.00 0.0 1.0
## 6 0.3280305 1 25.00 0.0 1.0
## 7 0.4974829 1 115.00 0.0 1.0
## 8 12.3372885 67 35680.10 141.1 208.1
## 9 2.1452039 12 13511.10 60.3 72.3
## 10 2.4435220 12 11013.60 47.8 59.8
## 11 520.6802035 2578 523891.00 1488.0 4066.0
## 12 3.9720403 16 2079.22 7.0 23.0
## 13 39.5441668 240 21344.00 48.0 288.0
## 14 6.8864431 38 6304.00 16.0 54.0
## 15 7988.8523317 40282 2721725.67 5118.0 45400.0
library (mice)
## Warning: package 'mice' was built under R version 4.2.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
md.pattern(data_dummies, rotate.names = TRUE)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## jenis_bahan_bakarbensin jenis_bahan_bakarsolar body_mobilhardtop
## 205 1 1 1
## 0 0 0
## body_mobilhatchback body_mobilsedan body_mobilwagon banyak_pintuempat
## 205 1 1 1 1
## 0 0 0 0
## panjang_mobil lebar_mobil tinggi_mobil berat_mobil compras horsepower mpg
## 205 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0
## harga
## 205 1 0
## 0 0
detect_variable_type <- function(variable) {
if (is.numeric(variable)) {
return("Numeric")
} else if (is.factor(variable)) {
return("Factor")
} else if (is.character(variable)) {
return("Character")
} else {
return("Other")
}
}
numeric_variables <- c()
character_variables <- c()
for (col in names(data_dummies)) {
if (sapply(data_dummies[[col]], is.numeric)[1]) {
numeric_variables <- c(numeric_variables, col)
} else if (sapply(data_dummies[[col]], is.character)[1]) {
character_variables <- c(character_variables, col)
}
}
print("Variabel numerik:")
## [1] "Variabel numerik:"
print(numeric_variables)
## [1] "jenis_bahan_bakarbensin" "jenis_bahan_bakarsolar"
## [3] "body_mobilhardtop" "body_mobilhatchback"
## [5] "body_mobilsedan" "body_mobilwagon"
## [7] "banyak_pintuempat" "panjang_mobil"
## [9] "lebar_mobil" "tinggi_mobil"
## [11] "berat_mobil" "compras"
## [13] "horsepower" "mpg"
## [15] "harga"
print("Variabel karakter:")
## [1] "Variabel karakter:"
print(character_variables)
## NULL
par(mar = c(1, 1, 1, 1))
par(mfrow = c(4, 4))
for (i in seq_along(numeric_variables)) {
# Buat boxplot
boxplot(data_dummies[, numeric_variables[i]], main = numeric_variables[i], cex.main = 0.7)
}
### Penghapusan outliers
handle_outliers <- function(dataframe, multiplier = 1.5) {
# Loop melalui setiap kolom numerik
8
for (col in names(dataframe)) {
if (is.numeric(dataframe[[col]])) {
# Identifikasi batas bawah dan batas atas
lower_bound <- quantile(dataframe[[col]], 0.25) - multiplier * IQR(dataframe[[col]])
upper_bound <- quantile(dataframe[[col]], 0.75) + multiplier * IQR(dataframe[[col]])
# Menangani outlier dengan menggantinya dengan nilai yang mendekati batas bawah dan batas atas
dataframe[[col]][dataframe[[col]] < lower_bound] <- lower_bound
dataframe[[col]][dataframe[[col]] > upper_bound] <- upper_bound
}
}
return(dataframe)
}
# Panggil fungsi untuk menangani outlier
datafix <- handle_outliers(data_dummies)
head(datafix)
## jenis_bahan_bakarbensin jenis_bahan_bakarsolar body_mobilhardtop
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## body_mobilhatchback body_mobilsedan body_mobilwagon banyak_pintuempat
## 1 0 0 0 0
## 2 0 0 0 0
## 3 1 0 0 0
## 4 0 1 0 1
## 5 0 1 0 1
## 6 0 1 0 0
## panjang_mobil lebar_mobil tinggi_mobil berat_mobil compras horsepower mpg
## 1 168.8 64.1 48.8 2548 9.0 111 27
## 2 168.8 64.1 48.8 2548 9.0 111 27
## 3 171.2 65.5 52.4 2823 9.0 154 26
## 4 176.6 66.2 54.3 2337 10.0 102 30
## 5 176.6 66.4 54.3 2824 8.0 115 22
## 6 177.3 66.3 53.1 2507 8.5 110 25
## harga
## 1 13495
## 2 16500
## 3 16500
## 4 13950
## 5 17450
## 6 15250
par(mar = c(1, 1, 1, 1))
par(mfrow = c(4, 4))
for (i in seq_along(numeric_variables)) {
# Buat boxplot
boxplot(datafix[, numeric_variables[i]], main = numeric_variables[i], cex.main = 0.7)
}
1.Hipotesis H0: Tidak ada outliers yang signifikan dalam sampel H1: Terdapat setidaknya satu outliers yang signifikan dalam sampel 2.Daerah Penolakan Tolak H0 jika nilai p-value < alpha 3.Uji Statistik dan Keputusan alpha = 5%
library(outliers)
grubbs_and_sign <- function(column) {
if (is.factor(column) | is.character(column)) {
return(c(NA, ""))
} else {
test_result <- grubbs.test(column)
p_value <- test_result$p.value
significant <- ifelse(p_value > 0.05, "", "*")
return(c(p_value, significant))
}
}
# Menyaring hanya kolom-kolom yang bersifat numerik
numeric_data <- datafix[, numeric_variables]
# Mengaplikasikan fungsi grubbs_and_sign pada variabel numerik
p_values <- data.frame(t(sapply(numeric_data, grubbs_and_sign)))
colnames(p_values) <- c("p_value", "significant")
print(p_values)
## p_value significant
## jenis_bahan_bakarbensin 0 *
## jenis_bahan_bakarsolar 0 *
## body_mobilhardtop 0 *
## body_mobilhatchback 1
## body_mobilsedan 1
## body_mobilwagon 0 *
## banyak_pintuempat 1
## panjang_mobil 0.552113434530805
## lebar_mobil 0.707254792521349
## tinggi_mobil 1
## berat_mobil 0.349664340108172
## compras 1
## horsepower 1
## mpg 1
## harga 1
Berdasarkan hasil Grubb’s Test dapat diketahui bahwa semua variabel gagal tolak H0, artinya tidak terdapat outliers di semua variabel
y <- datafix$harga
x1 <- datafix$jenis_bahan_bakarbensin
x2 <- datafix$jenis_bahan_bakarsolar
x3 <- datafix$body_mobilhardtop
x4 <- datafix$body_mobilhatchback
x5 <- datafix$body_mobilsedan
x6 <- datafix$body_mobilwagon
x7 <- datafix$banyak_pintuempat
x8 <- datafix$panjang_mobil
x9 <- datafix$lebar_mobil
x10 <- datafix$tinggi_mobil
x11 <- datafix$berat_mobil
x12 <- datafix$compras
x13 <- datafix$horsepower
x14 <- datafix$mpg
# Import Library
library(ggplot2)
library(huxtable)
##
## Attaching package: 'huxtable'
## The following object is masked from 'package:ggplot2':
##
## theme_grey
Scatterplot for Model
outliers_plot <- ggplot(data = datafix, aes(x = x1 + x2 + x3 + x4 + x5+ x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14, y = y)) +
geom_point() +
geom_smooth(method = lm) +
ggtitle("Scatter plot")
outliers_plot
## `geom_smooth()` using formula = 'y ~ x'
model_before_cook <- lm(y ~ x1 + x2 + x3 + x4 + x5+ x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14, data = datafix)
summary(model_before_cook)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 +
## x10 + x11 + x12 + x13 + x14, data = datafix)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7235.6 -1582.2 -74.8 1297.2 7523.4
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -55181.609 12252.877 -4.504 1.15e-05 ***
## x1 NA NA NA NA
## x2 NA NA NA NA
## x3 NA NA NA NA
## x4 -1671.932 656.351 -2.547 0.011631 *
## x5 716.510 557.323 1.286 0.200106
## x6 NA NA NA NA
## x7 -872.688 554.950 -1.573 0.117452
## x8 -131.623 43.327 -3.038 0.002710 **
## x9 780.892 207.275 3.767 0.000219 ***
## x10 102.471 110.993 0.923 0.357039
## x11 5.871 1.183 4.965 1.50e-06 ***
## x12 1177.402 311.181 3.784 0.000206 ***
## x13 91.476 11.709 7.813 3.45e-13 ***
## x14 -11.784 66.206 -0.178 0.858916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2669 on 194 degrees of freedom
## Multiple R-squared: 0.8499, Adjusted R-squared: 0.8421
## F-statistic: 109.8 on 10 and 194 DF, p-value: < 2.2e-16
# Plot for Cook’s distance (Influential Obs)
cooksD <- cooks.distance(model_before_cook)
n <- nrow(datafix)
plot(cooksD, main = "Cooks Distance for Influential Obs")
abline(h = 4 / n, lty = 2, col = "red")
abline(h = -4 / n, lty = 2, col = "red")
Interpretasi : Plot di atas menampilkan nilai Cook’s Distance dari
masing-masing observasi. Garis merah putus-putus melambangkan Threshold
dengan nilai4/n. Kita dapat melihat bahwa observasi yang melebihi
Threshold tersebut tidak terlalu banyak. Observasi - observasi itulah
yang nantinya diidentifikasi sebagai influential observation yang dapat
mempengaruhi model regresi kita.
#identify influential points
influential_obs <- as.numeric(names(cooksD)[(cooksD > (4 / n))])
#define new data frame with influential points removed
outliers_removed <- datafix[-influential_obs, ]
outliers_removed1 <- as.data.frame(outliers_removed)
y <- outliers_removed1$harga
x1 <- outliers_removed1$jenis_bahan_bakarbensin
x2 <- outliers_removed1$jenis_bahan_bakarsolar
x3 <- outliers_removed1$body_mobilhardtop
x4 <- outliers_removed1$body_mobilhatchback
x5 <- outliers_removed1$body_mobilsedan
x6 <- outliers_removed1$body_mobilwagon
x7 <- outliers_removed1$banyak_pintuempat
x8 <- outliers_removed1$panjang_mobil
x9 <- outliers_removed1$lebar_mobil
x10 <- outliers_removed1$tinggi_mobil
x11 <- outliers_removed1$berat_mobil
x12 <- outliers_removed1$compras
x13 <- outliers_removed1$horsepower
x14 <- outliers_removed1$mpg
model_after_cook <- lm(y ~ x1 + x2 + x3 + x4 + x5+ x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14, data = outliers_removed1)
summary(model_after_cook)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 +
## x10 + x11 + x12 + x13 + x14, data = outliers_removed1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5035.0 -1389.9 -46.1 1249.7 7157.2
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -53200.242 10052.836 -5.292 3.55e-07 ***
## x1 NA NA NA NA
## x2 NA NA NA NA
## x3 NA NA NA NA
## x4 -709.530 560.031 -1.267 0.206839
## x5 1029.360 469.458 2.193 0.029637 *
## x6 NA NA NA NA
## x7 -330.445 449.791 -0.735 0.463517
## x8 -129.696 36.166 -3.586 0.000434 ***
## x9 816.079 175.032 4.662 6.13e-06 ***
## x10 52.360 91.248 0.574 0.566822
## x11 7.519 1.062 7.078 3.29e-11 ***
## x12 1070.488 265.341 4.034 8.13e-05 ***
## x13 50.621 11.174 4.530 1.08e-05 ***
## x14 -72.778 52.687 -1.381 0.168920
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2068 on 177 degrees of freedom
## Multiple R-squared: 0.8871, Adjusted R-squared: 0.8807
## F-statistic: 139.1 on 10 and 177 DF, p-value: < 2.2e-16
# Plot for Cook’s distance (Influential Obs)
cooksD <- cooks.distance(model_after_cook)
n <- nrow(datafix)
plot(cooksD, main = "Cooks Distance for Influential Obs")
abline(h = 4 / n, lty = 2, col = "red")
abline(h = -4 / n, lty = 2, col = "red")
Plot for Residual Vs Leverage
plot(model_before_cook, main = "Residual vs Leverage", which=5)
plot(model_after_cook, main = "Residual vs Leverage", which=5)
comparing_models <- list(model_before_cook, model_after_cook)
huxreg(comparing_models)
| (1) | (2) | |
|---|---|---|
| (Intercept) | -55181.609 *** | -53200.242 *** |
| (12252.877) | (10052.836) | |
| x1 | ||
| x2 | ||
| x3 | ||
| x4 | -1671.932 * | -709.530 |
| (656.351) | (560.031) | |
| x5 | 716.510 | 1029.360 * |
| (557.323) | (469.458) | |
| x6 | ||
| x7 | -872.688 | -330.445 |
| (554.950) | (449.791) | |
| x8 | -131.623 ** | -129.696 *** |
| (43.327) | (36.166) | |
| x9 | 780.892 *** | 816.079 *** |
| (207.275) | (175.032) | |
| x10 | 102.471 | 52.360 |
| (110.993) | (91.248) | |
| x11 | 5.871 *** | 7.519 *** |
| (1.183) | (1.062) | |
| x12 | 1177.402 *** | 1070.488 *** |
| (311.181) | (265.341) | |
| x13 | 91.476 *** | 50.621 *** |
| (11.709) | (11.174) | |
| x14 | -11.784 | -72.778 |
| (66.206) | (52.687) | |
| N | 205 | 188 |
| R2 | 0.850 | 0.887 |
| logLik | -1902.538 | -1696.334 |
| AIC | 3829.076 | 3416.668 |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | ||
stepwise_model <- step(model_after_cook, direction = "both")
## Start: AIC=2881.15
## y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 +
## x12 + x13 + x14
##
##
## Step: AIC=2881.15
## y ~ x1 + x2 + x3 + x4 + x5 + x7 + x8 + x9 + x10 + x11 + x12 +
## x13 + x14
##
##
## Step: AIC=2881.15
## y ~ x1 + x2 + x4 + x5 + x7 + x8 + x9 + x10 + x11 + x12 + x13 +
## x14
##
##
## Step: AIC=2881.15
## y ~ x1 + x4 + x5 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14
##
##
## Step: AIC=2881.15
## y ~ x4 + x5 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14
##
## Df Sum of Sq RSS AIC
## - x10 1 1407918 758254496 2879.5
## - x7 1 2307868 759154447 2879.7
## - x4 1 6863582 763710161 2880.8
## <none> 756846578 2881.2
## - x14 1 8158796 765005375 2881.2
## - x5 1 20557711 777404289 2884.2
## - x8 1 54991249 811837828 2892.3
## - x12 1 69596717 826443295 2895.7
## - x13 1 87765035 844611613 2899.8
## - x9 1 92952657 849799236 2900.9
## - x11 1 214245782 971092360 2926.0
##
## Step: AIC=2879.5
## y ~ x4 + x5 + x7 + x8 + x9 + x11 + x12 + x13 + x14
##
## Df Sum of Sq RSS AIC
## - x7 1 1810100 760064596 2877.9
## - x14 1 8028023 766282519 2879.5
## <none> 758254496 2879.5
## - x4 1 8750545 767005041 2879.7
## + x10 1 1407918 756846578 2881.2
## - x5 1 19241443 777495939 2882.2
## - x8 1 55135443 813389940 2890.7
## - x12 1 69225597 827480094 2893.9
## - x13 1 87986517 846241014 2898.1
## - x9 1 93644401 851898897 2899.4
## - x11 1 214463436 972717932 2924.3
##
## Step: AIC=2877.94
## y ~ x4 + x5 + x8 + x9 + x11 + x12 + x13 + x14
##
## Df Sum of Sq RSS AIC
## - x4 1 7181693 767246289 2877.7
## <none> 760064596 2877.9
## - x14 1 9092412 769157008 2878.2
## + x7 1 1810100 758254496 2879.5
## + x10 1 910150 759154447 2879.7
## - x5 1 17870254 777934851 2880.3
## - x8 1 62651462 822716059 2890.8
## - x12 1 76061869 836126465 2893.9
## - x9 1 93490085 853554681 2897.8
## - x13 1 99333733 859398329 2899.0
## - x11 1 212757777 972822373 2922.3
##
## Step: AIC=2877.71
## y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14
##
## Df Sum of Sq RSS AIC
## <none> 767246289 2877.7
## + x4 1 7181693 760064596 2877.9
## - x14 1 9991933 777238223 2878.2
## + x10 1 2735585 764510705 2879.0
## + x7 1 241248 767005041 2879.7
## - x8 1 55694008 822940297 2888.9
## - x5 1 68027133 835273422 2891.7
## - x12 1 77131583 844377872 2893.7
## - x9 1 86389884 853636173 2895.8
## - x13 1 92228463 859474752 2897.1
## - x11 1 229560951 996807240 2924.9
# Menampilkan summary model
summary(stepwise_model)
##
## Call:
## lm(formula = y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14, data = outliers_removed1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5080.2 -1418.6 -154.6 1175.8 7005.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -49991.904 9540.184 -5.240 4.46e-07 ***
## x5 1329.786 332.868 3.995 9.42e-05 ***
## x8 -115.795 32.035 -3.615 0.00039 ***
## x9 761.756 169.206 4.502 1.21e-05 ***
## x11 7.678 1.046 7.339 7.15e-12 ***
## x12 1106.879 260.205 4.254 3.37e-05 ***
## x13 47.307 10.170 4.652 6.36e-06 ***
## x14 -79.839 52.146 -1.531 0.12751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2065 on 180 degrees of freedom
## Multiple R-squared: 0.8856, Adjusted R-squared: 0.8811
## F-statistic: 199 on 7 and 180 DF, p-value: < 2.2e-16
model_after_stepwise <- lm(y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14, data = outliers_removed1)
summary(model_after_stepwise)
##
## Call:
## lm(formula = y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14, data = outliers_removed1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5080.2 -1418.6 -154.6 1175.8 7005.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -49991.904 9540.184 -5.240 4.46e-07 ***
## x5 1329.786 332.868 3.995 9.42e-05 ***
## x8 -115.795 32.035 -3.615 0.00039 ***
## x9 761.756 169.206 4.502 1.21e-05 ***
## x11 7.678 1.046 7.339 7.15e-12 ***
## x12 1106.879 260.205 4.254 3.37e-05 ***
## x13 47.307 10.170 4.652 6.36e-06 ***
## x14 -79.839 52.146 -1.531 0.12751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2065 on 180 degrees of freedom
## Multiple R-squared: 0.8856, Adjusted R-squared: 0.8811
## F-statistic: 199 on 7 and 180 DF, p-value: < 2.2e-16
library(moments)
residuals <- residuals(model_after_stepwise)
ks_test <- ks.test(residuals, "pnorm", mean = mean(residuals), sd = sd(residuals))
## Warning in ks.test.default(residuals, "pnorm", mean = mean(residuals), sd =
## sd(residuals)): ties should not be present for the Kolmogorov-Smirnov test
print(ks_test)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: residuals
## D = 0.049362, p-value = 0.7494
## alternative hypothesis: two-sided
hist(residuals, main = "Histogram Residu", col = "lightblue", border = "black")
Hipotesis: H0: Data berasal dari distribusi normal H1: Data tidak
berasal dari distribusi normal
Kepututusan: dengan alfa 0.05 maka gagal tolak h0, data berdistribusi normal
library(car)
## Warning: package 'car' was built under R version 4.2.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
dw_test <- durbinWatsonTest(model_after_stepwise)
print(dw_test)
## lag Autocorrelation D-W Statistic p-value
## 1 0.5341752 0.9146733 0
## Alternative hypothesis: rho != 0
plot(residuals, main = "Residual Plot", ylab = "Residuals")
abline(h = 0, col = "red")
Hipotesis: H0: Tidak ada autokorelasi H1: ada autokorelasi
Kepututusan: dengan alfa 0.05 maka tolak h0, data terdapat autokorelasi
library(car)
vif(model_after_stepwise)
## x5 x8 x9 x11 x12 x13 x14
## 1.216764 6.489988 5.069591 11.602284 1.824669 4.875876 5.062531
Hipotesis: H0: Tidak ada multikolinearitas H1: ada multikolinearitas
Kepututusan: ada multikolinearitas pada variabel 11 (pelanggaran)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.2.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model_after_stepwise, studentize = FALSE, data= outliers_removed1)
##
## Breusch-Pagan test
##
## data: model_after_stepwise
## BP = 44.838, df = 7, p-value = 1.47e-07
Hipotesis: H0: homokesdastisitas (variabilitas konstan) H1: heterokesdastisitas (variabilitas tidak konstan)
Kepututusan: dengan alfa 0.05 maka tolak h0, data heterokesdastisitas (pelanggaran)
library(nlme)
## Warning: package 'nlme' was built under R version 4.2.3
model_gls <- gls(y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14, data = outliers_removed1, correlation = corAR1(form=~1))
summary(model_gls)
## Generalized least squares fit by REML
## Model: y ~ x5 + x8 + x9 + x11 + x12 + x13 + x14
## Data: outliers_removed1
## AIC BIC logLik
## 3266.04 3297.97 -1623.02
##
## Correlation Structure: AR(1)
## Formula: ~1
## Parameter estimate(s):
## Phi
## 0.6103186
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -50495.47 9010.092 -5.604323 0.0000
## x5 724.28 243.336 2.976451 0.0033
## x8 -101.46 30.654 -3.309811 0.0011
## x9 754.64 161.303 4.678438 0.0000
## x11 8.19 0.980 8.359477 0.0000
## x12 632.12 221.091 2.859120 0.0047
## x13 40.93 9.743 4.201144 0.0000
## x14 -1.21 42.983 -0.028041 0.9777
##
## Correlation:
## (Intr) x5 x8 x9 x11 x12 x13
## x5 0.223
## x8 -0.154 -0.272
## x9 -0.859 -0.086 -0.313
## x11 0.447 0.208 -0.588 -0.229
## x12 0.016 -0.016 0.145 -0.165 -0.343
## x13 -0.030 -0.109 0.388 -0.236 -0.549 0.362
## x14 -0.138 -0.107 0.148 -0.060 0.194 -0.493 0.288
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.3411383 -0.6328058 -0.1975277 0.4932949 3.4753185
##
## Residual standard error: 2129.014
## Degrees of freedom: 188 total; 180 residual
mean((y - predict(model_gls))^2)
## [1] 4267709
R2_gls <- cor(y,predict(model_gls))^2
R2_gls
## [1] 0.8813235
# Kebaikan Model
# OLS
predicted_values <- predict(model_after_stepwise)
actual_values <- y
mse <- mean((actual_values - predicted_values)^2)
print(paste("Mean Squared Error (MSE) OLS:", mse))
## [1] "Mean Squared Error (MSE) OLS: 4081097.28271785"
# GLS
predicted_values <- predict(model_gls)
actual_values <- y
mse <- mean((actual_values - predicted_values)^2)
print(paste("Mean Squared Error (MSE) GLS:", mse))
## [1] "Mean Squared Error (MSE) GLS: 4267709.22381794"