Transformasi

Data 1

# ---- Windmill data ----------------------------------------------------------
# Load the wind-speed (x) / DC-output (y) observations from sheet "Sheet1" of
# the practicum workbook and print them.
# NOTE(review): the absolute Windows path below is machine-specific — confirm
# it resolves before running elsewhere.
library(readxl)
windmill <- read_excel(path = "E:\\praktikum 12.xlsx", sheet = "Sheet1")
windmill
## # A tibble: 25 x 2
##        x     y
##    <dbl> <dbl>
##  1  5    1.58 
##  2  6    1.82 
##  3  3.4  1.06 
##  4  2.7  0.5  
##  5 10    2.24 
##  6  9.7  2.39 
##  7  9.55 2.29 
##  8  3.05 0.558
##  9  8.15 2.17 
## 10  6.2  1.87 
## # ... with 15 more rows

model regresi

# Fit the simple linear regression of y on x for the untransformed windmill
# data, then print the coefficient table and fit statistics.
windmill_mod <- lm(formula = y ~ x, data = windmill)
summary(windmill_mod)
## 
## Call:
## lm(formula = y ~ x, data = windmill)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59869 -0.14099  0.06059  0.17262  0.32184 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.13088    0.12599   1.039     0.31    
## x            0.24115    0.01905  12.659 7.55e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2361 on 23 degrees of freedom
## Multiple R-squared:  0.8745, Adjusted R-squared:  0.869 
## F-statistic: 160.3 on 1 and 23 DF,  p-value: 7.546e-12
# Print the analysis-of-variance table for the untransformed windmill fit.
anova(windmill_mod)
## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x          1 8.9296  8.9296  160.26 7.546e-12 ***
## Residuals 23 1.2816  0.0557                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot x dan y

# Scatter plot of the raw windmill data with the fitted regression line
# overlaid in red.
plot(
  y ~ x,
  data = windmill,
  main = "Kecepatan Angin (X) dan keluaran DC (Y)",
  xlab = "Kecepatan Angin (X)",
  ylab = "keluaran DC (Y)",
  col = "steelblue",
  pch = 1
)
abline(reg = windmill_mod, col = "red")

cek residual

# Residuals-versus-fitted diagnostic for the untransformed windmill fit; the
# dashed red line marks a residual of zero.
plot(
  x = predict(windmill_mod),
  y = residuals(windmill_mod),
  xlab = "y duga",
  ylab = "sisaan",
  pch = 19,
  main = "Plot Sisaan vs Y duga"
)
abline(h = 0, lty = 2, col = "red")

transformasi x

# Build the transformed data set: the response is kept as Y and the predictor
# is replaced by its reciprocal, X = 1 / x. The columns are named directly in
# data.frame() so that no intermediate vector called `c` is created — a global
# with that name masks base c() wherever it is used as a value, e.g.
# do.call(c, ...).
windmill1 <- data.frame(Y = windmill$y, X = 1 / windmill$x)
windmill1
##        Y          X
## 1  1.582 0.20000000
## 2  1.822 0.16666667
## 3  1.057 0.29411765
## 4  0.500 0.37037037
## 5  2.236 0.10000000
## 6  2.386 0.10309278
## 7  2.294 0.10471204
## 8  0.558 0.32786885
## 9  2.166 0.12269939
## 10 1.866 0.16129032
## 11 0.653 0.34482759
## 12 1.930 0.15748031
## 13 1.562 0.21739130
## 14 1.737 0.17241379
## 15 2.088 0.13513514
## 16 1.137 0.27777778
## 17 2.179 0.12738854
## 18 2.112 0.11363636
## 19 1.800 0.14285714
## 20 1.501 0.18348624
## 21 2.303 0.10989011
## 22 2.310 0.09803922
## 23 1.194 0.24390244
## 24 1.144 0.25316456
## 25 0.123 0.40816327

model regresi setelah transformasi

# Refit the regression on the transformed data (Y against the reciprocal
# predictor X) and print the coefficient table and fit statistics.
windmill_trans_mod <- lm(formula = Y ~ X, data = windmill1)
summary(windmill_trans_mod)
## 
## Call:
## lm(formula = Y ~ X, data = windmill1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20547 -0.04940  0.01100  0.08352  0.12204 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9789     0.0449   66.34   <2e-16 ***
## X            -6.9345     0.2064  -33.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09417 on 23 degrees of freedom
## Multiple R-squared:   0.98,  Adjusted R-squared:  0.9792 
## F-statistic:  1128 on 1 and 23 DF,  p-value: < 2.2e-16
# Print the analysis-of-variance table for the transformed windmill fit.
anova(windmill_trans_mod)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## X          1 10.007 10.0072  1128.4 < 2.2e-16 ***
## Residuals 23  0.204  0.0089                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of Y against the reciprocal predictor, with the fitted line of
# the transformed model overlaid in red.
plot(
  Y ~ X,
  data = windmill1,
  main = "Kecepatan Angin (1/X) dan keluaran DC (Y)",
  xlab = "Kecepatan Angin (1/X)",
  ylab = "keluaran DC (Y)",
  col = "steelblue",
  pch = 1
)
abline(reg = windmill_trans_mod, col = "red")

cek residual

# Residuals-versus-fitted diagnostic for the transformed windmill fit; the
# dashed red line marks a residual of zero.
plot(
  x = predict(windmill_trans_mod),
  y = residuals(windmill_trans_mod),
  xlab = "y duga",
  ylab = "sisaan",
  pch = 19,
  main = "Plot Sisaan vs Y duga"
)
abline(h = 0, lty = 2, col = "red")

Data 2

# ---- Initech data -----------------------------------------------------------
# Read the CSV holding the `years` and `salary` columns and print it.
# NOTE(review): the absolute Windows path below is machine-specific — confirm
# it resolves before running elsewhere.
initech <- read.csv(file = "E:\\initech.csv")
initech
##     years salary
## 1       1  41504
## 2       1  32619
## 3       1  44322
## 4       2  40038
## 5       2  46147
## 6       2  38447
## 7       2  38163
## 8       3  42104
## 9       3  25597
## 10      3  39599
## 11      3  55698
## 12      4  47220
## 13      4  65929
## 14      4  55794
## 15      4  45959
## 16      5  52460
## 17      5  60308
## 18      5  61458
## 19      5  56951
## 20      6  56174
## 21      6  59363
## 22      6  57642
## 23      6  69792
## 24      7  59321
## 25      7  66379
## 26      7  64282
## 27      7  48901
## 28      8 100711
## 29      8  59324
## 30      8  54752
## 31      8  73619
## 32      9  65382
## 33      9  58823
## 34      9  65717
## 35      9  92816
## 36      9  72550
## 37     10  71365
## 38     10  88888
## 39     10  62969
## 40     10  45298
## 41     11 111292
## 42     11  91491
## 43     11 106345
## 44     11  99009
## 45     12  73981
## 46     12  72547
## 47     12  74991
## 48     12 139249
## 49     13 119948
## 50     13 128962
## 51     13  98112
## 52     13  97159
## 53     14 125246
## 54     14  89694
## 55     14  73333
## 56     14 108710
## 57     15  97567
## 58     15  90359
## 59     15 119806
## 60     15 101343
## 61     16 147406
## 62     16 153020
## 63     16 143200
## 64     16  97327
## 65     17 184807
## 66     17 146263
## 67     17 127925
## 68     17 159785
## 69     17 174822
## 70     18 177610
## 71     18 210984
## 72     18 160044
## 73     18 137044
## 74     19 182996
## 75     19 184183
## 76     19 168666
## 77     19 121350
## 78     20 193627
## 79     20 142611
## 80     20 170131
## 81     20 134140
## 82     21 129446
## 83     21 201469
## 84     21 202104
## 85     21 220556
## 86     22 166419
## 87     22 149044
## 88     22 247017
## 89     22 247730
## 90     23 252917
## 91     23 235517
## 92     23 241276
## 93     23 197229
## 94     24 175879
## 95     24 253682
## 96     24 262578
## 97     24 207715
## 98     25 221179
## 99     25 212028
## 100    25 312549
# First look at the raw data: salary against years, before any model is fit.
plot(
  salary ~ years,
  data = initech,
  col = "grey",
  pch = 20,
  cex = 1.5,
  main = "Salaries at Initech, By Seniority"
)

model regresi

# Fit the simple linear regression of salary on years for the untransformed
# data, then print the coefficient table and fit statistics.
initech_mod <- lm(formula = salary ~ years, data = initech)
summary(initech_mod)
## 
## Call:
## lm(formula = salary ~ years, data = initech)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57225 -18104    241  15589  91332 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5302       5750   0.922    0.359    
## years           8637        389  22.200   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27360 on 98 degrees of freedom
## Multiple R-squared:  0.8341, Adjusted R-squared:  0.8324 
## F-statistic: 492.8 on 1 and 98 DF,  p-value: < 2.2e-16
# Print the analysis-of-variance table for the untransformed Initech fit.
anova(initech_mod)
## Analysis of Variance Table
## 
## Response: salary
##           Df     Sum Sq    Mean Sq F value    Pr(>F)    
## years      1 3.6878e+11 3.6878e+11  492.82 < 2.2e-16 ***
## Residuals 98 7.3334e+10 7.4830e+08                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot x dan y

# Same scatter plot as before, now with the fitted regression line overlaid
# in red.
plot(
  salary ~ years,
  data = initech,
  col = "grey",
  pch = 20,
  cex = 1.5,
  main = "Salaries at Initech, By Seniority"
)
abline(reg = initech_mod, col = "red")

cek residual

# Residuals-versus-fitted diagnostic for the untransformed Initech fit; the
# dashed red line marks a residual of zero.
plot(
  x = predict(initech_mod),
  y = residuals(initech_mod),
  xlab = "y duga",
  ylab = "sisaan",
  pch = 19,
  main = "Plot Sisaan vs Y duga"
)
abline(h = 0, lty = 2, col = "red")

transformasi y

# Log-transform the response: `salary1` holds the natural log of salary and
# is paired with the unchanged `years` column in a new data frame.
y1 <- log(initech[["salary"]])
initech1 <- data.frame(years = initech[["years"]], salary1 = y1)
initech1
##     years  salary1
## 1       1 10.63355
## 2       1 10.39265
## 3       1 10.69924
## 4       2 10.59758
## 5       2 10.73959
## 6       2 10.55704
## 7       2 10.54962
## 8       3 10.64790
## 9       3 10.15023
## 10      3 10.58656
## 11      3 10.92770
## 12      4 10.76257
## 13      4 11.09633
## 14      4 10.92942
## 15      4 10.73550
## 16      5 10.86781
## 17      5 11.00722
## 18      5 11.02611
## 19      5 10.94995
## 20      6 10.93621
## 21      6 10.99143
## 22      6 10.96201
## 23      6 11.15327
## 24      7 10.99072
## 25      7 11.10314
## 26      7 11.07103
## 27      7 10.79755
## 28      8 11.52001
## 29      8 10.99077
## 30      8 10.91057
## 31      8 11.20666
## 32      9 11.08800
## 33      9 10.98229
## 34      9 11.09311
## 35      9 11.43837
## 36      9 11.19203
## 37     10 11.17556
## 38     10 11.39513
## 39     10 11.05040
## 40     10 10.72102
## 41     11 11.61991
## 42     11 11.42400
## 43     11 11.57444
## 44     11 11.50297
## 45     12 11.21156
## 46     12 11.19199
## 47     12 11.22512
## 48     12 11.84402
## 49     13 11.69481
## 50     13 11.76727
## 51     13 11.49386
## 52     13 11.48410
## 53     14 11.73804
## 54     14 11.40416
## 55     14 11.20277
## 56     14 11.59644
## 57     15 11.48829
## 58     15 11.41155
## 59     15 11.69363
## 60     15 11.52627
## 61     16 11.90095
## 62     16 11.93832
## 63     16 11.87200
## 64     16 11.48583
## 65     17 12.12707
## 66     17 11.89316
## 67     17 11.75920
## 68     17 11.98158
## 69     17 12.07152
## 70     18 12.08735
## 71     18 12.25954
## 72     18 11.98320
## 73     18 11.82806
## 74     19 12.11722
## 75     19 12.12369
## 76     19 12.03568
## 77     19 11.70643
## 78     20 12.17369
## 79     20 11.86788
## 80     20 12.04432
## 81     20 11.80664
## 82     21 11.77102
## 83     21 12.21339
## 84     21 12.21654
## 85     21 12.30391
## 86     22 12.02226
## 87     22 11.91200
## 88     22 12.41721
## 89     22 12.42009
## 90     23 12.44082
## 91     23 12.36954
## 92     23 12.39370
## 93     23 12.19212
## 94     24 12.07755
## 95     24 12.44384
## 96     24 12.47830
## 97     24 12.24392
## 98     25 12.30673
## 99     25 12.26447
## 100    25 12.65252

model regresi setelah transformasi

# Refit the regression with the log-transformed response (salary1 on years)
# and print the coefficient table and fit statistics.
initech_trans_mod <- lm(formula = salary1 ~ years, data = initech1)
summary(initech_trans_mod)
## 
## Call:
## lm(formula = salary1 ~ years, data = initech1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57022 -0.13560  0.03048  0.14157  0.41366 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.48381    0.04108  255.18   <2e-16 ***
## years        0.07888    0.00278   28.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1955 on 98 degrees of freedom
## Multiple R-squared:  0.8915, Adjusted R-squared:  0.8904 
## F-statistic: 805.2 on 1 and 98 DF,  p-value: < 2.2e-16
# Print the analysis-of-variance table for the log-transformed Initech fit.
anova(initech_trans_mod)
## Analysis of Variance Table
## 
## Response: salary1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## years      1 30.7616 30.7616  805.22 < 2.2e-16 ***
## Residuals 98  3.7439  0.0382                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of log salary against years, with the fitted line of the
# transformed model overlaid in red.
plot(
  salary1 ~ years,
  data = initech1,
  main = "Salaries at Initech, By Seniority",
  xlab = "years (X)",
  ylab = "Salary ln(Y)",
  col = "steelblue",
  pch = 1
)
abline(reg = initech_trans_mod, col = "red")

cek residual

# Residuals-versus-fitted diagnostic for the log-transformed Initech fit; the
# dashed red line marks a residual of zero.
plot(
  x = predict(initech_trans_mod),
  y = residuals(initech_trans_mod),
  xlab = "y duga",
  ylab = "sisaan",
  pch = 19,
  main = "Plot Sisaan vs Y duga"
)
abline(h = 0, lty = 2, col = "red")