This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
########### ΧΩΡΙΚΗ ΑΝΑΛΥΣΗ ΟΙΚΟΛΟΓΙΚΩΝ ΔΕΔΟΜΕΝΩΝ ########
######## ΕΝΟΤΗΤΑ ΠΑΛΙΝΔΡΟΜΗΣΗ
##### 1. ΥΠΟΕΝΟΤΗΤΑ ΑΠΛΗ ΓΡΑΜΜΙΚΗ ΠΑΝΙΔΡΟΜΗΣΗ
## ΕΙΣΑΓΩΓΗ ΑΡΧΕΙΟΥ xls
# SET WORKING DIRECTORY
setwd("G:/R_projects/OLD_FILES/2.R_PROJECTS/STROFADIA_ANALYSIS")
# READ DATA FILE
data <- read.csv("STROFADIA1.csv", header=TRUE)
data
## D.E Eidos.A Age AGEcode Radius Rad...mm. Height..cm. Diam..cm.
## 1 3 Juniperus phoenica 67 1 4.9 49 45 10.0
## 2 6 Juniperus phoenica 90 1 7.6 76 60 16.0
## 3 13 Juniperus phoenica 90 1 7.8 78 50 15.5
## 4 16 Juniperus phoenica 100 1 11.7 117 68 23.5
## 5 19 Juniperus phoenica 70 1 7.3 73 60 15.0
## 6 20 Juniperus phoenica 90 1 9.1 91 80 18.0
## 7 23 Juniperus phoenica 75 1 7.2 72 55 14.0
## 8 25 Juniperus phoenica 90 1 7.7 77 50 15.5
## 9 27 Juniperus phoenica 75 1 7.0 70 55 14.0
## 10 28 Juniperus phoenica 30 1 2.8 28 25 5.6
## 11 2 Juniperus phoenica 140 2 11.4 114 70 23.0
## 12 4 Juniperus phoenica 130 2 10.7 107 80 23.0
## 13 5 Juniperus phoenica 125 2 10.9 109 78 22.0
## 14 7 Juniperus phoenica 140 2 12.5 125 70 25.0
## 15 8 Juniperus phoenica 130 2 13.8 138 75 27.0
## 16 10 Juniperus phoenica 130 2 14.0 140 75 28.0
## 17 12 Juniperus phoenica 110 2 8.8 88 65 17.0
## 18 14 Juniperus phoenica 130 2 11.2 112 65 22.5
## 19 15 Juniperus phoenica 140 2 13.3 133 80 26.3
## 20 26 Juniperus phoenica 130 2 11.2 112 85 23.0
## 21 1 Juniperus phoenica 165 3 15.9 159 80 32.0
## 22 9 Juniperus phoenica 150 3 14.7 147 70 29.0
## 23 11 Juniperus phoenica 180 3 20.9 209 110 42.3
## 24 17 Juniperus phoenica 160 3 14.2 142 75 29.5
## 25 18 Juniperus phoenica 170 3 19.6 196 90 42.0
## 26 21 Juniperus phoenica 150 3 13.5 135 80 27.3
## 27 22 Juniperus phoenica 160 3 15.5 155 65 31.3
## 28 24 Juniperus phoenica 180 3 12.1 121 80 25.6
## X0.10 X10.20 X20.30 X30.40 X40.50 X50.60 X60.70 X70.80 X80.90 X90.100
## 1 8 8 9 6 5 5 4 NA NA NA
## 2 8 10 10 9 8 7 6 6 6 NA
## 3 9 9 10 9 8 9 9 8 7 NA
## 4 12 13 12 11 13 10 10 10 9 9
## 5 13 15 10 10 11 7 7 NA NA NA
## 6 12 13 10 10 12 10 10 7 7 NA
## 7 7 10 8 8 11 9 11 8 NA NA
## 8 9 9 10 9 8 9 8 8 7 NA
## 9 7 10 11 10 9 10 8 5 NA NA
## 10 9 7 6 6 NA NA NA NA NA NA
## 11 10 10 12 7 10 7 12 12 11 12
## 12 10 10 12 9 9 7 7 9 6 6
## 13 9 11 10 8 10 7 8 8 8 10
## 14 10 14 12 10 13 8 7 8 6 6
## 15 11 10 10 14 12 13 12 12 11 9
## 16 12 14 13 12 14 13 13 12 9 8
## 17 10 11 10 9 10 7 8 7 6 5
## 18 11 13 12 10 11 9 8 9 8 5
## 19 12 13 12 11 11 9 11 11 10 8
## 20 10 10 11 10 9 9 8 10 9 8
## 21 12 14 12 10 9 8 13 13 9 9
## 22 9 13 10 9 13 11 12 12 11 9
## 23 13 15 14 13 15 13 14 13 13 11
## 24 11 10 11 11 11 9 10 10 9 8
## 25 14 15 14 13 14 12 14 13 11 10
## 26 12 14 11 10 13 10 9 8 8 7
## 27 12 14 11 10 13 11 12 10 9 8
## 28 9 8 9 8 6 5 6 5 5 5
## X100.110 X110.120 X120.130 X130.140 X140.150 X150.160 X160.170 X170.180
## 1 NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA
## 4 8 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA
## 7 NA NA NA NA NA NA NA NA
## 8 NA NA NA NA NA NA NA NA
## 9 NA NA NA NA NA NA NA NA
## 10 NA NA NA NA NA NA NA NA
## 11 8 9 7 7 NA NA NA NA
## 12 7 8 7 NA NA NA NA NA
## 13 7 7 6 NA NA NA NA NA
## 14 7 8 8 8 NA NA NA NA
## 15 7 9 8 NA NA NA NA NA
## 16 7 7 6 NA NA NA NA NA
## 17 5 NA NA NA NA NA NA NA
## 18 5 5 6 NA NA NA NA NA
## 19 6 6 6 7 NA NA NA NA
## 20 6 6 6 NA NA NA NA NA
## 21 7 8 7 7 7 7 7 NA
## 22 7 9 9 7 6 NA NA NA
## 23 11 11 11 10 9 8 8 7
## 24 7 7 8 7 7 6 NA NA
## 25 9 11 11 9 9 9 8 NA
## 26 7 6 7 6 7 NA NA NA
## 27 8 8 8 7 7 7 NA NA
## 28 7 7 6 7 7 7 7 7
head(data)
## D.E Eidos.A Age AGEcode Radius Rad...mm. Height..cm. Diam..cm.
## 1 3 Juniperus phoenica 67 1 4.9 49 45 10.0
## 2 6 Juniperus phoenica 90 1 7.6 76 60 16.0
## 3 13 Juniperus phoenica 90 1 7.8 78 50 15.5
## 4 16 Juniperus phoenica 100 1 11.7 117 68 23.5
## 5 19 Juniperus phoenica 70 1 7.3 73 60 15.0
## 6 20 Juniperus phoenica 90 1 9.1 91 80 18.0
## X0.10 X10.20 X20.30 X30.40 X40.50 X50.60 X60.70 X70.80 X80.90 X90.100
## 1 8 8 9 6 5 5 4 NA NA NA
## 2 8 10 10 9 8 7 6 6 6 NA
## 3 9 9 10 9 8 9 9 8 7 NA
## 4 12 13 12 11 13 10 10 10 9 9
## 5 13 15 10 10 11 7 7 NA NA NA
## 6 12 13 10 10 12 10 10 7 7 NA
## X100.110 X110.120 X120.130 X130.140 X140.150 X150.160 X160.170 X170.180
## 1 NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA
## 4 8 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA
# creation vectors
age <- data$Age
age
## [1] 67 90 90 100 70 90 75 90 75 30 140 130 125 140 130 130 110 130 140
## [20] 130 165 150 180 160 170 150 160 180
agecode <- data$AGEcode
agecode
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
agecode <- as.factor(agecode)
radius <- data$Radius
radius
## [1] 4.9 7.6 7.8 11.7 7.3 9.1 7.2 7.7 7.0 2.8 11.4 10.7 10.9 12.5 13.8
## [16] 14.0 8.8 11.2 13.3 11.2 15.9 14.7 20.9 14.2 19.6 13.5 15.5 12.1
height <- data$Height..cm.
height
## [1] 45 60 50 68 60 80 55 50 55 25 70 80 78 70 75 75 65 65 80
## [20] 85 80 70 110 75 90 80 65 80
diam <- data$Diam..cm.
diam
## [1] 10.0 16.0 15.5 23.5 15.0 18.0 14.0 15.5 14.0 5.6 23.0 23.0 22.0 25.0 27.0
## [16] 28.0 17.0 22.5 26.3 23.0 32.0 29.0 42.3 29.5 42.0 27.3 31.3 25.6
# Συσχέτιση (correlation) ανάμεσα στις μεταβλητές
cor (age,radius)
## [1] 0.9087815
# Διάγραμμα ανάμεσα σε δύο μεταβλητές
plot (age,radius)
# Διαμόρφωση απλού γραμμικού μοντέλου fit linear model
# η πρόβλεψη ηλικίας με βάση τη διάμετρο
lm.diam <- lm(age~diam) # lm = y~x
lm_ages <- lm(age~diam) # lm = y~x
# μετρικά του μοντέλου
summary (lm.diam)
##
## Call:
## lm(formula = age ~ diam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.654 -9.819 1.603 8.960 47.820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.8546 8.8564 3.032 0.00544 **
## diam 4.1143 0.3624 11.354 1.41e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.06 on 26 degrees of freedom
## Multiple R-squared: 0.8322, Adjusted R-squared: 0.8257
## F-statistic: 128.9 on 1 and 26 DF, p-value: 1.414e-11
# διαγνωστικά διαγράμματα του μοντέλου
plot(lm.diam)
### ΑΠΕΙΚΟΝΙΣΗ ΤΟΥ ΜΟΝΤΕΛΟΥ
### MAIN PLOT
plot(diam, age, xlab="Diameters", ylab="Ages",
main="Fit lines", xlim=c(0,50), ylim=c(20,200),
pch=23, bg = "blue", col="red", cex=1) # plot (x,y)
abline(lm.diam, lwd=2, col="red") # regression line (y~x)
#lines(lowess(diam,age), col="blue", lwd=2) # lowess line (x,y)
# απεικόνιση κατάλοιπων του μοντέλου
# calculate residuals and predicted values
res <- signif(residuals(lm.diam), 5)
pre <- predict(lm.diam) # plot distances between points and the regression line
segments(diam, age, diam, pre, col="red")
# add labels (res values) to points
library(calibrate)
## Loading required package: MASS
textxy(diam, age, res, cx=2, digits=1, offset = 1)
## Warning in text.default(X[posXposY], Y[posXposY], labs[posXposY], adj = c(0.5 -
## : "cx" is not a graphical parameter
## Warning in text.default(X[posXposY], Y[posXposY], labs[posXposY], adj = c(0.5 -
## : "digits" is not a graphical parameter
####### 2. ΥΠΟΕΝΟΤΗΤΑ ΠΟΛΛΑΠΛΗ ΓΡΑΜΜΙΚΗ ΠΑΝΙΔΡΟΜΗΣΗ
## ΕΙΣΑΓΩΓΗ ΑΡΧΕΙΟΥ csv
rank <- read.csv("G:/DATA/MULTIPLE REGRESSKION/ΕΠΙΔΟΣΕΙΣ ΜΑΘΗΤΩΝ/Student_Performance.csv")
rank
## Hours_Studied Previous_Score New_Score
## 1 7 91 100
## 2 4 65 82
## 3 8 45 98
## 4 5 36 82
## 5 7 66 99
## 6 3 61 88
## 7 7 63 98
## 8 8 42 100
## 9 5 61 90
## 10 4 50 80
## 11 8 84 100
## 12 8 73 100
## 13 3 27 47
## 14 6 33 87
## 15 5 68 91
## 16 2 43 52
## 17 8 57 97
## 18 6 70 99
## 19 2 30 40
## 20 5 63 85
## 21 1 71 75
## 22 6 85 96
## 23 9 73 100
## 24 1 57 60
## 25 3 35 61
## 26 7 49 62
## 27 4 66 79
## 28 9 83 100
## 29 3 74 86
## 30 5 74 90
## 31 3 39 61
## 32 7 36 83
## 33 5 58 75
## 34 9 47 100
## 35 7 60 89
## 36 2 74 78
## 37 4 42 59
## 38 9 68 100
## 39 2 32 39
## 40 9 64 99
## 41 5 45 68
## 42 2 39 43
## 43 4 58 73
## 44 7 36 86
## 45 8 71 99
## 46 3 54 76
## 47 1 17 20
## 48 4 54 73
## 49 2 58 65
## 50 8 53 90
## 51 4 27 44
## 52 2 65 75
## 53 6 75 90
## 54 6 52 86
## 55 4 78 93
## 56 6 91 99
## 57 2 33 40
## 58 2 47 57
## 59 4 78 95
## 60 8 38 78
## 61 7 70 94
## 62 9 98 100
## 63 8 87 99
## 64 5 49 73
## 65 2 41 51
## 66 5 71 88
## 67 8 54 85
## 68 9 42 96
## 69 9 91 100
## 70 1 61 68
## 71 9 74 99
## 72 7 54 86
## 73 9 81 100
## 74 8 52 90
## 75 1 65 70
## 76 8 36 86
## 77 8 61 99
## 78 3 35 54
## 79 1 15 17
## 80 8 88 100
## 81 3 45 60
## 82 3 49 68
## 83 1 33 59
## 84 5 60 83
## 85 7 71 98
## 86 9 81 100
## 87 7 67 95
## 88 9 95 100
## 89 8 58 92
## 90 2 29 39
## 91 1 21 25
## 92 7 38 78
## 93 7 60 90
## 94 8 76 96
## 95 5 69 89
## 96 3 30 50
## 97 8 57 86
## 98 6 81 95
## 99 3 36 50
## 100 1 25 30
## διαμόρφωση αντικειμένων
hours <- rank$Hours_Studied
prev <- rank$Previous_Score
new <- rank$New_Score
## δημιουργία data frame
rank.data <- data.frame(hours, prev, new)
# Προσαρμογή του μοντέλου πολλαπλής γραμμικής παλινδρόμησης
new.score <- lm(new~hours+prev)
# μετρικά του μοντέλου
summary(new.score)
##
## Call:
## lm(formula = new ~ hours + prev)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.2070 -5.2471 0.3944 6.2401 18.1917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.29041 2.72177 8.557 1.73e-13 ***
## hours 4.54175 0.37013 12.271 < 2e-16 ***
## prev 0.55356 0.05088 10.881 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.482 on 97 degrees of freedom
## Multiple R-squared: 0.8439, Adjusted R-squared: 0.8407
## F-statistic: 262.3 on 2 and 97 DF, p-value: < 2.2e-16
### ΕΛΕΓΧΟΣ ΥΠΟΘΕΣΕΩΝ
##1. # Get the model residuals
model_residuals = new.score$residuals
# Plot the result
hist(model_residuals)
# Plot the residuals
qqnorm(model_residuals)
# Plot the Q-Q line
qqline(model_residuals, col="red", lwd=2)
# Shapiro-Wilks normality test
shapiro.test(residuals(new.score))
##
## Shapiro-Wilk normality test
##
## data: residuals(new.score)
## W = 0.98871, p-value = 0.562
### 3. Έλεγχος της παραδοχής πολλαπλής συνάφειας - Multicollinearity assumption
# Install and load the ggcorrplot package
#install.packages("ggcorrplot")
library(ggcorrplot)
## Loading required package: ggplot2
library(ggplot2)
# Compute correlation at 2 decimal places
corr_matrix = round(cor(rank.data), 2)
# Compute and show the result
ggcorrplot(corr_matrix, hc.order = TRUE, type = "lower",
lab = TRUE)
## Διαγνωστικά διαγράμματα
plot(new.score)
## FINAL PLOT
# 1. μοντελο νέα βαθμολογία με ώρα διαβάσματος
new.score.hours <- lm(new~hours) # για πλοτάρισμα του μοντέλου (y~x)
plot(hours, new,xlim=c(0,10))
abline(new.score.hours, lwd=2, col="red") # regression line (y~x))
axis(1, at = seq((min(1)),(max(10)), by = 1))
axis(2, at = seq((min(0)),(max(100)), by = 10))
# όριο για 50
abline (h=50, col="green", lwd=3, lty=4)
# οριο ωρών για 50
abline (v=1, col="blue", lwd=2, lty=2)
# 2. μοντελο νέα βαθμολογία με παλιά βαθμολογία
new.score.prev <- lm(new~prev) # για πλοτάρισμα του μοντέλου (y~x)
plot(prev, new)
abline(new.score.prev, lwd=2, col="red") # regression line (y~x))
axis(1, at = seq((min(0)),(max(100)), by = 10))
axis(2, at = seq((min(0)),(max(100)), by = 10))
# οριο για 50
abline (h=50, col="green", lwd=3, lty=4)
# οριο παλιάς βαθμολογίας για 50
abline (v=24, col="blue", lwd=2, lty=2)
# lm = y~x