This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Load the data set from a CSV file chosen interactively, then print
# per-column summary statistics.
# NOTE(review): the first column arrives named `ï..Price`, which looks like a
# UTF-8 byte-order mark in the CSV header -- confirm before renaming it.
data1 <- read.csv(file = file.choose())
summary(object = data1)
##     ï..Price         County            Size           Elevation     
##  Min.   : 1.70   Min.   :0.0000   Min.   :   6.90   Min.   : 0.000  
##  1st Qu.: 5.35   1st Qu.:0.0000   1st Qu.:  20.35   1st Qu.: 2.000  
##  Median :11.70   Median :1.0000   Median :  51.40   Median : 4.000  
##  Mean   :11.95   Mean   :0.6129   Mean   : 139.97   Mean   : 4.645  
##  3rd Qu.:16.05   3rd Qu.:1.0000   3rd Qu.: 104.10   3rd Qu.: 7.000  
##  Max.   :37.20   Max.   :1.0000   Max.   :1695.20   Max.   :20.000  
##      Sewer            Date             Flood           Distance     
##  Min.   :    0   Min.   :-103.00   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.:    0   1st Qu.: -63.50   1st Qu.:0.0000   1st Qu.: 0.850  
##  Median :  900   Median : -59.00   Median :0.0000   Median : 4.900  
##  Mean   : 1981   Mean   : -58.65   Mean   :0.1613   Mean   : 5.132  
##  3rd Qu.: 3450   3rd Qu.: -51.00   3rd Qu.:0.0000   3rd Qu.: 5.500  
##  Max.   :10000   Max.   :  -4.00   Max.   :1.0000   Max.   :16.500
# Draw a histogram to check how the plot price is distributed.

hist(data1$ï..Price)

# The histogram is skewed. The typical plot price looks to be around 10k/acre.
# To get a more symmetric histogram, take the natural log of Price
# (log() uses base e by default, not base 10).
# This only transforms the price variable.

logprice <- log(data1$ï..Price)
hist(logprice, main = "Histogram of Price taking log")
# Taking the log of price gives a better-behaved histogram.

#calling libraries
library(car)
## Warning: package 'car' was built under R version 3.4.2

library(corrplot) 
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
library(visreg) 
## Warning: package 'visreg' was built under R version 3.4.2
library(rgl)
## Warning: package 'rgl' was built under R version 3.4.2
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.2
library(scatterplot3d) #for 3Dfigures
library(GGally)
## Warning: package 'GGally' was built under R version 3.4.2
# Scatterplot matrix of every pair of columns, to see how the variables
# relate to one another.
# Price is correlated with distance, elevation, sewer and date.

plot(x = data1, col = "blue", pch = 10)

ggpairs(data = data1)

# Show the pairwise correlations as numbers.
cordata1 <- cor(x = data1)
corrplot(corr = cordata1, method = "number")

#there is some correlation between independent variables:
#Distance & County; Flood & County; Elevation & County

# Scatterplots (car::scatterplot) of price against individual predictors.
# NOTE(review): price is passed as the first argument, so it is drawn on the
# x-axis in each plot -- confirm that is the intended orientation.
scatterplot(data1$ï..Price, data1$Date, main = "Price vs Date over time")

scatterplot(data1$ï..Price, data1$Size)

scatterplot(data1$ï..Price, data1$Elevation, main = "Price Vs Elevation")

scatterplot(data1$ï..Price, data1$Distance)

#just to see how the data is correlated


# attach() is kept because later model fits in this notebook refer to bare
# column names (e.g. Elevation, Date); the centering below reads the columns
# from data1 explicitly instead of relying on it.
attach(data1)


set.seed(1)

# Center predictors: subtract each column's mean, without rescaling.
# scale() returns a one-column matrix for each variable.
elevation.c <- scale(data1$Elevation, center = TRUE, scale = FALSE)
date.c <- scale(data1$Date, center = TRUE, scale = FALSE)
flood.c <- scale(data1$Flood, center = TRUE, scale = FALSE)
distance.c <- scale(data1$Distance, center = TRUE, scale = FALSE)

# Just to try whether centered predictors help.

# Bind the centered variables onto the original data and print the result.
new.c.vars <- cbind(elevation.c, date.c, flood.c, distance.c)
data2 <- cbind(data1, new.c.vars)
data2
##    ï..Price County   Size Elevation Sewer Date Flood Distance          1
## 1       4.5      1  138.4        10  3000 -103     0      0.3  5.3548387
## 2      10.6      1   52.0         4     0 -103     0      2.5 -0.6451613
## 3       1.7      0   16.1         0  2640  -98     1     10.3 -4.6451613
## 4       5.0      0 1695.2         1  3500  -93     0     14.0 -3.6451613
## 5       5.0      0  845.0         1  1000  -92     1     14.0 -3.6451613
## 6       3.3      1    6.9         2 10000  -86     0      0.0 -2.6451613
## 7       5.7      1  105.9         4     0  -68     0      0.0 -0.6451613
## 8       6.2      1   56.6         4     0  -64     0      0.0 -0.6451613
## 9      19.4      1   51.4        20  1300  -63     0      1.2 15.3548387
## 10      3.2      1   22.1         0  6000  -62     0      0.0 -4.6451613
## 11      4.7      1   22.1         0  6000  -61     0      0.0 -4.6451613
## 12      6.9      1   27.7         3  4500  -60     0      0.0 -1.6451613
## 13      8.1      1   18.6         5  5000  -59     0      0.5  0.3548387
## 14     11.6      1   69.9         8     0  -59     0      4.4  3.3548387
## 15     19.3      1  145.7        10     0  -59     0      4.2  5.3548387
## 16     11.7      1   77.2         9     0  -59     0      4.5  4.3548387
## 17     13.3      1   26.2         8     0  -59     0      4.7  3.3548387
## 18     15.1      1  102.3         6     0  -59     0      4.9  1.3548387
## 19     12.4      1   49.5        11     0  -59     0      4.6  6.3548387
## 20     15.3      1   12.2         8     0  -59     0      5.0  3.3548387
## 21     12.2      0  320.6         0  4000  -54     0     16.5 -4.6451613
## 22     18.1      1    9.9         5     0  -54     0      5.2  0.3548387
## 23     16.8      1   15.3         2     0  -53     0      5.5 -2.6451613
## 24      5.9      0   55.2         0  1320  -49     1     11.9 -4.6451613
## 25      4.0      0  116.2         2   900  -45     1      5.5 -2.6451613
## 26     37.2      0   15.0         5     0  -39     0      7.2  0.3548387
## 27     18.2      0   23.4         5  4420  -39     0      5.5  0.3548387
## 28     15.1      0  132.8         2  2640  -35     0     10.2 -2.6451613
## 29     22.9      0   12.0         5  3400  -16     0      5.5  0.3548387
## 30     15.2      0   67.0         2   900   -5     1      5.5 -2.6451613
## 31     21.9      0   30.8         2   900   -4     0      5.5 -2.6451613
##              2          3           4
## 1  -44.3548387 -0.1612903 -4.83225806
## 2  -44.3548387 -0.1612903 -2.63225806
## 3  -39.3548387  0.8387097  5.16774194
## 4  -34.3548387 -0.1612903  8.86774194
## 5  -33.3548387  0.8387097  8.86774194
## 6  -27.3548387 -0.1612903 -5.13225806
## 7   -9.3548387 -0.1612903 -5.13225806
## 8   -5.3548387 -0.1612903 -5.13225806
## 9   -4.3548387 -0.1612903 -3.93225806
## 10  -3.3548387 -0.1612903 -5.13225806
## 11  -2.3548387 -0.1612903 -5.13225806
## 12  -1.3548387 -0.1612903 -5.13225806
## 13  -0.3548387 -0.1612903 -4.63225806
## 14  -0.3548387 -0.1612903 -0.73225806
## 15  -0.3548387 -0.1612903 -0.93225806
## 16  -0.3548387 -0.1612903 -0.63225806
## 17  -0.3548387 -0.1612903 -0.43225806
## 18  -0.3548387 -0.1612903 -0.23225806
## 19  -0.3548387 -0.1612903 -0.53225806
## 20  -0.3548387 -0.1612903 -0.13225806
## 21   4.6451613 -0.1612903 11.36774194
## 22   4.6451613 -0.1612903  0.06774194
## 23   5.6451613 -0.1612903  0.36774194
## 24   9.6451613  0.8387097  6.76774194
## 25  13.6451613  0.8387097  0.36774194
## 26  19.6451613 -0.1612903  2.06774194
## 27  19.6451613 -0.1612903  0.36774194
## 28  23.6451613 -0.1612903  5.06774194
## 29  42.6451613 -0.1612903  0.36774194
## 30  53.6451613  0.8387097  0.36774194
## 31  54.6451613 -0.1612903  0.36774194
# Columns 9 to 12 are the centered variables appended by cbind(); they come
# through with the placeholder names "1".."4", so give them descriptive names.
colnames(data2)[9:12] <- c("elevation.c", "date.c", "flood.c", "distance.c")





# Model 1: price regressed on the four centered predictors.
# Response and predictors are all taken from data2 via `data =`, so the fit
# does not depend on attach()ed or global copies of the variables.
mod1 <- lm(
  ï..Price ~ elevation.c + date.c + flood.c + distance.c,
  data = data2
)
summary(mod1)
## 
## Call:
## lm(formula = data2$ï..Price ~ elevation.c + date.c + flood.c + 
##     distance.c)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.311 -2.954 -1.040  1.273 18.915 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.9516     0.8958  13.342 3.86e-13 ***
## elevation.c   0.6861     0.2326   2.950  0.00664 ** 
## date.c        0.1907     0.0372   5.125 2.41e-05 ***
## flood.c      -6.9243     2.7845  -2.487  0.01964 *  
## distance.c    0.5933     0.2285   2.596  0.01531 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.988 on 26 degrees of freedom
## Multiple R-squared:  0.6378, Adjusted R-squared:  0.582 
## F-statistic: 11.44 on 4 and 26 DF,  p-value: 1.718e-05
#Adjusted R-squared is 0.582

# Model 2: same predictors, but with the natural log of price as the response.
# All variables are taken from data2 via `data =`.
mod2 <- lm(
  log(ï..Price) ~ elevation.c + date.c + flood.c + distance.c,
  data = data2
)
summary(mod2)
## 
## Call:
## lm(formula = log(data2$ï..Price) ~ elevation.c + date.c + flood.c + 
##     distance.c)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5099 -0.2090 -0.1097  0.1721  1.0031 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.25944    0.06478  34.879  < 2e-16 ***
## elevation.c  0.07459    0.01682   4.435 0.000149 ***
## date.c       0.01857    0.00269   6.902  2.5e-07 ***
## flood.c     -0.77886    0.20136  -3.868 0.000659 ***
## distance.c   0.05908    0.01653   3.575 0.001401 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3607 on 26 degrees of freedom
## Multiple R-squared:  0.7806, Adjusted R-squared:  0.7468 
## F-statistic: 23.12 on 4 and 26 DF,  p-value: 3.049e-08
#Adjusted R-squared increased to 0.747. Taking the log of price gives a better model

# Model 3: price on the uncentered predictors, for comparison with Model 1.
# Variables are taken from data2 via `data =` rather than from attach().
mod3 <- lm(ï..Price ~ Elevation + Date + Flood + Distance, data = data2)
summary(mod3)
## 
## Call:
## lm(formula = data2$ï..Price ~ Elevation + Date + Flood + Distance)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.311 -2.954 -1.040  1.273 18.915 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  18.0191     2.9536   6.101 1.90e-06 ***
## Elevation     0.6861     0.2326   2.950  0.00664 ** 
## Date          0.1907     0.0372   5.125 2.41e-05 ***
## Flood        -6.9243     2.7845  -2.487  0.01964 *  
## Distance      0.5933     0.2285   2.596  0.01531 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.988 on 26 degrees of freedom
## Multiple R-squared:  0.6378, Adjusted R-squared:  0.582 
## F-statistic: 11.44 on 4 and 26 DF,  p-value: 1.718e-05
#Adjusted R-squared is 0.58
#removing the log reduced the adjusted R-squared

# First lm diagnostic plot (which = 1) for the log-price model.
plot(mod2, which = 1, pch = 10)

# Durbin-Watson test for autocorrelation in the residuals
# (dwt() is car's short alias for durbinWatsonTest()).
durbinWatsonTest(mod2)
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1952503       2.37622   0.398
##  Alternative hypothesis: rho != 0
# Quantile-comparison plot for mod2 (car::qqPlot).

qqPlot(mod2)

# Outlier test for mod2 (car::outlierTest); reports the most extreme
# studentized residual with its Bonferroni-adjusted p-value.
outlierTest(mod2)
##   rstudent unadjusted p-value Bonferonni p
## 2 3.704906          0.0010527     0.032634

# Model 4: price on every available predictor, to see which ones have large
# p-values. Response and predictors both come from data1 (the original took
# the response from data2 while expanding `.` over data1).

mod4 <- lm(ï..Price ~ ., data = data1)
summary(mod4)
## 
## Call:
## lm(formula = data2$ï..Price ~ ., data = data1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.169 -2.957 -0.256  2.070 13.031 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.364e+01  3.829e+00   6.174 2.68e-06 ***
## County      -8.789e+00  3.652e+00  -2.407 0.024532 *  
## Size        -6.043e-03  3.501e-03  -1.726 0.097702 .  
## Elevation    5.193e-01  2.386e-01   2.177 0.040030 *  
## Sewer       -9.573e-04  4.169e-04  -2.296 0.031126 *  
## Date         8.508e-02  4.865e-02   1.749 0.093646 .  
## Flood       -1.202e+01  2.989e+00  -4.020 0.000536 ***
## Distance     1.858e-01  3.395e-01   0.547 0.589386    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.431 on 23 degrees of freedom
## Multiple R-squared:  0.747,  Adjusted R-squared:   0.67 
## F-statistic: 9.703 on 7 and 23 DF,  p-value: 1.351e-05
#We will keep the variables whose p-value is less than 0.05.
#Distance and Size have p-values above 0.05 (Date is also above it, at 0.094, but is kept).
#R-squared is 0.747, so more than 1/4 of the variance is unexplained.
#Let's try a model that drops Distance and Size
#and see if the adjusted R-squared increases.

# Model 5: price on all predictors except Distance and Size.
# The response is named as a column of data1 so that `data =` supplies every
# variable in the formula.
mod5 <- lm(ï..Price ~ . - Distance - Size, data = data1)
summary(mod5)
## 
## Call:
## lm(formula = data1$ï..Price ~ . - Distance - Size, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5688 -2.7883 -0.3453  1.9312 14.4498 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.497e+01  2.597e+00   9.614 7.05e-10 ***
## County      -7.439e+00  2.383e+00  -3.122  0.00450 ** 
## Elevation    5.291e-01  2.397e-01   2.207  0.03671 *  
## Sewer       -9.513e-04  3.800e-04  -2.504  0.01919 *  
## Date         1.247e-01  3.840e-02   3.249  0.00330 ** 
## Flood       -1.064e+01  2.871e+00  -3.707  0.00105 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.518 on 25 degrees of freedom
## Multiple R-squared:  0.7143, Adjusted R-squared:  0.6571 
## F-statistic:  12.5 on 5 and 25 DF,  p-value: 3.831e-06
#Adjusted R-squared is 0.657. Let's see if this can be slightly improved

  
# Draw the lm diagnostic plots for mod5 in a 2 x 2 grid, then restore the
# previous graphics settings so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(mod5)
par(old_par)

# Row 26 seems to be an outlier; try removing row 26 alone
# to improve the model.

data3 <- data1[-26, ]
data3
##    ï..Price County   Size Elevation Sewer Date Flood Distance
## 1       4.5      1  138.4        10  3000 -103     0      0.3
## 2      10.6      1   52.0         4     0 -103     0      2.5
## 3       1.7      0   16.1         0  2640  -98     1     10.3
## 4       5.0      0 1695.2         1  3500  -93     0     14.0
## 5       5.0      0  845.0         1  1000  -92     1     14.0
## 6       3.3      1    6.9         2 10000  -86     0      0.0
## 7       5.7      1  105.9         4     0  -68     0      0.0
## 8       6.2      1   56.6         4     0  -64     0      0.0
## 9      19.4      1   51.4        20  1300  -63     0      1.2
## 10      3.2      1   22.1         0  6000  -62     0      0.0
## 11      4.7      1   22.1         0  6000  -61     0      0.0
## 12      6.9      1   27.7         3  4500  -60     0      0.0
## 13      8.1      1   18.6         5  5000  -59     0      0.5
## 14     11.6      1   69.9         8     0  -59     0      4.4
## 15     19.3      1  145.7        10     0  -59     0      4.2
## 16     11.7      1   77.2         9     0  -59     0      4.5
## 17     13.3      1   26.2         8     0  -59     0      4.7
## 18     15.1      1  102.3         6     0  -59     0      4.9
## 19     12.4      1   49.5        11     0  -59     0      4.6
## 20     15.3      1   12.2         8     0  -59     0      5.0
## 21     12.2      0  320.6         0  4000  -54     0     16.5
## 22     18.1      1    9.9         5     0  -54     0      5.2
## 23     16.8      1   15.3         2     0  -53     0      5.5
## 24      5.9      0   55.2         0  1320  -49     1     11.9
## 25      4.0      0  116.2         2   900  -45     1      5.5
## 27     18.2      0   23.4         5  4420  -39     0      5.5
## 28     15.1      0  132.8         2  2640  -35     0     10.2
## 29     22.9      0   12.0         5  3400  -16     0      5.5
## 30     15.2      0   67.0         2   900   -5     1      5.5
## 31     21.9      0   30.8         2   900   -4     0      5.5
# Structure of the data with row 26 removed.
str(object = data3)
## 'data.frame':    30 obs. of  8 variables:
##  $ ï..Price : num  4.5 10.6 1.7 5 5 3.3 5.7 6.2 19.4 3.2 ...
##  $ County   : int  1 1 0 0 0 1 1 1 1 1 ...
##  $ Size     : num  138.4 52 16.1 1695.2 845 ...
##  $ Elevation: int  10 4 0 1 1 2 4 4 20 0 ...
##  $ Sewer    : int  3000 0 2640 3500 1000 10000 0 0 1300 6000 ...
##  $ Date     : int  -103 -103 -98 -93 -92 -86 -68 -64 -63 -62 ...
##  $ Flood    : int  0 0 1 0 1 0 0 0 0 0 ...
##  $ Distance : num  0.3 2.5 10.3 14 14 0 0 0 1.2 0 ...
# Structure of the original data, for comparison with data3.
str(object = data1)
## 'data.frame':    31 obs. of  8 variables:
##  $ ï..Price : num  4.5 10.6 1.7 5 5 3.3 5.7 6.2 19.4 3.2 ...
##  $ County   : int  1 1 0 0 0 1 1 1 1 1 ...
##  $ Size     : num  138.4 52 16.1 1695.2 845 ...
##  $ Elevation: int  10 4 0 1 1 2 4 4 20 0 ...
##  $ Sewer    : int  3000 0 2640 3500 1000 10000 0 0 1300 6000 ...
##  $ Date     : int  -103 -103 -98 -93 -92 -86 -68 -64 -63 -62 ...
##  $ Flood    : int  0 0 1 0 1 0 0 0 0 0 ...
##  $ Distance : num  0.3 2.5 10.3 14 14 0 0 0 1.2 0 ...
# Model 6: log(price) on all predictors except Distance and Size, fitted to
# the data with row 26 removed.
mod6 <- lm(
  formula = log(ï..Price) ~ . - Distance - Size,
  data = data3
)
summary(object = mod6)
## 
## Call:
## lm(formula = log(ï..Price) ~ . - Distance - Size, data = data3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54690 -0.21040  0.01803  0.23982  0.56446 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.419e+00  2.005e-01  17.056 6.42e-15 ***
## County      -3.592e-01  1.857e-01  -1.934 0.065025 .  
## Elevation    4.525e-02  1.763e-02   2.567 0.016920 *  
## Sewer       -9.915e-05  2.848e-05  -3.482 0.001926 ** 
## Date         1.403e-02  2.825e-03   4.965 4.54e-05 ***
## Flood       -9.153e-01  2.198e-01  -4.164 0.000347 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3321 on 24 degrees of freedom
## Multiple R-squared:  0.8041, Adjusted R-squared:  0.7633 
## F-statistic:  19.7 on 5 and 24 DF,  p-value: 8.585e-08
#We have an improved adjusted R-squared of 0.763. This looks like a better model

# Try stepwise selection with step(), starting from the model that uses
# every predictor in data3. The result is printed, not stored.

step(object = lm(formula = ï..Price ~ ., data = data3))
## Start:  AIC=75.45
## ï..Price ~ County + Size + Elevation + Sewer + Date + Flood + 
##     Distance
## 
##             Df Sum of Sq    RSS    AIC
## - County     1     8.266 225.90 74.566
## <none>                   217.63 75.448
## - Size       1    18.032 235.66 75.836
## - Sewer      1    26.544 244.17 76.901
## - Distance   1    27.596 245.23 77.030
## - Elevation  1   100.809 318.44 84.867
## - Flood      1   116.091 333.72 86.273
## - Date       1   127.501 345.13 87.282
## 
## Step:  AIC=74.57
## ï..Price ~ Size + Elevation + Sewer + Date + Flood + Distance
## 
##             Df Sum of Sq    RSS    AIC
## - Size       1    12.890 238.79 74.231
## <none>                   225.90 74.566
## - Sewer      1    18.645 244.54 74.946
## - Distance   1    85.238 311.14 82.171
## - Elevation  1    98.595 324.49 83.432
## - Flood      1   126.247 352.14 85.885
## - Date       1   308.603 534.50 98.404
## 
## Step:  AIC=74.23
## ï..Price ~ Elevation + Sewer + Date + Flood + Distance
## 
##             Df Sum of Sq    RSS     AIC
## <none>                   238.79  74.231
## - Sewer      1     20.87 259.66  74.745
## - Distance   1     78.70 317.48  80.777
## - Elevation  1    101.70 340.49  82.875
## - Flood      1    115.21 354.00  84.043
## - Date       1    451.51 690.29 104.078
## 
## Call:
## lm(formula = ï..Price ~ Elevation + Sewer + Date + Flood + Distance, 
##     data = data3)
## 
## Coefficients:
## (Intercept)    Elevation        Sewer         Date        Flood  
##  17.9571991    0.5422328   -0.0004005    0.1627357   -6.1840320  
##    Distance  
##   0.4229008
# Model 9: untransformed price on all predictors except Distance and Size,
# fitted to the data with row 26 removed (the counterpart of mod6 without
# the log).
mod9 <- lm(
  formula = ï..Price ~ . - Distance - Size,
  data = data3
)
summary(object = mod9)
## 
## Call:
## lm(formula = ï..Price ~ . - Distance - Size, data = data3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0186 -2.2651 -0.3114  2.1549  5.1596 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 22.0187525  1.9634490  11.214 5.01e-11 ***
## County      -4.4613706  1.8189990  -2.453  0.02183 *  
## Elevation    0.5086667  0.1726287   2.947  0.00704 ** 
## Sewer       -0.0006846  0.0002789  -2.455  0.02173 *  
## Date         0.1308357  0.0276699   4.728 8.28e-05 ***
## Flood       -7.6795702  2.1524916  -3.568  0.00156 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.252 on 24 degrees of freedom
## Multiple R-squared:  0.7747, Adjusted R-squared:  0.7278 
## F-statistic: 16.51 on 5 and 24 DF,  p-value: 4.372e-07
#Adjusted R-squared is 0.7278. Since mod6 gives better results, we will choose mod6

# Fitted values from mod6 for the rows it was fitted on.
# mod6 models log(price), so these values are on the natural-log scale;
# apply exp() to express them as prices.
lesiesaltprice <- predict(mod6)
lesiesaltprice
##         1         2         3         4         5         6         7 
## 1.7704282 1.7963983 0.8676524 1.8130681 1.1596736 0.9528662 2.2873697 
##         8         9        10        11        12        13        14 
## 2.3434807 2.9525663 1.5956419 1.6096697 1.9081652 1.9631118 2.5946080 
##        15        16        17        18        19        20        21 
## 2.6851022 2.6398551 2.5946080 2.5041137 2.7303493 2.5946080 2.2653279 
##        22        23        24        25        27        28        29 
## 2.5290053 2.4072917 1.6858916 1.8741403 2.6603364 2.7571948 3.0841087 
##        30        31 
## 2.4352504 3.3645778

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).