url : http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/excel/mlr01.xls
library(ggplot2)
library(readxl)
library(gdata)
## gdata: Unable to locate valid perl interpreter
## gdata:
## gdata: read.xls() will be unable to read Excel XLS and XLSX files
## gdata: unless the 'perl=' argument is used to specify the location
## gdata: of a valid perl intrpreter.
## gdata:
## gdata: (To avoid display of this message in the future, please
## gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLX' (Excel 97-2004) files.
##
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLSX' (Excel 2007+) files.
##
## gdata: Run the function 'installXLSXsupport()'
## gdata: to automatically download and install the perl
## gdata: libaries needed to support Excel XLS and XLSX formats.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Source workbook for the example data set.
datalink <- "http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/excel/mlr01.xls"
# Download the workbook into the working directory with base R's
# download.file() (gdata is not involved). mode = "wb" writes the file in
# binary mode so the .xls is not corrupted.
download_status <- download.file(datalink, destfile = "./file.xls", mode = "wb")
# download.file() signals some failures only through a non-zero return code;
# stop here instead of failing later at the read step.
if (download_status != 0) {
  stop(
    "Download of ", datalink, " failed with status ", download_status,
    call. = FALSE
  )
}
Read data from file
# Load the downloaded workbook with readxl and preview the first rows.
df <- readxl::read_xls(path = "file.xls")
head(df)
## # A tibble: 6 x 4
## X1 X2 X3 X4
## <dbl> <dbl> <dbl> <dbl>
## 1 2.90 9.20 13.2 2.00
## 2 2.40 8.70 11.5 3.00
## 3 2.00 7.20 10.8 4.00
## 4 2.30 8.50 12.3 2.00
## 5 3.20 9.60 12.6 3.00
## 6 1.90 6.80 10.6 5.00
The first column shows the number of fawns in a given spring (fawns are baby
antelope). The second column shows the adult antelope population, the third
shows the annual precipitation that year, and the last column shows how severe the winter was that year.
# Inspect the structure of df: its dimensions and the type of each column.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8 obs. of 4 variables:
## $ X1: num 2.9 2.4 2 2.3 3.2 ...
## $ X2: num 9.2 8.7 7.2 8.5 9.6 ...
## $ X3: num 13.2 11.5 10.8 12.3 12.6 ...
## $ X4: num 2 3 4 2 3 5 1 3
The data frame has 8 yearly observations of 4 variables.
# Replace the generic X1..X4 headers with descriptive names, then preview.
columns <- c(
  "No_of_fawn",
  "adult_population",
  "annual_precipitation",
  "winter_condition"
)
names(df) <- columns
head(df)
## # A tibble: 6 x 4
## No_of_fawn adult_population annual_precipitation winter_condition
## <dbl> <dbl> <dbl> <dbl>
## 1 2.90 9.20 13.2 2.00
## 2 2.40 8.70 11.5 3.00
## 3 2.00 7.20 10.8 4.00
## 4 2.30 8.50 12.3 2.00
## 5 3.20 9.60 12.6 3.00
## 6 1.90 6.80 10.6 5.00
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
# Scatter plots of fawn count against each predictor, arranged side by side.
g <- ggplot(data = df, mapping = aes(x = adult_population, y = No_of_fawn)) +
  geom_point() +
  theme_classic()
h <- ggplot(data = df, mapping = aes(x = annual_precipitation, y = No_of_fawn)) +
  geom_point() +
  theme_classic()
i <- ggplot(data = df, mapping = aes(x = winter_condition, y = No_of_fawn)) +
  geom_point() +
  theme_classic()
plot_grid(g, h, i, nrow = 1, ncol = 3, labels = "AUTO")

# Pairwise overview of all four variables (psych::pairs.panels).
pairs.panels(df)

# Fawn count vs. adult population, with precipitation mapped to colour and
# winter condition mapped to point size.
ggplot(data = df, mapping = aes(x = adult_population, y = No_of_fawn)) +
  geom_point(mapping = aes(color = annual_precipitation, size = winter_condition)) +
  ggtitle("baby fawns versus adult antelope population") +
  theme_classic()
# Simple linear regression: predict the number of fawns from winter condition
# only. temp holds just the response and that single predictor, so the `.` in
# the formula expands to winter_condition.
temp <- df[, c("No_of_fawn", "winter_condition")]
model1 <- lm(formula = No_of_fawn ~ ., data = temp)
summary(model1)
##
## Call:
## lm(formula = No_of_fawn ~ ., data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52069 -0.20431 -0.00172 0.13017 0.71724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.4966 0.3904 8.957 0.000108 ***
## winter_condition -0.3379 0.1258 -2.686 0.036263 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.415 on 6 degrees of freedom
## Multiple R-squared: 0.5459, Adjusted R-squared: 0.4702
## F-statistic: 7.213 on 1 and 6 DF, p-value: 0.03626
# ANOVA table for model1. With a single predictor its F test is the same test
# as the overall F statistic reported by summary(model1).
anova(model1)
## Analysis of Variance Table
##
## Response: No_of_fawn
## Df Sum Sq Mean Sq F value Pr(>F)
## winter_condition 1 1.2419 1.24190 7.2126 0.03626 *
## Residuals 6 1.0331 0.17218
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-predictor model: adult population and winter condition (annual
# precipitation is left out). Column order is kept so the terms enter the
# model in the same order as before.
temp <- df[, c("No_of_fawn", "adult_population", "winter_condition")]
model2 <- lm(formula = No_of_fawn ~ ., data = temp)
summary(model2)
##
## Call:
## lm(formula = No_of_fawn ~ ., data = temp)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## 0.01231 -0.27531 0.10301 -0.19154 0.01535 0.15880 0.29992 -0.12256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.46009 1.53443 -1.603 0.1698
## adult_population 0.56594 0.14439 3.920 0.0112 *
## winter_condition 0.07058 0.12461 0.566 0.5956
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2252 on 5 degrees of freedom
## Multiple R-squared: 0.8885, Adjusted R-squared: 0.8439
## F-statistic: 19.92 on 2 and 5 DF, p-value: 0.004152
# ANOVA table for model2. Terms are tested sequentially in formula order:
# adult_population first, then winter_condition given adult_population.
anova(model2)
## Analysis of Variance Table
##
## Response: No_of_fawn
## Df Sum Sq Mean Sq F value Pr(>F)
## adult_population 1 2.00505 2.00505 39.5204 0.001497 **
## winter_condition 1 0.01628 0.01628 0.3208 0.595580
## Residuals 5 0.25367 0.05073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Full model: all three predictors (every column of df except the response).
# Note that winter_condition's coefficient is positive here but negative in
# model1, which suggests the predictors are correlated; with only 8
# observations, interpret individual coefficients with care.
model3 <- lm(formula = No_of_fawn ~ ., data = df)
summary(model3)
##
## Call:
## lm(formula = No_of_fawn ~ ., data = df)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -0.11533 -0.02661 0.09882 -0.11723 0.02734 -0.04854 0.11715 0.06441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.92201 1.25562 -4.716 0.0092 **
## adult_population 0.33822 0.09947 3.400 0.0273 *
## annual_precipitation 0.40150 0.10990 3.653 0.0217 *
## winter_condition 0.26295 0.08514 3.089 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1209 on 4 degrees of freedom
## Multiple R-squared: 0.9743, Adjusted R-squared: 0.955
## F-statistic: 50.52 on 3 and 4 DF, p-value: 0.001229
# ANOVA table for model3. The tests are sequential (each term is assessed
# after the terms listed above it), so these p-values need not match the
# per-coefficient t tests in summary(model3).
anova(model3)
## Analysis of Variance Table
##
## Response: No_of_fawn
## Df Sum Sq Mean Sq F value Pr(>F)
## adult_population 1 2.00505 2.00505 137.1117 0.0003042 ***
## annual_precipitation 1 0.07196 0.07196 4.9210 0.0907804 .
## winter_condition 1 0.13949 0.13949 9.5391 0.0366262 *
## Residuals 4 0.05849 0.01462
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1