Read in data from the following URL

url : http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/excel/mlr01.xls

library(ggplot2)
library(readxl)
library(gdata)
## gdata: Unable to locate valid perl interpreter
## gdata: 
## gdata: read.xls() will be unable to read Excel XLS and XLSX files
## gdata: unless the 'perl=' argument is used to specify the location
## gdata: of a valid perl intrpreter.
## gdata: 
## gdata: (To avoid display of this message in the future, please
## gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLX' (Excel 97-2004) files.
## 
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLSX' (Excel 2007+) files.
## 
## gdata: Run the function 'installXLSXsupport()'
## gdata: to automatically download and install the perl
## gdata: libaries needed to support Excel XLS and XLSX formats.
## 
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
datalink <- "http://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/mlr/excel/mlr01.xls"

# Download the Excel workbook to ./file.xls. download.file() is provided by
# utils (not gdata); mode = "wb" requests a binary transfer for the .xls file.
download_status <- download.file(datalink, destfile = "./file.xls", mode = "wb")

# download.file() returns 0 on success and a non-zero code on failure; stop
# here rather than letting the later read fail on a missing or partial file.
if (download_status != 0) {
  stop(
    "Download of ", datalink, " failed with status ", download_status,
    call. = FALSE
  )
}

Read data from file

# Parse the downloaded workbook into a tibble, then preview its first rows.
df <- readxl::read_xls(path = "file.xls")
head(df, n = 6L)
## # A tibble: 6 x 4
##      X1    X2    X3    X4
##   <dbl> <dbl> <dbl> <dbl>
## 1  2.90  9.20  13.2  2.00
## 2  2.40  8.70  11.5  3.00
## 3  2.00  7.20  10.8  4.00
## 4  2.30  8.50  12.3  2.00
## 5  3.20  9.60  12.6  3.00
## 6  1.90  6.80  10.6  5.00

Data Description

The first column shows the number of fawns in a given spring (fawns are baby
antelope). The second column shows the adult antelope population, the third
shows the annual precipitation for that year, and the last column shows how
severe the winter was that year.

# Show the class, dimensions and column types of df.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8 obs. of  4 variables:
##  $ X1: num  2.9 2.4 2 2.3 3.2 ...
##  $ X2: num  9.2 8.7 7.2 8.5 9.6 ...
##  $ X3: num  13.2 11.5 10.8 12.3 12.6 ...
##  $ X4: num  2 3 4 2 3 5 1 3

The data frame has 8 yearly observations of 4 variables.

# Replace the generic X1..X4 headers with descriptive names (assigned in
# column order).
columns <- c(
  "No_of_fawn",
  "adult_population",
  "annual_precipitation",
  "winter_condition"
)
names(df) <- columns

head(df, n = 6L)
## # A tibble: 6 x 4
##   No_of_fawn adult_population annual_precipitation winter_condition
##        <dbl>            <dbl>                <dbl>            <dbl>
## 1       2.90             9.20                 13.2             2.00
## 2       2.40             8.70                 11.5             3.00
## 3       2.00             7.20                 10.8             4.00
## 4       2.30             8.50                 12.3             2.00
## 5       3.20             9.60                 12.6             3.00
## 6       1.90             6.80                 10.6             5.00
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
# Scatter plots of the fawn count against each of the three predictors.
g <- ggplot(data = df, mapping = aes(x = adult_population, y = No_of_fawn)) +
  geom_point() +
  theme_classic()
h <- ggplot(data = df, mapping = aes(x = annual_precipitation, y = No_of_fawn)) +
  geom_point() +
  theme_classic()
i <- ggplot(data = df, mapping = aes(x = winter_condition, y = No_of_fawn)) +
  geom_point() +
  theme_classic()

# Arrange the three panels side by side in a single row.
plot_grid(g, h, i, nrow = 1, ncol = 3, labels = "AUTO")

Pair plots (using psych) to check the correlations between the variables

# Pairwise panel plot of every column in df (psych package).
psych::pairs.panels(df)

# Fawn count against adult population, with point colour mapped to annual
# precipitation and point size mapped to winter condition.
ggplot(data = df, mapping = aes(x = adult_population, y = No_of_fawn)) +
  geom_point(aes(color = annual_precipitation, size = winter_condition)) +
  ggtitle("baby fawns versus adult antelope population") +
  theme_classic()

Model

# Linear model with a single predictor: winter condition -> number of fawns.
# Columns are picked by name so the subset does not depend on column order.
temp <- df[, c("No_of_fawn", "winter_condition")]

# `.` stands for every remaining column of temp (here only winter_condition).
model1 <- lm(No_of_fawn ~ ., data = temp)
summary(model1)
## 
## Call:
## lm(formula = No_of_fawn ~ ., data = temp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52069 -0.20431 -0.00172  0.13017  0.71724 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.4966     0.3904   8.957 0.000108 ***
## winter_condition  -0.3379     0.1258  -2.686 0.036263 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.415 on 6 degrees of freedom
## Multiple R-squared:  0.5459, Adjusted R-squared:  0.4702 
## F-statistic: 7.213 on 1 and 6 DF,  p-value: 0.03626
# ANOVA table for model1 (Df, sums of squares and F test per term).
anova(model1)
## Analysis of Variance Table
## 
## Response: No_of_fawn
##                  Df Sum Sq Mean Sq F value  Pr(>F)  
## winter_condition  1 1.2419 1.24190  7.2126 0.03626 *
## Residuals         6 1.0331 0.17218                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-predictor model: adult population and winter condition -> number of
# fawns. Columns are picked by name (same order as before) instead of dropping
# position 3, so the subset does not depend on where annual_precipitation sits.
temp <- df[, c("No_of_fawn", "adult_population", "winter_condition")]

# `.` stands for every remaining column of temp.
model2 <- lm(No_of_fawn ~ ., data = temp)
summary(model2)
## 
## Call:
## lm(formula = No_of_fawn ~ ., data = temp)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
##  0.01231 -0.27531  0.10301 -0.19154  0.01535  0.15880  0.29992 -0.12256 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -2.46009    1.53443  -1.603   0.1698  
## adult_population  0.56594    0.14439   3.920   0.0112 *
## winter_condition  0.07058    0.12461   0.566   0.5956  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2252 on 5 degrees of freedom
## Multiple R-squared:  0.8885, Adjusted R-squared:  0.8439 
## F-statistic: 19.92 on 2 and 5 DF,  p-value: 0.004152
# ANOVA table for model2 (Df, sums of squares and F test per term).
anova(model2)
## Analysis of Variance Table
## 
## Response: No_of_fawn
##                  Df  Sum Sq Mean Sq F value   Pr(>F)   
## adult_population  1 2.00505 2.00505 39.5204 0.001497 **
## winter_condition  1 0.01628 0.01628  0.3208 0.595580   
## Residuals         5 0.25367 0.05073                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Full model: regress the fawn count on all three remaining columns of df.
model3 <- lm(formula = No_of_fawn ~ ., data = df)
summary(object = model3)
## 
## Call:
## lm(formula = No_of_fawn ~ ., data = df)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
## -0.11533 -0.02661  0.09882 -0.11723  0.02734 -0.04854  0.11715  0.06441 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          -5.92201    1.25562  -4.716   0.0092 **
## adult_population      0.33822    0.09947   3.400   0.0273 * 
## annual_precipitation  0.40150    0.10990   3.653   0.0217 * 
## winter_condition      0.26295    0.08514   3.089   0.0366 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1209 on 4 degrees of freedom
## Multiple R-squared:  0.9743, Adjusted R-squared:  0.955 
## F-statistic: 50.52 on 3 and 4 DF,  p-value: 0.001229
# ANOVA table for model3 (Df, sums of squares and F test per term).
anova(model3)
## Analysis of Variance Table
## 
## Response: No_of_fawn
##                      Df  Sum Sq Mean Sq  F value    Pr(>F)    
## adult_population      1 2.00505 2.00505 137.1117 0.0003042 ***
## annual_precipitation  1 0.07196 0.07196   4.9210 0.0907804 .  
## winter_condition      1 0.13949 0.13949   9.5391 0.0366262 *  
## Residuals             4 0.05849 0.01462                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1