This report was automatically generated with the R package knitr
(version 0.8).

# Install knitr only when it is missing: calling install.packages() on a
# package that is already in use is refused with a warning.
if (!requireNamespace("knitr", quietly = TRUE)) {
  install.packages("knitr")
}
library("knitr")

# NOTE(review): machine-specific absolute path; the relative file name passed
# to read.csv() below is resolved against this working directory.
setwd("F:/Documents/Work/Courses/Stats_LUND_2012_October/Exercises/Ex_01")

# Read the data set from the working directory into a data frame.
tits <- read.csv("tits.csv")

# List the column names of the data frame.
names(tits)
## [1] "spe" "wei" "egg"

tits
##    spe  wei egg
## 1   TX 17.3  10
## 2   TX 17.2   6
## 3   TX 14.5   9
## 4   TX 17.8   4
## 5   TX 18.0   7
## 6   TX 18.3   8
## 7   BM  9.3  10
## 8   TX 15.1   9
## 9   TX   NA  10
## 10  TX 15.8  10
## 11  TX 16.9   7
## 12  TX 16.2   9
## 13  BM 11.0  11
## 14  TX 16.8   7
## 15  TX 16.3  11
## 16  TX   NA   7
## 17  BM 11.8  13
## 18  TX 16.5  11
## 19  TX   NA   6
## 20  BM 11.9  12
## 21  TX 17.8   9
## 22  BM 10.6  10
## 23  TX 17.6   6
## 24  TX 18.8  10
## 25  TX 19.2  11
## 26  TX 16.5   9
## 27  BM 11.9  11
## 28  TX   NA   9
## 29  BM 11.9  10
## 30  TX 19.5   8
## 31  BM 10.8  13
## 32  TX 18.1   8
## 33  TX 18.6   9
## 34  TX 18.8   9
## 35  TX   NA   8
## 36  TX   NA   7
## 37  TX 19.2   9
## 38  TX   NA   8
## 39  TX   NA   8
## 40  TX   NA   5
## 41  BM 11.2  12
## 42  TX 18.8   7
## 43  BM 12.3  13
## 44  TX 19.3  11
## 45  TX   NA  10
## 46  TX 18.7   7
## 47  BM 11.3  12
## 48  TX   NA   9
## 49  TX 18.7  10
## 50  TX 17.5   7
## 51  TX   NA   6
## 52  TX   NA   6
## 53  TX   NA   9
## 54  TX   NA   8
## 55  TX 16.9   8
## 56  TX 18.0  11
## 57  TX 18.3  10
## 58  TX 17.6  10
## 59  TX   NA   8
## 60  BM  8.3   8
## 61  TX   NA   9
## 62  TX 17.5   8
## 63  TX   NA  11
## 64  TX 17.5  11
## 65  TX 19.5   8
## 66  BM 11.1  13
## 67  TX 18.8  11
## 68  TX 15.9   7
## 69  TX   NA   9
## 70  TX 17.0   7
## 71  TX 16.6   9
## 72  TX 16.8   9
## 73  BM 11.7   9
## 74  BM 11.9  13
## 75  BM 12.0  11
## 76  TX 18.6  12
## 77  TX   NA   7
## 78  TX 18.3   9
## 79  BM   NA   7
## 80  TX   NA   6
## 81  BM 11.7  11
## 82  TX 15.8  11
## 83  TX 13.2  10
## 84  TX   NA   6
## 85  BM   NA  10
## 86  TX   NA   9
## 87  BM   NA  10
## 88  BM 10.2  12
## 89  TX 13.4   9
## 90  TX 17.3  11

# Declare species as a factor. read.csv() only does this automatically when
# stringsAsFactors is TRUE (the default before R 4.0.0), so convert explicitly
# and store the result; a bare factor(tits$spe) would only print it.
tits$spe <- factor(tits$spe)
tits$spe
##  [1] TX TX TX TX TX TX BM TX TX TX TX TX BM TX TX TX BM TX TX BM TX BM TX
## [24] TX TX TX BM TX BM TX BM TX TX TX TX TX TX TX TX TX BM TX BM TX TX TX
## [47] BM TX TX TX TX TX TX TX TX TX TX TX TX BM TX TX TX TX TX BM TX TX TX
## [70] TX TX TX BM BM BM TX TX TX BM TX BM TX TX TX BM TX BM BM TX TX
## Levels: BM TX

# 5.
tits$names <- ifelse(tits$spe == "TX", "Great tit", "Blue tit")  # create a new variable containing the full common name of each species; with only two species, ifelse() is enough

# 6. alternatively, assign one value at a time, which also works when there
# are more than two possibilities
tits$names[tits$spe == "TX"] <- "Great tit"
tits$names[tits$spe == "BM"] <- "Blue tit"

# 7. some summary statistics
tits$names <- as.factor(tits$names)  # make names a factor first, so summary() tabulates it
summary(tits)
##  spe          wei            egg              names   
##  BM:21   Min.   : 8.3   Min.   : 4.00   Blue tit :21  
##  TX:69   1st Qu.:12.0   1st Qu.: 8.00   Great tit:69  
##          Median :16.9   Median : 9.00                 
##          Mean   :15.7   Mean   : 9.12                 
##          3rd Qu.:18.1   3rd Qu.:11.00                 
##          Max.   :19.5   Max.   :13.00                 
##          NA's   :26.0
mean(tits$egg)
## [1] 9.122
# Column means for the numeric columns only: mean() is not defined for the
# factor columns (spe, names) and would return NA with a warning for each.
vapply(Filter(is.numeric, tits), mean, numeric(1), na.rm = TRUE)
##    wei    egg 
## 15.652  9.122

# 8. To get values by species, use the 'describeBy' function, available in
# package 'psych' ('describe.by' is its former name and is not found here)
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library("psych")
## Warning: package 'psych' was built under R version 2.14.2
describeBy(tits, group = tits$names)  # each variable is summarised, grouped by species

# 9. t-tests to compare the species, using the formula form t.test(y ~ x).
# As the output headers show, this runs Welch's two-sample test, which does
# not assume equal variances in the two groups.
t.test(tits$wei ~ tits$names)
## 
##  Welch Two Sample t-test
## 
## data:  tits$wei by tits$names 
## t = -19.1, df = 44.22, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -6.907 -5.588 
## sample estimates:
##  mean in group Blue tit mean in group Great tit 
##                   11.16                   17.41
t.test(tits$egg ~ tits$names)
## 
##  Welch Two Sample t-test
## 
## data:  tits$egg by tits$names 
## t = 5.751, df = 33.53, p-value = 1.901e-06
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  1.583 3.315 
## sample estimates:
##  mean in group Blue tit mean in group Great tit 
##                  11.000                   8.551

# both wei and egg differ significantly between the two species
# (p < 0.001 in each test)

# 10. scatterplot of eggs by weight
# Spell out the column name in full (tits$eg only worked through partial
# matching of `$`), and go through factor() so the species codes map to
# colour numbers even when spe is stored as character.
plot(tits$wei, tits$egg, col = as.numeric(factor(tits$spe)))  # colouring data points by species.

plot of chunk auto-report


# 11. split the data into one data frame per species; inside subset() the
# column can be referred to by its bare name
great <- subset(tits, spe == "TX")
blue <- subset(tits, spe == "BM")

# 12. now build up the plot above, but adding the data from one species
# afterwards. Axis limits are taken from the full data set so that the points
# of both species fit in the plotting region.
plot(egg ~ wei, data = great, xlab = "weight", ylab = "eggs",
     xlim = range(tits$wei, na.rm = TRUE),
     ylim = range(tits$egg, na.rm = TRUE))
points(egg ~ wei, data = blue, col = "red")
lm.1 <- lm(egg ~ wei, data = tits)  # make a regression object, used to apply a best fit line
abline(lm.1, lty = 2, lwd = 2)  # add the above regression line to the plot

# 13. can change the appearance of the line with lwd (width), lty (type),
# col (colour) etc.
abline(lm.1, lwd = 4)
abline(lm.1, lwd = 2, col = "red")
abline(lm.1, lty = 2)

plot of chunk auto-report


# 14. read in a new data set (file name is relative to the working directory)
xydata <- read.csv("xydata.csv")

# List the column names of the new data frame.
names(xydata)
## [1] "x" "y"

# Per-column summary statistics of the new data frame.
summary(xydata)
##        x               y       
##  Min.   : 3.61   Min.   :3.32  
##  1st Qu.:21.25   1st Qu.:4.55  
##  Median :29.06   Median :4.78  
##  Mean   :27.91   Mean   :4.69  
##  3rd Qu.:34.09   3rd Qu.:4.92  
##  Max.   :48.32   Max.   :5.21

# Scatterplot of y against x, with the fitted line from a simple linear
# regression of y on x overlaid as a thick dashed blue line.
plot(y ~ x, data = xydata)
lm.2 <- lm(y ~ x, data = xydata)
abline(lm.2, lty = 2, lwd = 2, col = "blue")

plot of chunk auto-report


# There is evidence that the relationship is not linear: points at both low
# and high values of x fall below the regression line, suggesting a
# curvilinear relationship. We therefore take the natural logarithm of x and
# plot again.

# Store the natural log of x as a new column, fit y on the transformed
# predictor, then redraw the scatterplot with the fitted line overlaid.
xydata[["ln.x"]] <- log(xydata[["x"]])
lm.3 <- lm(y ~ ln.x, data = xydata)
plot(y ~ ln.x, data = xydata)
abline(lm.3, col = "blue", lty = 2, lwd = 2)

plot of chunk auto-report

# this second model is much improved, the points falling much more nicely
# along the line.

The R session information (including the OS info, R version and all
packages used):

sessionInfo()
## R version 2.14.1 (2011-12-22)
## Platform: x86_64-pc-mingw32/x64 (64-bit)
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] psych_1.2.8 knitr_0.8  
## 
## loaded via a namespace (and not attached):
## [1] digest_0.5.2   evaluate_0.4.2 formatR_0.6    plyr_1.7.1    
## [5] stringr_0.6.1  tools_2.14.1
Sys.time()
## [1] "2012-10-22 15:00:07 CEST"