# NOTE(review): hard-coded absolute path -- the script only runs on a machine
# where this folder exists. Every data file below is read relative to it.
setwd("F:/Documents/Work/Courses/Stats_LUND_2012_October/Exercises/Ex_02")

# 1. Read GUD56.csv.
gud56 <- read.csv(file = "GUD56.csv")

# 2. Read GUD34.csv.
gud34 <- read.csv(file = "GUD34.csv")

# 3. Stack the two data frames row-wise into a single data frame.
gud <- rbind(gud56, gud34)

# 4. Inspect the combined data: per-column summary, then the column names.
summary(gud)
##       twig           yr              pid           gud       
##  O37    :  4   Min.   :93.0   ROG   BOG: 32   Min.   : 0.00  
##  O38    :  4   1st Qu.:93.0   RGR   BYL: 15   1st Qu.: 0.29  
##  O50    :  4   Median :95.0   YRY   BMY: 13   Median : 0.88  
##  O54    :  4   Mean   :94.5   RLB   BOO: 13   Mean   : 1.51  
##  O1     :  3   3rd Qu.:96.0   ROV   RLL: 13   3rd Qu.: 2.08  
##  O2     :  3   Max.   :96.0   YGB   BGG: 13   Max.   :11.71  
##  (Other):201                  (Other)  :124
names(gud)
## [1] "twig" "yr"   "pid"  "gud"

# Make a new dataframe called aveGUD containing the identity of the pairs,
# and the average values of gud for each pair (averaged across years).
#
# Only the numeric columns (yr, gud) are handed to aggregate(). Aggregating
# the whole data frame also calls mean() on the non-numeric twig and pid
# columns, which returns NA with the warning "argument is not numeric or
# logical: returning NA" once per pair and per column, and leaves NA columns
# that then have to be removed or overwritten by hand.
#
# Naming the grouping element 'pid' makes the key column come out as 'pid'
# directly, instead of 'Group.1' that has to be copied back and dropped.
# na.rm = TRUE is forwarded to mean(), so missing values are ignored within
# each pair.
aveGUD <- aggregate(
  gud[c("yr", "gud")],
  by = list(pid = gud$pid),
  FUN = mean,
  na.rm = TRUE
)

# 5. No clean-up step is needed: aveGUD holds exactly the columns
# pid, yr and gud, with one row per pair.

# 6. Read in file FLEDGE
fledge <- read.csv("FLEDGE.csv")

# Aggregate this dataframe into a new one called aveFLE with the average
# number of fledglings produced by each pair over the years.
#
# Only the numeric columns are averaged. Passing the whole data frame makes
# aggregate() call mean() on the non-numeric pid column too, which returns NA
# with a warning for every pair and forces the pair ID to be restored from
# 'Group.1' afterwards. Naming the grouping element 'pid' yields the key
# column under its final name straight away.
#
# NOTE(review): assumes FLEDGE.csv holds exactly the columns pid, yr and
# fledge, as the column names of the merged data frame recorded further down
# indicate -- confirm if the file layout changes.
aveFLE <- aggregate(
  fledge[c("yr", "fledge")],
  by = list(pid = fledge$pid),
  FUN = mean,
  na.rm = TRUE
)

# 7. Build the final data frame holding both the average gud and the average
# number of fledglings for each pair, joined on the pair ID 'pid'.
# all = TRUE keeps every pair from both inputs: a pair that appears in only
# one of the two data frames is retained, with NA in the columns coming from
# the other one.
merged.data <- merge(x = aveFLE, y = aveGUD, by = "pid", all = TRUE)

# 8. Finally, plot average number of fledglings against average gud.
names(merged.data)
## [1] "pid"    "yr.x"   "fledge" "yr.y"   "gud"

# Plot n fledglings against average giving up density.
# Columns are referenced through the formula plus data = merged.data rather
# than merged.data$..., so the same variable names are used for the plot and
# for the model below.
plot(
  fledge ~ gud,
  data = merged.data,
  xlab = "Giving up density",
  ylab = "Fledglings, number"
)

# Do a linear regression for this model. Supplying the data frame via
# 'data =' gives the coefficient the plain name 'gud' and lets the fitted
# model be reused with predict() on new data. Pairs with a missing value in
# either variable are dropped by lm().
lm.01 <- lm(fledge ~ gud, data = merged.data)

# Add a linear regression line to the plot
abline(lm.01, col = "blue", lwd = 2, lty = 2)

## plot of chunk unnamed-chunk-1


# Get a summary of the linear regression model, including the p value,
# which is <0.05, thus significant at an alpha value of 0.05.
# Only 18 of the 40 pairs enter the fit; 22 are dropped because they lack
# either the gud or the fledgling average.
# The numbers below are from the recorded run; the call and the coefficient
# label are shown as they print for the 'data =' form of the model.
summary(lm.01)
## 
## Call:
## lm(formula = fledge ~ gud, data = merged.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7339 -0.4216  0.0433  0.6235  1.6726 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.196      0.562    5.69  3.4e-05 ***
## gud            0.846      0.383    2.21    0.042 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 1.06 on 16 degrees of freedom
##   (22 observations deleted due to missingness)
## Multiple R-squared: 0.233,   Adjusted R-squared: 0.185 
## F-statistic: 4.87 on 1 and 16 DF,  p-value: 0.0423