Gravett_DataContainersHW

# Set the knitr chunk option `echo` to TRUE for all chunks, so each chunk's
# source code is shown in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Create a data.frame object with examples of numeric, character, logical, and factor data types.  Summarize the data.frame object in total as well as for each type within the text (e.g., in-text R component).

# One example vector per basic type: numeric, character, character (turned
# into a factor inside the data frame below), and logical.
Height <- c(1.3, 1.6, 1.2, 1.8, 1.9)
Name <- c("John", "Dora", "Kevin", "Mike", "Sarah")
IQ <- c("Smart", "Smart", "Dumb", "Smartest", "Dumbest")
Is.Doctor <- c(FALSE, FALSE, FALSE, TRUE, FALSE)

# Build the data frame with data.frame() rather than as.data.frame(cbind(...)):
# cbind() first creates a matrix, which can hold only one type, so every column
# would be coerced to character (or factor) and the numeric and logical columns
# would be lost. IQ is converted to a factor explicitly so the frame holds all
# four requested types; stringsAsFactors = FALSE keeps Name as character on
# every R version.
df1 <- data.frame(
  Height,
  Name,
  IQ = factor(IQ),
  Is.Doctor,
  stringsAsFactors = FALSE
)
summary(Height)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.20    1.30    1.60    1.56    1.80    1.90

summary(Name)

##    Length     Class      Mode 
##         5 character character

summary(IQ)

##    Length     Class      Mode 
##         5 character character

summary(Is.Doctor)

##    Mode   FALSE    TRUE 
## logical       4       1

summary(df1)

##      Height         Name                  IQ    Is.Doctor
##  Min.   :1.20   Length:5           Dumb    :1   Mode :logical
##  1st Qu.:1.30   Class :character   Dumbest :1   FALSE:4
##  Median :1.60   Mode  :character   Smart   :2   TRUE :1
##  Mean   :1.56                      Smartest:1
##  3rd Qu.:1.80
##  Max.   :1.90

# Create 2 matrices of type numeric, each with 4 rows and 4 columns with 16 of your favorite numbers.  In the text, describe the processes of matrix multiplication and kronecker products and demonstrate it with your example matrices.

# Two 4 x 4 numeric matrices. matrix() fills column by column, so each line of
# four values below becomes one column of the result.
x <- matrix(
  c(
    12, 28, 33, 42,
    10, 9, 8, 13,
    7, 1, 55, 61,
    34, 87, 68, 42
  ),
  nrow = 4,
  ncol = 4
)
z <- matrix(
  c(
    27, 26, 13, 14,
    99, 18, 85, 12,
    16, 8, 62, 65,
    24, 9, 1, 99
  ),
  nrow = 4,
  ncol = 4
)

# Matrix multiplication falls into two general categories: scalar multiplication, in which a single number is multiplied with every entry of a matrix, and matrix multiplication, in which an entire matrix is multiplied by another entire matrix.

# Scalar Multiplication

# Multiplying by a scalar doubles every entry of x.
2 * x

##      [,1] [,2] [,3] [,4]
## [1,]   24   20   14   68
## [2,]   56   18    2  174
## [3,]   66   16  110  136
## [4,]   84   26  122   84

# Matrix multiplication can only be performed if the number of columns of the first matrix matches the number of rows of the second matrix. This is due to how each entry of the product matrix is calculated: the entry in row i and column j is obtained by multiplying each number in row i of the first matrix by the corresponding number in column j of the second matrix and then summing those products. For example, the top-left entry of the product below is 12*27 + 10*26 + 7*13 + 34*14 = 1151.

# x and z are both 4 x 4, so ncol(x) equals nrow(z) and the matrix product is
# defined; %*% is R's matrix-multiplication operator (plain * is elementwise).
x %*% z

##      [,1]  [,2] [,3] [,4]
## [1,] 1151  2371 2916 3751
## [2,] 2221  4063 6237 9367
## [3,] 2766  8902 8422 7651
## [4,] 2853 10081 7288 5344

#The Kronecker product is a different way to multiply two matrices. Each entry of the first matrix is replaced by a copy of the entire second matrix scaled by that entry, so the result is a grid of scaled blocks. If you multiply two 4x4 matrices, the resulting matrix will have 16 rows and 16 columns, as shown below.

# Kronecker product of x and z, written with the function that %x% aliases.
kronecker(x, z)

##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]  324 1188  192  288  270  990  160  240  189   693   112   168   918
##  [2,]  312  216   96  108  260  180   80   90  182   126    56    63   884
##  [3,]  156 1020  744   12  130  850  620   10   91   595   434     7   442
##  [4,]  168  144  780 1188  140  120  650  990   98    84   455   693   476
##  [5,]  756 2772  448  672  243  891  144  216   27    99    16    24  2349
##  [6,]  728  504  224  252  234  162   72   81   26    18     8     9  2262
##  [7,]  364 2380 1736   28  117  765  558    9   13    85    62     1  1131
##  [8,]  392  336 1820 2772  126  108  585  891   14    12    65    99  1218
##  [9,]  891 3267  528  792  216  792  128  192 1485  5445   880  1320  1836
## [10,]  858  594  264  297  208  144   64   72 1430   990   440   495  1768
## [11,]  429 2805 2046   33  104  680  496    8  715  4675  3410    55   884
## [12,]  462  396 2145 3267  112   96  520  792  770   660  3575  5445   952
## [13,] 1134 4158  672 1008  351 1287  208  312 1647  6039   976  1464  1134
## [14,] 1092  756  336  378  338  234  104  117 1586  1098   488   549  1092
## [15,]  546 3570 2604   42  169 1105  806   13  793  5185  3782    61   546
## [16,]  588  504 2730 4158  182  156  845 1287  854   732  3965  6039   588
##       [,14] [,15] [,16]
##  [1,]  3366   544   816
##  [2,]   612   272   306
##  [3,]  2890  2108    34
##  [4,]   408  2210  3366
##  [5,]  8613  1392  2088
##  [6,]  1566   696   783
##  [7,]  7395  5394    87
##  [8,]  1044  5655  8613
##  [9,]  6732  1088  1632
## [10,]  1224   544   612
## [11,]  5780  4216    68
## [12,]   816  4420  6732
## [13,]  4158   672  1008
## [14,]   756   336   378
## [15,]  3570  2604    42
## [16,]   504  2730  4158

#Dr. D has been growing hops to support his craft beer hobby and has established a field of Cascade hops for his favorite Pale Ale.  The yields on this patch have been 1.11, 1.46, 1.91, 6.15, 8.43, and 13.82 pounds per year.  Following from the example in the lecture, construct a regression of yield as a function of year for these data.  Make a plot and describe the components of the basic lm() model textually as you would if you were writing a manuscript.

library(ggplot2)

# Read the hop-yield records from the published Google Sheet (CSV export).
# The model and plot below use its `Year` and `Yield` columns.
url <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSWwoYeXnxJiD2zAbgKAku-bc5e_J57s3L0GTQL8yuUI_Y7_iBwKtwv0sz1SxsZj28baYBV62-PkdKk/pub?output=csv")

data <- url

# Fit yield as a function of year by ordinary least squares, as the exercise
# asks, and print the coefficient table, R-squared and F statistic that the
# write-up describes.
yield_fit <- lm(Yield ~ Year, data = data)
summary(yield_fit)

# Year is the predictor (x axis) and Yield is the response (y axis). The
# smoother uses the same linear formula as `yield_fit`, so the drawn line is
# that fitted regression line.
ggplot(data, aes(x = Year, y = Yield)) +
  stat_smooth(se = FALSE, formula = y ~ x, method = "lm") +
  geom_point(size = 2)

# lm is used to fit linear models. It can be used to carry out regression, single stratum analysis of variance and analysis of covariance. lm uses a least squares regression model to estimate the trendline. The Least Squares Regression Line is the line that makes the sum of the squared vertical distances from the data points to the regression line as small as possible. It’s called “least squares” because the best line of fit is the one that minimizes the sum of the squares of the errors (the residuals).

Gravett_DataContainersHW

Brandon Gravett

10/27/2019