ANLY 505 - Problem Set #1

Questions

Simulate data for 30 draws from a normal distribution where the means and standard deviations vary among three distributions.

# place the code to simulate the data here
# Fix the RNG state so the 30 draws below are reproducible.
set.seed(111)
# The three means and sds are recycled across the 30 draws, so consecutive
# draws cycle through N(-5, 2), N(0, 1) and N(5, 3).
dist_means <- c(-5, 0, 5)
dist_sds <- c(2, 1, 3)
p1 <- rnorm(n = 30, mean = dist_means, sd = dist_sds)
print(p1)

##  [1] -4.52955858 -0.33073587  4.06512853 -9.60469132 -0.17087604
##  [6]  5.42083468 -7.99485331 -1.01018842  2.15457319 -5.98792443
## [11] -0.17367413  3.78020366 -1.30872747  0.39405411  7.39258550
## [16] -8.13333072 -0.08585101  3.92258156 -7.38721793  0.36418674
## [21]  6.08498735 -4.30607126  0.18973653  4.52126958 -4.34690152
## [26]  0.59825420 -0.52460290  0.43611120  0.19124439  1.09611180

Simulate 2 continuous variables from a normal distribution (n = 20 each) and plot the relationship between them.

# place the code to simulate the data here
library(ggplot2)

# Two independent normal samples of 20 draws each: N(2, 1) and N(1, 2).
n_draws <- 20
p2.1 <- rnorm(n = n_draws, mean = 2, sd = 1)
p2.2 <- rnorm(n = n_draws, mean = 1, sd = 2)
# Pair the samples column-wise so they can be plotted against each other.
p2 <- data.frame(
  X = p2.1,
  Y = p2.2
)
# Scatter plot of Y against X, one point per simulated pair.
ggplot(data = p2, mapping = aes(x = X, y = Y)) +
  geom_point()

Simulate 3 variables (x1, x2 and y). x1 and x2 should be drawn from a uniform distribution and y should be drawn from a normal distribution. Fit a multiple linear regression.

# place the code to simulate the data here
# Simulate two uniform predictors and one normal response, 30 draws each.
x1 <- runif(30, min = 5, max = 10)
x2 <- runif(30, min = 10, max = 20)
y <- rnorm(30, mean = 5, sd = 1)
# Column names must match the formula terms: lm() looks variables up in `data`
# first and only then in the calling environment. With mismatched names the
# model would silently be fitted on the global vectors instead of on p3.
p3 <- data.frame(y = y, x1 = x1, x2 = x2)
# Multiple linear regression of y on both predictors, using the columns of p3.
m1 <- lm(y ~ x1 + x2, data = p3)
summary(m1)

## 
## Call:
## lm(formula = y ~ x1 + x2, data = p3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0293 -0.8099  0.1630  0.6932  1.9869 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.85579    1.22345   3.969  0.00048 ***
## x1          -0.10142    0.15081  -0.673  0.50698    
## x2           0.06902    0.06937   0.995  0.32860    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.091 on 27 degrees of freedom
## Multiple R-squared:  0.03933,    Adjusted R-squared:  -0.03183 
## F-statistic: 0.5527 on 2 and 27 DF,  p-value: 0.5818

Simulate 3 letters, repeating each letter twice, and then repeat the whole sequence 2 times.

# place the code to simulate the data here
# Double each letter (A A B B C C), then repeat that whole sequence twice.
rep(c("A", "B", "C"), times = 2, each = 2)

##  [1] "A" "A" "B" "B" "C" "C" "A" "A" "B" "B" "C" "C"

Create a dataframe (n = 27) with 3 groups, 2 factors and two quantitative response variables. Use the replicate function.

# place the code to simulate the data here
library(knitr)

# Build the 27-row data frame the question asks for: a 3-level group label,
# a 2-level factor, and two quantitative response variables.
n_obs <- 27L
# replicate() evaluates rnorm(n_obs) twice and binds the results into an
# n_obs x 2 matrix of standard-normal draws, one column per response.
quant_draws <- replicate(2, rnorm(n_obs, mean = 0, sd = 1))
p5 <- data.frame(
  # Group labels A/B/C cycle through the rows (9 rows per group).
  Variable.Group = rep(LETTERS[1:3], length.out = n_obs),
  # Factor levels X/Y alternate row by row.
  Variable.Factor = factor(rep(LETTERS[24:25], length.out = n_obs)),
  Variable.quant.1 = quant_draws[, 1],
  # Shifting the standard-normal draws by 5 gives mean 5, sd 1.
  Variable.quant.2 = quant_draws[, 2] + 5,
  # Keep the group column as character rather than converting it to a factor.
  stringsAsFactors = FALSE
)
# Render the simulated data frame as a formatted table in the knitted report.
knitr::kable(x = p5)

Variable.Group	Variable.Factor	Variable.quant.1	Variable.quant.2
A	X	2.5922274	4.906266
B	Y	1.0743470	4.572680
C	X	-1.5969019	5.442979
A	Y	-0.0875886	3.812192
B	X	0.3607676	5.626425
C	Y	-0.8799600	5.361647
A	X	-3.3233350	4.527039
B	Y	-0.4675155	5.944894
C	X	0.4315403	5.906402
A	Y	-0.6039895	5.471774
B	X	0.6744467	5.389500
C	Y	0.6359205	5.521940
A	X	-0.6129704	5.904453
B	Y	0.4148913	6.696791
C	X	0.8773434	4.115663
A	Y	0.0211576	5.856490
B	X	1.8103833	5.174943
C	Y	-0.4520957	4.601495
A	X	-0.1251240	5.275203
B	Y	0.7668004	3.731281
C	X	-0.0915203	4.989919
A	Y	-1.8743058	5.653402
B	X	-0.6641620	5.098580
C	Y	0.2034128	5.902617
A	X	-2.5944434	4.480103

ANLY 505 - Problem Set #1

Alan Hitch

2019-09-16

Directions

Questions