STAT Simulation HW2

Problem 1 Ch 5 Problem 3

Part A.

Chevalier de Mere game - roll 4 dice, if at least one is 6 you win! Otherwise you lose.

I am assuming one round entails 4 throws.

My code prints out the four throws of the dice.

dice <- function() {
  rolls <- sample(1:6,size=4, replace =TRUE)
  if (6 %in% rolls)
  print(paste("Winner, one of your four rolls is a six"))
  else print(paste("Loser, none of your four rolls is a six"))
  print(rolls)
}

dice()

## [1] "Loser, none of your four rolls is a six"
## [1] 3 3 5 5

Part B.

The function sixes outputs TRUE if there is at least one six in n rolls of a fair dice and FALSE otherwise. It also prints the dice rolls so its clear what the dice rolls you have are.

To give n the default value of 4, set size = 4.

sixes <- function(n=4) {
  rolls <- sample(1:6,size=n, replace =TRUE)
  if (6 %in% rolls)
  print(paste("TRUE"))
  else print(paste("FALSE"))
  print(rolls)
}

sixes()

## [1] "TRUE"
## [1] 6 5 4 1

Part C.

The function sixes has been modified to include a while loop that counts the number of TRUEs and the number of FALSEs.

It returns the theoretical probability, the probability of getting at least one 6 in n rolls, and then it outputs the accuracy.

#Accuracy
true_count <- 0
false_count <- 0
sixes <- function(n, N) {
counter = 1
while (counter <= N)
{
  rolls <- sample(1:6,size=n, replace =TRUE)
  if (6 %in% rolls)
  true_count <- true_count + 1
  else false_count <- false_count + 1
  counter = counter + 1
}
theoretical <- print(1-(5/6)**n)
theoretical
true <- print(true_count/(false_count+true_count))
true
accuracy <- theoretical - true
accuracy
}

#Theoretical Value (same for all), True Proportion Value, Accuracy (Theoretical - True) for each N
sixes(4,10)

## [1] 0.5177469
## [1] 0.5

## [1] 0.01774691

sixes(4,100)

## [1] 0.5177469
## [1] 0.61

## [1] -0.09225309

sixes(4,1000)

## [1] 0.5177469
## [1] 0.502

## [1] 0.01574691

Now we are looking at the variability. I used the same code as above and the replicate function to run each program 10 times.I notice the obvious again- as the value of N increases, the variability between the proportions decreases.

#Variability
true_count <- 0
false_count <- 0
sixes <- function(n, N) {
counter = 1
while (counter <= N)
{
  rolls <- sample(1:6,size=n, replace =TRUE)
  if (6 %in% rolls)
  true_count <- true_count + 1
  else false_count <- false_count + 1
  counter = counter + 1
}
true <- print(true_count/(false_count+true_count))
true
}

#Variability for N=10
output_10 <-replicate(10, sixes(4,10), simplify=TRUE)

## [1] 0.4
## [1] 0.7
## [1] 0.7
## [1] 0.5
## [1] 0.8
## [1] 0.6
## [1] 0.5
## [1] 0.6
## [1] 0.5
## [1] 0.8

sd(output_10)

## [1] 0.137032

#Variability for N=100
output_100 <-replicate(10, sixes(4,100), simplify=TRUE)

## [1] 0.54
## [1] 0.51
## [1] 0.49
## [1] 0.53
## [1] 0.53
## [1] 0.61
## [1] 0.56
## [1] 0.46
## [1] 0.55
## [1] 0.48

sd(output_100)

## [1] 0.04351245

#Variability for N=100
output_1000 <-replicate(10, sixes(4,1000), simplify=TRUE)

## [1] 0.53
## [1] 0.505
## [1] 0.501
## [1] 0.506
## [1] 0.52
## [1] 0.521
## [1] 0.536
## [1] 0.516
## [1] 0.503
## [1] 0.55

sd(output_1000)

## [1] 0.01608864

Part D.

Here, we label a win as a TRUE and a loss as a FALSE. We output this to a csv file and then read that same file in.

This was a great excercise.

x<-as.vector(c())
sixes <- function(n, N) {
counter = 1
while (counter <= N)
{
  rolls <- sample(1:6,size=n, replace =TRUE)
  if (6 %in% rolls)
  x <- c(x, "TRUE")
  else 
  x <- c(x, "FALSE")
  counter <- counter + 1
}
x}

output <- sixes(4,10)

write.table(output, file="hw2output.csv", sep="",
col.names=NA,qmethod="double")

input <- read.csv(file="hw2output.csv", header=TRUE, sep="")
input

##     X     x
## 1   1  TRUE
## 2   2  TRUE
## 3   3 FALSE
## 4   4  TRUE
## 5   5  TRUE
## 6   6  TRUE
## 7   7 FALSE
## 8   8 FALSE
## 9   9  TRUE
## 10 10  TRUE

sum(input$x == "TRUE")/nrow(input)

## [1] 0.7

Problem 2 Ch 5 Problem 5

I tried it for different values of x and r and posted a few graphs that I found interesting.

require(ggplot2)

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.5.2

dds <- function(x1, r, n){
x <- rep(NA,n)  
for (i in 1:n) {
  if (i==1) {x[1] = x1}
  else {
    x[i] <- r*x[i-1]*(1-x[i-1])
  }
}
outdf <- data.frame(X = c(1:n), Y=x)
plotdds <- ggplot(outdf, aes(x=X, y=Y)) + geom_point()
return(plotdds)
}

#As R increases the concavity changes 
dds(x1=0.25,r=1,n=10)

dds(x1=0.25,r=2,n=10)

dds(x1=0.25,r=3,n=10)

#As X increases, the line has less of a curve
dds(x1=0.3,r=1,n=10)

dds(x1=0.5,r=1,n=10)

dds(x1=1,r=1,n=10)

Problem 3

#Using Base R
#I chose to use 10 bins because I felt it provided a better picture
x <- mtcars$mpg 
hist(x, breaks=10, xlab="Miles Per Gallon", xlim=c(0,40), main="Histogram of Miles Per Gallon")

#Using ggplot
require(ggplot2)
ggplot(data=mtcars, aes(x=mpg))+geom_histogram(bins=20)+ylab("Number of Cars") + xlab("MPG") + ggtitle("Miles per gallon") + xlim(0,40)

## Warning: Removed 2 rows containing missing values (geom_bar).

Problem 4

#Using Base R
plot(mtcars$wt, mtcars$mpg, main="Scatterplot of Weight vs. MPG", 
    xlab="Car Weight ", ylab="Miles Per Gallon ", pch=19)

#Using ggplot
ggplot(mtcars, aes(wt, mpg)) + geom_point() + ylab("Weight") + xlab("Miles Per Gallon") + ggtitle("Scatterplot of Miles Per Gallon")

Problem 5

#PLOT 1 
ggplot(aes(y = mpg, x = factor(am), fill=factor(am)), data = mtcars) + geom_violin() + ggtitle("Miles per Gallon (MPG) by Transmission") + xlab("Transmission") + ylab("Miles per Gallon (MPG") + scale_fill_discrete(name="Type", breaks=c("0", "1"),labels=c("Automatic","Manual")) + theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

#PLOT 2
ggplot(data=diamonds,aes(x=carat,y=price))+geom_point()+
  facet_wrap(~cut)+geom_smooth()

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'