# Install rmarkdown only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
# The package name must be in plain ASCII quotes; typographic quotes do not parse.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}

# Random seed (student ID) and sample size.
seed <- 48183130
n <- 200

# Population of 1000 scores on a 1-6 scale:
# 100 ones, 150 twos, 250 threes, 250 fours, 150 fives and 100 sixes.
pop <- rep(c(1, 2, 3, 4, 5, 6), times = c(100, 150, 250, 250, 150, 100))

# Draw n values without replacement. The seed comes from `seed` above, so
# changing it in one place is enough to reproduce a different sample.
set.seed(seed)
data <- data.frame(X = sample(pop, n, replace = FALSE))
# tail(data)

# Histogram of the sampled X values with one bin per integer value.
# Break points run from the smallest observed value to one past the largest,
# and right = FALSE makes the bins left-closed, i.e. [k, k + 1).
histdat <- hist(
  data$X,
  breaks = seq(from = min(data$X), to = max(data$X) + 1),
  right = FALSE,
  main = "Histogram of X values",
  xlab = "X",
  ylab = "Frequency"
)

# Frequency table: one row per distinct X value with its count f.
# base::table() is called explicitly because the result is stored in a
# variable that is itself named `table`.
table <- data.frame(base::table(data$X))  # fill in the table on your homework
colnames(table) <- c("X", "f")  # label columns X and f

# table() returns X as a factor. Convert through character to recover the
# actual values: as.numeric() applied directly to a factor yields the level
# codes (1, 2, 3, ...), which are wrong whenever a value is absent from the
# sample or the values do not start at 1.
table$X <- as.numeric(as.character(table$X))

# Percent of the sample at each value of X.
table$per <- table$f / sum(table$f) * 100

# True percentile rank: cumulative percent at or below each value of X.
table$tpr <- cumsum(table$per)

# Sample mean of X, stored and then echoed to the console.
meanX <- mean(data$X)
meanX

## [1] 3.31

# Sample standard deviation of X, stored and then echoed to the console.
sdX <- sd(data$X)
sdX

## [1] 1.454123

# Z score of each X value, rounded to two decimals.
table[["Z"]] <- round((table[["X"]] - meanX) / sdX, digits = 2)

# Estimated percentile rank: normal-curve area below Z, as a percent.
table[["estpr"]] <- 100 * round(pnorm(table[["Z"]]), digits = 4)

# Error of the normal approximation: estimated minus true percentile rank.
table[["error"]] <- round(table[["estpr"]] - table[["tpr"]], 2)

# Show the completed table on the console.
table

##   X  f  per   tpr     Z estpr  error
## 1 1 27 13.5  13.5 -1.59  5.59  -7.91
## 2 2 30 15.0  28.5 -0.90 18.41 -10.09
## 3 3 56 28.0  56.5 -0.21 41.68 -14.82
## 4 4 47 23.5  80.0  0.47 68.08 -11.92
## 5 5 21 10.5  90.5  1.16 87.70  -2.80
## 6 6 19  9.5 100.0  1.85 96.78  -3.22

# Plot true (blue) and estimated (red) percentile rank against X.
# ylim is fixed to the full 0-100 percent scale: by default plot() sizes the
# y-axis from the first series (tpr) only, so estimated values that fall below
# the smallest tpr would be drawn outside the plotting region and clipped.
plot(
  table$X, table$tpr,
  main = "True vs. Estimated Percentile Rank",
  type = "l",
  col = "blue",
  xlab = "X",
  ylab = "Percentile Rank",
  ylim = c(0, 100)
)
lines(table$X, table$estpr, type = "l", col = "red")

legend(
  x = 1, y = 100,
  legend = c("True", "Estimated"),
  col = c("blue", "red"),
  cex = 0.8,
  lwd = 1
)