#### Page 6 # 3 ####
cat("(a) Populations: cars produced by first shift; cars produced by second shift.\n")
## (a) Populations: cars produced by first shift; cars produced by second shift.
cat("(b) Hypothetical? Yes conceptual process populations over time.\n")
## (b) Hypothetical? Yes conceptual process populations over time.
cat("(c) Characteristic: nonconformances per car; compare the means mu1 and mu2.\n")
## (c) Characteristic: nonconformances per car; compare the means mu1 and mu2.
#### Page 10 # 6 ####
# make 1000 fake client IDs
clients <- 1:1000
# 100 of them at random
set.seed(1)
sample_clients <- sample(clients, 100)
# how many we got
length(sample_clients)
## [1] 100
head(sample_clients)
## [1] 836 679 129 930 509 471
##### Page 10 # 8 ####
# make fake production: 30% A, 70% B
production <- c(rep("A", 300), rep("B", 700))
# simple random sample of 50
set.seed(1)
srs <- sample(production, 50)
table(srs)
## srs
## A B
## 10 40
# coin flip way: flip coin 50 times, heads=A, tails=B
set.seed(1)
coin_flips <- sample(c("A","B"), 50, replace=TRUE)
# now actually pick 1 item from that facility each time
coin_sample <- c()
for(i in 1:50){
if(coin_flips[i]=="A"){
pick <- sample(which(production=="A"), 1)
coin_sample <- c(coin_sample, production[pick])
} else {
pick <- sample(which(production=="B"), 1)
coin_sample <- c(coin_sample, production[pick])
}
}
table(coin_sample)
## coin_sample
## A B
## 27 23
# coin sample ends up closer to 50/50
# while real production is 30/70. so it's not a true srs.
#### Page 13 # 3 ####
# (a) Univariate example: total nonconformances (engine + transmission together)
total_nonconf <- c(2, 0, 1, 3, 2)
print("Total nonconformances (univariate)")
## [1] "Total nonconformances (univariate)"
print(total_nonconf)
## [1] 2 0 1 3 2
# (b) Check if it's quantitative
print("Is total_nonconf numeric?")
## [1] "Is total_nonconf numeric?"
print(is.numeric(total_nonconf)) # TRUE means quantitative
## [1] TRUE
# (c) Statistical population = all BMW cars produced at Graz plant
# Here we just pretend there are 500 cars in the population
N <- 500
cars <- 1:N
set.seed(1)
sample_cars <- sample(cars, 10) # pick 10 cars randomly
print("Random sample of cars from population:")
## [1] "Random sample of cars from population:"
print(sample_cars)
## [1] 324 167 129 418 471 299 270 466 187 307
# (d) If engine + transmission recorded separately → bivariate
engine <- c(1,0,1,2,1)
trans <- c(1,0,0,1,1)
car_data <- data.frame(engine, trans)
print("Engine and transmission nonconformances (bivariate):")
## [1] "Engine and transmission nonconformances (bivariate):"
print(car_data)
## engine trans
## 1 1 1
## 2 0 0
## 3 1 0
## 4 2 1
## 5 1 1
#### Page 20 # 1 ####
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #had to do this because on personal computer
cs<- read.table("Concr.Strength.1s.Data.txt", header = TRUE)
# see what’s inside
names(cs)
## [1] "Str"
head(cs)
## Str
## 1 43.49
## 2 43.09
## 3 47.68
## 4 44.14
## 5 44.23
## 6 41.52
# grab the strength column
strength <- cs$Str
# histogram
hist(strength, freq = FALSE,
main = "Histogram of Concrete Strength",
xlab = "Compressive Strength (MPa)")
# add density curve
lines(density(strength))

# stem-and-leaf
stem(strength)
##
## The decimal point is at the |
##
## 41 | 5
## 42 | 39
## 43 | 1445788
## 44 | 122357
## 45 | 1446
## 46 | 00246
## 47 | 3577
## 48 | 36
## 49 | 3
#### Page 20 # 4 ####
tempdata <- read.table("Temp.Long.Lat.txt", header = TRUE)
# keep just the numeric columns we need
numdata <- tempdata[, c("JanTemp", "Lat", "Long")]
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# basic 3D scatter (click + drag to rotate)
plot_ly(
data = numdata,
x = ~Long, y = ~Lat, z = ~JanTemp,
type = "scatter3d",
mode = "markers"
) %>%
layout(
title = "3D: January Min Temp vs Latitude & Longitude",
scene = list(
xaxis = list(title = "Longitude"),
yaxis = list(title = "Latitude"),
zaxis = list(title = "Avg Jan Min Temp (°F)")
)
)
#### Page 21 #7 ####
# read the data
bd <- read.table("SpeedStopCarTruck.txt", header = TRUE)
# color by vehicle type
cols <- ifelse(bd$Auto == "Car", "blue", "red")
# scatter plot
plot(bd$Speed, bd$StopDist,
col = cols, pch = 19,
xlab = "Speed (mph)",
ylab = "Stopping Distance (ft)",
main = "Stopping Distance vs Speed (Cars vs Trucks)")
# legend
legend("topleft",
legend = c("Car", "Truck"),
col = c("blue", "red"),
pch = 19, bty = "n")

#### Page 21 #13 ####
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# scatterplot matrix of first 5 variables (Ozone, Solar, Wind, Temp, Month)
pairs(airquality[1:5],
main = "Scatterplot Matrix of Ozone, Solar, Wind, Temp, Month")

# (a) do ozone levels go up on hot days?
# yeah looks like ozone and temp go up together (positive trend).
# so higher ozone is more likely on hot days.
# (b) what about windy days?
# plot shows ozone goes down when wind goes up.
# so nope, ozone is actually lower on windy days.
# (c) what about solar radiation?
# ozone kinda increases when solar radiation is higher.
# so more sunlight = more ozone.
# (d) which month is worst?
# looking at ozone vs month, july (month 7) is the highest.
# so july is when ozone peaks.
#### Page 22 #17 ####
lw <- read.table("ReasonsLateForWork.txt", sep = ",", header = TRUE)
print(lw)
## Reason Percent
## 1 Traffic 34.2
## 2 Child Care 28.0
## 3 Pub Transp 17.5
## 4 Weather 12.2
## 5 Overslept 8.9
## 6 Other 3.4
lw <- lw[order(lw$Percent, decreasing = TRUE), ]
# colors
bar_colors <- rainbow(nrow(lw))
# basic bar plot
barplot(lw$Percent,
names.arg = lw$Reason,
col = bar_colors,
width = 0.8,
las = 2,
main = "Reasons Late for Work — Bar Chart",
xlab = "",
ylab = "Percent")

# (a) PIE CHART
pie(lw$Percent,
labels = paste0(lw$Reason, " (", lw$Percent, "%)"),
col = bar_colors,
main = "Reasons Late for Work — Pie Chart")

# (b) PARETO CHART (bars + cumulative percentage line)
xpos <- barplot(lw$Percent,
names.arg = lw$Reason,
col = bar_colors,
width = 0.8,
las = 2,
ylim = c(0, max(100, sum(lw$Percent))), # keep room for line
main = "Pareto Chart: Reasons Late for Work",
xlab = "",
ylab = "Percent")
# cumulative percentages
cum_perc <- cumsum(lw$Percent)
lines(x = xpos, y = cum_perc, col = "red", lwd = 2)
points(x = xpos, y = cum_perc, col = "red", pch = 19)
abline(h = 80, lty = 2, col = "gray") # 80% reference line (optional)
axis(side = 4, at = seq(0, 100, by = 20), labels = paste0(seq(0, 100, by = 20), "%"))
mtext("Cumulative %", side = 4, line = 2)
legend("topright",
legend = c("Bar height = Percent", "Cumulative %"),
col = c("gray40", "red"),
pch = c(15, 19),
lty = c(NA, 1),
bty = "n")

#### Page 29-31 #1 ####
# (a) sample mean
xbar <- 37
xbar # print it
## [1] 37
# (b) sample standard deviation
s <- 18
s # print it
## [1] 18
# (c) sample proportion
phat <- 0.72
phat # print
## [1] 0.72
#### Page 29-31 #4 ####
# read the data
cs <- read.table("Concr.Strength.1s.Data.txt", header = TRUE)
# column names
names(cs)
## [1] "Str"
Str <- cs$Str
# proportion with strength <= 44
prop_leq_44 <- sum(Str <= 44) / length(Str)
prop_leq_44 # print it
## [1] 0.3125
# proportion with strength >= 47
prop_geq_47 <- sum(Str >= 47) / length(Str)
prop_geq_47 # print it
## [1] 0.21875
#### Page 29-31 #6 ####
# fake data:
population <- c(65,70,72,68,75,80,77,74,69,73)
# function to take one random sample of size 50, return mean + variance
one_sample <- function() {
samp <- sample(population, size = 50, replace = TRUE) # random sample
xbar <- mean(samp) # sample mean
s2 <- var(samp) # sample variance
return(c(xbar, s2))
}
# repeat 5 times
results <- replicate(5, one_sample())
# transpose so each row = one sample
results <- t(results)
# show the 5 pairs (xbar, s^2)
colnames(results) <- c("xbar", "s2")
results
## xbar s2
## [1,] 73.16 18.21878
## [2,] 73.20 22.04082
## [3,] 71.84 13.93306
## [4,] 71.80 18.44898
## [5,] 71.82 17.78327
#### Page 29-31 #9 ####
# (a) population mean (mu) and variance (sigma^2) for a fair die {1,2,3,4,5,6}
vals <- 1:6
mu <- mean(vals) # should be 3.5
sigma2 <- var(vals) * (length(vals)-1)/length(vals)
# note: var() uses sample variance; multiply by (n-1)/n to get population variance
mu
## [1] 3.5
sigma2
## [1] 2.916667
# answer: mu = 3.5 ; sigma^2 = 35/12 ≈ 2.9166667
# (b) take a sample of size 100 with replacement; compute sample mean & variance
set.seed(1) # just so we can all get the same example
x <- sample(1:6, 100, replace = TRUE)
xbar <- mean(x)
s2 <- var(x) # sample variance
xbar
## [1] 3.52
s2
## [1] 3.34303
# comment: xbar should be near 3.5 and s2 near 2.9167, but not exact because of randomness.
# (c) sample proportions for outcomes 1..6
props <- table(x) / 100
props
## x
## 1 2 3 4 5 6
## 0.19 0.18 0.12 0.15 0.15 0.21
#### Page 29-31 #16 ####
# Problem 16
# sample salaries (in $1000 units)
salaries <- c(152,169,178,179,185,188,195,196,198,203,204,209,210,212,214)
# (a) sample mean and variance
mean_salary <- mean(salaries)
var_salary <- var(salaries)
cat("(a) Sample mean =", mean_salary, "\n")
## (a) Sample mean = 192.8
cat("(a) Sample variance =", var_salary, "\n\n")
## (a) Sample variance = 312.3143
# (b)(i)
salaries_raise5000 <- salaries + 5
mean_raise5000 <- mean(salaries_raise5000)
var_raise5000 <- var(salaries_raise5000)
cat("(b)(i) Mean with $5000 raise =", mean_raise5000, "\n")
## (b)(i) Mean with $5000 raise = 197.8
cat("(b)(i) Variance with $5000 raise =", var_raise5000, "\n\n")
## (b)(i) Variance with $5000 raise = 312.3143
# (b)(ii) 5% raise for everyone
salaries_raise5pct <- salaries * 1.05
mean_raise5pct <- mean(salaries_raise5pct)
var_raise5pct <- var(salaries_raise5pct)
cat("(b)(ii) Mean with 5% raise =", mean_raise5pct, "\n")
## (b)(ii) Mean with 5% raise = 202.44
cat("(b)(ii) Variance with 5% raise =", var_raise5pct, "\n")
## (b)(ii) Variance with 5% raise = 344.3265
#### Page 35 #2 ####
# Problem 2
t <- read.table("RobotReactTime.txt", header = TRUE)
# look at the column names
names(t)
## [1] "Time" "Robot"
# take only Robot 1's times
t1 <- t$Time[t$Robot == 1]
# sort the times
t1_sorted <- sort(t1)
# (a) median and 25th / 75th percentiles
median_t1 <- median(t1_sorted)
q25 <- quantile(t1_sorted, 0.25)
q75 <- quantile(t1_sorted, 0.75)
cat("(a) Median =", median_t1, "\n")
## (a) Median = 30.55
cat("(a) 25th percentile =", q25, "\n")
## (a) 25th percentile = 29.6325
cat("(a) 75th percentile =", q75, "\n\n")
## (a) 75th percentile = 31.375
# (b) interquartile range (IQR)
iqr_t1 <- IQR(t1_sorted)
cat("(b) Interquartile range =", iqr_t1, "\n\n")
## (b) Interquartile range = 1.7425
# (c) percentile of the 19th ordered value
n <- length(t1_sorted)
value_19 <- t1_sorted[19]
percentile_19 <- 100 * 19 / n
cat("(c) 19th ordered value =", value_19, "\n")
## (c) 19th ordered value = 31.6
cat("(c) This value is at about the", percentile_19, "th percentile\n")
## (c) This value is at about the 86.36364 th percentile
#### Page 35 #3 ####
# Problem 3
t <- read.table("RobotReactTime.txt", header = TRUE)
# take only Robot 2's times
t2 <- t$Time[t$Robot == 2]
# (a) five number summary
cat("(a) Five number summary:\n")
## (a) Five number summary:
summary(t2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28.97 29.30 29.94 30.11 30.82 32.23
# (b) 90th percentile
p90 <- quantile(t2, 0.90)
cat("\n(b) 90th percentile =", p90, "\n")
##
## (b) 90th percentile = 31.068
# (c) boxplot + check for outliers
boxplot(t2,
main = "Boxplot of Robot 2 Reaction Times",
ylab = "Reaction Time")

#### Page 35 #4a ####
# Problem 4
si <- read.table("SolarIntensAuData.txt", header = TRUE)
head(si)
## SI
## 1 673
## 2 673
## 3 676
## 4 677
## 5 680
## 6 682
# (a) boxplot of solar intensity
boxplot(si$SI,
main = "Boxplot of Solar Intensity Measurements",
ylab = "Solar Intensity")

# (b) summary statistics (five-number summary + mean)
summary(si$SI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 673.0 697.0 717.0 714.2 734.0 758.0
# (c) list outliers explicitly
outliers <- boxplot.stats(si$SI)$out
outliers
## integer(0)