# Jack Watters
# 2) Real estate probability problem ----
# Possible outcomes. The result is multiplied by 1000 when reported below
# (the values are presumably in thousands -- confirm against the problem
# statement).
values <- c(-100, -50, 0, 100, 150, 200, 250)
# Probability of each outcome, listed in the same order as `values`
probabilities <- c(0.08, 0.07, 0.1, 0.5, 0.2, 0.03, 0.02)
# `values * probabilities` silently recycles the shorter vector, so make sure
# the two vectors line up and that the probabilities form a valid
# distribution before using them.
stopifnot(
  length(values) == length(probabilities),
  all(probabilities >= 0),
  isTRUE(all.equal(sum(probabilities), 1))
)
# Expected value = probability-weighted sum of the outcomes
EV <- sum(values * probabilities)
EV * 1000
## [1] 79500
# EV itself is 79.5 in the units of `values`; scaled by 1000 it is 79,500
# 4) Plane problem ----
# i) Probability that the weight does not exceed the allowable limit, with the
# weight modelled as normal with mean = 5460 and sd = 301, and a limit of 5800.
weight_mean <- 5460
weight_sd <- 301
weight_limit <- 5800
# lower.tail = TRUE returns the area to the left of the limit,
# i.e. P(weight <= limit).
P <- pnorm(weight_limit, mean = weight_mean, sd = weight_sd,
           lower.tail = TRUE)
print(P) # The probability is 87.07%
## [1] 0.8706709
# Graph the area under the curve where the airline is safe.
# pnormGC() lives in the optional tigerstats package. It is called through its
# namespace, and only when the package is installed, so the script no longer
# stops with "could not find function" when tigerstats has not been attached.
# To install it, run: install.packages("tigerstats")
if (requireNamespace("tigerstats", quietly = TRUE)) {
  print(tigerstats::pnormGC(weight_limit, region = "below",
                            mean = weight_mean, sd = weight_sd,
                            graph = TRUE))
} else {
  # Base-graphics fallback: draw the density over mean +/- 4 sd and shade the
  # region at or below the limit, then report the same probability.
  curve_x <- seq(weight_mean - 4 * weight_sd, weight_mean + 4 * weight_sd,
                 length.out = 400)
  plot(curve_x, dnorm(curve_x, mean = weight_mean, sd = weight_sd),
       type = "l", xlab = "Weight", ylab = "Density",
       main = "Area below the allowable limit")
  shade_x <- seq(min(curve_x), weight_limit, length.out = 200)
  polygon(c(min(curve_x), shade_x, weight_limit),
          c(0, dnorm(shade_x, mean = weight_mean, sd = weight_sd), 0),
          col = "lightblue", border = NA)
  print(P)
}

## [1] 0.8706709
# ii) The z-score of the limit shows how far it sits above the mean:
(weight_limit - weight_mean) / weight_sd
## [1] 1.129568
# The airline should not invest in this model because the allowable weight
# is only 1.13 standard deviations from the mean. In a normal distribution,
# 95% of values are within 2 standard deviations from the mean. With the
# area under the allowable limit only being 87.07%, you have a very high
# chance of exceeding the limit (~13%)
# 5) Software engineer housing problem ----
# Location of the data set. Edit this single line so it points at your local
# drive/folder.
housing_path <- "E:/Stern/SacramentoHousePricesThreeBed.csv"
# Fail early with an actionable message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(housing_path)) {
  stop("Housing data not found at '", housing_path,
       "'; update `housing_path` to point at your local copy.",
       call. = FALSE)
}
sachousing <- read.csv(housing_path)
# The rest of the analysis relies on these two columns being present.
stopifnot(all(c("price", "sq__ft") %in% names(sachousing)))
summary(sachousing)
## beds baths sq__ft type price
## Min. :3 Min. :1.000 Min. : 696 Length:391 Min. : 30000
## 1st Qu.:3 1st Qu.:2.000 1st Qu.:1155 Class :character 1st Qu.:149300
## Median :3 Median :2.000 Median :1329 Mode :character Median :193500
## Mean :3 Mean :1.798 Mean :1395 Mean :211182
## 3rd Qu.:3 3rd Qu.:2.000 3rd Qu.:1524 3rd Qu.:250000
## Max. :3 Max. :3.000 Max. :3173 Max. :677048
## latitude longitude
## Min. :38.24 Min. :-121.6
## 1st Qu.:38.48 1st Qu.:-121.5
## Median :38.61 Median :-121.4
## Mean :38.58 Mean :-121.4
## 3rd Qu.:38.68 3rd Qu.:-121.3
## Max. :39.01 Max. :-120.6
# Distribution of sale prices
hist(sachousing$price)

# Distribution of square footage
hist(sachousing$sq__ft)

# Sample size (n), taken from the data rather than hard-coded so the intervals
# stay correct if the file changes (391 rows in the recorded run).
observations <- nrow(sachousing)
sqftmean <- mean(sachousing$sq__ft)
sqftsd <- sd(sachousing$sq__ft)
prcmean <- mean(sachousing$price)
prcsd <- sd(sachousing$price)
# Confidence intervals: mean +/- z * sd / sqrt(n).
# 1.96 is the standard-normal critical value for a 95% interval.
z_crit <- 1.96
# Margin of error = critical value times the standard error of the mean
sqft_margin <- z_crit * sqftsd / sqrt(observations)
prc_margin <- z_crit * prcsd / sqrt(observations)
sqftmean - sqft_margin #1360.53 sqft
## [1] 1360.529
sqftmean + sqft_margin #1429.07 sqft
## [1] 1429.072
prcmean - prc_margin #$202,198.90
## [1] 202198.9
prcmean + prc_margin #$220,166.00
## [1] 220166
# Scatterplot of square footage against price, using x and y as aliases:
# x is price (horizontal axis) and y is square footage (vertical axis).
x <- sachousing$price
y <- sachousing$sq__ft
# Fit the linear regression of y on x once and reuse it for both the plotted
# line and the printed coefficients. No `data` argument is passed because x
# and y are plain vectors in the workspace, not columns of sachousing.
price_sqft_fit <- lm(y ~ x)
# Base R plot (frame.plot is spelled out rather than partially matched):
plot(x, y, main = "Price by Square Footage",
     xlab = "Price", ylab = "SQFT",
     pch = 19, frame.plot = FALSE)
abline(price_sqft_fit, col = "blue")
# To add a locally weighted scatter-plot smoothing line:
lines(lowess(x, y), col = "red")

# Intercept and slope of the fitted line (slope = change in y per unit of x)
coef(price_sqft_fit)
## (Intercept) x
## 8.841515e+02 2.418047e-03
# Pearson correlation between price and square footage
cor(x, y)
## [1] 0.6338317