#Author: Jack Watters

#2) Real estate probability problem:

#Creating a vector of the possible outcome values
values <- c(-100, -50, 0, 100, 150, 200, 250)

#Creating a vector with the probability of each outcome (same order as values)
probabilities <- c(0.08, 0.07, 0.1, 0.5, 0.2, 0.03, 0.02)

#Guard against a mistyped table: every outcome needs exactly one probability,
#probabilities cannot be negative, and together they must sum to 1
stopifnot(
  length(values) == length(probabilities),
  all(probabilities >= 0),
  isTRUE(all.equal(sum(probabilities), 1))
)

#Calculate expected value: the probability-weighted sum of the outcomes
EV <- sum(values * probabilities)
#Scale by 1000 for display (values appear to be quoted in thousands)
EV * 1000
## [1] 79500
#Expected Value is 79.5 in the units of `values`, i.e. 79,500 after scaling

#4) Plane problem:
#i)
#Inputs for pnorm: mean, sd, and the weight limit (threshold).
#Named once here so the probability, the graph and the Z value all agree.
weight_mean <- 5460
weight_sd <- 301
weight_limit <- 5800
#Calculating the probability based on the above
#(Setting lower.tail to TRUE gives the probability on the left)
#i.e. the probability the weight does not exceed the given limit
P <- pnorm(weight_limit, mean = weight_mean, sd = weight_sd,
           lower.tail = TRUE)
print(P) #The probability is 87.07%
## [1] 0.8706709
#Graph the area under the curve where the airline is safe:
#pnormGC comes from the optional tigerstats package
#(install.packages("tigerstats") to get it). It is called through its
#namespace so the script no longer stops with "could not find function"
#when the package has not been attached. Without the package, the same
#shaded region is drawn with base graphics instead.
if (requireNamespace("tigerstats", quietly = TRUE)) {
  tigerstats::pnormGC(weight_limit, region = "below", mean = weight_mean,
                      sd = weight_sd, graph = TRUE)
} else {
  #Density curve over +/- 4 standard deviations around the mean
  grid_x <- seq(weight_mean - 4 * weight_sd, weight_mean + 4 * weight_sd,
                length.out = 400)
  plot(grid_x, dnorm(grid_x, mean = weight_mean, sd = weight_sd),
       type = "l", xlab = "Weight", ylab = "Density",
       main = "Area below the weight limit")
  #Shade everything to the left of the limit (the "safe" region)
  safe_x <- grid_x[grid_x <= weight_limit]
  polygon(
    c(min(safe_x), safe_x, weight_limit, weight_limit),
    c(0, dnorm(safe_x, mean = weight_mean, sd = weight_sd),
      dnorm(weight_limit, mean = weight_mean, sd = weight_sd), 0),
    col = "lightblue", border = NA
  )
  #Report the shaded area, as pnormGC would
  P
}

## [1] 0.8706709
#ii
#Obtaining the Z value will help:
(weight_limit - weight_mean) / weight_sd
## [1] 1.129568
#The airline should not invest in this model because the allowable weight
#is only 1.13 standard deviations from the mean. In a normal distribution,
#95% of values are within 2 standard deviations from the mean. With the 
#area under the allowable limit only being 87.07%, you have a very high
#chance of exceeding the limit (~13%)


#5) Software engineer housing problem.
#Make sure the path is to your local drive/folder:
housing_path <- "E:/Stern/SacramentoHousePricesThreeBed.csv"
#Fail early with a readable message when the file is not where we expect it
if (!file.exists(housing_path)) {
  stop("Housing data file not found: ", housing_path, call. = FALSE)
}
sachousing <- read.csv(housing_path)
#The analysis below relies on these two columns, so check for them up front
stopifnot(all(c("price", "sq__ft") %in% names(sachousing)))
summary(sachousing)
##       beds       baths           sq__ft         type               price       
##  Min.   :3   Min.   :1.000   Min.   : 696   Length:391         Min.   : 30000  
##  1st Qu.:3   1st Qu.:2.000   1st Qu.:1155   Class :character   1st Qu.:149300  
##  Median :3   Median :2.000   Median :1329   Mode  :character   Median :193500  
##  Mean   :3   Mean   :1.798   Mean   :1395                      Mean   :211182  
##  3rd Qu.:3   3rd Qu.:2.000   3rd Qu.:1524                      3rd Qu.:250000  
##  Max.   :3   Max.   :3.000   Max.   :3173                      Max.   :677048  
##     latitude       longitude     
##  Min.   :38.24   Min.   :-121.6  
##  1st Qu.:38.48   1st Qu.:-121.5  
##  Median :38.61   Median :-121.4  
##  Mean   :38.58   Mean   :-121.4  
##  3rd Qu.:38.68   3rd Qu.:-121.3  
##  Max.   :39.01   Max.   :-120.6
#Distribution of sale prices
hist(sachousing$price, main = "Histogram of Price", xlab = "Price")

#Distribution of square footage
hist(sachousing$sq__ft, main = "Histogram of Square Footage", xlab = "SQFT")

#Sample size (n) is read from the data instead of being hard-coded, so the
#intervals stay correct if the file changes (391 rows in the original data)
observations <- nrow(sachousing)
sqftmean <- mean(sachousing$sq__ft)
sqftsd <- sd(sachousing$sq__ft)
prcmean <- mean(sachousing$price)
prcsd <- sd(sachousing$price)
#To set up confidence interval calculations, we'll use the mean
#and sd values for each divided by sqrt of sample size (n):
#1.96 is the two-sided 95% critical value of the standard normal
z_crit <- 1.96
#Margin of error = critical value * standard error of the mean
sqft_margin <- z_crit * sqftsd / sqrt(observations)
prc_margin <- z_crit * prcsd / sqrt(observations)
sqftmean - sqft_margin #1360.53 sqft
## [1] 1360.529
sqftmean + sqft_margin #1429.07 sqft
## [1] 1429.072
prcmean - prc_margin #$202,198.90
## [1] 202198.9
prcmean + prc_margin #$220,166.00
## [1] 220166
#To make a scatterplot with the given price/sqft, convert to x and y alias:
x <- sachousing$price
y <- sachousing$sq__ft
#Fit the least-squares line (square footage regressed on price) once and
#reuse it for both the plot and the coefficients. x and y are free-standing
#vectors, so lm() needs no data argument.
price_sqft_fit <- lm(y ~ x)
#Base R plot:
plot(x, y, main = "Price by Square Footage",
     xlab = "Price", ylab = "SQFT",
     pch = 19, frame.plot = FALSE)
abline(price_sqft_fit, col = "blue")
#To add a locally weighted scatter-plot smoothing line:
lines(lowess(x, y), col = "red")

#Intercept and slope of the fitted line
coef(price_sqft_fit)
##  (Intercept)            x 
## 8.841515e+02 2.418047e-03
#Pearson correlation between price and square footage
cor(x, y)
## [1] 0.6338317