Inculde libraries
############################
house_prices_data <- data.frame(read.csv ("houseprices.csv"))
# 1a
# Null Hypothesis H0 : Avg house price in this area is less $150K --> mu < 150000
# Alternate Hypothesis Ha: Avg house price in this area is below $150K --> mu >= 150000
mu = 150*1000
t.test(house_prices_data$Price, y = NULL, alternative = "greater", paired =
FALSE, var.equal = FALSE,mu)
##
## One Sample t-test
##
## data: house_prices_data$Price
## t = 6.6302, df = 1046, p-value = 2.681e-11
## alternative hypothesis: true mean is greater than 150000
## 95 percent confidence interval:
## 160420.1 Inf
## sample estimates:
## mean of x
## 163862.1
# => Assuming level of significance as 0.05 Reject H0 as p-value is very low 2.684541e-11
###################################################
#1b
# Null Hypothesis H0 : average living area is less than 1800 Sq. Ft. --> mu < 1800
# Alternate Hypothesis Ha: average living area is more than 1800 Sq. Ft. --> mu >= 1800
mu = 1800
t.test(house_prices_data$Living.Area, y = NULL, alternative = "greater", paired =
FALSE, var.equal = TRUE,mu)
##
## One Sample t-test
##
## data: house_prices_data$Living.Area
## t = 0.36838, df = 1046, p-value = 0.3563
## alternative hypothesis: true mean is greater than 1800
## 95 percent confidence interval:
## 1774.666 Inf
## sample estimates:
## mean of x
## 1807.303
# p is 0.35 ie p > level of significane (0.05) hence we don't reject H0.
#############################################
##2a
fireplace <- house_prices_data[house_prices_data$Fireplace == 1,]
no_fireplace <- house_prices_data[house_prices_data$Fireplace == 0,]
boxplot(fireplace$Price, no_fireplace$Price)

# Looking at the box plots we can see the following
# The inter-quartile range for prices of houses with fire place is more
# We can also notice that ranges in values are different
# No fire place prices are heavly right skewed
#2b
#Null Hypothesis H0: Average home prices for houses with fireplaces is not greater than houses without fireplaces
# ie. H0: mu2 > mu1 ==> mu1-mu2 < 0
#Alternate Hypothesi HA: Average home prices for houses with fireplaces is greater than houses without fireplaces
t.test(fireplace$Price, no_fireplace$Price, alternative = "greater" )
##
## Welch Two Sample t-test
##
## data: fireplace$Price and no_fireplace$Price
## t = 17.661, df = 1037, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 57444.68 Inf
## sample estimates:
## mean of x mean of y
## 189637.8 126287.7
# since p-value is 2.2e-16 we reject H0.
##################################
#3
#Null Hypothesis H0: old houses do not have bigr lot sizes than new houses
# ie. H0: mu2 > mu1 ==> mu1-mu2 <= 0
#Alternate Hypothesi HA: old houses have bigr lot sizes than new houses
old <- house_prices_data[((house_prices_data$Age)) > 30,]
new <- house_prices_data[((house_prices_data$Age)) <= 30,]
t.test(old$Lot.Size, new$Lot.Size, alternative = "greater", paired =
FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: old$Lot.Size and new$Lot.Size
## t = -0.59026, df = 610.28, p-value = 0.7224
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.1140159 Inf
## sample estimates:
## mean of x mean of y
## 0.5481788 0.5782550
#### since p-value is greater than alpha value 0.05 we reject null hypothesis.
############################3
#4
#Null Hypothesis H0: old houses do not have more fire places
# ie. H0: mu2 > mu1 ==> mu1-mu2 <= 0
#Alternate Hypothesi HA: old houses have more fire places
t.test(new$Fireplace, old$Fireplace, alternative = "greater", paired =
FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: new$Fireplace and old$Fireplace
## t = 6.1188, df = 536.04, p-value = 9.102e-10
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.1500374 Inf
## sample estimates:
## mean of x mean of y
## 0.6523490 0.4470199
#We reject H0 as p-value = 9.102e-10
#############################
##5
#Null Hypothesis H0: all that the prices of Small, Medium and Big houses the same.
Category <- ifelse((house_prices_data$Bedrooms < 3), 'Small',
ifelse((house_prices_data$Bedrooms == 3) | (house_prices_data$Bedrooms == 4 ), 'Medium',
ifelse((house_prices_data$Bedrooms > 4), 'big', NA )))
house_prices_data$Category <- Category
x <- aov(house_prices_data$Price~house_prices_data$Category)
x
## Call:
## aov(formula = house_prices_data$Price ~ house_prices_data$Category)
##
## Terms:
## house_prices_data$Category Residuals
## Sum of Squares 4.839968e+11 4.303266e+12
## Deg. of Freedom 2 1044
##
## Residual standard error: 64202.05
## Estimated effects may be unbalanced
summary(x)
## Df Sum Sq Mean Sq F value Pr(>F)
## house_prices_data$Category 2 4.840e+11 2.420e+11 58.71 <2e-16 ***
## Residuals 1044 4.303e+12 4.122e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Since F and P are very low and level of significance is 0.01 we reject Null Hypothesis.