R Markdown

Include libraries

############################
# Load the house price data set. read.csv() already returns a data frame,
# so no extra data.frame() wrapper is needed.
house_prices_data <- read.csv("houseprices.csv")


# 1a 
# Null Hypothesis H0 : Avg house price in this area is at most $150K --> mu <= 150000
# Alternate Hypothesis Ha: Avg house price in this area is above $150K --> mu > 150000

# Hypothesised mean house price used as the reference value for the test.
mu <- 150 * 1000

# One-sample, one-sided t-test on Price. alternative = "greater" makes the
# alternative hypothesis "true mean is greater than mu".
# mu is passed by name so it cannot be mis-matched positionally; y, paired
# and var.equal are omitted because they only apply to two-sample tests.
t.test(house_prices_data$Price, alternative = "greater", mu = mu)
## 
##  One Sample t-test
## 
## data:  house_prices_data$Price
## t = 6.6302, df = 1046, p-value = 2.681e-11
## alternative hypothesis: true mean is greater than 150000
## 95 percent confidence interval:
##  160420.1      Inf
## sample estimates:
## mean of x 
##  163862.1
# => Assuming a level of significance of 0.05, we reject H0 because the p-value is very low (2.681e-11).

###################################################

#1b
# Null Hypothesis H0 : average living area is at most 1800 Sq. Ft.  --> mu <= 1800
# Alternate Hypothesis Ha: average living area is more than 1800 Sq. Ft. --> mu > 1800

# Hypothesised mean living area used as the reference value for the test.
mu <- 1800

# One-sample, one-sided t-test on Living.Area. alternative = "greater" makes
# the alternative hypothesis "true mean is greater than mu".
# mu is passed by name; var.equal and paired are dropped because they have no
# effect on a one-sample test and only obscure what is being tested.
t.test(house_prices_data$Living.Area, alternative = "greater", mu = mu)
## 
##  One Sample t-test
## 
## data:  house_prices_data$Living.Area
## t = 0.36838, df = 1046, p-value = 0.3563
## alternative hypothesis: true mean is greater than 1800
## 95 percent confidence interval:
##  1774.666      Inf
## sample estimates:
## mean of x 
##  1807.303
# p-value is 0.3563, i.e. p > level of significance (0.05), hence we fail to reject H0.




#############################################


##2a
# Split the houses on the Fireplace column: rows with value 1 go to
# `fireplace`, rows with value 0 go to `no_fireplace`.
fireplace <- house_prices_data[house_prices_data$Fireplace == 1, ]
no_fireplace <- house_prices_data[house_prices_data$Fireplace == 0, ]

# Side-by-side box plots of Price for the two groups. The groups are labelled
# explicitly; without `names` the boxes are only marked "1" and "2".
boxplot(
  fireplace$Price, no_fireplace$Price,
  names = c("Fireplace", "No fireplace"),
  ylab = "Price"
)

# Looking at the box plots we can see the following:
# The inter-quartile range of prices for houses with a fireplace is larger
# We can also notice that the ranges of values are different
# Prices of houses without a fireplace are heavily right skewed

#2b
#Null Hypothesis H0: Average home price for houses with fireplaces is not greater than for houses without fireplaces
# i.e. H0: mu1 <= mu2  ==> mu1 - mu2 <= 0   (mu1 = with fireplace, mu2 = without fireplace)
#Alternate Hypothesis HA: Average home price for houses with fireplaces is greater than for houses without fireplaces (mu1 - mu2 > 0)



# Welch two-sample t-test (unpaired, unequal variances: the t.test defaults,
# spelled out here). First sample = prices with a fireplace, second = prices
# without; alternative = "greater" tests whether the difference in means
# (first minus second) is greater than 0.
t.test(
  fireplace$Price,
  no_fireplace$Price,
  alternative = "greater",
  paired = FALSE,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  fireplace$Price and no_fireplace$Price
## t = 17.661, df = 1037, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  57444.68      Inf
## sample estimates:
## mean of x mean of y 
##  189637.8  126287.7
# since the p-value is < 2.2e-16 we reject H0.

##################################


#3

#Null Hypothesis H0: old houses do not have bigger lot sizes than new houses
# i.e. H0: mu1 <= mu2  ==> mu1 - mu2 <= 0   (mu1 = old houses, mu2 = new houses)
#Alternate Hypothesis HA: old houses have bigger lot sizes than new houses

# Split on Age: rows with Age > 30 are "old", rows with Age <= 30 are "new".
old <- house_prices_data[house_prices_data$Age > 30, ]
new <- house_prices_data[house_prices_data$Age <= 30, ]

# Welch two-sample t-test on Lot.Size with old houses as the first sample and
# new houses as the second. paired = FALSE and var.equal = FALSE are the
# t.test defaults, so they are not repeated here.
t.test(old$Lot.Size, new$Lot.Size, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  old$Lot.Size and new$Lot.Size
## t = -0.59026, df = 610.28, p-value = 0.7224
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.1140159        Inf
## sample estimates:
## mean of x mean of y 
## 0.5481788 0.5782550
 #### since the p-value (0.7224) is greater than the alpha value 0.05 we fail to reject the null hypothesis.
 
 
 ############################3
 
 #4
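#Null Hypothesis H0: new houses do not have more fireplaces than old houses
# i.e. H0: mu1 <= mu2  ==> mu1 - mu2 <= 0   (mu1 = new houses, mu2 = old houses, matching the t.test call below)
#Alternate Hypothesis HA: new houses have more fireplaces than old houses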

 
 # Welch two-sample t-test on Fireplace with new houses as the first sample and
 # old houses as the second: alternative = "greater" tests whether the mean for
 # new houses minus the mean for old houses is greater than 0.
 # paired = FALSE and var.equal = FALSE are the defaults and are omitted.
 t.test(new$Fireplace, old$Fireplace, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  new$Fireplace and old$Fireplace
## t = 6.1188, df = 536.04, p-value = 9.102e-10
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.1500374       Inf
## sample estimates:
## mean of x mean of y 
## 0.6523490 0.4470199
 #We reject H0 as p-value = 9.102e-10
 
 #############################


 ##5
 
 #Null Hypothesis H0: the mean prices of Small, Medium and Big houses are all the same.
 #Alternate Hypothesis HA: at least one of the mean prices differs from the others.
 
 # Bucket houses by bedroom count: fewer than 3 -> "Small", exactly 3 or 4 ->
 # "Medium", more than 4 -> "big"; anything else (e.g. missing values) is NA.
 # local() keeps the helper variable out of the global workspace.
 Category <- local({
   bedrooms <- house_prices_data$Bedrooms
   is_medium <- (bedrooms == 3) | (bedrooms == 4)
   ifelse(
     bedrooms < 3,
     "Small",
     ifelse(is_medium, "Medium", ifelse(bedrooms > 4, "big", NA))
   )
 })

# Store the size category alongside the original columns.
house_prices_data$Category <- Category

# One-way ANOVA of Price across the three size categories.
x <- aov(house_prices_data$Price ~ house_prices_data$Category)

# Print the fitted model (sums of squares and degrees of freedom).
x
## Call:
##    aov(formula = house_prices_data$Price ~ house_prices_data$Category)
## 
## Terms:
##                 house_prices_data$Category    Residuals
## Sum of Squares                4.839968e+11 4.303266e+12
## Deg. of Freedom                          2         1044
## 
## Residual standard error: 64202.05
## Estimated effects may be unbalanced
# ANOVA table for the fitted model x (F value and Pr(>F) for the Category term).
summary(x)
##                              Df    Sum Sq   Mean Sq F value Pr(>F)    
## house_prices_data$Category    2 4.840e+11 2.420e+11   58.71 <2e-16 ***
## Residuals                  1044 4.303e+12 4.122e+09                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Since the F value is large (58.71) and the p-value (< 2e-16) is far below the 0.01 level of significance, we reject the Null Hypothesis.