# Import the assignment data set (Table2.csv is read from the working directory)
mydata <- utils::read.csv(file = "Table2.csv", header = TRUE)

# Print the whole data frame for a first look at the imported values
mydata

4. For the variable condition, remap the values from numbers to text as defined in the assignment.

# Derive condition2: a factor (categorical) copy of the condition column.
# NOTE(review): the task asks for text labels, but as.factor() keeps the
# original codes as level names. Once the number-to-text mapping from the
# assignment is confirmed, supply it via factor(..., levels = , labels = ).
mydata[["condition2"]] <- as.factor(mydata[["condition"]])

# Show the data frame with the new column appended
mydata

5. Replicate (question 2a) in R, using any of the techniques you learned in the tutorial.

# Descriptive statistics for question 2a, one section per column of mydata.
# Each statistic is computed by a separate call; the line starting with `##`
# directly below a call appears to be the output recorded when it was run.
# NOTE(review): Mode() is not defined in the visible part of this file
# (base R's mode() is a different function) -- presumably it comes from a
# package or helper loaded elsewhere; confirm before running this stand-alone.

# 2a i. Lot size: mean, mode, median, minimum, maximum, standard deviation

mean(mydata$lotsize)
## [1] 16766.75
Mode(mydata$lotsize)
## [1] 12500
median(mydata$lotsize)
## [1] 12500
min(mydata$lotsize)
## [1] 8000
max(mydata$lotsize)
## [1] 42025
sd(mydata$lotsize)
## [1] 10841.87
# 2a ii. Value: mean, mode, median, minimum, maximum, standard deviation
# The recorded Mode() output lists eight different values, which suggests that
# no value repeats and there is no single mode -- TODO confirm.

mean(mydata$value)
## [1] 401.875
Mode(mydata$value)
## [1] 200 290 340 360 410 475 530 610
median(mydata$value)
## [1] 385
min(mydata$value)
## [1] 200
max(mydata$value)
## [1] 610
sd(mydata$value)
## [1] 133.0933
# 2a iii. Bedrooms: mean, mode, median, minimum, maximum

mean(mydata$bedrooms)
## [1] 3.25
Mode(mydata$bedrooms)
## [1] 3
median(mydata$bedrooms)
## [1] 3
min(mydata$bedrooms)
## [1] 2
max(mydata$bedrooms)
## [1] 5
# 2a iv. Height: mean, mode, median, minimum, maximum

mean(mydata$height)
## [1] 2.3125
Mode(mydata$height)
## [1] 2.5
median(mydata$height)
## [1] 2.5
min(mydata$height)
## [1] 1
max(mydata$height)
## [1] 5
# 2a v. Condition: mean, mode, median, minimum, maximum
# These are computed on the numeric condition column, not on the factor
# column condition2.

mean(mydata$condition)
## [1] 3.5
Mode(mydata$condition)
## [1] 4
median(mydata$condition)
## [1] 4
min(mydata$condition)
## [1] 1
max(mydata$condition)
## [1] 5

6. Replicate (question 2b) using R. For the variable lot size, create a new variable that converts the values from square feet to acres.

# 2b i. Express lot size in acres instead of square feet
# NOTE(review): 0.00002296 looks like a rounded value of 1/43560 (acres per
# square foot) -- confirm that this rounding is what the assignment expects.
mydata[["lotsize2"]] <- 0.00002296 * mydata[["lotsize"]]
mydata

# 2b ii. Re-compute the summary statistics on the converted column
# (lines starting with ## are the recorded outputs)
mean(mydata[["lotsize2"]])
## [1] 0.3849646
Mode(mydata[["lotsize2"]])
## [1] 0.287
median(mydata[["lotsize2"]])
## [1] 0.287
min(mydata[["lotsize2"]])
## [1] 0.18368
max(mydata[["lotsize2"]])
## [1] 0.964894

7. If you’re feeling daring, try to make a summary table using stargazer. If you can’t get it to work, don’t sweat it.

# Summary table of mydata, rendered as plain text.
# stargazer() is called through its namespace so the table is produced even
# when library(stargazer) has not been run earlier in the session; the package
# still has to be installed.
# Recorded output follows (lines starting with ##).

stargazer::stargazer(mydata, summary = TRUE, header = FALSE, type = "text")
## 
## ================================================================
## Statistic N    Mean     St. Dev.   Min  Pctl(25) Pctl(75)  Max  
## ----------------------------------------------------------------
## street    8   17.000     9.856      4     9.8      23.8     31  
## lotsize   8 16,766.750 10,841.870 8,000  11,875  17,707.2 42,025
## value     8  401.875    133.093    200   327.5    488.8    610  
## bedrooms  8   3.250      1.035      2     2.8       4       5   
## height    8   2.312      0.594    1.000  2.375    2.500   3.000 
## condition 8   3.500      1.195      1      3        4       5   
## lotsize2  8   0.385      0.249    0.184  0.273    0.407   0.965 
## ----------------------------------------------------------------

8. Generate a boxplot of the variables lot size and value. Make sure you label them.

# Boxplot of lot size; the y-axis label carries the unit.
# Labels corrected: "square feet" (not "feets") and a grammatical title.
boxplot(
  mydata$lotsize,
  main = "Boxplot of lot size",
  xlab = "",
  ylab = "lot size (square feet)"
)

# Boxplot of property value; the y-axis label carries the unit.
boxplot(
  mydata$value,
  main = "Boxplot of value",
  xlab = "",
  ylab = "value ($1000s)"
)

9. Generate a histogram of the variable lot size. Overlay a red line indicating the average lot size.

# Histogram of lot size with the mean (red) and the median (blue) overlaid.
hist(
  mydata$lotsize,
  breaks = 15,
  main = "Histogram of the lot size",
  xlab = "lot size (square feet)",
  ylab = "frequency"
)

# Mean and median of lot size, ignoring missing values
themean <- mean(mydata$lotsize, na.rm = TRUE)
themedian <- median(mydata$lotsize, na.rm = TRUE)

# Mark both statistics with vertical lines; the line width matches the
# legend keys so the legend describes what is actually drawn.
abline(v = themean, col = "red", lwd = 2.5)
abline(v = themedian, col = "blue", lwd = 2.5)

# Place the legend by keyword so it always lands inside the plot region.
# Fixed user coordinates such as (300, 8) fall outside the axes here (lot
# sizes are in the thousands of square feet), which clips the legend away.
legend(
  "topright",
  legend = c("The Mean", "The Median"),
  lty = c(1, 1),
  lwd = c(2.5, 2.5),
  col = c("red", "blue")
)

10. Create a new variable (a new column) that is defined as the property value per square foot.

# Property value per square foot of lot: value divided by lot size.
# NOTE(review): value appears to be in $1000s (per the boxplot axis label), so
# this ratio would be in $1000s per square foot -- confirm the intended unit.
mydata[["valuepersquarefoot"]] <- with(mydata, value / lotsize)
mydata

11. Generate a plot with the number of bedrooms on the x-axis and the value/lotsize on the y-axis.

# Scatter plot: number of bedrooms (x) against value per square foot of lot (y)
plot(
  x = mydata$bedrooms,
  y = mydata$valuepersquarefoot,
  xlab = "number of bedrooms",
  ylab = "value/lotsize",
  main = "Plot of number of bedrooms"
)