# midterm450

# Load the house selling price data; the first row of the CSV holds the
# column names.
data <- read.csv("~/Downloads/house_selling_prices_FL(1).csv", header = TRUE)

# View() invokes a spreadsheet-style data viewer, so only call it when a user
# is present; a non-interactive run (Rscript, knitting) should skip it.
if (interactive()) {
  View(data)
}

# Per-column summary statistics.
summary(data)

##      House            Taxes         Bedrooms        Baths      
##  Min.   :  1.00   Min.   :  20   Min.   :1.00   Min.   :1.000  
##  1st Qu.: 25.75   1st Qu.: 970   1st Qu.:3.00   1st Qu.:1.875  
##  Median : 50.50   Median :1535   Median :3.00   Median :2.000  
##  Mean   : 50.50   Mean   :1668   Mean   :2.99   Mean   :1.890  
##  3rd Qu.: 75.25   3rd Qu.:2042   3rd Qu.:3.00   3rd Qu.:2.000  
##  Max.   :100.00   Max.   :4900   Max.   :5.00   Max.   :3.000  
##    Quadrant               NW           price             size     
##  Length:100         Min.   :0.00   Min.   : 21000   Min.   : 370  
##  Class :character   1st Qu.:0.75   1st Qu.: 86875   1st Qu.:1158  
##  Mode  :character   Median :1.00   Median :123750   Median :1410  
##                     Mean   :0.75   Mean   :126698   Mean   :1526  
##                     3rd Qu.:1.00   3rd Qu.:153075   3rd Qu.:1760  
##                     Max.   :1.00   Max.   :338000   Max.   :4050  
##       lot       
##  Min.   : 3500  
##  1st Qu.:12875  
##  Median :18000  
##  Mean   :19417  
##  3rd Qu.:25000  
##  Max.   :47400

# Preview the first six rows of the data set.
head(data, n = 6L)

##   House Taxes Bedrooms Baths Quadrant NW  price size   lot
## 1     1  1360        3   2.0       NW  1 145000 1240 18000
## 2     2  1050        1   1.0       NW  1  68000  370 25000
## 3     3  1010        3   1.5       NW  1 115000 1130 25000
## 4     4   830        3   2.0       SW  0  69000 1120 17000
## 5     5  2150        3   2.0       NW  1 163000 1710 14000
## 6     6  1230        3   2.0       NW  1  69900 1010  8000

# Number of rows, then number of columns.
c(nrow(data), ncol(data))

## [1] 100   9

# I ran these functions because together they give the most useful overview of the table. The dim() function shows that the data set has 100 rows and 9 columns. The summary() function shows the range and central values of each column. The head() function shows the first six rows, which gives a general sense of the kind of data in the rest of the set.
#install.packages("ggplot2")
library(ggplot2)
# Scatter plot of selling price against house size, with a fitted linear
# trend line drawn in red.
p <- ggplot(data = data, mapping = aes(x = size, y = price)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", size = 2)

# Render the plot.
print(p)

## `geom_smooth()` using formula 'y ~ x'

# The scatter plot shows a positive relationship between the size of a house and its price. There are a few outliers where the price is well above what the trend line predicts for that size, as well as a few houses whose price falls below the trend line for their size.

# Scatter plot of selling price against lot size, with a fitted linear trend
# line drawn in blue.
p2 <- ggplot(data = data, mapping = aes(x = lot, y = price)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "blue", size = 1)

# Render the plot.
print(p2)

## `geom_smooth()` using formula 'y ~ x'

# The scatter plot shows that, as with house size, there is a positive relationship between lot size and price: as the lot size increases, so does the price. There are fewer obvious outliers in this plot, and the houses priced above the trend line are less extreme than in the plot of price against house size.

# Keep only the NW indicator and the selling price.
dfnew2 <- data[, c("NW", "price")]

# Show the rows where NW is 1 or the price is at least 130000.
#
# The earlier call, filter(dfnew2, "NW"==1 | "price">=130000), did not do
# this. dplyr is not attached, so filter() resolved to stats::filter(), a
# linear filter for time series. The quoted "NW" and "price" were string
# literals rather than columns, so the condition was a single TRUE and every
# row came back unchanged. The "Time Series" output recorded below comes from
# that earlier call; re-run the script to refresh it.
#
# subset() evaluates the condition on the actual columns and drops rows where
# the condition is NA.
subset(dfnew2, NW == 1 | price >= 130000)

## Time Series:
## Start = 1 
## End = 100 
## Frequency = 1 
##     [,1]   [,2]
##   1    1 145000
##   2    1  68000
##   3    1 115000
##   4    0  69000
##   5    1 163000
##   6    1  69900
##   7    1  50000
##   8    1 137000
##   9    1 121300
##  10    1  70000
##  11    0  64500
##  12    1 167000
##  13    1 114600
##  14    1 103000
##  15    1 101000
##  16    1  50000
##  17    0  85000
##  18    0  22500
##  19    1  90000
##  20    1 133000
##  21    1  90500
##  22    0 260000
##  23    0 142500
##  24    1 160000
##  25    1 240000
##  26    1  87000
##  27    1 118600
##  28    1 140000
##  29    1 148000
##  30    0  65000
##  31    1 176000
##  32    0  86500
##  33    1 180000
##  34    1 179000
##  35    1 338000
##  36    1 130000
##  37    1  77300
##  38    1 125000
##  39    1 100000
##  40    0 100000
##  41    1 100000
##  42    1 146500
##  43    1 144900
##  44    1 183000
##  45    0  77000
##  46    1  60000
##  47    1 127000
##  48    0  86000
##  49    0  95000
##  50    1 270500
##  51    0  75000
##  52    1  81000
##  53    0 188000
##  54    0  85000
##  55    1 137000
##  56    1  92900
##  57    1  93000
##  58    1 109300
##  59    1 131500
##  60    1 200000
##  61    1  81900
##  62    1  91200
##  63    1 124500
##  64    0 225000
##  65    1 136500
##  66    1 268000
##  67    0  70700
##  68    0  70000
##  69    1 140000
##  70    1  89900
##  71    1 137000
##  72    0 103000
##  73    1 183000
##  74    1 140000
##  75    1 160000
##  76    1 192000
##  77    1 130000
##  78    1 123000
##  79    1  21000
##  80    0  85000
##  81    0  69900
##  82    1 125000
##  83    1 162600
##  84    1 156900
##  85    1 105900
##  86    0 167500
##  87    1 151800
##  88    1 118300
##  89    1  94300
##  90    0  93900
##  91    1 165000
##  92    1 285000
##  93    1  45000
##  94    1 124900
##  95    1 147000
##  96    1 176000
##  97    0 196500
##  98    1 132200
##  99    0  88400
## 100    1 127200

# Save the NW/price table to dfnew2.csv in the current working directory.
write.csv(x = dfnew2, file = "dfnew2.csv")

# midterm450

# Mountain

# 10/15/2021