library(readxl)
# Read the housing workbook into `datos` and preview its first three rows
# (the preview below shows the columns Area_contruida and precio_millon).
datos <- read_excel(path = "C:/Archivos R/datos_vivienda.xlsx")
head(datos, n = 3)
## # A tibble: 3 x 2
## Area_contruida precio_millon
## <dbl> <dbl>
## 1 86 250
## 2 118 385
## 3 130 395
# Plotting dependencies. library() stops with an error when a package is not
# installed, whereas require() only returns FALSE and lets the script fail
# later at the first call into the missing package.
library(ggplot2)
library(ggpubr)
# Univariate views of both variables: a histogram and a bar chart of the
# raw values for built area (A, B) and for price (C, D).
#
# The histogram bin count is chosen with Sturges' rule from the sample size
# instead of relying on ggplot2's default of 30 bins, which is far too many
# for a sample this small and triggers the "Pick better value" message.
# Both columns come from the same data frame, so one bin count serves both.
n_bins <- grDevices::nclass.Sturges(datos$Area_contruida)

g1 <- ggplot(datos, aes(x = Area_contruida)) +
  geom_histogram(bins = n_bins) +
  theme_bw()
g2 <- ggplot(datos, aes(x = Area_contruida)) +
  geom_bar() +
  theme_bw()
g3 <- ggplot(datos, aes(x = precio_millon)) +
  geom_histogram(bins = n_bins) +
  theme_bw()
g4 <- ggplot(datos, aes(x = precio_millon)) +
  geom_bar() +
  theme_bw()

# Arrange the four panels on a 2 x 2 grid labelled A-D.
ggarrange(
  g1, g2, g3, g4,
  labels = c("A", "B", "C", "D"),
  ncol = 2, nrow = 2
)
# Descriptive statistics table for both variables.
# library() is used so that a missing package fails immediately instead of
# require() silently returning FALSE.
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
y <- table1::table1(~ Area_contruida + precio_millon, data = datos)
y
| | Overall (N=26) |
|---|---|
| Area_contruida | |
| Mean (SD) | 116 (35.5) |
| Median [Min, Max] | 97.0 [80.0, 195] |
| precio_millon | |
| Mean (SD) | 332 (82.1) |
| Median [Min, Max] | 305 [240, 480] |
# Welch two-sample t-test on the two columns of `datos`.
# NOTE(review): this compares the mean of the area column with the mean of
# the price column, i.e. two different variables rather than two groups of
# the same variable -- confirm this test is what was intended.
t.test(x = datos$Area_contruida, y = datos$precio_millon)
##
## Welch Two Sample t-test
##
## data: datos$Area_contruida and datos$precio_millon
## t = -12.324, df = 34.044, p-value = 4.202e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -252.0007 -180.6593
## sample estimates:
## mean of x mean of y
## 115.7469 332.0769
# Discretise both variables into interval factors, stored as new columns of
# `datos`; these grouped columns feed the cross-tabulation plot.
# cut() builds right-closed intervals by default: (0,50], (50,100], ...
area_breaks <- seq(0, 200, by = 50)
precio_breaks <- seq(200, 500, by = 100)
datos[["Area_contruida_grupo"]] <- cut(
  datos[["Area_contruida"]],
  breaks = area_breaks
)
datos[["precio_millon_grupo"]] <- cut(
  datos[["precio_millon"]],
  breaks = precio_breaks
)
# Cross-tabulation plot of the binned area against the binned price.
# library() is used so that a missing package fails immediately instead of
# require() silently returning FALSE.
library(CGPfunctions)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "packedMatrix" of class "replValueSp"; definition not updated
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "packedMatrix" of class "mMatrix"; definition not updated
PlotXTabs2(
  data = datos,
  x = Area_contruida_grupo,
  y = precio_millon_grupo
)
# Regression of price on built area.
#
# Columns are reached through `datos` (formula `data =` argument or with())
# instead of attach(): nothing is copied onto the search path, and each
# fitted model records which data frame it was estimated from.

# Scatter plot (area on the x axis, price on the y axis) and Pearson
# correlation between the two variables.
plot(precio_millon ~ Area_contruida, data = datos)
with(datos, cor(precio_millon, Area_contruida))
## [1] 0.9190295

# Model 1: simple linear regression, price ~ area.
mod <- lm(precio_millon ~ Area_contruida, data = datos)
summary(mod)
##
## Call:
## lm(formula = precio_millon ~ Area_contruida, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.673 -25.612 -6.085 24.875 67.650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.234 22.479 3.836 0.000796 ***
## Area_contruida 2.124 0.186 11.422 3.45e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.05 on 24 degrees of freedom
## Multiple R-squared: 0.8446, Adjusted R-squared: 0.8381
## F-statistic: 130.5 on 1 and 24 DF, p-value: 3.45e-11

# Point prediction of the price for a built area of 110.
predict(mod, newdata = data.frame(Area_contruida = 110))
## 1
## 319.8706

# Same prediction with a 95% confidence interval for the mean response.
# For the price of one individual dwelling, interval = "prediction" gives
# the (wider) prediction interval instead.
predict(
  mod,
  newdata = data.frame(Area_contruida = 110),
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 319.8706 306.3133 333.4279

# Welch two-sample t-test on the two columns (this call appears a second
# time in the script).
# NOTE(review): this compares the mean of the area column with the mean of
# the price column, i.e. two different variables rather than two groups of
# the same variable -- confirm this test is what was intended.
t.test(datos$Area_contruida, datos$precio_millon)
##
## Welch Two Sample t-test
##
## data: datos$Area_contruida and datos$precio_millon
## t = -12.324, df = 34.044, p-value = 4.202e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -252.0007 -180.6593
## sample estimates:
## mean of x mean of y
## 115.7469 332.0769

# Point prediction for a built area of 110 (repeated).
predict(mod, newdata = data.frame(Area_contruida = 110))
## 1
## 319.8706

# Diagnostic plots for model 1 on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(mod)

# Model 2: price regressed on log(area). This overwrites `mod`, so from here
# on `mod` refers to the log model.
mod <- lm(precio_millon ~ log(Area_contruida), data = datos)
summary(mod)
##
## Call:
## lm(formula = precio_millon ~ log(Area_contruida), data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.837 -20.153 -1.878 20.145 55.145
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -948.53 89.09 -10.65 1.42e-10 ***
## log(Area_contruida) 271.88 18.88 14.40 2.63e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27 on 24 degrees of freedom
## Multiple R-squared: 0.8963, Adjusted R-squared: 0.8919
## F-statistic: 207.4 on 1 and 24 DF, p-value: 2.63e-13

# Diagnostic plots for model 2 on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(mod)