Analiza Danych.

Analiza Opisowa

Na podstawie danych dot. rynku nieruchomości z pewnego regionu USA, dokonaj podobnej analizy opisowej.

# Download the real-estate data set to a local CSV file (binary transfer mode).
download.file(
  "https://github.com/kflisikowski/ds/blob/master/b.csv?raw=true",
  destfile = "real_estates.csv",
  mode = "wb"
)
# The first CSV column is used as row names.
houses <- read.csv("real_estates.csv", row.names = 1)
# The raw data frame is deliberately not attached to the search path:
# columns are accessed explicitly as houses$<name>, which avoids stale,
# masked copies once the data frame is modified.
# Check whether the data frame contains any missing value.
anyNA(houses)
## [1] FALSE

Porządkowanie

# Data tidying: store chas and rad as factors.
houses$chas <- as.factor(houses$chas)
houses$rad <- as.factor(houses$rad)
# Round rm to whole numbers and treat each rounded value as a category.
houses$rm <- as.factor(round(houses$rm, 0))
# Remove every previously attached copy of houses before attaching the tidied
# version; otherwise the old copy stays on the search path (masked) and
# attach() emits masking warnings.
while ("houses" %in% search()) {
  detach("houses", character.only = TRUE)
}
attach(houses)

Etykietowanie, test Jenksa

# Labels for the ten 5 k$-wide price classes, in the same order as the breaks.
etykiety <- c(
  "0-5 k$", "5-10 k$", "10-15 k$", "15-20 k$", "20-25 k$",
  "25-30 k$", "30-35 k$", "35-40 k$", "40-45 k$", "45-50 k$"
)
# Bin medv into the labelled classes using break points 0, 5, ..., 50.
limits <- cut(
  houses$medv,
  breaks = seq(from = 0, to = 50, by = 5),
  labels = etykiety
)
# Frequency table of the price classes.
tabela2 <- freq(limits, type = "html")
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Render the frequency table with a caption, using the striped material theme.
kable_material(
  kbl(tabela2, caption = "Mieszkania w Ameryce - ceny w k$"),
  lightable_options = "striped"
)
Mieszkania w Ameryce - ceny w k$
x label Freq Percent Valid Percent Cumulative Percent
Valid 0-5 k$ 2 0.4 0.4 0.4
5-10 k$ 22 4.3 4.3 4.7
10-15 k$ 73 14.4 14.4 19.2
15-20 k$ 118 23.3 23.3 42.5
20-25 k$ 167 33.0 33.0 75.5
25-30 k$ 40 7.9 7.9 83.4
30-35 k$ 36 7.1 7.1 90.5
35-40 k$ 17 3.4 3.4 93.9
40-45 k$ 9 1.8 1.8 95.7
45-50 k$ 22 4.3 4.3 100.0
Total 506 100.0 100.0
Missing <blank> 0 0.0
<NA> 0 0.0
Total 506 100.0
# Fixed classification of medv with break points 5, 10, ..., 50.
# Ten break points delimit nine classes, so n is set to 9 to agree with
# fixedBreaks.
# NOTE(review): the frequency table built with cut() uses breaks starting at 0
# (ten classes); confirm whether the two classifications are meant to differ.
tab2 <- classIntervals(
  houses$medv,
  n = 9,
  style = "fixed",
  fixedBreaks = seq(5, 50, by = 5)
)
# Report the goodness-of-fit and tabular-accuracy indices of the classification.
jenks.tests(tab2)
##        # classes  Goodness of fit Tabular accuracy 
##        9.0000000        0.9760087        0.8179821

Generowanie wykresu cen mieszkań w zależności od dostępności autostrad

# Density plot of medv, filled by rad, with a dashed normal-density overlay.
density.p <- ggdensity(houses, x = "medv",
                       fill = "rad", palette = "jco") +
  stat_overlay_normal_density(color = "blue", linetype = "dashed")

# Descriptive statistics of medv within each rad group; only the group label,
# group size, mean and standard deviation are kept for display.
stable <- desc_statby(houses, measure.var = "medv",
                      grps = "rad")
stable <- stable[, c("rad", "length", "mean", "sd")]

# Turn the statistics table into a drawable object.
stable.p <- ggtexttable(stable, rows = NULL,
                        theme = ttheme("mGreen"))

# Caption paragraph shown alongside the plot and the table.
text <- paste("Ceny mieszkań wg dostępności do autostrad radialnych w Ameryce.",
              "Próba 506 mieszkań.",
              sep = " ")
text.p <- ggparagraph(text = text, face = "italic", size = 10, color = "navyblue")

# Arrange the three elements on a 2 x 2 grid. heights holds one relative
# height per grid row, so exactly two values are supplied (a stray
# decimal-comma "0,5" previously produced four values for two rows).
ggarrange(density.p, stable.p, text.p,
          ncol = 2, nrow = 2,
          heights = c(5, 2.5))

library(psych)  # provides skew() and kurtosi()

# Summary specification for medv: each entry is a one-sided formula that is
# evaluated on the data when the summary table is built.
# IQR() from the always-available stats package is used for the interquartile
# range, so no additional package is needed for that statistic.
# NOTE(review): the entries labelled "Var %" and "IQR Var %" are plain ratios
# (not multiplied by 100) - confirm whether the labels or the scaling should
# change.
raport <-
  list("medv" =
       list("Min"        = ~ min(medv),
            "Max"        = ~ max(medv),
            "Q1"         = ~ quantile(medv, 0.25),
            "Mediana"    = ~ round(median(medv), 2),
            "Q3"         = ~ quantile(medv, 0.75),
            "Mean"       = ~ round(mean(medv), 2),
            "Odch. std." = ~ round(sd(medv), 2),
            "IQR"        = ~ round(IQR(medv), 2),
            "Sx"         = ~ round(IQR(medv) / 2, 2),
            "Var %"      = ~ round((sd(medv) / mean(medv)), 2),
            "IQR Var %"  = ~ round((IQR(medv) / median(medv)), 2),
            "Skośność"   = ~ round(skew(medv), 2),
            "Kurtoza"    = ~ round(kurtosi(medv), 2)
            ))
# Build the summary table of medv, split by the rm factor.
tabela <- summary_table(houses, summaries = raport, by = c("rm"))

# Render the per-room-count summary table with a caption and one column header
# per rm level shown in the table (six columns).
kbl(tabela,
  digits = 2,
  caption = "Tabela 1. Mieszkania w Ameryce - średnie ceny w k$ wg średniej liczby pokoi.",
  col.names = c('4 pokoje', '5 pokojów', '6 pokojów', '7 pokojów', '8 pokojów', '9 pokojów')) %>%
  # FALSE is spelled out (F can be reassigned); the font family is written with
  # spaces so that it names the Times New Roman family.
  kable_classic(full_width = FALSE, html_font = "Times New Roman") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Tabela 1. Mieszkania w Ameryce - średnie ceny w k$ wg średniej liczby pokoi.
4 pokoje 5 pokojów 6 pokojów 7 pokojów 8 pokojów 9 pokojów
Min 8.80 5.00 5.00 7.50 35.20 21.90
Max 27.50 50.00 50.00 50.00 50.00 50.00
Q1 11.90 10.40 16.67 23.90 42.67 35.95
Mediana 13.80 14.40 19.90 28.70 46.35 50.00
Q3 23.10 17.90 22.50 33.00 50.00 50.00
Mean 17.02 14.95 19.37 28.05 45.56 40.63
Odch. std. 7.92 7.62 5.22 8.02 4.57 16.22
IQR 11.20 7.50 5.83 9.10 7.33 14.05
Sx 5.60 3.75 2.91 4.55 3.66 7.02
Var % 0.47 0.51 0.27 0.29 0.10 0.40
IQR Var % 0.81 0.52 0.29 0.32 0.16 0.28
Skośność 0.26 2.50 0.88 -0.11 -0.65 -0.38
Kurtoza -2.05 9.48 6.83 0.60 -0.83 -2.33