# Load the housing price index data set from the author's project folder.
# NOTE(review): the absolute path is specific to one machine; consider running
# the script from the project directory instead of calling setwd().
setwd("C:/Users/Taiyyab Ali/Desktop/R language")
# stringsAsFactors = TRUE keeps City..State a factor on R >= 4.0.0, where the
# default became FALSE; the str() output and the later plot of the per-city
# means depend on that column being a factor.
Housingpriceindex <- read.csv("Housingpriceindex.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Housingpriceindex)
}
This data set records the housing price index of 44 US cities over the years 1975 to 2015, together with each city's population and violent-crime counts.
2.1 Quick review of dataset
# Compact overview of the data frame: column names, types and first values.
utils::str(Housingpriceindex)
## 'data.frame': 1708 obs. of 9 variables:
## $ Year : int 1975 1975 1975 1975 1975 1975 1976 1976 1976 1976 ...
## $ index_nsa : num 41.1 30.8 36.4 20.9 20.4 ...
## $ City..State : Factor w/ 44 levels "Albuquerque, NM",..: 3 10 12 30 39 43 3 6 10 11 ...
## $ Population : int 490584 3150000 659931 337748 503500 716000 457300 860974 3134499 427045 ...
## $ Violent.Crimes: int 8033 37160 10403 5900 3971 12704 7529 14191 30640 3461 ...
## $ Homicides : int 185 818 288 111 52 235 154 200 814 56 ...
## $ Rapes : int 443 1657 491 316 324 520 477 460 1179 263 ...
## $ Assaults : int 3518 12514 2524 2288 1492 2812 3518 5776 11070 1617 ...
## $ Robberies : int 3887 22171 7100 3185 2103 9137 3380 7755 17577 1525 ...
index_nsa stands for the housing price index. Housing price index = change in price / initial price, so it is a relative measure rather than an amount in USD.
2.2 Dimension of data
# Number of rows (observations) and columns (variables) of the data frame.
c(nrow(Housingpriceindex), ncol(Housingpriceindex))
## [1] 1708 9
2.3 Mean, max and median of data
library(psych)
# Summary statistics for the numeric measures. Both the data columns and the
# statistics are selected by name rather than by position, so the call does
# not silently pick the wrong columns if the column order ever changes.
describe(
  Housingpriceindex[, c(
    "index_nsa", "Population", "Violent.Crimes", "Homicides",
    "Rapes", "Assaults", "Robberies"
  )]
)[, c("n", "mean", "sd", "median", "min", "max")]
## n mean sd median min max
## index_nsa 1708 123.78 61.32 108.58 20.39 386.21
## Population 1708 621931.25 499821.21 470970.00 112994.00 3150000.00
## Violent.Crimes 1708 7794.24 8798.88 5119.00 385.00 90520.00
## Homicides 1708 115.13 143.51 62.00 1.00 960.00
## Rapes 1708 411.85 405.57 297.50 34.00 3754.00
## Assaults 1708 3853.74 4344.99 2588.00 264.00 42237.00
## Robberies 1708 3415.84 4206.70 2108.50 83.00 43783.00
# Number of observations recorded in each year.
table(Housingpriceindex[["Year"]])
##
## 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989
## 6 24 40 43 44 44 44 44 44 44 44 44 44 44 44
## 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
## 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## 44 44 44 44 44 44 44 44 44 44 11
In most years we have observations for all 44 cities; only the earliest years (1975–1978) and 2015 have fewer.
# Number of observations recorded for each City..State level.
table(Housingpriceindex[["City..State"]])
##
## Albuquerque, NM Arlington, TX Atlanta, GA Aurora, CO
## 39 38 41 39
## Austin, TX Baltimore, MD Boston, MA Buffalo, NY
## 39 40 39 39
## Charlotte, NC Chicago, IL Cincinnati, OH Cleveland, OH
## 39 41 40 41
## Columbus, OH Dallas, TX Denver, CO Detroit, MI
## 40 39 39 39
## Fresno, CA Honolulu, HI Houston, TX Indianapolis, IN
## 38 38 39 38
## Jacksonville, FL Louisville, KY Memphis, TN Mesa, AZ
## 37 39 38 38
## Miami, FL Milwaukee, WI Minneapolis, MN Nashville, TN
## 39 38 39 36
## Newark, NJ Oakland, CA Omaha, NE Orlando, FL
## 39 40 37 37
## Philadelphia, PA Phoenix, AZ Pittsburgh, PA Portland, OR
## 39 38 39 39
## Raleigh, NC Sacramento, CA Seattle, WA Tampa, FL
## 38 39 40 39
## Tucson, AZ Tulsa, OK Washington, DC Wichita, KS
## 38 38 40 39
There are 44 US cities in total in the data set, each labelled as "City, State".
# Mean number of violent crimes per City..State level; the grouping column is
# labelled "State" and the aggregated value column is named "x".
meancrime <- with(
  Housingpriceindex,
  aggregate(Violent.Crimes, by = list(State = City..State), FUN = mean)
)
meancrime
## State x
## 1 Albuquerque, NM 12217.590
## 2 Arlington, TX 5326.053
## 3 Atlanta, GA 6847.732
## 4 Aurora, CO 5522.333
## 5 Austin, TX 9443.641
## 6 Baltimore, MD 7451.400
## 7 Boston, MA 6857.205
## 8 Buffalo, NY 5917.872
## 9 Charlotte, NC 7273.128
## 10 Chicago, IL 7095.878
## 11 Cincinnati, OH 5905.475
## 12 Cleveland, OH 6193.293
## 13 Columbus, OH 5999.425
## 14 Dallas, TX 7123.615
## 15 Denver, CO 6111.308
## 16 Detroit, MI 6909.051
## 17 Fresno, CA 7084.553
## 18 Honolulu, HI 6732.368
## 19 Houston, TX 5093.231
## 20 Indianapolis, IN 6453.816
## 21 Jacksonville, FL 15465.838
## 22 Louisville, KY 6269.308
## 23 Memphis, TN 7386.921
## 24 Mesa, AZ 9535.342
## 25 Miami, FL 7297.410
## 26 Milwaukee, WI 9191.158
## 27 Minneapolis, MN 5203.333
## 28 Nashville, TN 7821.500
## 29 Newark, NJ 10248.667
## 30 Oakland, CA 7172.975
## 31 Omaha, NE 7322.865
## 32 Orlando, FL 7560.216
## 33 Philadelphia, PA 13513.590
## 34 Phoenix, AZ 8605.447
## 35 Pittsburgh, PA 8231.333
## 36 Portland, OR 10210.795
## 37 Raleigh, NC 9260.605
## 38 Sacramento, CA 7178.282
## 39 Seattle, WA 6979.925
## 40 Tampa, FL 9321.667
## 41 Tucson, AZ 8036.500
## 42 Tulsa, OK 8756.737
## 43 Washington, DC 7258.625
## 44 Wichita, KS 8254.564
# Plot the per-city means computed in meancrime. The values are mean counts of
# violent crimes for each city (not rates, and not per state), so the title and
# axis labels say so instead of using the default "State" / "x" labels.
plot(
  meancrime,
  main = "Mean violent crimes per city",
  xlab = "City",
  ylab = "Mean violent crimes"
)
Some cities have had a very high level of violent crime.
# Mean population per City..State level; the grouping column is labelled
# "State" and the aggregated value column is named "x".
meanPopulation <- with(
  Housingpriceindex,
  aggregate(Population, by = list(State = City..State), FUN = mean)
)
meanPopulation
## State x
## 1 Albuquerque, NM 534349.6
## 2 Arlington, TX 562341.3
## 3 Atlanta, GA 569319.4
## 4 Aurora, CO 486596.0
## 5 Austin, TX 584607.9
## 6 Baltimore, MD 512723.5
## 7 Boston, MA 952875.6
## 8 Buffalo, NY 600227.6
## 9 Charlotte, NC 978754.2
## 10 Chicago, IL 636766.9
## 11 Cincinnati, OH 592461.3
## 12 Cleveland, OH 528310.0
## 13 Columbus, OH 546036.4
## 14 Dallas, TX 586302.9
## 15 Denver, CO 551614.2
## 16 Detroit, MI 502257.8
## 17 Fresno, CA 484269.0
## 18 Honolulu, HI 531729.5
## 19 Houston, TX 502107.2
## 20 Indianapolis, IN 585038.5
## 21 Jacksonville, FL 528063.2
## 22 Louisville, KY 515232.9
## 23 Memphis, TN 407784.2
## 24 Mesa, AZ 504605.3
## 25 Miami, FL 555914.2
## 26 Milwaukee, WI 1259156.8
## 27 Minneapolis, MN 475220.4
## 28 Nashville, TN 522815.9
## 29 Newark, NJ 667643.4
## 30 Oakland, CA 822692.1
## 31 Omaha, NE 478915.5
## 32 Orlando, FL 582112.8
## 33 Philadelphia, PA 646698.7
## 34 Phoenix, AZ 536581.0
## 35 Pittsburgh, PA 683420.4
## 36 Portland, OR 1104236.8
## 37 Raleigh, NC 714322.6
## 38 Sacramento, CA 712689.2
## 39 Seattle, WA 660126.2
## 40 Tampa, FL 685315.0
## 41 Tucson, AZ 596459.7
## 42 Tulsa, OK 652768.0
## 43 Washington, DC 567694.1
## 44 Wichita, KS 641348.4
# Horizontal boxplot of every index_nsa observation in the data frame.
boxplot(
  x = Housingpriceindex[["index_nsa"]],
  main = "Housing price index distribution over the years",
  horizontal = TRUE
)
In more than half of the observations the price had at least doubled, as the median index is above one hundred.
# Horizontal boxplot of every Population observation in the data frame.
boxplot(
  x = Housingpriceindex[["Population"]],
  main = "Population distribution over the years",
  horizontal = TRUE
)
There are many outliers on the right-hand side of the population boxplot.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of Population against Year for every row of the data frame,
# drawn with scatterplot() from the car package attached above.
scatterplot(Population ~ Year, data = Housingpriceindex)
4. Hypothesis: As population increases, the housing price index should increase.
# Mean housing price index per year; the result has the columns "Year" and "x".
House_index <- with(
  Housingpriceindex,
  aggregate(index_nsa, by = list(Year = Year), FUN = mean)
)
House_index
## Year x
## 1 1975 30.11500
## 2 1976 36.72017
## 3 1977 40.73010
## 4 1978 46.90176
## 5 1979 53.80188
## 6 1980 58.45125
## 7 1981 61.55432
## 8 1982 63.95102
## 9 1983 66.26108
## 10 1984 68.61102
## 11 1985 71.55506
## 12 1986 75.43449
## 13 1987 78.77506
## 14 1988 81.64983
## 15 1989 84.95335
## 16 1990 88.14335
## 17 1991 96.97639
## 18 1992 99.92093
## 19 1993 103.17143
## 20 1994 107.40221
## 21 1995 110.89804
## 22 1996 114.66321
## 23 1997 118.34258
## 24 1998 124.34049
## 25 1999 131.87725
## 26 2000 141.28338
## 27 2001 151.43585
## 28 2002 160.82273
## 29 2003 171.12045
## 30 2004 186.22026
## 31 2005 209.28146
## 32 2006 224.55007
## 33 2007 223.26373
## 34 2008 203.34487
## 35 2009 187.41925
## 36 2010 180.93774
## 37 2011 173.03646
## 38 2012 177.97761
## 39 2013 191.83261
## 40 2014 204.67202
## 41 2015 199.17545
# Plot the yearly mean housing price index (Year on x, mean index on y).
plot(
  x = House_index,
  ylab = "Mean housing Price index",
  main = " Mean Housing Price Index from 1975 to 2015"
)
The overall trend is upward: the mean housing price index rose steadily over the years, apart from a decline between 2007 and 2011.
# Sum of Population over all rows of each year.
# NOTE(review): the number of cities reporting differs between years (see the
# table of observations per year), so totals are not comparable across years.
YearlyPolulation <- aggregate(
  Population ~ Year,
  data = Housingpriceindex,
  FUN = sum
)
YearlyPolulation
## Year Population
## 1 1975 5857763
## 2 1976 16983123
## 3 1977 23420031
## 4 1978 23987753
## 5 1979 24946275
## 6 1980 24636321
## 7 1981 24612296
## 8 1982 24853004
## 9 1983 27788236
## 10 1984 25181519
## 11 1985 25344823
## 12 1986 25601365
## 13 1987 26192138
## 14 1988 25435384
## 15 1989 25433873
## 16 1990 25988349
## 17 1991 25583366
## 18 1992 25803789
## 19 1993 26373767
## 20 1994 27283435
## 21 1995 25922116
## 22 1996 26557239
## 23 1997 26879303
## 24 1998 26612583
## 25 1999 27553312
## 26 2000 28055928
## 27 2001 28620733
## 28 2002 28676866
## 29 2003 28667377
## 30 2004 28762411
## 31 2005 28955986
## 32 2006 29196347
## 33 2007 29658684
## 34 2008 29992130
## 35 2009 29480743
## 36 2010 29442632
## 37 2011 29761041
## 38 2012 30082832
## 39 2013 30399496
## 40 2014 30805707
## 41 2015 6868491
# Plot the summed population per year. The title previously contained the typo
# "Populatin" and called the 44 cities "states"; the y-axis label was the
# misspelt variable name rather than a description of the plotted quantity.
plot(
  YearlyPolulation,
  main = "Total yearly population of the 44 US cities",
  ylab = "Total population"
)