This data set includes prices and characteristics of 128 houses in a major US metropolitan area.
library(lattice)
library(nutshell)
## Loading required package: nutshell.bbdb
## Loading required package: nutshell.audioscrobbler
# Load the house-price data. stringsAsFactors = TRUE keeps the text columns
# (Brick, Neighborhood) as factors, so summary() reports per-level counts on
# R >= 4.0 as well, where the read.csv() default became FALSE.
don <- read.csv("~/DataMining/Data/HousePrices.csv", stringsAsFactors = TRUE)
# Per-column overview: quartiles for numeric columns, counts for factors.
summary(don)
## HomeID Price SqFt Bedrooms
## Min. : 1.00 Min. : 69100 Min. :1450 Min. :2.000
## 1st Qu.: 32.75 1st Qu.:111325 1st Qu.:1880 1st Qu.:3.000
## Median : 64.50 Median :125950 Median :2000 Median :3.000
## Mean : 64.50 Mean :130427 Mean :2001 Mean :3.023
## 3rd Qu.: 96.25 3rd Qu.:148250 3rd Qu.:2140 3rd Qu.:3.000
## Max. :128.00 Max. :211200 Max. :2590 Max. :5.000
## Bathrooms Offers Brick Neighborhood
## Min. :2.000 Min. :1.000 No :86 East :45
## 1st Qu.:2.000 1st Qu.:2.000 Yes:42 North:44
## Median :2.000 Median :3.000 West :39
## Mean :2.445 Mean :2.578
## 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :6.000
# Sample standard deviation of the sale prices.
sd(don$Price)
## [1] 26868.77
# Horizontal box plot of the Offers column; outline = FALSE hides the
# outlier points. The axis label now names the quantity being plotted.
boxplot(don$Offers, outline = FALSE, horizontal = TRUE,
        xlab = "Number of offers", col = "blueviolet")
# One bar per row of the data, coloured with the blues9 palette.
barplot(don$Price, col = blues9, xlab = "Houses", ylab = "Price")
This data set includes data from a direct marketer who sells his products only through direct mail.
# Load the direct-marketing data. stringsAsFactors = TRUE keeps the text
# columns (Age, Gender, OwnHome, Married, Location, History) as factors, so
# summary() reports per-level counts on R >= 4.0 as well, where the
# read.csv() default became FALSE.
DM1 <- read.csv("~/DataMining/Data/DirectMarketing.csv",
                stringsAsFactors = TRUE)
# Per-column overview: quartiles for numeric columns, counts for factors.
summary(DM1)
## Age Gender OwnHome Married Location
## Middle:508 Female:506 Own :516 Married:502 Close:710
## Old :205 Male :494 Rent:484 Single :498 Far :290
## Young :287
##
##
##
## Salary Children History Catalogs
## Min. : 10100 Min. :0.000 High :255 Min. : 6.00
## 1st Qu.: 29975 1st Qu.:0.000 Low :230 1st Qu.: 6.00
## Median : 53700 Median :1.000 Medium:212 Median :12.00
## Mean : 56104 Mean :0.934 NA's :303 Mean :14.68
## 3rd Qu.: 77025 3rd Qu.:2.000 3rd Qu.:18.00
## Max. :168800 Max. :3.000 Max. :24.00
## AmountSpent
## Min. : 38.0
## 1st Qu.: 488.2
## Median : 962.0
## Mean :1216.8
## 3rd Qu.:1688.5
## Max. :6217.0
# Sample standard deviation of the amount spent.
sd(DM1$AmountSpent)
## [1] 961.0686
# AmountSpent is a continuous variable, so a bar chart of table() would draw
# one bar per distinct value. A binned histogram (lattice) shows the
# distribution instead; type = "count" keeps frequencies on the y axis.
histogram(~ AmountSpent, data = DM1, type = "count", xlab = "amount spent")
# Amount spent by age group. The formula interface groups by Age whether the
# column was read as a factor or as character, and labels both axes.
boxplot(AmountSpent ~ Age, data = DM1, xlab = "Age", ylab = "AmountSpent")
This is a data set that includes 208 individuals and looks at their gender, experience, and salary.
# Load the gender-discrimination data. stringsAsFactors = TRUE keeps text
# columns as factors on R >= 4.0 as well, where the read.csv() default
# became FALSE.
GD1 <- read.csv("~/DataMining/Data/GenderDiscrimination.csv",
                stringsAsFactors = TRUE)
# Salary distribution per gender as side-by-side box plots. The explicit
# formula call groups by Gender for factor and character columns alike and
# gives the axes readable labels.
boxplot(GD1$Salary ~ GD1$Gender, xlab = "Gender", ylab = "Salary")
# Sample standard deviation of the salaries.
sd(GD1$Salary)
## [1] 22512.31
# Cross-tabulation of gender against each distinct salary value.
table(GD1$Gender, GD1$Salary)
##
## 53400 53600 54000 57000 57200 57520 58000 58200 58400 59000 59200
## Female 0 1 1 2 1 1 0 1 1 3 3
## Male 1 0 1 0 0 0 1 0 0 1 0
##
## 59300 59600 59800 60000 60900 61000 61200 61500 61600 62000 62400
## Female 1 1 1 3 1 2 1 0 1 1 2
## Male 0 0 1 1 0 1 0 1 0 2 1
##
## 62600 63000 63400 63800 64000 65000 65200 65480 66000 66400 67000
## Female 1 1 1 1 1 2 2 1 3 1 2
## Male 0 0 0 0 1 0 0 0 2 0 1
##
## 67400 67600 67800 68000 68200 68400 68600 69000 69200 69400 69800
## Female 1 1 1 8 1 1 1 0 2 1 1
## Male 0 0 0 2 0 0 0 1 0 0 0
##
## 69840 70000 70600 70800 71000 71200 72000 72200 72400 72600 72800
## Female 0 4 3 1 1 1 2 1 1 1 1
## Male 1 0 0 0 2 0 2 0 0 1 0
##
## 73000 74000 74500 74600 75000 75200 76000 76400 77600 78000 78040
## Female 0 1 1 2 1 1 5 1 1 1 1
## Male 1 1 0 0 0 0 0 0 1 1 0
##
## 78200 79000 79400 79600 80000 80400 80520 80600 81320 82000 82600
## Female 1 1 1 1 1 1 1 1 1 1 1
## Male 0 1 0 0 0 0 0 0 0 1 0
##
## 82800 83000 83600 84000 84400 84800 85000 85400 86000 86200 86400
## Female 1 0 0 2 1 1 2 1 1 1 1
## Male 0 1 1 1 0 0 2 0 0 0 0
##
## 87000 87200 87600 87800 88000 88800 89000 89600 90000 90600 91000
## Female 1 2 1 1 2 1 2 1 2 1 1
## Male 0 0 0 0 3 0 2 0 1 0 1
##
## 91600 93000 94000 95000 96000 97000 98000 100000 102400 104000
## Female 1 0 0 0 1 1 0 1 1 0
## Male 0 1 4 2 1 1 1 0 0 1
##
## 105000 108600 110000 114000 116000 117000 118000 120000 123000
## Female 1 1 0 0 0 1 0 0 1
## Male 0 0 1 2 1 0 1 4 0
##
## 123600 130000 148000 176000 188000 190000 194000
## Female 1 0 0 0 0 0 0
## Male 0 1 1 1 1 1 1
The loan data set includes information about 5611 loans.
# Read the loan data from the course data directory.
LD1 <- read.csv(file = "~/DataMining/Data/LoanData.csv")
# Vertical bar chart of how many loans fall into each credit grade.
barchart(
  table(LD1$Credit.Grade),
  xlab = "grade of loan",
  col = "orange",
  horizontal = FALSE
)
# Horizontal box plot of the borrower rate with outlier points hidden.
boxplot(
  LD1$Borrower.Rate,
  xlab = "Borrower Rate",
  horizontal = TRUE,
  outline = FALSE
)
# Sample standard deviation of the loan amounts.
sd(LD1$Amount)
## [1] 4436.923
This last data set lists indicators of the financial health of 7112 companies listed at various stock exchanges.
# Read the financial-indicator data from the course data directory.
FI1 <- read.csv("~/DataMining/Data/FinancialIndicators.csv")
# Index plot of net income: one point per row, in file order.
plot(x = FI1$Net.Income)
# Sample standard deviation of net income.
sd(FI1$Net.Income)
## [1] 864.5611
# Total debt against net income, one panel per country. The columns are
# resolved through `data = FI1`, so the formula uses bare column names; this
# also keeps the axis and strip labels free of the "FI1$" prefix.
xyplot(Total.Debt ~ Net.Income | Country, data = FI1)