House Prices

This data set includes prices and characteristics of 128 houses in a major US metropolitan area.

library(lattice)
library(nutshell)
## Loading required package: nutshell.bbdb
## Loading required package: nutshell.audioscrobbler
# Load the house-price data. stringsAsFactors = TRUE keeps the text columns
# (Brick, Neighborhood) as factors on R >= 4.0, where the read.csv() default
# became FALSE, so summary() reports the per-level counts shown below.
don <- read.csv("~/DataMining/Data/HousePrices.csv", stringsAsFactors = TRUE)
summary(don)
##      HomeID           Price             SqFt         Bedrooms    
##  Min.   :  1.00   Min.   : 69100   Min.   :1450   Min.   :2.000  
##  1st Qu.: 32.75   1st Qu.:111325   1st Qu.:1880   1st Qu.:3.000  
##  Median : 64.50   Median :125950   Median :2000   Median :3.000  
##  Mean   : 64.50   Mean   :130427   Mean   :2001   Mean   :3.023  
##  3rd Qu.: 96.25   3rd Qu.:148250   3rd Qu.:2140   3rd Qu.:3.000  
##  Max.   :128.00   Max.   :211200   Max.   :2590   Max.   :5.000  
##    Bathrooms         Offers      Brick    Neighborhood
##  Min.   :2.000   Min.   :1.000   No :86   East :45    
##  1st Qu.:2.000   1st Qu.:2.000   Yes:42   North:44    
##  Median :2.000   Median :3.000            West :39    
##  Mean   :2.445   Mean   :2.578                        
##  3rd Qu.:3.000   3rd Qu.:3.000                        
##  Max.   :4.000   Max.   :6.000
# Sample standard deviation of the house prices.
sd(don[["Price"]])
## [1] 26868.77
# Horizontal box plot of the number of offers per house; outline = FALSE
# suppresses the outlier points. The axis label is completed (it previously
# read just "Number of").
boxplot(
  don$Offers,
  outline = FALSE,
  horizontal = TRUE,
  xlab = "Number of offers",
  col = "blueviolet"
)

# One bar per house, bar height = price, coloured from the blues9 palette.
barplot(height = don$Price, xlab = "Houses", ylab = "Price", col = blues9)

Direct Marketing

This data set contains data from a direct marketer who sells products exclusively through direct mail.

# Load the direct-marketing data. stringsAsFactors = TRUE keeps the
# categorical columns (Age, Gender, OwnHome, Married, Location, History) as
# factors on R >= 4.0, where the read.csv() default became FALSE, so
# summary() reports the per-level counts shown below.
DM1 <- read.csv(
  "~/DataMining/Data/DirectMarketing.csv",
  stringsAsFactors = TRUE
)
summary(DM1)
##      Age         Gender    OwnHome       Married     Location  
##  Middle:508   Female:506   Own :516   Married:502   Close:710  
##  Old   :205   Male  :494   Rent:484   Single :498   Far  :290  
##  Young :287                                                    
##                                                                
##                                                                
##                                                                
##      Salary          Children       History       Catalogs    
##  Min.   : 10100   Min.   :0.000   High  :255   Min.   : 6.00  
##  1st Qu.: 29975   1st Qu.:0.000   Low   :230   1st Qu.: 6.00  
##  Median : 53700   Median :1.000   Medium:212   Median :12.00  
##  Mean   : 56104   Mean   :0.934   NA's  :303   Mean   :14.68  
##  3rd Qu.: 77025   3rd Qu.:2.000                3rd Qu.:18.00  
##  Max.   :168800   Max.   :3.000                Max.   :24.00  
##   AmountSpent    
##  Min.   :  38.0  
##  1st Qu.: 488.2  
##  Median : 962.0  
##  Mean   :1216.8  
##  3rd Qu.:1688.5  
##  Max.   :6217.0
# Sample standard deviation of the amount spent per customer record.
sd(DM1[["AmountSpent"]])
## [1] 961.0686
# AmountSpent is a continuous numeric variable (38 to 6217 in the summary
# above), so bin it into a histogram rather than drawing one bar per
# distinct value; type = "count" keeps frequencies on the vertical axis.
histogram(~AmountSpent, data = DM1, type = "count", xlab = "amount spent")

# Age is categorical (Middle / Old / Young), so compare spending across the
# groups with box plots. The formula interface works whether Age was read
# as a factor or as character, and the axes are labelled explicitly.
boxplot(AmountSpent ~ Age, data = DM1, xlab = "Age", ylab = "Amount spent")

Gender Discrimination

This data set covers 208 individuals and records their gender, experience, and salary.

# Load the salary data. stringsAsFactors = TRUE keeps Gender a factor on
# R >= 4.0, where read.csv() no longer converts strings by default.
GD1 <- read.csv(
  "~/DataMining/Data/GenderDiscrimination.csv",
  stringsAsFactors = TRUE
)
# Box plots of salary for each gender, with explicit axis labels.
boxplot(Salary ~ Gender, data = GD1, xlab = "Gender", ylab = "Salary")

# Sample standard deviation of salary across all individuals.
sd(GD1[["Salary"]])
## [1] 22512.31
# Cross-tabulate gender (rows) against each distinct salary value (columns).
table(GD1[["Gender"]], GD1[["Salary"]])
##         
##          53400 53600 54000 57000 57200 57520 58000 58200 58400 59000 59200
##   Female     0     1     1     2     1     1     0     1     1     3     3
##   Male       1     0     1     0     0     0     1     0     0     1     0
##         
##          59300 59600 59800 60000 60900 61000 61200 61500 61600 62000 62400
##   Female     1     1     1     3     1     2     1     0     1     1     2
##   Male       0     0     1     1     0     1     0     1     0     2     1
##         
##          62600 63000 63400 63800 64000 65000 65200 65480 66000 66400 67000
##   Female     1     1     1     1     1     2     2     1     3     1     2
##   Male       0     0     0     0     1     0     0     0     2     0     1
##         
##          67400 67600 67800 68000 68200 68400 68600 69000 69200 69400 69800
##   Female     1     1     1     8     1     1     1     0     2     1     1
##   Male       0     0     0     2     0     0     0     1     0     0     0
##         
##          69840 70000 70600 70800 71000 71200 72000 72200 72400 72600 72800
##   Female     0     4     3     1     1     1     2     1     1     1     1
##   Male       1     0     0     0     2     0     2     0     0     1     0
##         
##          73000 74000 74500 74600 75000 75200 76000 76400 77600 78000 78040
##   Female     0     1     1     2     1     1     5     1     1     1     1
##   Male       1     1     0     0     0     0     0     0     1     1     0
##         
##          78200 79000 79400 79600 80000 80400 80520 80600 81320 82000 82600
##   Female     1     1     1     1     1     1     1     1     1     1     1
##   Male       0     1     0     0     0     0     0     0     0     1     0
##         
##          82800 83000 83600 84000 84400 84800 85000 85400 86000 86200 86400
##   Female     1     0     0     2     1     1     2     1     1     1     1
##   Male       0     1     1     1     0     0     2     0     0     0     0
##         
##          87000 87200 87600 87800 88000 88800 89000 89600 90000 90600 91000
##   Female     1     2     1     1     2     1     2     1     2     1     1
##   Male       0     0     0     0     3     0     2     0     1     0     1
##         
##          91600 93000 94000 95000 96000 97000 98000 100000 102400 104000
##   Female     1     0     0     0     1     1     0      1      1      0
##   Male       0     1     4     2     1     1     1      0      0      1
##         
##          105000 108600 110000 114000 116000 117000 118000 120000 123000
##   Female      1      1      0      0      0      1      0      0      1
##   Male        0      0      1      2      1      0      1      4      0
##         
##          123600 130000 148000 176000 188000 190000 194000
##   Female      1      0      0      0      0      0      0
##   Male        0      1      1      1      1      1      1

Loan Data

The loan data set includes information about 5611 loans.

# Load the loan data, then chart the number of loans in each credit grade
# as vertical orange bars.
LD1 <- read.csv(file = "~/DataMining/Data/LoanData.csv")
barchart(
  table(LD1[["Credit.Grade"]]),
  col = "orange",
  xlab = "grade of loan",
  horizontal = FALSE
)

# Horizontal box plot of the borrower rate; outline = FALSE hides outliers.
boxplot(
  LD1[["Borrower.Rate"]],
  xlab = "Borrower Rate",
  horizontal = TRUE,
  outline = FALSE
)

# Sample standard deviation of the loan amounts.
sd(LD1[["Amount"]])
## [1] 4436.923

Financial Indicators

This last data set lists indicators of the financial health of 7112 companies listed on various stock exchanges.

# Load the financial-indicator data and plot net income against row index.
FI1 <- read.csv(file = "~/DataMining/Data/FinancialIndicators.csv")
plot(FI1$Net.Income)

# Sample standard deviation of net income across companies.
sd(FI1[["Net.Income"]])
## [1] 864.5611
# Scatter plot of total debt against net income, one panel per country.
# Columns are referenced by bare name because data = FI1 already supplies
# them; this also makes the default axis labels the plain column names.
xyplot(Total.Debt ~ Net.Income | Country, data = FI1)