library(lattice)
## ---- House prices ----
## NOTE: the absolute path below is specific to the original author's machine.
hp <- read.csv("c:/users/abbey/Desktop/Data Mining/HousePrices.csv")
## Preview the first three rows (head() is safe even with fewer than 3 rows).
head(hp, 3)
## HomeID Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
boxplot(SqFt ~ Neighborhood, data = hp, ylab = "SqFt")

## This tells us the homes in the West neighborhood have more square footage
## than those in the North.
nh <- table(hp$Neighborhood)
nh
##
## East North West
## 45 44 39
## This table shows there are more homes for sale in the North (44) than in
## the West (39); the East has the most (45). Given the boxplot above, that
## means more of the smaller North homes are on the market than the bigger
## West homes.
## SqFt (the explanatory variable) goes on the x-axis and Price (the response)
## on the y-axis, with explicit axis labels.
smoothScatter(hp$SqFt, hp$Price, xlab = "SqFt", ylab = "Price")

## This shows that price is affected by the square footage of the home: the
## more square footage, the higher the price. Combined with the boxplot above,
## this suggests the cheaper homes are in the North and the more expensive
## ones are in the West neighborhood.
## ---- Direct marketing ----
dm <- read.csv(
  "c:/users/abbey/Desktop/Data Mining/DirectMarketing.csv"
)
## Preview the first three rows.
dm[seq_len(3), ]
## Age Gender OwnHome Married Location Salary Children History Catalogs
## 1 Old Female Own Single Far 47500 0 High 6
## 2 Middle Male Rent Single Close 63600 0 High 6
## 3 Young Female Rent Single Close 13500 0 Low 18
## AmountSpent
## 1 755
## 2 1318
## 3 296
boxplot(
  Salary ~ Children,
  data = dm,
  ylab = "Salary"
)

## Among the customers who get the catalog, those who have more kids have a
## higher median salary.
boxplot(
  AmountSpent ~ Children,
  data = dm,
  ylab = "AmountSpent"
)

## The more children in a household, the less the customers tend to spend on
## the catalog.
xyplot(
  AmountSpent ~ Salary,
  data = dm,
  col = "Blue"
)

## This then shows that, among those who spend money on the catalog, customers
## with a higher salary spend more than those with a lower salary.
## ---- Gender discrimination ----
gd <- read.csv(
  "c:/users/abbey/Desktop/Data Mining/GenderDiscrimination.csv"
)
## Preview the first three rows.
gd[seq_len(3), ]
## Gender Experience Salary
## 1 Female 15 78200
## 2 Female 12 66400
## 3 Female 15 61200
## Salary against experience, one panel per gender.
xyplot(
  Salary ~ Experience | Gender,
  data = gd,
  col = "black"
)

## In this data set, women get paid less than men for the same years of
## experience.
boxplot(
  Salary ~ Gender,
  data = gd,
  ylab = "Salary"
)

## The median salary for men is greater than the median salary for women in
## this sample.
boxplot(
  Experience ~ Gender,
  data = gd,
  ylab = "Experience"
)

## In this sample, women who have more experience are getting paid less than
## men who have less experience.
## ---- Loan data ----
ld <- read.csv(
  "c:/users/abbey/Desktop/Data Mining/LoanData.csv"
)
## Preview the first three rows.
ld[seq_len(3), ]
## Status Credit.Grade Amount Age Borrower.Rate Debt.To.Income.Ratio
## 1 Current C 5000 4 0.150 0.04
## 2 Current HR 1900 6 0.265 0.02
## 3 Current HR 1000 3 0.150 0.02
## Vertical bars for the loan status column.
barchart(
  ld$Status,
  horizontal = FALSE
)

## There are more people who are current on their loan payments than there
## are defaults and late payments.
xyplot(
  Borrower.Rate ~ Amount | Status,
  data = ld
)

## There is no correlation between the borrower rate and the amount borrowed.
xyplot(
  Age ~ Amount | Status,
  data = ld
)

## There is no correlation between the age and the amount borrowed.
xyplot(
  Borrower.Rate ~ Debt.To.Income.Ratio | Status,
  data = ld
)

## There is no correlation between the debt-to-income ratio and the borrower
## rate.
## This data set shows no correlations: no one aspect of it appears to affect
## another.
## ---- Financial indicators ----
## NOTE: the absolute path below is specific to the original author's machine.
fi <- read.csv("c:/users/abbey/Desktop/Data Mining/FinancialIndicators.csv")
## Stock price against net income, one panel per country.
xyplot(Stock.Price ~ Net.Income | Country, data = fi)

## This graph shows us there is no correlation between a company's stock
## price, foreign or US, and its net income.
## nbin is raised above its default (128) because the default grid triggered
## the KernSmooth warning recorded below; axis labels are set explicitly.
smoothScatter(
  fi$Total.Debt, fi$Stock.Price,
  nbin = 512,
  xlab = "Total.Debt", ylab = "Stock.Price"
)
## With the default grid, this plot emitted:
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
## NOTE(review): if the warning still appears, increase nbin further.

## Looking extremely closely, you can see that the total debt of a company
## also does not affect the price of its stock. We can conclude it is very
## difficult to see how a stock is going to move based on past accounting
## information.