# Show the current working directory (printed for orientation only; the data
# file further down is read from an absolute path).
getwd()

‘/Users/JoJo’

'/Users/JoJo/Desktop/Projects/650/Week 7/'

‘/Users/JoJo/Desktop/Projects/650/Week 7/’

# Load the supervised DFA dataset and preview its first rows.
# The path is defined once and reused so the directory spelling cannot drift:
# a differently-cased component (e.g. "JoJO" instead of "JoJo") only resolves
# on a case-insensitive file system.
filepath <- "/Users/JoJo/Desktop/Projects/650/Week 7/dfa-supervised.csv"
df <- read.csv(filepath)
head(df)
<tr><td>Art  </td><td>    8</td><td>  0  </td><td>0.00 </td></tr>
<tr><td>Art  </td><td>    2</td><td>  0  </td><td>0.00 </td></tr>
<tr><td>Art  </td><td>   37</td><td>  0  </td><td>0.00 </td></tr>
<tr><td>Art  </td><td>48320</td><td>109  </td><td>0.23 </td></tr>
<tr><td>Art  </td><td>    2</td><td>  0  </td><td>0.00 </td></tr>
<tr><td>Art  </td><td>    1</td><td>  0  </td><td>0.00 </td></tr>
Creative.Groups.1 Impressions Clicks Click.Rate
# Per-column summary of the whole data frame.
# NOTE(review): the captured output shows a minimum of -1 for Clicks and a
# Click.Rate range of -100 to 200, which look like bad rows — TODO confirm
# and clean before modelling.
summary(df)
          Creative.Groups.1  Impressions           Clicks         
 Recreation        :3041    Min.   :     0.0   Min.   :   -1.000  
 Art               :3024    1st Qu.:     3.0   1st Qu.:    0.000  
 Sports            :3009    Median :    11.0   Median :    0.000  
 Science           :3006    Mean   :   781.1   Mean   :    2.464  
 29 Collections    :1637    3rd Qu.:    52.0   3rd Qu.:    0.000  
 Ease of Membership:1238    Max.   :404186.0   Max.   :18896.000  
 (Other)           :6791                                          
   Click.Rate       
 Min.   :-100.0000  
 1st Qu.:   0.0000  
 Median :   0.0000  
 Mean   :   0.5261  
 3rd Qu.:   0.0000  
 Max.   : 200.0000  
                    
# Summary statistics restricted to the Impressions and Clicks columns.
summary(df[, c("Impressions", "Clicks")])
  Impressions           Clicks         
 Min.   :     0.0   Min.   :   -1.000  
 1st Qu.:     3.0   1st Qu.:    0.000  
 Median :    11.0   Median :    0.000  
 Mean   :   781.1   Mean   :    2.464  
 3rd Qu.:    52.0   3rd Qu.:    0.000  
 Max.   :404186.0   Max.   :18896.000  
# Summary statistics restricted to the Impressions and Click.Rate columns.
summary(df[, c("Impressions", "Click.Rate")])
  Impressions         Click.Rate       
 Min.   :     0.0   Min.   :-100.0000  
 1st Qu.:     3.0   1st Qu.:   0.0000  
 Median :    11.0   Median :   0.0000  
 Mean   :   781.1   Mean   :   0.5261  
 3rd Qu.:    52.0   3rd Qu.:   0.0000  
 Max.   :404186.0   Max.   : 200.0000  
# Install ggvis only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//RtmpRRn6Cm/downloaded_packages
library(ggvis)
# Scatter plot of click rate against impressions, one point per row,
# coloured by creative group.
df %>%
  ggvis(
    ~Impressions,
    ~Click.Rate,
    fill = ~Creative.Groups.1
  ) %>%
  layer_points()
# file:///var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T/RtmpWwWbVv/viewhtml3ebde4461d7/index.html
# Install class (provides knn()) only when it is missing, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//RtmpRRn6Cm/downloaded_packages
library(class)
# Split the data ~80/20 into training and test sets, then classify the
# creative group of each test row with k-nearest neighbours (k = 3).
set.seed(42)  # fixed seed so the random split is reproducible

# The data frame has four columns: the class label comes first, followed by
# three numeric predictors.
label_col <- "Creative.Groups.1"
feature_cols <- c("Impressions", "Clicks", "Click.Rate")

# Assign every row to partition 1 (training) or 2 (test).
ind <- sample(2, nrow(df), replace = TRUE, prob = c(0.8, 0.2))

# knn() measures distances between rows, so train/test must hold only the
# numeric predictors; the label column is excluded.
# NOTE(review): Impressions reaches 404186 while Click.Rate tops out at 200 in
# the summary, so unscaled distances are dominated by Impressions — consider
# normalising the predictors.
dfTrain <- df[ind == 1, feature_cols]
dfTest <- df[ind == 2, feature_cols]

# Labels live in the first column. Indexing a non-existent fifth column yields
# NULL, which makes knn() stop with "'train' and 'class' have different
# lengths". Build the factor once so both subsets share the same levels.
labels <- factor(df[[label_col]])
dfTrainLabels <- labels[ind == 1]
dfTestLabels <- labels[ind == 2]

# NOTE(review): many rows appear to share identical predictor values; knn()
# can stop with a "too many ties" error in that case — TODO confirm on this
# data.
df_pred <- knn(train = dfTrain, test = dfTest, cl = dfTrainLabels, k = 3)
Warning message in is.na(cl):
“is.na() applied to non-(list or vector) of type 'NULL'”


Error in knn(train = dfTrain, test = dfTest, cl = dfTrainLabels, k = 3): 'train' and 'class' have different lengths
Traceback:


1. knn(train = dfTrain, test = dfTest, cl = dfTrainLabels, k = 3)

2. stop("'train' and 'class' have different lengths")
# Print the value returned by knn() above; this fails with "object 'df_pred'
# not found" if that call did not complete.
df_pred
Error in eval(expr, envir, enclos): object 'df_pred' not found
Traceback:
# Install gmodels (provides CrossTable()) only when it is missing, so
# re-running the script does not download and reinstall the package every time.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//RtmpRRn6Cm/downloaded_packages
library(gmodels)
# Cross-tabulate the actual test labels against the knn() predictions.
# All proportion and chi-square contribution arguments are switched off.
# Spelled-out FALSE is used because the shorthand F is an ordinary variable
# that can be reassigned.
CrossTable(
  x = dfTestLabels,
  y = df_pred,
  prop.chisq = FALSE,
  prop.r = FALSE,
  prop.c = FALSE,
  prop.t = FALSE
)
Error in CrossTable(x = dfTestLabels, y = df_pred, prop.chisq = F, prop.r = F, : object 'df_pred' not found
Traceback:


1. CrossTable(x = dfTestLabels, y = df_pred, prop.chisq = F, prop.r = F, 
 .     prop.c = F, prop.t = F)