The code here generates a histogram of each numerical parameter included in the libFM computation, together with its quantiles. The numerical variables are outlier-treated (outlier values are replaced with the mean). For categorical variables, the distinct factor values are displayed.
library(rpart)
library(mlr)
library(dummies)
library(lattice)
library(knitr)
# Work from the local libFM input directory and tell knitr to evaluate
# chunks relative to the libfm_input/ folder inside it.
setwd("/Users/adarsa/ilimi/github/Learning-Platform-Analytics/platform-scripts/shell/local/libfm.input")
opts_knit$set(
  root.dir = '/Users/adarsa/ilimi/github/Learning-Platform-Analytics/platform-scripts/shell/local/libfm.input/libfm_input/'
)

# Load the raw libFM input table and remember its original column names.
libfm_input <- read.csv("libfm_input")
col_names <- names(libfm_input)

# Keep only the rows whose c1_total_ts is non-zero.
libfm_input <- libfm_input[libfm_input$c1_total_ts != 0, ]

# Expand the listed categorical columns into dummy columns; new columns are
# named "<column>_<value>" because sep = "_".
libfm_input_onehot <- dummy.data.frame(
  libfm_input,
  names = c(
    "c1_subject", "c1_contentType", "c1_language",
    "c2_subject", "c2_contentType", "c2_language",
    "device_spec"
  ),
  sep = "_"
)
# Section banner followed by the distinct values of every categorical column.
print("###########################################")
## [1] "###########################################"
print("Categorical Variables :")
## [1] "Categorical Variables :"
cat_features <- c(
  "c1_subject", "c1_contentType", "c1_language",
  "c2_subject", "c2_contentType", "c2_language",
  "device_spec"
)
# NOTE(review): this "Numeric Variables:" header is printed before the
# categorical listing below; it looks like it belongs with the numeric
# section further down -- confirm before moving it.
print("Numeric Variables:")
## [1] "Numeric Variables:"
for (feature in cat_features) {
  print("###########################################")
  # Single-bracket indexing keeps a one-column data frame, so the column
  # name is printed as a header above its unique values.
  print(unique(libfm_input[feature]))
}
## [1] "###########################################"
## c1_subject
## 1 literacy
## 160 numeracy
## 534 literacy,numeracy
## [1] "###########################################"
## c1_contentType
## 1 Game
## 2 Story
## 4 Worksheet
## 14 Collection
## [1] "###########################################"
## c1_language
## 1 English
## 2 Hindi
## 74 Kannada
## 570 Telugu
## 583 Other
## 1026 Tamil
## [1] "###########################################"
## c2_subject
## 1 literacy
## 8 numeracy
## 24 Unknown
## 574 literacy,numeracy
## [1] "###########################################"
## c2_contentType
## 1 Game
## 2 Story
## 4 Worksheet
## 14 Collection
## 24 Unknown
## [1] "###########################################"
## c2_language
## 1 English
## 2 Hindi
## 24 Unknown
## 74 Kannada
## 570 Telugu
## 637 Other
## 1033 Tamil
## [1] "###########################################"
## device_spec
## 1 MicromaxP480
## 2 Motorola MotoE2
## 54 Samsung SCH-I535
## 55 Motorola XT1052
## 63 Motorola XT1033
## 73 Xiaomi MI PAD
## 100 Micromax A311
## 101 LENOVO Lenovo K50a40
## 336 Micromax A093
## 349 IBall 3G 1026-Q18
## 354 LENOVO Lenovo A6000
## 356 Micromax A089
## 357 LYF LS-5501
## 365 Motorola MotoE2(4G-LTE)
## 370 XOLO Opus 3
## 371 LAVA Flair Z1
## 374 Lava X1 Selfie
## 375 HTC One_M8
## 437 Micromax AQ4501
## 438 Motorola XT1068
## 444 Motorola XT1562
## 445 Samsung SM-G925I
## 447 Letv Le X507
## 467 Micromax A102
## 471 Motorola MotoG3
## 532 Samsung SM-N910G
## 694 HTC Desire 526GPLUS dual sim
## 896 Unknown Google Nexus 7 - 5.1.0 - API 22 - 800x1280
## 911 Xiaomi 2014818
## 913 Lava iris Fuel50
## 914 Micromax A107
## 932 OnePlus A0001
## 952 LENOVO Lenovo S5000-H
## 953 IBall 3G 7345Q-800
## 1024 LGE LG-VS450PP
## 1025 Micromax Q340
## 1040 Samsung Galaxy Nexus
## 1118 ZTE U950
## 1125 LGE Nexus 5
## 1136 IBall 3G Q7271-IPS20
## 1138 Samsung GT-N7100
## 1190 LAVA iris 450P
## 1191 Samsung SM-J200G
## 1204 Huawei Nexus 6P
## 1207 Sony D2502
## 1209 Samsung SM-N750
## 1227 Hewlett-Packard HP 7 G2
## 1230 Samsung GT-S7582
## 1231 Samsung SM-T116NY
## 1239 Samsung SM-N900
## 1248 Samsung Nexus 10
## 1261 HUAWEI KIW-L22
## 1278 Samsung GT-I9060
## 1279 LENOVO Lenovo TAB 2 A7-30HC
## 1281 Asus Nexus 7
## 1296 MicromaxP680
## 1322 LAVA irisX8
## 1326 LENOVO Lenovo TAB S8-50LC
## 1328 Asus ASUS_Z010D
## 1338 RELIANCE LS-5501
## 1402 OnePlus ONE A2003
## 1432 Genymotion Google Nexus 7 2013 - 6.0.0 - API 23 - 1200x1920
## 1472 Samsung SM-T231
## 1534 Sony C1904
## 1535 LENOVO Lenovo P1a42
## 1539 Intex Cloud M6
## 1541 INTEX Cloud 4G Star
## 1542 Samsung SM-T800
## 1543 Samsung SM-G531F
## 1616 Asus ASUS_Z00AD
## 1636 Micromax E313
## 1637 Unknown Android SDK built for x86
## 1642 HTC One
## 1813 Micromax P666
## 1826 Intex Aqua Y2 Pro
## 1856 Samsung SM-G7102
## 1893 Samsung SM-G355H
## 1931 Motorola Nexus 6
## 1932 Micromax AQ5001
## 1933 Micromax D321
## 1935 Motorola XT1022
## 1944 IBall 3G Q27
## 1945 LGE Nexus 5X
## 1982 Dell Venue 8 3830
## 2003 Micromax Q392
## 2006 Asus K012
## 2050 Micromax E311
## 2051 Motorola MotoG3-TE
## 2064 Samsung SM-G316HU
## 2066 Micromax A290
## 2085 XOLO A500 Club
## 2089 Samsung SM-T211
## 2210 HTC Desire 816G dual sim
## 2213 Samsung GT-I9300
## 2217 Xiaomi Mi 4i
## 2358 SWIPE ACE Strike
## 2367 LGE LG-D855
## 2369 Samsung GT-I9500
## 2374 Samsung GT-I9060I
## 2384 Dell Venue7 3740 LTE
## 2385 Sony C6603
## 2401 Xiaomi Redmi Note 3
## 2444 OPPO A33f
## 2475 Micromax Q331
## 2692 INTEX Aqua_LifeIII
# Column positions used below to index libfm_input_onehot for outlier
# treatment, quantiles and histograms. These are hard-coded positions, so they
# must be revisited whenever the column layout of the input changes.
features_index <- c(
  101:124,
  225:251,
  371:375
)
# Replace outlier values in a numeric vector with the vector's mean.
#
# Args:
#   x:       numeric vector to treat.
#   outlier: values of `x` that are to be treated as outliers
#            (for example the `out` element returned by boxplot.stats()).
#
# Returns:
#   A vector of the same length as `x` in which every element whose value
#   appears in `outlier` has been replaced by max(0, mean(x)); i.e. the mean
#   of the untreated data, floored at 0. Missing values in `x` are ignored
#   when computing the mean and are left untouched.
remove_outliers <- function(x, outlier) {
  # Compute the replacement once, from the untreated data. Recomputing the
  # mean after each substitution would make the result depend on the order of
  # `outlier` and give different outliers different replacement values.
  replacement <- max(0, mean(x, na.rm = TRUE))
  x[x %in% outlier] <- replacement
  x
}
# For every selected column: replace its boxplot outliers, store the treated
# values back into the data frame, then report the column's quantiles and
# draw its histogram.
for (col_idx in features_index) {
  param_name <- colnames(libfm_input_onehot)[[col_idx]]

  # boxplot.stats()$out holds the values flagged as outliers for this column.
  outlier_values <- boxplot.stats(libfm_input_onehot[[col_idx]])$out
  libfm_input_onehot[[col_idx]] <- remove_outliers(
    libfm_input_onehot[[col_idx]],
    outlier_values
  )

  print("##########################################")
  print(paste("LibFM Parameter:", param_name))
  print(quantile(libfm_input_onehot[[col_idx]]))
  hist(
    libfm_input_onehot[[col_idx]],
    breaks = 200,
    xlab = param_name,
    main = paste("histogram of", param_name, sep = " ")
  )
}
## [1] "##########################################"
## [1] "LibFM Parameter: c1_total_ts"
## 0% 25% 50% 75% 100%
## 1.0000 40.0375 145.7300 402.0000 1250.8000
## [1] "##########################################"
## [1] "LibFM Parameter: c1_avg_interactions_min"
## 0% 25% 50% 75% 100%
## 0.00 8.42 17.68 27.45 66.18
## [1] "##########################################"
## [1] "LibFM Parameter: c1_download_date"
## 0% 25% 50% 75% 100%
## 0.000000e+00 0.000000e+00 1.463380e+12 1.465294e+12 1.470302e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c1_downloaded"
## 0% 25% 50% 75% 100%
## 0 0 1 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c1_last_played_on"
## 0% 25% 50% 75% 100%
## 1.446302e+12 1.453534e+12 1.462362e+12 1.464949e+12 1.470306e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c1_mean_play_time_interval"
## 0% 25% 50% 75% 100%
## -510.00 0.00 0.00 372.75 86065.67
## [1] "##########################################"
## [1] "LibFM Parameter: c1_num_group_user"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.005939124
## [1] "##########################################"
## [1] "LibFM Parameter: c1_num_individual_user"
## 0% 25% 50% 75% 100%
## 0.00000 1.00000 2.00000 4.44001 11.00000
## [1] "##########################################"
## [1] "LibFM Parameter: c1_num_sessions"
## 0% 25% 50% 75% 100%
## 1.000000 1.000000 2.000000 4.446235 11.000000
## [1] "##########################################"
## [1] "LibFM Parameter: c1_start_time"
## 0% 25% 50% 75% 100%
## 1.446046e+12 1.453463e+12 1.462356e+12 1.464943e+12 1.470306e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c1_total_interactions"
## 0% 25% 50% 75% 100%
## 0.0000 12.0000 45.0000 114.2056 341.0000
## [1] "##########################################"
## [1] "LibFM Parameter: c1_subject_literacy"
## 0% 25% 50% 75% 100%
## 0.9732739 1.0000000 1.0000000 1.0000000 1.0000000
## [1] "##########################################"
## [1] "LibFM Parameter: c1_subject_literacy,numeracy"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.002227171
## [1] "##########################################"
## [1] "LibFM Parameter: c1_subject_numeracy"
## 0% 25% 50% 75% 100%
## 0.00000000 0.00000000 0.00000000 0.00000000 0.02449889
## [1] "##########################################"
## [1] "LibFM Parameter: c1_contentType_Collection"
## 0% 25% 50% 75% 100%
## 0.00000000 0.00000000 0.00000000 0.00000000 0.08389013
## [1] "##########################################"
## [1] "LibFM Parameter: c1_contentType_Game"
## 0% 25% 50% 75% 100%
## 0.00000000 0.00000000 0.00000000 0.00000000 0.08314774
## [1] "##########################################"
## [1] "LibFM Parameter: c1_contentType_Story"
## 0% 25% 50% 75% 100%
## 0 0 1 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c1_contentType_Worksheet"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.1889384
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_English"
## 0% 25% 50% 75% 100%
## 0 0 1 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_Hindi"
## 0% 25% 50% 75% 100%
## 0 0 0 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_Kannada"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0155902
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_Other"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.002227171
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_Tamil"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.002227171
## [1] "##########################################"
## [1] "LibFM Parameter: c1_language_Telugu"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.004083148
## [1] "##########################################"
## [1] "LibFM Parameter: c2_total_ts"
## 0% 25% 50% 75% 100%
## 0.0000 0.0000 55.9800 251.4013 829.0000
## [1] "##########################################"
## [1] "LibFM Parameter: c2_avg_interactions_min"
## 0% 25% 50% 75% 100%
## 0.00 0.00 10.10 21.49 60.79
## [1] "##########################################"
## [1] "LibFM Parameter: c2_download_date"
## 0% 25% 50% 75% 100%
## 1.108090e+12 1.459158e+12 1.463512e+12 1.465447e+12 1.470396e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c2_downloaded"
## 0% 25% 50% 75% 100%
## 0 0 1 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c2_last_played_on"
## 0% 25% 50% 75% 100%
## 0.000000e+00 0.000000e+00 1.454567e+12 1.464156e+12 1.470562e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c2_mean_play_time_interval"
## 0% 25% 50% 75% 100%
## -29.00 0.00 0.00 21669.77 91297.96
## [1] "##########################################"
## [1] "LibFM Parameter: c2_num_group_user"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.002598367
## [1] "##########################################"
## [1] "LibFM Parameter: c2_num_individual_user"
## 0% 25% 50% 75% 100%
## 0 0 1 3 10
## [1] "##########################################"
## [1] "LibFM Parameter: c2_num_sessions"
## 0% 25% 50% 75% 100%
## 0 0 1 3 10
## [1] "##########################################"
## [1] "LibFM Parameter: c2_start_time"
## 0% 25% 50% 75% 100%
## 0.000000e+00 0.000000e+00 1.454566e+12 1.464144e+12 1.470562e+12
## [1] "##########################################"
## [1] "LibFM Parameter: c2_total_interactions"
## 0% 25% 50% 75% 100%
## 0.00000 0.00000 19.00000 70.15285 227.00000
## [1] "##########################################"
## [1] "LibFM Parameter: c2_subject_literacy"
## 0% 25% 50% 75% 100%
## 0.8314774 1.0000000 1.0000000 1.0000000 1.0000000
## [1] "##########################################"
## [1] "LibFM Parameter: c2_subject_literacy,numeracy"
## 0% 25% 50% 75% 100%
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0007423905
## [1] "##########################################"
## [1] "LibFM Parameter: c2_subject_numeracy"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0237565
## [1] "##########################################"
## [1] "LibFM Parameter: c2_subject_Unknown"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.1440238
## [1] "##########################################"
## [1] "LibFM Parameter: c2_contentType_Collection"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0701559
## [1] "##########################################"
## [1] "LibFM Parameter: c2_contentType_Game"
## 0% 25% 50% 75% 100%
## 0.00000000 0.00000000 0.00000000 0.00000000 0.07089829
## [1] "##########################################"
## [1] "LibFM Parameter: c2_contentType_Story"
## 0% 25% 50% 75% 100%
## 0 0 1 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c2_contentType_Unknown"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.1440238
## [1] "##########################################"
## [1] "LibFM Parameter: c2_contentType_Worksheet"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.1625835
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_English"
## 0% 25% 50% 75% 100%
## 0 0 0 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Hindi"
## 0% 25% 50% 75% 100%
## 0 0 0 1 1
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Kannada"
## 0% 25% 50% 75% 100%
## 0.00000000 0.00000000 0.00000000 0.00000000 0.01336303
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Other"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.001484781
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Tamil"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.001484781
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Telugu"
## 0% 25% 50% 75% 100%
## 0.000000000 0.000000000 0.000000000 0.000000000 0.003340757
## [1] "##########################################"
## [1] "LibFM Parameter: c2_language_Unknown"
## 0% 25% 50% 75% 100%
## 0.0000000 0.0000000 0.0000000 0.0000000 0.1440238
## [1] "##########################################"
## [1] "LibFM Parameter: screen_size"
## 0% 25% 50% 75% 100%
## 3.89 4.65 5.46 5.71 7.42
## [1] "##########################################"
## [1] "LibFM Parameter: external_disk"
## 0% 25% 50% 75% 100%
## 1.2700 5.0800 11.5800 12.9575 25.5900
## [1] "##########################################"
## [1] "LibFM Parameter: internal_disk"
## 0% 25% 50% 75% 100%
## 0.0000 0.0000 0.0000 0.0000 13.2953
## [1] "##########################################"
## [1] "LibFM Parameter: primary_camera"
## 0% 25% 50% 75% 100%
## 0 6 8 13 22
## [1] "##########################################"
## [1] "LibFM Parameter: secondary_camera"
## 0% 25% 50% 75% 100%
## 0 2 3 5 8