Load Libraries

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Load Datasets

# Read the three input tables; the paths are relative to the working
# directory.
df_apps <- read.csv(file = "../../data/v1/apps.csv")
df_perm <- read.csv(file = "../../data/v1/permissions.csv")
# PII = Personally Identifiable Information
df_pii <- read.csv(file = "../../data/v1/pii.csv")

General

Hoeveel apps zitten momenteel in de dataset?

# Number of distinct app package names in the apps table.
df_apps %>%
  distinct(appPackageName) %>%
  nrow()
## [1] 99

Which permissions are requested most often?

  • ACCESS_NETWORK_STATE = ?
  • ACCESS_FINE_LOCATION is in the top 10
# Count the rows of the permissions table per permission id and order the
# result from the most to the least frequent permission.
top_permissions <- df_perm %>%
  group_by(permissionId) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Render the 20 highest counts as a striped, left-floated table. top_n()
# keeps ties, so the table can contain more than 20 rows.
kable_styling(
  kable(top_n(top_permissions, 20, count)),
  latex_options = "striped",
  position = "float_left"
)
permissionId count
WAKE_LOCK 109
ACCESS_NETWORK_STATE 102
INTERNET 102
WRITE_EXTERNAL_STORAGE 76
RECEIVE_BOOT_COMPLETED 72
VIBRATE 69
ACCESS_FINE_LOCATION 58
ACCESS_WIFI_STATE 58
CAMERA 53
GET_ACCOUNTS 50
ACCESS_COARSE_LOCATION 48
READ_EXTERNAL_STORAGE 46
FOREGROUND_SERVICE 40
READ_CONTACTS 37
USE_CREDENTIALS 32
MANAGE_ACCOUNTS 31
READ_PHONE_STATE 27
RECORD_AUDIO 27
BLUETOOTH 23
AUTHENTICATE_ACCOUNTS 20
READ_APP_BADGE 20

Indicators

2. Which apps have the most different data recipients?

Welke apps versturen data naar x aantal ontvangers

# Number of distinct server hostnames per app in the PII table, ordered from
# the app with the most hostnames to the app with the fewest.
domains_per_app <- df_pii %>%
  distinct(appPackageName, serverHostname) %>%
  group_by(appPackageName) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Render the full ranking as a striped, left-floated table.
kable_styling(
  kable(domains_per_app),
  latex_options = "striped",
  position = "float_left"
)
appPackageName count
com.h8games.helixjump 23
com.game5mobile.lineandwater 20
com.amazon.mShop.android.shopping 10
com.surpax.ledflashlight.panel 10
com.ihandysoft.alarmclock 9
com.ihg.apps.android 8
com.calm.android 7
com.hoteltonight.android.prod 7
com.joom 6
de.flixbus.app 6
com.coinbase.android 5
com.expedia.bookings 5
com.fitnesskeeper.runkeeper.pro 5
com.foursquare.robin 5
com.getsomeheadspace.android 5
com.joelapenna.foursquared 5
com.runtastic.android 5
ch.publisheria.bring 4
com.autoscout24 4
com.duolingo 3
com.goodreads 3
com.pinterest 3
com.thetrainline 3
vivino.web.app 3
be.smartschool.mobile 2
com.contextlogic.wish 2
com.google.android.apps.docs.editors.docs 2
com.hbwares.wordfeud.free 2
com.irobot.home 2
com.nianticlabs.pokemongo 2
com.Slack 2
com.Splitwise.SplitwiseMobile 2
com.untappdllc.app 2
com.viber.voip 2
de.stocard.stocard 2
messenger.messenger.messenger.messenger 2
be.persgroep.android.news.mobilehln 1
be.wandelknooppunt 1
com.bnpp.easybanking 1
com.deliveroo.orderapp 1
com.flightradar24free 1
com.google.android.apps.youtube.music 1
com.google.android.calendar 1
com.google.android.gm 1
com.google.android.keep 1
com.google.android.youtube 1
com.halfbrick.fruitninjafree 1
com.hm.goe 1
com.magmamobile.game.SpiderSolitaire 1
com.skype.raider 1
com.ubercab 1
com.waze 1
de.hafas.android.sncbnmbs 1

3.1 These apps transmit at least two identifiers to the same host

  • Welke combinaties tussen androidid en persistent identifiers zijn verboden?
  • Zijn 2 persistent identifiers samen ook verboden (vb: androidid + hwid)?
  • Wat is attempted, fingerprint, geolatlon? Zijn dit persistent identifiers?
# For every app, count the distinct (piiType, serverHostname) combinations in
# the PII table after dropping the "aaid" rows; largest counts first.
df_pii %>%
  filter(piiType != "aaid") %>%
  # The first summarise collapses repeated rows to one row per
  # (app, piiType, host); its `count` column is not used afterwards.
  group_by(appPackageName, piiType, serverHostname) %>%
  summarise(count = n()) %>%
  # Regroup by app only, so that count() tallies the collapsed rows per app.
  group_by(appPackageName) %>%
  count() %>%
  arrange(desc(n))
# Per app and host, count the distinct PII types transmitted, restricted to
# hosts that received an "aaid" value, and keep the pairs where more than one
# type was sent (i.e. "aaid" plus at least one other type).
#
# Result: one row per (appPackageName, serverHostname) with the number of
# distinct types in `total`. The result stays grouped by appPackageName,
# because summarize() drops only the last grouping variable.
infractions <- df_pii %>%
  # One row per distinct (app, host, type) combination; how often a
  # combination occurs does not matter for this indicator.
  distinct(appPackageName, serverHostname, piiType) %>%
  group_by(appPackageName, serverHostname) %>%
  # Only hosts that were sent "aaid" are considered. Testing membership
  # directly also yields an empty result, rather than an error, when "aaid"
  # never occurs in the data.
  filter("aaid" %in% piiType) %>%
  summarize(total = n()) %>%
  filter(total > 1)

3.2 These apps transmit multiple identifiers to the same host — number of such hosts per app

# Count, per app, how many rows (one per app/host pair) it has in
# `infractions`, and list the apps with the most such hosts first.
multiple_infractions <- infractions %>%
  group_by(appPackageName) %>%
  # n() counts the rows directly; no helper column of ones is needed.
  summarize(total = n()) %>%
  arrange(desc(total))

multiple_infractions %>% kable()
appPackageName total
com.calm.android 5
com.foursquare.robin 4
com.h8games.helixjump 4
com.ihandysoft.alarmclock 4
com.joelapenna.foursquared 4
com.game5mobile.lineandwater 3
com.hoteltonight.android.prod 3
com.fitnesskeeper.runkeeper.pro 2
com.getsomeheadspace.android 2
com.joom 2
com.bnpp.easybanking 1
com.hbwares.wordfeud.free 1
com.pinterest 1
com.surpax.ledflashlight.panel 1
com.ubercab 1
com.untappdllc.app 1

4. Which apps request permission to your location?

Wat betekenen deze locatietypes?

  • ACCESS_FINE_LOCATION = detailed location
  • ACCESS_COARSE_LOCATION = approx. location
  • ACCESS_LOCATION_EXTRA_COMMANDS = ?
  • ACCESS_MEDIA_LOCATION = hard-drive?

Is this harmful? Do ‘todoist’, ‘Vivino’ and ‘KMI weather’ really need fine location data? Shouldn’t this be an indicator?

# Keep the permission rows whose id contains "location", ignoring case.
# TRUE is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
perm_loc <- df_perm %>%
  filter(grepl("location", permissionId, ignore.case = TRUE))

# App / permission pairs, ordered by permission id in descending order.
perm_loc %>%
  select(appPackageName, permissionId) %>%
  arrange(desc(permissionId))
  # Table rendering is currently disabled for this listing:
  # kable() %>%
  # kable_styling(latex_options = "striped", position = "float_left")

# Number of rows per app / permission pair.
perm_loc %>%
  group_by(appPackageName, permissionId) %>%
  count()

5. Which apps transmit location data to external domains?

Only 1? Deliveroo transmits your location data to its own services?

# Rows of the PII table whose precise type is "geolatlon", reduced to the
# app, the precise type and the server hostname.
trans_loc <- df_pii %>%
  filter(piiTypePrecise == "geolatlon") %>%
  select(appPackageName, piiTypePrecise, serverHostname)

# Render the matches as a striped, left-floated table.
kable_styling(
  kable(trans_loc),
  latex_options = "striped",
  position = "float_left"
)
appPackageName piiTypePrecise serverHostname
com.deliveroo.orderapp geolatlon deliveroo.co.uk

6. Which domain receives data the most often?

# For each server hostname, count the distinct apps that appear with it in
# the PII table, order the hostnames from most to fewest apps, and render the
# ranking as a striped, left-floated table.
kable_styling(
  kable(
    df_pii %>%
      distinct(appPackageName, serverHostname) %>%
      group_by(serverHostname) %>%
      summarise(count = n()) %>%
      arrange(desc(count))
  ),
  latex_options = "striped",
  position = "float_left"
)
serverHostname count
graph.facebook.com 24
t.appsflyer.com 12
app.adjust.com 11
settings.crashlytics.com 9
e.crashlytics.com 8
api2.branch.io 4
register.appsflyer.com 4
www.googleadservices.com 4
api.branch.io 3
data.flurry.com 3
events.appsflyer.com 3
identity.mparticle.com 3
nativesdks.mparticle.com 3
www.googleapis.com 3
aax-eu.amazon-adsystem.com 2
ads.mopub.com 2
api.amplitude.com 2
api.usebutton.com 2
connect.tapjoy.com 2
device-metrics-us-2.amazon.com 2
ecommerce.iap.unity3d.com 2
is-gateway.supersonicads.com 2
mads.amazon-adsystem.com 2
outcome-ssp.supersonicads.com 2
placements.tapjoy.com 2
rpc.tapjoy.com 2
rv-gateway.supersonicads.com 2
td-service.appcloudbox.net 2
159126.deeplink.mobileapptracking.com 1
accounts.us1.gigya.com 1
ad.doubleclick.net 1
addelivery-engine-api.voodoo-ads.io 1
ads.api.vungle.com 1
analytics.rayjump.com 1
ap.appcloudbox.net 1
api-gdpr.voodoo-tech.io 1
api-h2.hoteltonight.com 1
api.accengage.com 1
api.amazon.com 1
api.appcloudbox.net 1
api.apptentive.com 1
api.coinbase.com 1
api.flixbus.com 1
api.gameanalytics.com 1
api.goodreads.com 1
api.iterable.com 1
api.joom.com 1
api.leanplum.com 1
api.mixpanel.com 1
api.prod.headspace.com 1
api.segment.io 1
api.vungle.com 1
api3.siftscience.com 1
apiproxy3.accengage.com 1
appinstall.webtrekk.net 1
appx.transient.amazon.com 1
auction.unityads.unity3d.com 1
cdn.krxd.net 1
cdp.cloud.unity3d.com 1
client-analytics.braintreegateway.com 1
cn-geo1.uber.com 1
config.inmobi.com 1
config.uca.cloud.unity3d.com 1
crosspromo.voodoo.io 1
deliveroo.co.uk 1
docs.google.com 1
dpm.demdex.net 1
events.iap.unity3d.com 1
fitnesskeeperapi.com 1
httpkafka.unityads.unity3d.com 1
ihg.demdex.net 1
impression-europe.liftoff.io 1
inbox.google.com 1
init.supersonicads.com 1
joom.helpshift.com 1
mads.amazon.com 1
mag-na.amazon.com 1
mobile-collector.newrelic.com 1
mobile-crash.newrelic.com 1
mobile.eum-appdynamics.com 1
mpx.mopub.com 1
msh-tablet.amazon.com 1
msh.amazon.com 1
niantic.helpshift.com 1
oms.expedia.com 1
p-v2.presage.io 1
pad-v3.presage.io 1
plankton-gtm.a.autoscout24.com 1
production.bringapi.app 1
publisher-config.unityads.unity3d.com 1
push.eservice.emarsys.net 1
reports.crashlytics.com 1
rtproxy-na.waze.com 1
rubick.gameanalytics.com 1
s.amazon-adsystem.com 1
sb.scorecardresearch.com 1
sdk-android.ad.smaato.net 1
sdk.follow-apps.com 1
secure.analytics.ihg.com 1
service.appcloudbox.net 1
slack.com 1
smetrics.thetrainline.com 1
socialize.us1.gigya.com 1
stats.appsflyer.com 1
track.tenjin.io 1
trk.pinterest.com 1
unagi-na.amazon.com 1
vivino.helpshift.com 1
wv.inner-active.mobi 1
www.formidapps.com 1
www.wish.com 1

```