5  Digitale Verhaltensdaten

5.1 Instagram

# Attach the tidyverse packages and make theme_minimal() the default theme
# for every ggplot created afterwards in this session.
library(tidyverse)
theme_set(theme_minimal())

# Instagram views ---------------------------------------------------------

# Read the "posts viewed" export, take its `impressions_history_posts_seen`
# element, flatten the nested data-frame columns and store it as a tibble.
insta_views <- as_tibble(
  jsonlite::flatten(
    jsonlite::fromJSON("posts_viewed.json")$impressions_history_posts_seen
  )
)

insta_views
# A tibble: 126 × 2
  string_map_data.Author.value string_map_data.Time.timestamp
  <chr>                                                 <int>
1 zdfheute                                         1678890194
2 aufrollschuhen                                   1678890194
3 zdfheute                                         1678906211
4 zdfheute                                         1678906211
5 audreylglass                                     1678906211
# ℹ 121 more rows
# Create time variables.
# The raw timestamp is converted from seconds since 1970-01-01 to POSIXct.
# `day`, `hour` and `weekday` are all derived in the timestamp's own
# (session-local) time zone so that they describe the same clock time.
insta_views <- insta_views %>%
  rename(
    account = string_map_data.Author.value,
    timestamp = string_map_data.Time.timestamp
  ) %>%
  mutate(
    timestamp = as.POSIXct(timestamp, origin = "1970-01-01"),
    # as.Date() on a POSIXct converts in UTC by default, which can disagree
    # with the local-time `hour`/`weekday` for views close to midnight.
    # as_date() keeps the calendar date of the timestamp's time zone instead.
    day = lubridate::as_date(timestamp),
    hour = lubridate::hour(timestamp),
    weekday = lubridate::wday(timestamp, label = TRUE, week_start = 1)
  )

insta_views
# A tibble: 126 × 5
  account        timestamp           day         hour weekday
  <chr>          <dttm>              <date>     <int> <ord>  
1 zdfheute       2023-03-15 15:23:14 2023-03-15    15 Wed    
2 aufrollschuhen 2023-03-15 15:23:14 2023-03-15    15 Wed    
3 zdfheute       2023-03-15 19:50:11 2023-03-15    19 Wed    
4 zdfheute       2023-03-15 19:50:11 2023-03-15    19 Wed    
5 audreylglass   2023-03-15 19:50:11 2023-03-15    19 Wed    
# ℹ 121 more rows
# Trend over time: number of viewed posts per calendar day
ggplot(count(insta_views, day), aes(x = day, y = n)) +
  geom_col() +
  labs(title = "Instagram posts viewed per day")

# Time of day: number of viewed posts per hour
ggplot(count(insta_views, hour), aes(x = hour, y = n)) +
  geom_col() +
  labs(title = "Instagram posts viewed per hour of day")

# Weekdays: number of viewed posts per day of the week
ggplot(count(insta_views, weekday), aes(x = weekday, y = n)) +
  geom_col() +
  labs(title = "Instagram posts viewed per day of the week")

# Instagram Followings ----------------------------------------------------

# Read the followings export, unnest `string_list_data`, and keep `value`
# (renamed to `user`) together with `timestamp`. The timestamp is converted
# from seconds since 1970-01-01 to POSIXct; `day`, `hour` and `weekday` are
# then all derived in the timestamp's own (session-local) time zone.
insta_follow <- jsonlite::fromJSON("following.json")$relationships_following %>%
  unnest(string_list_data) %>%
  select(user = value, timestamp) %>%
  mutate(
    timestamp = as.POSIXct(timestamp, origin = "1970-01-01"),
    # as.Date() on a POSIXct converts in UTC by default, which can disagree
    # with the local-time `hour`/`weekday` for events close to midnight.
    # as_date() keeps the calendar date of the timestamp's time zone instead.
    day = lubridate::as_date(timestamp),
    hour = lubridate::hour(timestamp),
    weekday = lubridate::wday(timestamp, label = TRUE, week_start = 1)
  )

# Aggregate followings: rows per day plus the running total across days
insta_follow_counts <- mutate(
  count(insta_follow, day),
  cumulative_n = cumsum(n)
)

insta_follow_counts
# A tibble: 119 × 3
  day            n cumulative_n
  <date>     <int>        <int>
1 2011-06-10     1            1
2 2011-06-12     2            3
3 2011-06-18     1            4
4 2011-06-19     2            6
5 2011-06-22     1            7
# ℹ 114 more rows
# Trend over time: cumulative number of followings as a single line
ggplot(insta_follow_counts, aes(x = day, y = cumulative_n)) +
  geom_line(group = 1) +
  labs(title = "Instagram followings over time")

5.2 TikTok

# Attach the tidyverse packages and make theme_minimal() the default theme
# for every ggplot created afterwards in this session.
library(tidyverse)
theme_set(theme_minimal())

# Upload this file beforehand!
tiktok <- jsonlite::fromJSON(txt = "data/user_data.json")

# TikTok Logins -----------------------------------------------------------

# Pull the login history table out of the nested export and store it as a
# tibble.
tt_logins <- as_tibble(
  tiktok$Activity$`Login History`$LoginHistoryList
)

tt_logins
# A tibble: 62 × 6
  Date                    IP        DeviceModel DeviceSystem NetworkType Carrier
  <chr>                   <chr>     <chr>       <chr>        <chr>       <chr>  
1 2023-04-20 18:18:27 UTC 46.114.6… iPhone10,6  iOS 16.4.1   4G          o2-de  
2 2023-04-20 18:17:06 UTC 46.114.6… iPhone10,6  iOS 16.4.1   4G          o2-de  
3 2023-04-20 09:30:01 UTC 134.93.2… iPhone10,6  iOS 16.4.1   Wi-Fi       o2-de  
4 2023-04-20 09:30:01 UTC 46.114.6… iPhone10,6  iOS 16.4.1   4G          o2-de  
5 2023-04-20 09:25:15 UTC 134.93.2… iPhone10,6  iOS 16.4.1   Wi-Fi       o2-de  
# ℹ 57 more rows
# Create time variables: calendar day, hour of day and weekday (week starting
# on Monday), all derived from the `Date` strings of the export.
tt_logins <- mutate(
  tt_logins,
  day = as.Date(Date),
  hour = lubridate::hour(Date),
  weekday = lubridate::wday(Date, label = TRUE, week_start = 1)
)

tt_logins
# A tibble: 62 × 9
  Date       IP    DeviceModel DeviceSystem NetworkType Carrier day         hour
  <chr>      <chr> <chr>       <chr>        <chr>       <chr>   <date>     <int>
1 2023-04-2… 46.1… iPhone10,6  iOS 16.4.1   4G          o2-de   2023-04-20    18
2 2023-04-2… 46.1… iPhone10,6  iOS 16.4.1   4G          o2-de   2023-04-20    18
3 2023-04-2… 134.… iPhone10,6  iOS 16.4.1   Wi-Fi       o2-de   2023-04-20     9
4 2023-04-2… 46.1… iPhone10,6  iOS 16.4.1   4G          o2-de   2023-04-20     9
5 2023-04-2… 134.… iPhone10,6  iOS 16.4.1   Wi-Fi       o2-de   2023-04-20     9
# ℹ 57 more rows
# ℹ 1 more variable: weekday <ord>
# Trend over time: number of logins per calendar day
ggplot(count(tt_logins, day), aes(x = day, y = n)) +
  geom_col() +
  labs(title = "TikTok logins per day")

# Time of day: number of logins per hour
ggplot(count(tt_logins, hour), aes(x = hour, y = n)) +
  geom_col() +
  labs(title = "TikTok logins per hour of day")

# Weekdays: number of logins per day of the week
ggplot(count(tt_logins, weekday), aes(x = weekday, y = n)) +
  geom_col() +
  labs(title = "TikTok logins per day of the week")

# Viewing History ---------------------------------------------------------

# Pull the video browsing history out of the nested export and add the same
# time variables as for the logins (day, hour of day, weekday from Monday).
tt_views <- mutate(
  as_tibble(tiktok$Activity$`Video Browsing History`$VideoList),
  day = as.Date(Date),
  hour = lubridate::hour(Date),
  weekday = lubridate::wday(Date, label = TRUE, week_start = 1)
)

tt_views
# A tibble: 150 × 5
  Date                Link                              day         hour weekday
  <chr>               <chr>                             <date>     <int> <ord>  
1 2023-04-20 18:18:23 https://www.tiktokv.com/share/vi… 2023-04-20    18 Thu    
2 2023-04-20 18:18:09 https://www.tiktokv.com/share/vi… 2023-04-20    18 Thu    
3 2023-04-20 18:18:02 https://www.tiktokv.com/share/vi… 2023-04-20    18 Thu    
4 2023-04-20 18:17:38 https://www.tiktokv.com/share/vi… 2023-04-20    18 Thu    
5 2023-04-20 18:17:28 https://www.tiktokv.com/share/vi… 2023-04-20    18 Thu    
# ℹ 145 more rows
# Trend over time: number of watched videos per calendar day
views_per_day <- count(tt_views, day)

views_per_day
# A tibble: 12 × 2
  day            n
  <date>     <int>
1 2023-02-21    19
2 2023-02-23     1
3 2023-02-24    21
4 2023-03-02    24
5 2023-03-03     1
# ℹ 7 more rows
# Summary statistics of the daily view counts
summary(views_per_day)
      day                   n       
 Min.   :2023-02-21   Min.   : 1.0  
 1st Qu.:2023-02-28   1st Qu.: 2.0  
 Median :2023-03-05   Median : 7.5  
 Mean   :2023-03-07   Mean   :12.5  
 3rd Qu.:2023-03-08   3rd Qu.:19.5  
 Max.   :2023-04-20   Max.   :46.0  
# Bar chart of the number of videos watched on each day
ggplot(views_per_day, aes(x = day, y = n)) +
  geom_col() +
  labs(title = "TikTok videos watched per day")

# Direct Messages ---------------------------------------------------------

# Row-bind the entries of the chat history into a single tibble.
tt_dm <- as_tibble(
  bind_rows(tiktok$`Direct Messages`$`Chat History`$ChatHistory)
)

tt_dm
# A tibble: 4 × 3
  Date                From         Content                                      
  <chr>               <chr>        <chr>                                        
1 2023-04-20 09:26:20 alicia.ernst 👋👋👋                                       
2 2023-04-20 09:26:53 annaslblddr  ⭐️⭐️⭐️                                       
3 2023-04-20 09:28:29 alicia.ernst https://www.tiktokv.com/share/video/70987776…
4 2023-04-20 09:29:03 alicia.ernst test                                         
# Chat partners: number of messages per `From` value, most frequent first
count(tt_dm, From, sort = TRUE)
# A tibble: 2 × 2
  From             n
  <chr>        <int>
1 alicia.ernst     3
2 annaslblddr      1