To copy the code, click the button in the upper right corner of the code-chunks.
rm(list = ls())
gc()
We defined a number custom functions, at Download custom_functions.R.
source("./custom_functions.R")
tidyverse: data wranglingigraph: generate and visualize graphsparallel: parallel computing to speed up
simulationforeach: looping in paralleldoParallel: parallel backend for
foreachggplot2: data visualizationggh4x: hacks for ggplot2ggpubr: make visualizations publication-readypackages = c("tidyverse", "igraph", "ggplot2", "parallel", "doParallel", "foreach", "ggh4x", "ggpubr",
"plotly", "RColorBrewer", "grid", "gridExtra", "patchwork", "ggplotify", "ggraph", "gganimate", "RColorBrewer",
"ggtext", "magick", "jsonlite", "lubridate", "ggtext")
invisible(fpackage.check(packages))
rm(packages)
data <- read.csv("./rawdata/all_apps_wide_2026-02-12.csv")
times <- read.csv("./rawdata/PageTimes-2026-02-12.csv")
# cbind(data$participant.node,data$participant.role)[complete.cases(
# #cbind(data$participant.node,data$participant.role)),]
# data[, c('participant.bonus', 'participant.label')][!is.na(data$participant.bonus),]
On 12-2-2026, I recruited 100 Prolific participants, to populate a network of N=50 (with a 10% minority group).
net <- jsonlite::fromJSON("./networks/network_test_n50.json")
g <- graph_from_adjacency_matrix(net$adj_matrix, mode = "undirected")
V(g)$role <- ifelse(net$role_vector == 1, "trendsetter", "conformist")
fplot_graph(g)

# subset experimental session
data <- data[data$session.code == "bg6q9igc", ]
times <- times[times$session_code == "bg6q9igc", ]
# clean
test <- data %>%
transmute(participant_id = participant.code, participant_label = participant.label, id_in_session = participant.id_in_session,
consent_given = consent.1.player.consent, consent_timestamp = consent.1.player.consent_timestamp,
role = participant.role, is_dropout = participant.is_dropout, dropout_app = participant._current_app_name,
comprehension_retries = comprehension.1.player.comprehension_retries, passed_comprehension = !participant._current_app_name %in%
c("consent", "comprehension"), choice = unpop.1.player.choice, failed_checks = participant.failed_checks,
exit_early = participant.exit_early, participated = participant._current_page_name == "PaymentInfo") %>%
filter(!is.na(consent_given)) %>%
mutate(bot = ifelse(participant_label == "", 1, 0), bot = factor(bot, levels = c(0, 1), labels = c("Prolific participant",
"Bot")), consent_timestamp = ymd_hms(consent_timestamp), final_state = case_when(failed_checks >
0 ~ "Failed comprehension", exit_early == 1 ~ "Could not be grouped", participated ~ "Participated",
TRUE ~ "Too late")) %>%
arrange(consent_timestamp) %>%
mutate(arrival_order = row_number())
ggplot(test, aes(x = consent_timestamp, y = arrival_order)) + geom_point(aes(color = role, shape = bot),
size = 3, alpha = 0.5) + scale_shape_manual(values = c(16, 2)) + scale_color_manual(values = c("blue",
"red")) + labs(x = "Arrival time", y = "Arrival order", color = "Role", shape = "Type", title = "Participant arrivals from Prolific over time") +
theme_minimal()

ggplot(test, aes(x = consent_timestamp, y = arrival_order)) + geom_point(aes(color = final_state), size = 3,
alpha = 0.6) + scale_color_manual(values = c(Participated = "green", `Failed comprehension` = "orange",
`Could not be grouped` = "red", `Too late` = "gray")) + facet_wrap(~role) + labs(x = "Arrival time",
y = "Arrival order", color = "Final state", title = "Participant arrivals by role and final state") +
theme_minimal()

fshowdf(table(test$final_state, test$role), caption = "participant status by role")
| Blue | Red | |
|---|---|---|
| Could not be grouped | 17 | 7 |
| Failed comprehension | 8 | 35 |
| Participated | 5 | 45 |
| Too late | 0 | 2 |
times <- times %>%
mutate(timestamp = as_datetime(epoch_time_completed))
arrival_times <- times %>%
group_by(participant_id_in_session) %>%
summarize(arrival_time = min(timestamp), .groups = "drop")
times <- times %>%
left_join(arrival_times, by = "participant_id_in_session") %>%
mutate(participant_ordered = factor(participant_id_in_session, levels = arrival_times %>%
arrange(arrival_time) %>%
pull(participant_id_in_session)))
times_roles <- times %>%
left_join(test %>%
select(id_in_session, role), by = c(participant_id_in_session = "id_in_session"))
page_levels <- unique(times$page_name)
times_roles <- times_roles %>%
mutate(page_name = factor(page_name, levels = page_levels))
custom_colors <- c(InitializeParticipant = "#c6dbef", ConsentPage = "#9ecae1", IntroductionPage = "#6baed6",
ComprehensionPage = "#3182bd", NetworkFormationWaitPage = "#ffcc99", DecisionPage = "#ff9966", ResultsWaitPage = "#ff6666",
ResultsPage = "#cc0033", FinalGameResults = "#660000")
# colored y-axis labels based on role
y_labels_colored <- times_roles %>%
select(participant_ordered, role) %>%
distinct() %>%
arrange(participant_ordered) %>%
mutate(label_colored = case_when(role == "Red" ~ paste0("<span style='color:red'>", participant_ordered,
"</span>"), role == "Blue" ~ paste0("<span style='color:blue'>", participant_ordered, "</span>"),
TRUE ~ paste0("<span style='color:darkgrey'>", participant_ordered, "</span>")))
# create a named vector for scale_y_discrete labels
y_labels_vector <- y_labels_colored$label_colored
names(y_labels_vector) <- y_labels_colored$participant_ordered
ggplot(times_roles[times_roles$round_number == 1, ], aes(x = timestamp, y = participant_ordered, color = page_name)) +
geom_line(aes(group = participant_id_in_session), size = 1) + geom_point(size = 2) + scale_color_manual(values = custom_colors) +
scale_y_discrete(labels = y_labels_vector) + labs(x = "Time", y = "Participant (ordered by arrival)",
color = "Stage/Page", title = "Participant progression through experiment stages (by arrival)") +
theme_minimal() + theme(axis.text.y = element_markdown(size = 6))

dropout_long <- data %>%
filter(participant._current_page_name == "PaymentInfo") %>% # only completed participants
filter(participant.label != "") %>% # exclude bots (participants controlled by experimener have no label)
select(
participant.label,
participant.role,
matches("unpop\\.[0-9]+\\.player\\.is_dropout")
) %>%
pivot_longer(
cols = matches("unpop\\.[0-9]+\\.player\\.is_dropout"),
names_to = "round",
values_to = "is_dropout"
) %>%
mutate(
round = str_extract(round, "[0-9]+"),
round = as.numeric(round)
)
# get first dropout round per participant
dropout_summary <- dropout_long %>%
group_by(participant.label, participant.role) %>%
summarise(
event = any(is_dropout == 1, na.rm = TRUE),
dropout_round = ifelse(
event,
min(round[is_dropout == 1], na.rm = TRUE),
30
),
.groups = "drop"
)
# count cumulative dropouts
cum_dropout_role <- dropout_summary %>%
filter(event == TRUE) %>%
count(participant.role, dropout_round) %>%
group_by(participant.role) %>%
complete(dropout_round = 1:30, fill = list(n = 0)) %>%
arrange(participant.role, dropout_round) %>%
mutate(
cumulative_dropout = cumsum(n)
) %>%
ungroup()
ggplot(cum_dropout_role,
aes(x = dropout_round,
y = cumulative_dropout,
color = participant.role)) +
geom_line(linewidth = 1.2) +
geom_point() +
scale_x_continuous(breaks = 1:30) +
scale_color_manual(
values = c(
"Red" = "red",
"Blue" = "blue"
)) +
labs(
x = "Round",
y = "Dropout",
color = "Role",
title = "Cumulative dropout (by role)"
) +
theme_minimal()

# choice behavior over rounds:
df_long <- data %>%
filter(participant._current_page_name == "PaymentInfo") %>% # filter actual participants
select(participant.label, participant.role, participant.node, starts_with("unpop.")) %>% #also include network node.
pivot_longer(
cols = matches("unpop\\.\\d+\\.player\\.choice$"), # only choice columns
names_to = "round",
values_to = "choice"
) %>%
mutate(
round = as.integer(gsub("unpop\\.(\\d+)\\.player\\.choice", "\\1", round)),
is_bot = ifelse(participant.label == "", TRUE, FALSE)
) %>%
select(participant.label, participant.node, participant.role, round, choice, is_bot)
# identify round of dropout:
first_dropout <- dropout_long %>%
filter(is_dropout == 1) %>%
group_by(participant.label) %>%
summarise(dropout_round = min(round),
.groups = "drop")
# and add to the df:
df_long <- df_long %>%
left_join(first_dropout, by = "participant.label")
# aggregated
df_plot <- df_long %>%
group_by(round) %>%
summarise(
pct_choice1 = mean(choice, na.rm = TRUE) * 100, # proportion * 100
n = n()
)
ggplot(df_plot, aes(x = round, y = pct_choice1)) +
geom_line(group = 1, color = "steelblue", size = .5) +
geom_point(color = "steelblue", size = 2) +
geom_hline(yintercept = 10, linetype = "longdash", color = "darkgrey", size = 0.8) + # dashed line at 10
scale_x_continuous(breaks = df_plot$round) +
scale_y_continuous(limits = c(0, 100)) +
labs(
x = "Round",
y = "% agents choosing 'blue'",
title = "Evolution of an unpopular norm"
)

#sort by node
df_long <- df_long %>%
arrange(participant.node) %>%
select(-participant.label)
df_long$dropout_round[is.na(df_long$dropout_round)] <- 30 #non dropouts, set to 30.
# make roles consistent with utility function roles
df_long$role <- ifelse(df_long$participant.role == "Blue", "trendsetter", "conformist")
# specificy incentive structure parameters
params = list(s = 15, e = 10, w = 40, z = 50, lambda1 = 5, lambda2 = 1.8)
df_long$id <- df_long$participant.node + 1 #nodes are 0-indexed
#add degree
deg <- degree(g)
df_long <- df_long %>%
mutate(degree = deg[id])
calculate_round_utilities <- function(current_round, df_long, network, params) {
# previous round data
df_prev <- df_long %>%
filter(round == current_round - 1) %>%
select(id, role, choice)
# current round data
df_curr <- df_long %>%
filter(round == current_round)
# compute utilities
df_curr <- df_curr %>%
rowwise() %>%
mutate(
util_0 = futility(agent_id = id, choice = 0,
agents = df_prev,
network = network,
params = params)$utility,
util_1 = futility(agent_id = id, choice = 1,
agents = df_prev,
network = network,
params = params)$utility
) %>%
ungroup()
return(df_curr)
}
##calculate_round_utilities(2, df_long, network = g, params = params)
#compute utilities for all rounsd (except round 1)
max_round <- max(df_long$round)
df <- map_dfr(2:max_round, ~calculate_round_utilities(.x, df_long, g, params))
# identify best replies
df <- df %>%
mutate(
predicted_choice = ifelse(util_1 > util_0, 1, 0),
best_reply = (choice == predicted_choice) # TRUE if agent picked the choice with highest utility
)
df_summary <- df %>%
mutate(
preferred_choice = ifelse(role == "trendsetter", 1, 0), # define preferred option by role
chose_preferred = (choice == preferred_choice) # TRUE if they picked their preferred option
) %>%
group_by(round, role) %>%
summarize(
n_agents = n(),
prop_preferred = mean(chose_preferred), # fraction that chose their preferred option
prop_best_reply = mean(best_reply), # fraction that picked the highest-utility choice
.groups = "drop"
)
ggplot(df_summary, aes(x = round)) +
geom_line(aes(y = prop_preferred, color = "Preference"), size = 1) +
geom_line(aes(y = prop_best_reply, color = "Best reply"), size = 1, linetype = "dashed") +
facet_wrap(~role) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
scale_color_manual(values = c("Preference" = "darkgreen", "Best reply" = "steelblue")) +
labs(
x = "Round",
y = "Proportion of agents",
color = "Metric",
title = "Following preference vs best-reply over rounds"
)

Copyright © Rob Franken