Code
# Resolve the project root: walk the components of the current working directory up
# to (and including) the first folder named after the repository, then make that
# folder the working directory.
projectdir = let parts = splitpath(pwd())
    project_name = "endogenous-macrodynamics-in-algorithmic-recourse"
    root_idx = findfirst(==(project_name), parts)
    # Fail with an actionable message instead of an opaque BoundsError when the
    # notebook is started from outside the repository.
    isnothing(root_idx) &&
        error("Could not find folder \"$project_name\" on the path $(pwd()).")
    joinpath(parts[1:root_idx]...)
end
cd(projectdir)

This notebook was used to run the experiments for the synthetic datasets and can be used to reproduce the results in the paper. In the following we first run the experiments and then generate visualizations and tables.
# Classifier types (by symbol) that are handed to `set_up_experiments` below.
models = [:LogisticRegression, :FluxModel, :FluxEnsemble]

# Plain gradient descent with step size 0.01; shared by every generator that takes
# an `opt` keyword.
opt = Flux.Descent(0.01)

# Counterfactual generators keyed by the name used for them in this notebook.
generators = Dict(
    :Greedy => GreedyGenerator(),
    :Generic => GenericGenerator(; opt=opt),
    :REVISE => REVISEGenerator(; opt=opt),
    :DICE => DiCEGenerator(; opt=opt),
)
max_obs = 1000
# Load the synthetic data catalogue (`max_obs` is passed through to `load_synthetic`;
# presumably it caps the number of observations per data set — confirm).
catalogue = load_synthetic(max_obs)

# Names of the catalogue entries that are used in the experiments.
choices = [:linearly_separable, :overlapping, :circles, :moons]

# Keep only the catalogue entries whose key is one of `choices`.
data_sets = filter(entry -> first(entry) in choices, catalogue)
experiments = set_up_experiments(data_sets, models, generators)
plts = []
# For every (experiment, model) pair, plot the model against the experiment's
# `test_data`, overlay the points it gets wrong in red and put the rounded
# `model_evaluation` score in the title. The panels are collected in `plts` and
# saved as one grid to "models_test_before.png".
for (exp_name, exp_) in experiments
    for (M_name, M) in exp_.models
        score = round(model_evaluation(M, exp_.test_data), digits=2)
        plt = plot(M, exp_.test_data, title="$exp_name;\n $M_name ($score)")
        # Errors: observations whose rounded predicted probability differs from `y`.
        ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
        x_wrongly_labelled = exp_.test_data.X[:, ids]
        scatter!(
            plt, x_wrongly_labelled[1, :], x_wrongly_labelled[2, :];
            ms=7.5, color=:red, label="",
        )
        # Append in place: this avoids re-splatting the growing vector on every
        # iteration and does not rebind the global `plts` inside the loop, which
        # would fail under Julia's non-interactive scoping rules.
        push!(plts, plt)
    end
end
# NOTE(review): `layout` is (rows, columns) whereas Plots' `size` is (width, height);
# both tuples use the same ordering here — confirm the aspect ratio is intended.
plt = plot(
    plts...;
    layout=(length(choices), length(models)),
    size=(length(choices) * 300, length(models) * 300),
)
savefig(plt, joinpath(www_path, "models_test_before.png"))
using AlgorithmicRecourseDynamics.Models: model_evaluation
# For every (experiment, model) pair, plot the model against the experiment's
# `train_data`, overlay the points it gets wrong in red and put the rounded
# `model_evaluation` score in the title. The panels are collected in `plts` and
# saved as one grid to "models_train_before.png".
plts = []
for (exp_name, exp_) in experiments
    for (M_name, M) in exp_.models
        score = round(model_evaluation(M, exp_.train_data), digits=2)
        plt = plot(M, exp_.train_data, title="$exp_name;\n $M_name ($score)")
        # Errors: observations whose rounded predicted probability differs from `y`.
        ids = findall(vec(round.(probs(M, exp_.train_data.X)) .!= exp_.train_data.y))
        x_wrongly_labelled = exp_.train_data.X[:, ids]
        scatter!(
            plt, x_wrongly_labelled[1, :], x_wrongly_labelled[2, :];
            ms=7.5, color=:red, label="",
        )
        # Append in place: this avoids re-splatting the growing vector on every
        # iteration and does not rebind the global `plts` inside the loop, which
        # would fail under Julia's non-interactive scoping rules.
        push!(plts, plt)
    end
end
# NOTE(review): `layout` is (rows, columns) whereas Plots' `size` is (width, height);
# both tuples use the same ordering here — confirm the aspect ratio is intended.
plt = plot(
    plts...;
    layout=(length(choices), length(models)),
    size=(length(choices) * 300, length(models) * 300),
)
savefig(plt, joinpath(www_path, "models_train_before.png"))
n_evals = 5
# Settings forwarded to `run_experiments`. `evaluate_every` is derived so that the
# `n_rounds` rounds are split into `n_evals` equally spaced evaluations.
n_rounds = 50
n_folds = 5
T = 100
evaluate_every = round(Int, n_rounds / n_evals)

# Run all experiments and persist the returned object next to the other outputs.
results = run_experiments(
    experiments;
    save_path=output_path,
    evaluate_every=evaluate_every,
    n_rounds=n_rounds,
    n_folds=n_folds,
    T=T,
)
Serialization.serialize(joinpath(output_path, "results.jls"), results)
plot_dict = Dict(key => Dict() for (key,val) in results)
# For the recourse systems of one fold, plot each system's model against the
# experiment's `test_data` (wrongly predicted points in red, rounded
# `model_evaluation` score in the title). Panels are grouped by generator and one
# grid per generator is saved as "models_test_after_<generator>.png".
fold = 1
for (name, res) in results
    exp_ = res.experiment
    plot_dict[name] = Dict(key => [] for (key, val) in exp_.generators)
    rec_sys = exp_.recourse_systems[fold]
    sys_ids = collect(exp_.system_identifiers)
    # Kept separate from `M` below, which holds the model of the current system.
    n_systems = length(rec_sys)
    for m in 1:n_systems
        model_name, generator_name = sys_ids[m]
        M = rec_sys[m].model
        score = round(model_evaluation(M, exp_.test_data), digits=2)
        plt = plot(M, exp_.test_data, title="$name;\n $model_name ($score)")
        # Errors: observations whose rounded predicted probability differs from `y`.
        ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
        x_wrongly_labelled = exp_.test_data.X[:, ids]
        scatter!(
            plt, x_wrongly_labelled[1, :], x_wrongly_labelled[2, :];
            ms=7.5, color=:red, label="",
        )
        # Append in place rather than rebuilding the vector with `vcat`.
        push!(plot_dict[name][generator_name], plt)
    end
end
# Regroup: for every generator, concatenate its panels across all data sets.
plot_dict = Dict(
    key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for
    key in keys(generators)
)
for (name, plts) in plot_dict
    # NOTE(review): `layout` is (rows, columns) whereas Plots' `size` is
    # (width, height); both tuples use the same ordering — confirm this is intended.
    plt = plot(
        plts...;
        layout=(length(choices), length(models)),
        size=(length(choices) * 300, length(models) * 300),
    )
    savefig(plt, joinpath(www_path, "models_test_after_$(name).png"))
end
using AlgorithmicRecourseDynamics.Models: model_evaluation
# For the recourse systems of one fold, plot each system's model against that
# system's own `data` (wrongly predicted points in red, rounded `model_evaluation`
# score in the title). Panels are grouped by generator and one grid per generator is
# saved as "models_train_after_<generator>.png".
plot_dict = Dict(key => Dict() for (key, val) in results)
fold = 1
for (name, res) in results
    exp_ = res.experiment
    plot_dict[name] = Dict(key => [] for (key, val) in exp_.generators)
    rec_sys = exp_.recourse_systems[fold]
    sys_ids = collect(exp_.system_identifiers)
    # Kept separate from `M` below, which holds the model of the current system.
    n_systems = length(rec_sys)
    for m in 1:n_systems
        model_name, generator_name = sys_ids[m]
        M = rec_sys[m].model
        data = rec_sys[m].data
        score = round(model_evaluation(M, data), digits=2)
        plt = plot(M, data, title="$name;\n $model_name ($score)")
        # Errors: observations whose rounded predicted probability differs from `y`.
        ids = findall(vec(round.(probs(M, data.X)) .!= data.y))
        x_wrongly_labelled = data.X[:, ids]
        scatter!(
            plt, x_wrongly_labelled[1, :], x_wrongly_labelled[2, :];
            ms=7.5, color=:red, label="",
        )
        # Append in place rather than rebuilding the vector with `vcat`.
        push!(plot_dict[name][generator_name], plt)
    end
end
# Regroup: for every generator, concatenate its panels across all data sets.
plot_dict = Dict(
    key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for
    key in keys(generators)
)
for (name, plts) in plot_dict
    # NOTE(review): `layout` is (rows, columns) whereas Plots' `size` is
    # (width, height); both tuples use the same ordering — confirm this is intended.
    plt = plot(
        plts...;
        layout=(length(choices), length(models)),
        size=(length(choices) * 300, length(models) * 300),
    )
    savefig(plt, joinpath(www_path, "models_train_after_$(name).png"))
end
results = load_synthetic_results()
df = DataFrame(group=["A", "B", "C"], total=[7.7, 4.6, 5.1], std_error = [0.04, 0.05, 0.06])
# Bar chart of `df.total` per `df.group`, with error bars (20 × `std_error`) drawn on
# top of the bars.
bar(df.group, df.total, c=:blues, lw=0, widen=false)
# One error bar per bar, i.e. per *row* of `df`. The x-range was previously sized
# with `ncol(df)`, which only worked because the frame has 3 rows and 3 columns.
# Presumably 0.5, 1.5, ... are the bar centres for categorical x values — confirm.
plot!(1/2:(nrow(df) - 1/2), df.total, lw=0, yerror=20 * df.std_error, ms=10)
using Images
# For every data set, build two charts with `plot_res`, save each of them to both
# `www_path` and `www_paper_path`, and keep them in a dictionary keyed by data set.
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
    # Chart from `plot_res(res)`; saved under the "line_chart_" prefix.
    plt = plot_res(res)
    for target_dir in (www_path, www_paper_path)
        Images.save(joinpath(target_dir, "line_chart_$(data_name).png"), plt)
    end
    line_charts[data_name] = plt

    # Chart from `plot_res` called with the largest value of `res.output.n`
    # (presumably the final evaluation — confirm); saved as "errorbar_chart_".
    plt = plot_res(res, maximum(res.output.n))
    for target_dir in (www_path, www_paper_path)
        Images.save(joinpath(target_dir, "errorbar_chart_$(data_name).png"), plt)
    end
    errorbar_charts[data_name] = plt
end

The evolution of the evaluation metrics over the course of the experiment is shown for different datasets in Figure 3.1 to Figure 3.4.
The evaluation metrics at the end of the experiment are shown for different datasets in Figure 3.5 to Figure 3.8.
# `n_bootstrap` is handed to `run_bootstrap` (presumably the number of bootstrap
# resamples — confirm); the `filename` keyword points at "bootstrap.csv" inside
# `output_path`.
n_bootstrap = 100
df = run_bootstrap(
    results,
    n_bootstrap;
    filename=joinpath(output_path, "bootstrap.csv"),
)

Table 3.1 presents the tests for statistical significance of the estimated MMD metrics.
# Aggregate the bootstrap results, write them as "bootstrap.csv" to both output
# folders and render the table as HTML.
df = EMAR.aggregate_bs_synthetic()
for target_dir in (www_path, www_paper_path)
    CSV.write(joinpath(target_dir, "bootstrap.csv"), df)
end
EMAR.tabulate_bs(df, Val(:html))

| Metric | Data | Generator | Model | p-value |
|---|---|---|---|---|
| MMD | Circles | DICE | Deep Ensemble | 0.988 |
| MMD | Circles | DICE | Linear | 1.0 |
| MMD | Circles | DICE | MLP | 0.99 |
| MMD | Circles | Generic (γ=0.5) | Deep Ensemble | 0.996 |
| MMD | Circles | Generic (γ=0.5) | Linear | 0.996 |
| MMD | Circles | Generic (γ=0.5) | MLP | 0.99 |
| MMD | Circles | Greedy | Deep Ensemble | 0.992 |
| MMD | Circles | Greedy | Linear | 1.0 |
| MMD | Circles | Greedy | MLP | 0.994 |
| MMD | Circles | Latent | Deep Ensemble | 0.9975 |
| MMD | Circles | Latent | Linear | 0.9925 |
| MMD | Circles | Latent | MLP | 1.0 |
| MMD | Linearly Separable | DICE | Deep Ensemble | 0.0 |
| MMD | Linearly Separable | DICE | Linear | 0.0 |
| MMD | Linearly Separable | DICE | MLP | 0.0 |
| MMD | Linearly Separable | Generic (γ=0.5) | Deep Ensemble | 0.0 |
| MMD | Linearly Separable | Generic (γ=0.5) | Linear | 0.0 |
| MMD | Linearly Separable | Generic (γ=0.5) | MLP | 0.0 |
| MMD | Linearly Separable | Greedy | Deep Ensemble | 0.0 |
| MMD | Linearly Separable | Greedy | Linear | 0.0 |
| MMD | Linearly Separable | Greedy | MLP | 0.0 |
| MMD | Linearly Separable | Latent | Deep Ensemble | 0.748 |
| MMD | Linearly Separable | Latent | Linear | 0.768 |
| MMD | Linearly Separable | Latent | MLP | 0.69 |
| MMD | Moons | DICE | Deep Ensemble | 0.0 |
| MMD | Moons | DICE | Linear | 0.0 |
| MMD | Moons | DICE | MLP | 0.0 |
| MMD | Moons | Generic (γ=0.5) | Deep Ensemble | 0.0 |
| MMD | Moons | Generic (γ=0.5) | Linear | 0.0 |
| MMD | Moons | Generic (γ=0.5) | MLP | 0.0 |
| MMD | Moons | Greedy | Deep Ensemble | 0.0 |
| MMD | Moons | Greedy | Linear | 0.0 |
| MMD | Moons | Greedy | MLP | 0.0 |
| MMD | Moons | Latent | Deep Ensemble | 0.0 |
| MMD | Moons | Latent | Linear | 0.0 |
| MMD | Moons | Latent | MLP | 0.0 |
| MMD | Overlapping | DICE | Deep Ensemble | 0.0 |
| MMD | Overlapping | DICE | Linear | 0.0 |
| MMD | Overlapping | DICE | MLP | 0.0 |
| MMD | Overlapping | Generic (γ=0.5) | Deep Ensemble | 0.0 |
| MMD | Overlapping | Generic (γ=0.5) | Linear | 0.0 |
| MMD | Overlapping | Generic (γ=0.5) | MLP | 0.0 |
| MMD | Overlapping | Greedy | Deep Ensemble | 0.0 |
| MMD | Overlapping | Greedy | Linear | 0.0 |
| MMD | Overlapping | Greedy | MLP | 0.0 |
| MMD | Overlapping | Latent | Deep Ensemble | 0.0 |
| MMD | Overlapping | Latent | Linear | 0.0 |
| MMD | Overlapping | Latent | MLP | 0.0 |
| PP MMD | Circles | DICE | Deep Ensemble | 0.996 |
| PP MMD | Circles | DICE | Linear | 0.796 |
| PP MMD | Circles | DICE | MLP | 0.9975 |
| PP MMD | Circles | Generic (γ=0.5) | Deep Ensemble | 1.0 |
| PP MMD | Circles | Generic (γ=0.5) | Linear | 0.996 |
| PP MMD | Circles | Generic (γ=0.5) | MLP | 0.992 |
| PP MMD | Circles | Greedy | Deep Ensemble | 1.0 |
| PP MMD | Circles | Greedy | Linear | 0.0 |
| PP MMD | Circles | Greedy | MLP | 0.996 |
| PP MMD | Circles | Latent | Deep Ensemble | 0.9975 |
| PP MMD | Circles | Latent | Linear | 0.0 |
| PP MMD | Circles | Latent | MLP | 0.994 |
| PP MMD | Linearly Separable | DICE | Deep Ensemble | 0.9525 |
| PP MMD | Linearly Separable | DICE | Linear | 0.0 |
| PP MMD | Linearly Separable | DICE | MLP | 0.964 |
| PP MMD | Linearly Separable | Generic (γ=0.5) | Deep Ensemble | 0.958 |
| PP MMD | Linearly Separable | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD | Linearly Separable | Generic (γ=0.5) | MLP | 0.944 |
| PP MMD | Linearly Separable | Greedy | Deep Ensemble | 0.716 |
| PP MMD | Linearly Separable | Greedy | Linear | 0.0 |
| PP MMD | Linearly Separable | Greedy | MLP | 0.684 |
| PP MMD | Linearly Separable | Latent | Deep Ensemble | 0.856 |
| PP MMD | Linearly Separable | Latent | Linear | 0.46 |
| PP MMD | Linearly Separable | Latent | MLP | 0.852 |
| PP MMD | Moons | DICE | Deep Ensemble | 0.865 |
| PP MMD | Moons | DICE | Linear | 0.0 |
| PP MMD | Moons | DICE | MLP | 0.87 |
| PP MMD | Moons | Generic (γ=0.5) | Deep Ensemble | 0.678 |
| PP MMD | Moons | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD | Moons | Generic (γ=0.5) | MLP | 0.84 |
| PP MMD | Moons | Greedy | Deep Ensemble | 0.388 |
| PP MMD | Moons | Greedy | Linear | 0.0 |
| PP MMD | Moons | Greedy | MLP | 0.346 |
| PP MMD | Moons | Latent | Deep Ensemble | 0.902 |
| PP MMD | Moons | Latent | Linear | 0.004 |
| PP MMD | Moons | Latent | MLP | 0.91 |
| PP MMD | Overlapping | DICE | Deep Ensemble | 0.0 |
| PP MMD | Overlapping | DICE | Linear | 0.0 |
| PP MMD | Overlapping | DICE | MLP | 0.002 |
| PP MMD | Overlapping | Generic (γ=0.5) | Deep Ensemble | 0.004 |
| PP MMD | Overlapping | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD | Overlapping | Generic (γ=0.5) | MLP | 0.002 |
| PP MMD | Overlapping | Greedy | Deep Ensemble | 0.002 |
| PP MMD | Overlapping | Greedy | Linear | 0.0 |
| PP MMD | Overlapping | Greedy | MLP | 0.004 |
| PP MMD | Overlapping | Latent | Deep Ensemble | 0.034 |
| PP MMD | Overlapping | Latent | Linear | 0.012 |
| PP MMD | Overlapping | Latent | MLP | 0.034 |
| PP MMD (grid) | Circles | DICE | Deep Ensemble | 0.762 |
| PP MMD (grid) | Circles | DICE | Linear | 0.814 |
| PP MMD (grid) | Circles | DICE | MLP | 0.7375 |
| PP MMD (grid) | Circles | Generic (γ=0.5) | Deep Ensemble | 0.89 |
| PP MMD (grid) | Circles | Generic (γ=0.5) | Linear | 0.994 |
| PP MMD (grid) | Circles | Generic (γ=0.5) | MLP | 0.688 |
| PP MMD (grid) | Circles | Greedy | Deep Ensemble | 0.568 |
| PP MMD (grid) | Circles | Greedy | Linear | 0.0 |
| PP MMD (grid) | Circles | Greedy | MLP | 0.776 |
| PP MMD (grid) | Circles | Latent | Deep Ensemble | 1.0 |
| PP MMD (grid) | Circles | Latent | Linear | 0.0 |
| PP MMD (grid) | Circles | Latent | MLP | 0.996 |
| PP MMD (grid) | Linearly Separable | DICE | Deep Ensemble | 0.0 |
| PP MMD (grid) | Linearly Separable | DICE | Linear | 0.0 |
| PP MMD (grid) | Linearly Separable | DICE | MLP | 0.0 |
| PP MMD (grid) | Linearly Separable | Generic (γ=0.5) | Deep Ensemble | 0.0 |
| PP MMD (grid) | Linearly Separable | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD (grid) | Linearly Separable | Generic (γ=0.5) | MLP | 0.0 |
| PP MMD (grid) | Linearly Separable | Greedy | Deep Ensemble | 0.0 |
| PP MMD (grid) | Linearly Separable | Greedy | Linear | 0.0 |
| PP MMD (grid) | Linearly Separable | Greedy | MLP | 0.0 |
| PP MMD (grid) | Linearly Separable | Latent | Deep Ensemble | 0.0 |
| PP MMD (grid) | Linearly Separable | Latent | Linear | 0.0 |
| PP MMD (grid) | Linearly Separable | Latent | MLP | 0.0 |
| PP MMD (grid) | Moons | DICE | Deep Ensemble | 0.1225 |
| PP MMD (grid) | Moons | DICE | Linear | 0.0 |
| PP MMD (grid) | Moons | DICE | MLP | 0.01 |
| PP MMD (grid) | Moons | Generic (γ=0.5) | Deep Ensemble | 0.016 |
| PP MMD (grid) | Moons | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD (grid) | Moons | Generic (γ=0.5) | MLP | 0.02 |
| PP MMD (grid) | Moons | Greedy | Deep Ensemble | 0.006 |
| PP MMD (grid) | Moons | Greedy | Linear | 0.0 |
| PP MMD (grid) | Moons | Greedy | MLP | 0.0 |
| PP MMD (grid) | Moons | Latent | Deep Ensemble | 0.114 |
| PP MMD (grid) | Moons | Latent | Linear | 0.004 |
| PP MMD (grid) | Moons | Latent | MLP | 0.174 |
| PP MMD (grid) | Overlapping | DICE | Deep Ensemble | 0.002 |
| PP MMD (grid) | Overlapping | DICE | Linear | 0.0 |
| PP MMD (grid) | Overlapping | DICE | MLP | 0.0 |
| PP MMD (grid) | Overlapping | Generic (γ=0.5) | Deep Ensemble | 0.0 |
| PP MMD (grid) | Overlapping | Generic (γ=0.5) | Linear | 0.0 |
| PP MMD (grid) | Overlapping | Generic (γ=0.5) | MLP | 0.0 |
| PP MMD (grid) | Overlapping | Greedy | Deep Ensemble | 0.0 |
| PP MMD (grid) | Overlapping | Greedy | Linear | 0.0 |
| PP MMD (grid) | Overlapping | Greedy | MLP | 0.002 |
| PP MMD (grid) | Overlapping | Latent | Deep Ensemble | 0.208 |
| PP MMD (grid) | Overlapping | Latent | Linear | 0.02 |
| PP MMD (grid) | Overlapping | Latent | MLP | 0.342 |
Figure 3.9 shows the chart that went into the paper.
# Load "paper_synthetic_results.png" from `www_artifact_path` so that it is shown as
# the figure referenced in the text above.
Images.load(joinpath(www_artifact_path,"paper_synthetic_results.png"))
# echo: false
# NOTE(review): the line above looks like a notebook cell option that hides the code
# of the following cell in the rendered output — confirm.
# Call `generate_artifacts` once for the raw outputs and once for the figures folder.
generate_artifacts(output_path)
generate_artifacts(www_path)