3 Synthetic Data

Code

# Change into the project root so that relative paths used later resolve
# regardless of the sub-directory the notebook was started from. The root is the
# first component of the working directory whose name matches the repository name.
projectdir = let project_name = "endogenous-macrodynamics-in-algorithmic-recourse"
    parts = splitpath(pwd())
    root_idx = findfirst(==(project_name), parts)
    # Fail with an actionable message instead of an opaque BoundsError.
    isnothing(root_idx) &&
        error("Working directory $(pwd()) is not inside a '$project_name' directory.")
    joinpath(parts[1:root_idx]...)
end
cd(projectdir)

This notebook was used to run the experiments for the synthetic datasets and can be used to reproduce the results in the paper. In the following, we first run the experiments and then generate visualizations and tables.

3.1 Running the Experiment

Code

# Classifiers to be trained, identified by symbol.
models = [:LogisticRegression, :FluxModel, :FluxEnsemble]

# Gradient-descent optimiser (constructed with 0.01) shared by the generators
# that accept an `opt` keyword.
opt = Flux.Descent(0.01)

# Recourse generators under comparison, keyed by the name used in the results.
generators = Dict(
    :Greedy => GreedyGenerator(),
    :Generic => GenericGenerator(; opt=opt),
    :REVISE => REVISEGenerator(; opt=opt),
    :DICE => DiCEGenerator(; opt=opt),
)

Code

# Load the synthetic data catalogue; `max_obs` is passed through to the loader.
max_obs = 1000
catalogue = load_synthetic(max_obs)

# Keep only the catalogue entries whose key is one of the chosen data sets.
choices = [:linearly_separable, :overlapping, :circles, :moons]
data_sets = filter(entry -> first(entry) in choices, catalogue)

Code

# Build the experiments from the selected data sets, models and generators.
experiments = set_up_experiments(data_sets, models, generators)

Code

# `model_evaluation` is imported explicitly, as in the later cells that use it.
using AlgorithmicRecourseDynamics.Models: model_evaluation

# Plot every model of every experiment on its test data (before the experiment
# is run). The title shows the evaluation score rounded to two digits; test
# points whose rounded predicted probability differs from the label are
# overlaid in red.
plts = []
for (exp_name, exp_) in experiments
    for (M_name, M) in exp_.models
        score = round(model_evaluation(M, exp_.test_data), digits=2)
        plt = plot(M, exp_.test_data, title="$exp_name;\n $M_name ($score)")
        # Errors:
        ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
        x_wrongly_labelled = exp_.test_data.X[:, ids]
        scatter!(
            plt,
            x_wrongly_labelled[1, :],
            x_wrongly_labelled[2, :],
            ms=7.5,
            color=:red,
            label="",
        )
        # Append in place: avoids re-splatting the whole collection on every
        # iteration and does not rebind the global `plts` inside the loop.
        push!(plts, plt)
    end
end

# One row per data set, one column per model. `size` is (width, height), so the
# width scales with the number of columns and the height with the number of rows.
n_rows, n_cols = length(choices), length(models)
plt = plot(plts..., layout=(n_rows, n_cols), size=(n_cols * 300, n_rows * 300))
savefig(plt, joinpath(www_path, "models_test_before.png"))

Code

using AlgorithmicRecourseDynamics.Models: model_evaluation

# Plot every model of every experiment on its training data (before the
# experiment is run). The title shows the evaluation score rounded to two
# digits; training points whose rounded predicted probability differs from the
# label are overlaid in red.
plts = []
for (exp_name, exp_) in experiments
    for (M_name, M) in exp_.models
        score = round(model_evaluation(M, exp_.train_data), digits=2)
        plt = plot(M, exp_.train_data, title="$exp_name;\n $M_name ($score)")
        # Errors:
        ids = findall(vec(round.(probs(M, exp_.train_data.X)) .!= exp_.train_data.y))
        x_wrongly_labelled = exp_.train_data.X[:, ids]
        scatter!(
            plt,
            x_wrongly_labelled[1, :],
            x_wrongly_labelled[2, :],
            ms=7.5,
            color=:red,
            label="",
        )
        # Append in place: avoids re-splatting the whole collection on every
        # iteration and does not rebind the global `plts` inside the loop.
        push!(plts, plt)
    end
end

# One row per data set, one column per model. `size` is (width, height), so the
# width scales with the number of columns and the height with the number of rows.
n_rows, n_cols = length(choices), length(models)
plt = plot(plts..., layout=(n_rows, n_cols), size=(n_cols * 300, n_rows * 300))
savefig(plt, joinpath(www_path, "models_train_before.png"))

Code

# Experiment settings. Evaluating every `n_rounds / n_evals` rounds yields
# `n_evals` evaluations over the course of one run.
n_evals = 5
n_rounds = 50
evaluate_every = round(Int, n_rounds / n_evals)
n_folds = 5
T = 100  # forwarded as the `T` keyword of `run_experiments`

# Run all experiments; results are written to `output_path` by the runner and
# additionally serialized as a single file.
results = run_experiments(
    experiments;
    save_path=output_path,
    evaluate_every=evaluate_every,
    n_rounds=n_rounds,
    n_folds=n_folds,
    T=T,
)
Serialization.serialize(joinpath(output_path, "results.jls"), results)

Code

# Plot the models stored in the recourse systems of one fold (i.e. after the
# experiment) on the test data, and save one grid of plots per generator.
# Test points whose rounded predicted probability differs from the label are
# overlaid in red.
plot_dict = Dict(key => Dict() for (key, val) in results)
fold = 1
for (name, res) in results
    exp_ = res.experiment
    plot_dict[name] = Dict(key => [] for (key, val) in exp_.generators)
    rec_sys = exp_.recourse_systems[fold]
    sys_ids = collect(exp_.system_identifiers)
    # The loop bound is computed inline: the original stored it in `M` and then
    # rebound `M` to the model inside the very same loop.
    for m in 1:length(rec_sys)
        model_name, generator_name = sys_ids[m]
        M = rec_sys[m].model
        score = round(model_evaluation(M, exp_.test_data), digits=2)
        plt = plot(M, exp_.test_data, title="$name;\n $model_name ($score)")
        # Errors:
        ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
        x_wrongly_labelled = exp_.test_data.X[:, ids]
        scatter!(
            plt,
            x_wrongly_labelled[1, :],
            x_wrongly_labelled[2, :],
            ms=7.5,
            color=:red,
            label="",
        )
        push!(plot_dict[name][generator_name], plt)
    end
end

# Regroup: for each generator, concatenate its plots across all data sets.
plot_dict = Dict(
    key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for
    (key, value) in generators
)

# One row per data set, one column per model. `size` is (width, height), so the
# width scales with the number of columns and the height with the number of rows.
n_rows, n_cols = length(choices), length(models)
for (name, plts) in plot_dict
    plt = plot(plts..., layout=(n_rows, n_cols), size=(n_cols * 300, n_rows * 300))
    savefig(plt, joinpath(www_path, "models_test_after_$(name).png"))
end

Code

using AlgorithmicRecourseDynamics.Models: model_evaluation

# Plot the models stored in the recourse systems of one fold (i.e. after the
# experiment) on the data held by the same recourse system, and save one grid
# of plots per generator. Points whose rounded predicted probability differs
# from the label are overlaid in red.
plot_dict = Dict(key => Dict() for (key, val) in results)
fold = 1
for (name, res) in results
    exp_ = res.experiment
    plot_dict[name] = Dict(key => [] for (key, val) in exp_.generators)
    rec_sys = exp_.recourse_systems[fold]
    sys_ids = collect(exp_.system_identifiers)
    # The loop bound is computed inline: the original stored it in `M` and then
    # rebound `M` to the model inside the very same loop.
    for m in 1:length(rec_sys)
        model_name, generator_name = sys_ids[m]
        M = rec_sys[m].model
        data = rec_sys[m].data
        score = round(model_evaluation(M, data), digits=2)
        plt = plot(M, data, title="$name;\n $model_name ($score)")
        # Errors:
        ids = findall(vec(round.(probs(M, data.X)) .!= data.y))
        x_wrongly_labelled = data.X[:, ids]
        scatter!(
            plt,
            x_wrongly_labelled[1, :],
            x_wrongly_labelled[2, :],
            ms=7.5,
            color=:red,
            label="",
        )
        push!(plot_dict[name][generator_name], plt)
    end
end

# Regroup: for each generator, concatenate its plots across all data sets.
plot_dict = Dict(
    key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for
    (key, value) in generators
)

# One row per data set, one column per model. `size` is (width, height), so the
# width scales with the number of columns and the height with the number of rows.
n_rows, n_cols = length(choices), length(models)
for (name, plts) in plot_dict
    plt = plot(plts..., layout=(n_rows, n_cols), size=(n_cols * 300, n_rows * 300))
    savefig(plt, joinpath(www_path, "models_train_after_$(name).png"))
end

3.2 Results

Code

# Load previously computed results instead of re-running the experiments.
# NOTE(review): presumably reads the stored artifacts of the synthetic
# experiments — confirm against `load_synthetic_results`.
results = load_synthetic_results()

3.2.1 Animated GIFs for Presentation

Code

# Minimal demo of a bar chart with error bars.
df = DataFrame(
    group=["A", "B", "C"], total=[7.7, 4.6, 5.1], std_error=[0.04, 0.05, 0.06]
)
bar(df.group, df.total, c=:blues, lw=0, widen=false)
# One error bar per bar, i.e. per *row*. The original used `ncol(df)`, which
# only matched because the frame happens to have three columns and three rows.
# NOTE(review): the factor 20 on the standard errors looks like a visual
# exaggeration for the demo — confirm.
plot!(1/2:(nrow(df) - 1/2), df.total, lw=0, yerror=20 * df.std_error, ms=10)

3.2.2 Plots in Annex

Code

using Images

# For every data set, render the line chart and the error-bar chart, save both
# to the image directory and to the paper directory, and keep them in memory.
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
    # Line chart: `plot_res` called with the results only.
    plt = plot_res(res)
    for dir in (www_path, www_paper_path)
        Images.save(joinpath(dir, "line_chart_$(data_name).png"), plt)
    end
    line_charts[data_name] = plt

    # Error-bar chart: `plot_res` called with the largest recorded value of `n`.
    plt = plot_res(res, maximum(res.output.n))
    for dir in (www_path, www_paper_path)
        Images.save(joinpath(dir, "errorbar_chart_$(data_name).png"), plt)
    end
    errorbar_charts[data_name] = plt
end

3.2.3 Line Charts

The evolution of the evaluation metrics over the course of the experiment is shown for different datasets in Figure 3.1 to Figure 3.4.

Figure 3.1: Evolution of evaluation metrics over the course of the experiment. Data: Circles.

Figure 3.2: Evolution of evaluation metrics over the course of the experiment. Data: Linearly Separable.

Figure 3.3: Evolution of evaluation metrics over the course of the experiment. Data: Moons.

Figure 3.4: Evolution of evaluation metrics over the course of the experiment. Data: Overlapping.

3.2.4 Error Bar Charts

The evaluation metrics at the end of the experiment are shown for different datasets in Figure 3.5 to Figure 3.8.

Figure 3.5: Evaluation metrics at the end of the experiment. Data: Circles.

Figure 3.6: Evaluation metrics at the end of the experiment. Data: Linearly Separable.

Figure 3.7: Evaluation metrics at the end of the experiment. Data: Moons.

Figure 3.8: Evaluation metrics at the end of the experiment. Data: Overlapping.

3.2.5 Bootstrap

Code

# Run the bootstrap on the experiment results; the output file name is passed
# to `run_bootstrap` via its `filename` keyword.
n_bootstrap = 100
df = run_bootstrap(
    results,
    n_bootstrap;
    filename=joinpath(output_path, "bootstrap.csv"),
)

Table 3.1 presents the tests for statistical significance of the estimated MMD metrics.

Code

# Aggregate the bootstrap output, store a CSV copy in both image directories
# and render the table as HTML (last expression, so it becomes the cell output).
df = EMAR.aggregate_bs_synthetic()
for dir in (www_path, www_paper_path)
    CSV.write(joinpath(dir, "bootstrap.csv"), df)
end
EMAR.tabulate_bs(df, Val(:html))

Table 3.1: Tests for statistical significance of the estimated MMD metrics. P-values smaller than the significance level $\alpha=0.05$ are highlighted in bold. Data: Synthetic.

Metric	Data	Generator	Model	p-value
MMD	Circles	DICE	Deep Ensemble	0.988
MMD	Circles	DICE	Linear	1.0
MMD	Circles	DICE	MLP	0.99
MMD	Circles	Generic (γ=0.5)	Deep Ensemble	0.996
MMD	Circles	Generic (γ=0.5)	Linear	0.996
MMD	Circles	Generic (γ=0.5)	MLP	0.99
MMD	Circles	Greedy	Deep Ensemble	0.992
MMD	Circles	Greedy	Linear	1.0
MMD	Circles	Greedy	MLP	0.994
MMD	Circles	Latent	Deep Ensemble	0.9975
MMD	Circles	Latent	Linear	0.9925
MMD	Circles	Latent	MLP	1.0
MMD	Linearly Separable	DICE	Deep Ensemble	0.0
MMD	Linearly Separable	DICE	Linear	0.0
MMD	Linearly Separable	DICE	MLP	0.0
MMD	Linearly Separable	Generic (γ=0.5)	Deep Ensemble	0.0
MMD	Linearly Separable	Generic (γ=0.5)	Linear	0.0
MMD	Linearly Separable	Generic (γ=0.5)	MLP	0.0
MMD	Linearly Separable	Greedy	Deep Ensemble	0.0
MMD	Linearly Separable	Greedy	Linear	0.0
MMD	Linearly Separable	Greedy	MLP	0.0
MMD	Linearly Separable	Latent	Deep Ensemble	0.748
MMD	Linearly Separable	Latent	Linear	0.768
MMD	Linearly Separable	Latent	MLP	0.69
MMD	Moons	DICE	Deep Ensemble	0.0
MMD	Moons	DICE	Linear	0.0
MMD	Moons	DICE	MLP	0.0
MMD	Moons	Generic (γ=0.5)	Deep Ensemble	0.0
MMD	Moons	Generic (γ=0.5)	Linear	0.0
MMD	Moons	Generic (γ=0.5)	MLP	0.0
MMD	Moons	Greedy	Deep Ensemble	0.0
MMD	Moons	Greedy	Linear	0.0
MMD	Moons	Greedy	MLP	0.0
MMD	Moons	Latent	Deep Ensemble	0.0
MMD	Moons	Latent	Linear	0.0
MMD	Moons	Latent	MLP	0.0
MMD	Overlapping	DICE	Deep Ensemble	0.0
MMD	Overlapping	DICE	Linear	0.0
MMD	Overlapping	DICE	MLP	0.0
MMD	Overlapping	Generic (γ=0.5)	Deep Ensemble	0.0
MMD	Overlapping	Generic (γ=0.5)	Linear	0.0
MMD	Overlapping	Generic (γ=0.5)	MLP	0.0
MMD	Overlapping	Greedy	Deep Ensemble	0.0
MMD	Overlapping	Greedy	Linear	0.0
MMD	Overlapping	Greedy	MLP	0.0
MMD	Overlapping	Latent	Deep Ensemble	0.0
MMD	Overlapping	Latent	Linear	0.0
MMD	Overlapping	Latent	MLP	0.0
PP MMD	Circles	DICE	Deep Ensemble	0.996
PP MMD	Circles	DICE	Linear	0.796
PP MMD	Circles	DICE	MLP	0.9975
PP MMD	Circles	Generic (γ=0.5)	Deep Ensemble	1.0
PP MMD	Circles	Generic (γ=0.5)	Linear	0.996
PP MMD	Circles	Generic (γ=0.5)	MLP	0.992
PP MMD	Circles	Greedy	Deep Ensemble	1.0
PP MMD	Circles	Greedy	Linear	0.0
PP MMD	Circles	Greedy	MLP	0.996
PP MMD	Circles	Latent	Deep Ensemble	0.9975
PP MMD	Circles	Latent	Linear	0.0
PP MMD	Circles	Latent	MLP	0.994
PP MMD	Linearly Separable	DICE	Deep Ensemble	0.9525
PP MMD	Linearly Separable	DICE	Linear	0.0
PP MMD	Linearly Separable	DICE	MLP	0.964
PP MMD	Linearly Separable	Generic (γ=0.5)	Deep Ensemble	0.958
PP MMD	Linearly Separable	Generic (γ=0.5)	Linear	0.0
PP MMD	Linearly Separable	Generic (γ=0.5)	MLP	0.944
PP MMD	Linearly Separable	Greedy	Deep Ensemble	0.716
PP MMD	Linearly Separable	Greedy	Linear	0.0
PP MMD	Linearly Separable	Greedy	MLP	0.684
PP MMD	Linearly Separable	Latent	Deep Ensemble	0.856
PP MMD	Linearly Separable	Latent	Linear	0.46
PP MMD	Linearly Separable	Latent	MLP	0.852
PP MMD	Moons	DICE	Deep Ensemble	0.865
PP MMD	Moons	DICE	Linear	0.0
PP MMD	Moons	DICE	MLP	0.87
PP MMD	Moons	Generic (γ=0.5)	Deep Ensemble	0.678
PP MMD	Moons	Generic (γ=0.5)	Linear	0.0
PP MMD	Moons	Generic (γ=0.5)	MLP	0.84
PP MMD	Moons	Greedy	Deep Ensemble	0.388
PP MMD	Moons	Greedy	Linear	0.0
PP MMD	Moons	Greedy	MLP	0.346
PP MMD	Moons	Latent	Deep Ensemble	0.902
PP MMD	Moons	Latent	Linear	0.004
PP MMD	Moons	Latent	MLP	0.91
PP MMD	Overlapping	DICE	Deep Ensemble	0.0
PP MMD	Overlapping	DICE	Linear	0.0
PP MMD	Overlapping	DICE	MLP	0.002
PP MMD	Overlapping	Generic (γ=0.5)	Deep Ensemble	0.004
PP MMD	Overlapping	Generic (γ=0.5)	Linear	0.0
PP MMD	Overlapping	Generic (γ=0.5)	MLP	0.002
PP MMD	Overlapping	Greedy	Deep Ensemble	0.002
PP MMD	Overlapping	Greedy	Linear	0.0
PP MMD	Overlapping	Greedy	MLP	0.004
PP MMD	Overlapping	Latent	Deep Ensemble	0.034
PP MMD	Overlapping	Latent	Linear	0.012
PP MMD	Overlapping	Latent	MLP	0.034
PP MMD (grid)	Circles	DICE	Deep Ensemble	0.762
PP MMD (grid)	Circles	DICE	Linear	0.814
PP MMD (grid)	Circles	DICE	MLP	0.7375
PP MMD (grid)	Circles	Generic (γ=0.5)	Deep Ensemble	0.89
PP MMD (grid)	Circles	Generic (γ=0.5)	Linear	0.994
PP MMD (grid)	Circles	Generic (γ=0.5)	MLP	0.688
PP MMD (grid)	Circles	Greedy	Deep Ensemble	0.568
PP MMD (grid)	Circles	Greedy	Linear	0.0
PP MMD (grid)	Circles	Greedy	MLP	0.776
PP MMD (grid)	Circles	Latent	Deep Ensemble	1.0
PP MMD (grid)	Circles	Latent	Linear	0.0
PP MMD (grid)	Circles	Latent	MLP	0.996
PP MMD (grid)	Linearly Separable	DICE	Deep Ensemble	0.0
PP MMD (grid)	Linearly Separable	DICE	Linear	0.0
PP MMD (grid)	Linearly Separable	DICE	MLP	0.0
PP MMD (grid)	Linearly Separable	Generic (γ=0.5)	Deep Ensemble	0.0
PP MMD (grid)	Linearly Separable	Generic (γ=0.5)	Linear	0.0
PP MMD (grid)	Linearly Separable	Generic (γ=0.5)	MLP	0.0
PP MMD (grid)	Linearly Separable	Greedy	Deep Ensemble	0.0
PP MMD (grid)	Linearly Separable	Greedy	Linear	0.0
PP MMD (grid)	Linearly Separable	Greedy	MLP	0.0
PP MMD (grid)	Linearly Separable	Latent	Deep Ensemble	0.0
PP MMD (grid)	Linearly Separable	Latent	Linear	0.0
PP MMD (grid)	Linearly Separable	Latent	MLP	0.0
PP MMD (grid)	Moons	DICE	Deep Ensemble	0.1225
PP MMD (grid)	Moons	DICE	Linear	0.0
PP MMD (grid)	Moons	DICE	MLP	0.01
PP MMD (grid)	Moons	Generic (γ=0.5)	Deep Ensemble	0.016
PP MMD (grid)	Moons	Generic (γ=0.5)	Linear	0.0
PP MMD (grid)	Moons	Generic (γ=0.5)	MLP	0.02
PP MMD (grid)	Moons	Greedy	Deep Ensemble	0.006
PP MMD (grid)	Moons	Greedy	Linear	0.0
PP MMD (grid)	Moons	Greedy	MLP	0.0
PP MMD (grid)	Moons	Latent	Deep Ensemble	0.114
PP MMD (grid)	Moons	Latent	Linear	0.004
PP MMD (grid)	Moons	Latent	MLP	0.174
PP MMD (grid)	Overlapping	DICE	Deep Ensemble	0.002
PP MMD (grid)	Overlapping	DICE	Linear	0.0
PP MMD (grid)	Overlapping	DICE	MLP	0.0
PP MMD (grid)	Overlapping	Generic (γ=0.5)	Deep Ensemble	0.0
PP MMD (grid)	Overlapping	Generic (γ=0.5)	Linear	0.0
PP MMD (grid)	Overlapping	Generic (γ=0.5)	MLP	0.0
PP MMD (grid)	Overlapping	Greedy	Deep Ensemble	0.0
PP MMD (grid)	Overlapping	Greedy	Linear	0.0
PP MMD (grid)	Overlapping	Greedy	MLP	0.002
PP MMD (grid)	Overlapping	Latent	Deep Ensemble	0.208
PP MMD (grid)	Overlapping	Latent	Linear	0.02
PP MMD (grid)	Overlapping	Latent	MLP	0.342

3.2.6 Chart in paper (Figure 3)

Figure 3.9 shows the chart that went into the paper.

Code

# Display the chart used in the paper, loaded from the image artifact directory.
Images.load(joinpath(www_artifact_path,"paper_synthetic_results.png"))

Code

#| echo: false

# Quarto cell options need the `#|` prefix; a plain `# echo: false` is an
# ordinary comment, so the cell source was still echoed.
# Generate artifacts from the output directory and from the image directory.
generate_artifacts(output_path)
generate_artifacts(www_path)

--- julia: exeflags: ["+1.8"] --- ```{julia} #| eval: true projectdir = splitpath(pwd()) |> ss -> joinpath(ss[1:findall([s == "endogenous-macrodynamics-in-algorithmic-recourse" for s in ss])[1]]...) cd(projectdir) ``` # Synthetic Data {#sec-app-synthetic} This notebook was used to run the experiments for the synthetic datasets and can be used to reproduce the results in the paper. In the following we first run the experiments and then generate visualizations and tables. ## Running the Experiment ```{julia} #| echo: false #| eval: true using CSV using EMAR using PrettyTables using Images output_path = output_dir("synthetic") # output directory for artifacts www_path = www_dir("synthetic") # output directory for images data_path = data_dir("synthetic") www_paper_path = mkpath("paper/www/appendix/synthetic/") data_names = ["Circles", "Linearly Separable", "Moons", "Overlapping"] # Artifacts: # Images: artifact_name = create_artifact_name_from_path(www_path, nothing) _hash = artifact_hash(artifact_name, artifact_toml()) www_artifact_path = joinpath(artifact_path(_hash), artifact_name) # Output: artifact_name = create_artifact_name_from_path(output_path, nothing) _hash = artifact_hash(artifact_name, artifact_toml()) output_artifact_path = joinpath(artifact_path(_hash), artifact_name) ``` ```{julia} models = [ :LogisticRegression, :FluxModel, :FluxEnsemble, ] opt = Flux.Descent(0.01) generators = Dict( :Greedy=>GreedyGenerator(), :Generic=>GenericGenerator(opt = opt), :REVISE=>REVISEGenerator(opt = opt), :DICE=>DiCEGenerator(opt = opt), ) ``` ```{julia} max_obs = 1000 catalogue = load_synthetic(max_obs) choices = [ :linearly_separable, :overlapping, :circles, :moons, ] data_sets = filter(p -> p[1] in choices, catalogue) ``` ```{julia} experiments = set_up_experiments(data_sets,models,generators) ``` ```{julia} plts = [] for (exp_name, exp_) in experiments for (M_name, M) in exp_.models score = round(model_evaluation(M, exp_.test_data),digits=2) plt = plot(M, 
exp_.test_data, title="$exp_name;\n $M_name ($score)") # Errors: ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y)) x_wrongly_labelled = exp_.test_data.X[:,ids] scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="") plts = vcat(plts..., plt) end end plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300)) savefig(plt, joinpath(www_path,"models_test_before.png")) ``` ```{julia} using AlgorithmicRecourseDynamics.Models: model_evaluation plts = [] for (exp_name, exp_) in experiments for (M_name, M) in exp_.models score = round(model_evaluation(M, exp_.train_data),digits=2) plt = plot(M, exp_.train_data, title="$exp_name;\n $M_name ($score)") # Errors: ids = findall(vec(round.(probs(M, exp_.train_data.X)) .!= exp_.train_data.y)) x_wrongly_labelled = exp_.train_data.X[:,ids] scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="") plts = vcat(plts..., plt) end end plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300)) savefig(plt, joinpath(www_path,"models_train_before.png")) ``` ```{julia} n_evals = 5 n_rounds = 50 evaluate_every = Int(round(n_rounds/n_evals)) n_folds = 5 T = 100 results = run_experiments( experiments; save_path=output_path,evaluate_every=evaluate_every,n_rounds=n_rounds, n_folds=n_folds, T=T ) Serialization.serialize(joinpath(output_path,"results.jls"),results) ``` ```{julia} plot_dict = Dict(key => Dict() for (key,val) in results) fold = 1 for (name, res) in results exp_ = res.experiment plot_dict[name] = Dict(key => [] for (key,val) in exp_.generators) rec_sys = exp_.recourse_systems[fold] sys_ids = collect(exp_.system_identifiers) M = length(rec_sys) for m in 1:M model_name, generator_name = sys_ids[m] M = rec_sys[m].model score = round(model_evaluation(M, exp_.test_data),digits=2) plt = plot(M, exp_.test_data, title="$name;\n $model_name ($score)") 
# Errors: ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y)) x_wrongly_labelled = exp_.test_data.X[:,ids] scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="") plot_dict[name][generator_name] = vcat(plot_dict[name][generator_name], plt) end end plot_dict = Dict(key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for (key, value) in generators) for (name, plts) in plot_dict plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300)) savefig(plt, joinpath(www_path,"models_test_after_$(name).png")) end ``` ```{julia} using AlgorithmicRecourseDynamics.Models: model_evaluation plot_dict = Dict(key => Dict() for (key,val) in results) fold = 1 for (name, res) in results exp_ = res.experiment plot_dict[name] = Dict(key => [] for (key,val) in exp_.generators) rec_sys = exp_.recourse_systems[fold] sys_ids = collect(exp_.system_identifiers) M = length(rec_sys) for m in 1:M model_name, generator_name = sys_ids[m] M = rec_sys[m].model data = rec_sys[m].data score = round(model_evaluation(M, data),digits=2) plt = plot(M, data, title="$name;\n $model_name ($score)") # Errors: ids = findall(vec(round.(probs(M, data.X)) .!= data.y)) x_wrongly_labelled = data.X[:,ids] scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="") plot_dict[name][generator_name] = vcat(plot_dict[name][generator_name], plt) end end plot_dict = Dict(key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for (key, value) in generators) for (name, plts) in plot_dict plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300)) savefig(plt, joinpath(www_path,"models_train_after_$(name).png")) end ``` ## Results ```{julia} #| eval: true results = load_synthetic_results() ``` ### Animated GIFs for Presentation ```{julia} #| echo: false Plots.scalefontsizes(2) df_plot = results[:overlapping].output |> 
df -> filter(:name => ==(:mmd), df) |> df -> filter(:model => ==(:FluxEnsemble), df) |> df -> filter(:scope => ==(:domain), df) |> df -> filter(:n => >(0), df) ylims = (0, 1.2 * maximum(df_plot.value)) anim = @animate for i in sort(unique(df_plot.n)) df = filter(:n => ==(i) ,df_plot) |> df -> combine(groupby(df, :generator), :value => mean, :value => std) plt = bar( string.(df.generator), df.value_mean, c=:blues, lw=0, widen=false, legend=false, ylims = ylims, title = "Number of rounds: $(i)" ) n_gen = length(unique(df.generator)) plot!((0:(n_gen-1)) .+ 0.5, df.value_mean, yerror=df.value_std, ms=10, color="transparent") scatter!((0:(n_gen-1)) .+ 0.5, df.value_mean, ms=10, c=:blues) end gif(anim, "dev/presentation/www/synthetic_domain.gif", fps=0.5) Plots.scalefontsizes() ``` ```{julia} #| echo: false Plots.scalefontsizes(2) df_plot = results[:overlapping].output |> df -> filter(:name => ==(:model_performance), df) |> df -> filter(:model => ==(:FluxEnsemble), df) |> df -> filter(:scope => ==(:model), df) |> df -> filter(:n => >(0), df) ylims = (1.2 * minimum(df_plot.value), 0) anim = @animate for i in sort(unique(df_plot.n)) df = filter(:n => ==(i) ,df_plot) |> df -> combine(groupby(df, :generator), :value => mean, :value => std) plt = bar( string.(df.generator), df.value_mean, c=:blues, lw=0, widen=false, legend=false, ylims = ylims, title = "Number of rounds: $(i)" ) n_gen = length(unique(df.generator)) plot!((0:(n_gen-1)) .+ 0.5, df.value_mean, yerror=df.value_std, ms=10, color="transparent") scatter!((0:(n_gen-1)) .+ 0.5, df.value_mean, ms=10, c=:blues) end gif(anim, "dev/presentation/www/synthetic_performance.gif", fps=0.5) Plots.scalefontsizes() ``` ```{julia} df = DataFrame(group=["A", "B", "C"], total=[7.7, 4.6, 5.1], std_error = [0.04, 0.05, 0.06]) bar(df.group, df.total, c=:blues, lw=0, widen=false) plot!(1/2:(ncol(df)-1/2), df.total, lw=0, yerror=20*df.std_error, ms=10) ``` ### Plots in Annex ```{julia} #| eval: true using Images line_charts = Dict() 
errorbar_charts = Dict() for (data_name, res) in results plt = plot_res(res) Images.save(joinpath(www_path, "line_chart_$(data_name).png"), plt) Images.save(joinpath(www_paper_path, "line_chart_$(data_name).png"), plt) line_charts[data_name] = plt plt = plot_res(res,maximum(res.output.n)) Images.save(joinpath(www_path, "errorbar_chart_$(data_name).png"), plt) Images.save(joinpath(www_paper_path, "errorbar_chart_$(data_name).png"), plt) errorbar_charts[data_name] = plt end ``` ### Line Charts ```{julia} #| eval: true #| output: asis #| echo: false fig_label_prefix = "line" fig_labels = (nm -> "fig-$(fig_label_prefix)-$nm").(replace.(lowercase.(data_names), " " => "-")) _str = "The evolution of the evaluation metrics over the course of the experiment is shown for different datasets in @$(fig_labels[1]) to @$(fig_labels[end])." println(_str) ``` ```{julia} #| eval: true #| output: asis #| echo: false str_pattern = "line_chart" fig_caption = "Evolution of evaluation metrics over the course of the experiment." full_paths = joinpath.(www_paper_path, readdir(www_paper_path)[contains.(readdir(www_paper_path), str_pattern)]) include_img_commands = EMAR.get_img_command(data_names, full_paths, fig_labels; fig_caption) _str = join(include_img_commands, "\n\n") println(_str) ``` ### Error Bar Charts ```{julia} #| eval: true #| output: asis #| echo: false fig_label_prefix = "errorbar" fig_labels = (nm -> "fig-$(fig_label_prefix)-$nm").(replace.(lowercase.(data_names), " " => "-")) _str = "The evaluation metrics at the end of the experiment are shown for different datasets in @$(fig_labels[1]) to @$(fig_labels[end])." println(_str) ``` ```{julia} #| eval: true #| output: asis #| echo: false str_pattern = "errorbar" fig_caption = "Evaluation metrics at the end of the experiment." 
full_paths = joinpath.(www_paper_path, readdir(www_paper_path)[contains.(readdir(www_paper_path), str_pattern)]) include_img_commands = EMAR.get_img_command(data_names, full_paths, fig_labels; fig_caption) _str = join(include_img_commands, "\n\n") println(_str) ``` ### Bootstrap ```{julia} n_bootstrap = 100 df = run_bootstrap(results, n_bootstrap; filename=joinpath(output_path,"bootstrap.csv")) ``` @tbl-bs-synthetic presents the tests for statistical significance of the estimated MMD metrics. ::: {#tbl-bs-synthetic} ```{julia} #| output: asis #| eval: true df = EMAR.aggregate_bs_synthetic() CSV.write(joinpath(www_path, "bootstrap.csv"), df) CSV.write(joinpath(www_paper_path, "bootstrap.csv"), df) EMAR.tabulate_bs(df, Val(:html)) ``` Tests for statistical significance of the estimated MMD metrics. We have highlighted p-values smaller than the significance level $\alpha=0.05$ in bold. Data: Synthetic. ::: ### Chart in paper (Figure 3) {#sec-app-synthetic-paper} @fig-paper shows the chart that went into the paper. ```{julia} #| eval: true #| echo: false using DataFrames, Statistics, RCall df = results[:overlapping].output df = df[[x ∈ maximum(df.n) for x in df.n],:] gdf = groupby(df, [:generator, :model, :n, :name, :scope]) df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin]) df_plot = df_plot[[name in [:decisiveness, :disagreement, :mmd, :mmd_grid, :model_performance] for name in df_plot.name],:] df_plot = df_plot[.!(df_plot.name.==:mmd .&& df_plot.scope.==:model),:] df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot) transform!(df_plot, :name => (X -> [x=="decisiveness" ? "Decisiveness" : x for x in X]) => :name) transform!(df_plot, :name => (X -> [x=="disagreement" ? "Disagreement" : x for x in X]) => :name) transform!(df_plot, :name => (X -> [x=="mmd" ? "MMD (domain)" : x for x in X]) => :name) transform!(df_plot, :name => (X -> [x=="mmd_grid" ? 
"MMD (model)" : x for x in X]) => :name) transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name) transform!(df_plot, :generator => (X -> [x=="REVISE" ? "Latent" : x for x in X]) => :generator) transform!(df_plot, :model => (X -> [x=="FluxEnsemble" ? "Deep Ensemble" : x for x in X]) => :model) transform!(df_plot, :model => (X -> [x=="FluxModel" ? "MLP" : x for x in X]) => :model) transform!(df_plot, :model => (X -> [x=="LogisticRegression" ? "Linear" : x for x in X]) => :model) ncol = length(unique(df_plot.model)) nrow = length(unique(df_plot.name)) scale_ = 1.5 R""" library(data.table) df_plot <- data.table($df_plot) name_order <- c( "MMD (domain)", "MMD (model)", "Performance", "Disagreement", "Decisiveness" ) df_plot[,name:=factor(name, levels=name_order)] model_order <- c("Linear", "MLP", "Deep Ensemble") df_plot[,model:=factor(model, levels=model_order)] library(ggplot2) plt <- ggplot(df_plot) + geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") + geom_pointrange(aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=c(0.9,0.9)), size=0.5) + facet_grid( rows = vars(name), cols = vars(model), scales = "free_y" ) + labs(y = "Value") + scale_fill_discrete(name="Generator:") + scale_colour_discrete(name="Generator:") + theme( axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position="bottom" ) temp_path <- file.path(tempdir(), "plot.png") ggsave(temp_path, width=$ncol * $scale_,height=$nrow * $scale_ * 0.75, dpi=300) """ img = Images.load(rcopy(R"temp_path")) Images.save(joinpath(www_path,"paper_synthetic_results.png"), img) ``` ```{julia} #| output: true #| label: fig-paper #| fig-cap: "Chart in paper" #| eval: true Images.load(joinpath(www_artifact_path,"paper_synthetic_results.png")) ``` ```{julia} # echo: false generate_artifacts(output_path) generate_artifacts(www_path) ```