This notebook contains all the analysis described in AndrÃ© and De Langhe (2020):

"No Evidence for Loss Aversion Disappearance and Reversal in Walasek and Stewart (2015)"

# Preamble¶

## Libraries¶

Show Code
import os
import warnings
from functools import partial, reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import statsmodels.formula.api as smf
from numpy import dot
from scipy.linalg import LinAlgError, inv

warnings.filterwarnings("ignore")

%R suppressMessages(library(tidyverse))
np.random.seed(189123789)  # Seed for reproducibility


## Functions¶

Show Code
def split_bets_high(x):
bets = {}
for el in x:
g, l = [int(i) for i in el.split("-")]
if (g > 24) & (l <= 24):
bets[el] = "high-low"
elif (g <= 24) & (l > 24):
bets[el] = "low-high"
elif (g <= 24) & (l <= 24):
bets[el] = "low-low"
elif (g > 24) & (l > 24):
bets[el] = "high-high"
return bets

def split_bets_low(x):
bets = {}
for el in x:
g, l = [int(i) for i in el.split("-")]
if (g > 12) & (l <= 12):
bets[el] = "high-low"
elif (g <= 12) & (l > 12):
bets[el] = "low-high"
elif (g <= 12) & (l <= 12):
bets[el] = "low-low"
elif (g > 12) & (l > 12):
bets[el] = "high-high"
return bets

def ols(X, y):
X = np.array([np.ones(X.shape[0]), X]).T
return np.linalg.inv(X.T @ X) @ X.T @ y

def invlogit(x):
return np.exp(x) / (1 + np.exp(x))

def loglinear(gain, loss):
"""
First decision rule considered in the paper.
Return the log-linear utility for a bet: a (gain, loss) tuple.
"""
eta = np.log(gain) - np.log(loss)
return eta

def ratio(gain, loss):
"""
Second decision rule considered in the paper.
Return the Gain/Loss ratio utility for a bet: a (gain, loss) tuple.
"""
eta = gain / loss - 1
return eta

def threshold(gain, loss):
"""
Third decision rule considered in the paper.
Return discontinuous expected value utility for a bet: a (gain, loss) tuple.
"""
if gain > loss:
return 1.4 + (gain - loss) / 20
elif gain < loss:
return -1.4 + (gain - loss) / 20
else:
return 0

def fast_OLS(endog, exog):
"""
A simple function for (X'X)^(-1)X'Y
:return: The Kx1 array of estimated coefficients.
"""
try:
return dot(dot(inv(dot(exog.T, exog)), exog.T), endog).squeeze()
except LinAlgError:
raise LinAlgError

def logit_cdf(X):
"""
The CDF of the logistic function.
:param X: Value at which to estimate the CDF
:return: The logistic function CDF, evaluated at X
"""
idx = X > 0
out = np.empty(X.size, dtype=float)
out[idx] = 1 / (1 + np.exp(-X[idx]))
exp_X = np.exp(X[~idx])
out[~idx] = exp_X / (1 + exp_X)
return out

def logit_score(endog, exog, params, n_obs):
"""
The score of the logistic function.
:param endog: Nx1 vector of endogenous predictions
:param exog: NxK vector of exogenous predictors
:param params: Kx1 vector of parameters for the predictors
:param n_obs: Number of observations
:return: The score, a Kx1 vector, evaluated at params'
"""
return dot(endog - logit_cdf(dot(exog, params)), exog) / n_obs

def logit_hessian(exog, params, n_obs):
"""
The hessian of the logistic function.
:param exog: NxK vector of exogenous predictors
:param params: Kx1 vector of parameters for the predictors
:param n_obs: Number of observations
:return: The Hessian, a KxK matrix, evaluated at params'
"""
L = logit_cdf(np.dot(exog, params))
return -dot(L * (1 - L) * exog.T, exog) / n_obs

def fast_logit(endog, exog, n_obs=0, n_vars=0, max_iter=10000, tolerance=1e-10):
"""
A convenience function for the Newton-Raphson method to evaluate a logistic model.
:param endog: Nx1 vector of endogenous predictions
:param exog: NxK vector of exogenous predictors
:param n_obs: Number of observations N
:param n_vars: Number of exogenous predictors K
:param max_iter: Maximum number of iterations
:param tolerance: Margin of error for convergence
:return: The error-minimizing parameters for the model.
"""
iterations = 0
oldparams = np.inf
newparams = np.repeat(0, n_vars)
while iterations < max_iter and np.any(np.abs(newparams - oldparams) > tolerance):
oldparams = newparams
try:
H = logit_hessian(exog, oldparams, n_obs)
invH = inv(H)
score = logit_score(endog, exog, oldparams, n_obs)
newparams = oldparams - invH @ score
except LinAlgError:
raise LinAlgError
iterations += 1
return newparams

def bootstrap_lambda_within_s1_clustered(df, nboots=5000):
"""
Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
lambda parameters within each subset of lotteries.
"""
N = df.shape[0]
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 4))
m = smf.logit(
"response ~ -1 + C(status) + gain:C(status) + loss:C(status)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = m.fit(disp=False).params.values
g = params[4:8]
l = params[8:]
lambd = -l / g
opt = partial(fast_logit, n_obs=N, n_vars=12)
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = opt(bdata[:, 0], bdata[:, 1:])
bg = bparams[4:8]
bl = bparams[8:]
blambd[i] = -bl / bg
return lambd, blambd

def bootstrap_lambda_between_s1_clustered(df, nboots=5000):
"""
Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
lambda parameters within each condition.
"""
N = df.shape[0]
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 4))
m = smf.logit(
"response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = m.fit(disp=False).params.values
g = params[4:8]
l = params[8:]
lambd = -l / g
opt = partial(fast_logit, n_obs=N, n_vars=12)
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = opt(bdata[:, 0], bdata[:, 1:])
bg = bparams[4:8]
bl = bparams[8:]
blambd[i] = -bl / bg
return lambd, blambd

def bootstrap_lambda_between_s3_clustered(df, nboots=5000):
"""
Bootstrap individuals from Experiment3 of W&S, and estimate the bootstrapped
lambda parameters within each condition.
"""
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 3))
m = smf.ols(
"response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = fast_OLS(data[:, 0], data[:, 1:])
g = params[3:6]
l = params[6:]
lambd = -l / g
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = fast_OLS(bdata[:, 0], bdata[:, 1:])
bg = bparams[3:6]
bl = bparams[6:]
blambd[i] = -bl / bg
return lambd, blambd

def build_bayesian_model(data, subset="all"):
"""
Build the Bayesian model used to estimate the pooled parameters.
"""
y, gains, losses, condition = data.loc[
:, ["response", "gain", "loss", "condition"]
].values.T
condition = np.array([int(i) for i in condition])
with pm.Model() as model:
Ïƒ_Î± = pm.HalfCauchy("Ïƒ_Î±", beta=25)
Ïƒ_Î²G = pm.HalfCauchy("Ïƒ_Î²G", beta=25)
Ïƒ_Î²L = pm.HalfCauchy("Ïƒ_Î²L", beta=25)

Î± = pm.Normal("Î±", mu=0, sd=Ïƒ_Î±, shape=4)
Î²G = pm.HalfNormal("Î²G", sd=Ïƒ_Î²G, shape=4)
Î²L = pm.HalfNormal("Î²L", sd=Ïƒ_Î²L, shape=4)

Î» = pm.Deterministic("Î»", Î²L / Î²G)

Î¼ = Î±[condition] + Î²G[condition] * gains - Î²L[condition] * losses
y_1 = pm.Bernoulli("y_1", logit_p=Î¼, observed=y)
return model


## Data Processing¶

Show Code
# Reading the bets from S1 of W&S: 64 bets for each condition
bets_s1 = pd.read_csv("Original Data/Bets_S1.csv")
bets_s1["bet"] = bets_s1[["gain", "loss"]].apply(
lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1
)
pairs_by_cond = bets_s1.groupby("condition").apply(lambda x: np.unique(x.bet)).to_dict()
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = bets_s1.bet.unique()

bets_s1["is_common"] = bets_s1.bet.apply(lambda x: x in common_pairs)
dict_common_s1 = {k: v for k, v in bets_s1[["bet", "is_common"]].values}

bets_dict_high = split_bets_high(pairs_by_cond[3])
bets_dict_low = split_bets_low(pairs_by_cond[0])

# Reading the data for study 1
data_s1 = pd.read_csv(f"Original Data/Data_S1ab.csv")
data_s1["response"] = data_s1["response"].map({"Accept": 1, "Reject": 0})
data_s1["condition"] = data_s1["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1["pair"] = data_s1.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1["sub"].shape[0] / 64
data_s1["sub_id"] = np.repeat(np.arange(n_parts), 64)
data_s1["is_common"] = data_s1.pair.apply(lambda x: x in common_pairs)

# Creating subsets of data of study 1 (used in analysis later)
data_s1_high = data_s1[(data_s1.condition == 3)]
data_s1_high["status"] = data_s1_high.pair.map(bets_dict_high)

data_s1_low = data_s1[(data_s1.condition == 0)]
data_s1_low["status"] = data_s1_low.pair.map(bets_dict_low)

# Reading the data for study 1a
data_s1a = pd.read_csv(f"Original Data/Data_S1a.csv")
data_s1a["response"] = data_s1a["response"].map({"Accept": 1, "Reject": 0})
data_s1a["condition"] = data_s1a["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1a["pair"] = data_s1a.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1a["sub"].shape[0] / 64
data_s1a["sub_id"] = np.repeat(np.arange(n_parts), 64)

# Reading the data for study 1b
data_s1b = pd.read_csv(f"Original Data/Data_S1b.csv")
data_s1b["response"] = data_s1b["response"].map({"Accept": 1, "Reject": 0})
data_s1b["condition"] = data_s1b["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1b["pair"] = data_s1b.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1b["sub"].shape[0] / 64
data_s1b["sub_id"] = np.repeat(np.arange(n_parts), 64)

# Reading the data for study 2
data_s2 = pd.read_csv("Original Data/Data_S2.csv")
data_s2["condition"] = data_s2.condition.map({20.6: 0, 60.2: 1})
data_s2["Ones"] = 1

data_s2["pair"] = data_s2.apply(lambda x: f"{x.gain}_{x.loss}", axis=1)
pairs_by_cond = (
data_s2.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s2.pair.unique()
data_s2["is_common"] = data_s2.pair.apply(lambda x: x in common_pairs)
dict_common_s2 = {k: v for k, v in data_s2[["pair", "is_common"]].values}

# Reading the data for study 3
data_s3 = pd.read_csv("Original Data/Data_S3.csv")
data_s3["condition"] = data_s3.condition.map(
{"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data_s3["response"] = data_s3.response.map(
{
"Strongly Reject": -2,
"Weakly Reject": -1,
"Weakly Accept": 1,
"Strongly Accept": 2,
}
)

data_s3["pair"] = data_s3.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
pairs_by_cond = (
data_s3.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s3.pair.unique()
data_s3["is_common"] = data_s3.pair.apply(lambda x: x in common_pairs)
dict_common_s3 = {k: v for k, v in data_s3[["pair", "is_common"]].values}


# Experimental Design and Key Result¶

We estimate the impact of gains and losses on the likelihood to accept the bet, using W&S' R code.

Show Code
%%R -i data_s1 -o fits_real

# Original code from W&S

# Fit GLM per individual
loss_aversion <- function(data) {
m1 <- glm(response ~ gain + loss, data=data, family=binomial)
data.frame(id=data$sub_id[1], condition=data$condition[1],
converged=m1$converged, deviance=deviance(m1), max.residual=max(abs(residuals(m1))), intercept=coef(m1)["(Intercept)"], beta.loss=coef(m1)["loss"], beta.gain=coef(m1)["gain"]) } # Original data (S1a + S1b) fits_real <- by(data=data_s1, data_s1$sub_id, loss_aversion)
fits_real <- do.call(rbind, fits_real)
fits_real <- fits_real[order(fits_real$id),] fits_real$lambda <- with(fits_real, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_real$deviance,probs=c(.95)) fits_real <- subset(fits_real,deviance < deviance.cut) # Remove negative lambdas fits_real <- (subset(fits_real,lambda > 0) %>% mutate(id = as.numeric(factor(id)), dataset="Original"))  Plotting the results: Show Code fits_real["condition_name"] = fits_real.condition.map( {0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"} ) sns.set_context("talk") g = sns.catplot( x="condition", y="lambda", data=fits_real, ci=95, kind="point", estimator=np.median, join=False, dodge=0.1, legend=False, aspect=1.5, height=6.38 * 1.5 / 1.5, ) pal = sns.color_palette() g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"]) g.set_xlabels("") g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16) g.ax.axhline(1, lw=0.6, color="black") plt.savefig("Figures/Figure 1.png", dpi=100);  #$\lambda$Will Differ Without Decision by Sampling¶ ## Evidence Through Simulation¶ We generate simulated choices for 10,000 participants, randomly assigned to each of the four conditions. Show Code np.random.seed(189123789) # Seed for reproducibility bets_s1["bet_utility_loglin"] = bets_s1[["gain", "loss"]].apply( lambda x: loglinear(x.gain, x.loss), axis=1 ) # Compute the log-linear utility of the bet. bets_s1["bet_accept_prob_loglin"] = bets_s1["bet_utility_loglin"].apply( invlogit ) # Compute the probability of accepting the bet. bets_s1["bet_reject_prob_loglin"] = ( 1 - bets_s1["bet_accept_prob_loglin"] ) # Probability of rejecting the bet bets_s1["bet_utility_ratio"] = bets_s1[["gain", "loss"]].apply( lambda x: ratio(x.gain, x.loss), axis=1 ) # Compute the ratio utility of the bet. bets_s1["bet_accept_prob_ratio"] = bets_s1["bet_utility_ratio"].apply( invlogit ) # Compute the probability of accepting the bet. bets_s1["bet_reject_prob_ratio"] = ( 1 - bets_s1["bet_accept_prob_ratio"] ) # Probability of rejecting the bet bets_s1["bet_utility_threshold"] = bets_s1[["gain", "loss"]].apply( lambda x: threshold(x.gain, x.loss), axis=1 ) # Compute the ratio utility of the bet. bets_s1["bet_accept_prob_threshold"] = bets_s1["bet_utility_threshold"].apply( invlogit ) # Compute the probability of accepting the bet. bets_s1["bet_reject_prob_threshold"] = ( 1 - bets_s1["bet_accept_prob_threshold"] ) # Probability of rejecting the bet bets_s1 = bets_s1.sort_values(["condition", "gain", "loss"]) for funcform in ["ratio", "loglin", "threshold"]: try: data_sim = pd.read_csv( f"Buffer/Simulated Data_S1_{funcform}.csv" ) # Check if data has been simulated already except FileNotFoundError: # Create the simulated data N_BY_COND = 2500 # Number of participants by condition N_PARTS = N_BY_COND * 4 # Total number of participants bets = bets_s1[ [ "condition", "gain", "loss", f"bet_accept_prob_{funcform}", f"bet_reject_prob_{funcform}", ] ] # Select the bets choices = np.array( [ np.random.choice( [0, 1], p=[ b[f"bet_reject_prob_{funcform}"], b[f"bet_accept_prob_{funcform}"], ], size=N_BY_COND, ) for _, b in bets.iterrows() ] ).T.reshape( -1, 1 ) # Generate simulated choices for all bets (64*4 conditions = 256), corresponding to N_CHOICES per condition. partids = np.repeat(np.arange(0, N_PARTS), 64).reshape( -1, 1 ) # Generating unique IDs for each participant bet_attributes = np.tile( bets[["condition", "gain", "loss"]], (N_BY_COND, 1) ) # Repeating the values of the bets data = np.concatenate([partids, bet_attributes, choices], axis=1) df = pd.DataFrame( data, columns=["sub", "condition", "gain", "loss", "response"] ) df["condition"] = df["condition"].astype(int) df.to_csv(f"Buffer/Simulated Data_S1_{funcform}.csv", index=None)  We then estimate the impact of gains and losses on the likelihood to accept the bet, using W&S' R code. Show Code %%R -o fits_sim # Original code from W&S #Simulated data data_sim <- read.csv("Buffer/Simulated Data_S1_ratio.csv",as.is=T) data_sim$sub_id <- as.factor(data_sim$sub) fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),] fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95)) fits_sim <- subset(fits_sim,deviance < deviance.cut) # Remove negative lambdas fits_sim_ratio <- (subset(fits_sim,lambda > 0) %>% mutate(id = as.numeric(factor(id)), dataset="Simulated (Gain-Loss Ratio)")) #Simulated data data_sim <- read.csv("Buffer/Simulated Data_S1_loglin.csv",as.is=T) data_sim$sub_id <- as.factor(data_sim$sub) fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),] fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95)) fits_sim <- subset(fits_sim,deviance < deviance.cut) # Remove negative lambdas fits_sim_loglin <- (subset(fits_sim,lambda > 0) %>% mutate(id = as.numeric(factor(id)), dataset="Simulated (Log-Linear)")) #Simulated data data_sim <- read.csv("Buffer/Simulated Data_S1_threshold.csv",as.is=T) data_sim$sub_id <- as.factor(data_sim$sub) fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),] fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95)) fits_sim <- subset(fits_sim,deviance < deviance.cut) # Remove negative lambdas fits_sim_threshold <- (subset(fits_sim,lambda > 0) %>% mutate(id = as.numeric(factor(id)), dataset="Simulated (Discontinuous EV)")) fits_sim <- rbind(fits_sim_threshold, fits_sim_loglin, fits_sim_ratio)  The table below reports the estimates$\lambda$obtained from the original data and from the simulated data, for each of the three decision rules: Show Code fits_sim["condition_name"] = fits_sim.condition.map( {0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"} ) fits_all = pd.concat([fits_real, fits_sim]) medians = fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack() medians  lambda dataset Original Simulated (Discontinuous EV) Simulated (Gain-Loss Ratio) Simulated (Log-Linear) condition_name 20G-20L 1.000000 1.000000 1.059934 1.00000 20G-40L 0.861915 0.788079 0.500000 0.52167 40G-20L 1.730755 1.237665 2.000000 2.00000 40G-40L 1.017761 1.000000 1.083097 1.00000 In graph form: Show Code orig = medians.loc[:, ("lambda", "Original")].values simu_threshold = medians.loc[:, ("lambda", "Simulated (Discontinuous EV)")].values simu_ratio = medians.loc[:, ("lambda", "Simulated (Gain-Loss Ratio)")].values simu_loglin = medians.loc[:, ("lambda", "Simulated (Log-Linear)")].values sns.set_context("talk") g = sns.catplot( x="condition", y="lambda", hue="dataset", data=fits_all, kind="point", estimator=np.median, join=False, dodge=0.4, legend=False, aspect=1.5, height=6.38 * 1.5 / 1.5, hue_order=[ "Original", "Simulated (Log-Linear)", "Simulated (Gain-Loss Ratio)", "Simulated (Discontinuous EV)", ], markers=["o", "D", "s", "^"], ) pal = sns.color_palette() plt.legend(loc="upper left", frameon=False, fontsize=14) g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"]) g.set_xlabels("") g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16) g.ax.axhline(1, lw=0.6, color="black") g.ax.set_ylim(0.4, 2.2) for i in range(4): if i == 2: ycoords = (0.85, 0.75, 0.65, 0.55) else: ycoords = (1.55, 1.45, 1.35, 1.25) g.ax.annotate( f"$\\lambda_O$= {orig[i]:.2f}", (i, ycoords[0]), va="center", ha="center", color=pal[0], size=14, ) g.ax.annotate( "$\\lambda_{LL}$" + f" = {simu_loglin[i]:.2f}", (i, ycoords[1]), va="center", ha="center", color=pal[1], size=14, ) g.ax.annotate( "$\\lambda_{GLR}$" + f" = {simu_ratio[i]:.2f}", (i, ycoords[2]), va="center", ha="center", color=pal[2], size=14, ) g.ax.annotate( "$\\lambda_{DEV}$" + f" = {simu_threshold[i]:.2f}", (i, ycoords[3]), va="center", ha="center", color=pal[3], size=14, ) plt.savefig("Figures/Figure 2.png", dpi=400);  ## Evidence in Data from W&S¶ Show Code np.random.seed(189123789) # Seed for reproducibility sns.set_context("talk") _, axes = plt.subplots(2, 1, figsize=(6.38, 6.38)) x = [3, 2, 1, 0] for d, label, ax, title in zip( [data_s1_low, data_s1_high], ["Low", "High"], axes, ["20G - 20L", "40G - 40L"] ): try: # Check if bootstrapped estimates have already been generated lambd = np.load( f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy" ) blambd = np.load( f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy" ) except FileNotFoundError: # Otherwise generate them (long...) lambd, blambd = bootstrap_lambda_within_s1_clustered(d, 5000) np.save( f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy", lambd, ) np.save( f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy", blambd, ) ax.scatter(x, lambd) llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0)) ax.errorbar(x, lambd, [llci, ulci], alpha=1, fmt="none", lw=2) ax.axhline(1, color="black", lw=0.6) ax.set_xticks([0, 1, 2, 3]) yticks = np.round(np.arange(-1, 3.5, 1), 1) ax.set_yticks(yticks) ax.set_ylim(-1.5, 3.5) ax.set_xlim(-0.5, 3.5) ax.set_yticklabels(yticks) ax.set_xticklabels([]) sns.despine(ax=ax) ax.xaxis.grid(False) ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16) ax.set_title(title) axes[1].set_xticklabels( ["Small G\nSmall L", "Small G\nLarge L", "Large G\nSmall L", "Large G\nLarge L",], size=14, ) plt.tight_layout() plt.savefig("Figures/Figure 3.png", dpi=400, bbox_inches="tight");  In table form: Show Code lambdas = [] for label in ["Low", "High"]: lambd = np.load( f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy" ) blambd = np.load( f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy" ) lambdas.append(lambd) df_lambdas = pd.DataFrame(lambdas) df_lambdas.columns = ["Small G<br>Small L", "Small G<br>Large L", "Large G<br>Small L", "Large G<br>Large L",] df_lambdas["Condition"] = ["20G-20L", "40G-40L"] df_lambdas.set_index(["Condition"]).style.format("{:.2f}")  Small G Small L Small G Large L Large G Small L Large G Large L Condition 20G-20L 0.95 1.83 0.53 1.03 40G-40L 1.05 1.31 0.45 1.09 #$\lambda$Is The Same When Analyzing Common Lotteries¶ Show Code np.random.seed(189123789) # Seed for reproducibility sns.set_style("ticks") sns.set_context("talk") _, axes = plt.subplots(3, 1, figsize=(6.38, 8)) pal = sns.color_palette() nboots = 5000 for ax, filename, title in zip( axes, ["Data_S1ab", "Data_S1a", "Data_S1b"], ["Experiments 1a + 1b", "Experiment 1a", "Experiment 1b"], ): data = pd.read_csv(f"Original Data/{filename}.csv") data["response"] = data["response"].map({"Accept": 1, "Reject": 0}) data["condition"] = data["condition"].map( { "Gains 20-Losses 20": 0, "Gains 20-Losses 40": 1, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 3, } ) data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1) data["is_common"] = data.pair.map(dict_common_s1) n_parts = data["sub"].shape[0] / 64 data["sub_id"] = np.repeat(np.arange(n_parts), 64) chosen = [False] * 55 + [True] * 9 data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform( lambda x: np.random.choice(chosen, 64, replace=False) ) data_common = data[data.is_common] data_random = data[data.is_randomly_chosen] datasets = [data, data_random, data_common] labels = ["All Lotteries", "Random Subset", "Common Lotteries"] markers = ["o", "s", "D"] xpos = [np.arange(0, 4, 1) - 0.1, np.arange(0, 4, 1), np.arange(0, 4, 1) + 0.1] for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal): try: # Check if bootstrapped estimates have already been generated lambd = np.load( f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy" ) blambd = np.load( f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy" ) except: # Otherwise generate them (long...) lambd, blambd = bootstrap_lambda_between_s1_clustered(d, 5000) np.save( f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd ) np.save( f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy", blambd, ) ax.scatter(x, lambd, label=l, marker=m) llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0)) ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2) ax.axhline(1, color="black", lw=0.6) ax.set_xticks(np.arange(0, 4, 1)) yticks = np.round(np.arange(0.6, 2.8, 0.4), 1) ax.set_yticks(yticks) ax.set_ylim(0.4, 2.6) ax.set_yticklabels(yticks, size=14) sns.despine(ax=ax) ax.xaxis.grid(False) ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16) ax.set_title(title, size=14) ax.set_xticklabels([]) axes[0].legend(frameon=False, loc=2, fontsize=13, handletextpad=0.05) axes[2].set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], size=14) plt.tight_layout() plt.savefig("Figures/Figure 4.png", dpi=100, bbox_inches="tight");  In table form: Show Code lambdas = [] for filename, ax, title in zip( ["Data_S1ab", "Data_S1a", "Data_S1b"], axes, ["Study 1a + Study 1b", "Study 1a", "Study 1b"], ): for d, l, x, c in zip(datasets, labels, xpos, pal): lambd = np.load( f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy" ) lambdas.append(lambd) df_lambdas = pd.DataFrame(lambdas) df_lambdas.columns = ["20G - 20L", "20G - 40L", "40G - 20L", "40G - 40L"] df_lambdas["Data"] = np.repeat(["S1a + S1b", "S1a", "S1b"], 3) df_lambdas["Subset"] = np.tile( ["All Lotteries", "Random Subset", "Common Lotteries"], 3 ) df_lambdas.set_index(["Data", "Subset"]).transpose().style.format("{:.2f}")  Data S1a + S1b S1a S1b Subset All Lotteries Random Subset Common Lotteries All Lotteries Random Subset Common Lotteries All Lotteries Random Subset Common Lotteries 20G - 20L 1.04 1.08 0.96 1.06 1.05 0.99 1.03 1.06 0.93 20G - 40L 0.71 0.70 0.96 0.73 0.69 1.00 0.70 0.66 0.92 40G - 20L 1.84 1.74 1.03 1.94 1.85 1.08 1.77 1.73 1.00 40G - 40L 1.07 1.10 1.01 1.08 1.14 1.03 1.07 1.08 0.99 # Appendix¶ ## Validation of the Pooled Model¶ Fitting the original individual-level model in R: Show Code %%R -i data_s1 -i data_s1a -i data_s1b -o fits_all # Code from W&S # Fit GLM per individual loss_aversion <- function(data) { m1 <- glm(response ~ gain + loss, data=data, family=binomial) data.frame(id=data$sub_id[1],
condition=data$condition[1], converged=m1$converged,
deviance=deviance(m1),
max.residual=max(abs(residuals(m1))),
intercept=coef(m1)["(Intercept)"],
beta.loss=coef(m1)["loss"],
beta.gain=coef(m1)["gain"])
}

# Original data (S1a + S1b)

fits_s1ab <- by(data=data_s1, data_s1$sub_id, loss_aversion) fits_s1ab <- do.call(rbind, fits_s1ab) fits_s1ab <- fits_s1ab[order(fits_s1ab$id),]
fits_s1ab$lambda <- with(fits_s1ab, -beta.loss/beta.gain) # Remove 5% with highest deviance deviance.cut <- quantile(fits_s1ab$deviance,probs=c(.95))
fits_s1ab <- subset(fits_s1ab, deviance < deviance.cut)

# Remove negative lambdas
fits_s1ab <- (subset(fits_s1ab,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1ab"))

# Original data S1a

fits_s1a <- by(data=data_s1a, data_s1a$sub_id, loss_aversion) fits_s1a <- do.call(rbind, fits_s1a) fits_s1a <- fits_s1ab[order(fits_s1a$id),]
fits_s1a$lambda <- with(fits_s1a, -beta.loss/beta.gain) # Remove 5% with highest deviance deviance.cut <- quantile(fits_s1a$deviance,probs=c(.95))
data_s1a <- subset(fits_s1a, deviance < deviance.cut)

# Remove negative lambdas
fits_s1a <- (subset(fits_s1a,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1a"))

# Original data S1b
fits_s1b <- by(data=data_s1b, data_s1b$sub_id, loss_aversion) fits_s1b <- do.call(rbind, fits_s1b) fits_s1b <- fits_s1b[order(fits_s1b$id),]
fits_s1b$lambda <- with(fits_s1b, -beta.loss/beta.gain) # Remove 5% with highest deviance deviance.cut <- quantile(fits_s1b$deviance,probs=c(.95))
fits_s1b <- subset(fits_s1b, deviance < deviance.cut)

# Remove negative lambdas
fits_s1b <- (subset(fits_s1b,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1b"))

fits_all <- bind_rows(fits_s1ab, fits_s1a, fits_s1b)


Fitting the Bayesian model:

Show Code
np.random.seed(189123789)  # Seed for reproducibility
results_bayes = []
for data, filename in zip([data_s1, data_s1a, data_s1b], ["S1ab", "S1a", "S1b"]):
model = build_bayesian_model(data)
path = f"Buffer/Bayesian Traces/Data_{filename}-all/"
with model:
if not os.path.exists(path):
# If the model has not been estimated yet, run it (long...)
trace_simple = pm.sample(1000, tune=1000)
pm.save_trace(trace_simple, directory=path)
l = pm.summary(tab, varnames=["Î»"]).reset_index()
l["condition_name"] = (
l["index"]
.apply(lambda x: int(x[2]))
.map({0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"})
)
l["dataset"] = filename
results_bayes.append(l)
results_bayes = (
pd.concat(results_bayes)
.pivot(index="condition_name", columns="dataset", values="mean")
.reset_index()
)
results_bayes["estimation"] = "bayes"


Fitting the logistic model:

Show Code
results_logit = []
for filename, ax in zip(["S1ab", "S1a", "S1b"], axes):
f"Buffer/Saved Lambdas/Lambda - All Lotteries - Data_{filename} - Clustered.npy"
)
results_logit.append(lambd)
results_logit = pd.DataFrame(results_logit)
results_logit.columns = ["20G-20L", "20G-40L", "40G-20L", "40G-40L"]
results_logit["dataset"] = ["S1ab", "S1a", "S1b"]
results_logit = (
pd.melt(
results_logit,
id_vars=["dataset"],
value_vars=["20G-20L", "20G-40L", "40G-20L", "40G-40L"],
)
.pivot(index="variable", columns="dataset", values="value")
.reset_index()
)
results_logit.columns = ["condition_name", "S1a", "S1ab", "S1b"]
results_logit["estimation"] = "logit"


Summary information:

Show Code
fits_all["condition_name"] = fits_all.condition.map(
{0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
results_median = (
fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack()
)
results_median.columns = [col[1] for col in results_median.columns]
results_median = results_median.reset_index()
results_median["estimation"] = "median"

results_all = (
pd.concat([results_logit, results_bayes, results_median])[
["condition_name", "estimation", "S1ab", "S1a", "S1b"]
]
.set_index(["condition_name", "estimation"])
.reindex(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], level=0)
.reindex(["median", "logit", "bayes"], level=1)
.unstack()
)
results_all.style.format("{:.2f}")

dataset S1ab S1a S1b
estimation median logit bayes median logit bayes median logit bayes
condition_name
20G-20L 1.00 1.04 1.04 1.01 1.06 1.06 1.01 1.03 1.03
20G-40L 0.86 0.71 0.72 0.88 0.73 0.75 0.81 0.70 0.71
40G-20L 1.73 1.84 1.84 1.77 1.94 1.94 1.59 1.77 1.76
40G-40L 1.02 1.07 1.07 1.02 1.08 1.08 1.01 1.07 1.07

## Unsuccessful Re-Analysis of Study 2¶

According to the paper:

In Experiment 2, we used two distributions for gains and losses, one ranging from \$6 to \\$20 (in \$2 increments) and one three times larger, ranging from \\$18 to \$60 (in \\$6 increments). We only tested the asymmetric cases. Unlike in Experiments 1a and 1b, the possible gains and losses were randomly drawn and paired from the distributions to produce 64 pairs.

From this description, one would expect the data to have the following properties:

1. In the "Gains 20-Losses 60" (vs. "Gains 60-Losses 20) condition...
 A. All possible gains (losses) should be between 6 and 20, inclusive
B. All possible gains (losses) should be multiples of 2
C. All possible losses (gains) should be between 18 and 60, inclusive
D. All possible losses (gains) should be multiples of 6

Instead, the data suggests the following:

1. In the "Gains 20-Losses 60" (vs. "Gains 60-Losses 20) condition...
 A. Possible gains (losses) were drawn at random between 0 and 19 (inclusive)
B. Possible losses(gains) were drawn at random between 0 and 59 (inclusive)

This inconsistency does not allow a re-analysis of the data, as the bet structure does not appear to match the description of the paper.

The graph at the bottom presents the number of bets that appear in the data, for each combination of gains and losses:

Show Code
sns.set_context("talk")

grid_kws = {"height_ratios": (0.95, 0.05), "hspace": 0.2}
_, (ax, cbar_ax) = plt.subplots(2, 1, figsize=(10, 11), gridspec_kw=grid_kws)
df_accept = data_s2.groupby(["gain", "loss"]).Ones.count().reset_index()
g = sns.heatmap(
df_accept.pivot("gain", "loss", "Ones").iloc[::-1, :],
annot=False,
cmap="viridis",
fmt=".2f",
ax=ax,
cbar=True,
cbar_ax=cbar_ax,
cbar_kws=dict(orientation="horizontal"),
)
g.set_xlim(-0.5, 60)
g.set_xticks(np.arange(0, 60, 2) + 0.5)
g.set_xticklabels(np.arange(0, 60, 2))
g.set_yticks(np.arange(60, 0, -2) - 0.5)
g.set_yticklabels(np.arange(0, 60, 2))
g.set_xlabel("Gain Amount")
g.set_ylabel("Loss Amount")
g.set_ylim(60, -0.5);


The authors have confirmed that the amounts of gains and losses reported in the paper were inaccurate.

## Re-Analysis of Study 3¶

Show Code
np.random.seed(189123789)  # Seed for reproducibility
sns.set_style("ticks")
sns.set_context("talk")
_, ax = plt.subplots(1, 1, figsize=(6.38, 5))
pal = sns.color_palette()
nboots = 5000
filename = "Data_S3.csv"
title = ""

data = pd.read_csv(f"Original Data/{filename}")
data["response"] = data.response.map(
{
"Strongly Reject": -2,
"Weakly Reject": -1,
"Weakly Accept": 1,
"Strongly Accept": 2,
}
)
data["condition"] = data.condition.map(
{"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
data["is_common"] = data.pair.map(dict_common_s3)
n_parts = data["sub"].shape[0] / 256
data["sub_id"] = np.repeat(np.arange(n_parts), 256)
chosen = [False] * (576 - 36) + [True] * 36
data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform(
lambda x: np.random.choice(chosen, 256, replace=False)
)
data_common = data[data.is_common]
data_random = data[data.is_randomly_chosen]

datasets = [data, data_random, data_common]
labels = ["All Lotteries", "Random Subset", "Common Lotteries"]
markers = ["o", "s", "D"]
xpos = [np.arange(0, 3, 1) - 0.1, np.arange(0, 3, 1), np.arange(0, 3, 1) + 0.1]

sns.set_context("talk")
for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal):
try:
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy"
)
except FileNotFoundError:
lambd, blambd = bootstrap_lambda_between_s3_clustered(d, nboots)
np.save(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd
)
np.save(
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy",
blambd,
)
ax.scatter(x, lambd, label=l, marker=m)
llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2)

ax.axhline(1, lw=0.6, color="black")
sns.despine(ax=ax)
ax.set_title(title)
ax.set_xticks([0, 1, 2])
ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=12)
ax.set_xticklabels(["20G-40L", "40G-40L", "40G-20L"])
ax.legend(title="", frameon=False, loc=2)
plt.tight_layout()
plt.savefig("Figures/Figure 5.png", dpi=100);


In table form:

Show Code
lambdas = []
for filename, ax, title in zip(["Data_S3.csv"], axes, ["S3"]):
for d, l, x, c in zip(datasets, labels, xpos, pal):
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["20G - 40L", "40G - 40L", "40G - 20L"]
df_lambdas["Subset"] = np.tile(
["All Lotteries", "Random Subset", "Common Lotteries"], 1
)
df_lambdas.set_index(["Subset"])

20G - 40L 40G - 40L 40G - 20L
Subset
All Lotteries 0.735418 1.244776 1.859524
Random Subset 0.653839 1.154495 2.241193
Common Lotteries 0.887531 1.306122 1.123239

Study 3 also shows limited evidence that decision by sampling is a driver of loss aversion: while their model would have predicted the "40G - 40L" and "40G - 20L" to be different, they seem to be identical when considering the common lotteries.