This notebook contains all the analysis described in André and De Langhe (2020):
"No Evidence for Loss Aversion Disappearance and Reversal in Walasek and Stewart (2015)"
import os
import warnings
from functools import partial, reduce
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import statsmodels.formula.api as smf
from numpy import dot
from scipy.linalg import LinAlgError, inv
warnings.filterwarnings("ignore")
%load_ext rpy2.ipython
%R suppressMessages(library(tidyverse))
np.random.seed(189123789) # Seed for reproducibility
def split_bets_high(x):
    """
    Classify each bet (a 'gain-loss' string) as 'high-low', 'low-high', 'low-low',
    or 'high-high', depending on whether the gain and the loss each exceed 24.
    Used for the lotteries of the 40G-40L condition.
    """
bets = {}
for el in x:
g, l = [int(i) for i in el.split("-")]
if (g > 24) & (l <= 24):
bets[el] = "high-low"
elif (g <= 24) & (l > 24):
bets[el] = "low-high"
elif (g <= 24) & (l <= 24):
bets[el] = "low-low"
elif (g > 24) & (l > 24):
bets[el] = "high-high"
return bets
def split_bets_low(x):
    """
    Classify each bet (a 'gain-loss' string) as 'high-low', 'low-high', 'low-low',
    or 'high-high', depending on whether the gain and the loss each exceed 12.
    Used for the lotteries of the 20G-20L condition.
    """
bets = {}
for el in x:
g, l = [int(i) for i in el.split("-")]
if (g > 12) & (l <= 12):
bets[el] = "high-low"
elif (g <= 12) & (l > 12):
bets[el] = "low-high"
elif (g <= 12) & (l <= 12):
bets[el] = "low-low"
elif (g > 12) & (l > 12):
bets[el] = "high-high"
return bets
def ols(X, y):
    """Return the OLS intercept and slope from regressing y on a single predictor X."""
    X = np.array([np.ones(X.shape[0]), X]).T
    return np.linalg.inv(X.T @ X) @ X.T @ y
def invlogit(x):
    """Return the inverse logit (logistic) transformation of x."""
    return np.exp(x) / (1 + np.exp(x))
def loglinear(gain, loss):
"""
First decision rule considered in the paper.
Return the log-linear utility for a bet: a (gain, loss) tuple.
"""
eta = np.log(gain) - np.log(loss)
return eta
def ratio(gain, loss):
"""
Second decision rule considered in the paper.
Return the Gain/Loss ratio utility for a bet: a (gain, loss) tuple.
"""
eta = gain / loss - 1
return eta
def threshold(gain, loss):
"""
Third decision rule considered in the paper.
Return discontinuous expected value utility for a bet: a (gain, loss) tuple.
"""
if gain > loss:
return 1.4 + (gain - loss) / 20
elif gain < loss:
return -1.4 + (gain - loss) / 20
else:
return 0
def fast_OLS(endog, exog):
"""
A simple function for (X'X)^(-1)X'Y
:return: The Kx1 array of estimated coefficients.
"""
try:
return dot(dot(inv(dot(exog.T, exog)), exog.T), endog).squeeze()
except LinAlgError:
raise LinAlgError
def logit_cdf(X):
"""
The CDF of the logistic function.
:param X: Value at which to estimate the CDF
:return: The logistic function CDF, evaluated at X
"""
idx = X > 0
out = np.empty(X.size, dtype=float)
out[idx] = 1 / (1 + np.exp(-X[idx]))
exp_X = np.exp(X[~idx])
out[~idx] = exp_X / (1 + exp_X)
return out
def logit_score(endog, exog, params, n_obs):
"""
The score of the logistic function.
    :param endog: Nx1 vector of observed responses
    :param exog: NxK matrix of exogenous predictors
:param params: Kx1 vector of parameters for the predictors
:param n_obs: Number of observations
:return: The score, a Kx1 vector, evaluated at `params'
"""
return dot(endog - logit_cdf(dot(exog, params)), exog) / n_obs
def logit_hessian(exog, params, n_obs):
"""
The hessian of the logistic function.
    :param exog: NxK matrix of exogenous predictors
:param params: Kx1 vector of parameters for the predictors
:param n_obs: Number of observations
:return: The Hessian, a KxK matrix, evaluated at `params'
"""
L = logit_cdf(np.dot(exog, params))
return -dot(L * (1 - L) * exog.T, exog) / n_obs
def fast_logit(endog, exog, n_obs=0, n_vars=0, max_iter=10000, tolerance=1e-10):
"""
A convenience function for the Newton-Raphson method to evaluate a logistic model.
    :param endog: Nx1 vector of observed responses
    :param exog: NxK matrix of exogenous predictors
:param n_obs: Number of observations N
:param n_vars: Number of exogenous predictors K
:param max_iter: Maximum number of iterations
:param tolerance: Margin of error for convergence
:return: The error-minimizing parameters for the model.
"""
iterations = 0
oldparams = np.inf
newparams = np.repeat(0, n_vars)
while iterations < max_iter and np.any(np.abs(newparams - oldparams) > tolerance):
oldparams = newparams
try:
H = logit_hessian(exog, oldparams, n_obs)
invH = inv(H)
score = logit_score(endog, exog, oldparams, n_obs)
newparams = oldparams - invH @ score
except LinAlgError:
raise LinAlgError
iterations += 1
return newparams
def bootstrap_lambda_within_s1_clustered(df, nboots=5000):
"""
Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
lambda parameters within each subset of lotteries.
"""
N = df.shape[0]
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 4))
m = smf.logit(
"response ~ -1 + C(status) + gain:C(status) + loss:C(status)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = m.fit(disp=False).params.values
g = params[4:8]
l = params[8:]
lambd = -l / g
opt = partial(fast_logit, n_obs=N, n_vars=12)
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = opt(bdata[:, 0], bdata[:, 1:])
bg = bparams[4:8]
bl = bparams[8:]
blambd[i] = -bl / bg
return lambd, blambd
def bootstrap_lambda_between_s1_clustered(df, nboots=5000):
"""
Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
lambda parameters within each condition.
"""
N = df.shape[0]
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 4))
m = smf.logit(
"response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = m.fit(disp=False).params.values
g = params[4:8]
l = params[8:]
lambd = -l / g
opt = partial(fast_logit, n_obs=N, n_vars=12)
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = opt(bdata[:, 0], bdata[:, 1:])
bg = bparams[4:8]
bl = bparams[8:]
blambd[i] = -bl / bg
return lambd, blambd
def bootstrap_lambda_between_s3_clustered(df, nboots=5000):
"""
    Bootstrap individuals from Experiment 3 of W&S, and estimate the bootstrapped
lambda parameters within each condition.
"""
subids = df.sub_id
unique_subids = subids.unique()
N_subids = unique_subids.shape[0]
blambd = np.empty((nboots, 3))
m = smf.ols(
"response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
)
endog = m.endog.reshape(-1, 1)
exog = m.exog
data = np.hstack([endog, exog])
params = fast_OLS(data[:, 0], data[:, 1:])
g = params[3:6]
l = params[6:]
lambd = -l / g
for i in range(nboots):
chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
chosen_rows = subids == chosen_subids[0]
bdata = data[chosen_rows]
for s in chosen_subids[1:]:
chosen_rows = subids == s
sdata = data[chosen_rows]
bdata = np.vstack([bdata, sdata])
bparams = fast_OLS(bdata[:, 0], bdata[:, 1:])
bg = bparams[3:6]
bl = bparams[6:]
blambd[i] = -bl / bg
return lambd, blambd
def build_bayesian_model(data, subset="all"):
"""
Build the Bayesian model used to estimate the pooled parameters.
"""
y, gains, losses, condition = data.loc[
:, ["response", "gain", "loss", "condition"]
].values.T
condition = np.array([int(i) for i in condition])
with pm.Model() as model:
σ_α = pm.HalfCauchy("σ_α", beta=25)
σ_βG = pm.HalfCauchy("σ_βG", beta=25)
σ_βL = pm.HalfCauchy("σ_βL", beta=25)
α = pm.Normal("α", mu=0, sd=σ_α, shape=4)
βG = pm.HalfNormal("βG", sd=σ_βG, shape=4)
βL = pm.HalfNormal("βL", sd=σ_βL, shape=4)
λ = pm.Deterministic("λ", βL / βG)
μ = α[condition] + βG[condition] * gains - βL[condition] * losses
y_1 = pm.Bernoulli("y_1", logit_p=μ, observed=y)
return model
# Reading the bets from S1 of W&S: 64 bets for each condition
bets_s1 = pd.read_csv("Original Data/Bets_S1.csv")
bets_s1["bet"] = bets_s1[["gain", "loss"]].apply(
lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1
)
pairs_by_cond = bets_s1.groupby("condition").apply(lambda x: np.unique(x.bet)).to_dict()
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = bets_s1.bet.unique()
bets_s1["is_common"] = bets_s1.bet.apply(lambda x: x in common_pairs)
dict_common_s1 = {k: v for k, v in bets_s1[["bet", "is_common"]].values}
bets_dict_high = split_bets_high(pairs_by_cond[3])
bets_dict_low = split_bets_low(pairs_by_cond[0])
# Reading the data for study 1
data_s1 = pd.read_csv("Original Data/Data_S1ab.csv")
data_s1["response"] = data_s1["response"].map({"Accept": 1, "Reject": 0})
data_s1["condition"] = data_s1["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1["pair"] = data_s1.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1["sub"].shape[0] / 64
data_s1["sub_id"] = np.repeat(np.arange(n_parts), 64)
data_s1["is_common"] = data_s1.pair.apply(lambda x: x in common_pairs)
# Creating subsets of data of study 1 (used in analysis later)
data_s1_high = data_s1[(data_s1.condition == 3)].copy()
data_s1_high["status"] = data_s1_high.pair.map(bets_dict_high)
data_s1_low = data_s1[(data_s1.condition == 0)].copy()
data_s1_low["status"] = data_s1_low.pair.map(bets_dict_low)
# Reading the data for study 1a
data_s1a = pd.read_csv("Original Data/Data_S1a.csv")
data_s1a["response"] = data_s1a["response"].map({"Accept": 1, "Reject": 0})
data_s1a["condition"] = data_s1a["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1a["pair"] = data_s1a.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1a["sub"].shape[0] / 64
data_s1a["sub_id"] = np.repeat(np.arange(n_parts), 64)
# Reading the data for study 1b
data_s1b = pd.read_csv("Original Data/Data_S1b.csv")
data_s1b["response"] = data_s1b["response"].map({"Accept": 1, "Reject": 0})
data_s1b["condition"] = data_s1b["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data_s1b["pair"] = data_s1b.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1b["sub"].shape[0] / 64
data_s1b["sub_id"] = np.repeat(np.arange(n_parts), 64)
# Reading the data for study 2
data_s2 = pd.read_csv("Original Data/Data_S2.csv")
data_s2["condition"] = data_s2.condition.map({20.6: 0, 60.2: 1})
data_s2["Ones"] = 1
data_s2["pair"] = data_s2.apply(lambda x: f"{x.gain}_{x.loss}", axis=1)
pairs_by_cond = (
data_s2.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s2.pair.unique()
data_s2["is_common"] = data_s2.pair.apply(lambda x: x in common_pairs)
dict_common_s2 = {k: v for k, v in data_s2[["pair", "is_common"]].values}
# Reading the data for study 3
data_s3 = pd.read_csv("Original Data/Data_S3.csv")
data_s3["condition"] = data_s3.condition.map(
{"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data_s3["response"] = data_s3.response.map(
{
"Strongly Reject": -2,
"Weakly Reject": -1,
"Weakly Accept": 1,
"Strongly Accept": 2,
}
)
data_s3["pair"] = data_s3.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
pairs_by_cond = (
data_s3.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s3.pair.unique()
data_s3["is_common"] = data_s3.pair.apply(lambda x: x in common_pairs)
dict_common_s3 = {k: v for k, v in data_s3[["pair", "is_common"]].values}
We estimate the impact of gains and losses on the likelihood of accepting each bet, using W&S' R code.
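For each participant, this code fits a logistic regression of acceptance on the gain and loss amounts; the loss-aversion coefficient is then the negated ratio of the loss and gain slopes:

$$P(\text{accept}) = \mathrm{logit}^{-1}\!\left(\alpha + \beta_{G}\,\text{gain} + \beta_{L}\,\text{loss}\right), \qquad \lambda = \frac{-\beta_{L}}{\beta_{G}}$$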
%%R -i data_s1 -o fits_real
# Original code from W&S
# Fit GLM per individual
loss_aversion <- function(data) {
m1 <- glm(response ~ gain + loss, data=data, family=binomial)
data.frame(id=data$sub_id[1],
condition=data$condition[1],
converged=m1$converged,
deviance=deviance(m1),
max.residual=max(abs(residuals(m1))),
intercept=coef(m1)["(Intercept)"],
beta.loss=coef(m1)["loss"],
beta.gain=coef(m1)["gain"])
}
# Original data (S1a + S1b)
fits_real <- by(data=data_s1, data_s1$sub_id, loss_aversion)
fits_real <- do.call(rbind, fits_real)
fits_real <- fits_real[order(fits_real$id),]
fits_real$lambda <- with(fits_real, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_real$deviance,probs=c(.95))
fits_real <- subset(fits_real,deviance < deviance.cut)
# Remove negative lambdas
fits_real <- (subset(fits_real,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="Original"))
Plotting the results:
fits_real["condition_name"] = fits_real.condition.map(
{0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
sns.set_context("talk")
g = sns.catplot(
x="condition",
y="lambda",
data=fits_real,
ci=95,
kind="point",
estimator=np.median,
join=False,
dodge=0.1,
legend=False,
aspect=1.5,
height=6.38 * 1.5 / 1.5,
)
pal = sns.color_palette()
g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"])
g.set_xlabels("")
g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
g.ax.axhline(1, lw=0.6, color="black")
plt.savefig("Figures/Figure 1.png", dpi=100);
We generate simulated choices for 10,000 participants (2,500 in each of the four conditions).
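To make the three decision rules concrete, the minimal sketch below (not part of the original analysis) prints the acceptance probability that each rule implies for an illustrative bet, a \$12 gain paired with a \$6 loss:
# Acceptance probabilities for a hypothetical 12-6 bet under each decision rule.
# Illustration only; the simulation below applies the same logic to every bet.
for rule in [loglinear, ratio, threshold]:
    print(f"{rule.__name__}: {invlogit(rule(12, 6)):.3f}")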
np.random.seed(189123789) # Seed for reproducibility
bets_s1["bet_utility_loglin"] = bets_s1[["gain", "loss"]].apply(
lambda x: loglinear(x.gain, x.loss), axis=1
) # Compute the log-linear utility of the bet.
bets_s1["bet_accept_prob_loglin"] = bets_s1["bet_utility_loglin"].apply(
invlogit
) # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_loglin"] = (
1 - bets_s1["bet_accept_prob_loglin"]
) # Probability of rejecting the bet
bets_s1["bet_utility_ratio"] = bets_s1[["gain", "loss"]].apply(
lambda x: ratio(x.gain, x.loss), axis=1
) # Compute the ratio utility of the bet.
bets_s1["bet_accept_prob_ratio"] = bets_s1["bet_utility_ratio"].apply(
invlogit
) # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_ratio"] = (
1 - bets_s1["bet_accept_prob_ratio"]
) # Probability of rejecting the bet
bets_s1["bet_utility_threshold"] = bets_s1[["gain", "loss"]].apply(
lambda x: threshold(x.gain, x.loss), axis=1
) # Compute the discontinuous expected value (threshold) utility of the bet.
bets_s1["bet_accept_prob_threshold"] = bets_s1["bet_utility_threshold"].apply(
invlogit
) # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_threshold"] = (
1 - bets_s1["bet_accept_prob_threshold"]
) # Probability of rejecting the bet
bets_s1 = bets_s1.sort_values(["condition", "gain", "loss"])
for funcform in ["ratio", "loglin", "threshold"]:
try:
data_sim = pd.read_csv(
f"Buffer/Simulated Data_S1_{funcform}.csv"
) # Check if data has been simulated already
except FileNotFoundError:
# Create the simulated data
N_BY_COND = 2500 # Number of participants by condition
N_PARTS = N_BY_COND * 4 # Total number of participants
bets = bets_s1[
[
"condition",
"gain",
"loss",
f"bet_accept_prob_{funcform}",
f"bet_reject_prob_{funcform}",
]
] # Select the bets
choices = np.array(
[
np.random.choice(
[0, 1],
p=[
b[f"bet_reject_prob_{funcform}"],
b[f"bet_accept_prob_{funcform}"],
],
size=N_BY_COND,
)
for _, b in bets.iterrows()
]
).T.reshape(
-1, 1
        ) # Generate simulated choices for all bets (64 bets * 4 conditions = 256), with N_BY_COND choices per bet (one per simulated participant in that condition).
partids = np.repeat(np.arange(0, N_PARTS), 64).reshape(
-1, 1
) # Generating unique IDs for each participant
bet_attributes = np.tile(
bets[["condition", "gain", "loss"]], (N_BY_COND, 1)
) # Repeating the values of the bets
data = np.concatenate([partids, bet_attributes, choices], axis=1)
df = pd.DataFrame(
data, columns=["sub", "condition", "gain", "loss", "response"]
)
df["condition"] = df["condition"].astype(int)
df.to_csv(f"Buffer/Simulated Data_S1_{funcform}.csv", index=None)
We then estimate the impact of gains and losses on the likelihood of accepting each bet, using W&S' R code.
%%R -o fits_sim
# Original code from W&S
#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_ratio.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)
fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)
# Remove negative lambdas
fits_sim_ratio <- (subset(fits_sim,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="Simulated (Gain-Loss Ratio)"))
#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_loglin.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)
fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)
# Remove negative lambdas
fits_sim_loglin <- (subset(fits_sim,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="Simulated (Log-Linear)"))
#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_threshold.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)
fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)
# Remove negative lambdas
fits_sim_threshold <- (subset(fits_sim,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="Simulated (Discontinuous EV)"))
fits_sim <- rbind(fits_sim_threshold, fits_sim_loglin, fits_sim_ratio)
The table below reports the median $\lambda$ estimates obtained from the original data and from the simulated data, for each of the three decision rules:
fits_sim["condition_name"] = fits_sim.condition.map(
{0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
fits_all = pd.concat([fits_real, fits_sim])
medians = fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack()
medians
In graph form:
orig = medians.loc[:, ("lambda", "Original")].values
simu_threshold = medians.loc[:, ("lambda", "Simulated (Discontinuous EV)")].values
simu_ratio = medians.loc[:, ("lambda", "Simulated (Gain-Loss Ratio)")].values
simu_loglin = medians.loc[:, ("lambda", "Simulated (Log-Linear)")].values
sns.set_context("talk")
g = sns.catplot(
x="condition",
y="lambda",
hue="dataset",
data=fits_all,
kind="point",
estimator=np.median,
join=False,
dodge=0.4,
legend=False,
aspect=1.5,
height=6.38 * 1.5 / 1.5,
hue_order=[
"Original",
"Simulated (Log-Linear)",
"Simulated (Gain-Loss Ratio)",
"Simulated (Discontinuous EV)",
],
markers=["o", "D", "s", "^"],
)
pal = sns.color_palette()
plt.legend(loc="upper left", frameon=False, fontsize=14)
g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"])
g.set_xlabels("")
g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
g.ax.axhline(1, lw=0.6, color="black")
g.ax.set_ylim(0.4, 2.2)
for i in range(4):
if i == 2:
ycoords = (0.85, 0.75, 0.65, 0.55)
else:
ycoords = (1.55, 1.45, 1.35, 1.25)
g.ax.annotate(
f"$\\lambda_O$ = {orig[i]:.2f}",
(i, ycoords[0]),
va="center",
ha="center",
color=pal[0],
size=14,
)
g.ax.annotate(
"$\\lambda_{LL}$" + f" = {simu_loglin[i]:.2f}",
(i, ycoords[1]),
va="center",
ha="center",
color=pal[1],
size=14,
)
g.ax.annotate(
"$\\lambda_{GLR}$" + f" = {simu_ratio[i]:.2f}",
(i, ycoords[2]),
va="center",
ha="center",
color=pal[2],
size=14,
)
g.ax.annotate(
"$\\lambda_{DEV}$" + f" = {simu_threshold[i]:.2f}",
(i, ycoords[3]),
va="center",
ha="center",
color=pal[3],
size=14,
)
plt.savefig("Figures/Figure 2.png", dpi=400);
np.random.seed(189123789) # Seed for reproducibility
sns.set_context("talk")
_, axes = plt.subplots(2, 1, figsize=(6.38, 6.38))
x = [3, 2, 1, 0]
for d, label, ax, title in zip(
[data_s1_low, data_s1_high], ["Low", "High"], axes, ["20G - 20L", "40G - 40L"]
):
try: # Check if bootstrapped estimates have already been generated
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy"
)
blambd = np.load(
f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy"
)
except FileNotFoundError: # Otherwise generate them (long...)
lambd, blambd = bootstrap_lambda_within_s1_clustered(d, 5000)
np.save(
f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy",
lambd,
)
np.save(
f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy",
blambd,
)
ax.scatter(x, lambd)
llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
ax.errorbar(x, lambd, [llci, ulci], alpha=1, fmt="none", lw=2)
ax.axhline(1, color="black", lw=0.6)
ax.set_xticks([0, 1, 2, 3])
yticks = np.round(np.arange(-1, 3.5, 1), 1)
ax.set_yticks(yticks)
ax.set_ylim(-1.5, 3.5)
ax.set_xlim(-0.5, 3.5)
ax.set_yticklabels(yticks)
ax.set_xticklabels([])
sns.despine(ax=ax)
ax.xaxis.grid(False)
ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
ax.set_title(title)
axes[1].set_xticklabels(
["Small G\nSmall L", "Small G\nLarge L", "Large G\nSmall L", "Large G\nLarge L",],
size=14,
)
plt.tight_layout()
plt.savefig("Figures/Figure 3.png", dpi=400, bbox_inches="tight");
In table form:
lambdas = []
for label in ["Low", "High"]:
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy"
)
blambd = np.load(
f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy"
)
lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["Small G<br>Small L", "Small G<br>Large L", "Large G<br>Small L", "Large G<br>Large L",]
df_lambdas["Condition"] = ["20G-20L", "40G-40L"]
df_lambdas.set_index(["Condition"]).style.format("{:.2f}")
np.random.seed(189123789) # Seed for reproducibility
sns.set_style("ticks")
sns.set_context("talk")
_, axes = plt.subplots(3, 1, figsize=(6.38, 8))
pal = sns.color_palette()
nboots = 5000
for ax, filename, title in zip(
axes,
["Data_S1ab", "Data_S1a", "Data_S1b"],
["Experiments 1a + 1b", "Experiment 1a", "Experiment 1b"],
):
data = pd.read_csv(f"Original Data/{filename}.csv")
data["response"] = data["response"].map({"Accept": 1, "Reject": 0})
data["condition"] = data["condition"].map(
{
"Gains 20-Losses 20": 0,
"Gains 20-Losses 40": 1,
"Gains 40-Losses 20": 2,
"Gains 40-Losses 40": 3,
}
)
data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
data["is_common"] = data.pair.map(dict_common_s1)
n_parts = data["sub"].shape[0] / 64
data["sub_id"] = np.repeat(np.arange(n_parts), 64)
chosen = [False] * 55 + [True] * 9
data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform(
lambda x: np.random.choice(chosen, 64, replace=False)
)
data_common = data[data.is_common]
data_random = data[data.is_randomly_chosen]
datasets = [data, data_random, data_common]
labels = ["All Lotteries", "Random Subset", "Common Lotteries"]
markers = ["o", "s", "D"]
xpos = [np.arange(0, 4, 1) - 0.1, np.arange(0, 4, 1), np.arange(0, 4, 1) + 0.1]
for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal):
try: # Check if bootstrapped estimates have already been generated
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
blambd = np.load(
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy"
)
        except FileNotFoundError: # Otherwise generate them (long...)
lambd, blambd = bootstrap_lambda_between_s1_clustered(d, 5000)
np.save(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd
)
np.save(
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy",
blambd,
)
ax.scatter(x, lambd, label=l, marker=m)
llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2)
ax.axhline(1, color="black", lw=0.6)
ax.set_xticks(np.arange(0, 4, 1))
yticks = np.round(np.arange(0.6, 2.8, 0.4), 1)
ax.set_yticks(yticks)
ax.set_ylim(0.4, 2.6)
ax.set_yticklabels(yticks, size=14)
sns.despine(ax=ax)
ax.xaxis.grid(False)
ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
ax.set_title(title, size=14)
ax.set_xticklabels([])
axes[0].legend(frameon=False, loc=2, fontsize=13, handletextpad=0.05)
axes[2].set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], size=14)
plt.tight_layout()
plt.savefig("Figures/Figure 4.png", dpi=100, bbox_inches="tight");
In table form:
lambdas = []
for filename in ["Data_S1ab", "Data_S1a", "Data_S1b"]:
    for l in labels:
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["20G - 20L", "20G - 40L", "40G - 20L", "40G - 40L"]
df_lambdas["Data"] = np.repeat(["S1a + S1b", "S1a", "S1b"], 3)
df_lambdas["Subset"] = np.tile(
["All Lotteries", "Random Subset", "Common Lotteries"], 3
)
df_lambdas.set_index(["Data", "Subset"]).transpose().style.format("{:.2f}")
Fitting the original individual-level model in R:
%%R -i data_s1 -i data_s1a -i data_s1b -o fits_all
# Code from W&S
# Fit GLM per individual
loss_aversion <- function(data) {
m1 <- glm(response ~ gain + loss, data=data, family=binomial)
data.frame(id=data$sub_id[1],
condition=data$condition[1],
converged=m1$converged,
deviance=deviance(m1),
max.residual=max(abs(residuals(m1))),
intercept=coef(m1)["(Intercept)"],
beta.loss=coef(m1)["loss"],
beta.gain=coef(m1)["gain"])
}
# Original data (S1a + S1b)
fits_s1ab <- by(data=data_s1, data_s1$sub_id, loss_aversion)
fits_s1ab <- do.call(rbind, fits_s1ab)
fits_s1ab <- fits_s1ab[order(fits_s1ab$id),]
fits_s1ab$lambda <- with(fits_s1ab, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1ab$deviance,probs=c(.95))
fits_s1ab <- subset(fits_s1ab, deviance < deviance.cut)
# Remove negative lambdas
fits_s1ab <- (subset(fits_s1ab,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1ab"))
# Original data S1a
fits_s1a <- by(data=data_s1a, data_s1a$sub_id, loss_aversion)
fits_s1a <- do.call(rbind, fits_s1a)
fits_s1a <- fits_s1a[order(fits_s1a$id),]
fits_s1a$lambda <- with(fits_s1a, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1a$deviance,probs=c(.95))
fits_s1a <- subset(fits_s1a, deviance < deviance.cut)
# Remove negative lambdas
fits_s1a <- (subset(fits_s1a,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1a"))
# Original data S1b
fits_s1b <- by(data=data_s1b, data_s1b$sub_id, loss_aversion)
fits_s1b <- do.call(rbind, fits_s1b)
fits_s1b <- fits_s1b[order(fits_s1b$id),]
fits_s1b$lambda <- with(fits_s1b, -beta.loss/beta.gain)
# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1b$deviance,probs=c(.95))
fits_s1b <- subset(fits_s1b, deviance < deviance.cut)
# Remove negative lambdas
fits_s1b <- (subset(fits_s1b,lambda > 0)
%>% mutate(id = as.numeric(factor(id)),
dataset="S1b"))
fits_all <- bind_rows(fits_s1ab, fits_s1a, fits_s1b)
Fitting the Bayesian model:
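As a reminder of what `build_bayesian_model` estimates (restated here from the code above), the pooled model is a hierarchical logistic regression with one intercept and one pair of gain/loss slopes per condition $c$:

$$\sigma_{\alpha}, \sigma_{\beta_G}, \sigma_{\beta_L} \sim \mathrm{HalfCauchy}(25), \qquad \alpha_c \sim \mathcal{N}(0, \sigma_{\alpha}), \qquad \beta_{G,c} \sim \mathrm{HalfNormal}(\sigma_{\beta_G}), \qquad \beta_{L,c} \sim \mathrm{HalfNormal}(\sigma_{\beta_L})$$

$$y_i \sim \mathrm{Bernoulli}\!\left(\mathrm{logit}^{-1}\!\left(\alpha_{c[i]} + \beta_{G,c[i]}\,\text{gain}_i - \beta_{L,c[i]}\,\text{loss}_i\right)\right), \qquad \lambda_c = \frac{\beta_{L,c}}{\beta_{G,c}}$$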
np.random.seed(189123789) # Seed for reproducibility
results_bayes = []
for data, filename in zip([data_s1, data_s1a, data_s1b], ["S1ab", "S1a", "S1b"]):
model = build_bayesian_model(data)
path = f"Buffer/Bayesian Traces/Data_{filename}-all/"
with model:
if not os.path.exists(path):
# If the model has not been estimated yet, run it (long...)
trace_simple = pm.sample(1000, tune=1000)
pm.save_trace(trace_simple, directory=path)
tab = pm.load_trace(path)
l = pm.summary(tab, varnames=["λ"]).reset_index()
l["condition_name"] = (
l["index"]
.apply(lambda x: int(x[2]))
.map({0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"})
)
l["dataset"] = filename
results_bayes.append(l)
results_bayes = (
pd.concat(results_bayes)
.pivot(index="condition_name", columns="dataset", values="mean")
.reset_index()
)
results_bayes["estimation"] = "bayes"
Fitting the logistic model:
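For reference, the "logit" estimates below are the pooled within-condition coefficients produced by `bootstrap_lambda_between_s1_clustered` (formula `response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)`), with one $\lambda$ per condition:

$$P(\text{accept}_i) = \mathrm{logit}^{-1}\!\left(\alpha_{c[i]} + \beta_{G,c[i]}\,\text{gain}_i + \beta_{L,c[i]}\,\text{loss}_i\right), \qquad \lambda_c = \frac{-\beta_{L,c}}{\beta_{G,c}}$$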
results_logit = []
for filename in ["S1ab", "S1a", "S1b"]:
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - All Lotteries - Data_{filename} - Clustered.npy"
)
results_logit.append(lambd)
results_logit = pd.DataFrame(results_logit)
results_logit.columns = ["20G-20L", "20G-40L", "40G-20L", "40G-40L"]
results_logit["dataset"] = ["S1ab", "S1a", "S1b"]
results_logit = (
pd.melt(
results_logit,
id_vars=["dataset"],
value_vars=["20G-20L", "20G-40L", "40G-20L", "40G-40L"],
)
.pivot(index="variable", columns="dataset", values="value")
.reset_index()
)
results_logit.columns = ["condition_name", "S1a", "S1ab", "S1b"]
results_logit["estimation"] = "logit"
Summary information:
fits_all["condition_name"] = fits_all.condition.map(
{0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
results_median = (
fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack()
)
results_median.columns = [col[1] for col in results_median.columns]
results_median = results_median.reset_index()
results_median["estimation"] = "median"
results_all = (
pd.concat([results_logit, results_bayes, results_median])[
["condition_name", "estimation", "S1ab", "S1a", "S1b"]
]
.set_index(["condition_name", "estimation"])
.reindex(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], level=0)
.reindex(["median", "logit", "bayes"], level=1)
.unstack()
)
results_all.style.format("{:.2f}")
According to the paper:
In Experiment 2, we used two distributions for gains and losses, one ranging from \$6 to \$20 (in \$2 increments) and one three times larger, ranging from \$18 to \$60 (in \$6 increments). We only tested the asymmetric cases. Unlike in Experiments 1a and 1b, the possible gains and losses were randomly drawn and paired from the distributions to produce 64 pairs.
From this description, one would expect the data to have the following properties:
A. All possible gains (losses) should be between 6 and 20, inclusive
B. All possible gains (losses) should be multiples of 2
C. All possible losses (gains) should be between 18 and 60, inclusive
D. All possible losses (gains) should be multiples of 6
Instead, the data suggests the following (see the quick check below):
A. Possible gains (losses) were drawn at random between 0 and 19 (inclusive)
B. Possible losses (gains) were drawn at random between 0 and 59 (inclusive)
This inconsistency does not allow a re-analysis of the data, as the bet structure does not appear to match the description given in the paper.
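The minimal sketch below (not part of the original notebook) checks the observed amounts directly from `data_s2`:
# Observed range and number of distinct gain/loss amounts in Experiment 2, by condition.
# Quick descriptive check; the heatmap below gives the full picture.
for col in ["gain", "loss"]:
    for cond, d in data_s2.groupby("condition"):
        amounts = np.sort(d[col].unique())
        print(f"condition {cond}, {col}: min={amounts.min()}, max={amounts.max()}, distinct values={amounts.size}")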
The heatmap below shows, for each combination of gain and loss amounts, the number of bets that appear in the data:
sns.set_context("talk")
grid_kws = {"height_ratios": (0.95, 0.05), "hspace": 0.2}
_, (ax, cbar_ax) = plt.subplots(2, 1, figsize=(10, 11), gridspec_kw=grid_kws)
df_accept = data_s2.groupby(["gain", "loss"]).Ones.count().reset_index()
g = sns.heatmap(
df_accept.pivot("gain", "loss", "Ones").iloc[::-1, :],
annot=False,
cmap="viridis",
fmt=".2f",
ax=ax,
cbar=True,
cbar_ax=cbar_ax,
cbar_kws=dict(orientation="horizontal"),
)
g.set_xlim(-0.5, 60)
g.set_xticks(np.arange(0, 60, 2) + 0.5)
g.set_xticklabels(np.arange(0, 60, 2))
g.set_yticks(np.arange(60, 0, -2) - 0.5)
g.set_yticklabels(np.arange(0, 60, 2))
g.set_xlabel("Gain Amount")
g.set_ylabel("Loss Amount")
g.set_ylim(60, -0.5);
The authors have confirmed that the amounts of gains and losses reported in the paper were inaccurate.
np.random.seed(189123789) # Seed for reproducibility
sns.set_style("ticks")
sns.set_context("talk")
_, ax = plt.subplots(1, 1, figsize=(6.38, 5))
pal = sns.color_palette()
nboots = 5000
filename = "Data_S3.csv"
title = ""
data = pd.read_csv(f"Original Data/{filename}")
data["response"] = data.response.map(
{
"Strongly Reject": -2,
"Weakly Reject": -1,
"Weakly Accept": 1,
"Strongly Accept": 2,
}
)
data["condition"] = data.condition.map(
{"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
data["is_common"] = data.pair.map(dict_common_s3)
n_parts = data["sub"].shape[0] / 256
data["sub_id"] = np.repeat(np.arange(n_parts), 256)
chosen = [False] * (576 - 36) + [True] * 36
data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform(
lambda x: np.random.choice(chosen, 256, replace=False)
)
data_common = data[data.is_common]
data_random = data[data.is_randomly_chosen]
datasets = [data, data_random, data_common]
labels = ["All Lotteries", "Random Subset", "Common Lotteries"]
markers = ["o", "s", "D"]
xpos = [np.arange(0, 3, 1) - 0.1, np.arange(0, 3, 1), np.arange(0, 3, 1) + 0.1]
sns.set_context("talk")
for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal):
try:
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
blambd = np.load(
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy"
)
except FileNotFoundError:
lambd, blambd = bootstrap_lambda_between_s3_clustered(d, nboots)
np.save(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd
)
np.save(
f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy",
blambd,
)
ax.scatter(x, lambd, label=l, marker=m)
llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2)
ax.axhline(1, lw=0.6, color="black")
sns.despine(ax=ax)
ax.set_title(title)
ax.set_xticks([0, 1, 2])
ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=12)
ax.set_xticklabels(["20G-40L", "40G-40L", "40G-20L"])
ax.legend(title="", frameon=False, loc=2)
plt.tight_layout()
plt.savefig("Figures/Figure 5.png", dpi=100);
In table form:
lambdas = []
for filename in ["Data_S3.csv"]:
    for l in labels:
lambd = np.load(
f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
)
lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["20G - 40L", "40G - 40L", "40G - 20L"]
df_lambdas["Subset"] = np.tile(
["All Lotteries", "Random Subset", "Common Lotteries"], 1
)
df_lambdas.set_index(["Subset"])
Study 3 also provides limited evidence that decision by sampling is a driver of loss aversion: while the decision-by-sampling account predicts different values of $\lambda$ in the "40G - 40L" and "40G - 20L" conditions, the estimates appear essentially identical when the analysis is restricted to the common lotteries.
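As a quick numeric companion (a minimal sketch, not part of the original notebook), the gap between the common-lottery $\lambda$ estimates in these two conditions can be read off the table above:
# Difference between the common-lottery λ estimates of the 40G-40L and 40G-20L conditions,
# using the df_lambdas table built in the previous cell.
common = df_lambdas.set_index("Subset").loc["Common Lotteries"]
print(common["40G - 40L"] - common["40G - 20L"])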