This notebook contains all the analyses described in André and De Langhe (2020):

"No Evidence for Loss Aversion Disappearance and Reversal in Walasek and Stewart (2015)"

Preamble

Libraries

Show Code
import os
import warnings
from functools import partial, reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import statsmodels.formula.api as smf
from numpy import dot
from scipy.linalg import LinAlgError, inv

warnings.filterwarnings("ignore")

%load_ext rpy2.ipython
%R suppressMessages(library(tidyverse))
np.random.seed(189123789)  # Seed for reproducibility

Functions

Show Code
def split_bets_high(x):
    """
    Classify each "gain-loss" bet string in x by whether its gain and loss
    fall below or above 24 (the cutoff used for the high, "40" amounts).
    """
    bets = {}
    for el in x:
        g, l = [int(i) for i in el.split("-")]
        if (g > 24) & (l <= 24):
            bets[el] = "high-low"
        elif (g <= 24) & (l > 24):
            bets[el] = "low-high"
        elif (g <= 24) & (l <= 24):
            bets[el] = "low-low"
        elif (g > 24) & (l > 24):
            bets[el] = "high-high"
    return bets


def split_bets_low(x):
    """
    Classify each "gain-loss" bet string in x by whether its gain and loss
    fall below or above 12 (the cutoff used for the low, "20" amounts).
    """
    bets = {}
    for el in x:
        g, l = [int(i) for i in el.split("-")]
        if (g > 12) & (l <= 12):
            bets[el] = "high-low"
        elif (g <= 12) & (l > 12):
            bets[el] = "low-high"
        elif (g <= 12) & (l <= 12):
            bets[el] = "low-low"
        elif (g > 12) & (l > 12):
            bets[el] = "high-high"
    return bets
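
# The cutoffs used above (12 in the "20"-amount conditions, 24 in the
# "40"-amount conditions) classify the gain and the loss of each bet as "low"
# or "high", tagging the bet by the quadrant it falls into. A quick
# illustration on hypothetical input strings:
#   split_bets_high(["40-12", "12-40", "12-12"])
#   -> {"40-12": "high-low", "12-40": "low-high", "12-12": "low-low"}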


def ols(X, y):
    """Return the OLS intercept and slope of y regressed on a single predictor X."""
    X = np.array([np.ones(X.shape[0]), X]).T
    return np.linalg.inv(X.T @ X) @ X.T @ y


def invlogit(x):
    """The inverse logit (sigmoid) function, mapping a utility to a probability."""
    return np.exp(x) / (1 + np.exp(x))


def loglinear(gain, loss):
    """
    First decision rule considered in the paper.
    Return the log-linear utility for a bet: a (gain, loss) tuple.
    """
    eta = np.log(gain) - np.log(loss)
    return eta


def ratio(gain, loss):
    """
    Second decision rule considered in the paper.
    Return the Gain/Loss ratio utility for a bet: a (gain, loss) tuple.
    """
    eta = gain / loss - 1
    return eta


def threshold(gain, loss):
    """
    Third decision rule considered in the paper.
    Return the discontinuous expected-value utility for a bet: a (gain, loss) tuple.
    """
    if gain > loss:
        return 1.4 + (gain - loss) / 20
    elif gain < loss:
        return -1.4 + (gain - loss) / 20
    else:
        return 0
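
# A worked example of the three decision rules (hypothetical 12-6 bet, for
# illustration only):
#   loglinear(12, 6) = ln(12) - ln(6) = ln(2)  ≈ 0.69
#   ratio(12, 6)     = 12/6 - 1                = 1.00
#   threshold(12, 6) = 1.4 + (12 - 6)/20       = 1.70
# Passing these utilities through invlogit gives acceptance probabilities of
# roughly 0.67, 0.73, and 0.85, respectively; this is how the simulated
# choices below are generated.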


def fast_OLS(endog, exog):
    """
    A simple function for (X'X)^(-1)X'Y.
    :return: The Kx1 array of estimated coefficients.
    :raises LinAlgError: if X'X is singular.
    """
    return dot(dot(inv(dot(exog.T, exog)), exog.T), endog).squeeze()


def logit_cdf(X):
    """
    The CDF of the logistic function.
    :param X: Value at which to estimate the CDF
    :return: The logistic function CDF, evaluated at X
    """
    idx = X > 0
    out = np.empty(X.size, dtype=float)
    out[idx] = 1 / (1 + np.exp(-X[idx]))
    exp_X = np.exp(X[~idx])
    out[~idx] = exp_X / (1 + exp_X)
    return out
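
# Note: logit_cdf is a numerically stable version of invlogit above. Both
# branches are algebraically identical to exp(X) / (1 + exp(X)), but choosing
# the branch by the sign of X keeps the argument of np.exp negative, so the
# exponential can never overflow.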


def logit_score(endog, exog, params, n_obs):
    """
    The score of the logistic log-likelihood.
    :param endog: Nx1 vector of observed responses
    :param exog: NxK matrix of exogenous predictors
    :param params: Kx1 vector of parameters for the predictors
    :param n_obs: Number of observations
    :return: The score, a Kx1 vector, evaluated at `params'
    """
    return dot(endog - logit_cdf(dot(exog, params)), exog) / n_obs


def logit_hessian(exog, params, n_obs):
    """
    The Hessian of the logistic log-likelihood.
    :param exog: NxK matrix of exogenous predictors
    :param params: Kx1 vector of parameters for the predictors
    :param n_obs: Number of observations
    :return: The Hessian, a KxK matrix, evaluated at `params'
    """
    L = logit_cdf(np.dot(exog, params))
    return -dot(L * (1 - L) * exog.T, exog) / n_obs


def fast_logit(endog, exog, n_obs=0, n_vars=0, max_iter=10000, tolerance=1e-10):
    """
    A convenience function applying the Newton-Raphson method to estimate a
    logistic model.
    :param endog: Nx1 vector of observed responses
    :param exog: NxK matrix of exogenous predictors
    :param n_obs: Number of observations N
    :param n_vars: Number of exogenous predictors K
    :param max_iter: Maximum number of iterations
    :param tolerance: Margin of error for convergence
    :return: The error-minimizing parameters for the model.
    """
    iterations = 0
    oldparams = np.inf
    newparams = np.zeros(n_vars)
    while iterations < max_iter and np.any(np.abs(newparams - oldparams) > tolerance):
        oldparams = newparams
        H = logit_hessian(exog, oldparams, n_obs)
        score = logit_score(endog, exog, oldparams, n_obs)
        newparams = oldparams - inv(H) @ score  # Newton-Raphson step
        iterations += 1
    return newparams
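
# fast_logit implements the textbook Newton-Raphson iteration for the logit
# log-likelihood: with score s(β) = X'(y - p(β)) / n (logit_score) and Hessian
# H(β) = -X'WX / n where W = diag(p(1 - p)) (logit_hessian), the update is
# β ← β - H(β)⁻¹ s(β), stopped once no coefficient moves by more than
# `tolerance` or `max_iter` is reached.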


def bootstrap_lambda_within_s1_clustered(df, nboots=5000):
    """
    Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
    lambda parameters within each subset of lotteries.
    """
    N = df.shape[0]
    subids = df.sub_id
    unique_subids = subids.unique()
    N_subids = unique_subids.shape[0]
    blambd = np.empty((nboots, 4))
    m = smf.logit(
        "response ~ -1 + C(status) + gain:C(status) + loss:C(status)", data=df
    )
    endog = m.endog.reshape(-1, 1)
    exog = m.exog
    data = np.hstack([endog, exog])
    params = m.fit(disp=False).params.values
    g = params[4:8]
    l = params[8:]
    lambd = -l / g
    opt = partial(fast_logit, n_obs=N, n_vars=12)
    for i in range(nboots):
        chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
        # Stack the rows of all resampled participants in one pass
        bdata = np.vstack([data[subids == s] for s in chosen_subids])
        bparams = opt(bdata[:, 0], bdata[:, 1:])
        bg = bparams[4:8]
        bl = bparams[8:]
        blambd[i] = -bl / bg
    return lambd, blambd
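
# Note on the clustered bootstrap (this function and the two variants below):
# each participant contributes many correlated choices (64 per condition in
# Experiment 1), so resampling is done at the participant level: draw N_subids
# participant IDs with replacement, stack their rows, and refit the model. In
# the linear predictor α + β_G·gain + β_L·loss (with β_L typically negative),
# keeping the acceptance probability constant while raising the loss by one
# unit requires raising the gain by -β_L/β_G = λ units, which is the statistic
# returned for each subset or condition.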


def bootstrap_lambda_between_s1_clustered(df, nboots=5000):
    """
    Bootstrap individuals from Experiment 1 of W&S, and estimate the bootstrapped
    lambda parameters within each condition.
    """
    N = df.shape[0]
    subids = df.sub_id
    unique_subids = subids.unique()
    N_subids = unique_subids.shape[0]
    blambd = np.empty((nboots, 4))
    m = smf.logit(
        "response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
    )
    endog = m.endog.reshape(-1, 1)
    exog = m.exog
    data = np.hstack([endog, exog])
    params = m.fit(disp=False).params.values
    g = params[4:8]
    l = params[8:]
    lambd = -l / g
    opt = partial(fast_logit, n_obs=N, n_vars=12)
    for i in range(nboots):
        chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
        # Stack the rows of all resampled participants in one pass
        bdata = np.vstack([data[subids == s] for s in chosen_subids])
        bparams = opt(bdata[:, 0], bdata[:, 1:])
        bg = bparams[4:8]
        bl = bparams[8:]
        blambd[i] = -bl / bg
    return lambd, blambd


def bootstrap_lambda_between_s3_clustered(df, nboots=5000):
    """
    Bootstrap individuals from Experiment 3 of W&S, and estimate the bootstrapped
    lambda parameters within each condition.
    """
    subids = df.sub_id
    unique_subids = subids.unique()
    N_subids = unique_subids.shape[0]
    blambd = np.empty((nboots, 3))
    m = smf.ols(
        "response ~ -1 + C(condition) + gain:C(condition) + loss:C(condition)", data=df
    )
    endog = m.endog.reshape(-1, 1)
    exog = m.exog
    data = np.hstack([endog, exog])
    params = fast_OLS(data[:, 0], data[:, 1:])
    g = params[3:6]
    l = params[6:]
    lambd = -l / g
    for i in range(nboots):
        chosen_subids = np.random.choice(unique_subids, N_subids, replace=True)
        # Stack the rows of all resampled participants in one pass
        bdata = np.vstack([data[subids == s] for s in chosen_subids])
        bparams = fast_OLS(bdata[:, 0], bdata[:, 1:])
        bg = bparams[3:6]
        bl = bparams[6:]
        blambd[i] = -bl / bg
    return lambd, blambd


def build_bayesian_model(data):
    """
    Build the Bayesian model used to estimate the pooled parameters.
    """
    y, gains, losses, condition = data.loc[
        :, ["response", "gain", "loss", "condition"]
    ].values.T
    condition = np.array([int(i) for i in condition])
    with pm.Model() as model:
        σ_α = pm.HalfCauchy("σ_α", beta=25)
        σ_βG = pm.HalfCauchy("σ_βG", beta=25)
        σ_βL = pm.HalfCauchy("σ_βL", beta=25)

        α = pm.Normal("α", mu=0, sd=σ_α, shape=4)
        βG = pm.HalfNormal("βG", sd=σ_βG, shape=4)
        βL = pm.HalfNormal("βL", sd=σ_βL, shape=4)

        λ = pm.Deterministic("λ", βL / βG)

        μ = α[condition] + βG[condition] * gains - βL[condition] * losses
        y_1 = pm.Bernoulli("y_1", logit_p=μ, observed=y)
    return model
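
The pooled model above is a hierarchical logit with one intercept and one pair of slopes per condition, $\mu_c = \alpha_c + \beta_{G,c} \cdot \text{gain} - \beta_{L,c} \cdot \text{loss}$. Because the slopes have HalfNormal priors, $\beta_{G,c}$ and $\beta_{L,c}$ are positive by construction, so $\lambda_c = \beta_{L,c} / \beta_{G,c}$ is always positive: unlike W&S' individual-level fits, no negative $\lambda$ needs to be discarded after estimation.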

Data Processing

Show Code
# Reading the bets from S1 of W&S: 64 bets for each condition
bets_s1 = pd.read_csv("Original Data/Bets_S1.csv")
bets_s1["bet"] = bets_s1[["gain", "loss"]].apply(
    lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1
)
pairs_by_cond = bets_s1.groupby("condition").apply(lambda x: np.unique(x.bet)).to_dict()
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = bets_s1.bet.unique()

bets_s1["is_common"] = bets_s1.bet.apply(lambda x: x in common_pairs)
dict_common_s1 = {k: v for k, v in bets_s1[["bet", "is_common"]].values}

bets_dict_high = split_bets_high(pairs_by_cond[3])
bets_dict_low = split_bets_low(pairs_by_cond[0])
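
# Optional sanity checks (not part of the original analysis): the number of
# lottery pairs common to all four conditions, and the size of each high/low
# cell in the 40G-40L condition (condition 3).
# print(len(common_pairs), len(unique_pairs))
# print(pd.Series(bets_dict_high).value_counts())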

# Reading the data for study 1
data_s1 = pd.read_csv("Original Data/Data_S1ab.csv")
data_s1["response"] = data_s1["response"].map({"Accept": 1, "Reject": 0})
data_s1["condition"] = data_s1["condition"].map(
    {
        "Gains 20-Losses 20": 0,
        "Gains 20-Losses 40": 1,
        "Gains 40-Losses 20": 2,
        "Gains 40-Losses 40": 3,
    }
)
data_s1["pair"] = data_s1.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1["sub"].shape[0] // 64
data_s1["sub_id"] = np.repeat(np.arange(n_parts), 64)
data_s1["is_common"] = data_s1.pair.apply(lambda x: x in common_pairs)


# Creating subsets of data of study 1 (used in analysis later)
data_s1_high = data_s1[data_s1.condition == 3].copy()
data_s1_high["status"] = data_s1_high.pair.map(bets_dict_high)

data_s1_low = data_s1[data_s1.condition == 0].copy()
data_s1_low["status"] = data_s1_low.pair.map(bets_dict_low)


# Reading the data for study 1a
data_s1a = pd.read_csv("Original Data/Data_S1a.csv")
data_s1a["response"] = data_s1a["response"].map({"Accept": 1, "Reject": 0})
data_s1a["condition"] = data_s1a["condition"].map(
    {
        "Gains 20-Losses 20": 0,
        "Gains 20-Losses 40": 1,
        "Gains 40-Losses 20": 2,
        "Gains 40-Losses 40": 3,
    }
)
data_s1a["pair"] = data_s1a.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1a["sub"].shape[0] // 64
data_s1a["sub_id"] = np.repeat(np.arange(n_parts), 64)

# Reading the data for study 1b
data_s1b = pd.read_csv("Original Data/Data_S1b.csv")
data_s1b["response"] = data_s1b["response"].map({"Accept": 1, "Reject": 0})
data_s1b["condition"] = data_s1b["condition"].map(
    {
        "Gains 20-Losses 20": 0,
        "Gains 20-Losses 40": 1,
        "Gains 40-Losses 20": 2,
        "Gains 40-Losses 40": 3,
    }
)
data_s1b["pair"] = data_s1b.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
n_parts = data_s1b["sub"].shape[0] // 64
data_s1b["sub_id"] = np.repeat(np.arange(n_parts), 64)

# Reading the data for study 2
data_s2 = pd.read_csv("Original Data/Data_S2.csv")
data_s2["condition"] = data_s2.condition.map({20.6: 0, 60.2: 1})
data_s2["Ones"] = 1

data_s2["pair"] = data_s2.apply(lambda x: f"{x.gain}_{x.loss}", axis=1)
pairs_by_cond = (
    data_s2.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s2.pair.unique()
data_s2["is_common"] = data_s2.pair.apply(lambda x: x in common_pairs)
dict_common_s2 = {k: v for k, v in data_s2[["pair", "is_common"]].values}

# Reading the data for study 3
data_s3 = pd.read_csv("Original Data/Data_S3.csv")
data_s3["condition"] = data_s3.condition.map(
    {"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data_s3["response"] = data_s3.response.map(
    {
        "Strongly Reject": -2,
        "Weakly Reject": -1,
        "Weakly Accept": 1,
        "Strongly Accept": 2,
    }
)

data_s3["pair"] = data_s3.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
pairs_by_cond = (
    data_s3.groupby("condition").apply(lambda x: np.unique(x.pair)).to_dict()
)
common_pairs = reduce(np.intersect1d, [v for k, v in pairs_by_cond.items()])
unique_pairs = data_s3.pair.unique()
data_s3["is_common"] = data_s3.pair.apply(lambda x: x in common_pairs)
dict_common_s3 = {k: v for k, v in data_s3[["pair", "is_common"]].values}

Experimental Design and Key Result

We estimate the impact of gains and losses on the likelihood of accepting each bet, using W&S' R code.
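
Concretely, the W&S code fits one logistic regression per participant, $P(\text{accept}) = \text{logit}^{-1}(\beta_0 + \beta_G \cdot \text{gain} + \beta_L \cdot \text{loss})$, and defines $\lambda = \frac{-\beta_L}{\beta_G}$. Holding the acceptance probability constant, a one-unit increase in the loss must be compensated by $\lambda$ units of gain, so $\lambda > 1$ indicates loss aversion and $\lambda < 1$ its absence or reversal.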

Show Code
%%R -i data_s1 -o fits_real

# Original code from W&S

# Fit GLM per individual
loss_aversion <- function(data) {
    m1 <- glm(response ~ gain + loss, data=data, family=binomial)
    data.frame(id=data$sub_id[1],
               condition=data$condition[1], 
               converged=m1$converged, 
               deviance=deviance(m1), 
               max.residual=max(abs(residuals(m1))), 
               intercept=coef(m1)["(Intercept)"], 
               beta.loss=coef(m1)["loss"], 
               beta.gain=coef(m1)["gain"])
}

# Original data (S1a + S1b)
fits_real <- by(data=data_s1, data_s1$sub_id, loss_aversion)
fits_real <- do.call(rbind, fits_real)
fits_real <- fits_real[order(fits_real$id),]
fits_real$lambda <- with(fits_real, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_real$deviance,probs=c(.95))
fits_real <- subset(fits_real,deviance < deviance.cut)

# Remove negative lambdas 
fits_real <- (subset(fits_real,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)), 
                        dataset="Original"))

Plotting the results:

Show Code
fits_real["condition_name"] = fits_real.condition.map(
    {0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
sns.set_context("talk")
g = sns.catplot(
    x="condition",
    y="lambda",
    data=fits_real,
    ci=95,
    kind="point",
    estimator=np.median,
    join=False,
    dodge=0.1,
    legend=False,
    aspect=1.5,
    height=6.38,
)
pal = sns.color_palette()
g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"])
g.set_xlabels("")
g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
g.ax.axhline(1, lw=0.6, color="black")
plt.savefig("Figures/Figure 1.png", dpi=100);

$\lambda$ Will Differ Without Decision by Sampling

Evidence Through Simulation

We generate simulated choices for 10,000 participants (2,500 in each of the four conditions).
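
Under each decision rule, a bet with gain $g$ and loss $l$ is accepted with probability $\text{invlogit}(\eta(g, l))$, and every simulated choice is an independent Bernoulli draw with that probability. All three rules are indifferent when the gain equals the loss ($\eta = 0$ when $g = l$): none of them builds in loss aversion, and none involves decision by sampling, so any differences in the estimated $\lambda$ across conditions must come from the estimation procedure itself.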

Show Code
np.random.seed(189123789)  # Seed for reproducibility
bets_s1["bet_utility_loglin"] = bets_s1[["gain", "loss"]].apply(
    lambda x: loglinear(x.gain, x.loss), axis=1
)  # Compute the log-linear utility of the bet.
bets_s1["bet_accept_prob_loglin"] = bets_s1["bet_utility_loglin"].apply(
    invlogit
)  # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_loglin"] = (
    1 - bets_s1["bet_accept_prob_loglin"]
)  # Probability of rejecting the bet

bets_s1["bet_utility_ratio"] = bets_s1[["gain", "loss"]].apply(
    lambda x: ratio(x.gain, x.loss), axis=1
)  # Compute the ratio utility of the bet.
bets_s1["bet_accept_prob_ratio"] = bets_s1["bet_utility_ratio"].apply(
    invlogit
)  # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_ratio"] = (
    1 - bets_s1["bet_accept_prob_ratio"]
)  # Probability of rejecting the bet

bets_s1["bet_utility_threshold"] = bets_s1[["gain", "loss"]].apply(
    lambda x: threshold(x.gain, x.loss), axis=1
)  # Compute the ratio utility of the bet.
bets_s1["bet_accept_prob_threshold"] = bets_s1["bet_utility_threshold"].apply(
    invlogit
)  # Compute the probability of accepting the bet.
bets_s1["bet_reject_prob_threshold"] = (
    1 - bets_s1["bet_accept_prob_threshold"]
)  # Probability of rejecting the bet

bets_s1 = bets_s1.sort_values(["condition", "gain", "loss"])

for funcform in ["ratio", "loglin", "threshold"]:
    try:
        data_sim = pd.read_csv(
            f"Buffer/Simulated Data_S1_{funcform}.csv"
        )  # Check if data has been simulated already
    except FileNotFoundError:
        # Create the simulated data
        N_BY_COND = 2500  # Number of participants by condition
        N_PARTS = N_BY_COND * 4  # Total number of participants
        bets = bets_s1[
            [
                "condition",
                "gain",
                "loss",
                f"bet_accept_prob_{funcform}",
                f"bet_reject_prob_{funcform}",
            ]
        ]  # Select the bets
        choices = np.array(
            [
                np.random.choice(
                    [0, 1],
                    p=[
                        b[f"bet_reject_prob_{funcform}"],
                        b[f"bet_accept_prob_{funcform}"],
                    ],
                    size=N_BY_COND,
                )
                for _, b in bets.iterrows()
            ]
        ).T.reshape(
            -1, 1
        )  # Simulated choices for all 256 bets (64 bets x 4 conditions), N_BY_COND choices per bet.
        partids = np.repeat(np.arange(0, N_PARTS), 64).reshape(
            -1, 1
        )  # Generating unique IDs for each participant
        bet_attributes = np.tile(
            bets[["condition", "gain", "loss"]], (N_BY_COND, 1)
        )  # Repeating the values of the bets

        data = np.concatenate([partids, bet_attributes, choices], axis=1)
        df = pd.DataFrame(
            data, columns=["sub", "condition", "gain", "loss", "response"]
        )
        df["condition"] = df["condition"].astype(int)
        df.to_csv(f"Buffer/Simulated Data_S1_{funcform}.csv", index=False)

We then estimate the impact of gains and losses on the likelihood of accepting each bet in the simulated data, again using W&S' R code.

Show Code
%%R -o fits_sim
# Original code from W&S

#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_ratio.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)

fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)

# Remove negative lambdas 
fits_sim_ratio <- (subset(fits_sim,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)),
                        dataset="Simulated (Gain-Loss Ratio)"))

#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_loglin.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)

fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)

# Remove negative lambdas 
fits_sim_loglin <- (subset(fits_sim,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)),
                        dataset="Simulated (Log-Linear)"))

#Simulated data
data_sim <- read.csv("Buffer/Simulated Data_S1_threshold.csv",as.is=T)
data_sim$sub_id <- as.factor(data_sim$sub)

fits_sim <- by(data=data_sim, data_sim$sub_id, loss_aversion)
fits_sim <- do.call(rbind, fits_sim)
fits_sim <- fits_sim[order(fits_sim$id),]
fits_sim$lambda <- with(fits_sim, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_sim$deviance,probs=c(.95))
fits_sim <- subset(fits_sim,deviance < deviance.cut)

# Remove negative lambdas 
fits_sim_threshold <- (subset(fits_sim,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)),
                        dataset="Simulated (Discontinuous EV)"))

fits_sim <- rbind(fits_sim_threshold, fits_sim_loglin, fits_sim_ratio)

The table below reports the median $\lambda$ estimates obtained from the original data and from the simulated data, for each of the three decision rules:

Show Code
fits_sim["condition_name"] = fits_sim.condition.map(
    {0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
fits_all = pd.concat([fits_real, fits_sim])
medians = fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack()
medians
condition_name   Original   Simulated (Discontinuous EV)   Simulated (Gain-Loss Ratio)   Simulated (Log-Linear)
20G-20L          1.000000                       1.000000                      1.059934                  1.00000
20G-40L          0.861915                       0.788079                      0.500000                  0.52167
40G-20L          1.730755                       1.237665                      2.000000                  2.00000
40G-40L          1.017761                       1.000000                      1.083097                  1.00000

In graph form:

Show Code
orig = medians.loc[:, ("lambda", "Original")].values
simu_threshold = medians.loc[:, ("lambda", "Simulated (Discontinuous EV)")].values
simu_ratio = medians.loc[:, ("lambda", "Simulated (Gain-Loss Ratio)")].values
simu_loglin = medians.loc[:, ("lambda", "Simulated (Log-Linear)")].values
sns.set_context("talk")
g = sns.catplot(
    x="condition",
    y="lambda",
    hue="dataset",
    data=fits_all,
    kind="point",
    estimator=np.median,
    join=False,
    dodge=0.4,
    legend=False,
    aspect=1.5,
    height=6.38,
    hue_order=[
        "Original",
        "Simulated (Log-Linear)",
        "Simulated (Gain-Loss Ratio)",
        "Simulated (Discontinuous EV)",
    ],
    markers=["o", "D", "s", "^"],
)
pal = sns.color_palette()
plt.legend(loc="upper left", frameon=False, fontsize=14)
g.set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"])
g.set_xlabels("")
g.set_ylabels(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
g.ax.axhline(1, lw=0.6, color="black")
g.ax.set_ylim(0.4, 2.2)
for i in range(4):
    if i == 2:
        ycoords = (0.85, 0.75, 0.65, 0.55)
    else:
        ycoords = (1.55, 1.45, 1.35, 1.25)
    g.ax.annotate(
        f"$\\lambda_O$ = {orig[i]:.2f}",
        (i, ycoords[0]),
        va="center",
        ha="center",
        color=pal[0],
        size=14,
    )
    g.ax.annotate(
        "$\\lambda_{LL}$" + f" = {simu_loglin[i]:.2f}",
        (i, ycoords[1]),
        va="center",
        ha="center",
        color=pal[1],
        size=14,
    )
    g.ax.annotate(
        "$\\lambda_{GLR}$" + f" = {simu_ratio[i]:.2f}",
        (i, ycoords[2]),
        va="center",
        ha="center",
        color=pal[2],
        size=14,
    )
    g.ax.annotate(
        "$\\lambda_{DEV}$" + f" = {simu_threshold[i]:.2f}",
        (i, ycoords[3]),
        va="center",
        ha="center",
        color=pal[3],
        size=14,
    )

plt.savefig("Figures/Figure 2.png", dpi=400);

Evidence in Data from W&S

Show Code
np.random.seed(189123789)  # Seed for reproducibility
sns.set_context("talk")
_, axes = plt.subplots(2, 1, figsize=(6.38, 6.38))
x = [3, 2, 1, 0]
for d, label, ax, title in zip(
    [data_s1_low, data_s1_high], ["Low", "High"], axes, ["20G - 20L", "40G - 40L"]
):
    try:  # Check if bootstrapped estimates have already been generated
        lambd = np.load(
            f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy"
        )
        blambd = np.load(
            f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy"
        )
    except FileNotFoundError:  # Otherwise generate them (long...)
        lambd, blambd = bootstrap_lambda_within_s1_clustered(d, 5000)
        np.save(
            f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy",
            lambd,
        )
        np.save(
            f"Buffer/Saved Lambdas/BootLambdas - Within {label} - Data_S1ab - Clustered.npy",
            blambd,
        )

    ax.scatter(x, lambd)
    llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
    ax.errorbar(x, lambd, [llci, ulci], alpha=1, fmt="none", lw=2)
    ax.axhline(1, color="black", lw=0.6)
    ax.set_xticks([0, 1, 2, 3])
    yticks = np.round(np.arange(-1, 3.5, 1), 1)
    ax.set_yticks(yticks)
    ax.set_ylim(-1.5, 3.5)
    ax.set_xlim(-0.5, 3.5)
    ax.set_yticklabels(yticks)
    ax.set_xticklabels([])

    sns.despine(ax=ax)
    ax.xaxis.grid(False)
    ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
    ax.set_title(title)
axes[1].set_xticklabels(
    ["Small G\nSmall L", "Small G\nLarge L", "Large G\nSmall L", "Large G\nLarge L",],
    size=14,
)
plt.tight_layout()
plt.savefig("Figures/Figure 3.png", dpi=400, bbox_inches="tight");

In table form:

Show Code
lambdas = []
for label in ["Low", "High"]:
    lambd = np.load(
        f"Buffer/Saved Lambdas/Lambda - Within {label} - Data_S1ab - Clustered.npy"
    )
    lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
# The status coefficients come back in patsy's alphabetical level order
# (high-high, high-low, low-high, low-low), so label the columns accordingly
df_lambdas.columns = [
    "Large G<br>Large L",
    "Large G<br>Small L",
    "Small G<br>Large L",
    "Small G<br>Small L",
]
# Display the columns in the same order as the figure above
df_lambdas = df_lambdas[df_lambdas.columns[::-1]]
df_lambdas["Condition"] = ["20G-20L", "40G-40L"]
df_lambdas.set_index(["Condition"]).style.format("{:.2f}")
Condition   Small G, Small L   Small G, Large L   Large G, Small L   Large G, Large L
20G-20L                 1.03               0.53               1.83               0.95
40G-40L                 1.09               0.45               1.31               1.05

$\lambda$ Is The Same When Analyzing Common Lotteries
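
For each experiment, we estimate $\lambda$ in each condition three ways: on all lotteries, on a random subset of each participant's lotteries (size-matched to the common subset), and on the lotteries common to all four conditions only, again with participant-clustered bootstrap confidence intervals: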

Show Code
np.random.seed(189123789)  # Seed for reproducibility
sns.set_style("ticks")
sns.set_context("talk")
_, axes = plt.subplots(3, 1, figsize=(6.38, 8))
pal = sns.color_palette()
nboots = 5000
for ax, filename, title in zip(
    axes,
    ["Data_S1ab", "Data_S1a", "Data_S1b"],
    ["Experiments 1a + 1b", "Experiment 1a", "Experiment 1b"],
):

    data = pd.read_csv(f"Original Data/{filename}.csv")
    data["response"] = data["response"].map({"Accept": 1, "Reject": 0})
    data["condition"] = data["condition"].map(
        {
            "Gains 20-Losses 20": 0,
            "Gains 20-Losses 40": 1,
            "Gains 40-Losses 20": 2,
            "Gains 40-Losses 40": 3,
        }
    )
    data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
    data["is_common"] = data.pair.map(dict_common_s1)

    n_parts = data["sub"].shape[0] // 64
    data["sub_id"] = np.repeat(np.arange(n_parts), 64)

    # For each participant, draw a random subset of bets the same size as the
    # common-lottery subset (9 of the 64 bets)
    chosen = [False] * 55 + [True] * 9
    data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform(
        lambda x: np.random.choice(chosen, 64, replace=False)
    )

    data_common = data[data.is_common]
    data_random = data[data.is_randomly_chosen]

    datasets = [data, data_random, data_common]
    labels = ["All Lotteries", "Random Subset", "Common Lotteries"]
    markers = ["o", "s", "D"]
    xpos = [np.arange(0, 4, 1) - 0.1, np.arange(0, 4, 1), np.arange(0, 4, 1) + 0.1]

    for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal):
        try:  # Check if bootstrapped estimates have already been generated
            lambd = np.load(
                f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
            )
            blambd = np.load(
                f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy"
            )
        except FileNotFoundError:  # Otherwise generate them (long...)
            lambd, blambd = bootstrap_lambda_between_s1_clustered(d, 5000)
            np.save(
                f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd
            )
            np.save(
                f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy",
                blambd,
            )
        ax.scatter(x, lambd, label=l, marker=m)
        llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
        ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2)

    ax.axhline(1, color="black", lw=0.6)
    ax.set_xticks(np.arange(0, 4, 1))
    yticks = np.round(np.arange(0.6, 2.8, 0.4), 1)
    ax.set_yticks(yticks)
    ax.set_ylim(0.4, 2.6)
    ax.set_yticklabels(yticks, size=14)
    sns.despine(ax=ax)
    ax.xaxis.grid(False)
    ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=16)
    ax.set_title(title, size=14)
    ax.set_xticklabels([])
axes[0].legend(frameon=False, loc=2, fontsize=13, handletextpad=0.05)
axes[2].set_xticklabels(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], size=14)
plt.tight_layout()
plt.savefig("Figures/Figure 4.png", dpi=100, bbox_inches="tight");

In table form:

Show Code
lambdas = []
for filename, ax, title in zip(
    ["Data_S1ab", "Data_S1a", "Data_S1b"],
    axes,
    ["Study 1a + Study 1b", "Study 1a", "Study 1b"],
):
    for d, l, x, c in zip(datasets, labels, xpos, pal):
        lambd = np.load(
            f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
        )
        lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["20G - 20L", "20G - 40L", "40G - 20L", "40G - 40L"]
df_lambdas["Data"] = np.repeat(["S1a + S1b", "S1a", "S1b"], 3)
df_lambdas["Subset"] = np.tile(
    ["All Lotteries", "Random Subset", "Common Lotteries"], 3
)
df_lambdas.set_index(["Data", "Subset"]).transpose().style.format("{:.2f}")
                     S1a + S1b                    S1a                          S1b
             All   Random   Common        All   Random   Common        All   Random   Common
20G - 20L   1.04     1.08     0.96       1.06     1.05     0.99       1.03     1.06     0.93
20G - 40L   0.71     0.70     0.96       0.73     0.69     1.00       0.70     0.66     0.92
40G - 20L   1.84     1.74     1.03       1.94     1.85     1.08       1.77     1.73     1.00
40G - 40L   1.07     1.10     1.01       1.08     1.14     1.03       1.07     1.08     0.99

(All = All Lotteries; Random = Random Subset; Common = Common Lotteries)

Appendix

Validation of the Pooled Model

Fitting the original individual-level model in R:

Show Code
%%R -i data_s1 -i data_s1a -i data_s1b -o fits_all
# Code from W&S

# Fit GLM per individual
loss_aversion <- function(data) {
    m1 <- glm(response ~ gain + loss, data=data, family=binomial)
    data.frame(id=data$sub_id[1],
               condition=data$condition[1], 
               converged=m1$converged, 
               deviance=deviance(m1), 
               max.residual=max(abs(residuals(m1))), 
               intercept=coef(m1)["(Intercept)"], 
               beta.loss=coef(m1)["loss"], 
               beta.gain=coef(m1)["gain"])
}

# Original data (S1a + S1b)

fits_s1ab <- by(data=data_s1, data_s1$sub_id, loss_aversion)
fits_s1ab <- do.call(rbind, fits_s1ab)
fits_s1ab <- fits_s1ab[order(fits_s1ab$id),]
fits_s1ab$lambda <- with(fits_s1ab, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1ab$deviance,probs=c(.95))
fits_s1ab <- subset(fits_s1ab, deviance < deviance.cut)


# Remove negative lambdas 
fits_s1ab <- (subset(fits_s1ab,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)),
                        dataset="S1ab"))

# Original data S1a

fits_s1a <- by(data=data_s1a, data_s1a$sub_id, loss_aversion)
fits_s1a <- do.call(rbind, fits_s1a)
fits_s1a <- fits_s1a[order(fits_s1a$id),]
fits_s1a$lambda <- with(fits_s1a, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1a$deviance,probs=c(.95))
fits_s1a <- subset(fits_s1a, deviance < deviance.cut)


# Remove negative lambdas 
fits_s1a <- (subset(fits_s1a,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)), 
                        dataset="S1a"))

# Original data S1b
fits_s1b <- by(data=data_s1b, data_s1b$sub_id, loss_aversion)
fits_s1b <- do.call(rbind, fits_s1b)
fits_s1b <- fits_s1b[order(fits_s1b$id),]
fits_s1b$lambda <- with(fits_s1b, -beta.loss/beta.gain)

# Remove 5% with highest deviance
deviance.cut <- quantile(fits_s1b$deviance,probs=c(.95))
fits_s1b <- subset(fits_s1b, deviance < deviance.cut)


# Remove negative lambdas 
fits_s1b <- (subset(fits_s1b,lambda > 0) 
              %>% mutate(id = as.numeric(factor(id)), 
                        dataset="S1b"))


fits_all <- bind_rows(fits_s1ab, fits_s1a, fits_s1b)

Fitting the Bayesian model:

Show Code
np.random.seed(189123789)  # Seed for reproducibility
results_bayes = []
for data, filename in zip([data_s1, data_s1a, data_s1b], ["S1ab", "S1a", "S1b"]):
    model = build_bayesian_model(data)
    path = f"Buffer/Bayesian Traces/Data_{filename}-all/"
    with model:
        if not os.path.exists(path):
            # If the model has not been estimated yet, run it (long...)
            trace_simple = pm.sample(1000, tune=1000)
            pm.save_trace(trace_simple, directory=path)
        tab = pm.load_trace(path)
        l = pm.summary(tab, varnames=["λ"]).reset_index()
        l["condition_name"] = (
            l["index"]
            .apply(lambda x: int(x[2]))
            .map({0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"})
        )
        l["dataset"] = filename
    results_bayes.append(l)
results_bayes = (
    pd.concat(results_bayes)
    .pivot(index="condition_name", columns="dataset", values="mean")
    .reset_index()
)
results_bayes["estimation"] = "bayes"

Fitting the logistic model:

Show Code
results_logit = []
for filename in ["S1ab", "S1a", "S1b"]:
    lambd = np.load(
        f"Buffer/Saved Lambdas/Lambda - All Lotteries - Data_{filename} - Clustered.npy"
    )
    results_logit.append(lambd)
results_logit = pd.DataFrame(results_logit)
results_logit.columns = ["20G-20L", "20G-40L", "40G-20L", "40G-40L"]
results_logit["dataset"] = ["S1ab", "S1a", "S1b"]
results_logit = (
    pd.melt(
        results_logit,
        id_vars=["dataset"],
        value_vars=["20G-20L", "20G-40L", "40G-20L", "40G-40L"],
    )
    .pivot(index="variable", columns="dataset", values="value")
    .reset_index()
)
results_logit.columns = ["condition_name", "S1a", "S1ab", "S1b"]
results_logit["estimation"] = "logit"

Summary information:

Show Code
fits_all["condition_name"] = fits_all.condition.map(
    {0: "20G-20L", 1: "20G-40L", 2: "40G-20L", 3: "40G-40L"}
)
results_median = (
    fits_all.groupby(["condition_name", "dataset"])[["lambda"]].median().unstack()
)
results_median.columns = [col[1] for col in results_median.columns]
results_median = results_median.reset_index()
results_median["estimation"] = "median"

results_all = (
    pd.concat([results_logit, results_bayes, results_median])[
        ["condition_name", "estimation", "S1ab", "S1a", "S1b"]
    ]
    .set_index(["condition_name", "estimation"])
    .reindex(["20G-20L", "20G-40L", "40G-20L", "40G-40L"], level=0)
    .reindex(["median", "logit", "bayes"], level=1)
    .unstack()
)
results_all.style.format("{:.2f}")
                     S1ab                        S1a                         S1b
            median   logit   bayes      median   logit   bayes      median   logit   bayes
20G-20L       1.00    1.04    1.04        1.01    1.06    1.06        1.01    1.03    1.03
20G-40L       0.86    0.71    0.72        0.88    0.73    0.75        0.81    0.70    0.71
40G-20L       1.73    1.84    1.84        1.77    1.94    1.94        1.59    1.77    1.76
40G-40L       1.02    1.07    1.07        1.02    1.08    1.08        1.01    1.07    1.07

Unsuccessful Re-Analysis of Study 2

According to the paper:

In Experiment 2, we used two distributions for gains and losses, one ranging from \$6 to \$20 (in \$2 increments) and one three times larger, ranging from \$18 to \$60 (in \$6 increments). We only tested the asymmetric cases. Unlike in Experiments 1a and 1b, the possible gains and losses were randomly drawn and paired from the distributions to produce 64 pairs.

From this description, one would expect the data to have the following properties (checked in the sketch after this list):

  1. In the "Gains 20-Losses 60" (vs. "Gains 60-Losses 20") condition...
     A. All possible gains (losses) should be between 6 and 20, inclusive
     B. All possible gains (losses) should be multiples of 2
     C. All possible losses (gains) should be between 18 and 60, inclusive
     D. All possible losses (gains) should be multiples of 6
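
A minimal way to check these properties against the data (a sketch, assuming `data_s2` has been processed as in the Data Processing section above, where its `gain`, `loss`, and `condition` columns are created):

# Inspect the support of gains and losses in each condition of Study 2
for cond, grp in data_s2.groupby("condition"):
    for col in ["gain", "loss"]:
        vals = np.sort(grp[col].unique())
        print(
            f"condition={cond}, {col}: min={vals.min()}, max={vals.max()}, "
            f"increments={np.unique(np.diff(vals))}"
        )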

Instead, the data suggests the following:

  1. In the "Gains 20-Losses 60" (vs. "Gains 60-Losses 20") condition...
     A. Possible gains (losses) were drawn at random between 0 and 19 (inclusive)
     B. Possible losses (gains) were drawn at random between 0 and 59 (inclusive)

This inconsistency precludes a re-analysis of the data: the bet structure does not match the design described in the paper.

The heatmap below shows the number of bets appearing in the data for each combination of gain and loss amounts:

Show Code
sns.set_context("talk")

grid_kws = {"height_ratios": (0.95, 0.05), "hspace": 0.2}
_, (ax, cbar_ax) = plt.subplots(2, 1, figsize=(10, 11), gridspec_kw=grid_kws)
df_accept = data_s2.groupby(["gain", "loss"]).Ones.count().reset_index()
g = sns.heatmap(
    df_accept.pivot("gain", "loss", "Ones").iloc[::-1, :],
    annot=False,
    cmap="viridis",
    fmt=".2f",
    ax=ax,
    cbar=True,
    cbar_ax=cbar_ax,
    cbar_kws=dict(orientation="horizontal"),
)
g.set_xlim(-0.5, 60)
g.set_xticks(np.arange(0, 60, 2) + 0.5)
g.set_xticklabels(np.arange(0, 60, 2))
g.set_yticks(np.arange(60, 0, -2) - 0.5)
g.set_yticklabels(np.arange(0, 60, 2))
g.set_xlabel("Gain Amount")
g.set_ylabel("Loss Amount")
g.set_ylim(60, -0.5);

The authors have confirmed that the amounts of gains and losses reported in the paper were inaccurate.

Re-Analysis of Study 3
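
Study 3 elicited a 4-point acceptability rating rather than a binary choice, so the per-condition models are estimated by OLS (see `bootstrap_lambda_between_s3_clustered` above). As in the previous section, we estimate $\lambda$ on all lotteries, on a random subset, and on the lotteries common to all conditions: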

Show Code
np.random.seed(189123789)  # Seed for reproducibility
sns.set_style("ticks")
sns.set_context("talk")
_, ax = plt.subplots(1, 1, figsize=(6.38, 5))
pal = sns.color_palette()
nboots = 5000
filename = "Data_S3.csv"
title = ""

data = pd.read_csv(f"Original Data/{filename}")
data["response"] = data.response.map(
    {
        "Strongly Reject": -2,
        "Weakly Reject": -1,
        "Weakly Accept": 1,
        "Strongly Accept": 2,
    }
)
data["condition"] = data.condition.map(
    {"Gains 20-Losses 40": 0, "Gains 40-Losses 20": 2, "Gains 40-Losses 40": 1}
)
data["pair"] = data.apply(lambda x: f"{x.gain:.0f}-{x.loss:.0f}", axis=1)
data["is_common"] = data.pair.map(dict_common_s3)
n_parts = data["sub"].shape[0] // 256
data["sub_id"] = np.repeat(np.arange(n_parts), 256)
chosen = [False] * (576 - 36) + [True] * 36
data["is_randomly_chosen"] = data.groupby("sub_id").pair.transform(
    lambda x: np.random.choice(chosen, 256, replace=False)
)
data_common = data[data.is_common]
data_random = data[data.is_randomly_chosen]

datasets = [data, data_random, data_common]
labels = ["All Lotteries", "Random Subset", "Common Lotteries"]
markers = ["o", "s", "D"]
xpos = [np.arange(0, 3, 1) - 0.1, np.arange(0, 3, 1), np.arange(0, 3, 1) + 0.1]


sns.set_context("talk")
for d, l, m, x, c in zip(datasets, labels, markers, xpos, pal):
    try:
        lambd = np.load(
            f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
        )
        blambd = np.load(
            f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy"
        )
    except FileNotFoundError:
        lambd, blambd = bootstrap_lambda_between_s3_clustered(d, nboots)
        np.save(
            f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy", lambd
        )
        np.save(
            f"Buffer/Saved Lambdas/BootLambdas - {l} - {filename} - Clustered.npy",
            blambd,
        )
    ax.scatter(x, lambd, label=l, marker=m)
    llci, ulci = np.abs(lambd - np.percentile(blambd, [2.5, 97.5], axis=0))
    ax.errorbar(x, lambd, [llci, ulci], color=c, alpha=1, fmt="none", lw=2)

ax.axhline(1, lw=0.6, color="black")
sns.despine(ax=ax)
ax.set_title(title)
ax.set_xticks([0, 1, 2])
ax.set_ylabel(r"$\lambda = \frac{-\beta_{L}}{\beta_{G}}$", size=12)
ax.set_xticklabels(["20G-40L", "40G-40L", "40G-20L"])
ax.legend(title="", frameon=False, loc=2)
plt.tight_layout()
plt.savefig("Figures/Figure 5.png", dpi=100);

In table form:

Show Code
lambdas = []
filename = "Data_S3.csv"
for l in labels:
    lambd = np.load(
        f"Buffer/Saved Lambdas/Lambda - {l} - {filename} - Clustered.npy"
    )
    lambdas.append(lambd)
df_lambdas = pd.DataFrame(lambdas)
df_lambdas.columns = ["20G - 40L", "40G - 40L", "40G - 20L"]
df_lambdas["Subset"] = np.tile(
    ["All Lotteries", "Random Subset", "Common Lotteries"], 1
)
df_lambdas.set_index(["Subset"])
Subset              20G - 40L   40G - 40L   40G - 20L
All Lotteries        0.735418    1.244776    1.859524
Random Subset        0.653839    1.154495    2.241193
Common Lotteries     0.887531    1.306122    1.123239

Study 3 also shows limited evidence that decision by sampling is a driver of loss aversion: while the decision-by-sampling account predicts different $\lambda$ in the "40G - 40L" and "40G - 20L" conditions, the two estimates are similar (1.31 vs. 1.12) when only the common lotteries are considered.