# -*- coding: utf-8 -*-
"""
:Author: Dominic Hunt

:Reference: Based on the paper Opponent actor learning (OpAL): Modeling
                interactive effects of striatal dopamine on reinforcement
                learning and choice incentive.
                Collins, A. G. E., & Frank, M. J. (2014).
                Psychological Review, 121(3), 337–366.
                doi:10.1037/a0037015

"""
import logging

import numpy as np

from model.modelTemplate import Model


class OpALS(Model):

    """The Opponent actor learning model modified to have saturation values

    The saturation values are the same for the actor and critic learners

    Attributes
    ----------
    Name : string
        The name of the class used when recording what has been used.
    currAction : int
        The current action chosen by the model. Used to pass participant action
        to model when fitting

    Parameters
    ----------
    alpha : float, optional
        Learning rate parameter, used as the default for ``alphaCrit``,
        ``alphaGo`` and ``alphaNogo``. Default ``0.3``
    alphaGoNogoDiff : float, optional
        The difference between ``alphaGo`` and ``alphaNogo``. Default is ``None``.
        If not ``None`` will overwrite ``alphaNogo``
        :math:`\\alpha_N = \\alpha_G - \\alpha_\\delta`
    alphaCrit : float, optional
        The critic learning rate. Default is ``alpha``
    alphaGo : float, optional
        Learning rate parameter for Go, the positive part of the actor learning.
        Default is ``alpha``
    alphaNogo : float, optional
        Learning rate parameter for Nogo, the negative part of the actor
        learning. Default is ``alpha``
    alphaGoDiff : float, optional
        The difference between ``alphaCrit`` and ``alphaGo``. The default is
        ``None``. If not ``None`` and ``alphaNogoDiff`` is also not ``None``,
        it will overwrite the ``alphaGo`` parameter
        :math:`\\alpha_G = \\alpha_C + \\alpha_{\\delta G}`
    alphaNogoDiff : float, optional
        The difference between ``alphaCrit`` and ``alphaNogo``. The default is
        ``None``. If not ``None`` and ``alphaGoDiff`` is also not ``None``,
        it will overwrite the ``alphaNogo`` parameter
        :math:`\\alpha_N = \\alpha_C + \\alpha_{\\delta N}`
    beta : float, optional
        Sensitivity parameter for probabilities. Also known as an exploration-
        exploitation parameter. Defined as :math:`\\beta` in the paper.
        Default ``4``
    invBeta : float, optional
        Inverse of sensitivity parameter for the probabilities.
        Defined as :math:`\\frac{1}{\\beta+1}`. Default ``None``
    rho : float, optional
        The asymmetry between the actor weights, so that
        :math:`\\beta_G = \\beta(1+\\rho)` and :math:`\\beta_N = \\beta(1-\\rho)`.
        Default ``0``
    betaGo : float, optional
        The Go actor sensitivity :math:`\\beta_G`. Default ``None``. If both
        ``betaGo`` and ``betaNogo`` are provided they override ``beta`` and
        ``rho``
    betaNogo : float, optional
        The Nogo actor sensitivity :math:`\\beta_N`. Default ``None``. If both
        ``betaGo`` and ``betaNogo`` are provided they override ``beta`` and
        ``rho``
    number_actions : integer, optional
        The maximum number of valid actions the model can expect to receive.
        Default 2.
    number_cues : integer, optional
        The initial maximum number of stimuli the model can expect to receive.
        Default 1.
    number_critics : integer, optional
        The number of different reaction learning sets.
        Default number_actions*number_cues
    action_codes : dict with string or int as keys and int values, optional
        A dictionary used to convert between the action references used by the
        task or dataset and references used in the models to describe the
        order in which the action information is stored.
    prior : array of floats in ``[0, 1]``, optional
        The prior probability of the states being the correct one.
        Default ``ones((number_actions, number_cues)) / number_critics``
    expect : array of floats, optional
        The initialisation of the expected reward.
        Default ``ones((number_actions, number_cues)) / number_critics``
    expectGo : array of floats, optional
        The initialisation of the expected go and nogo.
        Default ``ones((number_actions, number_cues))``
    saturateVal : float, optional
        The saturation value for the model. Default is 10
    stimFunc : function, optional
        The function that transforms the stimulus into a form the model can
        understand and a string to identify it later. Default is blankStim
    rewFunc : function, optional
        The function that transforms the reward into a form the model can
        understand. Default is blankRew
    decFunc : function, optional
        The function that takes the internal values of the model and turns
        them in to a decision. Default is model.decision.discrete.weightProb

    Notes
    -----
    Critic: The chosen action is updated with

    .. math::

        \\delta_{d,t} = r_t - E_{d,t}

        E_{d,t+1} = E_{d,t} + \\alpha_E \\delta_{d,t} \\left(1 - \\frac{E_{d,t}}{S}\\right)

    Actor: The chosen action is updated with

    .. math::

        G_{d,t+1} = G_{d,t} + \\alpha_G G_{d,t} \\delta_{d,t} \\left(1 - \\frac{G_{d,t}}{S}\\right)

        N_{d,t+1} = N_{d,t} - \\alpha_N N_{d,t} \\delta_{d,t} \\left(1 - \\frac{N_{d,t}}{S}\\right)

    Probabilities: The probabilities for all actions are calculated using

    .. math::

        A_{d,t} = (1 + \\rho) G_{d,t} - (1 - \\rho) N_{d,t}

        P_{d,t} = \\frac{e^{\\beta A_{d,t}}}{\\sum_{d \\in D} e^{\\beta A_{d,t}}}
    """

    def __init__(self, alpha=0.3, beta=4, rho=0, saturateVal=10, invBeta=None,
                 alphaCrit=None, betaGo=None, betaNogo=None, alphaGo=None,
                 alphaNogo=None, alphaGoDiff=None, alphaNogoDiff=None,
                 alphaGoNogoDiff=None, expect=None, expectGo=None, **kwargs):

        super(OpALS, self).__init__(**kwargs)

        if alphaCrit is None:
            alphaCrit = alpha
        self.alphaCrit = alphaCrit
        if alphaGo is not None and alphaNogo is not None:
            self.alphaGo = alphaGo
            self.alphaNogo = alphaNogo
        elif alphaGoNogoDiff is not None and (alphaGo is not None or alphaNogo is not None):
            if alphaGo is not None:
                self.alphaGo = alphaGo
                self.alphaNogo = alphaGo - alphaGoNogoDiff
            elif alphaNogo is not None:
                self.alphaGo = alphaNogo + alphaGoNogoDiff
                self.alphaNogo = alphaNogo
        elif alphaGoDiff is not None and alphaNogoDiff is not None:
            self.alphaGo = alpha + alphaGoDiff
            self.alphaNogo = alpha + alphaNogoDiff
        else:
            self.alphaGo = alpha
            self.alphaNogo = alpha

        if invBeta is not None:
            beta = (1 / invBeta) - 1

        if betaGo is not None and betaNogo is not None:
            self.beta = (betaGo + betaNogo) / 2
            # rho is defined relative to the mean actor sensitivity, so it is
            # derived from self.beta rather than the (possibly default) beta
            # argument
            self.rho = (betaGo - betaNogo) / (2 * self.beta)
        else:
            self.beta = beta
            self.rho = rho

        if expect is None:
            expect = np.ones((self.number_actions, self.number_cues)) / self.number_critics
        self.expect = expect
        if expectGo is None:
            expectGo = np.ones((self.number_actions, self.number_cues))
        self.expectGo = expectGo
        self.saturateVal = saturateVal

        self.expectations = np.array(self.expect)
        self.go = np.array(self.expectGo)
        self.nogo = np.array(self.expectGo)
        self.actionValues = np.ones(self.expectations.shape)

        self.parameters["alphaCrit"] = self.alphaCrit
        self.parameters["alphaGo"] = self.alphaGo
        self.parameters["alphaNogo"] = self.alphaNogo
        self.parameters["beta"] = self.beta
        self.parameters["betaGo"] = betaGo
        self.parameters["betaNogo"] = betaNogo
        self.parameters["rho"] = self.rho
        self.parameters["expectation"] = self.expect
        self.parameters["expectationGo"] = self.expectGo
        self.parameters["saturateVal"] = self.saturateVal

        # Recorded information
        self.recGo = []
        self.recNogo = []
        self.recActionValues = []
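    # Worked example of the learning-rate resolution above (hypothetical
    # values, for illustration only): with alphaGo=0.4 and alphaGoNogoDiff=0.1
    # the first branch is skipped because alphaNogo is None, the second branch
    # fires, and the model ends up with alphaGo=0.4 and alphaNogo=0.4-0.1=0.3.
    # If only alpha=0.3 is given, all three learning rates fall back to 0.3.
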
    def returnTaskState(self):
        """ Returns all the relevant data for this model

        Returns
        -------
        results : dict
            The dictionary contains a series of keys including Name,
            Probabilities, Actions and Events.
        """

        results = self.standardResultOutput()
        results["Go"] = np.array(self.recGo)
        results["Nogo"] = np.array(self.recNogo)
        results["ActionValues"] = np.array(self.recActionValues)

        return results
    def storeState(self):
        """
        Stores the state of all the important variables so that they can be
        accessed later
        """

        self.storeStandardResults()
        self.recGo.append(self.go.copy())
        self.recNogo.append(self.nogo.copy())
        self.recActionValues.append(self.actionValues.copy())
    def rewardExpectation(self, observation):
        """Calculate the estimated reward based on the action and stimuli

        This contains parts that are task dependent

        Parameters
        ----------
        observation : {int | float | tuple}
            The set of stimuli

        Returns
        -------
        actionExpectations : array of floats
            The expected rewards for each action
        stimuli : list of floats
            The processed observations
        activeStimuli : list of [0, 1] mapping to [False, True]
            A list of the stimuli that were or were not present
        """

        activeStimuli, stimuli = self.stimulus_shaper.processStimulus(observation)

        actionExpectations = self._actExpectations(self.expectations, stimuli)

        return actionExpectations, stimuli, activeStimuli
    def delta(self, reward, expectation, action, stimuli):
        """
        Calculates the comparison between the reward and the expectation

        Parameters
        ----------
        reward : float
            The reward value
        expectation : float
            The expected reward value
        action : int
            The chosen action
        stimuli : {int | float | tuple | None}
            The stimuli received

        Returns
        -------
        delta : float
            The difference between the processed reward and the expectation
        """

        modReward = self.reward_shaper.processFeedback(reward, action, stimuli)

        delta = modReward - expectation

        return delta
    def updateModel(self, delta, action, stimuli, stimuliFilter):
        """
        Parameters
        ----------
        delta : float
            The difference between the reward and the expected reward
        action : int
            The action chosen by the model in this trialstep
        stimuli : list of float
            The weights of the different stimuli in this trialstep
        stimuliFilter : list of bool
            A list describing if a stimulus cue is present in this trialstep
        """

        # Find the new activities
        self._critic(action, delta, stimuli)
        self._actor(action, delta, stimuli)
        self._actionValues(self.go, self.nogo)

        # Calculate the new probabilities
        self.probabilities = self.actorStimulusProbs()
    def _critic(self, action, delta, stimuli):

        newExpectations = self.expectations[action] + self.alphaCrit * delta * (1 - self.expectations[action] / self.saturateVal) * stimuli / np.sum(stimuli)

        # Clamp the expectations at zero
        newExpectations = newExpectations * (newExpectations >= 0)

        self.expectations[action] = newExpectations

    def _actor(self, action, delta, stimuli):

        chosenGo = self.go[action] * stimuli / np.sum(stimuli)
        chosenNogo = self.nogo[action] * stimuli / np.sum(stimuli)

        self.go[action] += self.alphaGo * chosenGo * delta * (1 - chosenGo / self.saturateVal)
        self.nogo[action] -= self.alphaNogo * chosenNogo * delta * (1 - chosenNogo / self.saturateVal)

    def _actionValues(self, go, nogo):

        rho = self.rho

        actionValues = (1 + rho) * go - (1 - rho) * nogo

        self.actionValues = actionValues

    def _actExpectations(self, expectations, stimuli):

        # If there are multiple possible stimuli, filter by active stimuli and
        # calculate the expectations associated with each action.
        if self.number_cues > 1:
            actionExpectations = self.actStimMerge(expectations, stimuli)
        else:
            actionExpectations = expectations

        return actionExpectations
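    # Illustration of the saturation term in _critic and _actor above
    # (hypothetical numbers): with saturateVal S=10, a weight of 1 scales its
    # update by (1 - 1/10) = 0.9, while a weight of 9 scales it by
    # (1 - 9/10) = 0.1, so learning slows as values approach S instead of
    # growing without bound.
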
    def calcProbabilities(self, actionValues):
        # type: (np.ndarray) -> np.ndarray
        """
        Calculate the probabilities associated with the actions

        Parameters
        ----------
        actionValues : 1D ndArray of floats

        Returns
        -------
        probArray : 1D ndArray of floats
            The probabilities associated with the actionValues
        """

        numerator = np.exp(self.beta * actionValues)
        denominator = np.sum(numerator)

        probArray = numerator / denominator

        return probArray
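    # Worked example for the softmax above (hypothetical values): with beta=4
    # and actionValues=[0.5, 0.25] the numerators are exp(2) ~= 7.389 and
    # exp(1) ~= 2.718, giving probabilities of roughly [0.73, 0.27].
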
    def actorStimulusProbs(self):
        """
        Calculates in the model-appropriate way the probability of each action.

        Returns
        -------
        probabilities : 1D ndArray of floats
            The probabilities associated with the action choices
        """

        actExpectations = self._actExpectations(self.actionValues, self.stimuli)
        probabilities = self.calcProbabilities(actExpectations)

        return probabilities
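

# The block below is a minimal, self-contained sketch of a single OpAL-S
# trial written with plain NumPy. It mirrors the update rules in _critic,
# _actor, _actionValues and calcProbabilities, but deliberately bypasses the
# Model machinery (stimulus and reward shapers, fitting support). All numbers
# are assumptions chosen for illustration, not values from the paper.
if __name__ == "__main__":
    alphaCrit = alphaGo = alphaNogo = 0.3
    beta, rho, S = 4, 0, 10

    # Critic expectation and actor weights for the chosen action
    E, G, N = 0.5, 1.0, 1.0
    reward = 1.0

    # Critic: saturating prediction-error update
    delta = reward - E
    E += alphaCrit * delta * (1 - E / S)

    # Actor: a positive delta strengthens Go and weakens Nogo
    G += alphaGo * G * delta * (1 - G / S)
    N -= alphaNogo * N * delta * (1 - N / S)

    # Action values and softmax choice probabilities against a second,
    # untouched action that still has G = N = 1 (action value 0)
    A = np.array([(1 + rho) * G - (1 - rho) * N, 0.0])
    p = np.exp(beta * A) / np.sum(np.exp(beta * A))
    print("E={:.4f} G={:.4f} N={:.4f} p={}".format(E, G, N, p))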