# -*- coding: utf-8 -*-
"""
:Author: Dominic Hunt
"""
import numpy as np
import copy
import re
import collections
from model.decision.discrete import weightProb
import utils
class Stimulus(object):
"""
    Stimulus processor class. This acts as an interface between an observation and the model. The base class does nothing to the observation.
Attributes
----------
Name : string
        The identifier of the class
"""
# Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
name = '{}.{}'.format(cls.__module__, cls.__name__)
return name
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
self.Name = self.get_name()
    def details(self):
properties = [str(k) + ' : ' + str(v).strip('[]()') for k, v in self.__dict__.items() if k != "Name"]
desc = self.Name + " with " + ", ".join(properties)
return desc
    def processStimulus(self, observation):
"""
Takes the observation and turns it into a form the model can use
Parameters
----------
        observation : {int | float | tuple}
            The raw observation from the task
Returns
-------
        stimuliPresent : int or list of int
            The stimuli present in the observation
        stimuliActivity : float or list of float
            The activity level of each stimulus
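
        Examples
        --------
        The base class ignores the observation and reports a single active
        stimulus:
        >>> Stimulus().processStimulus(0)
        (1, 1)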
"""
return 1, 1
class Rewards(object):
"""
This acts as an interface between the feedback from a task and the feedback a model can process
Attributes
----------
Name : string
        The identifier of the class
"""
# Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
name = '{}.{}'.format(cls.__module__, cls.__name__)
return name
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
self.Name = self.get_name()
    def details(self):
properties = [str(k) + ' : ' + str(v).strip('[]()') for k, v in self.__dict__.items() if k != "Name"]
desc = self.Name + " with " + ", ".join(properties)
return desc
    def processFeedback(self, feedback, lastAction, stimuli):
"""
Takes the feedback and turns it into a form to be processed by the model
Parameters
----------
        feedback : {int | float | tuple}
            The feedback from the task
        lastAction : int
            The last chosen action
        stimuli : list of float
            The processed stimuli
        Returns
        -------
        modelFeedback :
            The feedback in a form the model can process
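
        Examples
        --------
        The base class passes the feedback through unchanged:
        >>> Rewards().processFeedback(1, 0, [1])
        1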
"""
return feedback
class Model(object):
"""
The model class is a general template for a model. It also contains
universal methods used by all models.
Attributes
----------
Name : string
The name of the class used when recording what has been used.
currAction : int
The current action chosen by the model. Used to pass participant action
to model when fitting
Parameters
----------
number_actions : integer, optional
The maximum number of valid actions the model can expect to receive.
Default 2.
number_cues : integer, optional
The initial maximum number of stimuli the model can expect to receive.
Default 1.
number_critics : integer, optional
The number of different reaction learning sets.
Default number_actions*number_cues
action_codes : dict with string or int as keys and int values, optional
A dictionary used to convert between the action references used by the
task or dataset and references used in the models to describe the order
in which the action information is stored.
prior : array of floats in ``[0,1]``, optional
        The prior probability of the states being the correct one.
        Default ``ones(number_actions) / number_actions``
stimulus_shaper_name : string, optional
The name of the function that transforms the stimulus into a form the model can
understand and a string to identify it later. ``stimulus_shaper`` takes priority
reward_shaper_name : string, optional
The name of the function that transforms the reward into a form the model can
        understand. ``reward_shaper`` takes priority
decision_function_name : string, optional
        The name of the function that takes the internal values of the model and turns them
        into a decision. ``decision_function`` takes priority
stimulus_shaper : Stimulus class, optional
The class that transforms the stimulus into a form the model can
understand and a string to identify it later. Default is Stimulus
reward_shaper : Rewards class, optional
The class that transforms the reward into a form the model can
understand. Default is Rewards
decision_function : function, optional
        The function that takes the internal values of the model and turns them into a decision.
Default is ``weightProb(list(range(number_actions)))``
stimulus_shaper_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
reward_shaper_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
decision_function_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
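
    Examples
    --------
    A minimal construction sketch, assuming the default ``Stimulus``,
    ``Rewards`` and ``weightProb`` components imported above are available:
    >>> model = Model(number_actions=3)
    >>> model.params()['number_actions']
    3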
"""
#Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
return cls.__name__
# TODO: define and start using non_action
    parameter_patterns = []
def __init__(self, number_actions=2, number_cues=1, number_critics=None,
action_codes=None, non_action='None',
prior=None,
stimulus_shaper=None, stimulus_shaper_name=None, stimulus_shaper_properties=None,
reward_shaper=None, reward_shaper_name=None, reward_shaper_properties=None,
decision_function=None, decision_function_name=None, decision_function_properties=None,
**kwargs):
""""""
self.Name = self.get_name()
        self.pattern_parameters = self.kwarg_pattern_parameters(kwargs)
        for k, v in self.pattern_parameters.items():
            setattr(self, k, v)
self.number_actions = number_actions
self.number_cues = number_cues
if number_critics is None:
number_critics = self.number_actions * self.number_cues
self.number_critics = number_critics
if action_codes is None:
action_codes = {k: k for k in range(self.number_actions)}
self.actionCode = action_codes
self.defaultNonAction = non_action
if prior is None:
prior = np.ones(self.number_actions) / self.number_actions
self.prior = prior
self.stimuli = np.ones(self.number_cues)
self.stimuliFilter = np.ones(self.number_cues)
        self.currAction = None
        self.currActionSymbol = None
self.decision = None
self.validActions = None
self.lastObservation = None
        self.probabilities = np.array(self.prior)
        self.decProbabilities = np.array(self.prior)
        # Initialised here so the standard recording methods work on the base
        # class; subclasses normally overwrite this with their own expectations
        self.expectations = np.ones((self.number_actions, self.number_cues)) / self.number_critics
        self.expectedRewards = np.ones(self.number_actions)
        self.expectedReward = np.array([1])
if stimulus_shaper is not None and issubclass(stimulus_shaper, Stimulus):
if stimulus_shaper_properties is not None:
stimulus_shaper_kwargs = {k: v for k, v in kwargs.items() if k in stimulus_shaper_properties}
else:
stimulus_shaper_kwargs = kwargs.copy()
self.stimulus_shaper = stimulus_shaper(**stimulus_shaper_kwargs)
elif isinstance(stimulus_shaper_name, str):
stimulus_class = utils.find_class(stimulus_shaper_name,
class_folder='tasks',
inherited_class=Stimulus,
excluded_files=['taskTemplate', '__init__', 'taskGenerator'])
stimulus_shaper_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_class_args(stimulus_class)}
self.stimulus_shaper = stimulus_class(**stimulus_shaper_kwargs)
else:
self.stimulus_shaper = Stimulus()
if reward_shaper is not None and issubclass(reward_shaper, Rewards):
if reward_shaper_properties is not None:
reward_shaper_kwargs = {k: v for k, v in kwargs.items() if k in reward_shaper_properties}
else:
reward_shaper_kwargs = kwargs.copy()
self.reward_shaper = reward_shaper(**reward_shaper_kwargs)
elif isinstance(reward_shaper_name, str):
reward_class = utils.find_class(reward_shaper_name,
class_folder='tasks',
inherited_class=Rewards,
excluded_files=['taskTemplate', '__init__', 'taskGenerator'])
reward_shaper_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_class_args(reward_class)}
            self.reward_shaper = reward_class(**reward_shaper_kwargs)
else:
self.reward_shaper = Rewards()
if callable(decision_function):
if decision_function_properties is not None:
decision_shaper_kwargs = {k: v for k, v in kwargs.items() if k in decision_function_properties}
else:
decision_shaper_kwargs = kwargs.copy()
self.decision_function = decision_function(**decision_shaper_kwargs)
elif isinstance(decision_function_name, str):
decision_function = utils.find_function(decision_function_name, 'model/decision')
decision_function_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_function_args(decision_function)}
self.decision_function = decision_function(**decision_function_kwargs)
else:
self.decision_function = weightProb(list(range(self.number_actions)))
self.parameters = {"Name": self.Name,
"number_actions": self.number_actions,
"number_cues": self.number_cues,
"number_critics": self.number_critics,
"prior": copy.copy(self.prior),
"non_action": self.defaultNonAction,
"actionCode": copy.copy(self.actionCode),
"stimulus_shaper": self.stimulus_shaper.details(),
"reward_shaper": self.reward_shaper.details(),
"decision_function": utils.callableDetailsString(self.decision_function)}
self.parameters.update(self.pattern_parameters)
# Recorded information
self.recAction = []
self.recActionSymbol = []
self.recStimuli = []
self.recReward = []
self.recExpectations = []
self.recExpectedReward = []
self.recExpectedRewards = []
self.recValidActions = []
self.recDecision = []
self.recProbabilities = []
self.recActionProbs = []
self.recActionProb = []
self.simID = None
    def __eq__(self, other):
        # TODO: Expand this to cover the parameters properly
        return self.Name == other.Name
    def __ne__(self, other):
        return self.Name != other.Name
def __hash__(self):
return hash(self.Name)
    def action(self):
"""
Returns the action of the model
Returns
-------
action : integer or None
"""
return self.currActionSymbol
    def observe(self, state):
"""
Receives the latest observation and decides what to do with it
There are five possible states:
Observation
Observation Action
Observation Action Feedback
Action Feedback
Observation Feedback
Parameters
----------
        state : tuple of ({int | float | tuple}, {tuple of int | None})
            The stimulus from the task followed by the tuple of valid
            actions. Any unprocessed previous observation is learnt from
            via ``self.processEvent`` before the new observation is stored.
"""
events, validActions = state
lastEvents = self.lastObservation
self.validActions = validActions
# If the last observation still has not been processed,
# and there has been no feedback, then process it.
# There may have been an action but feedback was None
# Since we have another observation it is time to learn from the previous one
if lastEvents is not None:
self.processEvent(self.currAction)
self.storeState()
self.lastObservation = events
# Find the reward expectations
self.expectedRewards, self.stimuli, self.stimuliFilter = self.rewardExpectation(events)
expectedProbs = self.actorStimulusProbs()
# If the model is not expected to act, use a dummy action,
# Otherwise choose an action
lastAction = self.currAction
if validActions is self.defaultNonAction:
self.currAction = self.defaultNonAction
else:
self.currAction, self.decProbabilities = self.chooseAction(expectedProbs, lastAction, events, validActions)
# Now that the action has been chosen, add any reinforcement of the previous choice in the expectations
self.lastChoiceReinforcement()
    def feedback(self, response):
"""
Receives the reaction to the action and processes it
Parameters
----------
response : float
The response from the task after an action. Returns without doing
anything if the value of response is `None`.
"""
# If there is feedback
if response is not None:
self.processEvent(self.currAction, response)
self.lastObservation = None
self.storeState()
    def processEvent(self, action=None, response=None):
"""
Integrates the information from a stimulus, action, response set, regardless
of which of the three elements are present.
Parameters
----------
action : int, optional
The chosen action of the model. Default ``None``
response : float, optional
The response from the task after an action. Default ``None``
"""
self.recReward.append(response)
# If there were any last reflections to do on the action chosen before processing the new event, now is the last
# chance to do it
self.choiceReflection()
# If there was a reward passed but it was empty, there is nothing to update
if response is not None and (np.size(response) == 0 or np.isnan(response)):
return
# Find the reward expectation
expectedReward = self.expectedRewards[action]
self.expectedReward = expectedReward
        # If there was no reward, the stimulus is the learnt 'reward'
if response is None:
response = self.stimuli
# Find the significance of the discrepancy between the response and the expected response
delta = self.delta(response, expectedReward, action, self.stimuli)
# Use that discrepancy to update the model
self.updateModel(delta, action, self.stimuli, self.stimuliFilter)
    def rewardExpectation(self, stimuli):
"""Calculate the expected reward for each action based on the stimuli
This contains parts that are task dependent
Parameters
----------
stimuli : {int | float | tuple}
The set of stimuli
Returns
-------
        expectedRewards : float or array of floats
            The expected reward for each action
stimuli : list of floats
The processed observations
activeStimuli : list of [0, 1] mapping to [False, True]
A list of the stimuli that were or were not present
"""
# Calculate expectation by identifying the relevant stimuli for the action
# First identify the expectations relevant to the action
# Filter them weighted by the stimuli
# Calculate the combined value
# Return the value
        # stimuli = self.stimulus_shaper.processStimulus(stimuli)
return 0, stimuli, 0
    def delta(self, reward, expectation, action, stimuli):
"""
Calculates the comparison between the reward and the expectation
Parameters
----------
reward : float
The reward value
expectation : float
The expected reward value
action : int
The chosen action
stimuli : {int | float | tuple | None}
The stimuli received
Returns
-------
        delta : float
            The difference between the reward and the expectation
"""
        modReward = self.reward_shaper.processFeedback(reward, action, stimuli)
        # This is a stub: subclasses are expected to compare modReward with
        # the expectation and return the discrepancy
        return 0
    def updateModel(self, delta, action, stimuli, stimuliFilter):
"""
Parameters
----------
delta : float
The difference between the reward and the expected reward
action : int
The action chosen by the model in this trialstep
stimuli : list of float
The weights of the different stimuli in this trialstep
stimuliFilter : list of bool
A list describing if a stimulus cue is present in this trialstep
"""
# There is no model here
    def calcProbabilities(self, actionValues):
"""
Calculate the probabilities associated with the action
Parameters
----------
        actionValues : 1D ndarray of floats
Returns
-------
        probArray : 1D ndarray of floats
The probabilities associated with the actionValues
"""
# There is no model here
return 0
    def actorStimulusProbs(self):
"""
        Calculates the probability of each action in the model-appropriate way.
Returns
-------
        probabilities : 1D ndarray of floats
The probabilities associated with the action choices
"""
return 0
    def chooseAction(self, probabilities, lastAction, events, validActions):
"""
Chooses the next action and returns the associated probabilities
Parameters
----------
probabilities : list of floats
            The probabilities associated with each combination
lastAction : int
The last chosen action
events : list of floats
            The stimuli. If probActions is True then this will be unused as the
            probabilities will already have been calculated.
validActions : 1D list or array
The actions permitted during this trialstep
Returns
-------
newAction : int
The chosen action
decProbabilities : list of floats
The weights for the different actions
"""
if np.isnan(probabilities).any():
raise ValueError("probabilities contain NaN")
decision, decProbabilities = self.decision_function(probabilities, lastAction, trial_responses=validActions)
self.decision = decision
self.currActionSymbol = decision
decisionCode = self.actionCode[decision]
return decisionCode, decProbabilities
    def overrideActionChoice(self, action):
"""
Provides a method for overriding the model action choice. This is used when fitting models to participant actions.
Parameters
----------
action : int
            Action chosen by an external source for the same situation
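
        Examples
        --------
        A minimal sketch, assuming the default model components construct
        cleanly; the participant's action symbol is mapped through
        ``actionCode``:
        >>> model = Model(action_codes={'left': 0, 'right': 1})
        >>> model.overrideActionChoice('right')
        >>> model.currAction
        1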
"""
self.currActionSymbol = action
self.currAction = self.actionCode[action]
    def choiceReflection(self):
"""
Allows the model to update its state once an action has been chosen.
"""
    def lastChoiceReinforcement(self):
"""
Allows the model to update the reward expectation for the previous trialstep given the choice made in this trialstep
Returns
-------
"""
    def actStimMerge(self, actStimuliParam, stimFilter=1):
"""
Takes the parameter to be merged by stimuli and filters it by the stimuli values
Parameters
----------
actStimuliParam : list of floats
            The list of values representing each action-stimulus pair, where the stimuli will have their filtered
values merged together.
stimFilter : array of floats or a float, optional
The list of active stimuli with their weightings or one weight for all.
Default ``1``
Returns
-------
actionParams : list of floats
The parameter values associated with each action
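
        Examples
        --------
        A small sketch, assuming the default model components construct
        cleanly, with two actions and two cues; the second cue is filtered
        out before the merge:
        >>> model = Model(number_actions=2, number_cues=2)
        >>> model.actStimMerge(np.array([1., 2., 3., 4.]), stimFilter=np.array([1, 0]))
        array([[1.],
               [3.]])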
"""
actionParamSets = np.reshape(actStimuliParam, (self.number_actions, self.number_cues))
actionParamSets = actionParamSets * stimFilter
actionParams = np.sum(actionParamSets, axis=1, keepdims=True)
return actionParams
    def returnTaskState(self):
"""
Returns all the relevant data for this model
Returns
-------
results : dictionary
"""
results = self.standardResultOutput()
return results.copy()
    def storeState(self):
"""
Stores the state of all the important variables so that they can be
accessed later
"""
self.storeStandardResults()
    def standardResultOutput(self):
"""
Returns the relevant data expected from a model as well as the parameters for the current model
Returns
-------
results : dictionary
            A dictionary of details about the model and its results
"""
results = self.parameters.copy()
results["simID"] = self.simID
results["Actions"] = np.array(self.recAction)
results["Stimuli"] = np.array(self.recStimuli).T
results["Rewards"] = np.array(self.recReward)
results["Expectations"] = np.array(self.recExpectations).T
results["ExpectedReward"] = np.array(self.recExpectedReward).flatten()
results["ExpectedRewards"] = np.array(self.recExpectedRewards).T
results["ValidActions"] = np.array(self.recValidActions).T
results["Decisions"] = np.array(self.recDecision)
results["UpdatedProbs"] = np.array(self.recProbabilities).T
results["ActionProb"] = np.array(self.recActionProb)
results["DecisionProbs"] = np.array(self.recActionProbs)
return results
    def storeStandardResults(self):
"""
Updates the store of standard results found across models
"""
self.recAction.append(self.currAction)
self.recActionSymbol.append(self.currActionSymbol)
self.recValidActions.append(self.validActions[:])
self.recDecision.append(self.decision)
self.recExpectations.append(self.expectations.flatten())
self.recExpectedRewards.append(self.expectedRewards.flatten())
self.recExpectedReward.append(self.expectedReward.flatten())
self.recStimuli.append(self.stimuli)
self.recProbabilities.append(self.probabilities.flatten())
self.recActionProbs.append(self.decProbabilities.copy())
self.recActionProb.append(self.decProbabilities[self.currActionSymbol])
    def params(self):
"""
Returns the parameters of the model
Returns
-------
parameters : dictionary
"""
return self.parameters.copy()
    def __repr__(self):
        params = self.params()
        name = params.pop('Name')
        properties = ", ".join("{}={}".format(k, repr(v)) for k, v in params.items())
        return "{}({})".format(name, properties)
    def setsimID(self, simID):
"""
Parameters
----------
        simID : float
            The identifier for the simulation
Returns
-------
"""
self.simID = simID
    @classmethod
def pattern_parameters_match(cls, *args):
"""
Validates if the parameters are described by the model patterns
Parameters
----------
*args : strings
The potential parameter names
Returns
-------
pattern_parameters : list
The args that match the patterns in parameter_patterns
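
        Examples
        --------
        A minimal sketch using a hypothetical subclass that defines its own
        patterns:
        >>> class ExampleModel(Model):
        ...     parameter_patterns = ['^alpha', '^beta']
        >>> ExampleModel.pattern_parameters_match('alpha_a', 'gamma', 'beta')
        ['alpha_a', 'beta']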
"""
pattern_parameters = []
for pattern in cls.parameter_patterns:
pattern_parameters.extend(sorted([k for k in args if re.match(pattern, k)]))
return pattern_parameters
    def kwarg_pattern_parameters(self, kwargs):
"""
Extracts the kwarg parameters that are described by the model patterns
Parameters
----------
kwargs : dict
The class initialisation kwargs
Returns
-------
        pattern_parameter_dict : dict
            A subset of kwargs that match the patterns in parameter_patterns.
            The matching keys are removed (popped) from ``kwargs``.
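
        Examples
        --------
        A minimal sketch with a hypothetical subclass, assuming the default
        model components construct cleanly; note the matched key is removed
        from the original dict:
        >>> class ExampleModel(Model):
        ...     parameter_patterns = ['^alpha']
        >>> model = ExampleModel()
        >>> kwargs = {'alpha_a': 0.5, 'gamma': 1}
        >>> dict(model.kwarg_pattern_parameters(kwargs))
        {'alpha_a': 0.5}
        >>> kwargs
        {'gamma': 1}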
"""
pattern_parameter_keys = self.pattern_parameters_match(*kwargs.keys())
pattern_parameter_dict = collections.OrderedDict()
for k in pattern_parameter_keys:
pattern_parameter_dict[k] = kwargs.pop(k)
return pattern_parameter_dict