# -*- coding: utf-8 -*-
"""
:Author: Dominic Hunt
"""
import numpy as np
import copy
import re
import collections
from model.decision.discrete import weightProb
import utils
class Stimulus(object):
"""
    Stimulus processor class. This acts as an interface between an observation and the model. The base class does nothing to the observation.
Attributes
----------
Name : string
        The identifier of the class
"""
# Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
name = '{}.{}'.format(cls.__module__, cls.__name__)
return name
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
self.Name = self.get_name()
    def details(self):
properties = [str(k) + ' : ' + str(v).strip('[]()') for k, v in self.__dict__.items() if k != "Name"]
desc = self.Name + " with " + ", ".join(properties)
return desc
    def processStimulus(self, observation):
"""
Takes the observation and turns it into a form the model can use
Parameters
----------
        observation : {int | float | tuple}
            The raw observation from the task
Returns
-------
        stimuliPresent : int or list of int
            The stimuli present in the observation
        stimuliActivity : float or list of float
            The activity level of each stimulus
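
        Examples
        --------
        The base class ignores the observation and reports a single active
        stimulus:
        >>> Stimulus().processStimulus(0)
        (1, 1)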
"""
return 1, 1
class Rewards(object):
"""
This acts as an interface between the feedback from a task and the feedback a model can process
Attributes
----------
Name : string
        The identifier of the class
"""
# Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
name = '{}.{}'.format(cls.__module__, cls.__name__)
return name
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
self.Name = self.get_name()
    def details(self):
properties = [str(k) + ' : ' + str(v).strip('[]()') for k, v in self.__dict__.items() if k != "Name"]
desc = self.Name + " with " + ", ".join(properties)
return desc
    def processFeedback(self, feedback, lastAction, stimuli):
"""
Takes the feedback and turns it into a form to be processed by the model
Parameters
----------
        feedback : {int | float | tuple}
            The feedback from the task
        lastAction : int
            The last chosen action
        stimuli : list of float
            The processed stimuli
        Returns
        -------
        modelFeedback :
            The feedback in a form the model can process
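
        Examples
        --------
        The base class passes the feedback through unchanged:
        >>> Rewards().processFeedback(1, 0, [1])
        1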
"""
return feedback
class Model(object):
"""
The model class is a general template for a model. It also contains
universal methods used by all models.
Attributes
----------
Name : string
The name of the class used when recording what has been used.
currAction : int
The current action chosen by the model. Used to pass participant action
to model when fitting
Parameters
----------
number_actions : integer, optional
The maximum number of valid actions the model can expect to receive.
Default 2.
number_cues : integer, optional
The initial maximum number of stimuli the model can expect to receive.
Default 1.
number_critics : integer, optional
The number of different reaction learning sets.
Default number_actions*number_cues
action_codes : dict with string or int as keys and int values, optional
A dictionary used to convert between the action references used by the
task or dataset and references used in the models to describe the order
in which the action information is stored.
prior : array of floats in ``[0,1]``, optional
        The prior probability of the states being the correct one.
        Default ``ones(number_actions) / number_actions``
stimulus_shaper_name : string, optional
The name of the function that transforms the stimulus into a form the model can
understand and a string to identify it later. ``stimulus_shaper`` takes priority
reward_shaper_name : string, optional
The name of the function that transforms the reward into a form the model can
        understand. ``reward_shaper`` takes priority
decision_function_name : string, optional
        The name of the function that takes the internal values of the model and turns them
        into a decision. ``decision_function`` takes priority
stimulus_shaper : Stimulus class, optional
The class that transforms the stimulus into a form the model can
understand and a string to identify it later. Default is Stimulus
reward_shaper : Rewards class, optional
The class that transforms the reward into a form the model can
understand. Default is Rewards
decision_function : function, optional
        The function that takes the internal values of the model and turns them into a decision.
Default is ``weightProb(list(range(number_actions)))``
stimulus_shaper_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
reward_shaper_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
decision_function_properties : list, optional
The valid parameters of the function. Used to filter the unlisted keyword arguments
Default is ``None``
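
    Examples
    --------
    A minimal construction sketch, assuming the default ``Stimulus``,
    ``Rewards`` and ``weightProb`` components imported above are available:
    >>> model = Model(number_actions=3)
    >>> model.params()['number_actions']
    3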
"""
#Name = __qualname__ ## TODO: start using when moved to Python 3. See https://docs.python.org/3/glossary.html#term-qualified-name
    @classmethod
def get_name(cls):
return cls.__name__
# TODO: define and start using non_action
    parameter_patterns = []
def __init__(self, number_actions=2, number_cues=1, number_critics=None,
action_codes=None, non_action='None',
prior=None,
stimulus_shaper=None, stimulus_shaper_name=None, stimulus_shaper_properties=None,
reward_shaper=None, reward_shaper_name=None, reward_shaper_properties=None,
decision_function=None, decision_function_name=None, decision_function_properties=None,
**kwargs):
""""""
self.Name = self.get_name()
        self.pattern_parameters = self.kwarg_pattern_parameters(kwargs)
        for k, v in self.pattern_parameters.items():
            setattr(self, k, v)
self.number_actions = number_actions
self.number_cues = number_cues
if number_critics is None:
number_critics = self.number_actions * self.number_cues
self.number_critics = number_critics
if action_codes is None:
action_codes = {k: k for k in range(self.number_actions)}
self.actionCode = action_codes
self.defaultNonAction = non_action
if prior is None:
prior = np.ones(self.number_actions) / self.number_actions
self.prior = prior
self.stimuli = np.ones(self.number_cues)
self.stimuliFilter = np.ones(self.number_cues)
        self.currAction = None
        self.currActionSymbol = None
self.decision = None
self.validActions = None
self.lastObservation = None
        self.probabilities = np.array(self.prior)
        self.decProbabilities = np.array(self.prior)
        # Initialised here so the standard recording methods work on the base
        # class; subclasses normally overwrite this with their own expectations
        self.expectations = np.ones((self.number_actions, self.number_cues)) / self.number_critics
        self.expectedRewards = np.ones(self.number_actions)
        self.expectedReward = np.array([1])
if stimulus_shaper is not None and issubclass(stimulus_shaper, Stimulus):
if stimulus_shaper_properties is not None:
stimulus_shaper_kwargs = {k: v for k, v in kwargs.items() if k in stimulus_shaper_properties}
else:
stimulus_shaper_kwargs = kwargs.copy()
self.stimulus_shaper = stimulus_shaper(**stimulus_shaper_kwargs)
elif isinstance(stimulus_shaper_name, str):
stimulus_class = utils.find_class(stimulus_shaper_name,
class_folder='tasks',
inherited_class=Stimulus,
excluded_files=['taskTemplate', '__init__', 'taskGenerator'])
stimulus_shaper_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_class_args(stimulus_class)}
self.stimulus_shaper = stimulus_class(**stimulus_shaper_kwargs)
else:
self.stimulus_shaper = Stimulus()
if reward_shaper is not None and issubclass(reward_shaper, Rewards):
if reward_shaper_properties is not None:
reward_shaper_kwargs = {k: v for k, v in kwargs.items() if k in reward_shaper_properties}
else:
reward_shaper_kwargs = kwargs.copy()
self.reward_shaper = reward_shaper(**reward_shaper_kwargs)
elif isinstance(reward_shaper_name, str):
reward_class = utils.find_class(reward_shaper_name,
class_folder='tasks',
inherited_class=Rewards,
excluded_files=['taskTemplate', '__init__', 'taskGenerator'])
reward_shaper_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_class_args(reward_class)}
            self.reward_shaper = reward_class(**reward_shaper_kwargs)
else:
self.reward_shaper = Rewards()
if callable(decision_function):
if decision_function_properties is not None:
decision_shaper_kwargs = {k: v for k, v in kwargs.items() if k in decision_function_properties}
else:
decision_shaper_kwargs = kwargs.copy()
self.decision_function = decision_function(**decision_shaper_kwargs)
elif isinstance(decision_function_name, str):
decision_function = utils.find_function(decision_function_name, 'model/decision')
decision_function_kwargs = {k: v for k, v in kwargs.items() if k in utils.get_function_args(decision_function)}
self.decision_function = decision_function(**decision_function_kwargs)
else:
self.decision_function = weightProb(list(range(self.number_actions)))
self.parameters = {"Name": self.Name,
"number_actions": self.number_actions,
"number_cues": self.number_cues,
"number_critics": self.number_critics,
"prior": copy.copy(self.prior),
"non_action": self.defaultNonAction,
"actionCode": copy.copy(self.actionCode),
"stimulus_shaper": self.stimulus_shaper.details(),
"reward_shaper": self.reward_shaper.details(),
"decision_function": utils.callableDetailsString(self.decision_function)}
self.parameters.update(self.pattern_parameters)
# Recorded information
self.recAction = []
self.recActionSymbol = []
self.recStimuli = []
self.recReward = []
self.recExpectations = []
self.recExpectedReward = []
self.recExpectedRewards = []
self.recValidActions = []
self.recDecision = []
self.recProbabilities = []
self.recActionProbs = []
self.recActionProb = []
self.simID = None
    def __eq__(self, other):
        # TODO: Expand this to cover the parameters properly
        return self.Name == other.Name
    def __ne__(self, other):
        return self.Name != other.Name
def __hash__(self):
return hash(self.Name)
    def action(self):
"""
Returns the action of the model
Returns
-------
action : integer or None
"""
return self.currActionSymbol
    def observe(self, state):
"""
Receives the latest observation and decides what to do with it
There are five possible states:
Observation
Observation Action
Observation Action Feedback
Action Feedback
Observation Feedback
Parameters
----------
        state : tuple of ({int | float | tuple}, {tuple of int | None})
            The stimulus from the task followed by the tuple of valid
            actions. Any unprocessed previous observation is learnt from
            via ``self.processEvent`` before the new observation is stored.
"""
events, validActions = state
lastEvents = self.lastObservation
self.validActions = validActions
# If the last observation still has not been processed,
# and there has been no feedback, then process it.
# There may have been an action but feedback was None
# Since we have another observation it is time to learn from the previous one
if lastEvents is not None:
self.processEvent(self.currAction)
self.storeState()
self.lastObservation = events
# Find the reward expectations
self.expectedRewards, self.stimuli, self.stimuliFilter = self.rewardExpectation(events)
expectedProbs = self.actorStimulusProbs()
# If the model is not expected to act, use a dummy action,
# Otherwise choose an action
lastAction = self.currAction
if validActions is self.defaultNonAction:
self.currAction = self.defaultNonAction
else:
self.currAction, self.decProbabilities = self.chooseAction(expectedProbs, lastAction, events, validActions)
# Now that the action has been chosen, add any reinforcement of the previous choice in the expectations
self.lastChoiceReinforcement()
    def feedback(self, response):
"""
Receives the reaction to the action and processes it
Parameters
----------
response : float
The response from the task after an action. Returns without doing
anything if the value of response is `None`.
"""
# If there is feedback
if response is not None:
self.processEvent(self.currAction, response)
self.lastObservation = None
self.storeState()
    def processEvent(self, action=None, response=None):
"""
Integrates the information from a stimulus, action, response set, regardless
of which of the three elements are present.
Parameters
----------
action : int, optional
The chosen action of the model. Default ``None``
response : float, optional
The response from the task after an action. Default ``None``
"""
self.recReward.append(response)
# If there were any last reflections to do on the action chosen before processing the new event, now is the last
# chance to do it
self.choiceReflection()
# If there was a reward passed but it was empty, there is nothing to update
if response is not None and (np.size(response) == 0 or np.isnan(response)):
return
# Find the reward expectation
expectedReward = self.expectedRewards[action]
self.expectedReward = expectedReward
        # If there was no reward, the stimulus is the learnt 'reward'
if response is None:
response = self.stimuli
# Find the significance of the discrepancy between the response and the expected response
delta = self.delta(response, expectedReward, action, self.stimuli)
# Use that discrepancy to update the model
self.updateModel(delta, action, self.stimuli, self.stimuliFilter)
    def rewardExpectation(self, stimuli):
"""Calculate the expected reward for each action based on the stimuli
This contains parts that are task dependent
Parameters
----------
stimuli : {int | float | tuple}
The set of stimuli
Returns
-------
        expectedRewards : float or array of floats
            The expected reward for each action
stimuli : list of floats
The processed observations
activeStimuli : list of [0, 1] mapping to [False, True]
A list of the stimuli that were or were not present
"""
# Calculate expectation by identifying the relevant stimuli for the action
# First identify the expectations relevant to the action
# Filter them weighted by the stimuli
# Calculate the combined value
# Return the value
        # stimuli = self.stimulus_shaper.processStimulus(stimuli)
return 0, stimuli, 0
    def delta(self, reward, expectation, action, stimuli):
"""
Calculates the comparison between the reward and the expectation
Parameters
----------
reward : float
The reward value
expectation : float
The expected reward value
action : int
The chosen action
stimuli : {int | float | tuple | None}
The stimuli received
Returns
-------
        delta : float
            The difference between the reward and the expectation
"""
        modReward = self.reward_shaper.processFeedback(reward, action, stimuli)
        # This is a stub: subclasses are expected to compare modReward with
        # the expectation and return the discrepancy
        return 0
    def updateModel(self, delta, action, stimuli, stimuliFilter):
"""
Parameters
----------
delta : float
The difference between the reward and the expected reward
action : int
The action chosen by the model in this trialstep
stimuli : list of float
The weights of the different stimuli in this trialstep
stimuliFilter : list of bool
A list describing if a stimulus cue is present in this trialstep
"""
# There is no model here
    def calcProbabilities(self, actionValues):
"""
Calculate the probabilities associated with the action
Parameters
----------
        actionValues : 1D ndarray of floats
Returns
-------
        probArray : 1D ndarray of floats
The probabilities associated with the actionValues
"""
# There is no model here
return 0
    def actorStimulusProbs(self):
"""
        Calculates the probability of each action in the model-appropriate way.
Returns
-------
        probabilities : 1D ndarray of floats
The probabilities associated with the action choices
"""
return 0
    def chooseAction(self, probabilities, lastAction, events, validActions):
"""
Chooses the next action and returns the associated probabilities
Parameters
----------
probabilities : list of floats
            The probabilities associated with each combination
lastAction : int
The last chosen action
events : list of floats
            The stimuli. If probActions is True then this will be unused as the
            probabilities will already have been calculated.
validActions : 1D list or array
The actions permitted during this trialstep
Returns
-------
newAction : int
The chosen action
decProbabilities : list of floats
The weights for the different actions
"""
if np.isnan(probabilities).any():
raise ValueError("probabilities contain NaN")
decision, decProbabilities = self.decision_function(probabilities, lastAction, trial_responses=validActions)
self.decision = decision
self.currActionSymbol = decision
decisionCode = self.actionCode[decision]
return decisionCode, decProbabilities
    def overrideActionChoice(self, action):
"""
Provides a method for overriding the model action choice. This is used when fitting models to participant actions.
Parameters
----------
action : int
            Action chosen by an external source for the same situation
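
        Examples
        --------
        A minimal sketch, assuming the default model components construct
        cleanly; the participant's action symbol is mapped through
        ``actionCode``:
        >>> model = Model(action_codes={'left': 0, 'right': 1})
        >>> model.overrideActionChoice('right')
        >>> model.currAction
        1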
"""
self.currActionSymbol = action
self.currAction = self.actionCode[action]
    def choiceReflection(self):
"""
Allows the model to update its state once an action has been chosen.
"""
    def lastChoiceReinforcement(self):
"""
Allows the model to update the reward expectation for the previous trialstep given the choice made in this trialstep
Returns
-------
"""
    def actStimMerge(self, actStimuliParam, stimFilter=1):
"""
Takes the parameter to be merged by stimuli and filters it by the stimuli values
Parameters
----------
actStimuliParam : list of floats
            The list of values representing each action-stimulus pair, where the stimuli will have their filtered
values merged together.
stimFilter : array of floats or a float, optional
The list of active stimuli with their weightings or one weight for all.
Default ``1``
Returns
-------
actionParams : list of floats
The parameter values associated with each action
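
        Examples
        --------
        A small sketch, assuming the default model components construct
        cleanly, with two actions and two cues; the second cue is filtered
        out before the merge:
        >>> model = Model(number_actions=2, number_cues=2)
        >>> model.actStimMerge(np.array([1., 2., 3., 4.]), stimFilter=np.array([1, 0]))
        array([[1.],
               [3.]])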
"""
actionParamSets = np.reshape(actStimuliParam, (self.number_actions, self.number_cues))
actionParamSets = actionParamSets * stimFilter
actionParams = np.sum(actionParamSets, axis=1, keepdims=True)
return actionParams
    def returnTaskState(self):
"""
Returns all the relevant data for this model
Returns
-------
results : dictionary
"""
results = self.standardResultOutput()
return results.copy()
    def storeState(self):
"""
Stores the state of all the important variables so that they can be
accessed later
"""
self.storeStandardResults()
    def standardResultOutput(self):
"""
Returns the relevant data expected from a model as well as the parameters for the current model
Returns
-------
results : dictionary
            A dictionary of details about the model and its results
"""
results = self.parameters.copy()
results["simID"] = self.simID
results["Actions"] = np.array(self.recAction)
results["Stimuli"] = np.array(self.recStimuli).T
results["Rewards"] = np.array(self.recReward)
results["Expectations"] = np.array(self.recExpectations).T
results["ExpectedReward"] = np.array(self.recExpectedReward).flatten()
results["ExpectedRewards"] = np.array(self.recExpectedRewards).T
results["ValidActions"] = np.array(self.recValidActions).T
results["Decisions"] = np.array(self.recDecision)
results["UpdatedProbs"] = np.array(self.recProbabilities).T
results["ActionProb"] = np.array(self.recActionProb)
results["DecisionProbs"] = np.array(self.recActionProbs)
return results
    def storeStandardResults(self):
"""
Updates the store of standard results found across models
"""
self.recAction.append(self.currAction)
self.recActionSymbol.append(self.currActionSymbol)
self.recValidActions.append(self.validActions[:])
self.recDecision.append(self.decision)
self.recExpectations.append(self.expectations.flatten())
self.recExpectedRewards.append(self.expectedRewards.flatten())
self.recExpectedReward.append(self.expectedReward.flatten())
self.recStimuli.append(self.stimuli)
self.recProbabilities.append(self.probabilities.flatten())
self.recActionProbs.append(self.decProbabilities.copy())
self.recActionProb.append(self.decProbabilities[self.currActionSymbol])
    def params(self):
"""
Returns the parameters of the model
Returns
-------
parameters : dictionary
"""
return self.parameters.copy()
    def __repr__(self):
        params = self.params()
        name = params.pop('Name')
        properties = ", ".join("{}={}".format(k, repr(v)) for k, v in params.items())
        return "{}({})".format(name, properties)
    def setsimID(self, simID):
"""
Parameters
----------
        simID : float
            The identifier for the simulation
Returns
-------
"""
self.simID = simID
    @classmethod
def pattern_parameters_match(cls, *args):
"""
Validates if the parameters are described by the model patterns
Parameters
----------
*args : strings
The potential parameter names
Returns
-------
pattern_parameters : list
The args that match the patterns in parameter_patterns
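
        Examples
        --------
        A minimal sketch using a hypothetical subclass that defines its own
        patterns:
        >>> class ExampleModel(Model):
        ...     parameter_patterns = ['^alpha', '^beta']
        >>> ExampleModel.pattern_parameters_match('alpha_a', 'gamma', 'beta')
        ['alpha_a', 'beta']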
"""
pattern_parameters = []
for pattern in cls.parameter_patterns:
pattern_parameters.extend(sorted([k for k in args if re.match(pattern, k)]))
return pattern_parameters
    def kwarg_pattern_parameters(self, kwargs):
"""
Extracts the kwarg parameters that are described by the model patterns
Parameters
----------
kwargs : dict
The class initialisation kwargs
Returns
-------
        pattern_parameter_dict : dict
            A subset of kwargs that match the patterns in parameter_patterns.
            The matching keys are removed (popped) from ``kwargs``.
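
        Examples
        --------
        A minimal sketch with a hypothetical subclass, assuming the default
        model components construct cleanly; note the matched key is removed
        from the original dict:
        >>> class ExampleModel(Model):
        ...     parameter_patterns = ['^alpha']
        >>> model = ExampleModel()
        >>> kwargs = {'alpha_a': 0.5, 'gamma': 1}
        >>> dict(model.kwarg_pattern_parameters(kwargs))
        {'alpha_a': 0.5}
        >>> kwargs
        {'gamma': 1}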
"""
pattern_parameter_keys = self.pattern_parameters_match(*kwargs.keys())
pattern_parameter_dict = collections.OrderedDict()
for k in pattern_parameter_keys:
pattern_parameter_dict[k] = kwargs.pop(k)
return pattern_parameter_dict