# Copyright (c) 2009 Tim Freeman # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # (This is the standard MIT License, copied from # http://www.opensource.org/licenses/mit-license.php on 24 Apr 2007.) #desc Code for inferring an agent's utility function from his #desc voluntary behavior and perceptions. Uses mind_body to first #desc infer the voluntary behavior. from bits import ensure, Doesnt_match, argmax, Ambiguous_argmax from mind_body import Mind_body_problem, Mind_body_explanation import traceback # An explanation of the world where we assume that the entities with # minds have a utility function and act to maximize their utility. class Infer_utility_explanation(Mind_body_explanation): def __init__(self, # Hmm, my hope that this subclassing thing would be # seamless and elegant just fell down when I had to # enumerate all this. Yuck. explanations=None, compute_behavior=None, nonmind_physics=None, compute_perceptions=None, mind_physics=None, beliefs=None, compute_utility=None, **kwargs): if explanations is None: assert compute_behavior is not None # The first four things listed here have to match up with # the positions of them in the init method of # Mind_body_explanation. Not very modular. Sorry. explanations = [compute_behavior, nonmind_physics, compute_perceptions, mind_physics, beliefs, compute_utility] assert explanations is not None # Because explanations isn't none, the fact that we've pulled # all the interesting keyword arguments off of the argument list # presented to Mind_body_explanation.__init__ doesn't matter. Mind_body_explanation.__init__(self, explanations=explanations, **kwargs) def beliefs_program(self): return self.explanations[4] def compute_utility_program(self): return self.explanations[5] # Returns a Python pair (bnab, bnms). # bnab is the believed non-A behaviors, which is a dict mapping agent id's # other than A to behaviors, # bnms is the believed non-mind state, which is a tape suitable as # an input to nonmind_physics. def beliefs(self, A, possibility, mind_state, all_ids): # A is the id of the agent we're inferring beliefs for. It's # a tape. # possibility is a number between 0 and 2**possibility_bits, # so A can do reasoning under uncertainty. # mind_state is a tape with A's mind_state. # all_ids is a list of all agent id's, in order, so we can # parse the output from running the beliefs program. p = self.beliefs_program() result_tape = p.run( p.build_tuple(A, p.num_to_value(possibility), mind_state)) bnab = {} for i in range(len(all_ids)): if all_ids[i] != A: bnab[all_ids[i]] = p.nth(result_tape, i) bnms_tape = p.nth(result_tape, len(all_ids)) return (bnab, bnms_tape) # Compute the given agent's utility, assuming the given nonmind_state. # Returns an integer. def compute_utility(self, A, nonmind_state): p = self.compute_utility_program() return p.value_to_num(p.run(p.build_tuple(A, nonmind_state))) # Compute what we think agent A would have done, given our guesses # about his beliefs and utility function. # This is the Expected-Utility-Maximizer in the big diagram in the paper. def optimal_behavior(self, A, possible_behaviors, possibility_bits, all_ids, mind_state): def expected_utility(b): total_utility = 0 for possibility in range(1L< 0 # possible_ai_behaviors is a list of what the AI could do. We # need to know this if we're to choose the best next behavior # among what is possible. Each element of this is a tape. assert possible_ai_behaviors is not None self.possible_ai_behaviors = possible_ai_behaviors # possible_ai_perceptions is a list of what the AI could # possibly perceive. The AI needs to know this so it can plan for all # possibilities it may encounter. assert possible_ai_perceptions is not None self.possible_ai_perceptions = possible_ai_perceptions # possible_non_ai_behaviors is a list of the behaviors that a non-AI # agent could possibly take. We need to know that because the # right value for compute_utility gives the highest utility # possible behavior equal to the actual behavior. For # simplicity we assume that all agents other than the ai have the # same set of possible behaviors. self.possible_non_ai_behaviors = possible_non_ai_behaviors # utility_bits is the number of bits of output we permit the # utility function to have. We have to have a limit here, # since planning around unbounded utilities presents problems. self.utility_bits = utility_bits # possibility_bits is the log of the number of possibilities # the AI is willing to imagine that other agents consider, # when the other agents are doing planning in the presence of # uncertainty. The run time is exponential in possibility_bits. # We assume the possibilities all have the same probability. # If you want to represent possibilities with different # probabilities, just repeat some of them. self.possibility_bits = possibility_bits self.extra_match_condition = extra_match_condition def check_state(self, state, explanation): Mind_body_problem.check_state(self, state, explanation) for A in self.other_agent_ids: optimal = explanation.optimal_behavior( A=A, possible_behaviors = self.possible_non_ai_behaviors, possibility_bits = self.possibility_bits, all_ids = self.all_agent_ids, mind_state = state.mind_states[A]) # The apparently-optimal behavior has to match the # behavior we inferred from our world model, otherwise it's the # wrong utility function. #if optimal != state.behaviors[A]: # print "Failing in check_state, optimal is %r, state.behaviors[A] is %r" % (optimal, state.behaviors[A]) ensure(optimal == state.behaviors[A]) # Use the explanation to simulate from the beginning to the # current time, then run given plan starting at the current state # until we reach the end of the plan. Return the final state when # the plan is done. def run_plan(self, behaviors, explanation, plan): # This code has to work even if there are no ai_behaviors yet. # If there are no ai_behaviors and plan is None, then we # would return a state that doesn't have the perception and # behavior filled in. But the plan will only be None if we # have zero steps before our planning horizon, so we shouldn't # get here anyway. s = None for i in range(0, len(behaviors)): s = self.next_state(explanation, s) assert s.timestep == i self.fill_in_perception(explanation, s) self.fill_in_behaviors(explanation, s, behaviors[i]) #print "Recapitulating, state is %r" % (s,) for i in range(len(behaviors), self.last_timestep()): s = self.next_state(explanation, s) plan.assert_has_depth(self.last_timestep() - s.timestep) assert s.timestep == i self.fill_in_perception(explanation, s) perception = s.perceptions[self.ai_agent_id] (behavior, plan) = plan.step(perception) self.fill_in_behaviors(explanation, s, behavior) #print "At next timestep, behavior was %r, state is %r" % (behavior, s,) plan.assert_has_depth(0) assert s.timestep == self.last_timestep() - 1 # Get the final perceptions, since that influences the # final utility. s = self.next_state(explanation, s) self.fill_in_perception(explanation, s) # Don't fill in the behavior. assert s.timestep == self.last_timestep() return s # Return if the explanation is consistent with itself and with the # observations made so far, or raise Doesnt_match if it doesn't. def matches(self, explanation): try: #print "Top of infer_utility.matches." Mind_body_problem.matches(self, explanation) #print "Passed mbp.matches." if self.extra_match_condition is not None: self.extra_match_condition(explanation) #print "Passed extra_match_condition." except Doesnt_match: #print "The explanation %r did not match" % (explanation,) raise # Return a dictionary mapping each agent id to the utility for # that agent, given a state and an explanation. This # is only used for states at the planning horizon. # This returns a mapping from agent id's to integers. def final_utility(self, explanation, state): def compute_utility(A, nonmind_state): return explanation.compute_utility(A, nonmind_state) result = {} for agent in self.other_agent_ids: result[agent] = compute_utility(agent, state.nonmind_state) return result # The timestep of the end of the planning horizon. This is # exclusive. In other words, the last step we worry about has # timestep self.last_timestep() - 1. def last_timestep(self): return len(self.ai_behavior) + self.horizon