# Copyright (c) 2009 Tim Freeman # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # (This is the standard MIT License, copied from # http://www.opensource.org/licenses/mit-license.php on 24 Apr 2007.) #desc Given the ability to infer utility functions for the known #desc agents in the world (typically from infer_utility.py) and a plan #desc for combining the utilities of the individual agents into a #desc grand total (from utility_combiners.py), figure out what to do next. 
from bits import Doesnt_match, argmax
from physics import laws_of_physics
from plan_generators import exhaustive, dict_to_plan

# Python 2 has a separate unbounded "long" type; Python 3 folded it into
# int.  Resolve the name once here so the code below runs on both.
try:
    _long = long
except NameError:
    _long = int

# The exact types a computed utility is allowed to have.
_UTILITY_TYPES = (int, _long, float)


def multiply_utility_dict(const, d):
    """Return a new dict with every value of d multiplied by const.

    d maps agent ids to utilities.  d itself is not modified.
    """
    return {agent: const * d[agent] for agent in d}


def add_utility_dict(d1, d2):
    """Return a new dict holding d1[agent] + d2[agent] for each agent in d1.

    Only the keys of d1 are visited: a key that is missing from d2
    raises KeyError, and keys that appear only in d2 are ignored.
    Neither argument is modified.
    """
    return {agent: d1[agent] + d2[agent] for agent in d1}


class Planner(object):
    """Choose the AI's next action by maximizing combined expected utility.

    The planner builds a problem object from problem_class, asks
    laws_of_physics for the explanations of that problem, and then (in
    next_action) scores every possible next behavior by the best plan
    that can follow it.
    """

    __slots__ = ["problem", "explanations", "eps", "plan_generator",
                 "respect_start", "do_nothing_action"]

    def __init__(self, problem_class=None, eps=None,
                 plan_generator=exhaustive,
                 respect_start=None,
                 do_nothing_action=None,
                 possible_ai_behaviors=None,
                 **kwargs):
        """Build the problem and compute its explanations.

        problem_class -- callable that builds the problem.  It is called
            with possible_ai_behaviors, extra_match_condition and any
            extra keyword arguments.
        eps -- passed straight through to laws_of_physics.
        plan_generator -- callable taking perceptions, behaviors and
            horizon keyword arguments; see all_plans.
        respect_start -- index into the AI's past behaviors at which the
            respect interval begins.
        do_nothing_action -- the behavior that counts as "doing nothing";
            must be one of possible_ai_behaviors.
        possible_ai_behaviors -- the behaviors the AI may choose from.

        All requirements are checked with assert, so violations raise
        AssertionError (and are skipped entirely under python -O).
        """
        # Check everything that can be checked cheaply before doing any
        # real work, so a bad argument fails fast and clearly.
        assert problem_class is not None
        assert eps is not None
        assert plan_generator is not None
        assert do_nothing_action is not None
        assert possible_ai_behaviors is not None
        assert do_nothing_action in possible_ai_behaviors
        assert respect_start is not None
        assert respect_start >= 0

        self.do_nothing_action = do_nothing_action
        self.plan_generator = plan_generator
        # respect_start is the beginning of the period of time that
        # we're being respectful.  Used by do_nothing_utility_dict,
        # since disrespect is defined to be harming people worse than
        # doing nothing since the beginning of the plan.
        # It is stored before the problem is built because
        # extra_match_condition, which is handed to the problem below,
        # reads it.
        self.respect_start = respect_start

        self.problem = problem_class(
            possible_ai_behaviors=possible_ai_behaviors,
            extra_match_condition=self.extra_match_condition,
            **kwargs)
        assert self.problem is not None
        # We can't choose a next action if there are no more actions
        # to take before the planning horizon.
        assert self.problem.horizon > 0
        # We can't start the respect interval in the future.
        assert respect_start <= len(self.problem.ai_behavior)

        # We have one more perception than action, so our plan from
        # here is just an action and a subsequent plan.
        # Each explanation, when run against a plan, gets into a
        # specific final state.  We can compute utilities for these.
        # The explanations have probabilities, so we can compute
        # expected utilities for plans.  Given a perception, take the
        # action that leads to the plan with the highest expected utility.
        # We should have one more perception than action.
        # The call to laws_of_physics immediately below can be slow.
        # explanations is used in plan_utility_map.
        self.explanations = laws_of_physics(self.problem, eps)
        # If we don't have any explanations, we can't make meaningful
        # decisions.  This should only be possible during test scenarios.
        # We can't really run non-test scenarios, so it's possible always!
        assert len(self.explanations) > 0
        pass  # Manipulate bugs in the coverage analyzer.

    def all_plans(self, horizon):
        """Return whatever plan_generator produces for the given horizon.

        The generator is given the problem's possible AI perceptions and
        behaviors.
        """
        problem = self.problem
        return self.plan_generator(
            perceptions=problem.possible_ai_perceptions,
            behaviors=problem.possible_ai_behaviors,
            horizon=horizon)

    def check_reasonable_utilities(self, state, explanation):
        """Check that explanation yields a usable utility for every agent.

        For each id in problem.other_agent_ids the utility is computed
        from state.nonmind_state.  It must be an int, long or float, and
        lie in the half-open range [0, 2 ** problem.utility_bits).

        Raises Doesnt_match if a utility is out of bounds, and
        AssertionError if it has an unexpected type.  Anything raised by
        explanation.compute_utility propagates unchanged.
        """
        problem = self.problem
        # We cannot really have negative utilities with the current
        # code, since when we convert tapes to integers we interpret
        # them as unsigned integers.
        maxu = _long(1) << problem.utility_bits
        for A in problem.other_agent_ids:
            # Fail if the utility cannot be computed.
            utility = explanation.compute_utility(A, state.nonmind_state)
            assert type(utility) in _UTILITY_TYPES
            # Fail if the utility is out of bounds.
            if utility < 0 or utility >= maxu:
                raise Doesnt_match(
                    "Utility %r is not between 0 and %r" % (utility, maxu))

    # Code that gets added to "matches".
    def extra_match_condition(self, explanation):
        """Reject explanations the planner could not use to decide.

        Returns None when the explanation is usable; otherwise whatever
        check_reasonable_utilities raises propagates to the caller.
        """
        problem = self.problem
        # The explanation has to give some predicted final state and
        # utility for what would have happened if the AI did nothing
        # retroactive to respect start, going from there up to the end
        # of the planning horizon.
        respect_state = problem.run_plan(self.respect_relevant_behaviors(),
                                         explanation,
                                         dict_to_plan(None))
        self.check_reasonable_utilities(respect_state, explanation)
        # If the explanation fails to give predicted utilities for all
        # agents other than the AI and all possible AI behaviors up
        # until the time horizon, the AI can't use it as a basis for
        # deciding what to do, so we have to say it doesn't match.
        for plan in self.all_plans(problem.horizon):
            state = problem.run_plan(problem.ai_behavior, explanation, plan)
            self.check_reasonable_utilities(state, explanation)

    def zero_utility_dict(self):
        """Return a fresh dict mapping every other agent's id to 0.0."""
        return {agent: 0.0 for agent in self.problem.other_agent_ids}

    def plan_utility_map(self, plan, behaviors):
        """Return a dict mapping agent ids to expected utility for plan.

        behaviors is the sequence of AI behaviors to run before the
        plan; it may not be longer than problem.last_timestep().  For
        each explanation e in self.explanations the plan is run, the
        final utilities are computed, and they are added into the total
        weighted by e.measure().
        """
        problem = self.problem
        assert len(behaviors) <= problem.last_timestep()
        this_plan_utility = self.zero_utility_dict()
        for e in self.explanations:
            s = problem.run_plan(behaviors, e, plan)
            fu = problem.final_utility(e, s)
            this_plan_utility = add_utility_dict(
                this_plan_utility,
                multiply_utility_dict(e.measure(), fu))
        return this_plan_utility

    def respect_relevant_behaviors(self):
        """Return the behaviors for the "AI did nothing" scenario.

        The result is the AI's real behaviors before respect_start
        followed by do_nothing_action for every remaining step up to
        problem.last_timestep().
        """
        problem = self.problem
        # We did what we did from time 0 (inclusive) to respect_start
        # (exclusive).  Slicing copies, so problem.ai_behavior is left
        # untouched by the extend below.
        behaviors = problem.ai_behavior[0:self.respect_start]
        # From respect_start to the end of the planning horizon, we
        # didn't and won't do anything.  A non-positive count adds
        # nothing, just as an empty range would.
        idle_steps = problem.last_timestep() - self.respect_start
        behaviors.extend([self.do_nothing_action] * idle_steps)
        return behaviors

    # Return the utility for each agent assuming that the AI did and
    # will do nothing between respect_start and the end of the
    # planning horizon.
    def do_nothing_utility_dict(self):
        """Return plan_utility_map for the do-nothing scenario."""
        return self.plan_utility_map(dict_to_plan(None),
                                     self.respect_relevant_behaviors())

    # This method returns what the AI chooses to do next.
    # In the normal case when testing, you'll probably want paranoid
    # to be True.  This causes an error if the AI perceives a choice
    # between distinct equally good alternatives.  For "real life",
    # which this code will never see, we don't care which equally good
    # alternative the AI takes.  If we cared we'd change the
    # utility function to represent the fact that we care.  Thus
    # paranoid should be False in that situation.
    def next_action(self, utility_combiner, paranoid=True):
        """Return the behavior whose best follow-up plan scores highest.

        utility_combiner -- object whose ai_utility(utilities, dnud)
            method turns a per-agent utility dict and the do-nothing
            utility dict into a single comparable score.
        paranoid -- forwarded to argmax.
        """
        problem = self.problem
        # The horizon is the number of behaviors the plan describes.
        # It's one less than the horizon passed in because the code
        # right here is determining one of the behaviors.
        plans = self.all_plans(problem.horizon - 1)
        assert len(plans) > 0
        dnud = self.do_nothing_utility_dict()

        # Set best_action to the action that leaves us with the best
        # plan.
        # We have to choose the plan before we choose the explanation,
        # since we don't know which explanation is true.
        def behavior_utility(behavior):
            behaviors = problem.ai_behavior + [behavior]

            # The utility of the behavior is the utility of the best
            # plan that can be followed after doing it.
            # We don't need to know the best plan.  All we need to
            # know is its utility.
            # TODO We might have a source of conflict between the AI
            # and the other agents if the AI's planning horizon is
            # longer than the planning horizon of the other agents.
            # We would ideally fix this by inferring the planning
            # horizon of the other agents from their behavior and
            # treating each appropriately.  I don't know how to do that.
            def plan_utility(p):
                return utility_combiner.ai_utility(
                    self.plan_utility_map(p, behaviors), dnud)

            return max(plan_utility(p) for p in plans)

        return argmax(behavior_utility,
                      problem.possible_ai_behaviors,
                      paranoid=paranoid)


# This chooses one next action for the AI, given its observations so
# far, and an impossibly powerful machine to run this code.
def choose_next_action(utility_combiner,
                       eps=None,
                       problem_class=None,
                       paranoid=True,
                       **kwargs):
    """Build a Planner from the arguments and return its next action.

    Extra keyword arguments are forwarded to Planner, which forwards
    the ones it does not consume to problem_class.
    """
    assert problem_class is not None
    planner = Planner(problem_class=problem_class, eps=eps, **kwargs)
    return planner.next_action(utility_combiner=utility_combiner,
                               paranoid=paranoid)

# Weaknesses:
# This doesn't support one person caring what another person thinks.
# (Since people generally don't know what other people think,
# concerns of this kind might be regarded as neurotic, so perhaps
# this isn't a bug.  It does support one person caring about what
# another person might eventually do.)
# The set of possible behaviors and perceptions of non-AI agents is
# given.  Thus, it's unclear what would happen if the AI or other
# entities can do neurosurgery or uploading in such a way that after the
# modification, the modified agent's set of possible behaviors or
# perceptions is expanded.
# This code is given the number of agents.  It would have to
# be changed if it must figure out how many people there are.
# Therefore this code doesn't understand birth, since that's a change
# to the number of agents.
# It should understand death fairly well.  It's an inescapable
# mind-state.  Death doesn't have any obvious special effects on
# the dead agent's inferred utility function, so the AI
# would continue to be influenced by its guess about someone's
# desires after they die.
# We avoid the possibility of infinitely delayed gratification by having # planning cycles. This is clunky and might lead to odd behavior # from the AI near the end of the planning cycle. # The present scheme forces everyone to experience the same planning # horizon on the part of the AI. Ideally the AI should have the # option of delaying gratification for different people by different # amounts. This way impulsive people are more likely to perceive the AI as # useful to them.