import orbital.algorithm.template.*;
import orbital.logic.functor.Function;
import orbital.robotic.*;
import orbital.robotic.Map;
import java.util.*;
import java.util.List;
import java.io.*;
import java.awt.*;
import orbital.math.*;
import javax.swing.*;

/**
 * Robot navigation. A robot navigates through a labyrinth with either
 * deterministic or non-deterministic actions, searching for a goal.
 * Tumbling through the maze, it will learn to find better ways after
 * some trials. Essentially, the robot is blind and can only feel its
 * way through the labyrinth. But it has a little memory, and will
 * display which ways it considers going along. Over time, it
 * converges to the optimal path.
 * @internal note that reinforcement learning problems are MDPs with c(s,a) = -E[r_{t+1} | s_t=s, a_t=a]
 * @todo spawn a new example "Race Track Problem" [Barto et al. 1993]
 * @todo introduce turning backwards so that we do not need to turn twice. And what is its reaction?
 * @todo provide exact settings for more labyrinths. Implement field for "LOST" with negative reward.
 */
public class RobotNavigation implements MarkovDecisionProblem {
    private static int MOVE_DELAY = 400;
    private static final int TRIALS = 5000;
    private static final boolean DETERMINISTIC = true;

    public static void main(String arg[]) throws IOException {
        InputStream input = new FileInputStream(arg.length > 0 ? arg[0] : "default.lab.txt");
        RobotNavigation nav = new RobotNavigation(input);
        input.close();
        MarkovDecisionProcess planner;
        // here we decide which exact MDP planning algorithm to use.
        // The single difference in using another planning algorithm
        // would only concern the constructor call
        planner = new RealTimeDynamicProgramming(nav.getHeuristic());
        //planner = new GaussSeidelDynamicProgramming(nav.getHeuristic(), nav.allStates(), 0.1);

        JFrame f = new JFrame();
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        f.add(nav.getView());
        f.pack();
        f.setVisible(true);

        // really obtain a plan
        Function plan = (Function) planner.solve(nav);
        nav.followPlan(plan, TRIALS);
    }

    /**
     * Set the delay time between two moves (for visual animation).
     */
    protected static void setDelay(int moveDelay) {
        RobotNavigation.MOVE_DELAY = moveDelay;
    }

    /**
     * Declaration of the characters occurring in the labyrinth file.
     */
    public static final Character GOAL  = new Character('G');
    public static final Character WALL  = new Character('#');
    public static final Character ROBOT = new Character('R');
    public static final Character LOST  = new Character('L');

    // original problem's data

    /**
     * The original problem's map.
     */
    private Map originalMap;
    /**
     * The heuristic goal position on the map.
     */
    private Position goalPosition;

    // for performing navigation

    /**
     * The robot's current position on the map of view.
     */
    private Moving currentPosition;
    /**
     * The current map view. view.getMap() may differ from originalMap in
     * that the robot moves on view but not on the original originalMap. So
     * contrary to originalMap, view.getMap() will change.
     */
    private MapView view;

    public RobotNavigation() {
        this.originalMap = null;
        this.goalPosition = null;
        this.currentPosition = null;
        this.view = new RobotMapView(null);
    }
    /**
     * Create a new robot navigation problem for the labyrinth read
     * from a stream.
     */
    public RobotNavigation(InputStream labyrinth) throws IOException {
        this();
        //@internal reuse the loading ability of our view, even though we really want to set originalMap
        view.load(labyrinth);
        Map originalMap = view.getMap();
        Moving start = new Moving(searchAny(originalMap, ROBOT), Direction.South);
        //@internal cloning could be unnecessary, here
        init((Map) originalMap.clone(), (Moving) start.clone());
    }

    /**
     * Re-initialize the map.
     */
    public void init(Map map, Moving start) {
        this.originalMap = map;
        this.currentPosition = start;
        this.goalPosition = searchAny(getOriginalMap(), GOAL);
        // view only gets a clone of the original map, since we want the robot to wander on it
        view.setMap((Map) getOriginalMap().clone());
    }

    /**
     * The original problem's map.
     */
    protected Map getOriginalMap() {
        return originalMap;
    }

    /**
     * The problem's current map (also on display).
     */
    protected Map getMap() {
        return view.getMap();
    }

    /**
     * The current map view. view.getMap() may differ from originalMap in
     * that the robot moves on view but not on the original originalMap. So
     * contrary to originalMap, view.getMap() will change.
     */
    public MapView getView() {
        return view;
    }

    public boolean isSolution(Object state) {
        Position pos = (Position) state;
        return GOAL.equals(getMap().get(pos))
            || goalPosition.equals(pos)
            // negative "solution" when we lost the game
            || LOST.equals(getMap().get(pos));
    }

    public Iterator actions(Object state) {
        return transitions.keySet().iterator();
    }

    /**
     * A map containing the possible transitions and transition probabilities
     * for each intended action.
     */
    private static final java.util.Map transitions = new HashMap();
    static {
        if (DETERMINISTIC) {
            // deterministic actions
            // we append a transition for resting at a place because this can happen when moving into a wall
            transitions.put("FF", new Transition[] {new Transition("FF", 1), new Transition(" ", 0)});
            transitions.put("l", new Transition[] {new Transition("l", 1)});
            transitions.put("r", new Transition[] {new Transition("r", 1)});
            //transitions.put("b", new Transition[] {new Transition("b", 1), new Transition(" ", 0)});
            transitions.put(" ", new Transition[] {new Transition(" ", 1)});
        } else {
            // non-deterministic actions
            transitions.put("FF", new Transition[] {new Transition("FF", .73), new Transition("/LF/LF", .05), new Transition("/RF/RF", .05), new Transition("/LF/LFl", .05), new Transition("/RF/RFr", .05), new Transition("l", .03), new Transition("r", .03), new Transition(" ", 0.01)});
            transitions.put("l", new Transition[] {new Transition("l", .8), new Transition("b", .1), new Transition("r", .05), new Transition(" ", .05)});
            transitions.put("r", new Transition[] {new Transition("r", .8), new Transition("b", .1), new Transition("l", .05), new Transition(" ", .05)});
            //transitions.put("b", new Transition[] {new Transition("b", .4), new Transition("l", .1), new Transition("r", .1), new Transition(" ", .1), new Transition("lFF", .05), new Transition("rFF", .05), new Transition("lFFl", .05), new Transition("rFFr", .05), new Transition("/LF/LFl", .05), new Transition("/RF/RFr", .05), new Transition(" ", 0)});
            transitions.put(" ", new Transition[] {new Transition(" ", 1)});
        }
    }
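    // Optional sanity check (a suggested addition, not part of the original
    // design): every action's outcomes above should form a probability
    // distribution, i.e. sum to 1. Run with -ea to catch typos when editing
    // the tables.
    static {
        for (Iterator i = transitions.values().iterator(); i.hasNext(); ) {
            Transition[] ts = (Transition[]) i.next();
            double sum = 0;
            for (int k = 0; k < ts.length; k++)
                sum += ts[k].probability;
            assert Math.abs(sum - 1.0) < 1e-9 : "transition probabilities sum to " + sum + " instead of 1";
        }
    }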
    public Iterator states(Object action, Object state) {
        Moving s = (Moving) state;
        Set t = new HashSet();
        Transition[] ts = (Transition[]) transitions.get(action);
        for (int i = 0; i < ts.length; i++) {
            Moving sp = (Moving) s.clone();
            sp.move(ts[i].move);
            // version with information caching:
            //t.add(new Option(sp, transitionProbability(sp, s, action, ts[i]), getCost(s, action)));
            t.add(sp);
        }
        return t.iterator();
    }

    public TransitionModel.Transition transition(Object action, Object state, Object statep) {
        return new MarkovDecisionProblem.DefaultTransition(
            transitionProbability((Moving) statep, (Moving) state, action),
            getCost((Moving) state, action));
    }

    private double transitionProbability(Moving sp, Moving s, Object action) {
        // check special cases, first:
        // adjust the transition model such that intended transitions into walls or off
        // the board are assigned a probability of 1 of resting at the original state.
        // Find out whether the action would normally lead us straight into a wall
        if (wouldBounceWall(s, action))
            // bounced against a wall
            return sp.equals(s) ? 1 : 0;
        // goal and lost states are terminal states, so no action leads anywhere else.
        // (note that this is important for the undiscounted case to converge)
        if (GOAL.equals(getMap().get(s)) || LOST.equals(getMap().get(s)))
            return sp.equals(s) ? 1 : 0;
        // normal case
        Transition[] ts = (Transition[]) transitions.get(action);
        for (int i = 0; i < ts.length; i++)
            // would we get to sp by this move?
            if (ts[i].explains(s, sp))
                return ts[i].probability;
        return 0;
    }

    /**
     * Check whether the given action in the given state would lead us into a wall or off the board.
     */
    private boolean wouldBounceWall(Moving state, Object action) {
        String move = ((Transition[]) transitions.get(action))[0].move;
        Moving moving = (Moving) state.clone();
        // move as intended, i.e. as if the world were deterministic,
        // and check the single intermediate steps
        for (int i = 0; i < move.length(); i++) {
            moving.move(move.charAt(i));
            if (!getMap().inRange(moving) || WALL.equals(getMap().get(moving))) {
                return true;
            }
        }
        return false;
    }

    private double getCost(Moving state, Object action) {
        // where the action's target is
        Moving target = (Moving) state.clone();
        target.move((String) action);
        if (!getMap().inRange(target) || WALL.equals(getMap().get(target)))
            // bounced against a wall
            return 70;
        if (LOST.equals(getMap().get(target)))
            // bound to lose when this succeeds
            return 200;
        return 2; // or return action.length();
    }

    // implementation helpers

    /**
     * Search all positions on the map that equal o.
     */
    protected static List searchAll(Map map, Object o) {
        List l = new LinkedList();
        Rectangle r = map.getBounds();
        for (int y = r.y; y < r.height; y++)
            for (int x = r.x; x < r.width; x++) {
                Position p = new Position(x, y);
                if (o.equals(map.get(p)))
                    l.add(p);
            }
        return !l.isEmpty() ? l : null;
    }

    /**
     * Search any position on the map that equals o.
     * Ties break randomly.
     */
    protected static Position searchAny(Map map, Object o) {
        List l = searchAll(map, o);
        if (l == null || l.isEmpty())
            throw new NoSuchElementException("Labyrinth does not contain " + o);
        return (Position) l.get((int) (Math.random() * l.size()));
    }
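    // Worked example for the transition and cost model above (deterministic
    // setting): if the robot intends "FF" while directly facing a wall, then
    // wouldBounceWall(s, "FF") is true, so transitionProbability(s', s, "FF")
    // is 1 for s' = s and 0 for every other s', i.e. the robot rests at its
    // place; getCost charges 70 instead of the usual 2. This costly self-loop
    // is what steers the planner around walls.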
    /**
     * Get the set S of all states.
     * For academic toy algorithms, only.
     */
    protected Set allStates() {
        Map map = getOriginalMap();
        // get all states (even states that can never be reached, at all)
        Set states = new HashSet();
        for (int i = 1; i < map.getDimension().width; i += 2) {
            for (int j = 1; j < map.getDimension().height; j += 2) {
                Moving s = new Moving(new Position(i, j), Direction.South);
                if (WALL.equals(map.get(s)))
                    continue;
                for (int k = 0; k < 4; k++) {
                    s.move(Move.Left);
                    states.add(s.clone());
                }
            }
        }
        return states;
        // get all reachable states, only
        //return MarkovDecisionProblemSearch.getReachableStates(this, currentPosition);
    }

    // additional navigation methods

    protected void perform(Object action) {
        String a = (String) action;
        // choose a non-deterministic transition by sampling from the action's distribution
        double dice = Math.random();
        Transition[] ts = (Transition[]) transitions.get(action);
        double distribution = 0;
        for (int i = 0; i < ts.length; i++) {
            distribution += ts[i].probability;
            if (dice <= distribution) {
                a = ts[i].move;
                break;
            }
        }
        Moving moving = (Moving) currentPosition.clone();
        moving.move(a);
        //System.out.println(" --" + a + "--> " + moving);
        if (!getMap().inRange(moving) || WALL.equals(getMap().get(moving)))
            if (action.equals(a))
                // intended illegal move
                throw new InternalError("intended illegal move " + action + " to "
                    + (getMap().inRange(moving) ? getMap().get(moving) : "OutOfBoundsException")
                    + "@" + moving);
            else
                // accidentally hit the wall, so we bounced back to the original position
                moving = currentPosition;
        view.setMap(currentPosition, new Character(' '));
        currentPosition = moving;
        char robot;
        switch (moving.getDirection().getDirection()) {
        case Direction.North: robot = '^'; break;
        case Direction.South: robot = 'v'; break;
        case Direction.East:  robot = '>'; break;
        case Direction.West:  robot = '<'; break;
        default: robot = 'R';
        }
        view.setMap(currentPosition, new Character(robot));
        try {
            Thread.sleep(MOVE_DELAY);
        } catch (InterruptedException x) {}
    }

    protected Object observe() {
        return currentPosition;
    }

    /**
     * Follow the plan several times.
     */
    protected void followPlan(Function plan, int trials) {
        // multiple trials of following the plan (for Trial-RTDP to achieve convergence)
        for (int i = 0; i < trials; i++) {
            try {
                Thread.sleep(2 * MOVE_DELAY);
            } catch (InterruptedException x) {}
            Moving start = new Moving(searchAny(getOriginalMap(), ROBOT), Direction.South);
            init((Map) getOriginalMap().clone(), (Moving) start.clone());
            followPlan(plan, (Moving) start.clone());
        }
    }

    /**
     * Follow the plan.
     */
    protected void followPlan(Function plan, Moving start) {
        for (Object state = start; !isSolution(state); state = observe()) {
            perform(plan.apply(state));
        }
    }

    protected void followPlan(Function plan) {
        followPlan(plan, new Moving(searchAny(getOriginalMap(), ROBOT), Direction.South));
    }

    /**
     * Follow the plan and return a trace of the states visited.
     */
    protected List tracePlan(Function plan) {
        List trace = new LinkedList();
        Moving start = new Moving(searchAny(getOriginalMap(), ROBOT), Direction.South);
        init((Map) getOriginalMap().clone(), (Moving) start.clone());
        Object state = start;
        trace.add(state);
        while (!isSolution(state)) {
            perform(plan.apply(state));
            state = observe();
            trace.add(state);
        }
        return trace;
    }
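    // Usage sketch (an illustration, not called anywhere in this class): for
    // batch experiments the animation delay can be disabled before the trials:
    //
    //   RobotNavigation.setDelay(0);      // run at full speed
    //   nav.followPlan(plan, TRIALS);     // e.g. the 5000 trials from main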
    // heuristic function

    /**
     * Use manhattan distance plus turning cost to the goal as the heuristic function.
     */
    protected Function getHeuristic() {
        return new Function() {
            final ValueFactory vf = Values.getDefault();
            public Object apply(Object state) {
                Moving s = (Moving) state;
                // how far we are off the goal's x/y coordinates
                double cost = manhattan(s, goalPosition); //s.subtract(goalPosition).length();
                // plus the cost of the turns still required
                cost += 2 * turnDistance(s, goalPosition);
                //System.out.println("\t\th(" + s + ")\t= " + cost);
                return vf.valueOf(cost);
            }
        };
    }
    // utilities

    /**
     * The turn distance of m to p.
     * If m is at the center position '>' looking east, then this will return the numbers
     * on the corresponding fields.
     * <pre>
     * 2...211...1
     * .   ...   .
     * 2...211...1
     * 2...2>0...0
     * 2...211...1
     * .   ...   .
     * 2...211...1
     * </pre>
     */
    public static int turnDistance(Moving m, Position p) {
        Position t = m.getDirection().getDirectionVector();
        // component-wise normalized delta (p-m normalized in the d∞-norm).
        Position d = new Position(MathUtilities.sign(p.x - m.x), MathUtilities.sign(p.y - m.y));
        if (t.equals(d))
            return 0;
        // s = t*d (scalar product)
        int s = t.x * d.x + t.y * d.y;
        return s < 0 ? 2 : 1;
    }

    /**
     * Get the manhattan distance d1,
     * where d1 is the metric induced by the 1-norm.
     * Visually speaking, the manhattan distance is the distance following
     * the rectangular streets of Manhattan
     * = delta x + delta y.
     */
    private static int manhattan(Position a, Position b) {
        return Math.abs(b.x - a.x) + Math.abs(b.y - a.y);
    }
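    // Worked example for the heuristic h = manhattan + 2*turnDistance
    // (assuming South's direction vector is (0,1), i.e. y grows downwards):
    // a robot at (1,1) facing South with the goal at (5,3) has
    //   manhattan = |5-1| + |3-1| = 6, and
    //   d = (sign(4), sign(2)) = (1,1), t = (0,1), so t != d and t*d = 1 >= 0,
    // hence turnDistance = 1 and h = 6 + 2*1 = 8.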
    /**
     * Stochastic transition holder.
     */
    private static class Transition {
        String move;
        double probability;
        public Transition(String move, double probability) {
            this.move = move;
            this.probability = probability;
        }
        public Transition() {
            this(null, 0);
        }
        /**
         * Whether this transition would lead the source moving to the destination.
         */
        public boolean explains(Moving source, Moving destination) {
            Moving m = (Moving) source.clone();
            m.move(move);
            // would we get to destination by this move m?
            return destination.equals(m);
        }
    }
}

// Utilities

class RobotMapView extends MapView {
    public RobotMapView(Map map) {
        super(map);
    }
    protected Object newField(char ch) {
        return new Character(ch);
    }
    protected char fieldValue(Object field) {
        return ((Character) field).charValue();
    }
}

// /**
//  * Extract some features of a Markov Decision Problem, and formulate it
//  * as a state model.
//  * This approach restricts the solution abilities, of course, but is convenient
//  * for simple sub-tasks such as finding all reachable states.
//  * This class is some kind of deterministic projection into state space.
//  */
// class MarkovDecisionProblemSearch implements GeneralSearchProblem {
//     private final MarkovDecisionProblem problem;
//     private final Object initialState;
//     /**
//      * The set of states built up of the states that are reachable from initialState.
//      */
//     private Set reachable;
//     public MarkovDecisionProblemSearch(MarkovDecisionProblem problem, Object initialState) {
//         this.problem = problem;
//         this.initialState = initialState;
//     }
//     /**
//      * Get all states that are reachable from initialState in the given MDP.
//      * Reachable are those states for which an action policy exists that has a
//      * non-zero probability of finally reaching them.
//      */
//     public static Set getReachableStates(MarkovDecisionProblem problem, Object initialState) {
//         MarkovDecisionProblemSearch p = new MarkovDecisionProblemSearch(problem, initialState);
//         new DepthFirstSearch().solve(p);
//         return p.getReachableStates();
//     }
//     /**
//      * Get the computed set of all reachable states.
//      * @return the reachable set computed in the last run.
//      */
//     Set getReachableStates() {
//         return reachable;
//     }
//     public Object getInitialState() {
//         this.reachable = new HashSet();
//         return initialState;
//     }
//     public boolean isSolution(Option n) {
//         reachable.add(n.getState());
//         return false;
//     }
//     /**
//      * Returns states that are reachable under any actions at all,
//      * regardless of transition probability.
//      */
//     public Iterator expand(Option n) {
//         Set destinations = new HashSet();
//         for (Iterator i = problem.actions(n.getState()); i.hasNext(); ) {
//             Object action = i.next();
//             Collection states = problem.nextStates(n.getState(), action);
//             double cost = problem.getCost(n.getState(), action);
//             for (Iterator j = states.iterator(); j.hasNext(); ) {
//                 Object st = j.next();
//                 // if the state has not already been reached but has a non-zero transition probability
//                 if (!reachable.contains(st) && problem.transitionProbability(st, n.getState(), action) != 0)
//                     destinations.add(new Option(st, action, n.getCost() + cost));
//             }
//         }
//         return destinations.iterator();
//     }
//     public double getCost(Option n) {
//         return problem.getCost(n.getState(), n.getAction());
//     }
// }
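// A minimal usage sketch (assuming "my.lab.txt" is a labyrinth file in the
// format read by MapView.load; since allStates and tracePlan are protected,
// this code would have to live in the same package or in a subclass):
//
//   InputStream in = new FileInputStream("my.lab.txt");
//   RobotNavigation nav = new RobotNavigation(in);
//   in.close();
//   MarkovDecisionProcess planner =
//       new GaussSeidelDynamicProgramming(nav.getHeuristic(), nav.allStates(), 0.1);
//   Function plan = (Function) planner.solve(nav);
//   System.out.println("visited states: " + nav.tracePlan(plan));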