(* Reinforcement *)

(*

 * lablai - An ML Artificial Intelligence library

 * Copyright (C) 2006  Till Crueger

 *

 * This library is free software; you can redistribute it and/or

 * modify it under the terms of the GNU Lesser General Public

 * License as published by the Free Software Foundation; either

 * version 2 of the License, or (at your option) any later version.

 *

 * This library is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 * Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with this library; if not, write to the Free Software

 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 *)



(* File $RCSfile$ *)

(* last edited by $Author: till_crueger $ *)

(* $Date: 2008-01-11 15:25:50 +0100 (Fr, 11 Jan 2008) $, $Revision: 35 $ *)





(* Tolerance used when comparing Q-values in get_best_action: a value must
   exceed the current best by more than epsilon to replace it, and values
   whose absolute difference to the current best is at most epsilon are
   treated as ties. *)
let epsilon = 0.01;;



(** Tabular Q-learner.

    ['a] is the type of the actions, ['b] the type of the states (percepts).
    States are looked up with [List.assoc], i.e. compared with structural
    equality.

    @param actions' list of available actions; must not be empty
    @param start_state the state the learner starts in
    @raise Invalid_argument if [actions'] is empty. Previously such a learner
           could be built but failed on first use with an unrelated
           "index out of bounds" / [Random.int] error. *)
class ['a,'b] q_learner actions' (start_state : 'b) =
   let num_actions' = List.length actions' in
   (* Every method indexes element 0 of a per-state value array and
      get_action calls Random.int on the number of actions, so an empty
      action list can never work: reject it here with a clear message. *)
   let () =
      if num_actions' = 0 then
         invalid_arg "q_learner: the action list must not be empty"
   in
   let _ = Random.self_init () in
   object (self)
      (* Initialised with the start state paired with 1; no method of this
         class reads or updates it. *)
      val mutable states = [(start_state,1)]
      (* Number of distinct states seen so far; incremented in
         percieve_result when an unknown percept arrives. *)
      val mutable num_states = 1
      val actions : 'a array = Array.of_list actions'
      val num_actions = num_actions'
      (* Association list: state -> array of Q-values, one per action index. *)
      val mutable q_table = [(start_state, Array.make num_actions' 0.0)]
      val mutable current_state = start_state
      (* Index (into actions) of the action most recently returned by
         get_action. *)
      val mutable last_action = 0
      (* Factor applied to the best Q-value of the successor state. *)
      val mutable delta = 0.5
      (* Weight of the OLD Q-value in the update; the new estimate is
         weighted with (1 - alpha). *)
      val mutable alpha = 0.5
      (* Threshold for exploration: get_action picks a uniformly random
         action unless its random draw from [Random.float 1.0] exceeds
         beta. *)
      val mutable beta = 0.3

      (* Index of an action whose Q-value in the current state is maximal up
         to the tolerance epsilon; ties are broken uniformly at random. *)
      method private get_best_action () =
         let action_table = List.assoc current_state q_table in
         let best = ref action_table.(0)
         and winners = ref [0] in
         for i = 1 to num_actions - 1 do
            let diff = action_table.(i) -. !best in
            if diff > epsilon then begin
               (* Clearly better than everything seen so far. *)
               best := action_table.(i);
               winners := [i]
            end
            else if abs_float diff <= epsilon then
               (* Within tolerance of the best: counts as a tie. *)
               winners := i :: !winners
         done;
         List.nth !winners (Random.int (List.length !winners))

      (** Chooses the next action for the current state, remembers its index
          in [last_action] and returns the action itself. *)
      method get_action () =
         let chosen =
            if Random.float 1.0 > beta then
               self#get_best_action ()
            else
               Random.int num_actions
         in
         last_action <- chosen;
         actions.(chosen)

      (** Reports the outcome of the last action.

          [percept] is the state reached and [reward] the reward received.
          Updates the Q-value of ([current_state], [last_action]) and makes
          [percept] the current state. Unknown percepts are added to the
          table with all Q-values set to 0. *)
      method percieve_result percept reward =
         let next_values =
            try
               List.assoc percept q_table
            with Not_found ->
               (* Allocate once and store the very array we go on to use,
                  instead of building a second, identical one. *)
               let fresh = Array.make num_actions 0.0 in
               num_states <- num_states + 1;
               q_table <- (percept, fresh) :: q_table;
               fresh
         in
         let best_next = Array.fold_right max next_values next_values.(0) in
         let new_q = reward +. delta *. best_next in
         let current_values = List.assoc current_state q_table in
         current_values.(last_action) <-
            alpha *. current_values.(last_action) +. (1.0 -. alpha) *. new_q;
         current_state <- percept

   end
   ;;

   

(*

class ['a] neuro_q_learner actions' (start_state : float array) layout =

   let sensors = Array.length start_state in

   let num_actions' = List.length actions' in

   let _ = Random.self_init () in

   object (self)

      val actions : 'a array = Array.of_list actions'

      val num_actions = num_actions'

      val mutable q_table=  Mlp.make_approximator sensors num_actions' layout

      val mutable current_state = Array.copy start_state

      val mutable last_action = 0

      val mutable delta = 0.5

      val mutable alpha = 0.1

      val mutable beta = 0.3

      

      method private get_best_action () = 

         let action_table = Mlp.evaluate q_table current_state in

         let best = ref action_table.(0) and

               winners = ref [0] in

         for i = 1 to num_actions-1 do

            if (action_table.(i) -. !best) > epsilon then

            (

               best := action_table.(i);

               winners := [i]

            )

            else

            (

               if abs_float (action_table.(i) -. !best) <= epsilon then

               (

                  winners := i :: !winners

               )

               else

               ()

            )

         done;

         let num_winners = List.length !winners in

         let index = Random.int num_winners in

         List.nth !winners index

      

      method get_action () =

         let chosen =

            let rand = Random.float 1.0 in

            if rand > beta then

               self#get_best_action ()

            else

               Random.int (Array.length actions)

         in

         last_action <- chosen;

         actions.(chosen)

      

      method percieve_result percept reward =

         let action_table = Mlp.evaluate q_table percept in

         let max = Array.fold_right max action_table  action_table.(0) in

         let new_q = reward +. delta *. max in

         let last_action_table = Mlp.evaluate q_table current_state  in

         last_action_table.(last_action) <- new_q;

         Mlp.train_in_place q_table (current_state,last_action_table) alpha;

         current_state <- Array.copy percept

         

   end

   ;;

*)

(* 

 * $Log$

 * Revision 1.6  2008/01/11 14:25:50  till_crueger

 * - Changed teacher Interface to be able to combine different teachers

 *

 * Revision 1.5  2007/12/19 01:17:56  till_crueger

 *

 * - Changed Interface for MLPs

 * - Fixed some small bugs for MLPs as approximators

 * - Added q-Learner that uses an MLP as State-Value-Function

 *

 * Revision 1.4  2007/12/15 18:52:58  till_crueger

 *

 *

 * - Updated documentation

 * - Moved Log-Tags to a better position in the sources

 *

 * Revision 1.3  2006/02/20 20:21:46  till

 * Added LGPL to all files

 * Added LGPL to package

 *

 * Revision 1.2  2006/02/12 13:26:00  till

 * commented out debugging statements in reinforcement.ml

 *

 * started work on decission trees

 *

 * Revision 1.1  2006/02/11 13:24:54  till

 * Began work on a reinforcment learner.

 *

 * Q-learner started

 *

 *)