(*
 * lablai - An ML Artificial Intelligence library
* Copyright (C) 2006 Till Crueger
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
(* File $RCSfile$ *)
(* last edited by $Author: till_crueger $ *)
(* $Date: 2008-01-11 15:25:50 +0100 (Fr, 11 Jan 2008) $, $Revision: 35 $ *)
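(* Tolerance below which two Q-values are treated as tied when choosing
 * the best action. *)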
let epsilon = 0.01;;
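(* Tabular Q-learner, polymorphic in the action type ['a] and the state
 * type ['b]. The Q-function is stored as an association list mapping
 * each known state to an array of per-action Q-values; states are added
 * lazily the first time they are perceived. *)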
class ['a,'b] q_learner actions' (start_state : 'b) =
let num_actions' = List.length actions' in
let _ = Random.self_init () in
object (self)
val mutable states = [(start_state, 1)]
val mutable num_states = 1
val actions : 'a array = Array.of_list actions'
val num_actions = num_actions'
val mutable q_table = [(start_state, Array.make num_actions' 0.0)]
val mutable current_state = start_state
val mutable last_action = 0
val mutable delta = 0.5 (* discount factor for future rewards *)
val mutable alpha = 0.5 (* weight of the old Q-value in each update *)
val mutable beta = 0.3 (* exploration rate: probability of acting randomly *)
(* Return the index of a best action for the current state, treating
 * Q-values within [epsilon] of the best seen so far as ties and choosing
 * among the tied actions uniformly at random. *)
method private get_best_action () =
let action_table = List.assoc current_state q_table in
let best = ref action_table.(0)
and winners = ref [0] in
for i = 1 to num_actions - 1 do
if action_table.(i) -. !best > epsilon then
(
(* strictly better action found: restart the list of candidates *)
best := action_table.(i);
winners := [i]
)
else if abs_float (action_table.(i) -. !best) <= epsilon then
winners := i :: !winners
done;
let num_winners = List.length !winners in
List.nth !winners (Random.int num_winners)
(* Epsilon-greedy action selection: explore a uniformly random action
 * with probability [beta], otherwise exploit the best known action. *)
method get_action () =
let chosen =
if Random.float 1.0 > beta then
self#get_best_action ()
else
Random.int num_actions
in
last_action <- chosen;
actions.(chosen)
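(* The update performed below is one-step Q-learning, written with this
 * class's parameterisation:
 *   Q(s,a) <- alpha * Q(s,a) + (1 - alpha) * (r + delta * max_a' Q(s',a'))
 * Note that [alpha] here weights the old estimate, the reverse of the
 * textbook convention in which the learning rate weights the new sample. *)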
method percieve_result percept reward =
(* look up the Q-values of the newly perceived state, registering it
 * with an all-zero row if it has never been seen before *)
let action_table =
try
List.assoc percept q_table
with Not_found ->
num_states <- num_states + 1;
(* keep [states] in sync with [num_states] *)
states <- (percept, num_states) :: states;
let arry = Array.make num_actions' 0.0 in
q_table <- (percept, arry) :: q_table;
arry
in
let max_q = Array.fold_right max action_table action_table.(0) in
let new_q = reward +. delta *. max_q in
let action_table = List.assoc current_state q_table in
action_table.(last_action) <-
alpha *. action_table.(last_action) +. (1.0 -. alpha) *. new_q;
current_state <- percept
end
;;
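(* Example usage: a minimal sketch, kept inside a comment so that it is
 * not compiled into the library. The environment function [step], its
 * integer states and its rewards are hypothetical and only illustrate
 * the percept/reward protocol the learner expects:
 *
 * let step action = if action = "right" then (1, 1.0) else (0, 0.0)
 *
 * let () =
 *   let learner = new q_learner ["left"; "right"] 0 in
 *   for _i = 1 to 1000 do
 *     let action = learner#get_action () in
 *     let next_state, reward = step action in
 *     learner#percieve_result next_state reward
 *   done
 *)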
(*
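 * Experimental variant of the learner above: the tabular Q-function is
 * replaced by an MLP function approximator over a real-valued sensor
 * vector (see the Mlp module). Kept commented out.
 *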
class ['a] neuro_q_learner actions' (start_state : float array) layout =
let sensors = Array.length start_state in
let num_actions' = List.length actions' in
let _ = Random.self_init () in
object (self)
val actions : 'a array = Array.of_list actions'
val num_actions = num_actions'
val mutable q_table = Mlp.make_approximator sensors num_actions' layout
val mutable current_state = Array.copy start_state
val mutable last_action = 0
val mutable delta = 0.5
val mutable alpha = 0.1
val mutable beta = 0.3
method private get_best_action () =
let action_table = Mlp.evaluate q_table current_state in
let best = ref action_table.(0)
and winners = ref [0] in
for i = 1 to num_actions - 1 do
if action_table.(i) -. !best > epsilon then
(
best := action_table.(i);
winners := [i]
)
else if abs_float (action_table.(i) -. !best) <= epsilon then
winners := i :: !winners
done;
let num_winners = List.length !winners in
List.nth !winners (Random.int num_winners)
method get_action () =
let chosen =
if Random.float 1.0 > beta then
self#get_best_action ()
else
Random.int num_actions
in
last_action <- chosen;
actions.(chosen)
method percieve_result percept reward =
let action_table = Mlp.evaluate q_table percept in
let max_q = Array.fold_right max action_table action_table.(0) in
let new_q = reward +. delta *. max_q in
let last_action_table = Mlp.evaluate q_table current_state in
last_action_table.(last_action) <- new_q;
Mlp.train_in_place q_table (current_state, last_action_table) alpha;
current_state <- Array.copy percept
end
;;
*)
(*
* $Log$
* Revision 1.6 2008/01/11 14:25:50 till_crueger
* - Changed teacher Interface to be able to combine different teachers
*
* Revision 1.5 2007/12/19 01:17:56 till_crueger
*
* - Changed Interface for MLPs
* - Fixed some small bugs for MLPs as approximators
* - Added q-Learner that uses an MLP as State-Value-Function
*
* Revision 1.4 2007/12/15 18:52:58 till_crueger
 *
* - Updated documentation
* - Moved Log-Tags to a better position in the sources
*
* Revision 1.3 2006/02/20 20:21:46 till
* Added LGPL to all files
* Added LGPL to package
*
* Revision 1.2 2006/02/12 13:26:00 till
* commented out debugging statements in reinforcement.ml
*
 * started work on decision trees
*
* Revision 1.1 2006/02/11 13:24:54 till
 * Began work on a reinforcement learner.
*
* Q-learner started
*
*)