(** [percieve_result percept reward] folds the outcome of the previously
    taken action into the Q-table and then makes [percept] the current
    state.

    The entry for [(current_state, last_action)] is replaced by
    [alpha *. old +. (1.0 -. alpha) *. (reward +. delta *. best)], where
    [best] is the largest action value stored for [percept]. Note that
    [alpha] weights the {e old} value here, and [delta] scales the best
    value of the newly observed state (presumably the discount factor;
    confirm against the class definition).

    A [percept] not yet present in [q_table] is registered with an
    all-zero row and [num_states] is incremented.

    @raise Not_found if [current_state] has no row in [q_table].
    @raise Invalid_argument if the row for [percept] is empty or
    [last_action] is outside the row for [current_state]. *)
method percieve_result percept reward =
  (* Action values of the newly observed state. *)
  let next_row =
    try List.assoc percept q_table
    with Not_found ->
      num_states <- num_states + 1;
      let row = Array.make num_actions' 0.0 in
      (* Register the very array we hand back, so the table and this
         method share one row instead of two independent allocations. *)
      q_table <- (percept, row) :: q_table;
      row
  in
  (* Largest action value reachable from the new state. *)
  let best_next = Array.fold_right max next_row next_row.(0) in
  let target = reward +. delta *. best_next in
  (* Row of the state we acted from; looked up after the possible
     insertion above so a first-seen percept equal to current_state
     is found. *)
  let current_row = List.assoc current_state q_table in
  current_row.(last_action) <-
    alpha *. current_row.(last_action) +. (1.0 -. alpha) *. target;
  current_state <- percept