Reinforcement.q_learner.percieve

(* Q-learning update performed when a new percept and reward are observed.

   Steps:
   - look up the row of action values for [percept] in [q_table]; if there
     is none, increment [num_states] and prepend a zero-filled row of
     length [num_actions'] for it;
   - take the largest value in that row and form
     [new_q = reward +. delta *. best];
   - blend [new_q] into the entry for ([current_state], [last_action]) as
     [alpha *. old +. (1.0 -. alpha) *. new_q]  (note: [alpha] weights the
     OLD value here);
   - make [percept] the new [current_state].

   Raises [Not_found] if [current_state] has no row in [q_table], and
   [Invalid_argument] if the row for [percept] is empty or [last_action]
   is out of bounds. *)
method percieve_result percept reward =
         let action_table =
            try
               List.assoc percept q_table
            with Not_found ->
               num_states <- num_states + 1;
               let arry = Array.make (num_actions') 0.0 in
               (* Register the very same array that is returned, so the
                  local row and the table entry can never diverge. *)
               q_table <- (percept, arry) :: q_table;
               arry
         in
         (* Best value attainable from the newly observed state. *)
         let best_q = Array.fold_right max action_table action_table.(0) in
         let new_q = reward +. delta *. best_q in
         (* Row of the state we were in when [last_action] was taken. *)
         let previous_row = List.assoc current_state q_table in
         previous_row.(last_action) <-
               alpha *. previous_row.(last_action) +. (1.0 -. alpha) *. new_q;
         current_state <- percept