// lrn_alg.h :
// Contains the most generic definition of a learning algorithm.
//
// This header includes nothing itself: <map>, <functional> and a declaration
// of std::ostream (e.g. via <ostream> or <iostream>) must be visible before
// it is included.

#ifndef LRN_ALG_H_INCLUDED
#define LRN_ALG_H_INCLUDED

// Step type definition: a step is a pair of a system state and an action
// which can be performed in this state.
template <class State, class Action>
struct Step {
    State TheState;    // state the system is in
    Action TheAction;  // action performed in TheState

    // Value-initializes both members, so built-in State/Action types start
    // at zero instead of holding indeterminate values.
    Step() : TheState(), TheAction() {}

    // Builds the step (st, act).
    Step(State st, Action act) : TheState(st), TheAction(act) {}
};

// Lexicographic ordering of steps: compare by state first and, when the
// states are equal, by action. Needs State::operator==, State::operator<
// and Action::operator<.
// Operands are taken by value so the comparison operators of State and
// Action do not have to be const-qualified.
template <class State, class Action>
bool operator< (Step<State, Action> st1, Step<State, Action> st2)
{
    if (st1.TheState == st2.TheState)
        return st1.TheAction < st2.TheAction;
    return st1.TheState < st2.TheState;
}

// Output operator overload for generic results output.
// Writes the step as "<state>,<action>" and returns the stream so that
// insertions can be chained.
template <class State, class Action>
std::ostream& operator<< (std::ostream& out, Step<State, Action> st)
{
    out << st.TheState << "," << st.TheAction;
    return out;
}

// Pure abstract class defining the most generic behavior of learning
// algorithms.
template <class State, class Action>
class Learning_Alg {
public:
    // Virtual destructor: implementations are used through this interface,
    // so destroying one through a base pointer must be well defined.
    virtual ~Learning_Alg() {}

    // Several type definitions.

    // Used for statistics per state storage.
    // NOTE(review): the mapped type is assumed to be double, matching
    // StepStats_M below - confirm against the implementations.
    typedef std::map<State, double, std::less<State> > StateStats_M;
    typedef typename StateStats_M::const_iterator StateStats_I;

    // Used for statistics per step storage.
    typedef std::map<Step<State, Action>, double,
                     std::less<Step<State, Action> > > StepStats_M;
    typedef typename StepStats_M::const_iterator StepStats_I;

    // Add action for state - gets notification that stepAction can be
    // performed in stepState.
    virtual void set_in_table (State stepState, Action stepAction) = 0;

    // Start a new experiment (in startState).
    virtual void start_run (State startState) = 0;

    // Gets information about the performed step (action performed, state
    // reached, reward obtained) and updates the statistics obtained so far
    // using the new information.
    virtual void get_step (Action stepAction, State nextState,
                           double stepReward) = 0;

    // Finish the experiment - needed for off-line algorithms, which make
    // their updates only when the experiment is finished.
    virtual void finish_run () = 0;

    // Output the results of the algorithm runs.
    virtual void print_stats () = 0;
};

#endif // LRN_ALG_H_INCLUDED