// steplrn_alg.h : // generic algorithm definition for algoritms which estimates // value function (per step) experimental evaluation. template class StepLrn_Alg : public Learning_Alg { protected: typedef multimap > Actions_MM; typedef Actions_MM::const_iterator Actions_I; State CurrState; int RunCount; // number of runs performd so far int StepCount; StepStats_M StepsData; // statistics accumulated so far double Lambda; Actions_MM AllActions; // finds "best" action according to function based on accumulated // so far information Action find_best (State currState) { Action choise; double maxRew = 0; bool is_first = true; const Actions_MM& actForState = AllActions; pair steps = actForState.equal_range(currState); //cout << (*steps.first).first << " " << (*steps.first).second << "\n"; for (Actions_I it = steps.first; it != steps.second; ++it) { if (is_first || StepsData[Step(currState,(*it).second)] > maxRew) { is_first = false; choise = (*it).second; //cout << choise << "\n"; maxRew = StepsData[Step(currState,(*it).second)]; } } //cout << "CHOISE: " << choise << "\n"; return choise; } // Return number of legal action for specified int get_count (State currState) { int count = 0; const Actions_MM& actForState = AllActions; pair steps = actForState.equal_range(currState); for (Actions_I it = steps.first; it != steps.second; ++it) { count++; } return count; } // Returns action # in list of legal actions for Action act_by_count (State currState,int needed) { int count = 0; const Actions_MM& actForState = AllActions; pair steps = actForState.equal_range(currState); for (Actions_I it = steps.first; it != steps.second; ++it) { if (count == needed) { return (*it).second; } count++; } } public: // sets all legal action in "table" virtual void set_in_table (State stepState, Action stepAction) { const StepStats_M& valuesData = StepsData; const pair theStep(stepState,stepAction); Step newStep (stepState, stepAction); StepStats_I nextStepValI = valuesData.find (newStep); if (nextStepValI == valuesData.end()) { StepsData[newStep] = 0; AllActions.insert(theStep); } } // initialize common algorithm parameters StepLrn_Alg(double lambdaVal = 1.0) { RunCount = 0; StepCount = 0; Lambda = lambdaVal; } // new run is started virtual void start_run (State startState) { CurrState = startState; RunCount++; } // prints all accumulated statistics (per pair) virtual void print_stats () { const StepStats_M& valuesData = StepsData; cout << RunCount << " runs\n"; for (StepStats_I allVals = valuesData.begin(); allVals != valuesData.end(); ++allVals) cout << (*allVals).first << " : " << (*allVals).second << "\n"; cout << "\n"; } };