\documentstyle[12pt]{book}
%\setlength{\oddsidemargin}{-.1 in}
%\setlength{\evensidemargin}{-.3 in}
\setlength{\evensidemargin}{.0 in}
\addtolength{\topmargin}{-1in}
\setlength{\textwidth}{6.5in}
\setlength{\textheight}{8.5in}
%
% The following macro is used to generate the header.
%
%
\newcommand{\handout}[5]{
\pagestyle{headings}
\thispagestyle{plain}
\newpage
% \setcounter{chapter}{#1}
\setcounter{page}{#2}
% \set\thechapter{#3}
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 6.28in { {\bf Reinforcement Learning
\hfill Fall Semester, 1999/2000} }
\vspace{4mm}
\hbox to 6.28in { {\Large \hfill #1: #3 \hfill} }
\vspace{2mm}
\hbox to 6.28in { {\it Lecturer: #4 \hfill} }
}
}
\end{center}
\markboth{Handout #1: #3}{Handout #1: #3}
\vspace*{4mm}
}
%
% Use these macros for organizing sections of your notes.
% Each command takes two arguments: (1) the title of the section and and
% (2) a keyword for that section to appear in the index. (See examples.)
% Please don't use \section, \subsection, and \subsubsection directly!
%
\newcommand{\topic}[2]{\section{#1} \index{#2} \markright{#1}}
\newcommand{\subtopic}[2]{\subsection{#1} \index{#2}}
\newcommand{\subsubtopic}[2]{\subsubsection{#1} \index{#2}}
%
% Convention for citations is first author's last name followed by other
% authors' last initials, followed by the year. For example, to cite the
% seventh entry in the course bibliography, you would type: \cite{BurnsL80}
% (To avoid bibliography problems, for now we redefine the \cite command.)
%
\renewcommand{\cite}[1]{[#1]}
%
% These are just to make things a little easier:
%
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\newcommand{\be}{\begin{enumerate}}
\newcommand{\ee}{\end{enumerate}}
\newcommand{\blank}{\vspace{1ex}} % generates a blank line in the output
%
% Use these for theorems, lemmas, proofs, etc.
%
\newtheorem{theorem}{Theorem}[chapter]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{corollary}[theorem]{Corollary}
\newenvironment{proof}{{\em Proof:}}{\hfill\rule{2mm}{2mm}}
%
% Use the following for definitions.
% \bigdef is for definitions to be set off by themselves; \smalldef is for
% definitions given in the middle of a paragraph.
%
\newenvironment{dfn}{{\vspace*{1ex} \noindent \bf Definition }}{\vspace*{1ex}}
\newcommand{\bigdef}[2]{\index{#1}\begin{dfn} {\rm #2} \end{dfn}}
\newcommand{\smalldef}[1]{\index{#1} {\em #1}}
\begin{document}
%\handout{**HANDOUT-NUMBER**}{**1ST-PAGE**}{**DATE**}{**LECTURER**}
\handout{Homework 1}{1}{Nov. 11}{Yishay Mansour}
\section*{Homework number 1.}
\noindent{\bf Question 1}
Consider a multiplicative model, where the return function
of the reward sequence $r_1, \ldots , r_N$ is $\prod_{i=1}^N r_i$.
Develop an algorithm that evaluates a given deterministic Markovian
policy for such a return function.\\
\noindent{\bf Question 2}
Suppose we have two actions. Action {\bf D} has always reward $0$,
while action {\bf R} has reward $+1$ with probability $1/2$ and $-1$ with
probability $1/2$. Our aim is to maximize the probability that the sum of the
rewards is at least $1$. What is the optimal policy.
(Hint: Show that there exists an optimal policy that first performs
a sequence of $R$
operations and only latter performs a sequence (possibly empty)
of $D$ operations.)\\
\noindent{\bf Question 3}
Consider the following digit game.
We have $N+1$ slots, marked by $0$ to $N$.
At each round (of $N+1$ rounds) a random digit is chosen.
(I.e. we chose a random number from $\{0, 1, \ldots , 9\}$.)
We can put the digit in one of the unoccupied slots, and that slot becomes
occupied.
Our aim is to maximize the expected value of the sequence of digits, when
viewed as a number.
(I.e. if we have $d_i$ in slot $i$ then the final value is $\sum_{i=0}^N d_i
10^i$.)
Example: Suppose $N=2$. At the beginning we have three empty slots.
Lets denote an empty slot by $\times$, then we have $\times\times\times$.
Suppose the first digit is $5$, and we decide to put it in the second slot,
then we have $\times 5 \times$. Suppose the second digit is $4$
and we decide to put it in the first slot, we have $\times 5 4$.
Finally, the last digit is $4$ again, and our final number is $454$.
\begin{enumerate}
\item
Build an MDP for this problem.
(Hint: you may incorporate the random digit in the state.)
\item
Show that the optimal policy depends only on the number of
empty slots.
\item
Compute the optimal policy for $N=2$.
(I.e the number has three digits.)
\end{enumerate}
\noindent{\bf The homework is due in two weeks}
\end{document}