<efrbr:recordSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:efrbr="http://vfrbr.info/efrbr/1.1" xmlns:efrbr-work="http://vfrbr.info/efrbr/1.1/work" xmlns:efrbr-expression="http://vfrbr.info/efrbr/1.1/expression" xmlns:efrbr-manifestation="http://vfrbr.info/efrbr/1.1/manifestation" xmlns:efrbr-person="http://vfrbr.info/efrbr/1.1/person" xmlns:efrbr-corporateBody="http://vfrbr.info/efrbr/1.1/corporateBody" xmlns:efrbr-concept="http://vfrbr.info/efrbr/1.1/concept" xmlns:efrbr-structure="http://vfrbr.info/efrbr/1.1/structure" xmlns:efrbr-responsible="http://vfrbr.info/efrbr/1.1/responsible" xmlns:efrbr-subject="http://vfrbr.info/efrbr/1.1/subject" xmlns:efrbr-other="http://vfrbr.info/efrbr/1.1/other" xsi:schemaLocation="http://vfrbr.info/efrbr/1.1 http://vfrbr.info/schemas/1.1/efrbr.xsd"><efrbr:entities><efrbr-work:work identifier="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2"><efrbr-work:titleOfTheWork>On the locality of action domination in sequential decision making</efrbr-work:titleOfTheWork></efrbr-work:work><efrbr-expression:expression identifier="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2"><efrbr-expression:titleOfTheExpression>On the locality of action domination in sequential decision making</efrbr-expression:titleOfTheExpression><efrbr-expression:formOfExpression vocabulary="DIAS:TYPES">
            Πλήρης Δημοσίευση σε Συνέδριο
            Conference Full Paper
         </efrbr-expression:formOfExpression><efrbr-expression:dateOfExpression type="issued">2015-11-13</efrbr-expression:dateOfExpression><efrbr-expression:dateOfExpression type="published">2010</efrbr-expression:dateOfExpression><efrbr-expression:languageOfExpression vocabulary="iso639-1">en</efrbr-expression:languageOfExpression><efrbr-expression:summarizationOfContent>In the field of sequential decision making and reinforcement
learning, it has been observed that good policies for most
problems exhibit a significant amount of structure. In practice,
this implies that when a learning agent discovers an action
is better than any other in a given state, this action actually
happens to also dominate in a certain neighbourhood
around that state. This paper presents new results proving
that this notion of locality in action domination can be linked
to the smoothness of the environment’s underlying stochastic
model. Namely, we link the Lipschitz continuity of a Markov
Decision Process to the Lipschitz continuity of its policies’
value functions and introduce the key concept of influence radius
to describe the neighbourhood of states where the dominating
action is guaranteed to be constant. These ideas are
directly exploited into the proposed Localized Policy Iteration
(LPI) algorithm, which is an active learning version of
Rollout-based Policy Iteration. Preliminary results on the Inverted
Pendulum domain demonstrate the viability and the
potential of the proposed approach.</efrbr-expression:summarizationOfContent><efrbr-expression:useRestrictionsOnTheExpression type="creative-commons">http://creativecommons.org/licenses/by/4.0/</efrbr-expression:useRestrictionsOnTheExpression><efrbr-expression:note type="conference name">11th International Symposium on Artificial Intelligence and Mathematics</efrbr-expression:note><efrbr-expression:note type="proceedings title">Proceedings of the 11th International Symposium on Artificial Intelligence and Mathematics (ISAIM), Ft. Lauderdale, FL, USA, January 2010</efrbr-expression:note></efrbr-expression:expression><efrbr-person:person identifier="http://viaf.org/viaf/313550800"><efrbr-person:nameOfPerson vocabulary="VIAF">
            Rachelson, Emmanuel
         </efrbr-person:nameOfPerson></efrbr-person:person><efrbr-person:person identifier="http://users.isc.tuc.gr/~lagoudakis"><efrbr-person:nameOfPerson vocabulary="TUC:LDAP">
            Lagoudakis Michael
            Λαγουδακης Μιχαηλ
         </efrbr-person:nameOfPerson></efrbr-person:person><efrbr-concept:concept identifier="30C811A4-0B45-403B-8394-14261A2BF6EF"><efrbr-concept:termForTheConcept>
            Artificial Intelligence
         </efrbr-concept:termForTheConcept></efrbr-concept:concept></efrbr:entities><efrbr:relationships><efrbr-structure:structureRelations><efrbr-structure:realizedThrough sourceEntity="work" targetEntity="expression" sourceURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2" targetURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2"/></efrbr-structure:structureRelations><efrbr-responsible:responsibleRelations><efrbr-responsible:createdBy sourceEntity="work" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2" targetURI="http://viaf.org/viaf/313550800"/><efrbr-responsible:realizedBy sourceEntity="expression" role="author" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2" targetURI="http://viaf.org/viaf/313550800"/><efrbr-responsible:realizedBy sourceEntity="expression" role="author" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2" targetURI="http://users.isc.tuc.gr/~lagoudakis"/></efrbr-responsible:responsibleRelations><efrbr-subject:subjectRelations><efrbr-subject:hasSubject sourceEntity="work" targetEntity="concept" sourceURI="http://purl.tuc.gr/dl/dias/E0292307-A486-42F6-A1D4-8BF6498753E2" targetURI="30C811A4-0B45-403B-8394-14261A2BF6EF"/></efrbr-subject:subjectRelations><efrbr-other:otherRelations/></efrbr:relationships></efrbr:recordSet>