<?xml version="1.0" encoding="UTF-8"?>
<!--
  eFRBR 1.1 record set describing the conference paper
  "Coordinated reinforcement learning": one work, one expression, three persons
  and one concept, followed by the relationships that link them.

  Layout note: only whitespace between element-only children has been added.
  The content of every text-bearing element (titles, form, abstract, names,
  terms, notes) is kept exactly as delivered, including its leading/trailing
  whitespace and internal line breaks, so do not re-indent those elements.
-->
<efrbr:recordSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xmlns:efrbr="http://vfrbr.info/efrbr/1.1"
                 xmlns:efrbr-work="http://vfrbr.info/efrbr/1.1/work"
                 xmlns:efrbr-expression="http://vfrbr.info/efrbr/1.1/expression"
                 xmlns:efrbr-manifestation="http://vfrbr.info/efrbr/1.1/manifestation"
                 xmlns:efrbr-person="http://vfrbr.info/efrbr/1.1/person"
                 xmlns:efrbr-corporateBody="http://vfrbr.info/efrbr/1.1/corporateBody"
                 xmlns:efrbr-concept="http://vfrbr.info/efrbr/1.1/concept"
                 xmlns:efrbr-structure="http://vfrbr.info/efrbr/1.1/structure"
                 xmlns:efrbr-responsible="http://vfrbr.info/efrbr/1.1/responsible"
                 xmlns:efrbr-subject="http://vfrbr.info/efrbr/1.1/subject"
                 xmlns:efrbr-other="http://vfrbr.info/efrbr/1.1/other"
                 xsi:schemaLocation="http://vfrbr.info/efrbr/1.1 http://vfrbr.info/schemas/1.1/efrbr.xsd">
  <efrbr:entities>
    <!-- The work and its expression share the same identifier URI. -->
    <efrbr-work:work identifier="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD">
      <efrbr-work:titleOfTheWork>Coordinated reinforcement learning</efrbr-work:titleOfTheWork>
    </efrbr-work:work>
    <efrbr-expression:expression identifier="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD">
      <efrbr-expression:titleOfTheExpression>Coordinated reinforcement learning</efrbr-expression:titleOfTheExpression>
      <!-- Form is given twice in one value: a Greek label followed by an English label. -->
      <efrbr-expression:formOfExpression vocabulary="DIAS:TYPES">
            Πλήρης Δημοσίευση σε Συνέδριο
            Conference Full Paper
         </efrbr-expression:formOfExpression>
      <!-- Two dates: a full "issued" date and a year-only "published" date. -->
      <efrbr-expression:dateOfExpression type="issued">2015-11-13</efrbr-expression:dateOfExpression>
      <efrbr-expression:dateOfExpression type="published">2002</efrbr-expression:dateOfExpression>
      <efrbr-expression:languageOfExpression vocabulary="iso639-1">en</efrbr-expression:languageOfExpression>
      <!-- Abstract text; its hard line breaks are part of the stored value. -->
      <efrbr-expression:summarizationOfContent>We present several new algorithms for multiagent
reinforcement learning. A common feature of these
algorithms is a parameterized, structured representation
of a policy or value function. This structure
is leveraged in an approach we call coordinated reinforcement
learning, by which agents coordinate
both their action selection activities and their parameter
updates. Within the limits of our parametric
representations, the agents will determine
a jointly optimal action without explicitly considering
every possible action in their exponentially
large joint action space. Our methods differ from
many previous reinforcement learning approaches
to multiagent coordination in that structured communication
and coordination between agents appears
at the core of both the learning algorithm and
the execution architecture. Our experimental results,
comparing our approach to other RL methods,
illustrate both the quality of the policies obtained
and the additional benefits of coordination.
</efrbr-expression:summarizationOfContent>
      <efrbr-expression:useRestrictionsOnTheExpression type="creative-commons">http://creativecommons.org/licenses/by/4.0/</efrbr-expression:useRestrictionsOnTheExpression>
      <!-- Free-form notes, distinguished by their type attribute. -->
      <efrbr-expression:note type="page range">227–234</efrbr-expression:note>
      <efrbr-expression:note type="conference name">19th International Conference on Machine Learning</efrbr-expression:note>
      <efrbr-expression:note type="proceedings title">Proceedings of the 19th International Conference on Machine Learning (ICML), Sydney, Australia, July 2002</efrbr-expression:note>
    </efrbr-expression:expression>
    <!-- This person is identified by a URL and named in both Latin and Greek script. -->
    <efrbr-person:person identifier="http://users.isc.tuc.gr/~lagoudakis">
      <efrbr-person:nameOfPerson vocabulary="TUC:LDAP">
            Lagoudakis Michael
            Λαγουδακης Μιχαηλ
         </efrbr-person:nameOfPerson>
    </efrbr-person:person>
    <!--
      The next two persons are identified by bare UUID-style strings and carry an
      empty vocabulary attribute.
      NOTE(review): confirm the empty vocabulary value is intended rather than a
      missing vocabulary name.
    -->
    <efrbr-person:person identifier="3E44A29F-157D-4BD0-B360-E84FEBA3C46B">
      <efrbr-person:nameOfPerson vocabulary="">
            Guestrin, C.
         </efrbr-person:nameOfPerson>
    </efrbr-person:person>
    <efrbr-person:person identifier="A3A2355F-4301-4EC2-A303-BBA400CDAB54">
      <efrbr-person:nameOfPerson vocabulary="">
            Parr, R.
         </efrbr-person:nameOfPerson>
    </efrbr-person:person>
    <efrbr-concept:concept identifier="71C60324-B692-4E77-9647-25A878F2EF0D">
      <efrbr-concept:termForTheConcept>
            Reinforcement Learning
         </efrbr-concept:termForTheConcept>
    </efrbr-concept:concept>
  </efrbr:entities>
  <!-- Each relation refers to the entities above through sourceURI / targetURI. -->
  <efrbr:relationships>
    <efrbr-structure:structureRelations>
      <efrbr-structure:realizedThrough sourceEntity="work"
                                       targetEntity="expression"
                                       sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                       targetURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"/>
    </efrbr-structure:structureRelations>
    <!--
      createdBy links the work to a single person, while realizedBy links the
      expression to all three persons with role "author".
      NOTE(review): confirm whether the other two authors should also be linked
      to the work via createdBy.
    -->
    <efrbr-responsible:responsibleRelations>
      <efrbr-responsible:createdBy sourceEntity="work"
                                   targetEntity="person"
                                   sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                   targetURI="http://users.isc.tuc.gr/~lagoudakis"/>
      <efrbr-responsible:realizedBy sourceEntity="expression"
                                    role="author"
                                    targetEntity="person"
                                    sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                    targetURI="http://users.isc.tuc.gr/~lagoudakis"/>
      <efrbr-responsible:realizedBy sourceEntity="expression"
                                    role="author"
                                    targetEntity="person"
                                    sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                    targetURI="3E44A29F-157D-4BD0-B360-E84FEBA3C46B"/>
      <efrbr-responsible:realizedBy sourceEntity="expression"
                                    role="author"
                                    targetEntity="person"
                                    sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                    targetURI="A3A2355F-4301-4EC2-A303-BBA400CDAB54"/>
    </efrbr-responsible:responsibleRelations>
    <efrbr-subject:subjectRelations>
      <efrbr-subject:hasSubject sourceEntity="work"
                                targetEntity="concept"
                                sourceURI="http://purl.tuc.gr/dl/dias/15CFBFB5-CCCD-4BC0-ABAF-DAAE65C69CBD"
                                targetURI="71C60324-B692-4E77-9647-25A878F2EF0D"/>
    </efrbr-subject:subjectRelations>
    <!-- No "other" relations are recorded for this record set. -->
    <efrbr-other:otherRelations/>
  </efrbr:relationships>
</efrbr:recordSet>