<efrbr:recordSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:efrbr="http://vfrbr.info/efrbr/1.1" xmlns:efrbr-work="http://vfrbr.info/efrbr/1.1/work" xmlns:efrbr-expression="http://vfrbr.info/efrbr/1.1/expression" xmlns:efrbr-manifestation="http://vfrbr.info/efrbr/1.1/manifestation" xmlns:efrbr-person="http://vfrbr.info/efrbr/1.1/person" xmlns:efrbr-corporateBody="http://vfrbr.info/efrbr/1.1/corporateBody" xmlns:efrbr-concept="http://vfrbr.info/efrbr/1.1/concept" xmlns:efrbr-structure="http://vfrbr.info/efrbr/1.1/structure" xmlns:efrbr-responsible="http://vfrbr.info/efrbr/1.1/responsible" xmlns:efrbr-subject="http://vfrbr.info/efrbr/1.1/subject" xmlns:efrbr-other="http://vfrbr.info/efrbr/1.1/other" xsi:schemaLocation="http://vfrbr.info/efrbr/1.1 http://vfrbr.info/schemas/1.1/efrbr.xsd"><efrbr:entities><efrbr-work:work identifier="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269"><efrbr-work:titleOfTheWork>Reinforcement learning as classification: leveraging modern classifiers</efrbr-work:titleOfTheWork></efrbr-work:work><efrbr-expression:expression identifier="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269"><efrbr-expression:titleOfTheExpression>Reinforcement learning as classification: leveraging modern classifiers</efrbr-expression:titleOfTheExpression><efrbr-expression:formOfExpression vocabulary="DIAS:TYPES">
            Πλήρης Δημοσίευση σε Συνέδριο
            Conference Full Paper
         </efrbr-expression:formOfExpression><efrbr-expression:dateOfExpression type="issued">2015-11-13</efrbr-expression:dateOfExpression><efrbr-expression:dateOfExpression type="published">2003</efrbr-expression:dateOfExpression><efrbr-expression:languageOfExpression vocabulary="iso639-1">en</efrbr-expression:languageOfExpression><efrbr-expression:summarizationOfContent>The basic tools of machine learning appear in
the inner loop of most reinforcement learning algorithms,
typically in the form of Monte Carlo
methods or function approximation techniques.
To a large extent, however, current reinforcement
learning algorithms draw upon machine learning
techniques that are at least ten years old and,
with a few exceptions, very little has been done
to exploit recent advances in classification learning
for the purposes of reinforcement learning.
We use a variant of approximate policy iteration
based on rollouts that allows us to use a pure classification
learner, such as a support vector machine
(SVM), in the inner loop of the algorithm.
We argue that the use of SVMs, particularly in
combination with the kernel trick, can make it
easier to apply reinforcement learning as an “out-of-the-box”
technique, without extensive feature
engineering. Our approach opens the door to
modern classification methods, but does not preclude
the use of classical methods. We present
experimental results in the pendulum balancing
and bicycle riding domains using both SVMs and
neural networks for classifiers.</efrbr-expression:summarizationOfContent><efrbr-expression:useRestrictionsOnTheExpression type="creative-commons">http://creativecommons.org/licenses/by/4.0/</efrbr-expression:useRestrictionsOnTheExpression><efrbr-expression:note type="page range">424–431</efrbr-expression:note><efrbr-expression:note type="conference name">20th International Conference on Machine Learning</efrbr-expression:note><efrbr-expression:note type="proceedings title">Proceedings of the 20th International Conference on Machine Learning (ICML), Washington, DC, U.S.A., August 2003</efrbr-expression:note></efrbr-expression:expression><efrbr-person:person identifier="http://users.isc.tuc.gr/~lagoudakis"><efrbr-person:nameOfPerson vocabulary="TUC:LDAP">
            Lagoudakis Michael
            Λαγουδακης Μιχαηλ
         </efrbr-person:nameOfPerson></efrbr-person:person><efrbr-person:person identifier="3D8D7CD4-FC84-46F6-AD4A-385B706087CB"><efrbr-person:nameOfPerson vocabulary="">
            Parr, R.
         </efrbr-person:nameOfPerson></efrbr-person:person><efrbr-concept:concept identifier="4BE5C2F6-281E-4478-9500-F282AC0A73E1"><efrbr-concept:termForTheConcept>
            machine learning
         </efrbr-concept:termForTheConcept></efrbr-concept:concept></efrbr:entities><efrbr:relationships><efrbr-structure:structureRelations><efrbr-structure:realizedThrough sourceEntity="work" targetEntity="expression" sourceURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269" targetURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269"/></efrbr-structure:structureRelations><efrbr-responsible:responsibleRelations><efrbr-responsible:createdBy sourceEntity="work" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269" targetURI="http://users.isc.tuc.gr/~lagoudakis"/><efrbr-responsible:realizedBy sourceEntity="expression" role="author" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269" targetURI="http://users.isc.tuc.gr/~lagoudakis"/><efrbr-responsible:realizedBy sourceEntity="expression" role="author" targetEntity="person" sourceURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269" targetURI="3D8D7CD4-FC84-46F6-AD4A-385B706087CB"/></efrbr-responsible:responsibleRelations><efrbr-subject:subjectRelations><efrbr-subject:hasSubject sourceEntity="work" targetEntity="concept" sourceURI="http://purl.tuc.gr/dl/dias/78C8B833-D841-436A-82B4-676C1B860269" targetURI="4BE5C2F6-281E-4478-9500-F282AC0A73E1"/></efrbr-subject:subjectRelations><efrbr-other:otherRelations/></efrbr:relationships></efrbr:recordSet>