<?xml version="1.0" encoding="UTF-8"?>
<!--
  eFRBR 1.1 record set describing the conference paper
  "Algorithm selection using reinforcement learning".

  Entities:      one work, one expression (both share the same purl.tuc.gr
                 identifier), two persons and one subject concept.
  Relationships: work realizedThrough expression; work createdBy person;
                 expression realizedBy each person (role "author");
                 work hasSubject concept.

  Whitespace inside text-bearing elements (formOfExpression,
  summarizationOfContent, nameOfPerson, termForTheConcept) is kept exactly as
  delivered, because it is part of the element value. Do not re-indent those
  elements.
-->
<efrbr:recordSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xmlns:efrbr="http://vfrbr.info/efrbr/1.1"
                 xmlns:efrbr-work="http://vfrbr.info/efrbr/1.1/work"
                 xmlns:efrbr-expression="http://vfrbr.info/efrbr/1.1/expression"
                 xmlns:efrbr-manifestation="http://vfrbr.info/efrbr/1.1/manifestation"
                 xmlns:efrbr-person="http://vfrbr.info/efrbr/1.1/person"
                 xmlns:efrbr-corporateBody="http://vfrbr.info/efrbr/1.1/corporateBody"
                 xmlns:efrbr-concept="http://vfrbr.info/efrbr/1.1/concept"
                 xmlns:efrbr-structure="http://vfrbr.info/efrbr/1.1/structure"
                 xmlns:efrbr-responsible="http://vfrbr.info/efrbr/1.1/responsible"
                 xmlns:efrbr-subject="http://vfrbr.info/efrbr/1.1/subject"
                 xmlns:efrbr-other="http://vfrbr.info/efrbr/1.1/other"
                 xsi:schemaLocation="http://vfrbr.info/efrbr/1.1 http://vfrbr.info/schemas/1.1/efrbr.xsd">
  <efrbr:entities>
    <efrbr-work:work identifier="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342">
      <efrbr-work:titleOfTheWork>Algorithm selection using reinforcement learning</efrbr-work:titleOfTheWork>
    </efrbr-work:work>
    <efrbr-expression:expression identifier="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342">
      <efrbr-expression:titleOfTheExpression>Algorithm selection using reinforcement learning</efrbr-expression:titleOfTheExpression>
      <!-- The value carries both the Greek and the English label of the DIAS type. -->
      <efrbr-expression:formOfExpression vocabulary="DIAS:TYPES">
            Πλήρης Δημοσίευση σε Συνέδριο
            Conference Full Paper
         </efrbr-expression:formOfExpression>
      <efrbr-expression:dateOfExpression type="issued">2015-11-14</efrbr-expression:dateOfExpression>
      <efrbr-expression:dateOfExpression type="published">2000</efrbr-expression:dateOfExpression>
      <efrbr-expression:languageOfExpression vocabulary="iso639-1">en</efrbr-expression:languageOfExpression>
      <!-- Abstract text; the hard line breaks are part of the stored value. -->
      <efrbr-expression:summarizationOfContent>Many computational problems can be solved by
multiple algorithms, with different algorithms
fastest for different problem sizes, input distributions,
and hardware characteristics. We consider
the problem of algorithm selection: dynamically
choose an algorithm to attack an instance
of a problem with the goal of minimizing
the overall execution time. We formulate the
problem as a kind of Markov decision process
(MDP), and use ideas from reinforcement learning
to solve it. This paper introduces a kind of
MDP that models the algorithm selection problem
by allowing multiple state transitions. The well
known Q-learning algorithm is adapted for this
case in a way that combines both Monte-Carlo
and Temporal Difference methods. Also, this
work uses, and extends in a way to control problems,
the Least-Squares Temporal Difference algorithm
(LSTD(0)) of Boyan. The experimental
study focuses on the classic problems of order
statistic selection and sorting. The encouraging
results reveal the potential of applying learning
methods to traditional computational problems.</efrbr-expression:summarizationOfContent>
      <efrbr-expression:useRestrictionsOnTheExpression type="creative-commons">http://creativecommons.org/licenses/by/4.0/</efrbr-expression:useRestrictionsOnTheExpression>
      <efrbr-expression:note type="page range">511–518</efrbr-expression:note>
      <efrbr-expression:note type="conference name">17th International Conference on Machine Learning</efrbr-expression:note>
      <efrbr-expression:note type="proceedings title">Proceedings of the 17th International Conference on Machine Learning (ICML), Stanford, CA, USA, June 2000</efrbr-expression:note>
    </efrbr-expression:expression>
    <efrbr-person:person identifier="http://users.isc.tuc.gr/~lagoudakis">
      <efrbr-person:nameOfPerson vocabulary="TUC:LDAP">
            Lagoudakis Michael
            Λαγουδακης Μιχαηλ
         </efrbr-person:nameOfPerson>
    </efrbr-person:person>
    <!-- NOTE(review): this person has a bare UUID identifier (not a URI) and an
         empty vocabulary attribute; confirm whether that is intended for
         persons outside the TUC:LDAP vocabulary. -->
    <efrbr-person:person identifier="83E7BF1A-A321-4F06-8865-F0E85F5486D5">
      <efrbr-person:nameOfPerson vocabulary="">
            Littman, M.
         </efrbr-person:nameOfPerson>
    </efrbr-person:person>
    <efrbr-concept:concept identifier="http://id.loc.gov/authorities/subjects/sh94004659">
      <efrbr-concept:termForTheConcept>
            Intelligence, Computational
            computational intelligence
            intelligence computational
         </efrbr-concept:termForTheConcept>
    </efrbr-concept:concept>
  </efrbr:entities>
  <efrbr:relationships>
    <efrbr-structure:structureRelations>
      <efrbr-structure:realizedThrough sourceEntity="work"
                                       targetEntity="expression"
                                       sourceURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"
                                       targetURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"/>
    </efrbr-structure:structureRelations>
    <efrbr-responsible:responsibleRelations>
      <!-- NOTE(review): createdBy links the work to the first person only, while
           realizedBy lists both persons as authors of the expression; confirm
           whether a second createdBy relation is missing. -->
      <efrbr-responsible:createdBy sourceEntity="work"
                                   targetEntity="person"
                                   sourceURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"
                                   targetURI="http://users.isc.tuc.gr/~lagoudakis"/>
      <efrbr-responsible:realizedBy sourceEntity="expression"
                                    role="author"
                                    targetEntity="person"
                                    sourceURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"
                                    targetURI="http://users.isc.tuc.gr/~lagoudakis"/>
      <efrbr-responsible:realizedBy sourceEntity="expression"
                                    role="author"
                                    targetEntity="person"
                                    sourceURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"
                                    targetURI="83E7BF1A-A321-4F06-8865-F0E85F5486D5"/>
    </efrbr-responsible:responsibleRelations>
    <efrbr-subject:subjectRelations>
      <efrbr-subject:hasSubject sourceEntity="work"
                                targetEntity="concept"
                                sourceURI="http://purl.tuc.gr/dl/dias/75E77769-957E-4070-8DCA-33D273034342"
                                targetURI="http://id.loc.gov/authorities/subjects/sh94004659"/>
    </efrbr-subject:subjectRelations>
    <efrbr-other:otherRelations/>
  </efrbr:relationships>
</efrbr:recordSet>