dc.creator | Hernández Salmerón, Inmaculada Concepción | es |
dc.creator | Sleiman, Hassan A. | es |
dc.creator | Ruiz Cortés, David | es |
dc.creator | Corchuelo Gil, Rafael | es |
dc.date.accessioned | 2017-11-09T10:22:31Z | |
dc.date.available | 2017-11-09T10:22:31Z | |
dc.date.issued | 2011 | |
dc.identifier.citation | Hernández Salmerón, I.C., Sleiman, H.A., Ruiz Cortés, D. y Corchuelo Gil, R. (2011). A Conceptual Framework for Efficient Web Crawling in Virtual Integration Contexts. En WISM 2011: International Conference on Web Information Systems and Mining (282-291), Taiyuan, China: Springer. | |
dc.identifier.isbn | 978-3-642-23981-6 | es |
dc.identifier.issn | 0302-9743 | es |
dc.identifier.uri | http://hdl.handle.net/11441/65832 | |
dc.description.abstract | Virtual Integration systems require a crawling tool able to
navigate and reach relevant pages in the Web in an efficient way. Existing
proposals in the crawling area are aware of the efficiency problem,
but still most of them need to download pages in order to classify them
as relevant or not. In this paper, we present a conceptual framework for
designing crawlers supported by a web page classifier that relies solely
on URLs to determine page relevance. Such a crawler is able to choose
in each step only the URLs that lead to relevant pages, and therefore
reduces the number of unnecessary pages downloaded, optimising bandwidth
and making it efficient and suitable for virtual integration systems.
Our preliminary experiments show that such a classifier is able to distinguish
between links leading to different kinds of pages, without previous
intervention from the user. | es |
dc.description.sponsorship | Ministerio de Educación y Ciencia TIN2007-64119 | es |
dc.description.sponsorship | Junta de Andalucía P07-TIC-2602 | es |
dc.description.sponsorship | Junta de Andalucía P08-TIC-4100 | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2008-04718-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-21744 | es |
dc.description.sponsorship | Ministerio de Economía, Industria y Competitividad TIN2010-09809-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-10811-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-09988-E | es |
dc.format | application/pdf | es |
dc.language.iso | eng | es |
dc.publisher | Springer | es |
dc.relation.ispartof | WISM 2011: International Conference on Web Information Systems and Mining (2011), p 282-291 | |
dc.rights | Attribution-NonCommercial-NoDerivatives 4.0 Internacional | * |
dc.rights.uri | http://creativecommons.org/licenses/by-nc-nd/4.0/ | * |
dc.subject | Crawlers | es |
dc.subject | Web Navigation | es |
dc.subject | Virtual Integration | es |
dc.title | A Conceptual Framework for Efficient Web Crawling in Virtual Integration Contexts | es |
dc.type | info:eu-repo/semantics/conferenceObject | es |
dcterms.identifier | https://ror.org/03yxnpp24 | |
dc.type.version | info:eu-repo/semantics/submittedVersion | es |
dc.rights.accessRights | info:eu-repo/semantics/openAccess | es |
dc.contributor.affiliation | Universidad de Sevilla. Departamento de Lenguajes y Sistemas Informáticos | es |
dc.relation.projectID | TIN2007-64119 | es |
dc.relation.projectID | P07-TIC-2602 | es |
dc.relation.projectID | P08-TIC-4100 | es |
dc.relation.projectID | TIN2008-04718-E | es |
dc.relation.projectID | TIN2010-21744 | es |
dc.relation.projectID | TIN2010-09809-E | es |
dc.relation.projectID | TIN2010-10811-E | es |
dc.relation.projectID | TIN2010-09988-E | es |
dc.relation.publisherversion | https://link.springer.com/chapter/10.1007/978-3-642-23982-3_35 | es |
dc.identifier.doi | 10.1007/978-3-642-23982-3_35 | es |
idus.format.extent | 10 | es |
dc.publication.initialPage | 282 | es |
dc.publication.endPage | 291 | es |
dc.eventtitle | WISM 2011: International Conference on Web Information Systems and Mining | es |
dc.eventinstitution | Taiyuan, China | es |
dc.relation.publicationplace | Berlin | es |