dc.creator | Hernández Salmerón, Inmaculada Concepción | es |
dc.creator | Rivero, Carlos R. | es |
dc.creator | Ruiz Cortés, David | es |
dc.creator | Corchuelo Gil, Rafael | es |
dc.date.accessioned | 2017-11-17T09:34:53Z | |
dc.date.available | 2017-11-17T09:34:53Z | |
dc.date.issued | 2012 | |
dc.identifier.citation | Hernández Salmerón, I.C., Rivero, C.R., Ruiz Cortés, D. y Corchuelo Gil, R. (2012). An Architecture for Efficient Web Crawling. En CAiSE 2012: International Conference on Advanced Information Systems Engineering (228-234), Gdańsk, Poland: Springer. | |
dc.identifier.isbn | 978-3-642-31068-3 | es |
dc.identifier.issn | 1865-1348 | es |
dc.identifier.uri | http://hdl.handle.net/11441/66162 | |
dc.description.abstract | Virtual Integration systems require a crawling tool able to navigate and reach relevant pages in the Deep Web in an efficient way. Existing proposals in the crawling area fulfill some of these requirements, but most of them need to download pages in order to classify them as relevant or not. We propose a crawler supported by a web page classifier that uses solely a page URL to determine page relevance. Such a crawler is able to choose in each step only the URLs that lead to relevant pages, and therefore reduces the number of unnecessary pages downloaded, minimising bandwidth and making it efficient and suitable for virtual integration systems. | es |
dc.description.sponsorship | Ministerio de Educación y Ciencia TIN2007-64119 | es |
dc.description.sponsorship | Junta de Andalucía P07-TIC-2602 | es |
dc.description.sponsorship | Junta de Andalucía P08-TIC-4100 | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2008-04718-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-21744 | es |
dc.description.sponsorship | Ministerio de Economía, Industria y Competitividad TIN2010-09809-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-10811-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-09988-E | es |
dc.format | application/pdf | es |
dc.language.iso | eng | es |
dc.publisher | Springer | es |
dc.relation.ispartof | CAiSE 2012: International Conference on Advanced Information Systems Engineering (2012), p 228-234 | |
dc.rights | Attribution-NonCommercial-NoDerivatives 4.0 Internacional | * |
dc.rights.uri | http://creativecommons.org/licenses/by-nc-nd/4.0/ | * |
dc.subject | Web Crawling | es |
dc.subject | Crawler Architecture | es |
dc.subject | Virtual Integration | es |
dc.title | An Architecture for Efficient Web Crawling | es |
dc.type | info:eu-repo/semantics/conferenceObject | es |
dcterms.identifier | https://ror.org/03yxnpp24 | |
dc.type.version | info:eu-repo/semantics/publishedVersion | es |
dc.rights.accessRights | info:eu-repo/semantics/openAccess | es |
dc.contributor.affiliation | Universidad de Sevilla. Departamento de Lenguajes y Sistemas Informáticos | es |
dc.relation.projectID | TIN2007-64119 | es |
dc.relation.projectID | P07-TIC-2602 | es |
dc.relation.projectID | P08-TIC-4100 | es |
dc.relation.projectID | TIN2008-04718-E | es |
dc.relation.projectID | TIN2010-21744 | es |
dc.relation.projectID | TIN2010-09809-E | es |
dc.relation.projectID | TIN2010-10811-E | es |
dc.relation.projectID | TIN2010-09988-E | es |
dc.relation.publisherversion | https://link.springer.com/chapter/10.1007/978-3-642-31069-0_20 | es |
dc.identifier.doi | 10.1007/978-3-642-31069-0_20 | es |
dc.contributor.group | Universidad de Sevilla. TIC134: Sistemas Informáticos | es |
idus.format.extent | 7 | es |
dc.publication.initialPage | 228 | es |
dc.publication.endPage | 234 | es |
dc.eventtitle | CAiSE 2012: International Conference on Advanced Information Systems Engineering | es |
dc.eventinstitution | Gdańsk, Poland | es |
dc.relation.publicationplace | Berlin | es |