dc.creator | Hernández Salmerón, Inmaculada Concepción | es |
dc.creator | Rivero, Carlos R. | es |
dc.creator | Ruiz Cortés, David | es |
dc.creator | Corchuelo Gil, Rafael | es |
dc.date.accessioned | 2017-11-13T12:37:00Z | |
dc.date.available | 2017-11-13T12:37:00Z | |
dc.date.issued | 2011 | |
dc.identifier.citation | Hernández Salmerón, I.C., Rivero, C.R., Ruiz Cortés, D. y Corchuelo Gil, R. (2011). A Tool for Link-Based Web Page Classification. En CAEPIA 2011: 14th Conference of the Spanish Association for Artificial Intelligence (443-452), La Laguna, España: Springer. | |
dc.identifier.isbn | 978-3-642-25273-0 | es |
dc.identifier.uri | http://hdl.handle.net/11441/65970 | |
dc.description.abstract | Virtual integration systems require a crawler to navigate
through web sites automatically, looking for relevant information. This
process is online, so whilst the system is looking for the required information,
the user is waiting for a response. Therefore, downloading a
minimum number of irrelevant pages is mandatory to improve the crawler
efficiency. Most crawlers need to download a page to determine its relevance,
which results in a high number of irrelevant pages downloaded. In
this paper, we propose a classifier that helps crawlers to efficiently navigate
through web sites. This classifier is able to determine if a web page
is relevant by analysing exclusively its URL, minimising the number of
irrelevant pages downloaded, improving crawling efficiency and reducing
used bandwidth, making it suitable for virtual integration systems. | es |
dc.description.sponsorship | Ministerio de Educación y Ciencia TIN2007-64119 | es |
dc.description.sponsorship | Junta de Andalucía P07-TIC-2602 | es |
dc.description.sponsorship | Junta de Andalucía P08-TIC-4100 | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2008-04718-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-21744 | es |
dc.description.sponsorship | Ministerio de Economía, Industria y Competitividad TIN2010-09809-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-10811-E | es |
dc.description.sponsorship | Ministerio de Ciencia e Innovación TIN2010-09988-E | es |
dc.format | application/pdf | es |
dc.language.iso | eng | es |
dc.publisher | Springer | es |
dc.relation.ispartof | CAEPIA 2011: 14th Conference of the Spanish Association for Artificial Intelligence (2011), p 443-452 | |
dc.rights | Attribution-NonCommercial-NoDerivatives 4.0 Internacional | * |
dc.rights.uri | http://creativecommons.org/licenses/by-nc-nd/4.0/ | * |
dc.subject | Crawling | es |
dc.subject | Web Page Classification | es |
dc.subject | Virtual Integration | es |
dc.title | A Tool for Link-Based Web Page Classification | es |
dc.type | info:eu-repo/semantics/conferenceObject | es |
dcterms.identifier | https://ror.org/03yxnpp24 | |
dc.type.version | info:eu-repo/semantics/submittedVersion | es |
dc.rights.accessRights | info:eu-repo/semantics/openAccess | es |
dc.contributor.affiliation | Universidad de Sevilla. Departamento de Lenguajes y Sistemas Informáticos | es |
dc.relation.projectID | TIN2007-64119 | es |
dc.relation.projectID | P07-TIC-2602 | es |
dc.relation.projectID | P08-TIC-4100 | es |
dc.relation.projectID | TIN2008-04718-E | es |
dc.relation.projectID | TIN2010-21744 | es |
dc.relation.projectID | TIN2010-09809-E | es |
dc.relation.projectID | TIN2010-10811-E | es |
dc.relation.projectID | TIN2010-09988-E | es |
dc.relation.publisherversion | https://link.springer.com/chapter/10.1007/978-3-642-25274-7_45 | es |
dc.identifier.doi | 10.1007/978-3-642-25274-7_45 | es |
dc.contributor.group | Universidad de Sevilla. TIC134: Sistemas Informáticos | es |
idus.format.extent | 10 | es |
dc.publication.initialPage | 443 | es |
dc.publication.endPage | 452 | es |
dc.eventtitle | CAEPIA 2011: 14th Conference of the Spanish Association for Artificial Intelligence | es |
dc.eventinstitution | La Laguna, España | es |
dc.relation.publicationplace | Berlin | es |