Skip to content

Commit

Permalink
[HotFix] HAL: Hotfix for identification failures
Browse files Browse the repository at this point in the history
The HAL translator did not work at all with the new HAL site at a
different domain.

- Update target regex for the new domain `hal.science`
- Update the element selector for the content-type label
- Do some cleaning with raw content-type label text; update some labels
- Fix a large number of ESLint errors

Resolves zotero#3041
  • Loading branch information
zoe-translates committed Jul 12, 2023
1 parent 80c211d commit 1a1feea
Showing 1 changed file with 43 additions and 36 deletions.
79 changes: 43 additions & 36 deletions HAL Archives Ouvertes.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
"translatorID": "f20f91fe-d875-47e7-9656-0abb928be472",
"label": "HAL Archives Ouvertes",
"creator": "Sebastian Karcher",
"target": "^https?://hal\\.archives-ouvertes\\.fr",
"target": "^https://(hal\\.archives-ouvertes\\.fr|hal\\.science)\\b",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2022-12-12 19:29:40"
"lastUpdated": "2023-07-12 07:30:59"
}

/*
***** BEGIN LICENSE BLOCK *****
HAL translator
Copyright © 2012-2014 Sebastian Karcher
Copyright © 2012-2014 Sebastian Karcher and contributors
This file is part of Zotero.
Expand All @@ -36,51 +36,58 @@
*/

function detectWeb(doc, url) {
if (url.search(/\/search\/index\//)!=-1) return "multiple";
if (url.search(/\index\.php\?halsid=|\.fr\/[a-z]+-\d+/)!=-1) return findItemType(doc, url);
if (/\/search\/index\//.test(url)) return "multiple";
if (/\/hal-\d+/.test(url)) return findItemType(doc, url);
return false;
}

function findItemType(doc, url){
var itemType = text(doc, 'div.label');
//Z.debug(itemType)
function findItemType(doc, url) {
var itemType = text(doc, '.typdoc')
// do some preliminary cleaning
.split("(")[0].trim() // discard parenthesized text
.split(", ")[0].trim(); // simplify "Pré-publication, Document de travail" and " Preprints, Working Papers, ..."
var typeMap = {
/* eslint-disable quote-props */
"Books": "book",
"Ouvrage (y compris édition critique et traduction)": "book",
"Ouvrages": "book",
"Book sections": "bookSection",
"Chapitre d'ouvrage": "bookSection",
"Conference papers": "conferencePaper",
"Communication dans un congrès": "conferencePaper",
"Directions of work or proceedings": "book",
"Direction d'ouvrage, Proceedings": "book",
"Journal articles": "journalArticle",
"Article dans des revues": "journalArticle",
"Article dans une revue": "journalArticle",
"Lectures": "presentation",
"Cours": "presentation",
"Other publications": "book", //this could also be report, not sure here but bibtex guesses book
"Autre publication": "book", //this could also be report, not sure here but bibtex guesses book
"Other publications": "book", // this could also be report, not sure here but bibtex guesses book
"Autre publication scientifique": "book", // this could also be report, not sure here but bibtex guesses book
"Patents": "patent",
"Brevet": "patent",
"Preprints, Working Papers, ...": "manuscript",
"Pré-publication, Document de travail": "manuscript",
"Preprints": "preprint",
"Pré-publication": "preprint",
"Reports": "report",
"Rapport": "report",
"Theses": "thesis",
"Scientific blog post": "blogPost",
"Article de blog scientifique": "blogPost",
"Theses": "thesis",
"Thèse": "thesis",
"Poster communications": "presentation",
"Poster de conférence": "presentation"
}
"Poster de conférence": "presentation",
/* eslint-enable quote-props */
};
if (typeMap[itemType]) return typeMap[itemType];
else if (url.indexOf("medihal-")!=-1) return "artwork";
else if (url.includes("medihal-")) return "artwork";
else return "journalArticle";
}

function doWeb(doc, url) {
var articles = new Array();
var articles = [];
if (detectWeb(doc, url) == "multiple") {
var items = {};
var titles = doc.evaluate('//strong/a[@data-original-title="Display the resource" or @data-original-title="Voir la ressource"]', doc, null, XPathResult.ANY_TYPE, null);
var title;
while (title = titles.iterateNext()) {
while ((title = titles.iterateNext())/* assignment */) {
items[title.href] = title.textContent;
}
Zotero.selectItems(items, function (items) {
Expand All @@ -90,34 +97,33 @@ function doWeb(doc, url) {
for (var i in items) {
articles.push(i);
}
Zotero.Utilities.processDocuments(articles, scrape)
Zotero.Utilities.processDocuments(articles, scrape);
return true;
});
} else {
//work on PDF pages
if (url.search(/\/document$/) != -1 ) {
var articleURL = url.replace(/\/document$/, "")
//Z.debug(articleURL)
ZU.processDocuments(articleURL, scrape);
}
else scrape(doc, url);
}
else if (/\/document$/.test(url)) { // work on PDF pages
var articleURL = url.replace(/\/document$/, "");
// Z.debug(articleURL)
ZU.processDocuments(articleURL, scrape);
}
else scrape(doc, url);
}

function scrape(doc, url) {
var bibtexUrl = url.replace(/#.+|\/$/, "") + "/bibtex";
var abstract = ZU.xpathText(doc, '//div[@class="abstract-content"]');
var pdfUrl = ZU.xpathText(doc, '//meta[@name="citation_pdf_url"]/@content');
//Z.debug("pdfURL " + pdfUrl)
var pdfUrl = ZU.xpathText(doc, '//meta[@name="citation_pdf_url"]/@content');
// Z.debug("pdfURL " + pdfUrl)
ZU.doGet(bibtexUrl, function (bibtex) {
//Z.debug(bibtex)
// Z.debug(bibtex)
var translator = Zotero.loadTranslator("import");
translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
translator.setString(bibtex);
translator.setHandler("itemDone", function (obj, item) {
if (abstract){
item.abstractNote=abstract.replace(/(Abstract|Résumé)\s*:/, "");
if (abstract) {
item.abstractNote = abstract.replace(/(Abstract|Résumé)\s*:/, "");
}
if (pdfUrl){
if (pdfUrl) {
item.attachments = [{
url: pdfUrl,
title: "HAL PDF Full Text",
Expand All @@ -141,8 +147,9 @@ function scrape(doc, url) {
item.complete();
});
translator.translate();
})
});
}

/** BEGIN TEST CASES **/
var testCases = [
{
Expand Down

0 comments on commit 1a1feea

Please sign in to comment.