Skip to content

Commit

Permalink
Boston Review: Fix broken translator; remove FW code
Browse files Browse the repository at this point in the history
The translator no longer worked with the current site. The fix is
implemented using the EM translator, and the old FW code is completely
removed.

Test cases are updated and new tests are added for multiple authors and
special projects.

Note that the ISSN field is no longer added to all translated items
because in general there's no way to determine if an online article
belongs to a particular print issue under that ISSN. Further advice is
appreciated for this particular issue.

See also zotero#3105 for the FW problem.
  • Loading branch information
zoe-translates committed Aug 16, 2023
1 parent 9180801 commit 019d0e4
Showing 1 changed file with 194 additions and 87 deletions.
281 changes: 194 additions & 87 deletions Boston Review.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,191 +3,298 @@
"label": "Boston Review",
"creator": "Sebastian Karcher",
"target": "^https?://(www\\.)?bostonreview\\.net/",
"minVersion": "2.1.9",
"minVersion": "6.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-08-09 19:58:48"
"lastUpdated": "2023-08-16 01:46:35"
}

/* FW LINE 59:b820c6d */ function flatten(t){var e=new Array;for(var i in t){var r=t[i];r instanceof Array?e=e.concat(flatten(r)):e.push(r)}return e}var FW={_scrapers:new Array};FW._Base=function(){this.callHook=function(t,e,i,r){if("object"==typeof this.hooks){var n=this.hooks[t];"function"==typeof n&&n(e,i,r)}},this.evaluateThing=function(t,e,i){var r=typeof t;if("object"===r){if(t instanceof Array){var n=this.evaluateThing,a=t.map(function(t){return n(t,e,i)});return flatten(a)}return t.evaluate(e,i)}return"function"===r?t(e,i):t},this.makeItems=function(t,e,i,r,n){n()}},FW.Scraper=function(t){FW._scrapers.push(new FW._Scraper(t))},FW._Scraper=function(t){for(x in t)this[x]=t[x];this._singleFieldNames=["abstractNote","applicationNumber","archive","archiveLocation","artworkMedium","artworkSize","assignee","audioFileType","audioRecordingType","billNumber","blogTitle","bookTitle","callNumber","caseName","code","codeNumber","codePages","codeVolume","committee","company","conferenceName","country","court","date","dateDecided","dateEnacted","dictionaryTitle","distributor","docketNumber","documentNumber","DOI","edition","encyclopediaTitle","episodeNumber","extra","filingDate","firstPage","forumTitle","genre","history","institution","interviewMedium","ISBN","ISSN","issue","issueDate","issuingAuthority","journalAbbreviation","label","language","legalStatus","legislativeBody","letterType","libraryCatalog","manuscriptType","mapType","medium","meetingName","nameOfAct","network","number","numberOfVolumes","numPages","pages","patentNumber","place","postType","presentationType","priorityNumbers","proceedingsTitle","programTitle","programmingLanguage","publicLawNumber","publicationTitle","publisher","references","reportNumber","reportType","reporter","reporterVolume","rights","runningTime","scale","section","series","seriesNumber","seriesText","seriesTitle","session","shortTitle","studio","subject","system","thesisType","title","type","university","url","version","videoRecording
Type","volume","websiteTitle","websiteType"],this._makeAttachments=function(t,e,i,r){if(i instanceof Array)i.forEach(function(i){this._makeAttachments(t,e,i,r)},this);else if("object"==typeof i){var n=i.urls||i.url,a=i.types||i.type,s=i.titles||i.title,o=i.snapshots||i.snapshot,u=this.evaluateThing(n,t,e),l=this.evaluateThing(s,t,e),c=this.evaluateThing(a,t,e),h=this.evaluateThing(o,t,e);u instanceof Array||(u=[u]);for(var f in u){var p,m,v,d=u[f];p=c instanceof Array?c[f]:c,m=l instanceof Array?l[f]:l,v=h instanceof Array?h[f]:h,r.attachments.push({url:d,title:m,mimeType:p,snapshot:v})}}},this.makeItems=function(t,e,i,r,n){var a=new Zotero.Item(this.itemType);a.url=e;for(var s in this._singleFieldNames){var o=this._singleFieldNames[s];if(this[o]){var u=this.evaluateThing(this[o],t,e);u instanceof Array?a[o]=u[0]:a[o]=u}}var l=["creators","tags"];for(var c in l){var h=l[c],f=this.evaluateThing(this[h],t,e);if(f)for(var p in f)a[h].push(f[p])}this._makeAttachments(t,e,this.attachments,a),r(a,this,t,e),n()}},FW._Scraper.prototype=new FW._Base,FW.MultiScraper=function(t){FW._scrapers.push(new FW._MultiScraper(t))},FW._MultiScraper=function(t){for(x in t)this[x]=t[x];this._mkSelectItems=function(t,e){var i=new Object;for(var r in t)i[e[r]]=t[r];return i},this._selectItems=function(t,e,i){var r=new Array;Zotero.selectItems(this._mkSelectItems(t,e),function(t){for(var e in t)r.push(e);i(r)})},this._mkAttachments=function(t,e,i){var r=this.evaluateThing(this.attachments,t,e),n=new Object;if(r)for(var a in i)n[i[a]]=r[a];return n},this._makeChoices=function(t,e,i,r,n){if(t instanceof Array)t.forEach(function(t){this._makeTitlesUrls(t,e,i,r,n)},this);else if("object"==typeof t){var a=t.urls||t.url,s=t.titles||t.title,o=this.evaluateThing(a,e,i),u=this.evaluateThing(s,e,i),l=u instanceof Array;o instanceof Array||(o=[o]);for(var c in o){var h,f=o[c];h=l?u[c]:u,n.push(f),r.push(h)}}},this.makeItems=function(t,e,i,r,n){if(this.beforeFilter){var 
a=this.beforeFilter(t,e);if(a!=e)return void this.makeItems(t,a,i,r,n)}var s=[],o=[];this._makeChoices(this.choices,t,e,s,o);var u=this._mkAttachments(t,e,o),l=this.itemTrans;this._selectItems(s,o,function(t){if(t){var e=function(t){var e=t.documentURI,i=l;void 0===i&&(i=FW.getScraper(t,e)),void 0===i||i.makeItems(t,e,u[e],r,function(){})};Zotero.Utilities.processDocuments(t,e,n)}else n()})}},FW._MultiScraper.prototype=new FW._Base,FW.WebDelegateTranslator=function(t){return new FW._WebDelegateTranslator(t)},FW._WebDelegateTranslator=function(t){for(x in t)this[x]=t[x];this.makeItems=function(t,e,i,r,n){var a=this,s=Zotero.loadTranslator("web");s.setHandler("itemDone",function(i,n){r(n,a,t,e)}),s.setDocument(t),this.translatorId?(s.setTranslator(this.translatorId),s.translate()):(s.setHandler("translators",function(t,e){e.length&&(s.setTranslator(e[0]),s.translate())}),s.getTranslators()),n()}},FW._WebDelegateTranslator.prototype=new FW._Base,FW._StringMagic=function(){this._filters=new Array,this.addFilter=function(t){return this._filters.push(t),this},this.split=function(t){return this.addFilter(function(e){return e.split(t).filter(function(t){return""!=t})})},this.replace=function(t,e,i){return this.addFilter(function(r){return r.match(t)?r.replace(t,e,i):r})},this.prepend=function(t){return this.replace(/^/,t)},this.append=function(t){return this.replace(/$/,t)},this.remove=function(t,e){return this.replace(t,"",e)},this.trim=function(){return this.addFilter(function(t){return Zotero.Utilities.trim(t)})},this.trimInternal=function(){return this.addFilter(function(t){return Zotero.Utilities.trimInternal(t)})},this.match=function(t,e){return e||(e=0),this.addFilter(function(i){var r=i.match(t);return void 0===r||null===r?void 0:r[e]})},this.cleanAuthor=function(t,e){return this.addFilter(function(i){return Zotero.Utilities.cleanAuthor(i,t,e)})},this.key=function(t){return this.addFilter(function(e){return e[t]})},this.capitalizeTitle=function(){return 
this.addFilter(function(t){return Zotero.Utilities.capitalizeTitle(t)})},this.unescapeHTML=function(){return this.addFilter(function(t){return Zotero.Utilities.unescapeHTML(t)})},this.unescape=function(){return this.addFilter(function(t){return unescape(t)})},this._applyFilters=function(t,e){for(i in this._filters){t=flatten(t),t=t.filter(function(t){return void 0!==t&&null!==t});for(var r=0;r<t.length;r++)try{if(void 0===t[r]||null===t[r])continue;t[r]=this._filters[i](t[r],e)}catch(n){t[r]=void 0,Zotero.debug("Caught exception "+n+"on filter: "+this._filters[i])}t=t.filter(function(t){return void 0!==t&&null!==t})}return flatten(t)}},FW.PageText=function(){return new FW._PageText},FW._PageText=function(){this._filters=new Array,this.evaluate=function(t){var e=[t.documentElement.innerHTML];return e=this._applyFilters(e,t),0==e.length?!1:e}},FW._PageText.prototype=new FW._StringMagic,FW.Url=function(){return new FW._Url},FW._Url=function(){this._filters=new Array,this.evaluate=function(t,e){var i=[e];return i=this._applyFilters(i,t),0==i.length?!1:i}},FW._Url.prototype=new FW._StringMagic,FW.Xpath=function(t){return new FW._Xpath(t)},FW._Xpath=function(t){this._xpath=t,this._filters=new Array,this.text=function(){var t=function(t){return"object"==typeof t&&t.textContent?t.textContent:t};return this.addFilter(t),this},this.sub=function(t){var e=function(e,i){var r=i.evaluate(t,e,null,XPathResult.ANY_TYPE,null);return r?r.iterateNext():void 0};return this.addFilter(e),this},this.evaluate=function(t){var e=t.evaluate(this._xpath,t,null,XPathResult.ANY_TYPE,null),i=e.resultType,r=new Array;if(i==XPathResult.STRING_TYPE)r.push(e.stringValue);else if(i==XPathResult.BOOLEAN_TYPE)r.push(e.booleanValue);else if(i==XPathResult.NUMBER_TYPE)r.push(e.numberValue);else if(i==XPathResult.ORDERED_NODE_ITERATOR_TYPE||i==XPathResult.UNORDERED_NODE_ITERATOR_TYPE)for(var n;n=e.iterateNext();)r.push(n);return r=this._applyFilters(r,t),0==r.length?!1:r}},FW._Xpath.prototype=new 
FW._StringMagic,FW.detectWeb=function(t,e){for(var i in FW._scrapers){var r=FW._scrapers[i],n=r.evaluateThing(r.itemType,t,e),a=r.evaluateThing(r.detect,t,e);if(a.length>0&&a[0])return n}},FW.getScraper=function(t,e){var i=FW.detectWeb(t,e);return FW._scrapers.filter(function(r){return r.evaluateThing(r.itemType,t,e)==i&&r.evaluateThing(r.detect,t,e)})[0]},FW.doWeb=function(t,e){var i=FW.getScraper(t,e);i.makeItems(t,e,[],function(t,e,i,r){e.callHook("scraperDone",t,i,r),t.title||(t.title=""),t.complete()},function(){Zotero.done()}),Zotero.wait()};
// Legacy (FW-based) detection entry point: delegates to FW.detectWeb, which
// walks the scrapers registered via FW.Scraper / FW.MultiScraper and returns
// the itemType of the first one whose `detect` expression yields a non-empty
// result with a truthy first element (undefined when none matches).
function detectWeb(doc, url) { return FW.detectWeb(doc, url); }
// Legacy (FW-based) translation entry point: delegates to FW.doWeb, which
// looks up the matching scraper with FW.getScraper and runs its makeItems();
// each produced item gets the "scraperDone" hook, an empty-string title if
// none was set, and is then completed. FW.doWeb has no return statement, so
// this returns undefined.
function doWeb(doc, url) { return FW.doWeb(doc, url); }



/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2012/2013 Sebastian Karcher
Copyright © 2012/2013 Sebastian Karcher, Zoë C. Ma, and contributors
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/

/**ToC **/
FW.MultiScraper({
itemType : 'multiple',
detect : FW.Xpath('//h6/a'),
choices : {
titles : FW.Xpath('//h6/a|//h2/a').text().trim(),
urls : FW.Xpath('//h6/a|//h2/a').key("href")

/**
 * Decide what kind of Boston Review page is being viewed.
 *
 * @param {Document} doc - The page document; only consulted for listing pages.
 * @param {string} url - Absolute URL of the page.
 * @returns {string|false} 'magazineArticle' for article-like paths,
 *   'multiple' when the page lists at least one selectable link, else false.
 */
function detectWeb(doc, url) {
	const { pathname } = new URL(url);
	// Some content may not be in the print edition, but the web publication
	// is a magazine nonetheless, so every article-like path maps to the same
	// item type.
	const looksLikeArticle = /^\/(articles|forum(_response)?|us)\/.+/.test(pathname);
	if (looksLikeArticle) {
		return 'magazineArticle';
	}
	// Only probe the DOM for result links when the path did not match.
	return getSearchResults(doc, true) ? 'multiple' : false;
}

/**
 * Collect the article links offered by a listing page (search results,
 * issue table of contents, special project, or legacy issue page).
 *
 * @param {Document} doc - Document to search.
 * @param {boolean} checkOnly - When truthy, stop at the first usable link
 *   and return true instead of building the full map.
 * @returns {Object<string, string>|boolean} Map of link href to
 *   whitespace-normalised title, true in check-only mode when a usable link
 *   exists, or false when there is none.
 */
function getSearchResults(doc, checkOnly) {
	// Containers whose entry titles sit in an "h3 a" descendant
	const titledContainers = [
		"[data-elementor-type='search-results']", // search page
		"[data-elementor-post-type='issue']", // issue TOC
		"[data-elementor-post-type='special-project']", // special project
	];
	// The joined selector looks like "container1 h3 a, container2 h3 a ..."
	const linkSelector = [
		...titledContainers.map(container => `${container} h3 a`),
		// Legacy issue pages; although many of the links are broken
		"[data-elementor-post-type='elementor_library'] h6 a",
	].join(", ");

	const results = {};
	let usable = 0;
	for (const link of doc.querySelectorAll(linkSelector)) {
		const target = link.href;
		const label = ZU.trimInternal(link.textContent);
		if (target && label) {
			if (checkOnly) return true;
			results[target] = label;
			usable++;
		}
	}
	return usable > 0 ? results : false;
}
});

/**Search Results**/
FW.MultiScraper({
itemType : 'multiple',
detect : FW.Xpath('//li[@class="search-result"]//h3/a'),
choices : {
titles : FW.Xpath('//h3/a').text().trim(),
urls : FW.Xpath('//h3/a').key("href")

/**
 * Translation entry point: saves the current article, or lets the user pick
 * articles from a listing page and saves each pick in turn.
 *
 * @param {Document} doc - Document of the current page.
 * @param {string} url - URL of the current page.
 * @returns {Promise<void>} Settles once every requested item was scraped.
 */
async function doWeb(doc, url) {
	// Single-article pages are scraped directly from the document at hand.
	if (detectWeb(doc, url) !== 'multiple') {
		await scrape(doc, url);
		return;
	}

	const chosen = await Zotero.selectItems(getSearchResults(doc, false));
	// A falsy selection means nothing is to be saved.
	if (!chosen) return;

	// Fetch and scrape the picks one at a time, in selection order.
	for (const href of Object.keys(chosen)) {
		const articleDoc = await requestDocument(href);
		await scrape(articleDoc);
	}
}
});

/**
 * Articles (legacy FW definition).
 *
 * Registers a single-item scraper with the FW framework. FW.detectWeb reports
 * `itemType` when the `detect` XPath matches; FW then evaluates each field
 * expression against the page, keeping only the first value for single-valued
 * fields, and sets the item's url to the page URL.
 * NOTE(review): per the commit description the FW code is superseded by the
 * Embedded Metadata based scrape() — confirm before relying on this block.
 */
FW.Scraper({
itemType : 'magazineArticle',
// An <h1> inside div.title marks an article page.
detect : FW.Xpath('//div[@class="title"]/h1'),
title : FW.Xpath('//div[@class="title"]/h1').text().trim(),
// Attach the page itself (FW.Url() evaluates to the current URL).
attachments : [{ url: FW.Url(),
title: "Boston Review Snapshot",
type: "text/html" }],
// Two alternative byline locations; a byline joined with " and " is split
// into separate names before each is passed to cleanAuthor as an "author".
creators : FW.Xpath('//div[@class="article-author"]//div[@class="author-name"]'+
'|//div[contains(@class, "field-name-field-author")]//a').text().split(/ and /).cleanAuthor("author"),
// Date is read from the `content` attribute of the date span.
date : FW.Xpath('//span[@class="date-display-single"]/@content').text(),
// Fixed values applied to every item produced by this scraper.
ISSN : "0734-2306",
abstractNote : FW.Xpath('//div[contains(@class, "field-name-field-subhead")]').text(),
publicationTitle : "Boston Review",
language : "en-US"
});


/**
 * Scrape one article page by running the Embedded Metadata translator and
 * post-processing the item it produces with Boston Review specifics
 * (publication title, date, cleaned-up title, authors, and tags).
 *
 * @param {Document} doc - Document of the article page.
 * @param {string} [url=doc.location.href] - URL of the article page.
 * @returns {Promise<void>} Settles once the delegated translation finished.
 */
async function scrape(doc, url = doc.location.href) {
	let translator = Zotero.loadTranslator('web');
	// Embedded Metadata
	translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');
	translator.setDocument(doc);

	translator.setHandler('itemDone', (_obj, item) => {
		item.publicationTitle = item.libraryCatalog = "Boston Review";

		let date = getJSONLDPublicationDate(doc);
		if (!date) {
			// Only as fallback; despite the itemprop value, this doesn't
			// always appear to be "date modified"; rather, it's the original
			// publication date
			date = text(doc, ".elementor-post-info [itemprop='dateModified']");
		}
		if (date) {
			item.date = date;
		}

		// Remove suffix " - Boston Review" in title. Guarded so that an item
		// without a title is still completed instead of throwing here.
		if (item.title) {
			item.title = item.title.replace(/\s+-\s+Boston Review\s*$/, "");
		}

		// NOTE: the href property match takes care of both /author and
		// /author-custom paths
		let authors = doc.querySelectorAll("h2 a[href^='https://www.bostonreview.net/author']");
		for (let author of authors) {
			let authorName = ZU.trimInternal(author.textContent.trim());
			item.creators.push(ZU.cleanAuthor(authorName, "author"));
		}

		for (let tag of doc.querySelectorAll("a[href^='https://www.bostonreview.net/tag/']")) {
			item.tags.push(ZU.trimInternal(tag.textContent));
		}

		// NOTE that in general there's no sure way to determine whether an
		// article belongs to a print issue (hence no volume/issue numbers and
		// no ISSN which is for the print publication). If you can help, please
		// contribute!

		item.complete();
	});

	let em = await translator.getTranslatorObject();
	em.itemType = 'magazineArticle';
	await em.doWeb(doc, url);
}

/**
 * Read the publication date from the page's schema.org JSON-LD block.
 *
 * The JSON-LD is page-supplied data, so malformed JSON or an unexpected
 * shape must not abort the item; in those cases nothing is returned and the
 * caller falls back to the date shown in the page body.
 *
 * @param {Document} doc - Document of the article page.
 * @returns {string|undefined} `datePublished` of the first "WebPage" node in
 *   `@graph`, or undefined when it cannot be determined.
 */
function getJSONLDPublicationDate(doc) {
	let ldInfo = text(doc, "script[type='application/ld+json']");
	if (!ldInfo) return undefined;

	let graph;
	try {
		let parsed = JSON.parse(ldInfo);
		graph = parsed && parsed["@graph"];
	}
	catch (e) {
		Zotero.debug("Boston Review: ignoring unparsable JSON-LD: " + e);
		return undefined;
	}
	if (!Array.isArray(graph)) return undefined;

	// Get schema.org metadata for the web page from JSON-LD
	let pageInfo = graph.find(node => node && node["@type"] === "WebPage");
	return pageInfo ? pageInfo.datePublished : undefined;
}

/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.bostonreview.net/forum/promoting-social-mobility/rethinking-family-life-robin-west",
"url": "https://www.bostonreview.net/forum/can-global-brands-create-just-supply-chains-richard-locke",
"items": [
{
"itemType": "magazineArticle",
"title": "Promoting Social Mobility",
"title": "Can Global Brands Create Just Supply Chains?",
"creators": [
{
"firstName": "Robin",
"lastName": "West",
"firstName": "Richard M.",
"lastName": "Locke",
"creatorType": "author"
}
],
"date": "2012-09-01T00:00:00-04:00",
"ISSN": "0734-2306",
"date": "2013-05-21T10:22:49+00:00",
"abstractNote": "When Jia Jingchuan, a 27-year-old electronics worker in Suzhou, China, sought compensation for the chemical poisoning he suffered at work, he appealed",
"language": "en-US",
"libraryCatalog": "Boston Review",
"publicationTitle": "Boston Review",
"url": "http://www.bostonreview.net/forum/promoting-social-mobility/rethinking-family-life-robin-west",
"url": "https://www.bostonreview.net/forum/can-global-brands-create-just-supply-chains-richard-locke/",
"attachments": [
{
"title": "Boston Review Snapshot",
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"tags": [
{
"tag": "Global"
},
{
"tag": "Human Rights"
},
{
"tag": "Labor"
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.bostonreview.net/forum/can-global-brands-create-just-supply-chains-richard-locke",
"url": "https://www.bostonreview.net/articles/government-loansharking/",
"items": [
{
"itemType": "magazineArticle",
"title": "Can Global Brands Create Just Supply Chains?",
"title": "Government Loansharking",
"creators": [
{
"firstName": "Richard M.",
"lastName": "Locke",
"firstName": "Malcolm",
"lastName": "Harris",
"creatorType": "author"
}
],
"date": "2013-05-21T00:00:00-04:00",
"ISSN": "0734-2306",
"date": "2013-06-07T12:00:00+00:00",
"abstractNote": "Last November when I first wrote about student loans for Boston Review, the Department of Education estimated it would be pulling in around $25 billion in",
"language": "en-US",
"libraryCatalog": "Boston Review",
"publicationTitle": "Boston Review",
"url": "http://www.bostonreview.net/forum/can-global-brands-create-just-supply-chains-richard-locke",
"url": "https://www.bostonreview.net/articles/government-loansharking/",
"attachments": [
{
"title": "Boston Review Snapshot",
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"tags": [
{
"tag": "Economy"
},
{
"tag": "Education"
},
{
"tag": "Politics"
},
{
"tag": "U.S."
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.bostonreview.net/us/government-loansharking",
"url": "https://bostonreview.net/issue/september-october-2012/",
"items": "multiple"
},
{
"type": "web",
"url": "https://www.bostonreview.net/?s=labor",
"items": "multiple"
},
{
"type": "web",
"url": "https://www.bostonreview.net/articles/astra-taylor-wolson-interview/",
"items": [
{
"itemType": "magazineArticle",
"title": "Government Loansharking",
"title": "Beyond the Neoliberal University",
"creators": [
{
"firstName": "Malcolm",
"lastName": "Harris",
"firstName": "Astra",
"lastName": "Taylor",
"creatorType": "author"
},
{
"firstName": "Todd",
"lastName": "Wolfson",
"creatorType": "author"
}
],
"date": "2013-06-07T00:00:00-04:00",
"ISSN": "0734-2306",
"abstractNote": "An Update on the Student-Debt Crisis",
"date": "2020-08-04T12:00:00+00:00",
"abstractNote": "Astra Taylor talks with Rutgers faculty union president Todd Wolfson about organizing academic communities in the age of COVID-19.",
"language": "en-US",
"libraryCatalog": "Boston Review",
"publicationTitle": "Boston Review",
"url": "http://www.bostonreview.net/us/government-loansharking",
"url": "https://www.bostonreview.net/articles/astra-taylor-wolson-interview/",
"attachments": [
{
"title": "Boston Review Snapshot",
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"tags": [
{
"tag": "COVID-19"
},
{
"tag": "Education"
},
{
"tag": "Interview"
},
{
"tag": "Labor"
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://bostonreview.net/septemberoctober-2012",
"items": "multiple"
},
{
"type": "web",
"url": "http://www.bostonreview.net/search/node/labor",
"url": "https://www.bostonreview.net/special_project/opportunity-after-neoliberalism/",
"items": "multiple"
}
]
/** END TEST CASES **/
/** END TEST CASES **/

0 comments on commit 019d0e4

Please sign in to comment.