-
Notifications
You must be signed in to change notification settings - Fork 767
/
ARTFL Encyclopedie.js
169 lines (160 loc) · 5.31 KB
/
ARTFL Encyclopedie.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
{
"translatorID":"72cb2536-3211-41e0-ae8b-974c0385e085",
"translatorType":4,
"label":"ARTFL Encyclopedie",
"creator":"Sean Takats",
"target":"/cgi-bin/philologic31/(getobject\\.pl\\?c\\.[0-9]+:[0-9]+\\.encyclopedie|search3t\\?dbname=encyclopedie0507)",
"minVersion":"1.0.0b4.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2011-01-11 04:31:00"
}
/**
 * Classify the current page for Zotero.
 *
 * A URL containing "getobject.pl" is reported as a single encyclopedia
 * article; every other URL is reported as a list of multiple items.
 *
 * @param {Document} doc - the loaded page (not inspected here)
 * @param {string} url - address of the loaded page
 * @returns {string} "encyclopediaArticle" or "multiple"
 */
function detectWeb(doc, url) {
	var isArticlePage = url.indexOf("getobject.pl") != -1;
	return isArticlePage ? "encyclopediaArticle" : "multiple";
}
/**
 * Expand an ARTFL contributor label into a full "Last, First" name when the
 * label is in the lookup table below.
 *
 * @param {string} author - contributor label as returned by ARTFL
 * @returns {string} the mapped full name if the label is known; otherwise the
 *     label itself. In either case a single trailing "5" is removed.
 */
function reconcileAuthor(author){
	var authorMap = {
		"Venel":"Venel, Gabriel-François",
		"d'Aumont":"d'Aumont, Arnulphe",
		"de La Chapelle":"de La Chapelle, Jean-Baptiste",
		"Bourgelat":"Bourgelat, Claude",
		"Dumarsais":"Du Marsais, César Chesneau",
		"Mallet":"Mallet, Edme-François",
		"Toussaint":"Toussaint, François-Vincent",
		"Daubenton":"Daubenton, Louis-Jean-Marie",
		"d'Argenville": "d'Argenville, Antoine-Joseph Desallier",
		"Tarin":"Tarin, Pierre",
		"Vandenesse":"de Vandenesse, Urbain",
		"Blondel": "Blondel, Jacques-François",
		"Le Blond":"Le Blond, Guillaume",
		"Rousseau":"Rousseau, Jean-Jacques",
		"Eidous":"Eidous, Marc-Antoine",
		"d'Alembert":"d'Alembert, Jean le Rond",
		"Louis":"Louis, Antoine",
		"Bellin":"Bellin, Jacques-Nicolas",
		"Diderot":"Diderot, Denis",
		"Diderot1":"Diderot, Denis",
		"Diderot2":"Diderot, Denis",
		"de Jaucourt":"de Jaucourt, Chevalier Louis",
		"Jaucourt":"de Jaucourt, Chevalier Louis",
		"d'Holbach":"d'Holbach, Baron"
		/* not yet mapped
		Yvon
		Forbonnais
		Douchet and Beauzée
		Boucher d'Argis
		Lenglet Du Fresnoy
		Cahusac
		Pestré
		Daubenton, le Subdélégué
		Goussier
		de Villiers
		Barthès
		Morellet
		Malouin
		Ménuret de Chambaud
		Landois
		Le Roy
		*/
	};
	// Own-property check: a plain lookup would also find inherited members
	// such as "constructor" and hand back a non-string.
	if (Object.prototype.hasOwnProperty.call(authorMap, author)) {
		author = authorMap[author];
	}
	// remove ARTFL's trailing 5 for odd contributors (e.g. Turgot5)
	if (author.charAt(author.length - 1) == "5"){
		author = author.slice(0, -1);
	}
	return author;
}
/**
 * Build an encyclopediaArticle item from a single ARTFL article page.
 *
 * The headword is read from the page, the bibliographic fields are filled
 * with fixed values, and a follow-up search request for the headword is
 * issued on the same host. The contributor and the classification tags are
 * parsed out of that response with regular expressions. If either is not
 * found in the response it is skipped and the item is still completed.
 *
 * @param {Document} doc - the article page
 * @throws {Error} if no headword element is found on the page
 */
function scrape (doc){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;
	var url = doc.location.href;
	var newItem = new Zotero.Item("encyclopediaArticle");

	// The headword is looked up in a <font> element first, then in a <b>.
	var titleElmt = doc.evaluate('/html/body/div[@class="text"]/font', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (!titleElmt) {
		titleElmt = doc.evaluate('/html/body/div[@class="text"]/b', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}
	if (!titleElmt) {
		throw new Error("ARTFL Encyclopedie: no article title found on " + url);
	}
	var title = titleElmt.textContent;

	newItem.title = title;
	newItem.encyclopediaTitle = "Encyclopédie, ou Dictionnaire raisonné des sciences, des arts et des métiers";
	newItem.shortTitle = "Encyclopédie";
	newItem.date = "1751-1772";
	newItem.publisher = "Briasson";
	newItem.place = "Paris";
	newItem.url = url;
	newItem.attachments.push({title:"ARTFL Snapshot", mimeType:"text/html", url:url, snapshot:true});

	// get author and tags
	var hostRegexp = new RegExp("^(https?://[^/]+)/");
	var hMatch = hostRegexp.exec(url);
	var host = hMatch[1];
	var getString1 = "/cgi-bin/philologic31/search3t?dbname=encyclopedie0507&word=&dgdivhead=";
	var getString2 = "&dgdivocauthor=&dgdivocplacename=&dgdivocsalutation=&dgdivocclassification=&dgdivocpartofspeech=&dgdivtype=&CONJUNCT=PHRASE&DISTANCE=3&PROXY=or+fewer&OUTPUT=conc&POLESPAN=5&KWSS=1&KWSSPRLIM=500";

	// The headword goes into a query string and into two regular
	// expressions; encode/escape it for each context so that punctuation
	// such as "(", ")", "." or "&" is taken literally.
	var queryTitle = encodeURIComponent(title);
	var titlePattern = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

	Zotero.Utilities.HTTP.doGet(host+getString1+queryTitle+getString2, function(text){
		// Classification: the bracketed text that follows the headword link.
		var tagRe = new RegExp('>' + titlePattern + '</a>[^\\[]*\\[([^\\]]*)\\]', 'i');
		var tagMatch = tagRe.exec(text);
		if (tagMatch && tagMatch[1] != "unclassified"){
			// text is undecoded markup, so turn &amp; back into a plain ampersand
			var tags = tagMatch[1].replace(/&amp;/g, "&").split(";");
			for (var j = 0; j < tags.length; j++) {
				var tag = Zotero.Utilities.trimInternal(tags[j]);
				if (tag) {
					newItem.tags.push(tag);
				}
			}
		}

		// Contributor: the first comma-delimited field after the headword link.
		var authorRe = new RegExp('>' + titlePattern + '</a>,([^,]*),', "i");
		var authorMatch = authorRe.exec(text);
		if (authorMatch) {
			var author = Zotero.Utilities.trimInternal(authorMatch[1]);
			// reconcile author
			author = reconcileAuthor(author);
			if (author != "NA"){ // ignore unknown authors
				newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author", true));
			}
		}

		newItem.creators.push({firstName:"Denis", lastName:"Diderot", creatorType:"editor"});
		newItem.creators.push({firstName:"Jean le Rond", lastName:"d'Alembert", creatorType:"editor"});
		newItem.complete();
	}, function() {Zotero.done();}, null);
	Zotero.wait();
}
/**
 * Translator entry point.
 *
 * On an article page (URL contains "getobject.pl") the page itself is
 * scraped. On any other page the result links are collected, offered to the
 * user through Zotero.selectItems, and every chosen link is loaded and
 * passed to scrape.
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - address of the loaded page
 * @returns {boolean|undefined} true when the selection comes back empty,
 *     otherwise nothing
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = null;
	if (namespace) {
		nsResolver = function(prefix) {
			return prefix == 'x' ? namespace : null;
		};
	}

	// single article
	if (url.indexOf("getobject.pl") != -1) {
		scrape(doc);
		return;
	}

	// search page: map each result link's address to its label
	var candidates = {};
	var anchors = doc.evaluate('/html/body/div[@class="text"]/p/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
	for (var anchor = anchors.iterateNext(); anchor; anchor = anchors.iterateNext()) {
		var label = anchor.textContent;
		var href = anchor.href;
		if (label && href) {
			candidates[href] = label;
		}
	}

	var chosen = Zotero.selectItems(candidates);
	if (!chosen) {
		return true;
	}

	var urls = [];
	for (var address in chosen) {
		urls.push(address);
	}
	Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
	Zotero.wait();
}