-
Notifications
You must be signed in to change notification settings - Fork 767
/
BBC.js
116 lines (96 loc) · 3.35 KB
/
BBC.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
"translatorID":"f4130157-93f7-4493-8f24-a7c85549013d",
"translatorType":4,
"label":"BBC",
"creator":"Ben Parr",
"target":"^https?://(?:www|news?)\\.bbc\\.co\\.uk",
"minVersion":"1.0.0b4.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2007-08-27 05:00:00"
}
/**
 * Reports whether the given page looks like a BBC article this translator
 * can handle.
 *
 * A page is recognised when it contains either a <meta name="Headline">
 * element or a <font class="poshead"><b> headline element.
 *
 * @param {Document} doc - The page DOM to inspect.
 * @param {string} url - The page URL (not used by the detection logic).
 * @returns {?string} "newspaperArticle" when a headline marker is found,
 *     otherwise null.
 */
function detectWeb(doc, url)
{
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix === 'x' ? namespace : null;
	} : null;

	// Probed in order; the first expression that yields a node decides.
	var headlineXPaths = [
		'//meta[@name="Headline"]',
		'//font[@class="poshead"]/b'
	];

	for (var i = 0; i < headlineXPaths.length; i++) {
		// Only the existence of a match matters, so the node is not kept
		// (the original stored it in an undeclared, leaked global).
		if (doc.evaluate(headlineXPaths[i], doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
			return "newspaperArticle";
		}
	}
	return null;
}
/**
 * Builds a "newspaperArticle" Zotero item from a BBC page and completes it.
 *
 * Fields filled in:
 *  - url, repository ("bbc.co.uk"), publicationTitle ("BBC") and title;
 *  - date: the first space-separated token of the OriginalPublicationDate
 *    meta tag, or, when that tag is absent, the text following "Created: "
 *    in the first matching <font class="postxt" size="1"> element;
 *  - section: from the Section meta tag, when present;
 *  - abstractNote: from the Description meta tag, falling back to the
 *    lower-case "description" meta tag;
 *  - one HTML snapshot attachment pointing at the page URL.
 *
 * @param {Document} doc - The page DOM to read metadata from.
 * @param {string} url - The page URL, stored on the item and its attachment.
 * @param {string} title - The headline already extracted by the caller.
 * @returns {undefined} The item is handed off through newItem.complete().
 */
function scrape(doc,url,title)
{
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix === 'x' ? namespace : null;
	} : null;

	// Returns the first node matching the expression, or null when none does.
	// Keeping the expression as an argument avoids the shared `xpath`
	// variable that the original assigned without declaring it.
	function firstMatch(expression) {
		return doc.evaluate(expression, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	var newItem = new Zotero.Item("newspaperArticle");
	newItem.url = url;
	newItem.repository = "bbc.co.uk";
	newItem.publicationTitle = "BBC";
	newItem.title = title;

	var dateAttr = firstMatch('//meta[@name="OriginalPublicationDate"]/@content');
	if (dateAttr) {
		// Keep only the part before the first space.
		newItem.date = dateAttr.value.split(" ")[0];
	} else {
		// Fallback layout: look for a text row starting with "Created: ".
		var createdPrefix = "Created: ";
		var rows = doc.evaluate('//font[@class="postxt"][@size="1"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var row;
		while ((row = rows.iterateNext())) {
			var rowText = row.textContent;
			if (rowText.substring(0, createdPrefix.length) === createdPrefix) {
				newItem.date = rowText.substring(createdPrefix.length);
				break;
			}
		}
	}

	var sectionAttr = firstMatch('//meta[@name="Section"]/@content');
	if (sectionAttr) {
		newItem.section = sectionAttr.value;
	}

	// The lower-case variant is only queried when the capitalised one is missing.
	var descriptionAttr = firstMatch('//meta[@name="Description"]/@content')
		|| firstMatch('//meta[@name="description"]/@content');
	if (descriptionAttr) {
		newItem.abstractNote = descriptionAttr.value;
	}

	newItem.attachments.push({url:url, title:"BBC News Snapshot",mimeType:"text/html"});
	newItem.complete();
}
/**
 * Translator entry point: finds the article headline and passes the page
 * to scrape().
 *
 * The headline is taken from the Headline meta tag's content attribute when
 * present; otherwise from the text of the <font class="poshead"><b> element.
 * If neither exists, nothing is scraped.
 *
 * @param {Document} doc - The page DOM.
 * @param {string} url - The page URL, forwarded to scrape().
 * @returns {undefined}
 */
function doWeb(doc,url)
{
	var pageNamespace = doc.documentElement.namespaceURI;
	var resolver = null;
	if (pageNamespace) {
		resolver = function(prefix) {
			return prefix == 'x' ? pageNamespace : null;
		};
	}

	// Preferred source: the Headline meta tag.
	var headlineAttr = doc.evaluate('//meta[@name="Headline"]/@content', doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (headlineAttr) {
		scrape(doc, url, headlineAttr.value);
		return;
	}

	// Fallback source: the bold headline inside font.poshead.
	var headlineNode = doc.evaluate('//font[@class="poshead"]/b', doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (headlineNode) {
		scrape(doc, url, headlineNode.textContent);
	}
}