forked from lushl9301/PubMed-Text-Mining-Tool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jsonParser.pl
29 lines (24 loc) · 817 Bytes
/
jsonParser.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
use warnings;
use Lingua::EN::Sentence qw(get_sentences);
use Unicode::Normalize 'normalize';
use JSON;
require "splitFunction.pl";
# Convert newline-delimited JSON records from raw_data.json into a plain-text
# report on STDOUT. Each non-blank input line is parsed as one JSON object and
# its 'url', 'title' and 'abstract' fields are read. A record is printed as
# "URL- ...", "Title- ..." followed by the text returned by _split(), and only
# when _split() reports a positive count.
#
# _split() is loaded from splitFunction.pl; it is used here as returning
# (count, formatted text) -- NOTE(review): confirm against that file, and
# confirm it does not rely on this script's former package globals.
use strict;

binmode(STDOUT, ":utf8");

my $filename = "raw_data.json";

# Lexical handle, and die() rather than exit(): exit() takes a numeric status,
# so passing it a message printed nothing and ended the script with status 0.
open my $in, "<:encoding(utf-8)", $filename
    or die "can't find file " . $filename . ": $!\n";

# The input layer above already decodes UTF-8 into characters, so the parser
# must accept character strings. decode_json() expects undecoded UTF-8 octets
# and would reject or garble non-ASCII text read through this handle.
my $json = JSON->new;

while (my $line = <$in>) {
    # Skip blank lines, including whitespace-only and CRLF-terminated ones,
    # which would otherwise abort the run inside the JSON parser.
    next if $line =~ /^\s*$/;

    my $record = eval { $json->decode($line) };
    die "$filename line $.: invalid JSON: $@" if $@;

    # Missing or null fields are treated as empty strings so concatenation
    # does not emit "uninitialized value" warnings.
    my $url_text      = defined $record->{'url'}      ? $record->{'url'}      : '';
    my $title_text    = defined $record->{'title'}    ? $record->{'title'}    : '';
    my $abstract_text = defined $record->{'abstract'} ? $record->{'abstract'} : '';

    my $pmid     = "URL- " . $url_text . "\n";
    my $title    = "Title- " . $title_text;
    my $abstract = "Abstract- " . $abstract_text;

    # if not english or no abstract, ignore.
    # "Abstract- " is 10 characters long, so the second pattern matches when
    # the abstract itself is at most two characters.
    next if $title =~ /Title- \[/ or $abstract =~ /^.{7,12}$/;

    my ($counter, $abstract_sentences) = _split($abstract);
    if ($counter > 0) {
        print "$pmid" . "$title\n" . $abstract_sentences . "\n";
    }
}

close $in;