-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmainFormatXML.pl
111 lines (102 loc) · 2.37 KB
/
mainFormatXML.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
use Time::HiRes qw ( time alarm sleep ) ;
my $begin = time();
use strict;
use warnings;
use Data::Dumper;
use Encode;
use JSON;
use feature qw(say);
# ---------------------------------------------------------------------------
# Main script: re-indent an XML file.
#
# The input file is read line by line, leading whitespace is removed from
# every line and the lines are joined without any separator. The resulting
# string is cut into tokens (tags and the text between them), and each token
# is written on its own output line, prefixed with the indentation returned
# by getIndent(). An element whose whole content is a single text run or a
# single self-closing tag is written on one line.
#
# XML declarations / processing instructions (<?...?>) and <!DOCTYPE ...>
# tokens are skipped and therefore do not appear in the output.
#
# NOTE(review): XML comments and CDATA sections are not recognised; such a
# token falls into the start-tag branch below and is pushed on @tags.
# ---------------------------------------------------------------------------

# Dictionary name; used to build both the working directory and file names.
my $dictName = "swg_xhzd";
my $root = "E:/dict/$dictName/";
# Input file path
my $in = $root . $dictName . "_origin.xml";
# Output file path
my $out = $root . $dictName . "_origin_format.xml";
# Lexical filehandles, with the offending path included in the error message.
open my $in_fh,  "<", $in  or die "Cannot open '$in' for reading: $!";
open my $out_fh, ">", $out or die "Cannot open '$out' for writing: $!";
# Indentation string
my $indentMark = "\t";
# Number of indentation strings emitted per nesting level
my $indentNum = 1;

# Join all input lines into one string, dropping line ends and leading
# whitespace (spaces and tabs alike, so already-indented input is accepted).
my $content = "";
while (my $line = <$in_fh>) {
    chomp $line;
    $line =~ s/^\s+//;
    # Disabled special case kept from the original script:
    # if($line =~ /^[类rik]/){
    #     $line = ' '.$line;
    # }
    $content .= $line;
}
close $in_fh;

# Tokenize: splitting on a captured pattern keeps the tags themselves in the
# result list; empty strings produced between adjacent tags are discarded.
# Input without any tag yields no tokens at all.
my @contents = ();
if ($content =~ /<.+?>/) {
    @contents = grep { length } split /(<.+?>)/, $content;
}

# State shared with isPairOfTag() / used by the loop below:
#   $indent     - indentation prefix for the current output line
#   $flag       - kind of the previously written token:
#                 "None", "bTag" (start tag), "eTag" (end or self-closing
#                 tag) or "content" (text)
#   $indentDeep - current nesting depth
#   @tags       - stack of currently open element names
my $indent = "";
my $flag = "None";
my $indentDeep = 0;
my @tags = ();

# A C-style loop is used on purpose: the one-line shortcut consumes three
# tokens at once and advances the index manually.
for (my $i = 0; $i < @contents; $i++) {
    my $token = $contents[$i];
    next if $token =~ /<\?.+?\?>/;
    next if $token =~ /<!DOCTYPE/;

    if ($token =~ /<\/(.+?)>/) {
        # End tag: make sure it closes the innermost open element.
        isPairOfTag($1);
        # After an end tag or text we are one level deeper than the element
        # being closed; directly after its own start tag we are not.
        if ($flag eq "eTag" || $flag eq "content") {
            $indentDeep--;
            $indent = getIndent($indentDeep);
        }
        $flag = "eTag";
    }
    elsif ($token =~ /<.+?\/>/) {
        # Self-closing tag: it is a complete child element, so it is indented
        # one level deeper when it directly follows a start tag and is then
        # treated like a closed element.
        if ($flag eq "bTag") {
            $indentDeep++;
            $indent = getIndent($indentDeep);
        }
        $flag = "eTag";
    }
    elsif ($token =~ /<(.+?)>/) {
        # Start tag: keep only the element name, dropping any attributes.
        my $tmp = $1;
        $tmp =~ s/\s.*\z//s;
        push @tags, $tmp;
        if ($flag eq "bTag") {
            $indentDeep++;
            $indent = getIndent($indentDeep);
        }
        $flag = "bTag";

        # Innermost element: start tag, one text run or self-closing tag, and
        # the matching end tag go on a single line. The middle token must not
        # be another start/end tag, otherwise <a><a></a></a> would be
        # collapsed wrongly and the tag stack would get out of sync.
        my $inner  = $contents[$i + 1];
        my $closer = $contents[$i + 2];
        if (   defined $closer
            && $closer eq "</$tmp>"
            && ($inner !~ /^</ || $inner =~ /<.+?\/>/))
        {
            say {$out_fh} $indent . $token . $inner . $closer;
            $i += 2;
            pop @tags;
            $flag = "eTag";
            next;
        }
    }
    else {
        # Plain text content
        if ($flag eq "bTag") {
            $indentDeep++;
            $indent = getIndent($indentDeep);
        }
        $flag = "content";
    }
    say {$out_fh} $indent . $token;
}

# Buffered write errors only surface when the handle is closed.
close $out_fh or die "Cannot close '$out': $!";
# Check that an end tag closes the innermost open element.
#
# Pops the most recently pushed element name from the file-level @tags stack
# and compares it with the name of the end tag. When the two differ - which
# includes an end tag arriving while no element is open - both names and
# "error!!" are printed to STDOUT and the program exits with status 1.
#
# Parameters:
#   $rTag - element name taken from the end tag.
# Returns:
#   nothing; only returns at all when the tags match.
sub isPairOfTag {
    my ($rTag) = @_;
    # An empty stack pops undef; treat it as an empty name so the comparison
    # and the message below do not trigger "uninitialized value" warnings.
    my $lTag = pop(@tags) // "";
    return if $rTag eq $lTag;

    say $lTag . "<---->" . $rTag;
    say "error!!";
    # Non-zero status so callers can detect the failure.
    exit 1;
}
# Build the indentation prefix for a nesting depth.
#
# The prefix is the file-level $indentMark repeated ($deep * $indentNum)
# times. The repetition operator is used so that marks of any length, and
# marks containing the character "0", are handled correctly.
#
# Parameters:
#   $deep - nesting depth.
# Returns:
#   the indentation string; "" when the repeat count is zero or negative.
sub getIndent {
    my ($deep) = @_;
    my $num = $deep * $indentNum;
    return "" if $num <= 0;
    return $indentMark x $num;
}
# Report the time elapsed since $begin was recorded, formatted with two
# decimals: once inside a labelled message, once as a bare number, followed
# by a completion marker.
my $run = sprintf "%.2f", time() - $begin;
print "运行时间:", $run, "秒\n";
print "$run\n";
say "finish!!!";