forked from yiisoft/yii2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUtf8Controller.php
130 lines (112 loc) · 3.82 KB
/
Utf8Controller.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
<?php
/**
* @link http://www.yiiframework.com/
* @copyright Copyright (c) 2008 Yii Software LLC
* @license http://www.yiiframework.com/license/
*/
namespace yii\build\controllers;
use yii\console\Controller;
use yii\helpers\Console;
use yii\helpers\FileHelper;
/**
 * Check files for broken UTF8 and non-printable characters.
 *
 * Every character of the checked files is decoded to its Unicode code point and
 * compared against a list of problematic ranges (malformed UTF-8, unsupported
 * space characters, control characters). Each finding is printed to stdout
 * together with its line and column.
 *
 * @author Carsten Brandt <[email protected]>
 */
class Utf8Controller extends Controller
{
    /**
     * @var string the action that is run when no action is given on the command line.
     */
    public $defaultAction = 'check-guide';

    /**
     * @var array paths of the files for which the header line has already been printed,
     * indexed by the path itself.
     */
    private $_foundFiles = [];


    /**
     * Check guide for non-printable characters that may break docs generation.
     *
     * @param string $directory the directory to check. If not specified, the default
     * guide directory will be checked. A path to a single file is accepted as well;
     * for a directory only `*.md` files are checked.
     */
    public function actionCheckGuide($directory = null)
    {
        if ($directory === null) {
            $directory = dirname(dirname(__DIR__)) . '/docs';
        }
        if (is_file($directory)) {
            $files = [$directory];
        } else {
            $files = FileHelper::findFiles($directory, [
                'only' => ['*.md'],
            ]);
        }

        foreach ($files as $file) {
            $content = file_get_contents($file);
            if ($content === false) {
                // do not abort the whole run because of a single unreadable file
                $this->stderr("Unable to read file: $file\n", Console::FG_RED);
                continue;
            }
            $this->checkContent($content, $file);
        }
    }

    /**
     * Scans the content of one file and reports every problematic character.
     *
     * @param string $content the raw file content.
     * @param string $file the file path, used for reporting only.
     */
    private function checkContent($content, $file)
    {
        $line = 1;
        $pos = 0;
        foreach ($this->splitCharacters($content) as $c) {
            $ord = $this->unicodeOrd($c);
            $pos++;
            if ($ord === 0x000A) {
                // a line feed starts a new line; the column counter restarts with it
                $line++;
                $pos = 0;
            }
            if ($ord === false) {
                $this->found("BROKEN UTF8", $c, $line, $pos, $file);
                continue;
            }
            // http://unicode-table.com/en/blocks/general-punctuation/
            if (0x2000 <= $ord && $ord <= 0x200F
                || 0x2028 <= $ord && $ord <= 0x202E
                || 0x205f <= $ord && $ord <= 0x206F
            ) {
                $this->found("UNSUPPORTED SPACE CHARACTER", $c, $line, $pos, $file);
                continue;
            }
            // C0 controls except line feed and tab, and the complete C1 block U+0080..U+009F
            if ($ord < 0x0020 && $ord !== 0x000A && $ord !== 0x0009
                || 0x0080 <= $ord && $ord <= 0x009F
            ) {
                $this->found("CONTROL CHARACTER", $c, $line, $pos, $file);
                continue;
            }
            // Characters above U+009F (non ASCII) are intentionally not reported.
        }
    }

    /**
     * Splits a string into its characters.
     *
     * Well-formed UTF-8 is split into code points. `preg_split()` with the `u` modifier
     * fails on malformed input, so in that case a byte oriented pattern is used which
     * keeps sequences that look like UTF-8 together and yields every remaining byte as
     * an element of its own, so that it can be reported with its position.
     *
     * @param string $content the string to split.
     * @return string[] the characters (or stray bytes) in their original order.
     */
    private function splitCharacters($content)
    {
        $chars = preg_split('//u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        $pattern = '/[\x00-\x7F]'
            . '|[\xC2-\xDF][\x80-\xBF]'
            . '|[\xE0-\xEF][\x80-\xBF]{2}'
            . '|[\xF0-\xF4][\x80-\xBF]{3}'
            . '|./s';
        if (preg_match_all($pattern, $content, $matches) === false) {
            return [];
        }

        return $matches[0];
    }

    /**
     * Prints a single finding; the file name is printed once before its first finding.
     *
     * @param string $what description of the problem.
     * @param string $char the offending character (or raw bytes for broken UTF-8).
     * @param int $line line number of the finding.
     * @param int $pos column of the finding within the line.
     * @param string $file path of the file that is being checked.
     */
    private function found($what, $char, $line, $pos, $file)
    {
        if (!isset($this->_foundFiles[$file])) {
            $this->stdout("$file: \n", Console::BOLD);
            $this->_foundFiles[$file] = $file;
        }

        $ord = $this->unicodeOrd($char);
        if ($ord === false) {
            // no code point exists for a malformed sequence: show the raw bytes and do
            // not echo them to the terminal
            $hexcode = bin2hex($char);
            $this->stdout(" at $line:$pos FOUND $what: 0x$hexcode\n");
            return;
        }

        $hexcode = str_pad(dechex($ord), 4, '0', STR_PAD_LEFT);
        $this->stdout(" at $line:$pos FOUND $what: 0x$hexcode '$char' http://unicode-table.com/en/$hexcode/\n");
    }

    /**
     * Equivalent for ord() just for unicode
     *
     * http://stackoverflow.com/a/10333324/1106908
     *
     * @param string $c a single UTF-8 encoded character.
     * @return bool|int the Unicode code point, or `false` if `$c` is empty or does not
     * start with a well-formed UTF-8 sequence.
     */
    private function unicodeOrd($c)
    {
        $length = strlen($c);
        if ($length === 0) {
            return false;
        }

        $h = ord($c[0]);
        if ($h <= 0x7F) {
            return $h;
        }
        if ($h < 0xC2) {
            // stray continuation byte or overlong two byte lead (0xC0, 0xC1)
            return false;
        }

        if ($h <= 0xDF) {
            $size = 2;
            $code = $h & 0x1F;
            $min = 0x80;
        } elseif ($h <= 0xEF) {
            $size = 3;
            $code = $h & 0x0F;
            $min = 0x800;
        } elseif ($h <= 0xF4) {
            $size = 4;
            $code = $h & 0x07;
            $min = 0x10000;
        } else {
            return false;
        }

        if ($length < $size) {
            // truncated sequence
            return false;
        }
        for ($i = 1; $i < $size; $i++) {
            $byte = ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                // not a continuation byte
                return false;
            }
            $code = $code << 6 | ($byte & 0x3F);
        }

        // reject overlong encodings, UTF-16 surrogates and values beyond U+10FFFF
        if ($code < $min || $code > 0x10FFFF || (0xD800 <= $code && $code <= 0xDFFF)) {
            return false;
        }

        return $code;
    }
}