forked from cakephp/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpopulate_search_index.php
executable file
·101 lines (78 loc) · 2.33 KB
/
populate_search_index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env php
<?php
/**
 * Utility script to populate the Elasticsearch indexes.
 *
 * Usage: populate_search_index.php <language-directory> [elasticsearch-host]
 */
// Elasticsearch configuration.

// Base URL used when no host is supplied on the command line.
const ES_DEFAULT_HOST = 'http://localhost:9200';

// Name of the index; second path segment of every request URL.
const ES_INDEX = 'documentation';

// Version prefix that is joined with the language to form the third URL segment.
const CAKEPHP_VERSION = '2-2';
/**
 * Script entry point: index every .rst file below a language directory.
 *
 * Defines the ES_HOST constant from the optional second argument (falling
 * back to ES_DEFAULT_HOST) and then calls updateIndex() once per file.
 * Exits with status 1 when the language argument is missing or is not a
 * directory.
 *
 * @param array $argv Command-line arguments: $argv[1] is the language
 *   directory to scan, $argv[2] is an optional Elasticsearch base URL.
 * @return void
 */
function main($argv) {
    if (empty($argv[1])) {
        echo "A language to scan is required.\n";
        exit(1);
    }
    $lang = $argv[1];

    // Fail with a readable message instead of an uncaught
    // UnexpectedValueException from RecursiveDirectoryIterator.
    if (!is_dir($lang)) {
        echo "'$lang' is not a directory.\n";
        exit(1);
    }

    $host = !empty($argv[2]) ? $argv[2] : ES_DEFAULT_HOST;
    // URL segments are joined with "/", so a trailing slash here would
    // produce "//" in every request URL.
    define('ES_HOST', rtrim($host, '/'));

    $directory = new RecursiveDirectoryIterator($lang);
    $recurser = new RecursiveIteratorIterator($directory);
    // Anchor the pattern so only names ending in ".rst" match; unanchored it
    // also picked up backups such as "index.rst.bak" or "index.rst~".
    $matcher = new RegexIterator($recurser, '/\.rst$/');

    foreach ($matcher as $file) {
        updateIndex($lang, $file);
    }

    echo "\nIndex update complete\n";
}
/**
 * Send a single .rst file to Elasticsearch with an HTTP PUT request.
 *
 * The document id is the file's path relative to the language directory,
 * without its extension and with "/" replaced by "-". The request goes to
 * ES_HOST/ES_INDEX/CAKEPHP_VERSION-$lang/$id and carries a JSON body with
 * the keys `contents`, `title` and `url` (the file path with an .html
 * extension). On any failure an error is printed and the script exits with
 * status 2.
 *
 * @param string $lang Language directory being indexed; also used as the
 *   suffix of the third URL segment.
 * @param SplFileInfo $file The file to index.
 * @return void
 */
function updateIndex($lang, $file) {
    // A trailing slash (e.g. from shell tab-completion) must not leak into
    // the request URL or defeat the prefix stripping below.
    $lang = rtrim($lang, '/');

    $fileData = readFileData($file);

    // Remove only the final extension. Splitting on the first "." truncated
    // paths containing an earlier dot ("./en/index.rst" became "").
    $filename = preg_replace('/\.[^.\/]*$/', '', $file->getPathName());
    $path = $filename . '.html';

    // Strip the language directory only where it is the leading prefix; a
    // global replace also removed matching text deeper in the path
    // (for example the "en/" inside "en/garden/index").
    $prefix = $lang . '/';
    $id = $filename;
    if (strpos($id, $prefix) === 0) {
        $id = substr($id, strlen($prefix));
    }
    $id = trim(str_replace('/', '-', $id), '-');

    $url = implode('/', array(ES_HOST, ES_INDEX, CAKEPHP_VERSION . '-' . $lang, $id));

    $data = json_encode(array(
        'contents' => $fileData['contents'],
        'title' => $fileData['title'],
        'url' => $path,
    ));
    if ($data === false) {
        // json_encode() fails on e.g. invalid UTF-8; sending the resulting
        // empty body would index a blank document.
        echo "[ERROR] Failed to encode $file as JSON (error code " . json_last_error() . ").\n";
        exit(2);
    }
    // CURLOPT_INFILESIZE needs the body length in bytes, which is what strlen() returns.
    $size = strlen($data);

    // CURLOPT_PUT uploads from a stream, so stage the body in memory.
    $fh = fopen('php://memory', 'w+');
    fwrite($fh, $data);
    rewind($fh);

    echo "Sending request:\n\tfile: $file\n\turl: $url\n";

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_PUT, true);
    curl_setopt($ch, CURLOPT_INFILE, $fh);
    curl_setopt($ch, CURLOPT_INFILESIZE, $size);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $response = curl_exec($ch);
    $metadata = curl_getinfo($ch);
    $error = curl_error($ch);

    // Release both handles before the status check so the error path does
    // not skip the cleanup.
    curl_close($ch);
    fclose($fh);

    // http_code is 0 when no response was received; every 4xx/5xx status,
    // including 400 itself, is a failure.
    if (!$metadata['http_code'] || $metadata['http_code'] >= 400) {
        echo "[ERROR] Failed to complete request.\n";
        if ($error !== '') {
            echo "$error\n";
        }
        var_dump($response);
        exit(2);
    }

    echo "Sent $file\n";
}
/**
 * Read an .rst file and split it into a title and the text to index.
 *
 * The title is a guess: only a first line that is immediately followed by a
 * line made up of "=" or "#" characters is treated as the document title.
 *
 * @param SplFileInfo|string $file File to read; it is converted to a path string.
 * @return array Array with the keys `contents` (the text without the title
 *   block and without section underlines) and `title` (the title string, or
 *   null when the file does not start with an underlined title).
 * @throws RuntimeException When the file cannot be read.
 */
function readFileData($file) {
    $contents = file_get_contents((string)$file);
    if ($contents === false) {
        throw new RuntimeException("Unable to read file: $file");
    }

    $title = null;
    if (preg_match('/^(.*)\n[=#]+\n/', $contents, $matches)) {
        $title = $matches[1];
        // The pattern is anchored at the start of the file, so cut the title
        // block off by length. A str_replace() would also delete any later
        // section that repeats the same heading text.
        $contents = (string)substr($contents, strlen($matches[0]));
    }

    // Drop section underlines but keep a line break in their place, so a
    // heading is not glued onto the first word of the following line.
    $contents = preg_replace('/\n[-=~]+\n/', "\n", $contents);

    return array('contents' => $contents, 'title' => $title);
}
// Run the indexer with the command-line arguments PHP exposes in $argv.
main($argv);