forked from cakephp/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
populate_search_index.php
executable file
·127 lines (103 loc) · 3.07 KB
/
populate_search_index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env php
<?php
/**
* Utility script to populate the Elasticsearch indexes.
*
* Gets called by the Makefile.
*/
// Elasticsearch configuration.
// ES_DEFAULT_HOST is used as ES_HOST by main() when no host is passed as the second CLI argument.
define('ES_DEFAULT_HOST', 'https://ci.cakephp.org:9200');
// First path segment (after the host) of every document URL built in updateIndex().
define('ES_INDEX', 'documentation');
// Joined with the language (e.g. "3-next-en") to form the second path segment of the document URL.
define('CAKEPHP_VERSION', '3-next');
/**
 * The main function
 *
 * Populates the search index for the given language: every file ending in
 * `.rst` below the language directory is passed to updateIndex().
 *
 * Defines ES_HOST from the second CLI argument, falling back to
 * ES_DEFAULT_HOST when it is missing. Exits with status 1 when no language
 * is given or the language directory does not exist.
 *
 * @param array $argv The array of CLI arguments, 1: language, 2. Elastic search host.
 * @return void
 */
function main($argv)
{
    if (empty($argv[1])) {
        echo "A language to scan is required.\n";
        exit(1);
    }

    // Drop trailing slashes so "en/" and "en" yield the same document ids and index name.
    $lang = rtrim($argv[1], '/');
    if (!is_dir($lang)) {
        // RecursiveDirectoryIterator would otherwise throw an uncaught exception.
        echo "The language directory '$lang' does not exist.\n";
        exit(1);
    }

    // Document URLs are joined with '/', so a trailing slash on the host would double up.
    $host = empty($argv[2]) ? ES_DEFAULT_HOST : rtrim($argv[2], '/');
    define('ES_HOST', $host);

    $directory = new RecursiveDirectoryIterator($lang);
    $recurser = new RecursiveIteratorIterator($directory);
    // Anchor the pattern: an unanchored '/\.rst/' also matches backups such as "page.rst.bak" or "page.rst~".
    $matcher = new RegexIterator($recurser, '/\.rst$/');
    foreach ($matcher as $file) {
        updateIndex($lang, $file);
    }

    echo "\nIndex update complete\n";
}
/**
 * Update the index for a given language
 *
 * Reads the title and contents of one file, derives the document id from the
 * file path and PUTs the JSON document to
 * ES_HOST/ES_INDEX/CAKEPHP_VERSION-<lang>/<id>.
 *
 * Exits with status 2 when the document cannot be encoded or the request
 * fails (transport error or HTTP status >= 400).
 *
 * @param string $lang The language to update, e.g. "en".
 * @param SplFileInfo $file The file to load data from.
 * @return void
 */
function updateIndex($lang, $file)
{
    $pathname = $file->getPathname();
    $fileData = readFileData($pathname);

    // Strip only the final extension. Splitting on the first '.' would truncate
    // paths such as "./en/index.rst" or "en/3.0-migration-guide.rst".
    $filename = preg_replace('/\.[^.\/]+$/', '', $pathname);
    $path = $filename . '.html';

    // Remove the language directory only when it is the leading path segment;
    // a global replace would also mangle sub-directories ending in the language code.
    $id = $filename;
    $prefix = $lang . '/';
    if (strpos($id, $prefix) === 0) {
        $id = substr($id, strlen($prefix));
    }
    $id = trim(str_replace('/', '-', $id), '-');

    $url = implode('/', array(ES_HOST, ES_INDEX, CAKEPHP_VERSION . '-' . $lang, $id));

    $data = json_encode(array(
        'contents' => $fileData['contents'],
        'title' => $fileData['title'],
        'url' => $path,
    ));
    if ($data === false) {
        // Typically caused by invalid UTF-8 in the source file.
        echo "[ERROR] Failed to encode $file as JSON: " . json_last_error_msg() . "\n";
        exit(2);
    }

    echo "Sending request:\n\tfile: $file\n\turl: $url\n";

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT');
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    // Newer Elasticsearch versions reject request bodies without an explicit JSON content type.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $response = curl_exec($ch);
    $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    curl_close($ch);

    // A status of 0 means no HTTP response was received; 400 itself is a client error too.
    if ($response === false || $status === 0 || $status >= 400) {
        echo "[ERROR] Failed to complete request.\n";
        if ($error !== '') {
            echo "\tcurl error: $error\n";
        }
        echo "\thttp status: $status\n";
        var_dump($response);
        exit(2);
    }

    echo "Sent $file\n";
}
/**
 * Read data from file
 *
 * The first line of the file is treated as the title when it is directly
 * followed by an underline made of `=` or `#` characters. The title and its
 * underline are removed from the returned contents, and remaining heading
 * underlines made of `-`, `=` or `~` are stripped.
 *
 * When no title is found, `title` is an empty string and the contents are
 * only stripped of heading underlines. An unreadable file yields empty
 * contents and an empty title.
 *
 * @param string $file The file to read.
 * @return array The read data, with the keys `contents` and `title`.
 */
function readFileData($file)
{
    $contents = file_get_contents($file);
    if ($contents === false) {
        // file_get_contents() has already emitted a warning; continue with an empty document.
        $contents = '';
    }

    // Extract the title and guess that things underlined with # or == and first in the file
    // are the title.
    $title = '';
    if (preg_match('/^(.*)\n[=#]+\n/', $contents, $matches)) {
        $title = $matches[1];
        // The pattern is anchored at the start, so cut off exactly the matched
        // prefix instead of replacing every later repetition of it as well.
        $contents = (string)substr($contents, strlen($matches[0]));
    }

    // Remove title markers from the text. Keep one newline so the heading is
    // not fused with the following line (which would merge two words for search).
    $contents = preg_replace('/\n[-=~]+\n/', "\n", $contents);

    return compact('contents', 'title');
}
// Script entry point: run the indexer with the command line arguments.
main($argv);