PHPでMySQLの標準機能で日本語を全文検索する(3)

April 28, 2007

htmlfiles は HTML ファイルのパスを 1 行に 1 つずつ書いたテキストリストで、これをコマンドラインから標準入力に流し込む。

$ php into.php < htmlfiles

into.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
<?php
// Use UTF-8 for both the multibyte string functions and the multibyte
// regex functions. (The function names must not contain backslashes;
// the escaped form is a parse error.)
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
require_once("db.php");
require_once("htmlindex.php");
/**
 * Registers HTML files into the full-text index.
 *
 * For each file it reads the raw text, runs it through HtmlIndexExtractor,
 * and passes the file path, extracted title and extracted content to
 * DBManager::insertFullTextIndexPrimary(). The outcome of every file is
 * reported on standard output.
 */
class Register {
    /** @var DBManager created with the argument "livedocs_ft" */
    public $dbm;
    /** @var mixed not used inside this class; kept for compatibility */
    public $uri;
    /** @var mixed not used inside this class; kept for compatibility */
    public $title;
    /** @var mixed not used inside this class; kept for compatibility */
    public $content;
    /** @var HtmlIndexExtractor extractor applied to each file's text */
    public $hie;
    /** @var string|null raw text of the most recently read file */
    public $htmltext;

    /**
     * Creates the database manager and the HTML extractor.
     *
     * Declared as __construct(): a method named after the class is no
     * longer treated as a constructor on PHP 8, which would leave
     * $dbm and $hie unset.
     */
    public function __construct() {
        $this->dbm = new DBManager("livedocs_ft");
        $this->hie = new HtmlIndexExtractor();
    }

    /**
     * Reads one HTML file, extracts its title/content and inserts them.
     *
     * Prints "[ERR] ..." when the file cannot be read, otherwise "[OK] ..."
     * or "[NG] ..." depending on the value returned by the insert call.
     *
     * @param string $htmlfile path of the HTML file to register
     * @return void
     */
    public function regist($htmlfile) {
        if (!$this->readFile($htmlfile)) {
            // Report the path that was actually passed in.
            echo "[ERR] Can't open file. : $htmlfile\n";
            return;
        }
        $this->hie->extract($this->htmltext);
        $inserted = $this->dbm->insertFullTextIndexPrimary(
            $htmlfile,
            $this->hie->getTitle(),
            $this->hie->getContent()
        );
        if ($inserted) {
            echo "[OK] Inserted into file : $htmlfile.\n";
        } else {
            echo "[NG] Inserted into file : $htmlfile.\n";
        }
    }

    /**
     * Loads the whole file into $this->htmltext.
     *
     * @param string $file path of the file to read
     * @return bool true on success, false when the file could not be read
     */
    private function readFile($file) {
        // Single call instead of an feof()/fgets() loop, which can spin
        // forever when reads fail on an opened handle.
        $ctn = file_get_contents($file);
        if ($ctn === false) {
            return false;
        }
        $this->htmltext = $ctn;
        return true;
    }
}
// Driver: read one HTML file path per line from standard input and
// register each non-empty path.
$rg = new Register();
$stdin = fopen('php://stdin', 'r');
if ($stdin === false) {
    echo "No STDIN\n";
    // Non-zero status so callers can detect the failure.
    exit(1);
}
// fgets() returns false at end of input (or on error), which ends the loop
// without passing a non-string value on to trim().
while (($line = fgets($stdin)) !== false) {
    // trim() removes the trailing newline as well as surrounding whitespace.
    $idxfile = trim($line);
    if ($idxfile !== '') {
        $rg->regist($idxfile);
    }
}
fclose($stdin);
echo "Registing Completed.\n";
?>
MySQL PHP

tilfin freelance software engineer