199 lines
7.0 KiB
SQL
199 lines
7.0 KiB
SQL
#
|
||
#-----------------------------------------------------------------------------
|
||
# wl6607 : InnoDB FULLTEXT SEARCH: CJK support (mecab parser)
|
||
# Adding FTS check with mecab parser
|
||
# - Table with mecab parser
|
||
# - Basic FTS query ( mainly BOOLEAN MODE check)
|
||
# - FTS index with single and two columns
|
||
# - external parser index using alter table
|
||
#
|
||
#------------------------------------------------------------------------------
|
||
|
||
# restart does not work with embedded
|
||
-- source include/not_embedded.inc
|
||
|
||
call mtr.add_suppression("unknown variable 'loose_mecab_rc_file");
|
||
call mtr.add_suppression("Fulltext index charset 'big5' doesn't match mecab charset");
|
||
call mtr.add_suppression("Mecab: createModel.* failed: param.cpp");
|
||
call mtr.add_suppression("Plugin 'mecab' init function returned error");
|
||
|
||
-- disable_query_log
|
||
|
||
-- error 0, ER_CANT_INITIALIZE_UDF
|
||
eval INSTALL PLUGIN mecab SONAME '$MECAB';
|
||
|
||
let $mecab_charset=`SELECT variable_value FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='mecab_charset'`;
|
||
if ($mecab_charset == '') {
|
||
# Restart with package dictionary.
|
||
let $MYSQL_DATADIR=`select @@datadir`;
|
||
let $MYSQL_BASEDIR=`select @@basedir`;
|
||
|
||
let $mecabrc = $MYSQL_DATADIR/mecabrc;
|
||
let $dicdir = $MYSQL_BASEDIR/lib/mecab/dic/ipadic_$ipadic_charset;
|
||
|
||
-- exec echo "dicdir=$dicdir" > $mecabrc
|
||
|
||
-- source include/shutdown_mysqld.inc
|
||
-- exec echo "restart: --loose_mecab_rc_file=$mecabrc $MECAB_OPT --innodb_ft_min_token_size=2" >$MYSQLTEST_VARDIR/tmp/mysqld.1.expect
|
||
-- enable_reconnect
|
||
-- source include/wait_until_connected_again.inc
|
||
-- disable_reconnect
|
||
|
||
-- disable_query_log
|
||
|
||
-- error 0, ER_CANT_INITIALIZE_UDF
|
||
eval INSTALL PLUGIN mecab SONAME '$MECAB';
|
||
|
||
let $mecab_charset=`SELECT variable_value FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='mecab_charset'`;
|
||
}
|
||
|
||
if ($mecab_charset == '') {
|
||
-- skip Test fail to load mecab parser, please set correct 'loose_mecab_rc_file'.
|
||
}
|
||
|
||
if ($mecab_charset != $mysql_charset) {
|
||
UNINSTALL PLUGIN mecab;
|
||
-- skip Test mecab charset mismatch.
|
||
}
|
||
|
||
--enable_query_log
|
||
|
||
SHOW STATUS LIKE 'mecab_charset';
|
||
|
||
SET NAMES utf8;
|
||
eval CREATE TABLE page (
|
||
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
|
||
page_title varchar(255) CHARACTER SET $mecab_charset NOT NULL DEFAULT '',
|
||
PRIMARY KEY (page_id),
|
||
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
|
||
) ENGINE=InnoDB;
|
||
|
||
INSERT INTO page VALUES
|
||
(1,'レモナ'),
|
||
(2,'SDレモナ'),
|
||
(3,'レモナ'),
|
||
(4,'データベース管理システム'),
|
||
(5,'キム'),
|
||
(6,'鉄道100電車'),
|
||
(7,'100'),
|
||
(8,'ラフィタ・カスティージョ'),
|
||
(9,'ツル科_(Sibley)'),
|
||
(10,'ツル'),
|
||
(11,'鉄道');
|
||
|
||
SET GLOBAL innodb_ft_aux_table="test/page";
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
|
||
|
||
SHOW CREATE TABLE page;
|
||
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("キムショウカン" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("100" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ツル*" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ");
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("+ラフィタ・カスティージョ +データベース" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("+ラフィタ・カスティージョ +(データベース)" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("-ラフィタ・カスティージョ +(データベース)" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ 鉄道" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("<ラフィタ・カスティージョ >鉄道" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道');
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道' WITH QUERY EXPANSION);
|
||
|
||
eval ALTER TABLE page add column c1 varchar(100) CHARACTER SET $mecab_charset DEFAULT 'レモナ';
|
||
ALTER TABLE page ADD FULLTEXT INDEX idx1 (page_title,c1) WITH PARSER mecab;
|
||
SELECT * FROM page WHERE MATCH(page_title,c1) AGAINST('レモナ');
|
||
SELECT * FROM page WHERE MATCH(page_title,c1) AGAINST('-レモナ' IN BOOLEAN MODE);
|
||
|
||
DROP TABLE page;
|
||
|
||
# Test charset mismatch.
|
||
CREATE TABLE page (
|
||
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
|
||
page_title varchar(255) CHARACTER SET big5 NOT NULL DEFAULT '',
|
||
PRIMARY KEY (page_id)
|
||
)ENGINE=InnoDB;
|
||
|
||
INSERT INTO page VALUES (1,'日本');
|
||
|
||
# We don't return charset mismatch error because of parallel fts build.
|
||
CREATE FULLTEXT INDEX ft_idx ON page(page_title) WITH PARSER mecab;
|
||
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
|
||
|
||
--error ER_ERROR_ON_WRITE
|
||
INSERT INTO page VALUES (2,'日本');
|
||
|
||
--error ER_ERROR_ON_WRITE
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('日本');
|
||
|
||
DROP TABLE page;
|
||
|
||
# Test utf8mb4
|
||
if ($mecab_charset == 'utf8')
|
||
{
|
||
SET NAMES utf8mb4;
|
||
|
||
CREATE TABLE page (
|
||
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
|
||
page_title varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
|
||
PRIMARY KEY (page_id),
|
||
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
|
||
) ENGINE=InnoDB;
|
||
|
||
# insert some emoji chars with 4 bytes.
|
||
INSERT INTO page VALUES
|
||
(1,'😃レモナ'),
|
||
(2,'鉄道😄'),
|
||
(3,'鉄道😊100電車');
|
||
|
||
SET GLOBAL innodb_ft_aux_table="test/page";
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
|
||
|
||
SHOW CREATE TABLE page;
|
||
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道');
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道😄');
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道' WITH QUERY EXPANSION);
|
||
|
||
DROP TABLE page;
|
||
}
|
||
|
||
# Test eucjpms & cp932
|
||
if ($mysql_win_charset)
|
||
{
|
||
eval CREATE TABLE page (
|
||
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
|
||
page_title varchar(255) CHARACTER SET $mysql_win_charset NOT NULL DEFAULT '',
|
||
PRIMARY KEY (page_id),
|
||
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
|
||
) ENGINE=InnoDB;
|
||
|
||
# insert some eucjpms & cp932 chars.
|
||
INSERT INTO page VALUES
|
||
(1,'ⅰレモナ'),
|
||
(2,'鉄道ⅹ'),
|
||
(3,'鉄道ⅴ100電車');
|
||
|
||
SET GLOBAL innodb_ft_aux_table="test/page";
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
|
||
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
|
||
|
||
SHOW CREATE TABLE page;
|
||
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道');
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道ⅴ');
|
||
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道' WITH QUERY EXPANSION);
|
||
|
||
DROP TABLE page;
|
||
}
|
||
|
||
SET GLOBAL innodb_ft_aux_table=default;
|
||
|
||
UNINSTALL PLUGIN mecab;
|