// mysql5/mysql-5.7.27/unittest/gunit/strings_utf8-t.cc (402 lines, 15 KiB, C++)

/* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include "my_config.h"
#include <gtest/gtest.h>
#include <m_ctype.h>
#include <sql_class.h>
namespace strings_utf8_unittest {
class StringsUTF8Test : public ::testing::Test
{
protected:
virtual void SetUp()
{
// Save global settings.
m_charset= system_charset_info;
system_charset_info= &my_charset_utf8_bin;
}
virtual void TearDown()
{
// Restore global settings.
system_charset_info= m_charset;
}
private:
CHARSET_INFO *m_charset;
};
/**
  Calls my_strchr() on the first three bytes of the buffer "str1" and
  checks three situations: the character is present, the character is
  absent, and the first byte of the buffer has been replaced by 0xFF.
*/
TEST_F(StringsUTF8Test, MyStrchr)
{
  char buffer[]= "str1";
  char *begin= buffer;
  char *end= buffer + 3;          // the trailing '1' is outside the range
  const char *not_found= NULL;
  char *hit;

  // 't' sits at index 1 of the searched range: a pointer to it is expected.
  hit= my_strchr(system_charset_info, begin, end, 't');
  EXPECT_EQ(begin + 1, hit);

  // 'd' does not occur in the searched range: NULL is expected.
  hit= my_strchr(system_charset_info, begin, end, 'd');
  ASSERT_EQ(not_found, hit);

  /*
    Overwrite the first byte with 0xFF, which cannot appear in utf8,
    and search again: NULL is expected.
    NOTE(review): 'y' is not among the remaining bytes either, so this
    check only pins the NULL result; it does not by itself show that the
    search stopped at the 0xFF byte.
  */
  buffer[0]= '\xff';
  hit= my_strchr(system_charset_info, begin, end, 'y');
  ASSERT_EQ(not_found, hit);
}
/**
  Compares "str" with my_strcasecmp_mb() against an equal string, a
  string differing in its second character, and a string whose second
  byte is 0xFF. The first comparison must yield 0, the other two 1.
*/
TEST_F(StringsUTF8Test, MyStrcasecmpMb)
{
  const std::string reference= "str";
  const std::string identical= "str";
  const std::string different= "sdr";
  // 's', 0xFF, 'r'. The literal is split so the hex escape ends at FF.
  const std::string with_ff_byte= "s\xFF" "r";

  // Equal strings.
  EXPECT_EQ(0, my_strcasecmp_mb(system_charset_info,
                                reference.c_str(), identical.c_str()));

  // Second character differs ('t' versus 'd').
  EXPECT_EQ(1, my_strcasecmp_mb(system_charset_info,
                                reference.c_str(), different.c_str()));

  // Second byte of the right-hand string is 0xFF (not valid utf8).
  EXPECT_EQ(1, my_strcasecmp_mb(system_charset_info,
                                reference.c_str(), with_ff_byte.c_str()));
}
/**
  Checks the well_formed_len() handler of the current system charset.

  First a buffer holding six boundary-value characters (12 bytes) and an
  empty range are checked: both must report error == 0. Then a series of
  byte patterns is written to the front of the same buffer; for each of
  them the call must return 0 and set error to 1.
*/
TEST_F(StringsUTF8Test, MyWellFormedLenUtf8)
{
  /*
    One rejected input. Exactly "nbytes" bytes from "bytes" are written
    to the start of the buffer (bytes written by earlier entries and not
    overwritten stay in place), then the range
    [buffer, buffer + range_len) is checked.
  */
  struct Rejected_input
  {
    const char *bytes;
    size_t nbytes;
    size_t range_len;
  };
  static const Rejected_input rejected[]=
  {
    {"\xc1",         1, 1},   // single byte 0xC1
    {"\xdf\x00",     2, 2},   // 0xDF followed by 0x00
    {"\xe0\xbf\x00", 3, 3},   // 0xE0 0xBF followed by 0x00
    {"\xe0\x80\x80", 3, 3},   // 0xE0 0x80 0x80
    {"\xf0",         1, 1},   // single byte 0xF0
    {"\xc2\x80",     2, 1},   // range ends after the first of two bytes
    {"\xef\xbf\xbf", 3, 2}    // range ends after the second of three bytes
  };
  const size_t rejected_count= sizeof(rejected) / sizeof(rejected[0]);

  CHARSET_INFO *cs= system_charset_info;
  char buffer[32]= "\x00\x7f\xc2\x80\xdf\xbf\xe0\xa0\x80\xef\xbf\xbf";
  int error;

  /* Valid utf8 characters, testing boundary values. */
  EXPECT_EQ(12U, cs->cset->well_formed_len(cs, buffer, buffer + 12,
                                           6, &error));
  ASSERT_EQ(0, error);

  /* Zero-length string. */
  EXPECT_EQ(0U, cs->cset->well_formed_len(cs, buffer, buffer,
                                          6, &error));
  ASSERT_EQ(0, error);

  /* Illegal or truncated utf8 input. */
  for (size_t i= 0; i < rejected_count; ++i)
  {
    const Rejected_input &input= rejected[i];
    for (size_t j= 0; j < input.nbytes; ++j)
    {
      buffer[j]= input.bytes[j];
    }
    EXPECT_EQ(0U, cs->cset->well_formed_len(cs, buffer,
                                            buffer + input.range_len,
                                            1, &error))
      << "rejected input #" << i;
    ASSERT_EQ(1, error) << "rejected input #" << i;
  }
}
/**
  Checks the ismbchar() handler of the current system charset on
  boundary-value utf8 characters of one, two and three bytes.

  The expected result is 0 for the one-byte samples and the byte count
  for the two- and three-byte samples. Illegal characters are not
  exercised here; the well_formed_len() test of this fixture covers them.
*/
TEST_F(StringsUTF8Test, MyIsmbcharUtf8)
{
  /*
    One sample: "nbytes" bytes from "bytes" are written to the start of
    the buffer, and ismbchar() is called on exactly that range.
  */
  struct Sample
  {
    const char *bytes;
    size_t nbytes;
    unsigned int expected;
  };
  static const Sample samples[]=
  {
    {"\x00",         1, 0U},
    {"\x7f",         1, 0U},
    {"\xc2\x80",     2, 2U},
    {"\xdf\xbf",     2, 2U},
    {"\xe0\xa0\x80", 3, 3U},
    {"\xef\xbf\xbf", 3, 3U}
  };
  const size_t sample_count= sizeof(samples) / sizeof(samples[0]);

  CHARSET_INFO *cs= system_charset_info;
  char buffer[8];

  for (size_t i= 0; i < sample_count; ++i)
  {
    const Sample &sample= samples[i];
    for (size_t j= 0; j < sample.nbytes; ++j)
    {
      buffer[j]= sample.bytes[j];
    }
    EXPECT_EQ(sample.expected,
              cs->cset->ismbchar(cs, buffer, buffer + sample.nbytes))
      << "sample #" << i;
  }
}
class StringsUTF8mb4Test : public ::testing::Test
{
protected:
virtual void SetUp()
{
// Save global settings.
m_charset= system_charset_info;
system_charset_info= &my_charset_utf8mb4_bin;
}
virtual void TearDown()
{
// Restore global settings.
system_charset_info= m_charset;
}
private:
CHARSET_INFO *m_charset;
};
/**
  Checks the well_formed_len() handler of the current system charset
  with utf8mb4 input.

  First a buffer holding eight boundary-value characters (20 bytes) and
  an empty range are checked: both must report error == 0. Then a series
  of byte patterns is written to the front of the same buffer; for each
  of them the call must return 0 and set error to 1.
*/
TEST_F(StringsUTF8mb4Test, MyWellFormedLenUtf8mb4)
{
  /*
    One rejected input. Exactly "nbytes" bytes from "bytes" are written
    to the start of the buffer (bytes written by earlier entries and not
    overwritten stay in place), then the range
    [buffer, buffer + range_len) is checked.
  */
  struct Rejected_input
  {
    const char *bytes;
    size_t nbytes;
    size_t range_len;
  };
  static const Rejected_input rejected[]=
  {
    {"\xc1",             1, 1},   // single byte 0xC1
    {"\xdf\x00",         2, 2},   // 0xDF followed by 0x00
    {"\xe0\xbf\x00",     3, 3},   // 0xE0 0xBF followed by 0x00
    {"\xe0\x80\x80",     3, 3},   // 0xE0 0x80 0x80
    {"\xe0\xbf\x00",     3, 3},   // same bytes as the third entry, kept as is
    {"\xf0\x80\x80\x80", 4, 4},   // 0xF0 0x80 0x80 0x80
    {"\xf4\x9f\x80\x80", 4, 4},   // 0xF4 0x9F 0x80 0x80
    {"\xf0",             1, 1},   // single byte 0xF0
    {"\xc2\x80",         2, 1},   // range ends after the first of two bytes
    {"\xef\xbf\xbf",     3, 2},   // range ends after the second of three bytes
    {"\xf4\x8f\xbf\xbf", 4, 2}    // range ends after the second of four bytes
  };
  const size_t rejected_count= sizeof(rejected) / sizeof(rejected[0]);

  CHARSET_INFO *cs= system_charset_info;
  char buffer[32]= "\x00\x7f\xc2\x80\xdf\xbf\xe0\xa0\x80\xef\xbf\xbf"
                   "\xf0\x90\x80\x80\xF4\x8F\xBF\xBF";
  int error;

  /* Valid utf8mb4 characters, testing boundary values. */
  EXPECT_EQ(20U, cs->cset->well_formed_len(cs, buffer, buffer + 20,
                                           8, &error));
  ASSERT_EQ(0, error);

  /* Zero-length string. */
  EXPECT_EQ(0U, cs->cset->well_formed_len(cs, buffer, buffer,
                                          8, &error));
  ASSERT_EQ(0, error);

  /* Illegal or truncated utf8mb4 input. */
  for (size_t i= 0; i < rejected_count; ++i)
  {
    const Rejected_input &input= rejected[i];
    for (size_t j= 0; j < input.nbytes; ++j)
    {
      buffer[j]= input.bytes[j];
    }
    EXPECT_EQ(0U, cs->cset->well_formed_len(cs, buffer,
                                            buffer + input.range_len,
                                            1, &error))
      << "rejected input #" << i;
    ASSERT_EQ(1, error) << "rejected input #" << i;
  }
}
/**
  Checks the ismbchar() handler of the current system charset on
  boundary-value utf8mb4 characters of one to four bytes.

  The expected result is 0 for the one-byte samples and the byte count
  for the multi-byte samples. Illegal characters are not exercised here;
  the well_formed_len() test of this fixture covers them.
*/
TEST_F(StringsUTF8mb4Test, MyIsmbcharUtf8mb4)
{
  /*
    One sample: "nbytes" bytes from "bytes" are written to the start of
    the buffer, and ismbchar() is called on exactly that range.
  */
  struct Sample
  {
    const char *bytes;
    size_t nbytes;
    unsigned int expected;
  };
  static const Sample samples[]=
  {
    {"\x00",             1, 0U},
    {"\x7f",             1, 0U},
    {"\xc2\x80",         2, 2U},
    {"\xdf\xbf",         2, 2U},
    {"\xe0\xa0\x80",     3, 3U},
    {"\xef\xbf\xbf",     3, 3U},
    {"\xf0\x90\x80\x80", 4, 4U},
    {"\xf4\x8f\xbf\xbf", 4, 4U}
  };
  const size_t sample_count= sizeof(samples) / sizeof(samples[0]);

  CHARSET_INFO *cs= system_charset_info;
  char buffer[8];

  for (size_t i= 0; i < sample_count; ++i)
  {
    const Sample &sample= samples[i];
    for (size_t j= 0; j < sample.nbytes; ++j)
    {
      buffer[j]= sample.bytes[j];
    }
    EXPECT_EQ(sample.expected,
              cs->cset->ismbchar(cs, buffer, buffer + sample.nbytes))
      << "sample #" << i;
  }
}
}