From aa0188eaedc056dca8374ac03d0177429b495515 Mon Sep 17 00:00:00 2001 From: "K.Kosako" Date: Thu, 7 Nov 2019 14:13:55 +0900 Subject: [PATCH] fix #163: heap-buffer-overflow in gb18030_mbc_enc_len --- src/gb18030.c | 16 +++++++++++++++- src/regparse.c | 32 ++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/gb18030.c b/src/gb18030.c index ad5bf96..da6cfab 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -75,6 +75,20 @@ gb18030_mbc_enc_len(const UChar* p) return 2; } +static int +gb18030_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff00) != 0) return 2; + else { + if (GB18030_MAP[(int )(code & 0xff)] == CM) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + return 1; + } +} + static int is_valid_mbc_string(const UChar* p, const UChar* end) { @@ -513,7 +527,7 @@ OnigEncodingType OnigEncodingGB18030 = { 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, gb18030_mbc_to_code, - onigenc_mb4_code_to_mbclen, + gb18030_code_to_mbclen, gb18030_code_to_mbc, gb18030_mbc_case_fold, onigenc_ascii_apply_all_case_fold, diff --git a/src/regparse.c b/src/regparse.c index 70c36d5..5bf25e8 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -5885,6 +5885,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) int c, r; int ascii_mode; + int is_single; const OnigCodePoint *ranges; OnigCodePoint limit; OnigCodePoint sb_out; @@ -5906,6 +5907,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) } r = 0; + is_single = ONIGENC_IS_SINGLEBYTE(enc); limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE; switch (ctype) { @@ -5922,19 +5924,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_ALNUM: if (not != 0) { for (c = 0; c < (int )limit; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } } for (c = limit; c < SINGLE_BYTE_SIZE; c++) { - BITSET_SET_BIT(cc->bs, c); + if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) + BITSET_SET_BIT(cc->bs, c); } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + if (is_single == 0) + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < (int )limit; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } } } break; @@ -5944,21 +5952,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_WORD: if (not != 0) { for (c = 0; c < (int )limit; c++) { - if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */ + /* check invalid code point */ + if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } for (c = limit; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) + if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) BITSET_SET_BIT(cc->bs, c); } + if (ascii_mode != 0 && is_single == 0) + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < (int )limit; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) + && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } - if (ascii_mode == 0) + if (ascii_mode == 0 && is_single == 0) ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } break;