sentencepiece/Added-split_digits-to-SentencePieceTrainer.patch
2021-11-27 19:37:28 +08:00

25 lines
802 B
Diff

From 427d695ab4343568cc46411fbe83ef5ccc619752 Mon Sep 17 00:00:00 2001
From: mingruimingrui <mingruimingrui@hotmail.com>
Date: Sat, 27 Jun 2020 02:56:03 +0800
Subject: [PATCH 1/7] Added split_digits to SentencePieceTrainer
---
src/spec_parser.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/spec_parser.h b/src/spec_parser.h
index 729e036..6dd054b 100644
--- a/src/spec_parser.h
+++ b/src/spec_parser.h
@@ -207,6 +207,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
   PARSE_BOOL(split_by_unicode_script);
   PARSE_BOOL(split_by_number);
   PARSE_BOOL(split_by_whitespace);
+  PARSE_BOOL(split_digits);
   PARSE_BOOL(treat_whitespace_as_suffix);
   PARSE_REPEATED_STRING(control_symbols);
   PARSE_REPEATED_STRING(user_defined_symbols);
--
2.18.0.huawei.25