120 lines
4.1 KiB
Diff
120 lines
4.1 KiB
Diff
From 522cc87fdc25449222a5894a428eebf4b8d5eaa9 Mon Sep 17 00:00:00 2001
|
|
From: Patrick Steinhardt <ps@pks.im>
|
|
Date: Thu, 1 Dec 2022 15:46:53 +0100
|
|
Subject: [PATCH] utf8: fix truncated string lengths in `utf8_strnwidth()`
|
|
|
|
The `utf8_strnwidth()` function accepts an optional string length as
|
|
input parameter. This parameter can either be set to `-1`, in which case
|
|
we call `strlen()` on the input, or it can be set to a positive integer
|
|
that indicates a precomputed length, which callers typically compute by
|
|
calling `strlen()` at some point themselves.
|
|
|
|
The input parameter is an `int` though, whereas `strlen()` returns a
|
|
`size_t`. This can lead to implementation-defined behaviour when
|
|
the `size_t` cannot be represented by the `int`. In the general case
|
|
this leads to wrap-around and thus to negative string sizes,
|
|
which certainly does not lead to well-defined behaviour.
|
|
|
|
Fix this by accepting a `size_t` instead of an `int` as string length.
|
|
While this takes away the ability of callers to simply pass in `-1` as
|
|
string length, it really is trivial enough to convert them to instead
|
|
pass in `strlen()`.
|
|
|
|
Signed-off-by: Patrick Steinhardt <ps@pks.im>
|
|
Signed-off-by: Junio C Hamano <gitster@pobox.com>
|
|
---
|
|
column.c | 2 +-
|
|
pretty.c | 4 ++--
|
|
utf8.c | 8 +++-----
|
|
utf8.h | 2 +-
|
|
4 files changed, 7 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/column.c b/column.c
|
|
index 1261e18a72..fbf88639aa 100644
|
|
--- a/column.c
|
|
+++ b/column.c
|
|
@@ -23,7 +23,7 @@ struct column_data {
|
|
/* return length of 's' in letters, ANSI escapes stripped */
|
|
static int item_length(const char *s)
|
|
{
|
|
- return utf8_strnwidth(s, -1, 1);
|
|
+ return utf8_strnwidth(s, strlen(s), 1);
|
|
}
|
|
|
|
/*
|
|
diff --git a/pretty.c b/pretty.c
|
|
index 7e649b1cec..aae6e792bc 100644
|
|
--- a/pretty.c
|
|
+++ b/pretty.c
|
|
@@ -1483,7 +1483,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
|
|
int occupied;
|
|
if (!start)
|
|
start = sb->buf;
|
|
- occupied = utf8_strnwidth(start, -1, 1);
|
|
+ occupied = utf8_strnwidth(start, strlen(start), 1);
|
|
occupied += c->pretty_ctx->graph_width;
|
|
padding = (-padding) - occupied;
|
|
}
|
|
@@ -1501,7 +1501,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
|
|
placeholder++;
|
|
total_consumed++;
|
|
}
|
|
- len = utf8_strnwidth(local_sb.buf, -1, 1);
|
|
+ len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);
|
|
|
|
if (c->flush_type == flush_left_and_steal) {
|
|
const char *ch = sb->buf + sb->len - 1;
|
|
diff --git a/utf8.c b/utf8.c
|
|
index 5b39361ada..504e517c34 100644
|
|
--- a/utf8.c
|
|
+++ b/utf8.c
|
|
@@ -206,13 +206,11 @@ int utf8_width(const char **start, size_t *remainder_p)
|
|
* string, assuming that the string is utf8. Returns strlen() instead
|
|
* if the string does not look like a valid utf8 string.
|
|
*/
|
|
-int utf8_strnwidth(const char *string, int len, int skip_ansi)
|
|
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi)
|
|
{
|
|
int width = 0;
|
|
const char *orig = string;
|
|
|
|
- if (len == -1)
|
|
- len = strlen(string);
|
|
while (string && string < orig + len) {
|
|
int skip;
|
|
while (skip_ansi &&
|
|
@@ -225,7 +223,7 @@ int utf8_strnwidth(const char *string, int len, int skip_ansi)
|
|
|
|
int utf8_strwidth(const char *string)
|
|
{
|
|
- return utf8_strnwidth(string, -1, 0);
|
|
+ return utf8_strnwidth(string, strlen(string), 0);
|
|
}
|
|
|
|
int is_utf8(const char *text)
|
|
@@ -791,7 +789,7 @@ int skip_utf8_bom(char **text, size_t len)
|
|
void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width,
|
|
const char *s)
|
|
{
|
|
- int slen = strlen(s);
|
|
+ size_t slen = strlen(s);
|
|
int display_len = utf8_strnwidth(s, slen, 0);
|
|
int utf8_compensation = slen - display_len;
|
|
|
|
diff --git a/utf8.h b/utf8.h
|
|
index fcd5167baf..6da1b6d05e 100644
|
|
--- a/utf8.h
|
|
+++ b/utf8.h
|
|
@@ -7,7 +7,7 @@ typedef unsigned int ucs_char_t; /* assuming 32bit int */
|
|
|
|
size_t display_mode_esc_sequence_len(const char *s);
|
|
int utf8_width(const char **start, size_t *remainder_p);
|
|
-int utf8_strnwidth(const char *string, int len, int skip_ansi);
|
|
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi);
|
|
int utf8_strwidth(const char *string);
|
|
int is_utf8(const char *text);
|
|
int is_encoding_utf8(const char *name);
|
|
--
|
|
2.27.0
|
|
|