diff mbox series

[v3,06/14] vt: use new tables in ucs.c

Message ID 20250417184849.475581-7-nico@fluxnic.net
State New
Headers show
Series vt: implement proper Unicode handling | expand

Commit Message

Nicolas Pitre April 17, 2025, 6:45 p.m. UTC
From: Nicolas Pitre <npitre@baylibre.com>

This removes the hard-coded double-width table from ucs.c and substitutes
the generated tables from ucs_width_table.h, which provide comprehensive
ranges for double-width and zero-width Unicode code points.

Also implements ucs_is_zero_width(), replacing the stub in consolemap.h,
to query the new zero-width table.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
---
 drivers/tty/vt/ucs.c       | 44 +++++++++++++++++++++-----------------
 include/linux/consolemap.h |  6 +-----
 2 files changed, 25 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c
index dc4a6e7945..5f9f25bd20 100644
--- a/drivers/tty/vt/ucs.c
+++ b/drivers/tty/vt/ucs.c
@@ -8,22 +8,12 @@ 
 #include <linux/consolemap.h>
 #include <linux/minmax.h>
 
-/* ucs_is_double_width() is based on the wcwidth() implementation by
- * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
- * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
- */
-
 struct ucs_interval {
 	u32 first;
 	u32 last;
 };
 
-static const struct ucs_interval ucs_double_width_ranges[] = {
-	{ 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E },
-	{ 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF },
-	{ 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 },
-	{ 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }
-};
+#include "ucs_width_table.h"
 
 static int interval_cmp(const void *key, const void *element)
 {
@@ -37,6 +27,27 @@  static int interval_cmp(const void *key, const void *element)
 	return 0;
 }
 
+static bool cp_in_range(u32 cp, const struct ucs_interval *ranges, size_t size)
+{
+	/* in_range() takes (start, len), so compare both bounds directly */
+	if (cp < ranges[0].first || cp > ranges[size - 1].last)
+		return false;
+	return __inline_bsearch(&cp, ranges, size, sizeof(*ranges),
+				interval_cmp) != NULL;
+}
+
+/**
+ * ucs_is_zero_width() - Determine if a Unicode code point is zero-width.
+ * @cp: Unicode code point (UCS-4)
+ *
+ * Return: true if cp_in_range() finds @cp in ucs_zero_width_ranges, else false
+ */
+bool ucs_is_zero_width(u32 cp)
+{
+	return cp_in_range(cp, ucs_zero_width_ranges,
+			   ARRAY_SIZE(ucs_zero_width_ranges));
+}
+
 /**
  * ucs_is_double_width() - Determine if a Unicode code point is double-width.
  * @cp: Unicode code point (UCS-4)
@@ -45,13 +56,6 @@  static int interval_cmp(const void *key, const void *element)
  */
 bool ucs_is_double_width(u32 cp)
 {
-	size_t size = ARRAY_SIZE(ucs_double_width_ranges);
-
-	if (!in_range(cp, ucs_double_width_ranges[0].first,
-			  ucs_double_width_ranges[size - 1].last))
-		return false;
-
-	return __inline_bsearch(&cp, ucs_double_width_ranges, size,
-				sizeof(*ucs_double_width_ranges),
-				interval_cmp) != NULL;
+	return cp_in_range(cp, ucs_double_width_ranges,
+			   ARRAY_SIZE(ucs_double_width_ranges));
 }
diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 7d778752dc..b3a9118666 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,11 +29,7 @@  u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
-static inline bool ucs_is_zero_width(uint32_t cp)
-{
-	/* coming soon */
-	return false;
-}
+bool ucs_is_zero_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)