From 8d415b8911be26b12b85497f7cc57143b5321787 Mon Sep 17 00:00:00 2001
From: Maks Verver <maks@verver.ch>
Date: Tue, 8 Apr 2025 13:13:55 +0200
Subject: [PATCH] [CVE-2025-32414] python: Read at most len/4 characters.

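Fixes #889 by reserving space in the buffer for the UTF-8 encoding of
text. When read() or io_read() returns a string, its length is counted
in characters, not bytes, and each character can take up to 4 bytes in
UTF-8, so at most len / 4 characters are requested.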
---
 python/libxml.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/python/libxml.c b/python/libxml.c
index 5dea50260..0a1e408bd 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -237,7 +237,9 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
 
     file = (PyObject *) context;
     if (file == NULL) return(-1);
-    ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len);
+    /* When read() returns a string, the length is in characters not bytes, so
+       request at most len / 4 characters to leave space for UTF-8 encoding. */
+    ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len / 4);
     if (ret == NULL) {
 	printf("xmlPythonFileReadRaw: result is NULL\n");
 	return(-1);
@@ -272,10 +274,12 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
 	Py_DECREF(ret);
 	return(-1);
     }
-    if (lenread > len)
-	memcpy(buffer, data, len);
-    else
-	memcpy(buffer, data, lenread);
+    if (lenread < 0 || lenread > len) {
+	printf("xmlPythonFileReadRaw: invalid lenread\n");
+	Py_DECREF(ret);
+	return(-1);
+    }
+    memcpy(buffer, data, lenread);
     Py_DECREF(ret);
     return(lenread);
 }
@@ -299,7 +303,9 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
 
     file = (PyObject *) context;
     if (file == NULL) return(-1);
-    ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len);
+    /* When io_read() returns a string, the length is in characters not bytes, so
+       request at most len / 4 characters to leave space for UTF-8 encoding. */
+    ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len / 4);
     if (ret == NULL) {
 	printf("xmlPythonFileRead: result is NULL\n");
 	return(-1);
@@ -334,10 +340,12 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
 	Py_DECREF(ret);
 	return(-1);
     }
-    if (lenread > len)
-	memcpy(buffer, data, len);
-    else
-	memcpy(buffer, data, lenread);
+    if (lenread < 0 || lenread > len) {
+	printf("xmlPythonFileRead: invalid lenread\n");
+	Py_DECREF(ret);
+	return(-1);
+    }
+    memcpy(buffer, data, lenread);
     Py_DECREF(ret);
     return(lenread);
 }
