From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 7 Aug 2020 21:54:27 +0200
Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'

Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
array access.

Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
the report.

Fixes #178.
---
 xmllint.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git misc/libxml2-2.9.10/xmllint.c misc/build/libxml2-2.9.10/xmllint.c
index f6a8e4636..c647486f3 100644
--- misc/libxml2-2.9.10/xmllint.c
+++ misc/build/libxml2-2.9.10/xmllint.c
@@ -528,6 +528,12 @@ static void
 xmlHTMLEncodeSend(void) {
     char *result;

+    /*
+     * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+     * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+     * an out-of-bounds read.
+     */
+    memset(&buffer[sizeof(buffer)-4], 0, 4);
     result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
     if (result) {
 	xmlGenericError(xmlGenericErrorContext, "%s", result);
--
GitLab

37