From ba7b73c5937452f0c133ba6521ce46776079f76b Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Wed, 16 Jan 2013 17:08:03 +0100 Subject: [PATCH] Test that S_grok_bslash_N() copes if S_reg() restarts the sizing parse. While parsing the pattern to determine its size, S_reg() can discover midway that the pattern will actually need to be encoded as UTF-8. If calculations so far have been done in terms of bytes, then the macro REQUIRE_UTF8 is used to restart the parse, so that sizes can be calculated correctly for UTF-8. It is possible to trigger this restart when processing multi-character charnames interpolated into the pattern using \N{}. Test that this is handled correctly. --- t/re/pat_advanced.t | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index fc13c6f..faa8859 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1082,6 +1082,14 @@ sub run_tests { eval "q(W) =~ /\\N{$name}/"; ok ! $w, 'Verify that latin1 letter in name doesnt give warning'; + # This tests the code path that restarts the parse when the recursive + # call to S_reg() from within S_grok_bslash_N() discovers that the + # pattern needs to be recalculated as UTF-8. Use eval to avoid + # needing literal Unicode in this source file: + my $r = eval "qr/\\N{\x{100}\x{100}}/"; + isnt $r, undef, "Generated regex for multi-char UTF-8 charname" + or diag($@); + ok "\x{100}\x{100}" =~ $r, "which matches"; } { -- 1.8.3.1