+{
+ # Harder cases for the neutrality test: the same data is used downgraded ($down) and upgraded ($up) and must give the same results
+
+ # u format: the uuencoded output must not depend on the utf8 flag of the input
+ my $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ my $up = $down;
+ utf8::upgrade($up);
+ is(pack("u", $down), pack("u", $up), "u pack is neutral");
+ is(unpack("u", pack("u", $down)), $down, "u unpack to downgraded works");
+ is(unpack("U0C0u", pack("u", $down)), $up, "u unpack to upgraded works"); # a leading U0C0 is what the test names in this block call "upgraded"
+
+ # p/P format
+ # This actually only tests something if the address contains a byte >= 0x80
+ my $str = "abc\xa5\x00\xfede"; # embedded NUL: p reads up to it, P7 reads through it (see expectations below)
+ $down = pack("p", $str); # reference value that every other p/P variant must reproduce
+ is(pack("P", $str), $down);
+ is(pack("U0C0p", $str), $down);
+ is(pack("U0C0P", $str), $down);
+ is(unpack("p", $down), "abc\xa5", "unpack p downgraded");
+ $up = $down;
+ utf8::upgrade($up);
+ is(unpack("p", $up), "abc\xa5", "unpack p upgraded");
+
+ is(unpack("P7", $down), "abc\xa5\x00\xfed", "unpack P downgraded");
+ is(unpack("P7", $up), "abc\xa5\x00\xfed", "unpack P upgraded");
+
+ # x, X and @: positioning must land on the same value in both copies
+ $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ $up = $down;
+ utf8::upgrade($up);
+
+ is(unpack('@4W', $down), 0xfc, "\@positioning on downgraded string");
+ is(unpack('@4W', $up), 0xfc, "\@positioning on upgraded string");
+
+ is(unpack('@4x2W', $down), 0xfe, "x moving on downgraded string");
+ is(unpack('@4x2W', $up), 0xfe, "x moving on upgraded string");
+ is(unpack('@4x!4W', $down), 0xfc, "x! moving on downgraded string"); # position 4 is already a multiple of 4: no move
+ is(unpack('@4x!4W', $up), 0xfc, "x! moving on upgraded string");
+ is(unpack('@5x!4W', $down), 0x05, "x! moving on downgraded string"); # position 5 moves forward to 8
+ is(unpack('@5x!4W', $up), 0x05, "x! moving on upgraded string");
+
+ is(unpack('@4X2W', $down), 0xfa, "X moving on downgraded string");
+ is(unpack('@4X2W', $up), 0xfa, "X moving on upgraded string");
+ is(unpack('@4X!4W', $down), 0xfc, "X! moving on downgraded string"); # position 4 is already a multiple of 4: no move
+ is(unpack('@4X!4W', $up), 0xfc, "X! moving on upgraded string");
+ is(unpack('@5X!4W', $down), 0xfc, "X! moving on downgraded string"); # position 5 moves back to 4
+ is(unpack('@5X!4W', $up), 0xfc, "X! moving on upgraded string");
+ is(unpack('@5X!8W', $down), 0xf8, "X! moving on downgraded string"); # position 5 moves back to 0
+ is(unpack('@5X!8W', $up), 0xf8, "X! moving on upgraded string");
+
+ is(pack("W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on downgraded string");
+ is(pack("W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00",
+ "x! on downgraded string");
+ is(pack("W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on downgraded string"); # length 2 is already a multiple of 2: nothing added
+ is(pack("U0C0W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on upgraded string");
+ is(pack("U0C0W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00",
+ "x! on upgraded string");
+ is(pack("U0C0W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on upgraded string");
+ is(pack("W2X", 0xfa, 0xe3), "\xfa", "X on downgraded string");
+ is(pack("U0C0W2X", 0xfa, 0xe3), "\xfa", "X on upgraded string");
+ is(pack("W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on downgraded string");
+ is(pack("U0C0W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on upgraded string");
+ is(pack("W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3", "X! on downgraded string"); # length 3 is cut back to 2
+ is(pack("U0C0W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3",
+ "X! on upgraded string");
+
+ # backward eating through a ( moves the group starting point backwards
+ is(pack("a*(Xa)", "abc", "q"), "abq",
+ "eating before strbeg moves it back");
+ is(pack("a*(Xa)", "ab" . chr(512), "q"), "abq",
+ "eating before strbeg moves it back");
+
+ # Check marked_upgrade: the group-relative @ positions must come out the same when something inside the nested groups forces an upgrade
+ is(pack('W(W(Wa@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, "a", 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6"); # baseline without any upgraded input
+ $up = "a";
+ utf8::upgrade($up);
+ is(pack('W(W(Wa@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, $up, 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by a");
+ is(pack('W(W(WW@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, 256, 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3\x{100}\x00\xa4\x00\xa5\x00\xa6",
+ "marked upgrade caused by W");
+ is(pack('W(W(WU0aC0@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, "a", 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by U0");
+
+ # a, A and Z
+ $down = "\xa4\xa6\xa7";
+ $up = $down;
+ utf8::upgrade($up);
+ utf8::upgrade(my $high = "\xfeb"); # the character \xfe followed by "b", upgraded
+
+ for my $format ("a0", "A0", "Z0", "U0a0C0", "U0A0C0", "U0Z0C0") { # zero-width items: they take an argument but add nothing to the packed string
+ is(pack("a* $format a*", "ab", $down, "cd"), "abcd",
+ "$format format on plain string");
+ is(pack("a* $format a*", "ab", $up, "cd"), "abcd",
+ "$format format on upgraded string");
+ is(pack("a* $format a*", $high, $down, "cd"), "\xfebcd",
+ "$format format on plain string");
+ is(pack("a* $format a*", $high, $up, "cd"), "\xfebcd",
+ "$format format on upgraded string");
+ my @down = unpack("a1 $format a*", "\xfeb"); # three items: "\xfe", the empty zero-width item, "b"
+ is("@down", "\xfe\x20\x20b", "unpack $format"); # two joining spaces around the empty item; \x20 keeps them safe from whitespace collapsing
+ my @up = unpack("a1 $format a*", $high);
+ is("@up", "\xfe\x20\x20b", "unpack $format"); # same expectation for the upgraded input
+ }
+ is(pack("a1", $high), "\xfe");
+ is(pack("A1", $high), "\xfe");
+ is(pack("Z1", $high), "\x00"); # Z always ends in NUL, so a count of 1 leaves no room for data
+ is(pack("a2", $high), "\xfeb");
+ is(pack("A2", $high), "\xfeb");
+ is(pack("Z2", $high), "\xfe\x00");
+ is(pack("a5", $high), "\xfeb\x00\x00\x00"); # a pads with NUL up to the count
+ is(pack("A5", $high), "\xfeb\x20\x20\x20"); # A pads with spaces up to the count: 2 characters + 3 spaces
+ is(pack("Z5", $high), "\xfeb\x00\x00\x00");
+ is(pack("a*", $high), "\xfeb");
+ is(pack("A*", $high), "\xfeb");
+ is(pack("Z*", $high), "\xfeb\x00");
+
+ utf8::upgrade($high = "\xc3\xbeb"); # "\xc3\xbe" is the UTF-8 encoding of \xfe; the U0 results below show the argument being consumed as UTF-8 bytes
+ is(pack("U0a2", $high), "\xfe");
+ is(pack("U0A2", $high), "\xfe");
+ is(pack("U0Z1", $high), "\x00");
+ is(pack("U0a3", $high), "\xfeb");
+ is(pack("U0A3", $high), "\xfeb");
+ is(pack("U0Z3", $high), "\xfe\x00");
+ is(pack("U0a6", $high), "\xfeb\x00\x00\x00");
+ is(pack("U0A6", $high), "\xfeb\x20\x20\x20"); # space-padded counterpart of the U0a6 line above
+ is(pack("U0Z6", $high), "\xfeb\x00\x00\x00");
+ is(pack("U0a*", $high), "\xfeb");
+ is(pack("U0A*", $high), "\xfeb");
+ is(pack("U0Z*", $high), "\xfeb\x00");
+}
+{
+ # pack /: a length item (N or Z*) followed by the counted items; "abcd" is appended so unpack has trailing data it must not consume
+ my @array = 1..14;
+ my @out = unpack("N/S", pack("N/S", @array) . "abcd");
+ is("@out", "@array", "pack N/S works");
+ @out = unpack("N/S*", pack("N/S*", @array) . "abcd");
+ is("@out", "@array", "pack N/S* works");
+ @out = unpack("N/S*", pack("N/S14", @array) . "abcd");
+ is("@out", "@array", "pack N/S14 works");
+ @out = unpack("N/S*", pack("N/S15", @array) . "abcd"); # count larger than the 14 values available: all 14 come back
+ is("@out", "@array", "pack N/S15 works");
+ @out = unpack("N/S*", pack("N/S13", @array) . "abcd"); # count smaller than the list: only the first 13 come back
+ is("@out", "@array[0..12]", "pack N/S13 works");
+ @out = unpack("N/S*", pack("N/S0", @array) . "abcd");
+ is("@out", "", "pack N/S0 works");
+ is(pack("Z*/a0", "abc"), "0\0", "pack Z*/a0 makes a short string"); # from here on the length is written as a NUL-terminated decimal string
+ is(pack("Z*/Z0", "abc"), "0\0", "pack Z*/Z0 makes a short string");
+ is(pack("Z*/a3", "abc"), "3\0abc", "pack Z*/a3 makes a full string");
+ is(pack("Z*/Z3", "abc"), "3\0ab\0", "pack Z*/Z3 makes a short string");
+ is(pack("Z*/a5", "abc"), "5\0abc\0\0", "pack Z*/a5 makes a long string");
+ is(pack("Z*/Z5", "abc"), "5\0abc\0\0", "pack Z*/Z5 makes a long string");
+ is(pack("Z*/Z"), "1\0\0", "pack Z*/Z with no argument makes an extended string");
+ is(pack("Z*/Z", ""), "1\0\0", "pack Z*/Z of an empty string makes an extended string");
+ is(pack("Z*/a", ""), "0\0", "pack Z*/a of an empty string makes an empty string"); # only the "0\0" length is produced, no data follows
+}
+{
+ # unpack("A*", $unicode) strips general unicode spaces; on a string that is not really unicode \xa0 must be left alone
+ is(unpack("A*", "ab \n\xa0 \0"), "ab \n\xa0",
+ 'normal A* strip leaves \xa0');
+ is(unpack("U0C0A*", "ab \n\xa0 \0"), "ab \n\xa0",
+ 'normal A* strip leaves \xa0 even if it got upgraded for technical reasons');
+ is(unpack("A*", pack("a*(U0U)a*", "ab \n", 0xa0, " \0")), "ab", # the inner pack builds the upgraded input via U0U
+ 'upgraded strings A* removes \xa0');
+ is(unpack("A*", pack("a*(U0UU)a*", "ab \n", 0xa0, 0x1680, " \0")), "ab",
+ 'upgraded strings A* removes all unicode whitespace');
+ is(unpack("A5", pack("a*(U0U)a*", "ab \n", 0x1680, "def", "ab")), "ab", # NOTE(review): the template has only three items, so the final "ab" argument looks unused - confirm
+ 'upgraded strings A5 removes all unicode whitespace');
+ is(unpack("A*", pack("U", 0x1680)), "",
+ 'upgraded strings A* with nothing left');
+}
+{
+ # Testing unpack . and .! : . returns the current offset, its count selects what the offset is relative to; .! is checked against byte offsets on utf8 strings
+ is(unpack(".", "ABCD"), 0, "offset at start of string is 0");
+ is(unpack(".", ""), 0, "offset at start of empty string is 0");
+ is(unpack("x3.", "ABCDEF"), 3, "simple offset works");
+ is(unpack("x3.", "ABC"), 3, "simple offset at end of string works");
+ is(unpack("x3.0", "ABC"), 0, "self offset is 0");
+ is(unpack("x3(x2.)", "ABCDEF"), 2, "offset is relative to inner group");
+ is(unpack("x3(X2.)", "ABCDEF"), -2,
+ "negative offset relative to inner group");
+ is(unpack("x3(X2.2)", "ABCDEF"), 1, "offset counts groups"); # .2 looks two group levels up, here the string start: 3 - 2 = 1
+ is(unpack("x3(x2.0)", "ABCDEF"), 0, "self offset in group is still 0");
+ is(unpack("x3(x2.2)", "ABCDEF"), 5, "offset counts groups");
+ is(unpack("x3(x2.*)", "ABCDEF"), 5, "star offset is relative to start");
+
+ my $high = chr(8188) x 6; # six characters above 0xff; the *3 factors below reflect 3 UTF-8 bytes per character
+ is(unpack("x3(x2.)", $high), 2, "utf8 offset is relative to inner group");
+ is(unpack("x3(X2.)", $high), -2,
+ "utf8 negative offset relative to inner group");
+ is(unpack("x3(X2.2)", $high), 1, "utf8 offset counts groups");
+ is(unpack("x3(x2.0)", $high), 0, "utf8 self offset in group is still 0");
+ is(unpack("x3(x2.2)", $high), 5, "utf8 offset counts groups");
+ is(unpack("x3(x2.*)", $high), 5, "utf8 star offset is relative to start");
+
+ is(unpack("U0x3(x2.)", $high), 2,
+ "U0 mode utf8 offset is relative to inner group");
+ is(unpack("U0x3(X2.)", $high), -2,
+ "U0 mode utf8 negative offset relative to inner group");
+ is(unpack("U0x3(X2.2)", $high), 1,
+ "U0 mode utf8 offset counts groups");
+ is(unpack("U0x3(x2.0)", $high), 0,
+ "U0 mode utf8 self offset in group is still 0");
+ is(unpack("U0x3(x2.2)", $high), 5,
+ "U0 mode utf8 offset counts groups");
+ is(unpack("U0x3(x2.*)", $high), 5,
+ "U0 mode utf8 star offset is relative to start");
+
+ is(unpack("x3(x2.!)", $high), 2*3, # .! : same positions as the plain . tests above, reported in bytes
+ "utf8 .! offset is relative to inner group");
+ is(unpack("x3(X2.!)", $high), -2*3,
+ "utf8 .! negative offset relative to inner group");
+ is(unpack("x3(X2.!2)", $high), 1*3,
+ "utf8 .! offset counts groups");
+ is(unpack("x3(x2.!0)", $high), 0,
+ "utf8 .! self offset in group is still 0");
+ is(unpack("x3(x2.!2)", $high), 5*3,
+ "utf8 .! offset counts groups");
+ is(unpack("x3(x2.!*)", $high), 5*3,
+ "utf8 .! star offset is relative to start");
+
+ is(unpack("U0x3(x2.!)", $high), 2, # under U0 the expected .! values equal the plain . values
+ "U0 mode utf8 .! offset is relative to inner group");
+ is(unpack("U0x3(X2.!)", $high), -2,
+ "U0 mode utf8 .! negative offset relative to inner group");
+ is(unpack("U0x3(X2.!2)", $high), 1,
+ "U0 mode utf8 .! offset counts groups");
+ is(unpack("U0x3(x2.!0)", $high), 0,
+ "U0 mode utf8 .! self offset in group is still 0");
+ is(unpack("U0x3(x2.!2)", $high), 5,
+ "U0 mode utf8 .! offset counts groups");
+ is(unpack("U0x3(x2.!*)", $high), 5,
+ "U0 mode utf8 .! star offset is relative to start");
+}
+{
+ # Testing pack . and .! : the argument is an offset; the string is truncated or NUL-extended to it, relative to the point chosen by the count
+ is(pack("(a)5 .", 1..5, 3), "123", ". relative to string start, shorten");
+ eval { () = pack("(a)5 .", 1..5, -3) }; # an offset before the start of the string must die
+ like($@, qr{'\.' outside of string in pack}, "Proper error message");
+ is(pack("(a)5 .", 1..5, 8), "12345\x00\x00\x00",
+ ". relative to string start, extend");
+ is(pack("(a)5 .", 1..5, 5), "12345", ". relative to string start, keep");
+
+ is(pack("(a)5 .0", 1..5, -3), "12",
+ ". relative to string current, shorten");
+ is(pack("(a)5 .0", 1..5, 2), "12345\x00\x00",
+ ". relative to string current, extend");
+ is(pack("(a)5 .0", 1..5, 0), "12345",
+ ". relative to string current, keep");
+
+ is(pack("(a)5 (.)", 1..5, -3), "12", # the group opens at offset 5, so -3 ends up at 2
+ ". relative to group, shorten");
+ is(pack("(a)5 (.)", 1..5, 2), "12345\x00\x00",
+ ". relative to group, extend");
+ is(pack("(a)5 (.)", 1..5, 0), "12345",
+ ". relative to group, keep");
+
+ is(pack("(a)3 ((a)2 .)", 1..5, -2), "1", # the group opens at offset 3, so -2 ends up at 1
+ ". relative to group, shorten");
+ is(pack("(a)3 ((a)2 .)", 1..5, 2), "12345",
+ ". relative to group, keep");
+ is(pack("(a)3 ((a)2 .)", 1..5, 4), "12345\x00\x00",
+ ". relative to group, extend");
+
+ is(pack("(a)3 ((a)2 .2)", 1..5, 2), "12", # .2 looks two group levels up, here the string start
+ ". relative to counted group, shorten");
+ is(pack("(a)3 ((a)2 .2)", 1..5, 7), "12345\x00\x00",
+ ". relative to counted group, extend");
+ is(pack("(a)3 ((a)2 .2)", 1..5, 5), "12345",
+ ". relative to counted group, keep");
+
+ is(pack("(a)3 ((a)2 .*)", 1..5, 2), "12",
+ ". relative to start, shorten");
+ is(pack("(a)3 ((a)2 .*)", 1..5, 7), "12345\x00\x00",
+ ". relative to start, extend");
+ is(pack("(a)3 ((a)2 .*)", 1..5, 5), "12345",
+ ". relative to start, keep");
+
+ is(pack('(a)5 (. @2 a)', 1..5, -3, "a"), "12\x00\x00a", # after the shrink, @2 still pads relative to the group
+ ". based shrink properly updates group starts");
+
+ is(pack("(W)3 ((W)2 .)", 0x301..0x305, -2), "\x{301}", # plain . counts characters on a utf8 string
+ "utf8 . relative to group, shorten");
+ is(pack("(W)3 ((W)2 .)", 0x301..0x305, 2),
+ "\x{301}\x{302}\x{303}\x{304}\x{305}",
+ "utf8 . relative to group, keep");
+ is(pack("(W)3 ((W)2 .)", 0x301..0x305, 4),
+ "\x{301}\x{302}\x{303}\x{304}\x{305}\x00\x00",
+ "utf8 . relative to group, extend");
+
+ is(pack("(W)3 ((W)2 .!)", 0x301..0x305, -2), "\x{301}\x{302}", # .! counts bytes: -2 bytes is one 2-byte character back
+ "utf8 .! relative to group, shorten");
+ is(pack("(W)3 ((W)2 .!)", 0x301..0x305, 4),
+ "\x{301}\x{302}\x{303}\x{304}\x{305}",
+ "utf8 .! relative to group, keep");
+ is(pack("(W)3 ((W)2 .!)", 0x301..0x305, 6),
+ "\x{301}\x{302}\x{303}\x{304}\x{305}\x00\x00",
+ "utf8 .! relative to group, extend");
+
+ is(pack('(W)5 (. @2 a)', 0x301..0x305, -3, "a"),
+ "\x{301}\x{302}\x00\x00a",
+ "utf8 . based shrink properly updates group starts");
+}
+{
+ # Testing @! : on plain strings @ and @! give the same result; on utf8 strings the expectations below show @ counting characters and @! counting bytes
+ is(pack('a* @3', "abcde"), "abc", 'Test basic @');
+ is(pack('a* @!3', "abcde"), "abc", 'Test basic @!');
+ is(pack('a* @2', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{301}\x{302}",
+ 'Test basic utf8 @');
+ is(pack('a* @!2', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{301}", # 2 bytes keep a single 2-byte character
+ 'Test basic utf8 @!');
+
+ is(unpack('@4 a*', "abcde"), "e", 'Test basic @'); # same checks for unpack
+ is(unpack('@!4 a*', "abcde"), "e", 'Test basic @!');
+ is(unpack('@4 a*', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{305}",
+ 'Test basic utf8 @');
+ is(unpack('@!4 a*', "\x{301}\x{302}\x{303}\x{304}\x{305}"), # 4 bytes skip two 2-byte characters
+ "\x{303}\x{304}\x{305}", 'Test basic utf8 @!');
+}