Skip to content

Commit

Permalink
Parse Unicode surrogate pairs in Encoding::JS.unescape (closes #519).
Browse files: browse the repository at this point in the history
  • Loading branch information
postmodern committed Jul 13, 2024
1 parent 757b189 commit dc5dde7
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
5 changes: 5 additions & 0 deletions lib/ronin/support/encoding/js.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,11 @@ def self.unescape(data)
until scanner.eos?
unescaped << if (backslash_escape = scanner.scan(/\\[btnfr'"\\]/))
BACKSLASHED_CHARS[backslash_escape]
elsif (surrogate_pair = scanner.scan(/\\u[dD][890abAB][0-9a-fA-F]{2}\\u[dD][cdefCDEF][0-9a-fA-F]{2}/))
hi = surrogate_pair[2..6].to_i(16)
lo = surrogate_pair[8..12].to_i(16)

(0x1_0000 + ((hi - 0xd800) * 0x400) + (lo - 0xdc00))
elsif (unicode_escape = scanner.scan(/[\\%]u[0-9a-fA-F]{1,4}/))
unicode_escape[2..].to_i(16)
elsif (hex_escape = scanner.scan(/[\\%][0-9a-fA-F]{1,2}/))
Expand Down
8 changes: 8 additions & 0 deletions spec/encoding/js_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
require 'spec_helper'
require 'ronin/support/encoding/js'

require 'json'

describe Ronin::Support::Encoding::JS do
describe ".escape_byte" do
context "when given a byte that maps to a special character" do
Expand Down Expand Up @@ -97,6 +99,12 @@
expect(subject.unescape(js_unicode)).to eq(data)
end

it "must unescape Unicode surrogate pair characters" do
  # \uD83D\uDE80 is the high/low surrogate pair that encodes U+1F680.
  escaped = "\\uD83D\\uDE80"

  # Use the JSON parser's decoding of the same escape sequence as the
  # reference result.
  expected = JSON.parse(%("#{escaped}"))

  expect(subject.unescape(escaped)).to eq(expected)
end

it "must unescape JavaScript hex characters" do
expect(subject.unescape(js_hex)).to eq(data)
end
Expand Down

0 comments on commit dc5dde7

Please sign in to comment.