diff --git a/CHANGELOG.md b/CHANGELOG.md index faf4bd54280c..06d9630803bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Bug fixes: * Fix `IO#{wait,wait_readable,wait_writable}` methods and switch the current thread into a sleep state (@andrykonchin). * Fix `rb_global_variable()` for `Float` and bignum values during the `Init_` function (#3478, @eregon). * Fix parsing literal floats when the locale does not use `.` for the decimal separator (e.g. `LANG=fr_FR.UTF-8`) (#3512, @eregon). +* Fix `IO#{read_nonblock,readpartial,sysread}`, `BasicSocket#{recv,recv_nonblock}`, `{Socket,UDPSocket}#recvfrom_nonblock`, `UnixSocket#recvfrom` and preserve a provided buffer's encoding (#3506, @andrykonchyn). Compatibility: * Move `IO#wait_readable`, `IO#wait_writable`, `IO#wait_priority` and `IO#wait` into core library (@andrykonchin). diff --git a/lib/truffle/socket/basic_socket.rb b/lib/truffle/socket/basic_socket.rb index 7bfee710d74f..aa58725fdb8a 100644 --- a/lib/truffle/socket/basic_socket.rb +++ b/lib/truffle/socket/basic_socket.rb @@ -163,7 +163,11 @@ def send(message, flags, dest_sockaddr = nil) end str = buf.read_string(n_bytes) - buffer ? buffer.replace(str) : str + if buffer + buffer.replace str.force_encoding(buffer.encoding) + else + str + end end end diff --git a/lib/truffle/socket/ip_socket.rb b/lib/truffle/socket/ip_socket.rb index 2a75a71a10cd..72e11c4c6183 100644 --- a/lib/truffle/socket/ip_socket.rb +++ b/lib/truffle/socket/ip_socket.rb @@ -61,7 +61,9 @@ def peeraddr(reverse_lookup = nil) end end - message = buffer.replace(message) if buffer + if buffer + message = buffer.replace message.force_encoding(buffer.encoding) + end [message, [aname, addr.ip_port, hostname, addr.ip_address]] end diff --git a/lib/truffle/socket/socket.rb b/lib/truffle/socket/socket.rb index 1dbe31226359..188c0806ebd7 100644 --- a/lib/truffle/socket/socket.rb +++ b/lib/truffle/socket/socket.rb @@ -384,7 +384,9 @@ def recvfrom(bytes, flags = 0) if message == :wait_readable message else - message = buffer.replace(message) if buffer + if buffer + message = buffer.replace message.force_encoding(buffer.encoding) + end [message, addr] end end diff --git a/lib/truffle/socket/unix_socket.rb b/lib/truffle/socket/unix_socket.rb index 7b8a5bdf53e4..491b29862e42 100644 --- a/lib/truffle/socket/unix_socket.rb +++ b/lib/truffle/socket/unix_socket.rb @@ -58,11 +58,18 @@ def initialize(path) Errno.handle('connect(2)') if status < 0 end - def recvfrom(bytes_read, flags = 0) + def recvfrom(bytes_read, flags = 0, buffer = nil) Truffle::Socket::Foreign.memory_pointer(bytes_read) do |buf| n_bytes = Truffle::Socket::Foreign.recvfrom(Primitive.io_fd(self), buf, bytes_read, flags, nil, nil) Errno.handle('recvfrom(2)') if n_bytes == -1 - return [buf.read_string(n_bytes), ['AF_UNIX', '']] + + message = buf.read_string(n_bytes) + + if buffer + message = buffer.replace message.force_encoding(buffer.encoding) + end + + return [message, ['AF_UNIX', '']] end end diff --git a/lib/truffle/stringio.rb b/lib/truffle/stringio.rb index 5eb4d0918e65..702e02a1e5cf 100644 --- a/lib/truffle/stringio.rb +++ b/lib/truffle/stringio.rb @@ -450,6 +450,8 @@ def read(length = nil, buffer = nil) pos = d.pos string = d.string + # intentionally don't preserve buffer's encoding + # see https://bugs.ruby-lang.org/issues/20418 if length length = Truffle::Type.coerce_to length, Integer, :to_int raise ArgumentError if length < 0 diff --git a/spec/ruby/core/io/pread_spec.rb b/spec/ruby/core/io/pread_spec.rb index 28afc80e5c61..6d93b432c263 100644 --- a/spec/ruby/core/io/pread_spec.rb +++ b/spec/ruby/core/io/pread_spec.rb @@ -22,7 +22,7 @@ it "accepts a length, an offset, and an output buffer" do buffer = +"foo" - @file.pread(3, 4, buffer) + @file.pread(3, 4, buffer).should.equal?(buffer) buffer.should == "567" end @@ -38,6 +38,13 @@ buffer.should == "12345" end + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + @file.pread(10, 0, buffer) + + buffer.encoding.should == Encoding::ISO_8859_1 + end + it "does not advance the file pointer" do @file.pread(4, 0).should == "1234" @file.read.should == "1234567890" diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index eb3652e692a5..8741d9f0179a 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -376,6 +376,21 @@ buf.should == @contents[0..4] end + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + @io.read(10, buffer) + + buffer.encoding.should == Encoding::ISO_8859_1 + end + + # https://bugs.ruby-lang.org/issues/20416 + it "does not preserve the encoding of the given buffer when max length is not provided" do + buffer = ''.encode(Encoding::ISO_8859_1) + @io.read(nil, buffer) + + buffer.encoding.should_not == Encoding::ISO_8859_1 + end + it "returns the given buffer" do buf = +"" diff --git a/spec/ruby/core/io/readpartial_spec.rb b/spec/ruby/core/io/readpartial_spec.rb index 0060beb54504..0852f20b2da8 100644 --- a/spec/ruby/core/io/readpartial_spec.rb +++ b/spec/ruby/core/io/readpartial_spec.rb @@ -62,7 +62,7 @@ buffer = +"existing content" @wr.write("hello world") @wr.close - @rd.readpartial(11, buffer) + @rd.readpartial(11, buffer).should.equal?(buffer) buffer.should == "hello world" end @@ -106,6 +106,7 @@ @wr.write("abc") @wr.close @rd.readpartial(10, buffer) + buffer.encoding.should == Encoding::ISO_8859_1 end end diff --git a/spec/ruby/core/io/sysread_spec.rb b/spec/ruby/core/io/sysread_spec.rb index 003bb9eb9491..8851214283ca 100644 --- a/spec/ruby/core/io/sysread_spec.rb +++ b/spec/ruby/core/io/sysread_spec.rb @@ -97,7 +97,7 @@ it "discards the existing buffer content upon successful read" do buffer = +"existing content" - @file.sysread(11, buffer) + @file.sysread(11, buffer).should.equal?(buffer) buffer.should == "01234567890" end @@ -107,6 +107,13 @@ -> { @file.sysread(1, buffer) }.should raise_error(EOFError) buffer.should be_empty end + + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + string = @file.sysread(10, buffer) + + buffer.encoding.should == Encoding::ISO_8859_1 + end end describe "IO#sysread" do diff --git a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb index 17c846054df1..df42c116fb33 100644 --- a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb @@ -52,9 +52,19 @@ @s2.send("data", 0, @s1.getsockname) IO.select([@s1], nil, nil, 2) - buf = +"foo" - @s1.recv_nonblock(5, 0, buf) - buf.should == "data" + buffer = +"foo" + @s1.recv_nonblock(5, 0, buffer).should.equal?(buffer) + buffer.should == "data" + end + + it "preserves the encoding of the given buffer" do + @s1.bind(Socket.pack_sockaddr_in(0, ip_address)) + @s2.send("data", 0, @s1.getsockname) + IO.select([@s1], nil, nil, 2) + + buffer = ''.encode(Encoding::ISO_8859_1) + @s1.recv_nonblock(5, 0, buffer) + buffer.encoding.should == Encoding::ISO_8859_1 end it "does not block if there's no data available" do diff --git a/spec/ruby/library/socket/basicsocket/recv_spec.rb b/spec/ruby/library/socket/basicsocket/recv_spec.rb index 9fe8c52f9a52..e82a357d3dfd 100644 --- a/spec/ruby/library/socket/basicsocket/recv_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_spec.rb @@ -100,13 +100,29 @@ socket.write("data") client = @server.accept - buf = +"foo" + buffer = +"foo" begin - client.recv(4, 0, buf) + client.recv(4, 0, buffer).should.equal?(buffer) ensure client.close end - buf.should == "data" + buffer.should == "data" + + socket.close + end + + it "preserves the encoding of the given buffer" do + socket = TCPSocket.new('127.0.0.1', @port) + socket.write("data") + + client = @server.accept + buffer = ''.encode(Encoding::ISO_8859_1) + begin + client.recv(4, 0, buffer) + ensure + client.close + end + buffer.encoding.should == Encoding::ISO_8859_1 socket.close end diff --git a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb index 94f58ac49f4a..5596f91bb86b 100644 --- a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb +++ b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb @@ -52,6 +52,27 @@ end end + it "allows an output buffer as third argument" do + @client.write('hello') + + IO.select([@server]) + buffer = +'' + message, = @server.recvfrom_nonblock(5, 0, buffer) + + message.should.equal?(buffer) + buffer.should == 'hello' + end + + it "preserves the encoding of the given buffer" do + @client.write('hello') + + IO.select([@server]) + buffer = ''.encode(Encoding::ISO_8859_1) + @server.recvfrom_nonblock(5, 0, buffer) + + buffer.encoding.should == Encoding::ISO_8859_1 + end + describe 'the returned data' do it 'is the same as the sent data' do 5.times do diff --git a/spec/ruby/library/socket/udpsocket/recvfrom_nonblock_spec.rb b/spec/ruby/library/socket/udpsocket/recvfrom_nonblock_spec.rb index 650a061221b8..b8040995897c 100644 --- a/spec/ruby/library/socket/udpsocket/recvfrom_nonblock_spec.rb +++ b/spec/ruby/library/socket/udpsocket/recvfrom_nonblock_spec.rb @@ -58,6 +58,15 @@ buffer.should == 'h' end + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + IO.select([@server]) + message, = @server.recvfrom_nonblock(1, 0, buffer) + + message.should.equal?(buffer) + buffer.encoding.should == Encoding::ISO_8859_1 + end + describe 'the returned Array' do before do IO.select([@server]) diff --git a/spec/ruby/library/socket/unixsocket/recvfrom_spec.rb b/spec/ruby/library/socket/unixsocket/recvfrom_spec.rb index fedf74bb2fa2..d849fdc30264 100644 --- a/spec/ruby/library/socket/unixsocket/recvfrom_spec.rb +++ b/spec/ruby/library/socket/unixsocket/recvfrom_spec.rb @@ -31,6 +31,29 @@ sock.close end + it "allows an output buffer as third argument" do + buffer = +'' + + @client.send("foobar", 0) + sock = @server.accept + message, = sock.recvfrom(6, 0, buffer) + sock.close + + message.should.equal?(buffer) + buffer.should == "foobar" + end + + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + + @client.send("foobar", 0) + sock = @server.accept + sock.recvfrom(6, 0, buffer) + sock.close + + buffer.encoding.should == Encoding::ISO_8859_1 + end + it "uses different message options" do @client.send("foobar", Socket::MSG_PEEK) sock = @server.accept diff --git a/spec/ruby/library/stringio/shared/read.rb b/spec/ruby/library/stringio/shared/read.rb index e3840786d902..45d8d5bffbc7 100644 --- a/spec/ruby/library/stringio/shared/read.rb +++ b/spec/ruby/library/stringio/shared/read.rb @@ -11,10 +11,17 @@ end it "reads length bytes and writes them to the buffer String" do - @io.send(@method, 7, buffer = +"") + @io.send(@method, 7, buffer = +"").should.equal?(buffer) buffer.should == "example" end + it "does not preserve the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + @io.send(@method, 7, buffer) + + buffer.encoding.should_not == Encoding::ISO_8859_1 + end + it "tries to convert the passed buffer Object to a String using #to_str" do obj = mock("to_str") obj.should_receive(:to_str).and_return(buffer = +"") diff --git a/spec/tags/core/io/pread_tags.txt b/spec/tags/core/io/pread_tags.txt index 22858b98bf63..84560c9368be 100644 --- a/spec/tags/core/io/pread_tags.txt +++ b/spec/tags/core/io/pread_tags.txt @@ -18,3 +18,4 @@ fails:IO#pread raises TypeError if offset is not an Integer and cannot be coerce fails:IO#pread raises ArgumentError for negative values of maxlen fails:IO#pread raised Errno::EINVAL for negative values of offset fails:IO#pread raises TypeError if the buffer is not a String and cannot be coerced into String +fails:IO#pread preserves the encoding of the given buffer diff --git a/spec/tags/core/io/read_nonblock_tags.txt b/spec/tags/core/io/read_nonblock_tags.txt index 86717d874798..af86397479ae 100644 --- a/spec/tags/core/io/read_nonblock_tags.txt +++ b/spec/tags/core/io/read_nonblock_tags.txt @@ -1,3 +1,2 @@ fails:IO#read_nonblock raises an exception after ungetc with data in the buffer and character conversion enabled fails:IO#read_nonblock discards the existing buffer content upon error -fails:IO#read_nonblock preserves the encoding of the given buffer diff --git a/spec/tags/core/io/readpartial_tags.txt b/spec/tags/core/io/readpartial_tags.txt deleted file mode 100644 index fc74e20f7480..000000000000 --- a/spec/tags/core/io/readpartial_tags.txt +++ /dev/null @@ -1 +0,0 @@ -fails:IO#readpartial preserves the encoding of the given buffer diff --git a/spec/tags/library/socket/basicsocket/read_nonblock_tags.txt b/spec/tags/library/socket/basicsocket/read_nonblock_tags.txt index 97c413fb560d..ac56e3a0b03e 100644 --- a/spec/tags/library/socket/basicsocket/read_nonblock_tags.txt +++ b/spec/tags/library/socket/basicsocket/read_nonblock_tags.txt @@ -1,4 +1,2 @@ fails:BasicSocket#read_nonblock using IPv4 does not set the IO in nonblock mode fails:BasicSocket#read_nonblock using IPv6 does not set the IO in nonblock mode -fails:BasicSocket#read_nonblock using IPv4 replaces the content of the provided buffer without changing its encoding -fails:BasicSocket#read_nonblock using IPv6 replaces the content of the provided buffer without changing its encoding diff --git a/spec/tags/library/socket/basicsocket/recv_nonblock_tags.txt b/spec/tags/library/socket/basicsocket/recv_nonblock_tags.txt deleted file mode 100644 index 9185207dafbc..000000000000 --- a/spec/tags/library/socket/basicsocket/recv_nonblock_tags.txt +++ /dev/null @@ -1,2 +0,0 @@ -fails:Socket::BasicSocket#recv_nonblock using IPv4 allows an output buffer as third argument -fails:Socket::BasicSocket#recv_nonblock using IPv6 allows an output buffer as third argument diff --git a/spec/tags/library/socket/basicsocket/recv_tags.txt b/spec/tags/library/socket/basicsocket/recv_tags.txt deleted file mode 100644 index a51e691a9784..000000000000 --- a/spec/tags/library/socket/basicsocket/recv_tags.txt +++ /dev/null @@ -1 +0,0 @@ -fails:BasicSocket#recv allows an output buffer as third argument diff --git a/spec/tags/library/socket/socket/recvfrom_nonblock_tags.txt b/spec/tags/library/socket/socket/recvfrom_nonblock_tags.txt deleted file mode 100644 index 3a19e38dd791..000000000000 --- a/spec/tags/library/socket/socket/recvfrom_nonblock_tags.txt +++ /dev/null @@ -1 +0,0 @@ -fails:Socket#recvfrom_nonblock using IPv6 using a bound socket with data available the returned data is the same as the sent data diff --git a/src/main/ruby/truffleruby/core/io.rb b/src/main/ruby/truffleruby/core/io.rb index 213a9ea094c5..eec725b9d0ca 100644 --- a/src/main/ruby/truffleruby/core/io.rb +++ b/src/main/ruby/truffleruby/core/io.rb @@ -1673,6 +1673,8 @@ def read(length = nil, buffer = nil) str = IO.read_encode self, read_all return str unless buffer + # intentionally don't preserve buffer encoding + # see https://bugs.ruby-lang.org/issues/20416 return buffer.replace(str) end @@ -1753,10 +1755,14 @@ def read_nonblock(size, buffer = nil, exception: true) str = Truffle::POSIX.read_string_nonblock(self, size, exception) case str - when Symbol + when Symbol # :wait_readable in case of EAGAIN_ERRNO str when String - buffer ? buffer.replace(str) : str + if buffer + buffer.replace str.force_encoding(buffer.encoding) + else + str + end else # EOF if exception raise EOFError, 'end of file reached' @@ -1871,8 +1877,7 @@ def readpartial(size, buffer = nil) raise EOFError if Primitive.nil? data end - buffer.replace(data) - buffer + buffer.replace data.force_encoding(buffer.encoding) else return +'' if size == 0 @@ -2196,20 +2201,23 @@ def sync=(v) # # @todo Improve reading into provided buffer. # - def sysread(number_of_bytes, buffer = undefined) + def sysread(number_of_bytes, buffer = nil) ensure_open_and_readable flush raise IOError unless @ibuffer.empty? + buffer = StringValue(buffer) if buffer + str, errno = Truffle::POSIX.read_string(self, number_of_bytes) Errno.handle_errno(errno) unless errno == 0 raise EOFError if Primitive.nil? str - unless Primitive.undefined? buffer - StringValue(buffer).replace str + if buffer + buffer.replace str.force_encoding(buffer.encoding) + else + str end - str end ##