From 11d3dde50c1bc1a3c720d26215d360f0a329b8bc Mon Sep 17 00:00:00 2001 From: Gal Topper Date: Sun, 25 Aug 2024 16:40:40 +0800 Subject: [PATCH] Encode string request body as `utf-8` (#125) Otherwise, http.client [tries to encode it](https://github.com/python/cpython/blob/v3.9.19/Lib/http/client.py#L166) as `latin-1`. [ML-7498](https://iguazio.atlassian.net/browse/ML-7498) --- tests/test_client.py | 21 +++++++++------------ v3io/dataplane/request.py | 10 ++++++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/test_client.py b/tests/test_client.py index 73646ff..9955fe7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -215,7 +215,8 @@ def setUp(self): self._delete_dir(self._object_dir) def test_object(self): - contents = "vegans are better than everyone" + body = "this unicode character Ě triggers ML-7498" + body_bytes = body.encode("utf-8") response = self._client.object.get( container=self._container, path=self._object_path, raise_for_status=v3io.dataplane.RaiseForStatus.never @@ -223,29 +224,25 @@ def test_object(self): self.assertEqual(404, response.status_code) - # put contents to some object - self._client.object.put(container=self._container, path=self._object_path, body=contents) + # put body to some object + self._client.object.put(container=self._container, path=self._object_path, body=body) - # get the object contents + # get the object body response = self._client.object.get(container=self._container, path=self._object_path) - if not isinstance(response.body, str): - response.body = response.body.decode("utf-8") - self.assertEqual(response.body, contents) + self.assertEqual(response.body, body_bytes) response = self._client.object.get(container=self._container, path=self._object_path, offset=0, num_bytes=10) - if not isinstance(response.body, str): - response.body = response.body.decode("utf-8") - self.assertEqual(response.body, contents[0:10]) + self.assertEqual(response.body, body_bytes[0:10]) # get the head of the object response = self._client.object.head(container=self._container, path=self._object_path) - self.assertIn(("Content-Length", str(len(contents))), response.headers.items()) + self.assertIn(("Content-Length", str(len(body_bytes))), response.headers.items()) # get the head of the dir-object response = self._client.object.head(container=self._container, path=self._object_dir) - self.assertIn(("Content-Length", str(0)), response.headers.items()) + self.assertIn(("Content-Length", "0"), response.headers.items()) # delete the object self._client.object.delete(container=self._container, path=self._object_path) diff --git a/v3io/dataplane/request.py b/v3io/dataplane/request.py index 1983c11..f1ebc09 100644 --- a/v3io/dataplane/request.py +++ b/v3io/dataplane/request.py @@ -117,13 +117,19 @@ def encode_get_object(container_name, access_key, kwargs): def encode_put_object(container_name, access_key, kwargs): - headers = None + headers = { + "Content-Type": "application-octet-stream", + } # if the append flag is passed, add a range header if kwargs["append"]: headers = {"Range": "-1"} - return _encode("PUT", container_name, access_key, kwargs["path"], None, headers, kwargs["body"]) + body = kwargs["body"] + if isinstance(body, str): + body = body.encode("utf-8") + + return _encode("PUT", container_name, access_key, kwargs["path"], None, headers, body) def encode_delete_object(container_name, access_key, kwargs):