Skip to content

Commit

Permalink
Merge branch 'johan/smarter-spacing' into python
Browse files Browse the repository at this point in the history
This improves handling of spaces in the command lines. Notably this
should help us parse Java command lines better when there are spaces in
the classpath.

Fixes #120.
  • Loading branch information
walles committed Feb 24, 2024
2 parents 53c4faf + 5354ec7 commit 584f919
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 19 deletions.
74 changes: 63 additions & 11 deletions px/px_commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os.path
import logging

from typing import List, Optional
from typing import List, Optional, Callable

LOG = logging.getLogger(__name__)

Expand All @@ -19,21 +19,73 @@
PERL_BIN = re.compile("^perl[.0-9]*$")


def to_array(commandline: str) -> List[str]:
def should_coalesce(
part1: str, part2: str, exists: Callable[[str], bool] = os.path.exists
) -> bool:
"""
Two (previously) space separated command line parts should be coalesced if
combining them with a space in between creates an existing file path.
"""

if part1.endswith("/"):
# "xxx/ yyy" would make no sense coalesced
return False

if part2.startswith("-"):
# "xxx -yyy" would make no sense coalesced, that - likely means what
# comes after is a command line switch
return False

if part2.startswith("/"):
# "xxx /yyy" would make no sense coalesced
return False

# Find the last possible starting point of an absolute path in part1
path_start_index = -1
if part1.startswith("/"):
# /x/y/z
path_start_index = 0
if (first_equals_slash := part1.find("=/")) >= 0:
# -Dhello=/x/y/z
path_start_index = first_equals_slash + 1
if (last_colon_slash := part1.rfind(":/")) >= 0:
if last_colon_slash > path_start_index:
# -Dsomepath=/a/b/c:/x/y/z
path_start_index = last_colon_slash + 1

# FIXME: Ignore (non-file:?) URLs?

if path_start_index == -1:
# No path in part1, no need to coalesce
return False

path_end_index_exclusive = len(part2)
if (first_colon := part2.find(":")) >= 0:
path_end_index_exclusive = first_colon
if (first_slash := part2.find("/")) >= 0:
if first_slash < path_end_index_exclusive:
path_end_index_exclusive = first_slash

candidate_path = part1[path_start_index:] + " " + part2[:path_end_index_exclusive]

return exists(candidate_path)


def to_array(
commandline: str, exists: Callable[[str], bool] = os.path.exists
) -> List[str]:
"""Splits a command line string into components"""
base_split = commandline.split(" ")
if len(base_split) == 1:
return base_split

# Try to reverse engineer executables with spaces in their names
merged_split = list(base_split)
while not os.path.isfile(merged_split[0]):
if len(merged_split) == 1:
# Nothing more to merge, give up
return base_split

# Merge the two first elements: http://stackoverflow.com/a/1142879/473672
merged_split[0:2] = [" ".join(merged_split[0:2])]
# Try to reverse engineer which spaces should not be split on
merged_split = [base_split[0]]
for part in base_split[1:]:
if should_coalesce(merged_split[-1], part, exists):
merged_split[-1] += " " + part
else:
merged_split.append(part)

return merged_split

Expand Down
78 changes: 71 additions & 7 deletions tests/px_commandline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,73 @@
from px import px_commandline


def test_should_coalesce():
def exists(s):
return s in [
"/Applications",
"/Applications/IntelliJ IDEA.app",
]

assert not px_commandline.should_coalesce(
"java", "-Dhello=/Applications/IntelliJ", exists=exists
)

assert px_commandline.should_coalesce(
"-Dhello=/Applications/IntelliJ", "IDEA.app/Contents", exists=exists
)

assert px_commandline.should_coalesce(
"/Applications/IntelliJ",
"IDEA.app/Contents/plugins/maven-model/lib/maven-model.jar:/Applications/IntelliJ",
exists=exists,
)

assert px_commandline.should_coalesce(
"/Applications/IntelliJ IDEA.app/Contents/plugins/maven-model/lib/maven-model.jar:/Applications/IntelliJ",
"IDEA.app/Contents/plugins/maven-server/lib/maven-server.jar",
exists=exists,
)


def test_to_array_spaced1():
assert px_commandline.to_array(
"java -Dhello=/Applications/IntelliJ IDEA.app/Contents",
exists=lambda s: s
in [
"/Applications",
"/Applications/IntelliJ IDEA.app",
],
) == ["java", "-Dhello=/Applications/IntelliJ IDEA.app/Contents"]


def test_to_array_spaced2():
assert px_commandline.to_array(
" ".join(
[
"java",
"-Dhello=/Applications/IntelliJ IDEA.app/Contents/Info.plist",
"-classpath",
"/Applications/IntelliJ",
"IDEA.app/Contents/plugins/maven-model/lib/maven-model.jar:/Applications/IntelliJ",
"IDEA.app/Contents/plugins/maven-server/lib/maven-server.jar:/Applications/IntelliJ",
"IDEA.app/Contents/plugins/maven/lib/maven3-server-common.jar",
"MainClass",
]
),
exists=lambda s: s
in [
"/Applications",
"/Applications/IntelliJ IDEA.app",
],
) == [
"java",
"-Dhello=/Applications/IntelliJ IDEA.app/Contents/Info.plist",
"-classpath",
"/Applications/IntelliJ IDEA.app/Contents/plugins/maven-model/lib/maven-model.jar:/Applications/IntelliJ IDEA.app/Contents/plugins/maven-server/lib/maven-server.jar:/Applications/IntelliJ IDEA.app/Contents/plugins/maven/lib/maven3-server-common.jar",
"MainClass",
]


def test_get_command_python():
assert px_commandline.get_command("python") == "python"
assert px_commandline.get_command("/apa/Python") == "Python"
Expand Down Expand Up @@ -98,9 +165,6 @@ def test_get_command_java():
assert px_commandline.get_command("java -jar") == "java"
assert px_commandline.get_command("java -jar ") == "java"

# FIXME: Add test for classpath containing spaces? I say this should be
# postponed until we have a real world use case for that.


def test_get_command_java_gradled():
commandline = (
Expand Down Expand Up @@ -251,23 +315,23 @@ def test_get_command_sudo():

def test_get_command_sudo_with_space_in_command_name(tmpdir):
# Create a file name with a space in it
spaced_path = tmpdir.join("i contain spaces")
spaced_path = tmpdir.join("with space")
spaced_path.write_binary(b"")
spaced_name = str(spaced_path)

# Verify splitting of the spaced file name
assert px_commandline.get_command("sudo " + spaced_name) == "sudo i contain spaces"
assert px_commandline.get_command("sudo " + spaced_name) == "sudo with space"

# Verify splitting with more parameters on the line
assert (
px_commandline.get_command("sudo " + spaced_name + " parameter")
== "sudo i contain spaces"
== "sudo with space"
)


def test_get_command_sudo_with_space_in_path(tmpdir):
# Create a file name with a space in it
spaced_dir = tmpdir.join("i contain spaces")
spaced_dir = tmpdir.join("with space")
os.mkdir(str(spaced_dir))
spaced_path = spaced_dir + "/runme"

Expand Down
2 changes: 1 addition & 1 deletion tests/px_process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def test_get_command_line_array():

def test_get_command_line_array_space_in_binary(tmpdir):
# Create a file name with a space in it
spaced_path = tmpdir.join("i contain spaces")
spaced_path = tmpdir.join("with space")
spaced_path.write_binary(b"")
spaced_name = str(spaced_path)

Expand Down

0 comments on commit 584f919

Please sign in to comment.