Skip to content

Commit

Permalink
Merge pull request #1882 from liam923/distinguish-files
Browse files Browse the repository at this point in the history
Distinguish files with identical contents
  • Loading branch information
voodoos authored Feb 3, 2025
2 parents 1f73cad + 915eff7 commit 834821e
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ unreleased
+ merlin library
- Expose utilities to manipulate typed-holes in `Merlin_analysis.Typed_hole`
(#1888)
- `locate` can now disambiguate between files with identical names and contents
(#1882)

merlin 5.4.1
============
Expand Down
92 changes: 79 additions & 13 deletions src/analysis/locate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,13 @@ let find_source ~config loc =
| Some s -> s
in
log ~title:"find_source" "initial path: %S" initial_path;
let dir = Filename.dirname initial_path in
let dir =
let canonical_dir_for_file file =
let raw_dir = Filename.dirname file in
match config.Mconfig.query.directory with
| "" -> dir
| cwd -> Misc.canonicalize_filename ~cwd dir
| "" -> raw_dir
| cwd -> Misc.canonicalize_filename ~cwd raw_dir
in
let dir = canonical_dir_for_file initial_path in
match Utils.find_all_matches ~config ~with_fallback file with
| [] ->
log ~title:"find_source" "failed to find %S in source path (fallback = %b)"
Expand All @@ -410,22 +411,28 @@ let find_source ~config loc =
~title:(sprintf "find_source(%s)" filename)
"multiple matches in the source path : %s"
(String.concat ~sep:" , " files);
try
let files_matching_digest =
match File_switching.source_digest () with
| None ->
log ~title:"find_source"
"... no source digest available to select the right one";
raise Not_found
[]
| Some digest ->
log ~title:"find_source"
"... trying to use source digest to find the right one";
log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest);
Found
(List.find files ~f:(fun f ->
let fdigest = Digest.file f in
log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest);
fdigest = digest))
with Not_found -> (

List.filter files ~f:(fun f ->
let fdigest = Digest.file f in
log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest);
fdigest = digest)
in
match files_matching_digest with
| [ file ] ->
log ~title:"find_source" "... found exactly one file with matching digest";
Found file
| [] -> (
log ~title:"find_source" "... found no files with matching digest";
log ~title:"find_source" "... using heuristic to select the right one";
log ~title:"find_source" "we are looking for a file named %s in %s" fname
dir;
Expand Down Expand Up @@ -462,7 +469,66 @@ let find_source ~config loc =
match lst with
| (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files
| (_, s) :: _ -> Found s
| _ -> assert false))
| _ -> assert false)
| files_matching_digest ->
log ~title:"find_source" "... found multiple files with matching digest";
log ~title:"find_source"
"... using directory heuristic to choose the best one";
(* Give each source file a score that represents how close its path is to the
target path (the path of the build artifact) and then choose the source file
with the highest score.
The score of a source file is the longest tail of the path of the its
directory that is a subpath of the target path. This is premised on build
systems liking to put artifacts in paths that are similar to the source path.
i.e., dune may put the cmt for foo/bar/baz.ml in
_build/default/foo/bar/.bar.objs/byte/bar__Baz.cmt, so we want to use that
shared foo/bar in the path to disambiguate.
ex:
source file: /a/b/c/d/e/f.ml
target path: /a/b/c/_build/default/d/e/artifacts/f.cmi
score: 2, because /a/b/c/d/e is the source file's directory, and d/e is
the longest tail of it that is a subpath of the target path. *)
let score_file source_file =
(* This is technically quadratic, but
a) most file paths are short
b) in the common case, this is linear because common_prefix_len
will usually fail on the first loop
c) this isn't a hot path - this is only for the uncommon case where there are
two identical files
So the stars would need to align for this to cause performance problems *)
let target_dir = dir in
let source_dir = canonical_dir_for_file source_file in
let target_dir_rev = target_dir |> Misc.split_path |> List.rev in
let source_dir_rev = source_dir |> Misc.split_path |> List.rev in
let rec common_prefix_len a b =
match (a, b) with
| [], _ | _, [] -> 0
| a_hd :: a_tl, b_hd :: b_tl ->
if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl
else 0
in
let rec candidates = function
| [] -> []
| _ :: tl as curr -> curr :: candidates tl
in
candidates target_dir_rev
|> List.map ~f:(common_prefix_len source_dir_rev)
|> List.max_elt ~cmp:Int.compare
|> Option.value ~default:0
in
let files_matching_digest_with_scores =
List.map files_matching_digest ~f:(fun file -> (file, score_file file))
in
(* get the max *)
let best_file, _best_score =
List.max_elt files_matching_digest_with_scores
~cmp:(fun (_, a) (_, b) -> Int.compare a b)
|> Option.get
(* theres at least one element, so this is never None *)
in
Found best_file)

(* Well, that's just another hack.
[find_source] doesn't like the "-o" option of the compiler. This hack handles
Expand Down
12 changes: 7 additions & 5 deletions src/utils/misc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ let remove_file filename =
then Sys.remove filename
with Sys_error _msg -> ()

let rec split_path path acc =
let rec split_path_and_prepend path acc =
match Filename.dirname path with
| dir when dir = path ->
let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in
Expand All @@ -373,7 +373,9 @@ let rec split_path path acc =
else dir
in
dir :: acc
| dir -> split_path dir (Filename.basename path :: acc)
| dir -> split_path_and_prepend dir (Filename.basename path :: acc)

let split_path path = split_path_and_prepend path []

(* Deal with case insensitive FS *)

Expand Down Expand Up @@ -412,9 +414,9 @@ let exact_file_exists ~dirname ~basename =

let canonicalize_filename ?cwd path =
let parts =
match split_path path [] with
match split_path path with
| dot :: rest when dot = Filename.current_dir_name ->
split_path (match cwd with None -> Sys.getcwd () | Some c -> c) rest
split_path_and_prepend (match cwd with None -> Sys.getcwd () | Some c -> c) rest
| parts -> parts
in
let goup path = function
Expand Down Expand Up @@ -461,7 +463,7 @@ let rec expand_glob ~filter acc root = function
Array.fold_left process acc items

let expand_glob ?(filter=fun _ -> true) path acc =
match split_path path [] with
match split_path path with
| [] -> acc
| root :: subs ->
let patterns = List.map ~f:Glob.compile_pattern subs in
Expand Down
13 changes: 9 additions & 4 deletions src/utils/misc.mli
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,16 @@ val canonicalize_filename : ?cwd:string -> string -> string
val expand_glob : ?filter:(string -> bool) -> string -> string list -> string list
(* [expand_glob ~filter pattern acc] adds all filenames matching
[pattern] and satistfying the [filter] predicate to [acc]*)
val split_path : string -> string list -> string list
(* [split_path path tail] prepends all components of [path] to [tail],
val split_path : string -> string list
(* [split_path path] returns the components of [path],
including implicit "." if path is not absolute.
[split_path "a/b/c"] = ["."; "a"; "b"; "c"]
[split_path "/a/b/c"] = ["/"; "a"; "b"; "c"]
FIXME: explain windows behavior
*)
val split_path_and_prepend : string -> string list -> string list
(* [split_path_and_prepend path tail] prepends all components of [path] to [tail],
including implicit "." if path is not absolute.
[split_path "a/b/c" []] = ["."; "a"; "b"; "c"]
[split_path "/a/b/c" []] = ["/"; "a"; "b"; "c"]
FIXME: explain windows behavior
*)

Expand Down
12 changes: 12 additions & 0 deletions src/utils/std.ml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,18 @@ module List = struct
let sort_uniq ~cmp l = dedup_adjacent ~cmp (sort ~cmp l)

let print f () l = "[ " ^ String.concat "; " (List.map (f ()) l) ^ " ]"

let max_elt list ~cmp =
fold_left list ~init:None ~f:(fun acc elt ->
match acc with
| None -> Some elt
| Some max -> if cmp max elt < 0 then Some elt else acc)

let min_elt list ~cmp =
fold_left list ~init:None ~f:(fun acc elt ->
match acc with
| None -> Some elt
| Some min -> if cmp min elt > 0 then Some elt else acc)
end

module Option = struct
Expand Down
3 changes: 3 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/bin/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(executable
(name main)
(libraries lib_a lib_b))
5 changes: 5 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/bin/main.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
let _ = Lib_a.Same.foo
let _ = Lib_b.Same.foo

let _ = Lib_a.Different.foo
let _ = Lib_b.Different.foo
1 change: 1 addition & 0 deletions tests/test-dirs/locate/distinguish-files.t/dune-project
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(lang dune 2.0)
2 changes: 2 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(* AAAAA *)
let foo = 10
2 changes: 2 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_a/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(library
(name lib_a))
1 change: 1 addition & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
let foo = 10
2 changes: 2 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(* BBBBB *)
let foo = 10
2 changes: 2 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_b/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(library
(name lib_b))
1 change: 1 addition & 0 deletions tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
let foo = 10
27 changes: 27 additions & 0 deletions tests/test-dirs/locate/distinguish-files.t/run.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Merlin can distinguish between two files with the same name and contents

$ dune build @check

Part 1: Test that two files with the same name but different contents can be distinguished

Get Lib_a.Different.foo
$ $MERLIN single locate -position 4:26 -filename bin/main.ml < bin/main.ml \
> | jq .value.file -r
$TESTCASE_ROOT/lib_a/different.ml

Get Lib_b.Different.foo
$ $MERLIN single locate -position 5:26 -filename bin/main.ml < bin/main.ml \
> | jq .value.file -r
$TESTCASE_ROOT/lib_b/different.ml

Part 2: Test that two files with the same name and same contents can be distinguished

Get Lib_a.Same.foo
$ $MERLIN single locate -position 1:22 -filename bin/main.ml < bin/main.ml \
> | jq .value.file -r
$TESTCASE_ROOT/lib_a/same.ml

Get Lib_b.Same.foo
$ $MERLIN single locate -position 2:22 -filename bin/main.ml < bin/main.ml \
> | jq .value.file -r
$TESTCASE_ROOT/lib_b/same.ml
2 changes: 1 addition & 1 deletion tests/test-dirs/locate/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
(applies_to looping-substitution mutually-recursive partial-cmt includes
issue802 issue845 issue1848 issue1199 issue1524 sig-substs l-413-features
module-aliases locate-constrs without-implem without-sig module-decl-aliases
in-implicit-trans-dep)
in-implicit-trans-dep distinguish-files)
(enabled_if
(<> %{os_type} Win32)))

Expand Down

0 comments on commit 834821e

Please sign in to comment.