diff --git a/CHANGES.md b/CHANGES.md index e38c3afe9..4e95be438 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,8 @@ unreleased + merlin library - Expose utilities to manipulate typed-holes in `Merlin_analysis.Typed_hole` (#1888) + - `locate` can now disambiguate between files with identical names and contents + (#1882) merlin 5.4.1 ============ diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index 8c6294e3e..3ce56b3b6 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -386,12 +386,13 @@ let find_source ~config loc = | Some s -> s in log ~title:"find_source" "initial path: %S" initial_path; - let dir = Filename.dirname initial_path in - let dir = + let canonical_dir_for_file file = + let raw_dir = Filename.dirname file in match config.Mconfig.query.directory with - | "" -> dir - | cwd -> Misc.canonicalize_filename ~cwd dir + | "" -> raw_dir + | cwd -> Misc.canonicalize_filename ~cwd raw_dir in + let dir = canonical_dir_for_file initial_path in match Utils.find_all_matches ~config ~with_fallback file with | [] -> log ~title:"find_source" "failed to find %S in source path (fallback = %b)" @@ -410,22 +411,28 @@ let find_source ~config loc = ~title:(sprintf "find_source(%s)" filename) "multiple matches in the source path : %s" (String.concat ~sep:" , " files); - try + let files_matching_digest = match File_switching.source_digest () with | None -> log ~title:"find_source" "... no source digest available to select the right one"; - raise Not_found + [] | Some digest -> log ~title:"find_source" "... trying to use source digest to find the right one"; log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest); - Found - (List.find files ~f:(fun f -> - let fdigest = Digest.file f in - log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest); - fdigest = digest)) - with Not_found -> ( + + List.filter files ~f:(fun f -> + let fdigest = Digest.file f in + log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest); + fdigest = digest) + in + match files_matching_digest with + | [ file ] -> + log ~title:"find_source" "... found exactly one file with matching digest"; + Found file + | [] -> ( + log ~title:"find_source" "... found no files with matching digest"; log ~title:"find_source" "... using heuristic to select the right one"; log ~title:"find_source" "we are looking for a file named %s in %s" fname dir; @@ -462,7 +469,66 @@ let find_source ~config loc = match lst with | (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files | (_, s) :: _ -> Found s - | _ -> assert false)) + | _ -> assert false) + | files_matching_digest -> + log ~title:"find_source" "... found multiple files with matching digest"; + log ~title:"find_source" + "... using directory heuristic to choose the best one"; + (* Give each source file a score that represents how close its path is to the + target path (the path of the build artifact) and then choose the source file + with the highest score. + + The score of a source file is the longest tail of the path of the its + directory that is a subpath of the target path. This is premised on build + systems liking to put artifacts in paths that are similar to the source path. + i.e., dune may put the cmt for foo/bar/baz.ml in + _build/default/foo/bar/.bar.objs/byte/bar__Baz.cmt, so we want to use that + shared foo/bar in the path to disambiguate. + + ex: + source file: /a/b/c/d/e/f.ml + target path: /a/b/c/_build/default/d/e/artifacts/f.cmi + score: 2, because /a/b/c/d/e is the source file's directory, and d/e is + the longest tail of it that is a subpath of the target path. *) + let score_file source_file = + (* This is technically quadratic, but + a) most file paths are short + b) in the common case, this is linear because common_prefix_len + will usually fail on the first loop + c) this isn't a hot path - this is only for the uncommon case where there are + two identical files + So the stars would need to align for this to cause performance problems *) + let target_dir = dir in + let source_dir = canonical_dir_for_file source_file in + let target_dir_rev = target_dir |> Misc.split_path |> List.rev in + let source_dir_rev = source_dir |> Misc.split_path |> List.rev in + let rec common_prefix_len a b = + match (a, b) with + | [], _ | _, [] -> 0 + | a_hd :: a_tl, b_hd :: b_tl -> + if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl + else 0 + in + let rec candidates = function + | [] -> [] + | _ :: tl as curr -> curr :: candidates tl + in + candidates target_dir_rev + |> List.map ~f:(common_prefix_len source_dir_rev) + |> List.max_elt ~cmp:Int.compare + |> Option.value ~default:0 + in + let files_matching_digest_with_scores = + List.map files_matching_digest ~f:(fun file -> (file, score_file file)) + in + (* get the max *) + let best_file, _best_score = + List.max_elt files_matching_digest_with_scores + ~cmp:(fun (_, a) (_, b) -> Int.compare a b) + |> Option.get + (* theres at least one element, so this is never None *) + in + Found best_file) (* Well, that's just another hack. [find_source] doesn't like the "-o" option of the compiler. This hack handles diff --git a/src/utils/misc.ml b/src/utils/misc.ml index fd7b3b27a..2516d8df8 100644 --- a/src/utils/misc.ml +++ b/src/utils/misc.ml @@ -356,7 +356,7 @@ let remove_file filename = then Sys.remove filename with Sys_error _msg -> () -let rec split_path path acc = +let rec split_path_and_prepend path acc = match Filename.dirname path with | dir when dir = path -> let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in @@ -373,7 +373,9 @@ let rec split_path path acc = else dir in dir :: acc - | dir -> split_path dir (Filename.basename path :: acc) + | dir -> split_path_and_prepend dir (Filename.basename path :: acc) + +let split_path path = split_path_and_prepend path [] (* Deal with case insensitive FS *) @@ -412,9 +414,9 @@ let exact_file_exists ~dirname ~basename = let canonicalize_filename ?cwd path = let parts = - match split_path path [] with + match split_path path with | dot :: rest when dot = Filename.current_dir_name -> - split_path (match cwd with None -> Sys.getcwd () | Some c -> c) rest + split_path_and_prepend (match cwd with None -> Sys.getcwd () | Some c -> c) rest | parts -> parts in let goup path = function @@ -461,7 +463,7 @@ let rec expand_glob ~filter acc root = function Array.fold_left process acc items let expand_glob ?(filter=fun _ -> true) path acc = - match split_path path [] with + match split_path path with | [] -> acc | root :: subs -> let patterns = List.map ~f:Glob.compile_pattern subs in diff --git a/src/utils/misc.mli b/src/utils/misc.mli index 9c560d2dc..0708802c2 100644 --- a/src/utils/misc.mli +++ b/src/utils/misc.mli @@ -129,11 +129,16 @@ val canonicalize_filename : ?cwd:string -> string -> string val expand_glob : ?filter:(string -> bool) -> string -> string list -> string list (* [expand_glob ~filter pattern acc] adds all filenames matching [pattern] and satistfying the [filter] predicate to [acc]*) -val split_path : string -> string list -> string list - (* [split_path path tail] prepends all components of [path] to [tail], +val split_path : string -> string list + (* [split_path path] returns the components of [path], + including implicit "." if path is not absolute. + [split_path "a/b/c"] = ["."; "a"; "b"; "c"] + [split_path "/a/b/c"] = ["/"; "a"; "b"; "c"] + FIXME: explain windows behavior + *) +val split_path_and_prepend : string -> string list -> string list + (* [split_path_and_prepend path tail] prepends all components of [path] to [tail], including implicit "." if path is not absolute. - [split_path "a/b/c" []] = ["."; "a"; "b"; "c"] - [split_path "/a/b/c" []] = ["/"; "a"; "b"; "c"] FIXME: explain windows behavior *) diff --git a/src/utils/std.ml b/src/utils/std.ml index 0457d87a3..9bcb784ac 100644 --- a/src/utils/std.ml +++ b/src/utils/std.ml @@ -290,6 +290,18 @@ module List = struct let sort_uniq ~cmp l = dedup_adjacent ~cmp (sort ~cmp l) let print f () l = "[ " ^ String.concat "; " (List.map (f ()) l) ^ " ]" + + let max_elt list ~cmp = + fold_left list ~init:None ~f:(fun acc elt -> + match acc with + | None -> Some elt + | Some max -> if cmp max elt < 0 then Some elt else acc) + + let min_elt list ~cmp = + fold_left list ~init:None ~f:(fun acc elt -> + match acc with + | None -> Some elt + | Some min -> if cmp min elt > 0 then Some elt else acc) end module Option = struct diff --git a/tests/test-dirs/locate/distinguish-files.t/bin/dune b/tests/test-dirs/locate/distinguish-files.t/bin/dune new file mode 100644 index 000000000..b5dd7d866 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/bin/dune @@ -0,0 +1,3 @@ +(executable + (name main) + (libraries lib_a lib_b)) diff --git a/tests/test-dirs/locate/distinguish-files.t/bin/main.ml b/tests/test-dirs/locate/distinguish-files.t/bin/main.ml new file mode 100644 index 000000000..ccbabed03 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/bin/main.ml @@ -0,0 +1,5 @@ +let _ = Lib_a.Same.foo +let _ = Lib_b.Same.foo + +let _ = Lib_a.Different.foo +let _ = Lib_b.Different.foo diff --git a/tests/test-dirs/locate/distinguish-files.t/dune-project b/tests/test-dirs/locate/distinguish-files.t/dune-project new file mode 100644 index 000000000..929c696e5 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/dune-project @@ -0,0 +1 @@ +(lang dune 2.0) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml b/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml new file mode 100644 index 000000000..bd9ffd45b --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml @@ -0,0 +1,2 @@ +(* AAAAA *) +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/dune b/tests/test-dirs/locate/distinguish-files.t/lib_a/dune new file mode 100644 index 000000000..93aa7c4cc --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/dune @@ -0,0 +1,2 @@ +(library + (name lib_a)) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml b/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml new file mode 100644 index 000000000..5b5ecbfc2 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml @@ -0,0 +1 @@ +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml b/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml new file mode 100644 index 000000000..d0358288d --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml @@ -0,0 +1,2 @@ +(* BBBBB *) +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/dune b/tests/test-dirs/locate/distinguish-files.t/lib_b/dune new file mode 100644 index 000000000..4f6662e96 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/dune @@ -0,0 +1,2 @@ +(library + (name lib_b)) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml b/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml new file mode 100644 index 000000000..5b5ecbfc2 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml @@ -0,0 +1 @@ +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/run.t b/tests/test-dirs/locate/distinguish-files.t/run.t new file mode 100644 index 000000000..11da27d20 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/run.t @@ -0,0 +1,27 @@ +Merlin can distinguish between two files with the same name and contents + + $ dune build @check + +Part 1: Test that two files with the same name but different contents can be distinguished + +Get Lib_a.Different.foo + $ $MERLIN single locate -position 4:26 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_a/different.ml + +Get Lib_b.Different.foo + $ $MERLIN single locate -position 5:26 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_b/different.ml + +Part 2: Test that two files with the same name and same contents can be distinguished + +Get Lib_a.Same.foo + $ $MERLIN single locate -position 1:22 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_a/same.ml + +Get Lib_b.Same.foo + $ $MERLIN single locate -position 2:22 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_b/same.ml diff --git a/tests/test-dirs/locate/dune b/tests/test-dirs/locate/dune index 2f1e864e3..3121931a3 100755 --- a/tests/test-dirs/locate/dune +++ b/tests/test-dirs/locate/dune @@ -2,7 +2,7 @@ (applies_to looping-substitution mutually-recursive partial-cmt includes issue802 issue845 issue1848 issue1199 issue1524 sig-substs l-413-features module-aliases locate-constrs without-implem without-sig module-decl-aliases - in-implicit-trans-dep) + in-implicit-trans-dep distinguish-files) (enabled_if (<> %{os_type} Win32)))