# Reviewer summary of the test and NEWS hunks that follow.
#
# tests/ls_duplicates_with_duplicates:
#   First runs 'mtn ls duplicates bla' and expects result 2 (presumably the
#   exit status for the unexpected extra argument). It then commits
#   "unique", "testfile" and "testfile2", rewrites both testfiles to
#   "foo foo", and checks that 'mtn ls duplicates -r R' (R = base revision)
#   prints testfile and testfile2 but not unique.
#
# tests/ls_duplicates_with_workspace:
#   Builds the same committed history, then adds four uncommitted files and
#   runs 'mtn ls duplicates' without -r. workspace_1 ("foo foo") has the
#   same content as the committed testfiles, and workspace_2 / workspace_3
#   share "hello hello", so all of them must be printed. unique and
#   workspace_4 have content no other file shares and must not be printed.
#
# tests/ls_duplicates_without_duplicates:
#   At R, testfile holds "foo foo" and testfile2 holds "blah blah", so the
#   output of 'mtn ls duplicates -r R' must not mention testfile.
#
# NEWS:
#   Adds the 'mtn ls duplicates' entry under "New features" of the 0.42
#   section. The other -/+ pairs carry the same words on both sides, so
#   they appear to be whitespace-only changes.
#
# # # add_dir "tests/ls_duplicates_with_duplicates" # # add_dir "tests/ls_duplicates_with_workspace" # # add_dir "tests/ls_duplicates_without_duplicates" # # add_file "tests/ls_duplicates_with_duplicates/__driver__.lua" # content [173f6b87312b4ff79ecf285b49c929989581b15a] # # add_file "tests/ls_duplicates_with_workspace/__driver__.lua" # content [db991809a608a413a79e57416ec5fc7b896ad751] # # add_file "tests/ls_duplicates_without_duplicates/__driver__.lua" # content [9a371c2a25e87e8c30a2df96e729bb89872dd695] # # patch "NEWS" # from [71aefa7862136563e5158ea3577d2100f7a6dab2] # to [dd6bc393ab51c974015f7b19a1b086e1610fa92f] # # patch "cmd_list.cc" # from [3ea2cf51e26455ca77ee870728ccb58f8ec42782] # to [5fa3e7c75de40b0315ae337be019c202a039a67b] # # patch "monotone.texi" # from [6a8534e5e1e4045df89160f4d49b69a0fed06077] # to [8542311382a69a2d94963663f5336ad83f842ea2] # ============================================================ --- tests/ls_duplicates_with_duplicates/__driver__.lua 173f6b87312b4ff79ecf285b49c929989581b15a +++ tests/ls_duplicates_with_duplicates/__driver__.lua 173f6b87312b4ff79ecf285b49c929989581b15a @@ -0,0 +1,25 @@ + +mtn_setup() + +check(mtn("ls", "duplicates", "bla"), 2, false, false); + +addfile("unique", "unique") +commit() + +addfile("testfile", "blah blah") +commit() + +writefile("testfile", "foo foo") +commit() + +addfile("testfile2", "blah blah") +commit() + +writefile("testfile2", "foo foo") +commit() +R=base_revision() + +check(mtn("ls", "duplicates","-r", R), 0, true, false) +check(qgrep("testfile", "stdout")) +check(qgrep("testfile2", "stdout")) +check(not qgrep("unique", "stdout")) ============================================================ --- tests/ls_duplicates_with_workspace/__driver__.lua db991809a608a413a79e57416ec5fc7b896ad751 +++ tests/ls_duplicates_with_workspace/__driver__.lua db991809a608a413a79e57416ec5fc7b896ad751 @@ -0,0 +1,34 @@ + +mtn_setup() + +addfile("unique", "unique") +commit() + +addfile("testfile", "blah blah") 
+commit() + +writefile("testfile", "foo foo") +commit() + +addfile("testfile2", "blah blah") +commit() + +writefile("testfile2", "foo foo") +commit() + +addfile("workspace_1", "foo foo"); +addfile("workspace_2", "hello hello"); +addfile("workspace_3", "hello hello"); +addfile("workspace_4", "another unique file"); + +check(mtn("ls", "duplicates"), 0, true, false) +check(qgrep("testfile", "stdout")) +check(qgrep("testfile2", "stdout")) + +check(not qgrep("unique", "stdout")) + +check(qgrep("workspace_1", "stdout")) +check(qgrep("workspace_2", "stdout")) +check(qgrep("workspace_3", "stdout")) +check(not qgrep("workspace_4", "stdout")) + ============================================================ --- tests/ls_duplicates_without_duplicates/__driver__.lua 9a371c2a25e87e8c30a2df96e729bb89872dd695 +++ tests/ls_duplicates_without_duplicates/__driver__.lua 9a371c2a25e87e8c30a2df96e729bb89872dd695 @@ -0,0 +1,15 @@ + +mtn_setup() + +addfile("testfile", "blah blah") +commit() + +writefile("testfile", "foo foo") +commit() + +addfile("testfile2", "blah blah") +commit() +R=base_revision() + +check(mtn("ls", "duplicates","-r", R), 0, true, false) +check(not qgrep("testfile", "stdout")) ============================================================ --- NEWS 71aefa7862136563e5158ea3577d2100f7a6dab2 +++ NEWS dd6bc393ab51c974015f7b19a1b086e1610fa92f @@ -1,20 +1,23 @@ ??? ??? ? ??:??:?? UTC 2008 - 0.42 release. + 0.42 release. - Changes + Changes - Bugs fixed - - - 'mtn db kill_rev_locally' did not update the inodeprint - cache when executed from a workspace on which the - revision's changes where applied. + Bugs fixed - New features + - 'mtn db kill_rev_locally' did not update the inodeprint + cache when executed from a workspace on which the + revision's changes where applied. - Internal + New features + - New 'mtn ls duplicates' command which lets you list + duplicated files in a given revision or the workspace. + Internal + + Wed Sep 3 21:13:18 UTC 2008 0.41 release. 
============================================================ --- cmd_list.cc 3ea2cf51e26455ca77ee870728ccb58f8ec42782 +++ cmd_list.cc 5fa3e7c75de40b0315ae337be019c202a039a67b @@ -155,6 +155,103 @@ CMD(certs, "certs", "", CMD_REF(list), " guard.commit(); } +CMD(duplicates, "duplicates", "", CMD_REF(list), "", + N_("Lists duplicate files in the specified revision." + " If no revision is specified, use the workspace"), + "", + options::opts::revision) +{ + if (args.size() != 0) + throw usage(execid); + + revision_id rev_id; + roster_t roster; + database db(app); + project_t project(db); + + N(app.opts.revision_selectors.size() <= 1, + F("more than one revision given")); + + if (app.opts.revision_selectors.size() == 0) + { + workspace work(app); + temp_node_id_source nis; + + work.get_current_roster_shape(db, nis, roster); + } + else + { + complete(app.opts, app.lua, project, + idx(app.opts.revision_selectors, 0)(), rev_id); + N(db.revision_exists(rev_id), + F("no revision %s found in database") % rev_id); + db.get_roster(rev_id, roster); + } + + // To find the duplicate files, we put all file_ids in a map + // and count how many times they occur in the roster. 
+ // + // Structure of file_id_map is following: + // first : file_id + // second : + // first : unsigned int + // second : file_paths (=vector) + typedef std::vector file_paths; + typedef std::pair file_count; + typedef std::map file_id_map; + file_id_map file_map; + + node_map const & nodes = roster.all_nodes(); + for (node_map::const_iterator i = nodes.begin(); + i != nodes.end(); ++i) + { + node_t node = i->second; + if (is_file_t(node)) + { + file_t f = downcast_to_file_t(node); + file_path p; + roster.get_name(i->first, p); + + file_id_map::iterator iter = file_map.find(f->content); + if (iter == file_map.end()) + { + file_paths paths; + paths.push_back(p); + file_count fc(1, paths); + file_map.insert(make_pair(f->content, fc)); + } + else + { + iter->second.first++; + iter->second.second.push_back(p); + } + } + } + + string empty_checksum(40, ' '); + for (file_id_map::const_iterator i = file_map.begin(); + i != file_map.end(); ++i) + { + if (i->second.first > 1) + { + bool first_print = true; + for (file_paths::const_iterator j = i->second.second.begin(); + j != i->second.second.end(); ++j) + { + if (first_print) + { + cout << i->first; + first_print = false; + } + else + cout << empty_checksum; + + cout << " " << *j << '\n'; + } + } + } +} + CMD(keys, "keys", "", CMD_REF(list), "[PATTERN]", N_("Lists keys that match a pattern"), "", ============================================================ --- monotone.texi 6a8534e5e1e4045df89160f4d49b69a0fed06077 +++ monotone.texi 8542311382a69a2d94963663f5336ad83f842ea2 @@ -5312,6 +5312,16 @@ @section Informative @end group @end smallexample address@hidden mtn list duplicates address@hidden address@hidden mtn ls duplicates address@hidden + +This command lists duplicate files in a given revision. If no revision is +given, the workspace is used instead. Ignored and unknown files are +excluded from the listing. + +Two or more files are considered duplicates if the @sc{sha1} hashes of their +contents are equal. 
+ @item mtn list keys @item mtn ls keys @itemx mtn list keys @var{pattern}