# # # add_file "multi-mark-merge.text" # content [be08e2ef1754e64ca7fdcb92d71c33c91d42a65d] # # add_file "ss-existence-merge.text" # content [84169b7caf7da082a6393fc1ecd4c83a16f154da] # # add_file "ss-mark-merge.text" # content [d4c91b3c07e3582ff16d5993d60912cfb39114e0] # # add_file "unique-mark-merge.text" # content [089f28b2d9fb1061907d65930ad43a617119df09] # # patch "cmd_merging.cc" # from [63466000d5fbbbaf5a193275602052ff9d464774] # to [7a441305bb48f878eee5d5febaad2e99d781b212] # # patch "roster.cc" # from [9989ba61bf9d98cdc3551daaa3666cd37a752695] # to [a67920123605b12886b28cbd4c097f60a38f7b43] # # patch "roster.hh" # from [e752044dc5fb4183c3118daa8c8da40ef86ee3c7] # to [897cd92c82cf0a4a9506428bafe3ca94f959e747] # # patch "roster_merge.cc" # from [4fdb63c868ad20c20d95cde69bddc987316eac0d] # to [7c9664a62bd7bb84afce479ffcdfbe4ec82aa015] # # patch "tests/resolve_duplicate_name_conflict/__driver__.lua" # from [baf6a71e1f54cdf070340d504d6c9c330db0aa8e] # to [3da589c23ef5c0e996b267d74be712fc6c2176a2] # # patch "tests/resolve_duplicate_name_conflict/expected-update-messages-jim_1" # from [79eb4985a0fdab8524e19de458835db8b9dcf51c] # to [d42cacc7d4725d62070fa6f34bc0189c2da4d1cd] # ============================================================ --- multi-mark-merge.text be08e2ef1754e64ca7fdcb92d71c33c91d42a65d +++ multi-mark-merge.text be08e2ef1754e64ca7fdcb92d71c33c91d42a65d @@ -0,0 +1,281 @@ +http://article.gmane.org/gmane.comp.version-control.revctrl/93 + +Nathaniel Smith, based on work by Timothy Brownawell + +User model +---------- + +We keep exactly the same user model as unique-*-merge: + + 1) whenever a user explicitly sets the value, they express a claim + that their setting is superior to the old setting + 2) whenever a user chooses to commit a new revision, they implicitly + affirm the validity of the decisions that led to that revision's + parents + Corollary of (1) and (2): whenever a user explicitly sets the + value, they express that they consider their new setting to be + superior to _all_ old settings + 3) A "conflict" should occur if, and only if, the settings on each + side of the merge express parallel claims. + +The difference is that unique-*-merge does not _quite_ fulfill this +model, because in real life your algorithm will automatically resolve +coincidental clean merge cases without asking for user input; but +unique-* is not smart enough to take this into account when inferring +user intentions. + +Algorithm +--------- + +We start by marking the graph of previous revisions. For each node in +the graph, we either mark it (denoted by a *), or do not. A mark +indicates our inference that a human expressed an intention at this +node. + + i) a* graph roots are always marked + + a1 + ii) | no mark, value was not set + a2 + + a + iii) | b != a, so 'b' node marked + b* + + a b + iv) \ / + c* + 'c' is totally new, so marked + a1 a2 + \ / + c* + + a b1 we're marking places where users expressed + v) \ / intention; so 'b' should be marked iff this + b2? was a conflict + + a1 a2 'a' matches parents, and so is not marked + vi) \ / (alternatively, we can say this is a special + a3 case of (v), that is never a conflict) + +Case (vi) is the only one that differs from unique-* merge. However, +because of it, we must use a new definition of *(): + +Definition: By *(A), we mean we set of minimal marked ancestors of A. +"Minimal" here is used in the mathematical sense of a node in a graph +that has no descendents in that graph. + +Algorithm: Given two nodes to merge, A and B, we consider four cases: + a) value(A) = value(B): return the shared value + b) *(A) > B: return value(B) + c) *(B) > A: return value(A) + d) else: conflict; escalate to user +Where "*(A) > B" means "all elements of the set *(A) are non-strict +ancestors of the revision B". The right way to read this is as "try +(a) first, and then if that fails try (b), (c), (d) simultaneously". + +Note that except for the addition of rule (a), this is a strict +generalization of the unique-* algorithm; if *(A) and *(B) are +single-element sets, then this performs _exactly_ the same +computations as the unique-* algorithm. + +Now we can say what we mean by "was a conflict" in case (v) above: +given a -> b2, b1 -> b2, we leave b2 unmarked if and only if +*(a) > b1. + +Examples +-------- + +1. + a1* + / \ + a2 b* + +result: *(a2) = {a1}, a1 > b, so b wins. + +2. + a* + / \ + b* c* + +result: *(b) = {b}, *(c) = {c}, neither *(b) > c nor *(c) > b, so + conflict. + +3. + a* + / \ + b1* b2* + \ / \ + b3 c1* + +result: *(b3) = {b1, b2}; b2 > c1, but b1 is not > c, so c does not + win. *(c1) = {c1}, which is not > b3. conflict. +note: this demonstrates that this algorithm does _not_ do convergence. +Instead, it takes the conservative position that for one node to +silently beat another, the winning node must pre-empt _all_ the +intentions that created the losing node. While it's easy to come up +with just-so stories where this is the correct thing to do (e.g., b1 +and b2 each contain some other changes that independently require 'a' +to become 'b'; c1 will have fixed up b2's changes, but not b1's), this +doesn't actually mean much. Whether this is good or bad behavior a +somewhat unresolved question, that may ultimately be answered by which +merge algorithms turn out to be more tractable... + +4. + a* + / \ + b1* b2* + |\ /| + | X | + |/ \| + b3 c* + +result: *(b3) = {b1, b2} > c. *(c) = {c}, which is not > b3. c wins + cleanly. + +5. + a* + / \ + b1* c1* + / \ / \ + c2* X b2* + \ / \ / + c3 b3 + +result: *(c3) = {c1, c2}; c1 > b3 but c2 is not > b3, so b3 does not + win. likewise, *(b3) = {b1, b2}; b1 > c3 but b2 is not > c3, so c3 + does not win either. conflict. + +6. + a* + / \ + b1* c1* + / \ / \ + c2* X b2* + \ / \ / + c3 b3 + |\ /| + | X | + |/ \| + c4* b4* + +(this was my best effort to trigger an ambiguous clean merge with this +algorithm; it fails pitifully:) +result: *(c4) = {c4}, *(b4) = {b4}, obvious conflict. + +Math +---- + +The interesting thing about this algorithm is that all the unique-* +proofs still go through, in a generalized form. The key one that +makes *-merge tractable is: + +Theorem: In a graph marked by the above rules, given a node N, all + nodes in *(N) will have the same value as N. +Proof: By induction. We consider the cases (i)-(vi) above. (i) + through (iv) are trivially true. (v) is interesting. b2 is marked + when *(a) not > b1. b2 being marked makes that case trivial, so + suppose *(a) > b1. All elements of *(a) are marked, and are + ancestors of b1; therefore, by the definition of *() and "minimal", + they are also all ancestors of things in *(b1). Thus no element of + *(a) can be a minimal marked ancestor of b2. + (vi) is also trivial, because *(a3) = *(a1) union *(a2). QED. + +We also have to do a bit of extra work because of the sets: + +Corollary 1: If *(A) > B, and any element R of *(B) is R > A, then + value(A) = value(B). +Proof: Let such an R be given. R > A, and R marked, imply that there + is some element S of *(A) such that R > S. + On the other hand, *(A) > B implies that S > B. By similar reasoning + to the above, this means that there is some element T of *(B) such + that S > T. So, recapping, we have: + nodes: R > S > T + from: *(B) *(A) *(B) + *(B) is a set of minimal nodes, yet we have R > T and R and T both in + *(B). This implies that R = T. R > S > R implies that S = R, + because we are in a DAG. Thus + value(A) = value(S) = value(R) = value(B) + QED. + +Corollary 2: If *(A) > B and *(B) > A, then not only does value(A) = + value(B), but *(A) = *(B). +Proof: By above, each element of *(B) is equal to some element of + *(A), and vice-versa. + +This is good, because it means our algorithm is well-defined. The +only time when options (b) and (c) (in the algorithm) can +simultaneously be true, is when the two values being merged are +identical to start with. I.e., no somewhat anomalous "4th case" of +ambiguous clean merge. + +Actually, this deserves some more discussion. With *() returning a +set, there are some more subtle "partial ambiguous clean" cases to +think about -- should we be worrying about cases where some, but not +all, of the marked ancestors are pre-empted? This is possible, as in +example 5 above: + a* + / \ + b1* c1* + / \ / \ + c2* X b2* + \ / \ / + c3 b3 +A hypothetical (convergence supporting?) algorithm that said A beats B +if _any_ elements of *(A) are > B would give an ambiguous clean merge +on this case. (Maybe that wouldn't be so bad, so long as we marked +the result, but I'm in no way prepared to do any sort of sufficient +analysis right now...) + +The nastiest case of this is where *(A) > B, but some elements of *(B) +are > A -- so we silently make B win, but it's really not _quite_ +clear that's a good idea, since A also beat B sometimes -- and we're +ignoring those user's intentions. + +This is the nice thing about Corollary 1 (and why I didn't just +collapse it into Corollary 2) -- it assures us that the only time this +_weak_ form of ambiguous clean can happen is when A and B are already +identical. This _can_ happen, for what it's worth: + a* + /|\ + / | \ + / | \ + / | \ + b1* b2* d* + |\ /\ / + | \ / \/ + | X b3* + | / \ / + |/ b4 + b5 +Here *(b5) = {b3, b2}, *(b6) = {b2, b4}. If we ignore for a moment +that b4 and b5 have the same value, this is a merge that b4 would win +and b5 would lose, even though one of b4's ancestors, i.e. b1, is +pre-empted by b5. However, it can _only_ happen if we ignore that +they have the same value... + +The one other thing we proved about unique-* merge also still applies; +the proof goes through word-for-word: +Theorem: If A and B would merge cleanly with A winning, then any + descendent D of A will also merge cleanly with B, with D winning. +Proof: *(B) > A, and A > D, so *(B) > D. + +Discussion +---------- + +This algorithm resolves one of the two basic problems I observed for +unique-* merge -- coincidental clean merges are now handled, well, +cleanly, and the user model is fully implemented. However, we still +do not handle the unnamed case (you guys totally let me down when I +requested names for this case last time): + a + / \ + b* c* + \ / \ + c* d* +which still gives a conflict. We also, of course, continue to not +support more exotic features like convergence or implicit rollback. + +Not the most exciting thing in the world. OTOH, it does strictly +increase the complexity of algorithms that are tractable to formal +analysis. ============================================================ --- ss-existence-merge.text 84169b7caf7da082a6393fc1ecd4c83a16f154da +++ ss-existence-merge.text 84169b7caf7da082a6393fc1ecd4c83a16f154da @@ -0,0 +1,120 @@ +doc current die-die-die-merge for existence + http://revctrl.org/DieDieDieMerge + + implemented in roster_merge.cc roster_merge + +In a given revision, a node either exists or does not exist. + +Notation: + + exists A1 + does not exist A + +In a child revision with two parents, there are several cases: + + A1 B1 +i) \ / + C1 + + The node is merged in the child + + + A1 B2 +ii) \ / + C3 + + The node is sutured in the child + + + A1 B3 +iiia) \ / + C3 + + Node 3 was born in a suture or split in an uncommon ancestor of B, + with node 1 as a parent + + A3 B1 +iiib) \ / + C3 + + Node 3 was born in a suture or split in an uncommon ancestor of A, + with node 1 as a parent + + + A1 B +iv) \ / + C1 + + The node was born in a user add in A's uncommon ancestors + + A B1 + \ / + C1 + + The node was born in a user add in B's uncommon ancestors + + + A1 B +va) \ / + C + + The node was deleted in B's uncommon ancestors + + A B1 +vb) \ / + C + + The node was deleted in A's uncommon ancestors + + +In monotone 0.4 and earlier, cases ii and iii did not exist. + +Case ii can only happen if we support sutures as user operations; not +yet. + +In case v, deletion wins over any other change; it might be better to +have deletion conflict with any other change. But that's left for +another time. + +To process all nodes, we loop thru the union of the parent node ids, +deciding which case each node belongs in. We create nodes in the +result roster for cases i thru iv; we don't create a node for case v. +The mark-merge step only considers nodes that are in the result +roster. + +To distinguish cases iii and iv, we need to store information about +how a node was born. This can be done in the marking map, by adding +birth_cause, a pair containing an enumeral and a pair of node_ids. The +enumeral indicates add, suture, or split; if suture, the node_ids +indicate the ancestors. If split, the first node_id indicates the +ancestor. + +To distinguish case iiia from va, we search B_uncommon for a node that +is a suture or split and has 1 as an ancestor. Similarly for case iiib +and vb. + +case iii will show up twice, once for node id 1 and once for node id +3. We want to create only one node in the result roster, with node id +3 (the sutured node id). Since we handle both nodes when we encounter +either, we maintain a set 'already_handled' of already handled nodes; +if a node is in already_handled, we simply ignore it. We also record +ids 1 and 3 as the ancestors of the result node, so we can check for +conflicts in the subsequent mark-merge step. + +To handle node 1 in case iii, we search A_uncommon for a node that is +born in a suture or split with node 1 as a parent, create a node in +the result roster for the found node, and add it to already_handled. + +To handle node 3 in case iii, we check birth_cause for node 3 in the +marking map. If it is a suture, we create a node for it in the result +map. Then we add both ancestors to already_handled. This is more +efficient than the detection for node 1. + +If we process the nodes in decreasing numerical order, the sutured +node will always occur before the non-sutured node. That means we find +case iii more efficiently. + +That also simplifies checking for case v; if a node is only in A or B, +was not born in a suture, and is not in already_handled, it is case v. + +(end of file) ============================================================ --- ss-mark-merge.text d4c91b3c07e3582ff16d5993d60912cfb39114e0 +++ ss-mark-merge.text d4c91b3c07e3582ff16d5993d60912cfb39114e0 @@ -0,0 +1,811 @@ +Introduction +------------ + +This writeup follows [1], but uses a different notation for revisions, +and is expanded to include item identities, suture, and split. + +There are (at least) two criteria for a merge algorithm. It must match +user expectations, and it must have an efficient implementation. + +multi-*-merge used in monotone 0.4 and earlier meets both criteria, +except that it does not support file suturing and splitting. File +suturing is needed for resolving duplicate name conflicts in some +cases, and is a useful user operation. Splitting is needed to undo +suturing, and is also a useful user operation. + +Here we extend multi-*-merge to support suturing and splitting. We +call the result ss-*-merge. + +The main difference between multi-*-merge and ss-*-merge is the +implementation of caching the mark data. Since we will be discussing +monotone code details, we use terminology here that matches the code, +to reduce confusion. In particular, "node" has the same meaning as in +the code; it is a file or directory, which is a node in the filesystem +tree. We use "revision" or "rev" to mean a node in the revision +history graph. + +References +---------- + +[1] http://thread.gmane.org/gmane.comp.version-control.monotone.devel/4297 + Nathaniel Smith's writeup of unique-*-merge + +[2] http://article.gmane.org/gmane.comp.version-control.revctrl/93 + Nathaniel Smith's writeup of multi-*-merge; used in monotone 0.40 + and earlier + +[3] life-cycle merge description. + FIXME: write it, put reference here. + monotone currently uses die-die-die for this, but that also needs + to be enhanced for sutures and splits + +User model +---------- + +"Revisions" contain sets of "nodes". In monotone, the nodes are files +and directories. + +Each node has a set of "scalars" that can change value over the life +of the node. In monotone, the scalars are the file or directory name +(including directory path), the file contents (represented by a hash +of the actual contents), and attributes. + +Nodes (and therefore revisions) can be "changed", "branched" and +"merged". + +"Changing" means the scalars associated with a node have different +values in one child revision that is derived from one parent. + +"Branching" means the scalars associated with a node have different +values in two or more child revisions that are derived from one parent. + +"Merging" means two parents of a single child revision have different +scalars for a node. + +Sometimes the user must specify what value the child scalars should +have in a merge, but the goal is for monotone to figure out the +appropriate value without user intervention whenever possible. + +The set of revisions related by changing, branching, and merging +form a revision graph; the nodes in the graph are revisions, the +edges are the changes to node scalars. The graph is a directed +acyclic graph. + +Nodes have two life cycle events: birth and death. + +Each node in a revision has a unique identity (a number). When a +node is born, it is assigned a new identity (one that has never been +used previously). When a node dies, it is simply removed from the +child revision. + +A node birth or death can be "independent" (the user simply creates or +deletes the node), or part of a "suture" or "split". + +"suture" means two parent nodes die, a new child node is born, and the +child inherits merged values of the scalars of the parents. Sutures +are always user operations; they can occur during merge conflict +resolution, or as an independent user operation. The user may need to +specify the child scalar values, but we'd like monotone to help in +that process as much as possible. + +"split" means one parent node dies, two child nodes are born, and each +child inherits a possibly changed copy of the parent's scalars. Splits +can occur only as independent user action; they can be used to undo a +suture. The child scalar values are only changed from the parent +values if the user specifies a new value. + +Suturing and splitting directories is not currently supported, but +that's not relevant here. + +If a node's birth and death are not part of a suture or split, the +scalars associated with it are not inherited from or by other nodes. +This is the model assumed in [2]. + +Note that we are _not_ considering node existence to be a scalar that +is merged; we are just considering the effect of sutures and splits on +merging the other scalars. We assume node existence is merged using a +different merge process (see [3]). + +User expectations for scalars: + +1) Whenever a user explicitly sets the value of a scalar, they express + a claim that their setting is superior to the old setting. + +2) Whenever a user chooses to commit a new revision in which the + scalar value has not changed, they implicitly affirm the validity + of the decisions that led to the value of the scalar in that + revision's parents. + +3) A potential conflict occurs when a new revision is a merge of two + parent revisions, and the scalar values are different in the + parents. The conflict should actually occur only if the settings on + each side of the merge express parallel claims. (This is made more + precise below). + +Corollary of (1) and (2): whenever a user explicitly sets the value, +they express that they consider their new setting to be superior to +_all_ old settings. + +This is the same user model as in [2], but expanded to include the +concepts of node identity, suture, and split. + +Notation +-------- + +A revision is labeled by a capital letter (A, B). + +The identities of the nodes in a revision are indicated by a set of +integers (A1,2; B2,3). + +The value of a scalar is indicated by a lowercase letter associated +with each identity integer (A1a,2a; B2b,3c). + +Parentage is indicated by lines joining revisions. Branching is +indicated by two revisions with one parent, merging is indicated by +one revision with two parents. + +Suturing and splitting are indicated by different node ids between +parent and child nodes. + +"marking" is indicated by *; a revision is marked if a scalar changed in +that revision relative to a parent. A mark indicates that a human +expressed an intention at this revision, by 2) above. + +? indicates we are not yet sure whether the revision should be marked. + +Algorithm +--------- + +During a single execution of the mark-merge algorithm, we only +consider one scalar. In monotone, we conceptually repeat mark-merge +for each scalar and each node in the new child; this may be partly +done in parallel. + +We start by marking the graph of previous revisions. For each revision in +the graph, we either mark it, or do not. + +There are several cases. Note that the node ids play no role in this +algorithm; they are present to clarify implementation issues discussed +below. + + i) A1a* graph roots are always marked; the node is newly born + + + A1a + ii) | no change in value, so no mark + B1a + + A1a + / \ split; value not changed, so not marked + B2a C3a + + + A1a + iii) | b != a, so revision B is marked + B1b* + + A1a + / \ branch; value changed, so marked + B1b* C1c* + + A1a + / \ split; value changed, so marked + B2b* C3c* + + + A1a B1b + iv) \ / c != a, c != b so revision C is marked + C1c* + + A1a B1a + \ / c != a so revision C is marked + C1c* + + + A1a B2b + \ / suture; value changed, so marked + C3c* + + A1a B2a + \ / suture; value changed, so marked + C3c* + + + A1a B1b + v) \ / should be marked iff this was a conflict + C1b? + + A1a B2b + \ / suture; should be marked iff this was a conflict + C3b + + + A1a B1a + vi) \ / no change in value, so no mark + C1a + + A1a B2a + \ / suture; no change in value, so not marked + C3a + + +These are the same marking rules as in [2], but expanded to include +suture and split. Note that there are still only 6 cases. + + +Note that for any revision graph, the marks on existing revisions do not +change as new revisions are added; the marks may be cached for +performance. + +Definition: The "parents" operation returns the set of revisions that +contain the parent nodes of a node n in a revision A. + +Here "parent nodes" is defined by the operations "change", "branch", +"merge", "suture", and "split". If the child node is not sutured or +split, the child and parent nodes have the same identities. If the +child node is sutured or split, the child and parent identities are +different. + +Definition: The "ancestors" of a node with identity n at revision A +include all revisions that can be reached by the parents operation. + +This definition of "ancestors" is consistent with the traditional +definition in graph theory; it just makes explicit what is meant by +suture and split. + +In [2], the marks for each node identity are distinct; each revision +graph contains only one node identity. Here, because sutures and +splits link identities, the marks for identities are _not_ distinct. +Instead, the revision graph includes all identities that are included +in the ancestor revisions of the leaf revisions. Each revision has +only one mark; the mark applies to all identities in the revision. + +Definition: By *(A), we mean we set of minimal marked ancestors of A. + +"Minimal" here is used in the mathematical sense of a node in a graph +that has no descendents in that graph. + +*(A) is the "marking set" of A. + +This is the same definition as in [2], but the meaning of "ancestors" +is expanded to allow changes in identities due to suture and split. + +Algorithm: Given two revisions to merge, A and B, we consider four cases +for the scalar being merged: + + a) value(A) = value(B): return the shared value + + b) *(A) > B: return value(B) + + c) *(B) > A: return value(A) + + d) else: conflict; escalate to user + +Where "*(A) > B" means "all elements of the set *(A) are non-strict +ancestors of the revision B". + +Now we can say what we mean by "was a conflict" in case (v) above: +given A1a - C1b and B1a - C1b, we leave C unmarked if and only if +*(A) > B. + +Examples +-------- + +In each example, we have the exisiting revision graph, and are trying to +merge the two leaves. + +"wins" means "the scalar takes this revision's value in the child". + +Examples 1 - 5 are the same as in [2], but presented with our new +notation. 6 and 7 contain sutures. + +1. + A1a* + / \ + B1a C1b* + +result: *(B) = {A}, A > C, so C wins. + +2. + A1a* + / \ + B1b* C1c* + +result: *(B) = {B}, *(C) = {C}, neither B > C nor C > B, so +conflict. + +3. + A1a* + / \ + B1b* C1b* + \ / \ + D1b E1c* + +result: *(D) = {B, C}; C > E, but B is not > E, so E does not win. +*(E) = {E}, which is not > D, so D does not win. Conflict. + +note: This demonstrates that this algorithm does _not_ do convergence. +Instead, it takes the conservative position that for one revision to +silently beat another, the winning revision must pre-empt _all_ the +intentions that created the losing revision. While it's easy to come +up with just-so stories where this is the correct thing to do (e.g., B +and C each contain some other changes that independently require 'a' +to become 'b'; E will have fixed up C's changes, but not B's), this +doesn't actually mean much. Whether this is good or bad behavior a +somewhat unresolved question, that may ultimately be answered by which +merge algorithms turn out to be more tractable. + +4. + A1a* + / \ + B1b* C1b* + | \ / | + | X | + | / \ | + D1b E1c* + +result: *(D) = {B, C} > E. *(E) = {E}, which is not > D. E wins +with no conflict. + +5. + A1a* + / \ + B1b* C1c* + / \ / \ + D1c* X E1b* + \ / \ / + F1c G1b + +result: *(F) = {D, C}; C > G but D is not > G, so F does not +win. Likewise, *(G) = {B, E}; B > F but E is not > F, so F +does not win either. Conflict. + +6. "checksum.sh" example with conflict + + A1a* B2b* + / \ / \ + C1c* D3a E2d* + \ / \ / + G4c H5d + +Abe and Beth both create a new file "checksum.sh" in A and B. The +scalar in this graph is the file contents, but the file name scalar +causes a conflict which is resolved by suturing. Jim sutures A and B +in D, but Abe and Beth each make changes in C and E, and then merge +via suture in G and H. Jim is now merging G and H, with another +suture. + +At first glance, since nodes G and H have different node ids, they do +not need merging. However, they have the same file name, so the name +conflict is resolved by suturing nodes 4 and 5; that requires merging +the file content. + +result: *(G) = {B, C}; B > H, but C not > H, so H does not win. *(H) = +{A, E}; A > G, but E not > G, so G does not win. Conflict. + +7. "checksum.sh" example with clean merge + +Abe creates "checksum.sh" and emails it to Beth; Beth changes the +contents but Abe does not; the result should be Beth's changed +version. The merging pattern is the same as in 6. + + A1a* B2a* + | \ / \ + | D3a E2b* + \ / \ / + G4a H5b + +result: *(G) = {A, B} > H, so H wins. *(H) = {A, E} not > G, so G does +not win. + +Note that the user will be involved in the merge of G and H, because +nodes 4 and 5 have the same file name scalar, and resolving that +conflict requires user intervention. The point in this example is that +the merge of the file content can succeed without user intervention. + +FIXME: add split conflict and clean examples. Add split/suture and +suture/split examples. + +Math +---- + +The proofs in [1,2] are concerned only about how the scalar values in +the revision graph change from parent to child, not about how node +identities change. We have not changed anything that affects that; +sutures are just merges as far as the scalars are concerned, and +splits are just new revisions. So the proofs not repeated here. + +FIXME: repeat the proofs here, with the current notation, and give +them names/numbers so we can refer to them. + +Implementation +-------------- + +Here we are concerned with the efficiency of the implementation of +these algorithms. We noted above that the marks on a revision graph +can be cached for performance; we discuss the data structure used for +that cache, and how it is computed as new revisions are added. + +For multi-*-merge, there is a separate revision graph for each scalar +value of each node identity. Thus the marks can be stored as sets of +revision ids; a revision id is in the set if that revision is marked +for that node and that scalar. + +The algorithm needs *(A) for each revision A; we compute that as each +revision is added, and cache it in a data structure called a +"marking_map": + +struct marking_t +{ + std::set parent_name; + std::set file_content; + std::map > attrs; +}; + +typedef std::map marking_map; + +We've left out details of the real 'struct marking_t' that are not +relevant here. In particular, 'birth_revision' is used in life-cycle +merging, not in scalar merging; see [3]. + +The scalars are the name of the file (called 'parent_name' instead of +'name' for obscure historical reasons), the file content, and the set +of attributes. + +The type 'node_id' corresponds exactly to our term 'node identity'; it +is a number that is unique within the entire set of revisions in the +database. + +For a given revision, each set in marking_t stores the most recent +ancestor nodes on each parent branch in which the scalar was changed. + +A marking_map is specific to a revision; the database stores the +marking_maps with each revision. + +When we extend this to include sutures and scalars, there is no longer +a distinct revision graph for each node_id. Note that there is still a +unique revision graph for each scalar. So at first glance, the +marking_map data structure is simply not appropriate. + +However, if we allow some data duplication, we can use the same +structure, but modify how we compute it. In essence, when a node is +sutured, its initial marking map is the union of the parent node +marking maps. First, some simple examples. + +The marking map for node F in example 5, assuming the only changes +after node birth are content changes, and ignoring attributes: + +F_marking_map => + (node 1 => + (parent_name => {A}, + file_content => {C, D})) + +G_marking_map => + (node 1 => + (parent_name => {A}, + file_content => {B, E})) + +Now we walk thru the computations needed while attempting to merge F +and G. The code starts in merge.cc, interactive_merge_and_store. + +First we compute the lists of uncommon ancestors of F and G; that will +allow computing *(F) > G and *(G) > F. + +Uncommon ancestors are all revisions that are non-strict ancestors to +F or G, but not both. This is done by graph.cc get_uncommon_ancestors, +using other data cached in the database (not the marking_map). + +F_uncommon => {D, F} +G_uncommon => {E, G} + +*(B) > A states that "all marked ancestors of B are also ancestors of +A". This is equivalent to "no marked ancestor of B is in B_uncommon". +So in general, to compute *(B) > A: + + For each revision in B_marking_map, return false if the revision + is in B_uncommon. Otherwise return true. + +This is implemented in the function roster_merge.cc a_wins. + +To compute *(F) > G for parent_name: + + Search F_uncommon for F_marking_map(node 1)(parent_name); A is not + in {D, F}, so return true. + +To compute *(G) > F for parent_name: + + Search G_uncommon for G_marking_map(node 1)(parent_name); A is not + in {E, G}, so return true. + +To compute *(F) > G for file_content: + + Search F_uncommon for F_marking_map(node 1)(file_content); D is in + {D, F}, so return false. + +To compute *(G) > F for file_content: + + Search G_uncommon for G_marking_map(node 1)(file_content); E is in + {E, G}, so return false. + +Since both *(F) > G and *(G) > F return false, there is a conflict. +Assume the user resolves the conflict by changing file content to d in +revision H; we need to compute the marking_map for H. + +To compute the marking_map for a revision C, given the marking_maps +for the parent revisions A, B: + +First we check whether the scalar has changed in the child from either +parent. + +If it has not changed from either, the new marking_map is the same as +either parent; they must also be the same as each other. + +If it has changed from both, the new marking map is {C}. + +If it has changed from one parent: + + If there was a conflict, the revision that caused the conflict and + has the new scalar value is the new mark. This requires searching + the uncommon ancestors for revisions in marking_map again. + + If there was no conflict, the new marks are the same as the parent + marks, which must be the same as each other. + +In psuedocode: + + if value (A) = value (B) then + assert A_marking_map (scalar) = B_marking_map (scalar) + + return A_marking_map (scalar) + end if + + if value (C) = value (A) then + + R = revision from A_uncommon that is also in + A_marking_map(scalar). + + if R /= null then + return {R} + else + -- no conflict + assert A_marking_map (scalar) = B_marking_map (scalar) + + return A_marking_map (scalar) + end if; + + elsif value (C) = value (B) then + + R = revision from B_uncommon that is also in + B_marking_map(scalar). + + if R /= null then + return {R} + else + -- no conflict + assert A_marking_map (scalar) = B_marking_map (scalar) + + return A_marking_map (scalar) + end if; + end if + + +This is implemented in the function roster.cc mark_merged_scalar. + +Thus the marking map for H is: + +H_marking_map => + (node 1 => + (parent_name => {A}, + file_content => {H})) + +To find the complexity order for this operation, let r be the number +of revisions in the revision graph, n be the number of file nodes. + +Because the marking map is the minimal set of marked ancestors, its +size is O(1). + +The computations involved in determining whether there is a +conflict, and in updating the marking map, is O(n); the computations +do not take longer as more revisions are added. + +Storing the marking map in the database and ram requires O(nr) space. +The database size increases linearly with the number of revisions and +the number of nodes. + +Now we turn to the effect of suturing. The marking maps for example 6: + +G_marking_map => + (node 4 => + (parent_name => {A, B}, + file_content => {B, C})) + +H_marking_map => + (node 5 => + (parent_name => {A, B}, + file_content => {A, E})) + +Uncommon ancestors: + +G_uncommon => {C, G} +H_uncommon => {E, H} + +In attempting to compute *(G) > H for any scalar, we first have to +deal with the fact that we have different node ids. We have been told +by the user to suture nodes 4 and 5 into 6. This is recorded in the +'ancestors' component of the 'node' data type (see roster.hh) for the +new node 6; it contains the set {4, 5}. So we refine the +implementation of *(B) > A for node n: + + For each node m in ancestors(n): + + For each revision in B_marking_map(node m)(scalar)), return + false if the revision is in B_uncommon. + + Otherwise return true. + +G parent_name = H parent_name, so we don't need to compute *(G) > H or +*(H) > G for parent_name. + +*(G) > H for file_content: + + Search G_uncommon for G_marking_map(nodes 4, 5)(file_content); {B, + C} is in {C, G}, so return false. + +*(H) > G for file_content: + + Search H_uncommon for H_marking_map(nodes 4, 5)(file_content); {A, + E} is in {E, H}, so return false. + +Assume the user resolves the conflict by changing file content to d in +revision I. + +The algorithm for computing the new marking_map must also be extended +to handle ancestor nodes different from child nodes. If the nodes are +actually different, the parent marking maps will not be the same even +if the values are the same. + +To compute the marking_map for a node n in revision C, given the marking_maps +for ancestor nodes l, m in the parent revisions A, B: + + if value (A) = value (B) then + if n = l = m then + assert A_marking_map(node l)(scalar) = + B_marking_map(node m)(scalar) + end if + + return erase_ancestors (union (A_marking_map(node l)(scalar), B_marking_map(node l)(scalar))) + end if + + if value (C) = value (A) then + + R = revision from A_uncommon that is also in + A_marking_map(node l)(scalar). + + if R /= null then + return {R} + else + -- no conflict + assert, return union (as above) + end if; + + elsif value (C) = value (B) then + + R = revision from B_uncommon that is also in + B_marking_map(node m)(scalar). + + if R /= null then + return {R} + else + -- no conflict + assert, return union (as above) + end if; + end if + +I_marking_map => + (node 6 => + (parent_name => (A, B), + file_content => {E})) + + +Applying these algorithms to example 7, starting with the first merge +to D: + +A_marking_map => + (node 1 => + (parent_name => {A}, + file_content => {A})) + +B_marking_map => + (node 2 => + (parent_name => {B}, + file_content => {B})) + +Compute merge: + +For parent_name: +value(A) = value(B) = "checksum.sh", but node ids conflict, so ask +user; they say 'suture 1,2->3 with value(D) = "checksum.sh"' + +For file_content: +value(A) = value(B) = a, so value(D) = a + +merged node = D3a + +Compute marking map: + +For parent_name: +value(A) = value(B) => union ({A}, {B}) + +For file_content: +value(A) = value(B) => union ({A}, {B}) + +D_marking_map => + (node 3 => + (parent_name => {A, B}, + file_content => {A, B})) + +Computing the marking map for a non-merged node is simple (left as an +exercise for the reader :): + +E_marking_map => + (node 2 => + (parent_name => {B}, + file_content => {E})) + +Merging A, D to G: + +Compute merge: + +For parent_name: +value(A) = value(D) = "checksum.sh", but node ids conflict, so ask +user; they say 'suture 1,3->4 with value(G) = "checksum.sh"' + +For file_content: +value(A) = value(D) = a, so value(G) = a + +merged node = G4a + +Compute marking_map: + +For parent_name: +value(A) = value(D) => union ({A}, {A, B}) + +For file_content: +value(A) = value(D) => union ({A}, {A, B}) + +G_marking_map => + (node 4 => + (parent_name => {A, B} + file_content => {A, B})) + +Merging D, E to H: + +compute merge: + +For parent_name: +value(D) = value(E) = "checksum.sh", but node ids conflict, so ask +user; they say 'suture 2,3->5 with value(H) = "checksum.sh"' + +For file_content: +value(D) /= value(E); need to search uncommon +D_uncommon => {D} +E_uncommon => {E} + +*(D) > E => true +*(E) > D => false + +E wins; result node is H5b + +compute marking map: + +For parent_name: +value(D) = value(E) = "checksum.sh" => return union + +For file_content: +value(D) /= value(E), value(H) = value(E) + so search E_uncommon for D_marking_map; none found => return union + +G_marking_map => + (node 4 => + (parent_name => {A, B} + file_content => {A, E})) ============================================================ --- unique-mark-merge.text 089f28b2d9fb1061907d65930ad43a617119df09 +++ unique-mark-merge.text 089f28b2d9fb1061907d65930ad43a617119df09 @@ -0,0 +1,360 @@ +http://thread.gmane.org/gmane.comp.version-control.monotone.devel/4297 + +I set myself a toy problem a few days ago: is there a really, truly, +right way to merge two heads of an arbitrary DAG, when the object +being merged is as simple as possible: a single scalar value? + +I assume that I'm given a graph, and each node in the graph has a +value, and no other annotation; I can add annotations, but they have +to be derived from the values and topology. Oh, and I assume that no +revision has more than 2 parents; probably things can be generalized +to the case of indegree 3 or higher, but it seems like a reasonable +restriction... + +So, anyway, here's what I came up with. Perhaps you all can tell me +if it makes sense. + +User model +---------- + +Since the goal was to be "really, truly, right", I had to figure out +what exactly that meant... basically, what I'm calling a "user model" +-- a formal definition of how the user thinks about merging, to give +an operational definition of "should conflict" and "should clean +merge". My rules are these: + 1) whenever a user explicitly sets the value, they express a claim + that their setting is superior to the old setting + 2) whenever a user chooses to commit a new revision, they implicitly + affirm the validity of the decisions that led to that revision's + parents + Corollary of (1) and (2): whenever a user explicitly sets the + value, they express that they consider their new setting to be + superior to _all_ old settings + 3) A "conflict" should occur if, and only if, the settings on each + side of the merge express parallel claims. +This in itself is not an algorithm, or anything close to it; the hope +is that it's a good description of what people actually want out of a +merge algorithm, expressed clearly enough that we can create an +algorithm that fits these desiderata. + +Algorithm +--------- + +I'll use slightly novel notation. Lower case letters represent values +that scalar the scalar takes. Upper case letters represent nodes in +the graph. + +Now, here's an algorithm, that is supposed to just be a transcription +of the above rules, one step more formal: + First, we need to know where users actively expressed an intention. + Intention is defined by (1), above. We use * to mark where this + occurred: + + i) a* graph roots are always marked + + a + ii) | no mark, value was not set + a + + a + iii) | b != a, so b node marked + b* + + a b + iv) \ / + c* + c is totally new, so marked + a a + \ / + c* + + a b we're marking places where users expressed + v) \ / intention; so b should be marked iff this + b? was a conflict (!) + + a a for now I'm not special-casing the coincidental + vi) \ / clean merge case, so let's consider this to be + a? a subclass of (v). + + That's all the cases possible. So, suppose we go through and + annotate our graph with *s, using the above rules; we have a graph + with some *s peppered through it, each * representing one point that + a user took action. + + Now, a merge algorithm per se: Let's use *(A) to mean the unique + nearest marked ancestor of node A. Suppose we want to merge A and + B. There are exactly 3 cases: + - *(A) is an ancestor of B, but not vice versa: B wins. + - *(B) is an ancestor of A, but not vice versa: A wins. + - *(A) is _not_ an ancestor of B, and vice versa: conflict, + escalate to user + Very intuitive, right? If B supercedes the intention that led to A, + then B should win, and vice-versa; if not, the user has expressed + two conflicting intentions, and that, by definition, is a conflict. + + This lets us clarify what we mean by "was a conflict" in case (v) + above. When we have a merge of a and b that gives b, we simple + calculate *(a); if it is an ancestor of 'b', then we're done, but if + it isn't, then we mark the merge node. (Subtle point: this is + actually not _quite_ the same as detecting whether merging 'a' and + 'b' would have given a conflict; if we somehow managed to get a + point in the graph that would have clean merged to 'a', but in fact + was merged to 'b', then this algorithm will still mark the merge + node.) For cases where the two parents differ, you have to do this + using the losing one; for cases where the two parents are the same, + you should check both, because it could have been a clean merge two + different ways. If *(a1) = *(a2), i.e., both sides have the same + nearest marked ancestor, consider that a clean merge. + + That's all. + +Examples +-------- + +Of course, I haven't shown you this is well-defined or anything, but +to draw out the suspense a little, have some worked examples (like +most places in this document, I draw graphs with two leaves and assume +that those are being merged): + + graph: + a* + / \ + a b* + result: *(a) is an ancestor of b, but *(b) is not an ancestor of a; + clean merge with result 'b'. + + graph: + a* + / \ + b* c* + result: *(b) = b is not an ancestor of c, and *(c) = c is not an + ancestor of c; conflict. + + graph: + a* + / \ + b* c* <--- these are both marked, by (iii) + |\ /| + | X | + |/ \| + b* c* <--- which means these were conflicts, and thus marked + result: the two leaves are both marked, and thus generate a conflict, + as above. + +Right, enough of that. Math time. + +Math +---- + +Theorem: In a graph marked following the above rules, every node N + will have a unique least marked ancestor M, and the values of M and N + will be the same. +Proof: By downwards induction on the graph structure. The base case + are graph roots, which by (i) are always marked, so the statement is + trivially true. Proceeding by cases, (iii) and (iv) are trivially + true, since they produce nodes that are themselves marked. (ii) is + almost as simple; in a graph 'a' -> 'a', the child obviously + inherits the parent's unique least marked ancestor, which by + inductive hypothesis exists. The interesting case is (v) and (vi): + a b + \ / + b + If the child is marked, then again the statement is trivial; so + suppose it is not. By definition, this only occurs when *(a) is an + ancestor of 'b'. But, by assumption, 'b' has a unique nearest + ancestor, whose value is 'b'. Therefore, *(a) is also an ancestor + of *(b). If we're in the weird edge case (vi) where a = b, then + these may be the same ancestor, which is fine. Otherwise, the fact + that a != b, and that *(a)'s value = a's value, *(b)'s value = b's + value, implies that *(a) is a strict ancestor of *(b). Either way, + the child has a unique least marked ancestor, and it is the same + ULMA as its same-valued parent, so the ULMA also has the right + value. QED. + +Corollary: *(N) is a well-defined function. + +Corollary: The three cases mentioned in the merge algorithm are the + only possible cases. In particular, it cannot be that *(A) is an + ancestor of B and *(B) is an ancestor of A simultaneously, unless + the two values being merged are identical (and why are you running + your merge algorithm then?). Or in other words: ambiguous clean + merge does not exist. +Proof: Suppose *(A) is an ancestor of B, and *(B) is an ancestor of A. + *(B) is unique, so *(A) must also be an ancestor of *(B). + Similarly, *(B) must be an ancestor of *(A). Therefore: + *(A) = *(B) + We also have: + value(*(A)) = value(A) + value(*(B)) = value(B) + which implies + value(A) = value(B). QED. + +Therefore, the above algorithm is well-defined in all possible cases. + +We can prove another somewhat interesting fact: +Theorem: If A and B would merge cleanly with A winning, then any + descendent D of A will also merge cleanly with B, with D winning. +Proof: *(B) is an ancestor of A, and A is an ancestor of D, so *(B) is + an ancestor of D. + +I suspect that this is enough to show that clean merges are order +invariant, but I don't have a proof together ATM. + +Not sure what other properties would be interesting to prove; any +suggestions? It'd be nice to have some sort of proof about "once a +conflict is resolved, you don't have to resolve it again" -- which is +the problem that makes ambiguous clean merge so bad -- but I'm not +sure how to state such a property formally. Something about it being +possible to fully converge a graph by resolving a finite number of +conflicts or something, perhaps? + +Funky cases +----------- + +There are two funky cases I know of. + +Coincidental clean merge: + | + a + / \ + b* b* + +Two people independently made the same change. When we're talking +about textual changes, some people argue this should give a conflict +(reasoning that perhaps the same line _should_ be inserted twice). In +our context that argument doesn't even apply, because these are just +scalars; so obviously this should be a clean merge. Currently, the +only way this algorithm has to handle this is to treat it as an +"automatically resolved conflict" -- there's a real conflict here, but +the VCS, acting as an agent for the user, may decide to just go ahead +and resolve it, because it knows perfectly well what the user will do. +In this interpretation, everything works fine, all the above stuff +applies; it's somewhat dissatisfying, though, because it's a violation +of the user model -- the user has not necessarily looked at this +merge, but we put the * of user-assertion on the result anyway. Not a +show-stopper, I guess... + +It's quite possible that the above stuff could be generalized to allow +non-unique least marked ancestors, that could only arise in exactly +this case. + +I'm not actually sure what the right semantics would be, though. If +we're merging: + | + a + / \ + b b + \ / \ + b c +Should that be a clean merge? 'b' was set twice, and only one of +these settings was overridden; is that good enough? + +Do you still have the same opinion if the graph is: + | + a + | + b + / \ + c b + | / \ + b b c + \ / + b +? Here the reason for the second setting of 'b' was that a change +away from it was reverted; to make it extra cringe-inducing, I threw +in that change being reverted was another change to 'c'... (this may +just be an example of how any merge algorithm has some particular case +you can construct where it will get something wrong, because it +doesn't _actually_ know how to read the users's minds). + +Supporting these cases may irresistably lead back to ambiguous clean, +as well: + | + a + / \ + b* c* + / \ / \ + c* X b* + \ / \ / + c b + +The other funky case is this thing (any clever name suggestions?): + a + / \ + b* c* + \ / \ + c* d* +Merging here will give a conflict, with my algorithm; 3-way merge +would resolve it cleanly. Polling people on #monotone and #revctrl, +the consensus seems to be that they agree with 3-way merge, but giving +a conflict is really not _that_ bad. (It also seems to cause some +funky effects with darcs-merge; see zooko's comments on #revctrl and +darcs-users.) + +This is really a problem with the user model, rather than the +algorithm. Apparently people do not interpret the act of resolving +the b/c merge to be "setting" the result; They seem to interpret it as +"selecting" the result of 'c'; the 'c' in the result is in some sense +the "same" 'c' as in the parent. The difference between "setting" and +"selecting" is the universe of possible options; if you see + a b + \ / + c +then you figure that the person doing the merge was picking from all +possible resolution values; when you see + a b + \ / + b +you figure that the user was just picking between the two options +given by the parents. My user model is too simple to take this into +account. It's not a huge extension to the model to do so; it's quite +possible that an algorithm could be devised that gave a clean merge +here, perhaps by separately tracking each node's nearest marked +ancestor and the original source of its value as two separate things. + +Relation to other work +---------------------- + +This algorithm is very close to the traditional codeville-merge +approach to this problem; the primary algorithmic difference is the +marking of conflict resolutions as being "changes". The more +important new stuff here, I think, are the user model and the proofs. + +Traditionally, merge algorithms are evaluated by coming up with some +set of examples, eyeballing them to make some guess as to what the +"correct" answer was, comparing that to the algorithm's output, and +then arguing with people whose intuitions were different. +Fundamentally, merging is about deterministically guessing the user's +intent in situations where the user has not expressed any intent. +Humans are very good at guessing intent; we have big chunks of squishy +hardware designed to form sophisticated models of others intents, and +it's completely impossible for a VCS to try and duplicate that in +full. My suggestion here, with my "user model", is to seriously and +explicitly study this part of the problem. There are complicated +trade-offs between accuracy (correctly modeling intention), +conservatism (avoiding incorrectly modeling intention), and +implementability (describing the user's thought processes exactly +isn't so useful if you can't apply it in practice). It's hard to make +an informed judgement when we don't have a name for the thing we're +trying to optimize, and hard to evaluate an algorithm when we can't +even say what it's supposed to be doing. + +I suspect the benefit of the proofs is obvious to anyone who has spent +much time banging their head against this problem; until a few days +ago I was skeptical there _was_ a way to design a merge algorithm that +didn't run into problems like ambiguous clean merge. + +I'm still skeptical, of course, until people read this; merging is +like crypto, you can't trust anything until everyone's tried to break +it... so let's say I'm cautiously optimistic :-). If this holds up, +I'm quite happy; between the user model and the proofs, I'm far more +confident that this does something sensible in all cases and has no +lurking edge cases than I have been in any previous algorithm. The +few problem cases I know of display a pleasing conservatism -- perhaps +more cautious than they need to be, but even if they do cause an +occasional unnecessary conflict, once the conflict is resolved it +should stay resolved. + +So... do your worst! + +-- Nathaniel ============================================================ --- cmd_merging.cc 63466000d5fbbbaf5a193275602052ff9d464774 +++ cmd_merging.cc 7a441305bb48f878eee5d5febaad2e99d781b212 @@ -164,8 +164,8 @@ CMD(update, "update", "", CMD_REF(worksp N(parents.size() == 1, F("this command can only be used in a single-parent workspace")); - revision_id old_rid = parent_id(parents.begin()); - N(!null_id(old_rid), + revision_id base_rid = parent_id(parents.begin()); + N(!null_id(base_rid), F("this workspace is a new project; cannot update")); // Figure out where we're going @@ -177,7 +177,7 @@ CMD(update, "update", "", CMD_REF(worksp { P(F("updating along branch '%s'") % app.opts.branchname); set candidates; - pick_update_candidates(app.lua, project, candidates, old_rid, + pick_update_candidates(app.lua, project, candidates, base_rid, app.opts.branchname, app.opts.ignore_suspend_certs); N(!candidates.empty(), @@ -209,9 +209,9 @@ CMD(update, "update", "", CMD_REF(worksp notify_if_multiple_heads(project, app.opts.branchname, app.opts.ignore_suspend_certs); - if (old_rid == chosen_rid) + if (base_rid == chosen_rid) { - P(F("already up to date at %s") % old_rid); + P(F("already up to date at %s") % base_rid); // do still switch the workspace branch, in case they have used // update to switch branches. work.set_ws_options(app.opts, true); @@ -228,60 +228,99 @@ CMD(update, "update", "", CMD_REF(worksp // Okay, we have a target, we have a branch, let's do this merge! - // We have: - // - // old --> working - // | | - // V V - // chosen --> merged - // - // - old is the revision specified in _MTN/revision - // - working is based on old and includes the workspace's changes - // - chosen is the revision we're updating to and will end up in _MTN/revision - // - merged is the merge of working and chosen, that will become the new - // workspace - // - // we apply the working to merged cset to the workspace - // and write the cset from chosen to merged changeset in _MTN/work + // base_ and working_roster are shared_ptr for use with wca below. + roster_t_cp base_roster = parent_cached_roster(parents.begin()).first; + MM(*base_roster); + revision_id working_rid; + shared_ptr working_roster = shared_ptr(new roster_t()); + marking_map working_markings; + MM(*working_roster); + MM(working_markings); + temp_node_id_source nis; + work.get_current_roster_shape(db, nis, *working_roster); + work.update_current_roster_from_filesystem(*working_roster); - // Get the OLD and WORKING rosters - roster_t_cp old_roster - = parent_cached_roster(parents.begin()).first; - MM(*old_roster); + roster_t chosen_roster; + marking_map chosen_markings; + MM(chosen_roster); + MM(chosen_markings); - shared_ptr working_roster = shared_ptr(new roster_t()); + db.get_roster(chosen_rid, chosen_roster, chosen_markings); - MM(*working_roster); - work.get_current_roster_shape(db, nis, *working_roster); - work.update_current_roster_from_filesystem(*working_roster); + roster_merge_result result; - revision_t working_rev; - revision_id working_rid; - make_revision_for_workspace(parents, *working_roster, working_rev); - calculate_ident(working_rev, working_rid); + // If we are not switching branches, we treat the workspace as a revision + // that has not yet been committed, and do a normal merge. This supports + // sutures and splits. + // + // If we are switching branches, we assume the user wants to apply their + // local changes to the selected revision in the new branch. + if (!switched_branch) + { + // we apply the base to working cset to base_marking to obtain an up-to-date + // marking map, then merge working with chosen. - // Get the CHOSEN roster - roster_t chosen_roster; MM(chosen_roster); - db.get_roster(chosen_rid, chosen_roster); + // We need a working_rid to put in the marking map, and the caller + // probably needs it later use in conflict resolution. But it doesn't have + // to be real in any sense, because this particular revision will never be + // committed to the database, so make one up. + working_rid = revision_id("00000000000000000042"); + make_working_markings(db, nis, base_rid, working_rid, *working_roster, working_markings); - // And finally do the merge - roster_merge_result result; - marking_map left_markings, right_markings; - three_way_merge(old_rid, *old_roster, - working_rid, *working_roster, - chosen_rid, chosen_roster, - result, left_markings, right_markings); + // working is an immediate child of base, so working_uncommon_ancestors = + // base_uncommon_ancestors + working. + set chosen_uncommon_ancestors, working_uncommon_ancestors; + db.get_uncommon_ancestors(chosen_rid, base_rid, + chosen_uncommon_ancestors, working_uncommon_ancestors); + safe_insert(working_uncommon_ancestors, working_rid); + roster_merge(*working_roster, working_markings, working_uncommon_ancestors, + chosen_roster, chosen_markings, chosen_uncommon_ancestors, + result); + } + else + { + // Switching branches. This doesn't directly apply the cset + // base->working to chosen, but it is the algorithm used by monotone + // before suture/split was implemented, so we keep it. + + // We have: + // + // base --> working + // | | + // V V + // chosen --> merged + // + // - base is the revision specified in _MTN/revision + // - working is based on base and includes the workspace's changes + // - chosen is the revision we're updating to and will end up in _MTN/revision + // - merged is the merge of working and chosen, that will become the new + // workspace + + // Get the BASE and WORKING rosters + + revision_t working_rev; + make_revision_for_workspace(parents, *working_roster, working_rev); + calculate_ident(working_rev, working_rid); + + // And finally do the merge + three_way_merge(base_rid, *base_roster, + working_rid, *working_roster, + chosen_rid, chosen_roster, + result, working_markings, chosen_markings); + + } + roster_t & merged_roster = result.roster; map paths; get_content_paths(*working_roster, paths); - content_merge_workspace_adaptor wca(db, old_rid, old_roster, - left_markings, right_markings, paths); + content_merge_workspace_adaptor wca(db, base_rid, base_roster, + chosen_markings, working_markings, paths); wca.cache_roster(working_rid, working_roster); resolve_merge_conflicts(app.lua, *working_roster, chosen_roster, result, wca, false); ============================================================ --- roster.cc 9989ba61bf9d98cdc3551daaa3666cd37a752695 +++ roster.cc a67920123605b12886b28cbd4c097f60a38f7b43 @@ -2245,6 +2245,32 @@ make_roster_for_revision(database & db, } +void +make_working_markings(database & db, + temp_node_id_source & nis, + revision_id const base_rid, + revision_id const working_rid, + roster_t const & working_roster, + marking_map & working_markings) +{ + roster_t base_roster; + marking_map base_markings; + + db.get_roster(base_rid, base_roster, base_markings); + + cset base_to_work; + make_cset(base_roster, working_roster, base_to_work); + + // We need to apply the cset to base_markings to get working_markings; + // base_to_work.apply does that. It also recomputes temp_roster; could + // save time by not doing that. + roster_t temp_roster = base_roster; + working_markings = base_markings; + editable_roster_for_nonmerge editable_working_roster(temp_roster, nis, working_rid, working_markings); + base_to_work.apply_to(editable_working_roster); +} + + //////////////////////////////////////////////////////////////////// // Calculation of a cset //////////////////////////////////////////////////////////////////// ============================================================ --- roster.hh e752044dc5fb4183c3118daa8c8da40ef86ee3c7 +++ roster.hh 897cd92c82cf0a4a9506428bafe3ca94f959e747 @@ -407,7 +407,17 @@ make_roster_for_revision(database & db, roster_t & result, marking_map & marking); +// Compute working_markings by applying the changeset from base to working +// to base markings void +make_working_markings(database & db, + temp_node_id_source & nis, + revision_id const base_rid, + revision_id const working_rid, + roster_t const & working_roster, + marking_map & working_markings); + +void read_roster_and_marking(roster_data const & dat, roster_t & ros, marking_map & mm); ============================================================ --- roster_merge.cc 4fdb63c868ad20c20d95cde69bddc987316eac0d +++ roster_merge.cc 7c9664a62bd7bb84afce479ffcdfbe4ec82aa015 @@ -2321,7 +2321,7 @@ roster_merge(roster_t const & left_paren I(new_i->first == i.right_key()); I(right_mi->first == i.right_key()); - merge_nodes(left_i->second, // left_n + merge_nodes(left_n, left_mi->second, // left_marking left_uncommon_ancestors, i.right_data(), // right_n ============================================================ --- tests/resolve_duplicate_name_conflict/__driver__.lua baf6a71e1f54cdf070340d504d6c9c330db0aa8e +++ tests/resolve_duplicate_name_conflict/__driver__.lua 3da589c23ef5c0e996b267d74be712fc6c2176a2 @@ -116,7 +116,6 @@ check(mtn("update"), 0, nil, true) remove ("thermostat-honeywell.c") remove ("thermostat-westinghouse.c") check(mtn("update"), 0, nil, true) --- FIXME: this warns about changes in checkout.sh being lost, even though they aren't; it doesn't understand suture canonicalize("stderr") get ("expected-update-messages-jim_1") check(samefile("expected-update-messages-jim_1", "stderr")) ============================================================ --- tests/resolve_duplicate_name_conflict/expected-update-messages-jim_1 79eb4985a0fdab8524e19de458835db8b9dcf51c +++ tests/resolve_duplicate_name_conflict/expected-update-messages-jim_1 d42cacc7d4725d62070fa6f34bc0189c2da4d1cd @@ -1,11 +1,5 @@ mtn: selected update target 257729bebdb3 mtn: updating along branch 'testbranch' mtn: selected update target 257729bebdb32819cd1fc059806e0fb4144f7ec7 -mtn: [left] 30065575a95fc60f8deb84b15d9b8c8b3944d37c -mtn: [right] 257729bebdb32819cd1fc059806e0fb4144f7ec7 -mtn: warning: Content changes to the file 'checkout.sh' -mtn: warning: will be ignored during this merge as the file has been -mtn: warning: removed on one side of the merge. Affected revisions include: -mtn: warning: Revision: 30065575a95fc60f8deb84b15d9b8c8b3944d37c mtn: adding checkout.sh mtn: renaming thermostat.c to thermostat-honeywell.c mtn: adding thermostat-westinghouse.c