# # # patch "NEWS" # from [7d6bb127f7ecd2dc50c01b9ecbf5a4d85841b0c2] # to [4102d7e399b0d44fa250e7110072f5dd049a903d] # # patch "UPGRADE" # from [b7e014bfa58db309cb29c06816070168e602fd14] # to [ef451e84d9d3c847f9d1fde2f470a9d20cdd19a9] # # patch "docs/Adding-Files.html" # from [5062910011a1a0943fdb10cb7d213251f97d9a86] # to [cc6c8724c9700e78116d914e8e9f39b414197aee] # # patch "docs/Additional-Lua-Functions.html" # from [846e1a5b8af06f43d8d8356df8ec75a7e91a6972] # to [e8f257471f9a6518560cba09bc3d3ab2a6143ba0] # # patch "docs/Advanced-Uses.html" # from [8321cf86cedd263616c8d270eec3749a75d7502a] # to [a5a5438f8a86bcbcdc677673000a05cfa29a5190] # # patch "docs/Automation.html" # from [356f171884468556f00bb7a1cf101d1a52e8d86c] # to [35bed8de0799c40422cc86ce6784e00e82d1667c] # # patch "docs/Basic-Network-Service.html" # from [085ca4c633152c1a4cda9919bc3632914b9f116b] # to [d9920baa4b979ac9355e1b376dd8f90cec9849b3] # # patch "docs/Branches.html" # from [6b3681a4cc9adbe57242405012580da70ce56ecf] # to [478cd3e4a9b9ff7ed91a3d32d88aaa05050e030b] # # patch "docs/Branching-and-Merging.html" # from [2f0bd679bd2111d317fc4bbea458a889d1e655c2] # to [21593f8879fda5188a67a83e55e4d055facf8361] # # patch "docs/CVS-Phrasebook.html" # from [e790524d54a2332bbf14e04fed7676ba6df56147] # to [f4e4781803808facf1795a2441e1fd6fff24918b] # # patch "docs/Certificate.html" # from [2565b478678bf30855d226b656a5abe73791ff6a] # to [f3837399865ff9faf2a0754b26cdd4f0dde955f2] # # patch "docs/Certificates.html" # from [051bd77b92a8d609761bba28d6f90f23a7d53811] # to [2cc0e4f319ed862ed12559a54f37658416a935d5] # # patch "docs/Command-Reference.html" # from [8be324c63b1adb3f71bb1ad0f96788af623c3a5c] # to [ac73891fbac9546afc4fcfdd53eaeca93f7f6c8b] # # patch "docs/Committing-Work.html" # from [0ac9fe948d6d6765c0b282198521f0d63122ae45] # to [e716cb842e741e8b617176249db01cbd2668f109] # # patch "docs/Concepts.html" # from [2b0ec10dfd1500b3852c2f97cf82bd2d940abed5] # to [0a0a28e84c6b2364c55a81596080a66438ee0498] # # 
patch "docs/Creating-a-Database.html" # from [4390285dd70f86c67db73ae6dc212d7324686222] # to [dd4d71928c1cfcd0bae494cf2f671ec5e14b7f10] # # patch "docs/Database.html" # from [c6e7c8ae101dfccc96542336f0696e5e03d4480c] # to [62ae58d646e0da9a8af14aa776c815c920d617ad] # # patch "docs/Dealing-with-a-Fork.html" # from [b677ba8ed57f4c2196ab5ba9c2d1144e022061ff] # to [f0d020a80d566fcce0e969526947d9bb04e11495] # # patch "docs/Default-hooks.html" # from [a3abe4ba183ea064970139d1e081b21c499f3d74] # to [426d6f8d291d2c1b66af91f7fc541cb4322b4c71] # # patch "docs/File-Attributes.html" # from [37e478866d1245c4b1c09cb8fdf0d64c1c1b94f0] # to [07a04b4dc5dd34222afc29ac756a3bee696cfe0c] # # patch "docs/Forks-and-merges.html" # from [caa0489f7797842f20be40b7f998afa5f84aeeb6] # to [1e26924b4bd9845021a4e3ad24c066a4b4caedee] # # patch "docs/General-Index.html" # from [d9860f42e76b6146e214ff8bd608285a2ba6f70a] # to [4935e1b875af4cd6dcc2d04dfd692887c69e4c0a] # # patch "docs/Generating-Keys.html" # from [f97c0998cf7d223ef4e2271f40b37b838a254bda] # to [7e8c77afdbf077663dbcee8f85126bf06e103734] # # patch "docs/Hash-Integrity.html" # from [3f9e7068d10c10ce601d14394277e31b9b983d4d] # to [e330839ef84c2902639d3e151c93338471aec15b] # # patch "docs/Historical-records.html" # from [90b70a4b56a763a70d864da898d6667b4c1826ba] # to [9937fdcabdc36c64c8e3e64a99de31dd3e4be343] # # patch "docs/Hook-Reference.html" # from [7a412358366c360d81a2e428b6b824cccaecb272] # to [ac7f505461c97439c2534cee928cf54168ef632b] # # patch "docs/Hooks.html" # from [673c5a499a3c72d3ef132f2470ae863c62d57ea4] # to [d32c0db23daf71b0884b9465b74fcd30f8e819de] # # patch "docs/Importing-from-CVS.html" # from [34ea9d99420562ab5f0a435386597101c3b6d98c] # to [f52c8fd7dd6784f50c00c456c74d6589a72ab15f] # # patch "docs/Informative.html" # from [ffe3b1df0339c4769f834001b639319d04ab2ea4] # to [52874d9861dc1742a759be4a260204be0cab01e9] # # patch "docs/Inodeprints.html" # from [00c8414b0b4d264a564ec486ca65a8e7dc48229d] # to 
[4e462bbef7cfa65afde05c0e51771a6c831ec672] # # patch "docs/Internationalization.html" # from [f743ea0d87237b4a0ba08b8e1eff8c2eda4b11e1] # to [20915ed9d3412b0f53b5a5b6c7e3c28f8586fd94] # # patch "docs/Key-and-Cert-Trust.html" # from [87061cac1bac7fc663f3b53fa7d2635f072e8083] # to [9a87d94eb16d0d44085edd2bd8339648e42aaa5b] # # patch "docs/Making-Changes.html" # from [7155a6d3952b2a8ef8a2eafa9549cbfd60ab89fb] # to [2a35cd7076ea9d81efcffddae77163d325fcff30] # # patch "docs/Mark_002dMerge.html" # from [7b95b1f2f7b6c7414d5523663e761de124a09529] # to [b392558ccae556635ed788648db245b9071d145a] # # patch "docs/Merging.html" # from [6e1b1cffbbfab2076cfe97d595dc3316b1ed4658] # to [a29585476ae412228693aadcdc3a54985e0b655d] # # patch "docs/Migrating-and-Dumping.html" # from [31cf38fd784584da6f9ea3ce119b8e8caffecc23] # to [10a5a0961bd67d70eb3ac97caf148bd39db6e06e] # # patch "docs/Naming-Conventions.html" # from [bf61294982536b75cc19ea1ae6aee8d5f0371c31] # to [e9dec1894d0cbd227f61f8500aff5f9934c6c784] # # patch "docs/Network-Service-Revisited.html" # from [2e8694d09a1dd96b0bf8d23b88887c651db4790d] # to [30ce6318069e4e82d3f49d573e9b10e964e232cf] # # patch "docs/Network.html" # from [ea1beb7ca4c48d2ccae750e1a01baa1143785849] # to [9b68ce1aadf777750f366aa0a356dd354c5d96ec] # # patch "docs/Other-Transports.html" # from [12d427aa02fa9c04751b31f614488da0dff04d51] # to [7e68d9cd7cf332f1193ece40eac4ff845325f176] # # patch "docs/Packet-I_002fO.html" # from [44d9a934e80c37a1bdd826fd86f9b5218563833f] # to [9737030371eae74dbe9ce2ce837de5d101fbd2fe] # # patch "docs/Quality-Assurance.html" # from [2ba8ffdc2456b9a53ca6bebe46aa1b956546433d] # to [a2c5b1325ef9d5176db2908061afa117f9239b14] # # patch "docs/RCS.html" # from [8e283147bdb9cd6822a2b262d73fef95d0ebf94b] # to [b0bb87a5300ed953a29c02f00b3534a85d43e6f1] # # patch "docs/Rebuilding-ancestry.html" # from [6424f9076b7d8ea298d9d616b003948e024624f4] # to [5b01226f5cc648bf08925e6f00b5fe84cf91014a] # # patch "docs/Regexp-Details.html" # from 
[292e1d632b1d603a8f6bdf8cf165954653b40511] # to [ea50c55e16d4127dd65b96ddb23b544aa956a032] # # patch "docs/Regexp-Summary.html" # from [3bd9b164c47cfebf0b14099e9ca05f3be4d2cb11] # to [cb5173340dc7d085bb9b31fee5beb7af838d7b23] # # patch "docs/Regexps.html" # from [815bd739ce0d1f2d8d11a5e170d0802c305147e5] # to [9d0e0674c0c926ccfe022142b805d810662a9284] # # patch "docs/Reserved-Certs.html" # from [d6a34981e320fd16d5a567b0dc2d5b3970490790] # to [d86b0c542ecf0fe616cfded24d4333b3c0dea012] # # patch "docs/Reserved-Files.html" # from [4b48d549c9e557f0a6de1f98adfba40bcc04baea] # to [dc28c3e5da252e42e708ade445183f430694b199] # # patch "docs/Restrictions.html" # from [452d413cdd9ca3b1fb4c7351d42a92e41a2588c4] # to [6e8d7a166765607b49f7724e5a1fbf22938bb4e7] # # patch "docs/Scripting.html" # from [a69b560f26880e3c1143a52f1c6e07aaf8b5f250] # to [30bea23402414b430800375915ca1647e5e9a5e0] # # patch "docs/Selectors.html" # from [e9adcb321fca68051ebe37e1f1d18894e0db656b] # to [27ff2f3960012335a26dfe8f355773418e1bb9ac] # # patch "docs/Special-Topics.html" # from [d54cb05bd92502bfe2a5c20a63e9c143f3709b36] # to [9705f227d829fe7eb1b89582c48a92e552e4465d] # # patch "docs/Starting-a-New-Project.html" # from [a3cbda53564080db304fd6237351ce954babea3c] # to [b4964123b9e63096310ca87c0b243c4b17718689] # # patch "docs/Storage-and-workflow.html" # from [69392c7d45bec0eaca2f17e9d2f92077c2ba4070] # to [caff1cbcb942d761fff3d5d66b9c46a97c4f3711] # # patch "docs/Synchronising-Databases.html" # from [d40f41860ab082e6da7fb16844afb19357fe55b9] # to [fb29388f7fabc7926e1357e321da9fafdc2c7e11] # # patch "docs/Tree.html" # from [b557ca7b886640909179345fba5187d52d49b94f] # to [b946d40b26991865e3db854d272f95addb6cd920] # # patch "docs/Tutorial.html" # from [5d27fde6085db7cb82e86332657d6e73de2dbf97] # to [28141be0386259fcd886a517f7de6b6c2a04af53] # # patch "docs/Using-packets.html" # from [4077ccda595a86dd82f1fb9a7da0468b69010b02] # to [9541f5784c2449819b1db89a826ea8917db3a416] # # patch "docs/Vars.html" # 
from [c7d3a44236435c862d6c7769d8a1b32fd57fd5fc] # to [7475e0311e505b2062aa865f3532d3ff83806b30] # # patch "docs/Versions-of-files.html" # from [ef657adad28e050daad568c6a40e338c8e7c2049] # to [f3bf46039cd6085320e20be0a3e6bd8656eca3fc] # # patch "docs/Versions-of-trees.html" # from [64531ae8cfd44ce9d5646702f3b65a74ea029e42] # to [ae0aaa5884c8701862f7545b8f48416bb4eec93c] # # patch "docs/Workspace-Collisions.html" # from [b1361bb5f32890fce1b8d3d6d62892b5dc882758] # to [ad276f0c48026052f87c36ebf66f04491fe2a5da] # # patch "docs/Workspace.html" # from [40b4543d5606e6701dace2e33f41aac6786bfd8c] # to [f20179119ab4fb1acf050df9897e87541c3711f7] # # patch "docs/index.html" # from [617ce735f037a9cd24f680774150561b890a2faa] # to [90d19f0dee2f65ff5a94c152a3f80558fa56e6ee] # # patch "monotone.html" # from [0c51e9b6862604f9724e8aadf48028c579da628f] # to [6fa6307fc17a8f9880a0e07f365d13e2180601a9] # # patch "monotone.pdf" # from [de109abbbb027f488eb8b286ed2cdc4a85737d37] # to [7704a142016b4a9be4b027a5eb5f7010a6422ca1] # ============================================================ --- NEWS 7d6bb127f7ecd2dc50c01b9ecbf5a4d85841b0c2 +++ NEWS 4102d7e399b0d44fa250e7110072f5dd049a903d @@ -1,3 +1,49 @@ +[ somewhen in the future ] + + 0.38 release. + + Changes + + - mtn log now prints a single dot for a project's root + directory instead of an empty string. + + - mtn now warns if changes to a file will be ignored because + the file has been deleted on one side of a merge. + + - mtn now errors if your chosen private key doesn't match the public + key of the same name in your database. 
+ + - mtn now checks for your key before a merge action takes place to + ensure that any manually merged file isn't lost in an error case + + Bugs fixed + + - a bug introduced in 0.37 prevented an external merger from being + executed unless the MTN_MERGE environment variable was set + + - mtn read successfully reads revision data, and cert packets again + + - mtn consistently supports certs with empty values + (fixed 'ls certs' and 'read') + + Internal + + - Update Botan to 1.7.2. + + - Moved the gzip implementation out of the Botan directory. + + Other + + - Added the scripts of the following Lua-based contributed + Monotone extension commands to contrib/command/: + "mtn base", "mtn fuse", "mtn revision", "mtn conflicts". + + - Added a hooks version of the contributed ciabot script, + contrib/ciabot_monotone_hookversion.lua + + - The monotone manual is now licensed under the GPL rather than + the GFDL. + Fri Oct 25 22:35:33 UTC 2007 0.37 release. ============================================================ --- UPGRADE b7e014bfa58db309cb29c06816070168e602fd14 +++ UPGRADE ef451e84d9d3c847f9d1fde2f470a9d20cdd19a9 @@ -1,4 +1,4 @@ -upgrading monotone to 0.37 +upgrading monotone to 0.38 ========================== How to read this file: ============================================================ --- docs/Adding-Files.html 5062910011a1a0943fdb10cb7d213251f97d9a86 +++ docs/Adding-Files.html cc6c8724c9700e78116d914e8e9f39b414197aee @@ -1,9 +1,9 @@
0x00 thru 0x06 0x0E thru 0x1a 0x1c thru 0x1f -+
include(
scriptfile)
The output table will be:
{ 1 = { name = "thingy", values = { 1 = "foo", 2 = "bar" } }, @@ -151,7 +151,7 @@ the arguments. 3 = { name = "spork", values = { } }, 4 = { name = "frob", values = { 1 = "oops" } } } -+
regex.search(
regexp,
string)
4c2c1d846fa561601254200918fba1fd71e6795d ============================================================ --- docs/Basic-Network-Service.html 085ca4c633152c1a4cda9919bc3632914b9f116b +++ docs/Basic-Network-Service.html d9920baa4b979ac9355e1b376dd8f90cec9849b3 @@ -1,9 +1,9 @@+ function push_netsync_notifier(notifier) + return push_hook_functions(notifier) + end +end ============================================================ --- docs/File-Attributes.html 37e478866d1245c4b1c09cb8fdf0d64c1c1b94f0 +++ docs/File-Attributes.html 07a04b4dc5dd34222afc29ac756a3bee696cfe0c @@ -1,9 +1,9 @@Basic Network Service - monotone documentation - + ============================================================ --- docs/Branches.html 6b3681a4cc9adbe57242405012580da70ce56ecf +++ docs/Branches.html 478cd3e4a9b9ff7ed91a3d32d88aaa05050e030b @@ -1,9 +1,9 @@Branches - monotone documentation - + ============================================================ --- docs/Branching-and-Merging.html 2f0bd679bd2111d317fc4bbea458a889d1e655c2 +++ docs/Branching-and-Merging.html 21593f8879fda5188a67a83e55e4d055facf8361 @@ -1,9 +1,9 @@Branching and Merging - monotone documentation - + ============================================================ --- docs/CVS-Phrasebook.html e790524d54a2332bbf14e04fed7676ba6df56147 +++ docs/CVS-Phrasebook.html f4e4781803808facf1795a2441e1fd6fff24918b @@ -1,9 +1,9 @@CVS Phrasebook - monotone documentation - + ============================================================ --- docs/Certificate.html 2565b478678bf30855d226b656a5abe73791ff6a +++ docs/Certificate.html f3837399865ff9faf2a0754b26cdd4f0dde955f2 @@ -1,9 +1,9 @@Certificate - monotone documentation - + ============================================================ --- docs/Certificates.html 051bd77b92a8d609761bba28d6f90f23a7d53811 +++ docs/Certificates.html 2cc0e4f319ed862ed12559a54f37658416a935d5 @@ -1,9 +1,9 @@Certificates - monotone documentation - + ============================================================ --- 
docs/Command-Reference.html 8be324c63b1adb3f71bb1ad0f96788af623c3a5c +++ docs/Command-Reference.html ac73891fbac9546afc4fcfdd53eaeca93f7f6c8b @@ -1,9 +1,9 @@Command Reference - monotone documentation - + ============================================================ --- docs/Committing-Work.html 0ac9fe948d6d6765c0b282198521f0d63122ae45 +++ docs/Committing-Work.html e716cb842e741e8b617176249db01cbd2668f109 @@ -1,9 +1,9 @@Committing Work - monotone documentation - + ============================================================ --- docs/Concepts.html 2b0ec10dfd1500b3852c2f97cf82bd2d940abed5 +++ docs/Concepts.html 0a0a28e84c6b2364c55a81596080a66438ee0498 @@ -1,9 +1,9 @@Concepts - monotone documentation - + ============================================================ --- docs/Creating-a-Database.html 4390285dd70f86c67db73ae6dc212d7324686222 +++ docs/Creating-a-Database.html dd4d71928c1cfcd0bae494cf2f671ec5e14b7f10 @@ -1,9 +1,9 @@Creating a Database - monotone documentation - + ============================================================ --- docs/Database.html c6e7c8ae101dfccc96542336f0696e5e03d4480c +++ docs/Database.html 62ae58d646e0da9a8af14aa776c815c920d617ad @@ -1,9 +1,9 @@Database - monotone documentation - + ============================================================ --- docs/Dealing-with-a-Fork.html b677ba8ed57f4c2196ab5ba9c2d1144e022061ff +++ docs/Dealing-with-a-Fork.html f0d020a80d566fcce0e969526947d9bb04e11495 @@ -1,9 +1,9 @@Dealing with a Fork - monotone documentation - + ============================================================ --- docs/Default-hooks.html a3abe4ba183ea064970139d1e081b21c499f3d74 +++ docs/Default-hooks.html 426d6f8d291d2c1b66af91f7fc541cb4322b4c71 @@ -1,9 +1,9 @@Default hooks - monotone documentation - + @@ -805,7 +805,7 @@ function get_preferred_merge3_command (t -- If there wasn't any user-given merger, take the first that's available -- and wanted. 
for _,mkey in ipairs(default_order) do - c = trymerger(mkey) ; if c then return c,nil end + c = trymerger(mkey) ; if c then return c,mkey end end end @@ -1152,70 +1152,115 @@ end return "socat" end --- Netsync notifiers are tables containing 5 functions: --- start, revision_received, cert_received, pubkey_received and end --- Those functions take exactly the same arguments as the corresponding --- note_netsync functions, but return a different kind of value, a tuple --- composed of a return code and a value to be returned back to monotone. --- The codes are strings: --- "continue" and "stop" --- When the code "continue" is returned and there's another notifier, the --- second value is ignored and the next notifier is called. Otherwise, --- the second value is returned immediately. -netsync_notifiers = {} +do + -- Hook functions are tables containing any of the following 6 items + -- with associated functions: + -- + -- startup Corresponds to note_mtn_startup() + -- start Corresponds to note_netsync_start() + -- revision_received Corresponds to note_netsync_revision_received() + -- cert_received Corresponds to note_netsync_cert_received() + -- pubkey_received Corresponds to note_netsync_pubkey_received() + -- end Corresponds to note_netsync_end() + -- + -- Those functions take exactly the same arguments as the corresponding + -- global functions, but return a different kind of value, a tuple + -- composed of a return code and a value to be returned back to monotone. + -- The codes are strings: + -- "continue" and "stop" + -- When the code "continue" is returned and there's another notifier, the + -- second value is ignored and the next notifier is called. Otherwise, + -- the second value is returned immediately. + local hook_functions = {} + local supported_items = { + "startup", + "start", "revision_received", "cert_received", "pubkey_received", "end" + } -function _note_netsync_helper(f,...) 
- local s = "continue" - local v = nil - for _,n in pairs(netsync_notifiers) do - if n[f] then - s,v = n[f](...) + function _hook_functions_helper(f,...) + local s = "continue" + local v = nil + for _,n in pairs(hook_functions) do + if n[f] then + s,v = n[f](...) + end + if s ~= "continue" then + break + end end - if s ~= "continue" then - break - end + return v end - return v -end -function note_netsync_start(...) - return _note_netsync_helper("start",...) -end -function note_netsync_revision_received(...) - return _note_netsync_helper("revision_received",...) -end -function note_netsync_cert_received(...) - return _note_netsync_helper("cert_received",...) -end -function note_netsync_pubkey_received(...) - return _note_netsync_helper("pubkey_received",...) -end -function note_netsync_end(...) - return _note_netsync_helper("end",...) -end + function note_mtn_startup(...) + return _hook_functions_helper("startup",...) + end + function note_netsync_start(...) + return _hook_functions_helper("start",...) + end + function note_netsync_revision_received(...) + return _hook_functions_helper("revision_received",...) + end + function note_netsync_cert_received(...) + return _hook_functions_helper("cert_received",...) + end + function note_netsync_pubkey_received(...) + return _hook_functions_helper("pubkey_received",...) + end + function note_netsync_end(...) + return _hook_functions_helper("end",...) 
+ end -function add_netsync_notifier(notifier, precedence) - if type(notifier) ~= "table" or type(precedence) ~= "number" then - return false, "Invalid tyoe" + function add_hook_functions(functions, precedence) + if type(functions) ~= "table" or type(precedence) ~= "number" then + return false, "Invalid type" + end + if hook_functions[precedence] then + return false, "Precedence already taken" + end + + local unknown_items = "" + local warning = nil + local is_member = + function (s,t) + for k,v in pairs(t) do if s == v then return true end end + return false + end + + for n,f in pairs(functions) do + if type(n) == "string" then + if not is_member(n, supported_items) then + if unknown_items ~= "" then + unknown_items = unknown_items .. "," + end + unknown_items = unknown_items .. n + end + if type(f) ~= "function" then + return false, "Value for functions item "..n.." isn't a function" + end + else + warning = "Non-string item keys found in functions table" + end + end + + if warning == nil and unknown_items ~= "" then + warning = "Unknown item(s) " .. unknown_items .. " in functions table" + end + + hook_functions[precedence] = functions + return true, warning end - if netsync_notifiers[precedence] then - return false, "Precedence already taken" + function push_hook_functions(functions) + local n = table.maxn(hook_functions) + 1 + return add_hook_functions(functions, n) end - local warning = nil - for n,f in pairs(notifier) do - if type(n) ~= "string" or n ~= "start" - and n ~= "revision_received" - and n ~= "cert_received" - and n ~= "pubkey_received" - and n ~= "end" then - warning = "Unknown item found in notifier table" - elseif type(f) ~= "function" then - return false, "Value for notifier item "..n.." isn't a function" - end + + -- Kept for backward compatibility + function add_netsync_notifier(notifier, precedence) + return add_hook_functions(notifier, precedence) end - netsync_notifiers[precedence] = notifier - return true, warning -end -
function get_author(branchname, keypair_id) -- Branch name ignored. if (keypair_id == "address@hidden") then @@ -249,7 +249,7 @@ definitions might be: end return keypair_id end -+
edit_comment (
commentary,
user_log_message)
function persist_phrase_ok()
return true
end
-
+
use_inodeprints ()
Returns true
if you want monotone to automatically enable
Inodeprints support in all workspaces. Only affects working
@@ -289,7 +289,7 @@ copies created after you modify the hook
function use_inodeprints()
return false
end
-
+
ignore_file (
filename)
Returns true
if filename should be ignored while adding,
dropping, or moving files. Otherwise returns false
. This is
@@ -333,7 +333,7 @@ the configuration directory. This file l
comment "everyone can read these branches"
pattern "net.example.{public,project}*"
allow "*"
-
+
This example allows everyone access to branches net.example.project
and
net.example.public
and their sub-branches, except for the branches in
net.example.project.security
and net.example.project.private
,
@@ -479,7 +479,7 @@ components:
end
return argv
end
-
+
use_transport_auth (
uri)
Returns a boolean indicating whether monotone should use transport
authentication mechanisms when communicating with uri. If this
@@ -505,7 +505,7 @@ authentication assumptions.
return true
end
end
-
+
get_mtn_command(
host)
Returns a string containing the monotone command to be executed on
host when communicating over ssh. The host
@@ -517,7 +517,7 @@ monotone binary is not in the default pa
function get_mtn_command(host)
return "mtn"
end
-
+
@@ -576,7 +576,7 @@ the intersection of tables) is the follo
return false
end
end
-
+
In this example, any revision certificate is trusted if it is signed
by at least one of three “trusted” keys, unless it is a
branch
certificate, in which case it must be signed by
@@ -606,7 +606,7 @@ version carrying the old_results
+
This definition accepts only those updates which preserve the set of
true
test results from update source to target. If no test
results exist, this hook has no effect; but once a true
test
@@ -672,8 +672,7 @@ you have a tool specific to certain file
you have a tool specific to certain file types.
-
-merge3 (
ancestor_path,
left_path,
right_path,
merged_path,
ancestor_text,
left_text,
right_text)
-
+
merge3 (
ancestor_path,
left_path,
right_path,
merged_path,
ancestor_text,
left_text,
right_text)
-
This hook is called to resolve merges that monotone could not resolve
automatically. The actual ancestor, left, and right contents of the
file are passed in the ancestor_text, left_text, and
@@ -695,8 +694,7 @@ local system. For details, see the code
and if not, then simply searches for whatever is installed on the
local system. For details, see the code in Default hooks.
-
get_preferred_merge3_command(
tbl)
-
+
get_preferred_merge3_command(
tbl)
-
Returns the results of running an external merge on three strings.
tbl wraps up the various arguments for each merge command and
is always provided by merge3. If there is a particular editor
@@ -769,7 +767,7 @@ attribute. Its definition is:
make_executable(filename)
end
end
-
+
attr_init_functions [
attribute] (
filename)
-
This is not a hook function, but a table of hook
functions. Each entry in the table
attr_init_functions
, at
@@ -798,7 +796,7 @@ definition is:
return nil
end
end
-
+
The binary_file
function is also defined as a Lua hook. See
Default hooks.
============================================================
--- docs/Importing-from-CVS.html 34ea9d99420562ab5f0a435386597101c3b6d98c
+++ docs/Importing-from-CVS.html f52c8fd7dd6784f50c00c456c74d6589a72ab15f
@@ -1,9 +1,9 @@
Importing from CVS - monotone documentation
-
+
============================================================
--- docs/Informative.html ffe3b1df0339c4769f834001b639319d04ab2ea4
+++ docs/Informative.html 52874d9861dc1742a759be4a260204be0cab01e9
@@ -1,9 +1,9 @@
Informative - monotone documentation
-
+
@@ -117,7 +117,7 @@ example, suppose you enter this command
fa36deead87811b0e15208da2853c39d2f6ebe90
fa36b76dd0139177b28b379fe1d56b22342e5306
fa36965ec190bee14c5afcac235f1b8e2239bb2a
-
+
Then monotone is telling you that there are 3 revisions it knows
about, in its database, which begin with the 4 hex digits
fa36
. This command is intended to be used by programmable
@@ -132,7 +132,7 @@ present users with additional informatio
01f5da490941bee1f0000f0561fc62eabfb2fa23 address@hidden 2003-12-03T03:14:35
01f992577bd8bcdcade0f89e724fd5dc2d2bbe8a address@hidden 2005-05-11T05:19:29
01faad191d8d0474777c70b4d606782942333a78 address@hidden 2005-04-11T04:24:01
-
+
- mtn diff [--unified] [--show-encloser]
- mtn diff --context [--show-encloser]
- mtn diff --external [--diff-args=argstring]
- mtn diff pathname...
- mtn diff --revision=id
- mtn diff --revision=id
pathname...
mtn diff --revision=id1 --revision=id2
mtn diff --revision=id1 --revision=id2 pathname...
These commands print out GNU “unified diff format” textual difference
listings between various manifest versions. With no --revision
@@ -165,9 +165,9 @@ algorithm to produce a listing in &ldquo
--unified, --context, --show-encloser, and
--external. By default, monotone uses its built-in diff
algorithm to produce a listing in “unified diff” format (analogous
-to running the program diff -u); you can also explicitly
+to running the program diff -u); you can also explicitly
request this with --unified. The built-in diff algorithm can
-also produce “context diff” format (analogous to diff -c),
+also produce “context diff” format (analogous to diff -c),
which you request by specifying --context. The short options
that diff accepts for these modes, -u and
-c, also work.
@@ -186,7 +186,7 @@ syntax, See --unified requests the “unified diff” format, the default.
--context requests the “context diff” format (analogous to
-running the program diff -c). Both of these formats are
+running the program diff -c). Both of these formats are
generated directly by monotone, using its built-in diff algorithm.
Sometimes, you may want more flexibility in output formats; for these
@@ -244,7 +244,7 @@ branch:
: * tests/t_cross.at: New test for merging merges.
: * testsuite.at: Call t_cross.at.
:
-
+
mtn list keys
mtn ls keys
mtn list keys pattern
mtn ls keys pattern
These commands list RSA keys held in your keystore and current database.
They do not print out any cryptographic information; they simply list the
names of public and private keys you have on hand.
============================================================
--- docs/Inodeprints.html 00c8414b0b4d264a564ec486ca65a8e7dc48229d
+++ docs/Inodeprints.html 4e462bbef7cfa65afde05c0e51771a6c831ec672
@@ -1,9 +1,9 @@
Inodeprints - monotone documentation
-
+
============================================================
--- docs/Internationalization.html f743ea0d87237b4a0ba08b8e1eff8c2eda4b11e1
+++ docs/Internationalization.html 20915ed9d3412b0f53b5a5b6c7e3c28f8586fd94
@@ -1,9 +1,9 @@
Internationalization - monotone documentation
-
+
@@ -105,7 +105,7 @@ an IDNA string is this:
an IDNA string is this:
{ACE-prefix}{LDH-sanitized(punycode(nameprep(UTF-8-string)))}
-
+
It is important to understand that IDNA encoding does not
preserve the input string: it both prohibits a wide variety of
possible strings and normalizes non-equal strings to supposedly
============================================================
--- docs/Key-and-Cert-Trust.html 87061cac1bac7fc663f3b53fa7d2635f072e8083
+++ docs/Key-and-Cert-Trust.html 9a87d94eb16d0d44085edd2bd8339648e42aaa5b
@@ -1,9 +1,9 @@
Key and Cert Trust - monotone documentation
-
+
@@ -85,11 +85,11 @@ following two examples are equivalent:
enter passphrase for key ID address@hidden:
$ mtn ci -m"Changed foo to bar"
$ mtn push -k address@hidden
-
+
$ mtn ci -m"Changed foo to bar"
enter passphrase for key ID address@hidden:
$ mtn push -k address@hidden
-
+
In the second example, monotone automatically added the key to ssh-agent, so you
do not need to enter the passphrase again during the push.
@@ -112,7 +112,7 @@ will cache the key for you.
Identity added: /home/user/.ssh/id_monotone (/home/user/.ssh/id_monotone)
$ mtn ci -m"Changed foo to bar"
$ mtn push -k address@hidden
-
+
You can also use the --ssh-sign option to control whether ssh-agent will
be used for signing. If set to yes, ssh-agent will be used to sign. If your
key has not been added to ssh-agent, monotone will fall back to its internal signing
============================================================
--- docs/Making-Changes.html 7155a6d3952b2a8ef8a2eafa9549cbfd60ab89fb
+++ docs/Making-Changes.html 2a35cd7076ea9d81efcffddae77163d325fcff30
@@ -1,9 +1,9 @@
Making Changes - monotone documentation
-
+
============================================================
--- docs/Mark_002dMerge.html 7b95b1f2f7b6c7414d5523663e761de124a09529
+++ docs/Mark_002dMerge.html b392558ccae556635ed788648db245b9071d145a
@@ -1,9 +1,9 @@
Mark-Merge - monotone documentation
-
+
============================================================
--- docs/Merging.html 6e1b1cffbbfab2076cfe97d595dc3316b1ed4658
+++ docs/Merging.html a29585476ae412228693aadcdc3a54985e0b655d
@@ -1,9 +1,9 @@
Merging - monotone documentation
-
+
============================================================
--- docs/Migrating-and-Dumping.html 31cf38fd784584da6f9ea3ce119b8e8caffecc23
+++ docs/Migrating-and-Dumping.html 10a5a0961bd67d70eb3ac97caf148bd39db6e06e
@@ -1,9 +1,9 @@
Migrating and Dumping - monotone documentation
-
+
============================================================
--- docs/Naming-Conventions.html bf61294982536b75cc19ea1ae6aee8d5f0371c31
+++ docs/Naming-Conventions.html e9dec1894d0cbd227f61f8500aff5f9934c6c784
@@ -1,9 +1,9 @@
Naming Conventions - monotone documentation
-
+
============================================================
--- docs/Network-Service-Revisited.html 2e8694d09a1dd96b0bf8d23b88887c651db4790d
+++ docs/Network-Service-Revisited.html 30ce6318069e4e82d3f49d573e9b10e964e232cf
@@ -1,9 +1,9 @@
Network Service Revisited - monotone documentation
-
+
============================================================
--- docs/Network.html ea1beb7ca4c48d2ccae750e1a01baa1143785849
+++ docs/Network.html 9b68ce1aadf777750f366aa0a356dd354c5d96ec
@@ -1,9 +1,9 @@
Network - monotone documentation
-
+
@@ -80,11 +80,11 @@ branches. Supposing Alice's computer has
alice.someisp.com
, then Alice might run:
$ mtn --bind=alice.someisp.com serve
-
+
And Bob might run
$ mtn sync alice.someisp.com "net.venge.monotone*"
-
+
When the operation completes, all branches matching
net.venge.monotone*
will be synchronized between Alice and Bob's
databases.
@@ -96,7 +96,7 @@ with Alice again, he can simply run:
with Alice again, he can simply run:
$ mtn sync
-
+
Of course, he can still sync with other people and other
branches by passing an address or address plus globs on the command
line; this will not affect his default affinity for Alice. If you ever
============================================================
--- docs/Other-Transports.html 12d427aa02fa9c04751b31f614488da0dff04d51
+++ docs/Other-Transports.html 7e68d9cd7cf332f1193ece40eac4ff845325f176
@@ -1,9 +1,9 @@
Other Transports - monotone documentation
-
+
============================================================
--- docs/Packet-I_002fO.html 44d9a934e80c37a1bdd826fd86f9b5218563833f
+++ docs/Packet-I_002fO.html 9737030371eae74dbe9ce2ce837de5d101fbd2fe
@@ -1,9 +1,9 @@
Packet I/O - monotone documentation
-
+
============================================================
--- docs/Quality-Assurance.html 2ba8ffdc2456b9a53ca6bebe46aa1b956546433d
+++ docs/Quality-Assurance.html a2c5b1325ef9d5176db2908061afa117f9239b14
@@ -1,9 +1,9 @@
Quality Assurance - monotone documentation
-
+
============================================================
--- docs/RCS.html 8e283147bdb9cd6822a2b262d73fef95d0ebf94b
+++ docs/RCS.html b0bb87a5300ed953a29c02f00b3534a85d43e6f1
@@ -1,9 +1,9 @@
RCS - monotone documentation
-
+
============================================================
--- docs/Rebuilding-ancestry.html 6424f9076b7d8ea298d9d616b003948e024624f4
+++ docs/Rebuilding-ancestry.html 5b01226f5cc648bf08925e6f00b5fe84cf91014a
@@ -1,9 +1,9 @@
Rebuilding ancestry - monotone documentation
-
+
@@ -160,7 +160,7 @@ be a rather serious security problem!
-
+
============================================================
--- docs/Regexp-Details.html 292e1d632b1d603a8f6bdf8cf165954653b40511
+++ docs/Regexp-Details.html ea50c55e16d4127dd65b96ddb23b544aa956a032
@@ -1,9 +1,9 @@
Regexp Details - monotone documentation
-
+
@@ -73,7 +73,7 @@ brackets, the metacharacters are as foll
|
start of alternative branch
(
start subpattern
)
end subpattern
- ?
extends the meaning of `('
+ ?
extends the meaning of ‘(’
also 0 or 1 quantifier
also quantifier minimizer
*
0 or more quantifier
@@ -103,22 +103,22 @@ applies both inside and outside characte
that character may have. This use of backslash as an escape character
applies both inside and outside character classes.
- For example, if you want to match a `*' character, you write
-`\*' in the pattern. This escaping action applies whether or not
+
For example, if you want to match a ‘*’ character, you write
+‘\*’ in the pattern. This escaping action applies whether or not
the following character would otherwise be interpreted as a
metacharacter, so it is always safe to precede a non-alphanumeric with
backslash to specify that it stands for itself. In particular, if you
-want to match a backslash, you write `\\'.
+want to match a backslash, you write ‘\\’.
-
If a pattern is compiled with the `(?x)' option, whitespace in
+
If a pattern is compiled with the ‘(?x)’ option, whitespace in
the pattern (other than in a character class) and characters between a
-`#' outside a character class and the next newline are
+‘#’ outside a character class and the next newline are
ignored. An escaping backslash can be used to include a whitespace or
-`#' character as part of the pattern.
+‘#’ character as part of the pattern.
If you want to remove the special meaning from a sequence of
-characters, you can do so by putting them between `\Q' and
-`\E'. The `\Q...\E' sequence is recognized both inside and
+characters, you can do so by putting them between ‘\Q’ and
+‘\E’. The ‘\Q...\E’ sequence is recognized both inside and
outside character classes.
Non-printing Characters
@@ -143,36 +143,36 @@ represents:
\x{
hhh...}
character with hex code hhh...
- The precise effect of `\cx' is as follows: if x is a lower
+
The precise effect of ‘\cx’ is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the
-character (hex 40) is inverted. Thus `\cz' becomes hex 1A (the
-<SUB> control character, in ASCII), but `\c{' becomes hex 3B
-(`;'), and `\c;' becomes hex 7B (`{').
+character (hex 40) is inverted. Thus ‘\cz’ becomes hex 1A (the
+<SUB> control character, in ASCII), but ‘\c{’ becomes hex 3B
+(‘;’), and ‘\c;’ becomes hex 7B (‘{’).
-
After `\x', from zero to two hexadecimal digits are read (letters
+
After ‘\x’, from zero to two hexadecimal digits are read (letters
can be in upper or lower case). Any number of hexadecimal digits may
-appear between `\x{' and `}', but the value of the
+appear between ‘\x{’ and ‘}’, but the value of the
character code must be less than 256 in non-UTF-8 mode, and less than
2^31 in UTF-8 mode. That is, the maximum value in hexadecimal is
7FFFFFFF. Note that this is bigger than the largest Unicode code
point, which is 10FFFF.
-
If characters other than hexadecimal digits appear between `\x{'
-and `}', or if there is no terminating `}', this form of
-escape is not recognized. Instead, the initial `\x' will be
+
If characters other than hexadecimal digits appear between ‘\x{’
+and ‘}’, or if there is no terminating ‘}’, this form of
+escape is not recognized. Instead, the initial ‘\x’ will be
interpreted as a basic hexadecimal escape, with no following digits,
giving a character whose value is zero.
Characters whose value is less than 256 can be defined by either of
-the two syntaxes for `\x'. There is no difference in the way they
-are handled. For example, `\xdc' is exactly the same as
-`\x{dc}'.
+the two syntaxes for ‘\x’. There is no difference in the way they
+are handled. For example, ‘\xdc’ is exactly the same as
+‘\x{dc}’.
-
After `\0' up to two further octal digits are read. If there are
+
After ‘\0’ up to two further octal digits are read. If there are
fewer than two digits, just those that are present are used. Thus the
-sequence `\0\x\07' specifies two binary zeros followed by a
+sequence ‘\0\x\07’ specifies two binary zeros followed by a
<BEL> character (octal 007). Make sure you supply two digits after
the initial zero if the pattern character that follows is itself an
octal digit.
@@ -190,8 +190,8 @@ themselves. In non-UTF-8 mode, the value
up to three octal digits following the backslash, and uses them to
generate a data character. Any subsequent digits stand for
themselves. In non-UTF-8 mode, the value of a character specified in
-octal must be less than `\400'. In UTF-8 mode, values up to
-`\777' are permitted. For example:
+octal must be less than ‘\400’. In UTF-8 mode, values up to
+‘\777’ are permitted. For example:
\040
- is another way of writing a space
@@ -201,13 +201,13 @@ octal must be less than `
\11
- might be a back reference, or another way of
writing a tab
\011
- is always a tab
-
\0113
- is a tab followed by the character `3'
+
\0113
- is a tab followed by the character ‘3’
\113
- might be a back reference, otherwise the
character with octal code 113
\377
- might be a back reference, otherwise
the byte consisting entirely of 1 bits
\81
- is either a back reference, or a binary zero
- followed by the two characters `8' and `1'
+ followed by the two characters ‘8’ and ‘1’
Note that octal values of 100 or greater must not be introduced by a
@@ -215,18 +215,18 @@ both inside and outside character classe
All the sequences that define a single character value can be used
both inside and outside character classes. In addition, inside a
-character class, the sequence `\b' is interpreted as the <BS>
-character (hex 08), and the sequences `\R' and `\X' are
-interpreted as the characters `R' and `X',
+character class, the sequence ‘\b’ is interpreted as the <BS>
+character (hex 08), and the sequences ‘\R’ and ‘\X’ are
+interpreted as the characters ‘R’ and ‘X’,
respectively. Outside a character class, these sequences have
different meanings (see below).
Absolute and Relative Back References
-The sequence `\g' followed by an unsigned or a negative number,
+
The sequence ‘\g’ followed by an unsigned or a negative number,
optionally enclosed in braces, is an absolute or relative back
reference. A named back reference can be coded as
-`\g{name}'. Back references are discussed later, following the
+‘\g{name}’. Back references are discussed later, following the
discussion of parenthesized subpatterns.
Generic character types
@@ -256,18 +256,18 @@ string, all of them fail, since there is
type. If the current matching point is at the end of the subject
string, all of them fail, since there is no character to match.
- For compatibility with Perl, `\s' does not match the <VT>
+
For compatibility with Perl, ‘\s’ does not match the <VT>
character (code 11). This makes it different from the POSIX
-“space” class. The `\s' characters are <TAB> (9), <LF>
+“space” class. The ‘\s’ characters are <TAB> (9), <LF>
(10), <FF> (12), <CR> (13), and <SPACE> (32).
In UTF-8 mode, characters with values greater than 128 never match
-`\d', `\s', or `\w', and always match `\D',
-`\S', and `\W'. These sequences retain their original
+‘\d’, ‘\s’, or ‘\w’, and always match ‘\D’,
+‘\S’, and ‘\W’. These sequences retain their original
meanings from before UTF-8 support was available, mainly for
efficiency reasons.
-
The sequences `\h', `\H', `\v', and `\V' are Perl
+
The sequences ‘\h’, ‘\H’, ‘\v’, and ‘\V’ are Perl
5.10 features. In contrast to the other sequences, these do match
certain high-valued codepoints in UTF-8 mode. The horizontal space
characters are:
@@ -334,23 +334,23 @@ the following five sequences:
(*CR)a.b
-
changes the convention to CR. That pattern matches `a\nb' because
+
changes the convention to CR. That pattern matches ‘a\nb’ because
LF is no longer a newline. Note that these special settings, which are
not Perl-compatible, are recognized only at the very start of a
pattern, and that they must be in upper case. If more than one of them
is present, the last one is used.
-
The newline convention does not affect what the `\R' escape
+
The newline convention does not affect what the ‘\R’ escape
sequence matches. By default, this is any Unicode newline sequence,
for Perl compatibility. However, this can be changed; see the
-description of `\R' below. A change of `\R' setting can be
+description of ‘\R’ below. A change of ‘\R’ setting can be
combined with a change of newline convention.
Newline Sequences
-Outside a character class, by default, the escape sequence `\R' matches
+
Outside a character class, by default, the escape sequence ‘\R’ matches
any Unicode newline sequence. This is a Perl 5.10 feature. In
-non-UTF-8 mode `\R' is equivalent to the following:
+non-UTF-8 mode ‘\R’ is equivalent to the following:
(?>\r\n|\n|\x0b|\f|\r|\x85)
@@ -367,7 +367,7 @@ and <PS> (paragraph separator, U+2028
)
and <PS> (paragraph separator, U+2029
).
- It is possible to change the meaning of `\R' by starting a +
It is possible to change the meaning of ‘\R’ by starting a pattern string with one of the following sequences:
Inside a character class, `\R' matches the letter `R'. +
Inside a character class, ‘\R’ matches the letter ‘R’.
The property names represented by xx above are limited to the -Unicode script names, the general category properties, and `Any', +Unicode script names, the general category properties, and ‘Any’, which matches any character (including newline). Other properties such -as `InMusicalSymbols' are not currently supported by PCRE. Note -that `\P{Any}' does not match any characters, so always causes +as ‘InMusicalSymbols’ are not currently supported by PCRE. Note +that ‘\P{Any}’ does not match any characters, so always causes a match failure.
Sets of Unicode characters are defined as belonging to certain @@ -489,9 +489,9 @@ by including a circumflex between the op
Each character has exactly one general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be specified by including a circumflex between the opening brace and the property name. For -example, `\p{^Lu}' is the same as `\P{Lu}'. +example, ‘\p{^Lu}’ is the same as ‘\P{Lu}’. -
If only one letter is specified with `\p' or `\P', it +
If only one letter is specified with ‘\p’ or ‘\P’, it includes all the general category properties that start with that letter. In this case, in the absence of negation, the curly brackets in the escape sequence are optional; these two examples have the same @@ -550,28 +550,28 @@ effect:
Zs
The special property `L&' is also supported: it matches a -character that has the `Lu', `Ll', or `Lt' property, in +
The special property ‘L&’ is also supported: it matches a +character that has the ‘Lu’, ‘Ll’, or ‘Lt’ property, in other words, a letter that is not classified as a modifier or “other.” -
The `Cs' (Surrogate) property applies only to characters in the +
The ‘Cs’ (Surrogate) property applies only to characters in the
range U+D800
to U+DFFF
. Such characters are not valid in
UTF-8 strings (see RFC 3629) and so cannot be tested by PCRE.
The long synonyms for these properties that Perl supports (such as -`\p{Letter}') are not supported by PCRE, nor is it permitted to -prefix any of these properties with `Is'. +‘\p{Letter}’) are not supported by PCRE, nor is it permitted to +prefix any of these properties with ‘Is’. -
No character that is in the Unicode table has the `Cn' +
No character that is in the Unicode table has the ‘Cn’ (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table.
Specifying caseless matching does not affect these escape sequences. For -example, `\p{Lu}' always matches only upper case letters. +example, ‘\p{Lu}’ always matches only upper case letters. -
The `\X' escape matches any number of Unicode characters that -form an extended Unicode sequence. `\X' is equivalent to +
The ‘\X’ escape matches any number of Unicode characters that +form an extended Unicode sequence. ‘\X’ is equivalent to
(?>\PM\pM*) @@ -582,16 +582,16 @@ character. None of them have codepoints treats the sequence as an atomic group (see below). Characters with the “mark” property are typically accents that affect the preceding character. None of them have codepoints less than 256, so in non-UTF-8 -mode `\X' matches any one character. +mode ‘\X’ matches any one character.-Matching characters by Unicode property is not fast, because PCRE has to search a structure that contains data for over fifteen thousand characters. That is why the traditional escape sequences such as -`\d' and `\w' do not use Unicode properties in PCRE. +‘\d’ and ‘\w’ do not use Unicode properties in PCRE.
Resetting the Match Start
-The escape sequence `\K', which is a Perl 5.10 feature, causes +
The escape sequence ‘\K’, which is a Perl 5.10 feature, causes any previously matched characters not to be included in the final matched sequence. For example, the pattern: @@ -599,18 +599,18 @@ matched sequence. For example, the patte foo\Kbar
matches `foobar', but reports that it has matched -`bar'. This feature is similar to a lookbehind assertion +
matches ‘foobar’, but reports that it has matched +‘bar’. This feature is similar to a lookbehind assertion (described below). However, in this case, the part of the subject before the real match does not have to be of fixed length, as -lookbehind assertions do. The use of `\K' does not interfere with the +lookbehind assertions do. The use of ‘\K’ does not interfere with the setting of captured substrings. For example, when the pattern
(foo)\Kbar-
matches `foobar', the first substring is still set to `foo'. +
matches ‘foobar’, the first substring is still set to ‘foo’.
These assertions may not appear in character classes (but note that -`\b' has a different meaning, namely the backspace character, +‘\b’ has a different meaning, namely the backspace character, inside a character class).
A word boundary is a position in the subject string where the current -character and the previous character do not both match `\w' or -`\W' (i.e. one matches `\w' and the other matches -`\W'), or the start or end of the string if the first or last -character matches `\w', respectively. +character and the previous character do not both match ‘\w’ or +‘\W’ (i.e. one matches ‘\w’ and the other matches +‘\W’), or the start or end of the string if the first or last +character matches ‘\w’, respectively. -
The `\A', `\Z', and `\z' assertions differ from the +
The ‘\A’, ‘\Z’, and ‘\z’ assertions differ from the traditional circumflex and dollar (described in the next section) in that they only ever match at the very start and end of the subject string, whatever options are set. Thus, they are independent of -multiline mode. The difference between `\Z' and `\z' is that -`\Z' matches before a newline at the end of the string as well as -at the very end, whereas `\z' matches only at the end. +multiline mode. The difference between ‘\Z’ and ‘\z’ is that +‘\Z’ matches before a newline at the end of the string as well as +at the very end, whereas ‘\z’ matches only at the end. -
The `\G' assertion is true only when the current matching +
The ‘\G’ assertion is true only when the current matching position is at the start point of the match. As used in Monotone, -`\G' is always equal to `\A'. +‘\G’ is always equal to ‘\A’.
Outside a character class, in the default matching mode, the -circumflex character, `^', is an assertion that is true only if +circumflex character, ‘^’, is an assertion that is true only if the current matching point is at the start of the subject string. Inside a character class, circumflex has an entirely different meaning (see below). @@ -668,7 +668,7 @@ other constructs that can cause a patter subject, it is said to be an “anchored” pattern. (There are also other constructs that can cause a pattern to be anchored.) -
A dollar character, `$', is an assertion that is true only if the +
A dollar character, ‘$’, is an assertion that is true only if the current matching point is at the end of the subject string, or immediately before a newline at the end of the string (by default). Dollar need not be the last character of the pattern if a @@ -677,23 +677,23 @@ character class. character class.
The meanings of the circumflex and dollar characters are changed if -the `(?m)' option is set. When this is the case, a circumflex +the ‘(?m)’ option is set. When this is the case, a circumflex matches immediately after internal newlines as well as at the start of the subject string. It does not match after a newline that ends the string. A dollar matches before any newlines in the string, as well as -at the very end, when `(?m)' is set. When newline is specified as +at the very end, when ‘(?m)’ is set. When newline is specified as the two-character sequence <CR><LF>, isolated <CR> and <LF> characters do not indicate newlines. -
For example, the pattern `^abc$' matches the subject string -`def\nabc' (where `\n' represents a newline) in multiline +
For example, the pattern ‘^abc$’ matches the subject string +‘def\nabc’ (where ‘\n’ represents a newline) in multiline mode, but not otherwise. Consequently, patterns that are anchored in single line mode because all branches start with ^ are not anchored in multiline mode. -
Note that the sequences `\A', `\Z', and `\z' can be +
Note that the sequences ‘\A’, ‘\Z’, and ‘\z’ can be used to match the start and end of the subject in both modes, and if -all branches of a pattern start with `\A' it is always anchored. +all branches of a pattern start with ‘\A’ it is always anchored.
The behaviour of dot with regard to newlines can be changed. If the -`(?s)' option is set, a dot matches any one character, without +‘(?s)’ option is set, a dot matches any one character, without exception. If the two-character sequence <CR><LF> is present in the subject string, it takes two dots to match it. @@ -721,15 +721,15 @@ special meaning in a character class.
Outside a character class, the escape sequence `\C' matches any +
Outside a character class, the escape sequence ‘\C’ matches any one byte, both in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending characters. The feature is provided in Perl in order to match individual bytes in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes, what remains in the string may -be a malformed UTF-8 string. For this reason, the `\C' escape +be a malformed UTF-8 string. For this reason, the ‘\C’ escape sequence is best avoided. -
PCRE does not allow `\C' to appear in lookbehind assertions +
PCRE does not allow ‘\C’ to appear in lookbehind assertions (described below), because in UTF-8 mode this would make it impossible to calculate the length of the lookbehind. @@ -749,8 +749,8 @@ is not the first character, or escape it a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. -
For example, the character class `[aeiou]' matches any lower case -vowel, while `[^aeiou]' matches any character that is not a lower +
For example, the character class ‘[aeiou]’ matches any lower case +vowel, while ‘[^aeiou]’ matches any character that is not a lower case vowel. Note that a circumflex is just a convenient notation for specifying the characters that are in the class by enumerating those that are not. A class that starts with a circumflex is not an @@ -758,13 +758,13 @@ therefore it fails if the current pointe therefore it fails if the current pointer is at the end of the string.
In UTF-8 mode, characters with values greater than 255 can be included -in a class as a literal string of bytes, or by using the `\x{' +in a class as a literal string of bytes, or by using the ‘\x{’ escaping mechanism.
When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless -`[aeiou]' matches `A' as well as `a', and a caseless [^aeiou] -does not match `A', whereas a caseful version would. In UTF-8 mode, +‘[aeiou]’ matches ‘A’ as well as ‘a’, and a caseless [^aeiou] +does not match ‘A’, whereas a caseful version would. In UTF-8 mode, PCRE always understands the concept of case for characters whose values are less than 128, so caseless matching is always possible. For characters with higher values, the concept of case is supported if @@ -775,43 +775,43 @@ special way when matching character clas
Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending -sequence is in use, and whatever setting of the `(?s)' and -`(?m)' options is used. A class such as `[^a]' always +sequence is in use, and whatever setting of the ‘(?s)’ and +‘(?m)’ options is used. A class such as ‘[^a]’ always matches one of these characters.
The minus (hyphen) character can be used to specify a range of -characters in a character class. For example, `[d-m]' matches any -letter between `d' and `m', inclusive. If a minus character +characters in a character class. For example, ‘[d-m]’ matches any +letter between ‘d’ and ‘m’, inclusive. If a minus character is required in a class, it must be escaped with a backslash or appear in a position where it cannot be interpreted as indicating a range, typically as the first or last character in the class. -
It is not possible to have the literal character `]' as the end -character of a range. A pattern such as `[W-]46]' is interpreted -as a class of two characters (`W' and `-') followed by a -literal string `46]', so it would match `W46]' or -`-46]'. However, if the `]' is escaped with a backslash it -is interpreted as the end of range, so `[W-\]46]' is interpreted +
It is not possible to have the literal character ‘]’ as the end +character of a range. A pattern such as ‘[W-]46]’ is interpreted +as a class of two characters (‘W’ and ‘-’) followed by a +literal string ‘46]’, so it would match ‘W46]’ or +‘-46]’. However, if the ‘]’ is escaped with a backslash it +is interpreted as the end of range, so ‘[W-\]46]’ is interpreted as a class containing a range followed by two other characters. The -octal or hexadecimal representation of `]' can also be used to +octal or hexadecimal representation of ‘]’ can also be used to end a range.
Ranges operate in the collating sequence of character values. They can also be used for characters specified numerically, for example -`[\000-\037]'. In UTF-8 mode, ranges can include characters whose -values are greater than 255, for example `[\x{100}-\x{2ff}]'. +‘[\000-\037]’. In UTF-8 mode, ranges can include characters whose +values are greater than 255, for example ‘[\x{100}-\x{2ff}]’.
If a range that includes letters is used when caseless matching is -set, it matches the letters in either case. For example, `[W-c]' -is equivalent to `[][\\^_`wxyzabc]', matched caselessly. +set, it matches the letters in either case. For example, ‘[W-c]’ +is equivalent to ‘[][\\^_`wxyzabc]’, matched caselessly. -
The character types `\d', `\D', `\p', `\P', -`\s', `\S', `\w', and `\W' may also appear in a +
The character types ‘\d’, ‘\D’, ‘\p’, ‘\P’, +‘\s’, ‘\S’, ‘\w’, and ‘\W’ may also appear in a character class, and add the characters that they match to the -class. For example, `[\dABCDEF]' matches any hexadecimal digit. A +class. For example, ‘[\dABCDEF]’ matches any hexadecimal digit. A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching -lower case type. For example, the class `[^\W_]' matches any +lower case type. For example, the class ‘[^\W_]’ matches any letter or digit, but not underscore.
The only metacharacters that are recognized in character classes are @@ -824,14 +824,14 @@ escaping other non-alphanumeric characte
Perl supports the POSIX notation for character classes. This uses -names enclosed by `[:' and `:]' within the enclosing square +names enclosed by ‘[:’ and ‘:]’ within the enclosing square brackets. PCRE also supports this notation. For example,
[01[:alpha:]%]-
matches `0', `1', any alphabetic character, or `%'. The +
matches ‘0’, ‘1’, any alphabetic character, or ‘%’. The supported class names are
ascii
blank
cntrl
digit
digit
graph
lower
print
punct
space
space
upper
word
word
xdigit
The “space” characters are <HT> (9), <LF> (10), <VT> (11), <FF> (12), <CR> (13), and space (32). Notice that this list includes the <VT> character (code 11). This makes "space" -different to `\s', which does not include <VT> (for Perl +different to ‘\s’, which does not include <VT> (for Perl compatibility).
The name “word” is a Perl extension, and “blank” is a GNU extension from Perl 5.8. Another Perl extension is negation, which is -indicated by a `^' character after the colon. For example, +indicated by a ‘^’ character after the colon. For example,
[12[:^digit:]]-
matches `1', `2', or any non-digit. PCRE (and Perl) also -recognize the POSIX syntax `[.ch.]' and `[=ch=]' +
matches ‘1’, ‘2’, or any non-digit. PCRE (and Perl) also +recognize the POSIX syntax ‘[.ch.]’ and ‘[=ch=]’ where ch is a “collating element,” but these are not supported, and an error is given if they are encountered. @@ -882,7 +882,7 @@ example, the pattern gilbert|sullivan -
matches either `gilbert' or `sullivan'. Any number of +
matches either ‘gilbert’ or ‘sullivan’. Any number of alternatives may appear, and an empty alternative is permitted (matching the empty string). The matching process tries each alternative in turn, from left to right, and the first one that @@ -893,13 +893,13 @@ as the alternative in the subpattern.
The behavior of the matching engine can be adjusted from within the -pattern by a sequence of option letters enclosed between `(?' and -`)'. The option letters are +pattern by a sequence of option letters enclosed between ‘(?’ and +‘)’. The option letters are
i
m
m
s
x
For example, `(?im)' sets caseless, multiline matching. It is +
For example, ‘(?im)’ sets caseless, multiline matching. It is also possible to unset these options by preceding the letters with a -hyphen, and a combined setting and unsetting such as `(?im-sx)' +hyphen, and a combined setting and unsetting such as ‘(?im-sx)’ is also permitted. (This would set the caseless and multiline options while unsetting the dotall and extended-syntax options.) If a letter appears both before and after the hyphen, the option is unset. The @@ -929,7 +929,7 @@ current pattern that follows it, so (a(?i)b)c -
matches `abc' and `aBc' and no other strings. By this +
matches ‘abc’ and ‘aBc’ and no other strings. By this means, options can be made to have different settings in different parts of the pattern. Any changes made in one alternative do carry on into subsequent branches within the same subpattern. For example, @@ -938,14 +938,14 @@ into subsequent branches within the same (a(?i)b|c) -
matches `ab', `aB', `c', and `C', even though when -matching `C' the first branch is abandoned before the option +
matches ‘ab’, ‘aB’, ‘c’, and ‘C’, even though when +matching ‘C’ the first branch is abandoned before the option setting. This is because the effects of option settings happen when the pattern is parsed. There would be some very weird behaviour otherwise.
Note: Unlike these options, the similar, PCRE-specific option -sequences that start with `(*' may appear only at the very +sequences that start with ‘(*’ may appear only at the very beginning of the pattern. Details of these sequences are given in the section entitled “Newline sequences,” above. @@ -962,9 +962,9 @@ things: cat(aract|erpillar|) -
matches one of the words `cat', `cataract', or -`caterpillar'. Without the parentheses, it would match -`cataract', `erpillar' or an empty string. +
matches one of the words ‘cat’, ‘cataract’, or +‘caterpillar’. Without the parentheses, it would match +‘cataract’, ‘erpillar’ or an empty string.
For example, if the string `the red king' is matched against the pattern +
For example, if the string ‘the red king’ is matched against the pattern
the ((red|white) (king|queen))-
the captured substrings are `red king', `red', and -`king', and are numbered 1, 2, and 3, respectively. +
the captured substrings are ‘red king’, ‘red’, and +‘king’, and are numbered 1, 2, and 3, respectively.
The fact that plain parentheses fulfil two functions is not always @@ -989,20 +989,20 @@ capturing, and is not counted when compu without a capturing requirement. If an opening parenthesis is followed by a question mark and a colon, the subpattern does not do any capturing, and is not counted when computing the number of any -subsequent capturing subpatterns. For example, if the string `the -white queen' is matched against the pattern +subsequent capturing subpatterns. For example, if the string ‘the +white queen’ is matched against the pattern
the ((?:red|white) (king|queen))-
the captured substrings are `white queen' and `queen', and +
the captured substrings are ‘white queen’ and ‘queen’, and are numbered 1 and 2. The maximum number of capturing subpatterns is 65535.
As a convenient shorthand, if any option settings are required at the start of a non-capturing subpattern, the option letters may appear -between the `?' and the `:'. Thus the two patterns +between the ‘?’ and the ‘:’. Thus the two patterns
(?i:saturday|sunday) @@ -1012,26 +1012,26 @@ of the subpattern is reached, an optionmatch exactly the same set of strings. Because alternative branches are tried from left to right, and options are not reset until the end of the subpattern is reached, an option setting in one branch does -affect subsequent branches, so the above patterns match `SUNDAY' -as well as `Saturday'. +affect subsequent branches, so the above patterns match ‘SUNDAY’ +as well as ‘Saturday’.
Duplicate Subpattern Numbers
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses the same numbers for its capturing parentheses. Such a -subpattern starts with `(?|' and is itself a non-capturing +subpattern starts with ‘(?|’ and is itself a non-capturing subpattern. For example, consider this pattern:
(?|(Sat)ur|(Sun))day-Because the two alternatives are inside a `(?|' group, both sets +
Because the two alternatives are inside a ‘(?|’ group, both sets of capturing parentheses are numbered one. Thus, when the pattern matches, you can look at captured substring number one, whichever alternative matched. This construct is useful when you want to capture part, but not all, of one of a number of alternatives. Inside a -`(?|' group, parentheses are numbered as usual, but the number is +‘(?|’ group, parentheses are numbered as usual, but the number is reset at the start of each branch. The numbers of any capturing buffers that follow the subpattern start after the highest number used in any branch. The following example is taken from the Perl @@ -1062,8 +1062,8 @@ and the Python syntax. and the Python syntax.
In PCRE, a subpattern can be named in one of three ways: -`(?<name>...)' or `(?'name'...)' as in Perl, or -`(?P<name>...)' as in Python. References to capturing +‘(?<name>...)’ or ‘(?'name'...)’ as in Perl, or +‘(?P<name>...)’ as in Python. References to capturing parentheses from other parts of the pattern, such as backreferences, recursion, and conditions, can be made by name as well as by number. @@ -1072,7 +1072,7 @@ as well as names, exactly as if the name as well as names, exactly as if the names were not present.
By default, a name must be unique within a pattern, but it is possible -to relax this constraint by setting the `(?J)' option. This can +to relax this constraint by setting the ‘(?J)’ option. This can be useful for patterns where only one instance of the named parentheses can match. Suppose you want to match the name of a weekday, either as a 3-letter abbreviation or as the full name, and in @@ -1100,10 +1100,10 @@ the following items:
matches `zz', `zzz', or `zzzz'. A closing brace on its +
matches ‘zz’, ‘zzz’, or ‘zzzz’. A closing brace on its own is not a special character. If the second number is omitted, but the comma is present, there is no upper limit; if the second number and the comma are both omitted, the quantifier specifies an exact @@ -1138,17 +1138,17 @@ the syntax of a quantifier, is taken as
matches exactly 8 digits. An opening curly bracket that appears in a position where a quantifier is not allowed, or one that does not match the syntax of a quantifier, is taken as a literal character. For -example, `{,6}' is not a quantifier, but a literal string of four +example, ‘{,6}’ is not a quantifier, but a literal string of four characters.
In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to -individual bytes. Thus, for example, `\x{100}{2}' matches two +individual bytes. Thus, for example, ‘\x{100}{2}’ matches two UTF-8 characters, each of which is represented by a two-byte -sequence. Similarly, `\X{3}' matches three Unicode extended +sequence. Similarly, ‘\X{3}’ matches three Unicode extended sequences, each of which may be several bytes long (and they may be of different lengths). -
The quantifier `{0}' is permitted, causing the expression to +
The quantifier ‘{0}’ is permitted, causing the expression to behave as if the previous item and the quantifier were not present.
For convenience, the three most common quantifiers have @@ -1177,8 +1177,8 @@ of where this gives problems is in tryin much as possible (up to the maximum number of permitted times), without causing the rest of the pattern to fail. The classic example of where this gives problems is in trying to match comments in C -programs. These appear between `/*' and `*/', and within the -comment, individual `*' and `/' characters may appear. An +programs. These appear between ‘/*’ and ‘*/’, and within the +comment, individual ‘*’ and ‘/’ characters may appear. An attempt to match C comments by applying the pattern
@@ -1192,7 +1192,7 @@ attempt to match C comments by applying
fails, because it matches the entire string owing to the greediness of -the `.*' item. +the ‘.*’ item.
However, if a quantifier is followed by a question mark, it ceases to be greedy, and instead matches the minimum number of times possible, @@ -1215,7 +1215,7 @@ way the rest of the pattern matches.
which matches one digit by preference, but can match two if that is the only way the rest of the pattern matches. -
If the `(?U)' option is set (an option that is not available in +
If the ‘(?U)’ option is set (an option that is not available in Perl), the quantifiers are not greedy by default, but individual ones can be made greedy by following them with a question mark. In other words, it inverts the default behaviour. @@ -1224,17 +1224,17 @@ compiled pattern, in proportion to the s is greater than 1 or with a limited maximum, more memory is required for the compiled pattern, in proportion to the size of the minimum or maximum. -
If a pattern starts with `.*' or `.{0,}' and the -`(?s)' option is set, thus allowing the dot to match newlines, +
If a pattern starts with ‘.*’ or ‘.{0,}’ and the +‘(?s)’ option is set, thus allowing the dot to match newlines, the pattern is implicitly anchored, because whatever follows will be tried against every character position in the subject string, so there is no point in retrying the overall match at any position after the first. PCRE normally treats such a pattern as though it were preceded -by `\A'. +by ‘\A’.
In cases where it is known that the subject string contains no -newlines, it is worth setting `(?s)' in order to obtain this -optimization, or alternatively using `^' or `\A' to indicate +newlines, it is worth setting ‘(?s)’ in order to obtain this +optimization, or alternatively using ‘^’ or ‘\A’ to indicate anchoring explicitly.
However, there is one situation where the optimization cannot be @@ -1246,7 +1246,7 @@ fail where a later one succeeds. Conside (.*)abc\1 -
If the subject is `xyz123abc123' the match point is the fourth +
If the subject is ‘xyz123abc123’ the match point is the fourth character. For this reason, such a pattern is not implicitly anchored.
When a capturing subpattern is repeated, the value captured is the @@ -1256,8 +1256,8 @@ substring that matched the final iterati (tweedle[dume]{3}\s*)+ -
has matched `tweedledum tweedledee' the value of the captured -substring is `tweedledee'. However, if there are nested capturing +
has matched ‘tweedledum tweedledee’ the value of the captured +substring is ‘tweedledee’. However, if there are nested capturing subpatterns, the corresponding captured values may have been set in previous iterations. For example, after @@ -1265,7 +1265,7 @@ previous iterations. For example, after (a|(b))+ -
matches `aba' the value of the second captured substring is `b'. +
matches ‘aba’ the value of the second captured substring is ‘b’.
Consider, for example, the pattern `\d+foo' when applied to the +
Consider, for example, the pattern ‘\d+foo’ when applied to the subject line
123456bar-
After matching all 6 digits and then failing to match `foo', the +
After matching all 6 digits and then failing to match ‘foo’, the normal action of the matcher is to try again with only 5 digits -matching the `\d+' item, and then with 4, and so on, before +matching the ‘\d+’ item, and then with 4, and so on, before ultimately failing. Atomic grouping (a term taken from Jeffrey Friedl's book) provides the means for specifying that once a subpattern has matched, it is not to be re-evaluated in this way.
If we use atomic grouping for the previous example, the matcher gives -up immediately on failing to match `foo' the first time. The -notation is a kind of special parenthesis, starting with `(?>' as in +up immediately on failing to match ‘foo’ the first time. The +notation is a kind of special parenthesis, starting with ‘(?>’ as in this example:
@@ -1312,15 +1312,15 @@ maximizing repeat that must swallow everSimple cases such as the above example can be thought of as a maximizing repeat that must swallow everything it can. So, while both -`\d+' and `\d+?' are prepared to adjust the number of digits +‘\d+’ and ‘\d+?’ are prepared to adjust the number of digits they match in order to make the rest of the pattern match, -`(?>\d+)' can only match an entire sequence of digits. +‘(?>\d+)’ can only match an entire sequence of digits.
Atomic groups in general can of course contain arbitrarily complicated subpatterns, and can be nested. However, when the subpattern for an atomic group is just a single repeated item, as in the example above, a simpler notation, called a possessive quantifier, can be -used. This consists of an additional `+' character following a +used. This consists of an additional ‘+’ character following a quantifier. Using this notation, the previous example can be rewritten as @@ -1336,7 +1336,7 @@ example:
Possessive quantifiers are always greedy; the setting of the -`(?U)' option is ignored. They are a convenient notation for the +‘(?U)’ option is ignored. They are a convenient notation for the simpler forms of atomic group. However, there is no difference in the meaning of a possessive quantifier and the equivalent atomic group, though there may be a performance difference; possessive quantifiers @@ -1349,9 +1349,9 @@ ultimately found its way into Perl at re ultimately found its way into Perl at release 5.10.
PCRE has an optimization that automatically “possessifies” certain -simple pattern constructs. For example, the sequence `A+B' is -treated as `A++B' because there is no point in backtracking into -a sequence of `A's when `B' must follow. +simple pattern constructs. For example, the sequence ‘A+B’ is +treated as ‘A++B’ because there is no point in backtracking into +a sequence of ‘A’s when ‘B’ must follow.
When a pattern contains an unlimited repeat inside a subpattern that can itself be repeated an unlimited number of times, the use of an @@ -1363,8 +1363,8 @@ very long time indeed. The pattern
matches an unlimited number of substrings that either consist of -non-digits, or digits enclosed in `<>', followed by either -`!' or `?'. When it matches, it runs quickly. However, if it +non-digits, or digits enclosed in ‘<>’, followed by either +‘!’ or ‘?’. When it matches, it runs quickly. However, if it is applied to
@@ -1372,9 +1372,9 @@ is applied to
it takes a long time before reporting failure. This is because the -string can be divided between the internal `\D+' repeat and the -external `*' repeat in a large number of ways, and all have to be -tried. (The example uses `[!?]' rather than a single character at +string can be divided between the internal ‘\D+’ repeat and the +external ‘*’ repeat in a large number of ways, and all have to be +tried. (The example uses ‘[!?]’ rather than a single character at the end, because both PCRE and Perl have an optimization that allows for fast failure when a single character is used. They remember the last single character that is required for a match, and fail early if @@ -1406,7 +1406,7 @@ subpattern whose number is 10 or more us
It is not possible to have a numerical “forward back reference” to a subpattern whose number is 10 or more using this syntax because a -sequence such as `\50' is interpreted as a character defined in +sequence such as ‘\50’ is interpreted as a character defined in octal. See the subsection entitled “Non-printing characters” above for further details of the handling of digits following a backslash. There is no such problem when named parentheses are used. A @@ -1414,7 +1414,7 @@ back reference to any subpattern is poss (see below).
Another way of avoiding the ambiguity inherent in the use of digits -following a backslash is to use the `\g' escape sequence, which +following a backslash is to use the ‘\g’ escape sequence, which is a feature introduced in Perl 5.10. This escape must be followed by an unsigned number or a negative number, optionally enclosed in braces. These examples are all identical: @@ -1434,10 +1434,10 @@ reference. Consider this example: (abc(def)ghi)\g{-1} -
The sequence `\g{-1}' is a reference to the most recently -started capturing subpattern before `\g', that is, is it -equivalent to `\2'. Similarly, `\g{-2}' would be -equivalent to `\1'. The use of relative references can be helpful +
The sequence ‘\g{-1}’ is a reference to the most recently +started capturing subpattern before ‘\g’, that is, it is +equivalent to ‘\2’. Similarly, ‘\g{-2}’ would be +equivalent to ‘\1’. The use of relative references can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves. @@ -1450,8 +1450,8 @@ for a way of doing that). So the pattern (sens|respons)e and \1ibility -
matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If caseful +
matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If caseful matching is in force at the time of the back reference, the case of letters is relevant. For example, @@ -1459,14 +1459,14 @@ letters is relevant. For example, ((?i)rah)\s+\1 -
matches `rah rah' and `RAH RAH', but not `RAH rah', +
matches ‘rah rah’ and ‘RAH RAH’, but not ‘RAH rah’, even though the original capturing subpattern is matched caselessly.
There are several different ways of writing back references to named -subpatterns. The .NET syntax `\k{name}' and the Perl syntax -`\k<name>' or `\k'name'' are supported, as is the Python +subpatterns. The .NET syntax ‘\k{name}’ and the Perl syntax +‘\k<name>’ or ‘\k'name'’ are supported, as is the Python syntax (?P=name). Perl 5.10's unified back reference syntax, in which -`\g' can be used for both numeric and named references, is also +‘\g’ can be used for both numeric and named references, is also supported. We could rewrite the above example in any of the following ways: @@ -1488,16 +1488,16 @@ references to it always fail. For exampl (a|(bc))\2 -
always fails if it starts to match `a' rather than -`bc'. Because there may be many capturing parentheses in a +
always fails if it starts to match ‘a’ rather than +‘bc’. Because there may be many capturing parentheses in a pattern, all digits following the backslash are taken as part of a potential back reference number. If the pattern continues with a digit character, some delimiter must be used to terminate the back -reference. If the `(?x)' option is set, this can be whitespace. +reference. If the ‘(?x)’ option is set, this can be whitespace. Otherwise an empty comment (see “Comments” below) can be used.
A back reference that occurs inside the parentheses to which it refers -fails when the subpattern is first used, so, for example, `(a\1)' +fails when the subpattern is first used, so, for example, ‘(a\1)’ never matches. However, such references can be useful inside repeated subpatterns. For example, the pattern @@ -1505,7 +1505,7 @@ subpatterns. For example, the pattern (a|b\1)+ -
matches any number of `a's and also `aba', `ababbaa' +
matches any number of ‘a’s and also ‘aba’, ‘ababbaa’ etc. At each iteration of the subpattern, the back reference matches the character string corresponding to the previous iteration. In order for this to work, the pattern must be such that the first iteration @@ -1517,8 +1517,8 @@ current matching point that does not act
An assertion is a test on the characters following or preceding the current matching point that does not actually consume any -characters. The simple assertions coded as `\b', `\B', -`\A', `\G', `\Z', `\z', `^' and `$' are +characters. The simple assertions coded as ‘\b’, ‘\B’, +‘\A’, ‘\G’, ‘\Z’, ‘\z’, ‘^’ and ‘$’ are described above.
More complicated assertions are coded as subpatterns. There are two @@ -1537,8 +1537,8 @@ sense for negative assertions.
Lookahead assertions start with `(?=' for positive assertions and -`(?!' for negative assertions. For example, +
Lookahead assertions start with ‘(?=’ for positive assertions and +‘(?!’ for negative assertions. For example,
\w+(?=;) @@ -1551,35 +1551,35 @@ the match, and foo(?!bar)-
matches any occurrence of `foo' that is not followed by -`bar'. Note that the apparently similar pattern +
matches any occurrence of ‘foo’ that is not followed by +‘bar’. Note that the apparently similar pattern
(?!foo)bar-
does not find an occurrence of `bar' that is preceded by -something other than `foo'; it finds any occurrence of `bar' -whatsoever, because the assertion `(?!foo)' is always true when -the next three characters are `bar'. A lookbehind assertion is +
does not find an occurrence of ‘bar’ that is preceded by +something other than ‘foo’; it finds any occurrence of ‘bar’ +whatsoever, because the assertion ‘(?!foo)’ is always true when +the next three characters are ‘bar’. A lookbehind assertion is needed to achieve the other effect.
If you want to force a matching failure at some point in a pattern, -the most convenient way to do it is with `(?!)' because an empty +the most convenient way to do it is with ‘(?!)’ because an empty string always matches, so an assertion that requires there not to be an empty string must always fail.
Lookbehind assertions start with `(?<=' for positive assertions -and `(?<!' for negative assertions. For example, +
Lookbehind assertions start with ‘(?<=’ for positive assertions +and ‘(?<!’ for negative assertions. For example,
(?<!foo)bar-
matches an occurrence of `bar' that is not preceded by -`foo'. The contents of a lookbehind assertion are restricted such +
matches an occurrence of ‘bar’ that is not preceded by +‘foo’. The contents of a lookbehind assertion are restricted such that all the strings it matches must have a fixed length. However, if there are several top-level alternatives, they do not all have to have the same fixed length. Thus @@ -1611,7 +1611,7 @@ lengths, but it is acceptable if rewritt (?<=abc|abde) -
In some cases, the Perl 5.10 escape sequence `\K' (see above) can +
In some cases, the Perl 5.10 escape sequence ‘\K’ (see above) can be used instead of a lookbehind assertion; this is not restricted to a fixed-length. @@ -1620,10 +1620,10 @@ current position, the assertion fails. then try to match. If there are insufficient characters before the current position, the assertion fails. -
PCRE does not allow the `\C' escape (which matches a single byte +
PCRE does not allow the ‘\C’ escape (which matches a single byte in UTF-8 mode) to appear in lookbehind assertions, because it makes it -impossible to calculate the length of the lookbehind. The `\X' -and `\R' escapes, which can match different numbers of bytes, are +impossible to calculate the length of the lookbehind. The ‘\X’ +and ‘\R’ escapes, which can match different numbers of bytes, are also not permitted.
Possessive quantifiers can be used in conjunction with lookbehind @@ -1635,7 +1635,7 @@ string. Consider a simple pattern such a
when applied to a long string that does not match. Because matching -proceeds from left to right, PCRE will look for each `a' in the +proceeds from left to right, PCRE will look for each ‘a’ in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as @@ -1643,10 +1643,10 @@ pattern. If the pattern is specified as ^.*abcd$ -
the initial `.*' matches the entire string at first, but when this fails -(because there is no following `a'), it backtracks to match all +
the initial ‘.*’ matches the entire string at first, but when this fails +(because there is no following ‘a’), it backtracks to match all but the last character, then all but the last two characters, and so -on. Once again the search for `a' covers the entire string, from +on. Once again the search for ‘a’ covers the entire string, from right to left, so we are no better off. However, if the pattern is written as @@ -1654,7 +1654,7 @@ written as ^.*+(?<=abcd) -
there can be no backtracking for the `.*+' item; it can match +
there can be no backtracking for the ‘.*+’ item; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -1668,14 +1668,14 @@ difference to the processing time. (?<=\d{3})(?<!999)foo -
matches `foo' preceded by three digits that are not -`999'. Notice that each of the assertions is applied +
matches ‘foo’ preceded by three digits that are not +‘999’. Notice that each of the assertions is applied independently at the same point in the subject string. First there is a check that the previous three characters are all digits, and then -there is a check that the same three characters are not `999'. -This pattern does not match `foo' preceded by six +there is a check that the same three characters are not ‘999’. +This pattern does not match ‘foo’ preceded by six characters, the first of which are digits and the last three of which -are not `999'. For example, it doesn't match `123abcfoo'. A +are not ‘999’. For example, it doesn't match ‘123abcfoo’. A pattern to do that is
@@ -1685,7 +1685,7 @@ assertion checks that the preceding thre-This time the first assertion looks at the preceding six characters, checking that the first three are digits, and then the second assertion checks that the preceding three characters are not -`999'. +‘999’.
Assertions can be nested in any combination. For example, @@ -1693,15 +1693,15 @@ assertion checks that the preceding thre (?<=(?<!foo)bar)baz
matches an occurrence of `baz' that is preceded by `bar' -which in turn is not preceded by `foo', while +
matches an occurrence of ‘baz’ that is preceded by ‘bar’ +which in turn is not preceded by ‘foo’, while
(?<=\d{3}(?!999)...)foo-
is another pattern that matches `foo' preceded by three digits -and any three characters that are not `999'. +
is another pattern that matches ‘foo’ preceded by three digits +and any three characters that are not ‘999’.
There are four kinds of condition: references to subpatterns, -references to recursion, a pseudo-condition called `DEFINE', and +references to recursion, a pseudo-condition called ‘DEFINE’, and assertions.
Consider the following pattern, which contains non-significant white space to make it more readable and to divide it into three parts for -ease of discussion (assume a preceding `(?x)'): +ease of discussion (assume a preceding ‘(?x)’):
( \( )? [^()]+ (?(1) \) ) @@ -1765,10 +1765,10 @@ relative reference:-Checking for a used subpattern by name
-Perl uses the syntax `(?(<name>)...)' or `(?('name')...)' to +
Perl uses the syntax ‘(?(<name>)...)’ or ‘(?('name')...)’ to test for a used subpattern by name. For compatibility with earlier versions of PCRE, which had this facility before Perl, the syntax -`(?(name)...)' is also recognized. However, there is a possible +‘(?(name)...)’ is also recognized. However, there is a possible ambiguity with this syntax, because subpattern names may consist entirely of digits. PCRE looks first for a named subpattern; if it cannot find one and the name consists entirely of digits, PCRE looks @@ -1784,10 +1784,10 @@ recommended.
Checking for pattern recursion
-If the condition is the string `(R)', and there is no subpattern -with the name `R', the condition is true if a recursive call to +
If the condition is the string ‘(R)’, and there is no subpattern +with the name ‘R’, the condition is true if a recursive call to the whole pattern or any subpattern has been made. If digits or a name -preceded by ampersand follow the letter `R', for example: +preceded by ampersand follow the letter ‘R’, for example:
(?(R3)...) or (?(R&name)...) @@ -1802,8 +1802,8 @@ patterns are described below.-Defining subpatterns for use by reference only
-If the condition is the string `(DEFINE)', and there is no -subpattern with the name `DEFINE', the condition is always +
If the condition is the string ‘(DEFINE)’, and there is no +subpattern with the name ‘DEFINE’, the condition is always false. In this case, there may be only one alternative in the subpattern. It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define @@ -1844,18 +1844,18 @@ otherwise it is matched against the seco it tests for the presence of at least one letter in the subject. If a letter is found, the subject is matched against the first alternative; otherwise it is matched against the second. This pattern matches -strings in one of the two forms `dd-aaa-dd' or -`dd-dd-dd', where aaa are letters and +strings in one of the two forms ‘dd-aaa-dd’ or +‘dd-dd-dd’, where aaa are letters and dd are digits.
Comments
-The sequence `(?#' marks the start of a comment that continues up +
The sequence ‘(?#’ marks the start of a comment that continues up to the next closing parenthesis. Nested parentheses are not permitted. The characters that make up a comment play no part in the pattern matching at all. -
If the `(?x)' option is set, an unescaped `#' character +
If the ‘(?x)’ option is set, an unescaped ‘#’ character outside a character class introduces a comment that continues to immediately after the next newline in the pattern. @@ -1872,11 +1872,11 @@ release 5.10. PCRE and Python, this kind of recursion was introduced into Perl at release 5.10. -
A special item that consists of `(?' followed by a number greater +
A special item that consists of ‘(?’ followed by a number greater than zero and a closing parenthesis is a recursive call of the subpattern of the given number, provided that it occurs inside that subpattern. (If not, it is a subroutine call, which is described in -the next section.) The special item `(?R)' or `(?0)' is a +the next section.) The special item ‘(?R)’ or ‘(?0)’ is a recursive call of the entire regular expression.
In PCRE (like Python, but unlike Perl), a recursive subpattern call is @@ -1908,21 +1908,21 @@ tricky. This is made easier by the use o
In a larger pattern, keeping track of parenthesis numbers can be tricky. This is made easier by the use of relative references. (A Perl -5.10 feature.) Instead of `(?1)' in the pattern above you can -write `(?-2)' to refer to the second most recently opened +5.10 feature.) Instead of ‘(?1)’ in the pattern above you can +write ‘(?-2)’ to refer to the second most recently opened parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered.
It is also possible to refer to subsequently opened parentheses, by -writing references such as `(?+2)'. However, these cannot be +writing references such as ‘(?+2)’. However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always subroutine calls, as described in the next section.
An alternative approach is to use named parentheses instead. The Perl -syntax for this is `(?&name)'; PCRE's earlier syntax -`(?P>name)' is also supported. We could rewrite the above example +syntax for this is ‘(?&name)’; PCRE's earlier syntax +‘(?P>name)’ is also supported. We could rewrite the above example as follows:
@@ -1944,7 +1944,7 @@ runs for a very long time indeed because-it fails quickly. However, if atomic grouping is not used, the match runs for a very long time indeed because there are so many different -ways the `+' and `*' repeats can carve up the subject, and +ways the ‘+’ and ‘*’ repeats can carve up the subject, and all have to be tested before failure can be reported.
At the end of a match, the values set for any capturing subpatterns @@ -1955,7 +1955,7 @@ subpattern value is set. If the pattern (ab(cd)ef)
the value for the capturing parentheses is `ef', which is the +
the value for the capturing parentheses is ‘ef’, which is the last value taken on at the top level. If additional parentheses are added, giving @@ -1978,9 +1978,9 @@ characters are permitted at the outer le < (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
In this pattern, `(?(R)' is the start of a conditional +
In this pattern, ‘(?(R)’ is the start of a conditional subpattern, with two different alternatives for the recursive and -non-recursive cases. The `(?R)' item is the actual recursive +non-recursive cases. The ‘(?R)’ item is the actual recursive call.
Subpatterns as Subroutines
@@ -2003,15 +2003,15 @@ reference can be absolute or relative, a (sens|respons)e and \1ibility
matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If instead +
matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If instead the pattern
(sens|respons)e and (?1)ibility-
is used, it does match `sense and responsibility' as well as the +
is used, it does match ‘sense and responsibility’ as well as the other two strings. Another example is given in the discussion of DEFINE above. @@ -2029,7 +2029,7 @@ pattern: (abc)(?i:(?-1)) -
It matches `abcabc'. It does not match `abcABC' because the +
It matches ‘abcabc’. It does not match ‘abcABC’ because the change of processing option does not affect the called subpattern.
The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. In Perl, they are -generally of the form `(*VERB:ARG)' but PCRE does not support the -use of arguments, so its general form is just `(*VERB)'. Any +generally of the form ‘(*VERB:ARG)’ but PCRE does not support the +use of arguments, so its general form is just ‘(*VERB)’. Any number of these verbs may occur in a pattern. There are two kinds:
A(A|B(*ACCEPT)|C)D-
This matches `AB', `AAD', or `ACD', but when it matches -`AB', no data is captured. +
This matches ‘AB’, ‘AAD’, or ‘ACD’, but when it matches +‘AB’, no data is captured.
(*FAIL)
or (*F)
This matches `xxaab' but not `aacaab'. It can be thought of +
This matches ‘xxaab’ but not ‘aacaab’. It can be thought of as a kind of dynamic anchor, or “I've started, so I must finish.”
(*PRUNE)
If the subject is `aaaac...', after the first match attempt fails +
If the subject is ‘aaaac...’, after the first match attempt fails (starting at the first character in the string), the starting point -skips on to start the next attempt at `c'. Note that a possessive +skips on to start the next attempt at ‘c’. Note that a possessive quantifier does not have the same effect in this example; although it would suppress backtracking during the first match attempt, the second attempt would start at the second character instead of skipping on to -`c'. +‘c’.
(*THEN)
If the `COND1' pattern matches, `FOO' is tried (and possibly -further items after the end of the group if `FOO' succeeds); on +
If the ‘COND1’ pattern matches, ‘FOO’ is tried (and possibly
+further items after the end of the group if ‘FOO’ succeeds); on
failure the matcher skips to the second alternative and tries
-`COND2', without backtracking into COND1. If (*THEN)
is used
+‘COND2’, without backtracking into COND1. If (*THEN)
is used
outside of any alternation, it acts exactly like (*PRUNE)
.
============================================================
--- docs/Regexp-Summary.html 3bd9b164c47cfebf0b14099e9ca05f3be4d2cb11
+++ docs/Regexp-Summary.html cb5173340dc7d085bb9b31fee5beb7af838d7b23
@@ -1,9 +1,9 @@
\X
`\d', `\D', `\s', `\S', `\w', and `\W' +
‘\d’, ‘\D’, ‘\s’, ‘\S’, ‘\w’, and ‘\W’ recognize only ASCII characters. -
C
Zs
Arabic,
Armenian,
@@ -221,12 +221,12 @@ Yi.
punct
space
upper
word
word
xdigit
In PCRE, POSIX character set names recognize only ASCII -characters. You can use `\Q...\E' inside a character class. +characters. You can use ‘\Q...\E’ inside a character class.
(*ACCEPT)
(*FAIL)
(*FAIL)
The following act only when a subsequent match failure causes a backtrack to @@ -390,15 +390,15 @@ pattern is not anchored.
These are recognized only at the very start of the pattern or after a -`(*BSR_...)' option. +‘(*BSR_...)’ option.
(*CR)
(*LF)
(*CRLF)
(*ANYCRLF)
(*ANY)
These are recognized only at the very start of the pattern or after a -`(*...)' option that sets the newline convention. +‘(*...)’ option that sets the newline convention.
(*BSR_ANYCRLF)
(*BSR_UNICODE)
Then the following files are created:
directory/ directory/Makefile directory/include/hello.h directory/src/hello.c -+
If you wish to checkout in the current directory, you can supply the special name . (a single period) for directory. When running checkout into an existing ============================================================ --- docs/Tutorial.html 5d27fde6085db7cb82e86332657d6e73de2dbf97 +++ docs/Tutorial.html 28141be0386259fcd886a517f7de6b6c2a04af53 @@ -1,9 +1,9 @@
$ mtn rename . new_root/put_old $ mtn rename new_root . -+
Except, of course, that these rename commands are illegal, because after the first command the tree has no root at all, and there is a directory loop. This illegality is the only reason for ============================================================ --- docs/index.html 617ce735f037a9cd24f680774150561b890a2faa +++ docs/index.html 90d19f0dee2f65ff5a94c152a3f80558fa56e6ee @@ -1,9 +1,9 @@
Then the following files are created:
directory/ directory/Makefile directory/include/hello.h directory/src/hello.c -+
If you wish to checkout in the current directory, you can
supply the special name . (a single period) for
directory. When running checkout into an existing
@@ -3525,8 +3525,8 @@ edit your current workspace to include t
$ mtn rename . new_root/put_old $ mtn rename new_root . -+
Except, of course, that these rename commands are illegal,
because after the first command the tree has no root at all, and there
is a directory loop. This illegality is the only reason for
@@ -3643,11 +3643,11 @@ branches. Supposing Alice's computer has
alice.someisp.com
, then Alice might run:
$ mtn --bind=alice.someisp.com serve -+
And Bob might run
$ mtn sync alice.someisp.com "net.venge.monotone*" -+
When the operation completes, all branches matching
net.venge.monotone*
will be synchronized between Alice and Bob's
databases.
@@ -3659,7 +3659,7 @@ with Alice again, he can simply run:
with Alice again, he can simply run:
$ mtn sync -+
Of course, he can still sync with other people and other branches by passing an address or address plus globs on the command line; this will not affect his default affinity for Alice. If you ever @@ -3773,7 +3773,7 @@ example, suppose you enter this command fa36deead87811b0e15208da2853c39d2f6ebe90 fa36b76dd0139177b28b379fe1d56b22342e5306 fa36965ec190bee14c5afcac235f1b8e2239bb2a - +
Then monotone is telling you that there are 3 revisions it knows
about, in its database, which begin with the 4 hex digits
fa36
. This command is intended to be used by programmable
@@ -3788,7 +3788,7 @@ present users with additional informatio
01f5da490941bee1f0000f0561fc62eabfb2fa23 address@hidden 2003-12-03T03:14:35
01f992577bd8bcdcade0f89e724fd5dc2d2bbe8a address@hidden 2005-05-11T05:19:29
01faad191d8d0474777c70b4d606782942333a78 address@hidden 2005-04-11T04:24:01
-
+
--unified requests the “unified diff” format, the default. --context requests the “context diff” format (analogous to -running the program diff -c). Both of these formats are +running the program diff -c). Both of these formats are generated directly by monotone, using its built-in diff algorithm.
Sometimes, you may want more flexibility in output formats; for these
@@ -3900,7 +3900,7 @@ branch:
: * tests/t_cross.at: New test for merging merges.
: * testsuite.at: Call t_cross.at.
:
-
+
$ mtn ci -m"Changed foo to bar" enter passphrase for key ID address@hidden: $ mtn push -k address@hidden -+
In the second example, monotone automatically added the key to ssh-agent, making entering the passphrase not needed during the push. @@ -4072,7 +4072,7 @@ will cache the key for you. Identity added: /home/user/.ssh/id_monotone (/home/user/.ssh/id_monotone) $ mtn ci -m"Changed foo to bar" $ mtn push -k address@hidden - +
You can also use the --ssh-sign option to control whether ssh-agent will
be used for signing. If set to yes, ssh-agent will be used to sign. If your
key has not been added to ssh-agent monotone will fall back to its internal signing
@@ -5780,7 +5780,7 @@ 3.0
4c2c1d846fa561601254200918fba1fd71e6795d @@ -6684,7 +6684,7 @@ variable as in this example: end print("Ending note_mtn_startup") end -+
function get_author(branchname, keypair_id) -- Branch name ignored. if (keypair_id == "address@hidden") then @@ -6748,7 +6748,7 @@ definitions might be: end return keypair_id end -+
edit_comment (
commentary,
user_log_message)
function persist_phrase_ok()
return true
end
-
+
use_inodeprints ()
-
Returns
true
if you want monotone to automatically enable
Inodeprints support in all workspaces. Only affects working
@@ -6788,7 +6788,7 @@ copies created after you modify the hook
function use_inodeprints()
return false
end
-
+
ignore_file (
filename)
-
Returns
true
if filename should be ignored while adding,
dropping, or moving files. Otherwise returns false
. This is
@@ -6832,7 +6832,7 @@ the configuration directory. This file l
comment "everyone can read these branches"
pattern "net.example.{public,project}*"
allow "*"
-
+
This example allows everyone access to branches net.example.project
and
net.example.public
and their sub-branches, except for the branches in
net.example.project.security
and net.example.project.private
,
@@ -6978,7 +6978,7 @@ components:
end
return argv
end
-
+
use_transport_auth (
uri)
-
Returns a boolean indicating whether monotone should use transport
authentication mechanisms when communicating with uri. If this
@@ -7004,7 +7004,7 @@ authentication assumptions.
return true
end
end
-
+
get_mtn_command(
host)
-
Returns a string containing the monotone command to be executed on
host when communicating over ssh. The host
@@ -7016,7 +7016,7 @@ monotone binary is not in the default pa
function get_mtn_command(host)
return "mtn"
end
-
+
@@ -7075,7 +7075,7 @@ the intersection of tables) is the follo
return false
end
end
-
+
In this example, any revision certificate is trusted if it is signed
by at least one of three “trusted” keys, unless it is a
branch
certificate, in which case it must be signed by
@@ -7105,7 +7105,7 @@ version carrying the old_results
+
This definition accepts only those updates which preserve the set of
true
test results from update source to target. If no test
results exist, this hook has no effect; but once a true
test
@@ -7171,8 +7171,7 @@ you have a tool specific to certain file
you have a tool specific to certain file types.
-
-merge3 (
ancestor_path,
left_path,
right_path,
merged_path,
ancestor_text,
left_text,
right_text)
-
+
merge3 (
ancestor_path,
left_path,
right_path,
merged_path,
ancestor_text,
left_text,
right_text)
-
This hook is called to resolve merges that monotone could not resolve
automatically. The actual ancestor, left, and right contents of the
file are passed in the ancestor_text, left_text, and
@@ -7194,8 +7193,7 @@ local system. For details, see the code
and if not, then simply searches for whatever is installed on the
local system. For details, see the code in Default hooks.
-
get_preferred_merge3_command(
tbl)
-
+
get_preferred_merge3_command(
tbl)
-
Returns the results of running an external merge on three strings.
tbl wraps up the various arguments for each merge command and
is always provided by merge3. If there is a particular editor
@@ -7268,7 +7266,7 @@ attribute. Its definition is:
make_executable(filename)
end
end
-
+
attr_init_functions [
attribute] (
filename)
-
This is not a hook function, but a table of hook
functions. Each entry in the table
attr_init_functions
, at
@@ -7297,7 +7295,7 @@ definition is:
return nil
end
end
-
+
The binary_file
function is also defined as a Lua hook. See
Default hooks.
@@ -7358,7 +7356,7 @@ more of the following characters:
0x00 thru 0x06
0x0E thru 0x1a
0x1c thru 0x1f
-
+
include(
scriptfile)
-
This function tries to load and execute the script contained in
scriptfile. It returns true for success and false if there is an
@@ -7438,7 +7436,7 @@ the arguments.
thingy "baz"
spork
frob "oops"
-
+
The output table will be:
{
1 = { name = "thingy", values = { 1 = "foo", 2 = "bar" } },
@@ -7446,7 +7444,7 @@ the arguments.
3 = { name = "spork", values = { } },
4 = { name = "frob", values = { 1 = "oops" } }
}
-
+
regex.search(
regexp,
string)
-
Returns true if a match for regexp is found in string, returns
false otherwise. See Regexps, for the syntax of regexp.
@@ -7584,7 +7582,7 @@ an IDNA string is this:
an IDNA string is this:
{ACE-prefix}{LDH-sanitized(punycode(nameprep(UTF-8-string)))}
-
+
It is important to understand that IDNA encoding does not
preserve the input string: it both prohibits a wide variety of
possible strings and normalizes non-equal strings to supposedly
@@ -8880,10 +8878,10 @@ used in Monotone.
\X
- an extended Unicode sequence
-`\d', `\D', `\s', `\S', `\w', and `\W'
+
‘\d’, ‘\D’, ‘\s’, ‘\S’, ‘\w’, and ‘\W’
recognize only ASCII characters.
-
General category property codes for `\p' and `\P'
+General category property codes for ‘\p’ and ‘\P’
C
- Other
@@ -8932,7 +8930,7 @@ recognize only ASCII characters.
Zs
- Space separator
-Script names for `\p' and `\P'
+Script names for ‘\p’ and ‘\P’
Arabic,
Armenian,
@@ -9022,12 +9020,12 @@ Yi.
punct
- printing, excluding alphanumeric
space
- whitespace
upper
- upper case letter
-
word
- same as `\w'
+
word
- same as ‘\w’
xdigit
- hexadecimal digit
In PCRE, POSIX character set names recognize only ASCII
-characters. You can use `\Q...\E' inside a character class.
+characters. You can use ‘\Q...\E’ inside a character class.
Quantifiers
@@ -9173,7 +9171,7 @@ characters. You can use `
(*ACCEPT)
- force successful match
-
(*FAIL)
- force backtrack; synonym `(*F)'
+
(*FAIL)
- force backtrack; synonym ‘(*F)’
The following act only when a subsequent match failure causes a backtrack to
@@ -9191,15 +9189,15 @@ pattern is not anchored.
Newline Conventions
These are recognized only at the very start of the pattern or after a
-`(*BSR_...)' option.
+‘(*BSR_...)’ option.
(*CR)
(*LF)
(*CRLF)
(*ANYCRLF)
(*ANY)
-What `\R' Matches
+What ‘\R’ Matches
These are recognized only at the very start of the pattern or after a
-`(*...)' option that sets the newline convention.
+‘(*...)’ option that sets the newline convention.
(*BSR_ANYCRLF)
(*BSR_UNICODE)
@@ -9280,7 +9278,7 @@ brackets, the metacharacters are as foll
|
- start of alternative branch
(
- start subpattern
)
- end subpattern
-
?
- extends the meaning of `('
+
?
- extends the meaning of ‘(’
also 0 or 1 quantifier
also quantifier minimizer
*
- 0 or more quantifier
@@ -9310,22 +9308,22 @@ applies both inside and outside characte
that character may have. This use of backslash as an escape character
applies both inside and outside character classes.
-
For example, if you want to match a `*' character, you write
-`\*' in the pattern. This escaping action applies whether or not
+
For example, if you want to match a ‘*’ character, you write
+‘\*’ in the pattern. This escaping action applies whether or not
the following character would otherwise be interpreted as a
metacharacter, so it is always safe to precede a non-alphanumeric with
backslash to specify that it stands for itself. In particular, if you
-want to match a backslash, you write `\\'.
+want to match a backslash, you write ‘\\’.
-
If a pattern is compiled with the `(?x)' option, whitespace in
+
If a pattern is compiled with the ‘(?x)’ option, whitespace in
the pattern (other than in a character class) and characters between a
-`#' outside a character class and the next newline are
+‘#’ outside a character class and the next newline are
ignored. An escaping backslash can be used to include a whitespace or
-`#' character as part of the pattern.
+‘#’ character as part of the pattern.
If you want to remove the special meaning from a sequence of
-characters, you can do so by putting them between `\Q' and
-`\E'. The `\Q...\E' sequence is recognized both inside and
+characters, you can do so by putting them between ‘\Q’ and
+‘\E’. The ‘\Q...\E’ sequence is recognized both inside and
outside character classes.
Non-printing Characters
@@ -9350,36 +9348,36 @@ represents:
\x{
hhh...}
- character with hex code hhh...
-
The precise effect of `\cx' is as follows: if x is a lower
+
The precise effect of ‘\cx’ is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the
-character (hex 40) is inverted. Thus `\cz' becomes hex 1A (the
-<SUB> control character, in ASCII), but `\c{' becomes hex 3B
-(`;'), and `\c;' becomes hex 7B (`{').
+character (hex 40) is inverted. Thus ‘\cz’ becomes hex 1A (the
+<SUB> control character, in ASCII), but ‘\c{’ becomes hex 3B
+(‘;’), and ‘\c;’ becomes hex 7B (‘{’).
-
After `\x', from zero to two hexadecimal digits are read (letters
+
After ‘\x’, from zero to two hexadecimal digits are read (letters
can be in upper or lower case). Any number of hexadecimal digits may
-appear between `\x{' and `}', but the value of the
+appear between ‘\x{’ and ‘}’, but the value of the
character code must be less than 256 in non-UTF-8 mode, and less than
2^31 in UTF-8 mode. That is, the maximum value in hexadecimal is
7FFFFFFF. Note that this is bigger than the largest Unicode code
point, which is 10FFFF.
-
If characters other than hexadecimal digits appear between `\x{'
-and `}', or if there is no terminating `}', this form of
-escape is not recognized. Instead, the initial `\x' will be
+
If characters other than hexadecimal digits appear between ‘\x{’
+and ‘}’, or if there is no terminating ‘}’, this form of
+escape is not recognized. Instead, the initial ‘\x’ will be
interpreted as a basic hexadecimal escape, with no following digits,
giving a character whose value is zero.
Characters whose value is less than 256 can be defined by either of
-the two syntaxes for `\x'. There is no difference in the way they
-are handled. For example, `\xdc' is exactly the same as
-`\x{dc}'.
+the two syntaxes for ‘\x’. There is no difference in the way they
+are handled. For example, ‘\xdc’ is exactly the same as
+‘\x{dc}’.
-
After `\0' up to two further octal digits are read. If there are
+
After ‘\0’ up to two further octal digits are read. If there are
fewer than two digits, just those that are present are used. Thus the
-sequence `\0\x\07' specifies two binary zeros followed by a
+sequence ‘\0\x\07’ specifies two binary zeros followed by a
<BEL> character (octal 007). Make sure you supply two digits after
the initial zero if the pattern character that follows is itself an
octal digit.
@@ -9397,8 +9395,8 @@ themselves. In non-UTF-8 mode, the value
up to three octal digits following the backslash, and uses them to
generate a data character. Any subsequent digits stand for
themselves. In non-UTF-8 mode, the value of a character specified in
-octal must be less than `\400'. In UTF-8 mode, values up to
-`\777' are permitted. For example:
+octal must be less than ‘\400’. In UTF-8 mode, values up to
+‘\777’ are permitted. For example:
\040
- is another way of writing a space
@@ -9408,13 +9406,13 @@ octal must be less than `
\11
- might be a back reference, or another way of
writing a tab
\011
- is always a tab
-
\0113
- is a tab followed by the character `3'
+
\0113
- is a tab followed by the character ‘3’
\113
- might be a back reference, otherwise the
character with octal code 113
\377
- might be a back reference, otherwise
the byte consisting entirely of 1 bits
\81
- is either a back reference, or a binary zero
- followed by the two characters `8' and `1'
+ followed by the two characters ‘8’ and ‘1’
Note that octal values of 100 or greater must not be introduced by a
@@ -9422,18 +9420,18 @@ both inside and outside character classe
All the sequences that define a single character value can be used
both inside and outside character classes. In addition, inside a
-character class, the sequence `\b' is interpreted as the <BS>
-character (hex 08), and the sequences `\R' and `\X' are
-interpreted as the characters `R' and `X',
+character class, the sequence ‘\b’ is interpreted as the <BS>
+character (hex 08), and the sequences ‘\R’ and ‘\X’ are
+interpreted as the characters ‘R’ and ‘X’,
respectively. Outside a character class, these sequences have
different meanings (see below).
Absolute and Relative Back References
-The sequence `\g' followed by an unsigned or a negative number,
+
The sequence ‘\g’ followed by an unsigned or a negative number,
optionally enclosed in braces, is an absolute or relative back
reference. A named back reference can be coded as
-`\g{name}'. Back references are discussed later, following the
+‘\g{name}’. Back references are discussed later, following the
discussion of parenthesized subpatterns.
Generic character types
@@ -9463,18 +9461,18 @@ string, all of them fail, since there is
type. If the current matching point is at the end of the subject
string, all of them fail, since there is no character to match.
- For compatibility with Perl, `\s' does not match the <VT>
+
For compatibility with Perl, ‘\s’ does not match the <VT>
character (code 11). This makes it different from the POSIX
-“space” class. The `\s' characters are <TAB> (9), <LF>
+“space” class. The ‘\s’ characters are <TAB> (9), <LF>
(10), <FF> (12), <CR> (13), and <SPACE> (32).
In UTF-8 mode, characters with values greater than 128 never match
-`\d', `\s', or `\w', and always match `\D',
-`\S', and `\W'. These sequences retain their original
+‘\d’, ‘\s’, or ‘\w’, and always match ‘\D’,
+‘\S’, and ‘\W’. These sequences retain their original
meanings from before UTF-8 support was available, mainly for
efficiency reasons.
-
The sequences `\h', `\H', `\v', and `\V' are Perl
+
The sequences ‘\h’, ‘\H’, ‘\v’, and ‘\V’ are Perl
5.10 features. In contrast to the other sequences, these do match
certain high-valued codepoints in UTF-8 mode. The horizontal space
characters are:
@@ -9541,23 +9539,23 @@ the following five sequences:
(*CR)a.b
-
changes the convention to CR. That pattern matches `a\nb' because
+
changes the convention to CR. That pattern matches ‘a\nb’ because
LF is no longer a newline. Note that these special settings, which are
not Perl-compatible, are recognized only at the very start of a
pattern, and that they must be in upper case. If more than one of them
is present, the last one is used.
-
The newline convention does not affect what the `\R' escape
+
The newline convention does not affect what the ‘\R’ escape
sequence matches. By default, this is any Unicode newline sequence,
for Perl compatibility. However, this can be changed; see the
-description of `\R' below. A change of `\R' setting can be
+description of ‘\R’ below. A change of ‘\R’ setting can be
combined with a change of newline convention.
Newline Sequences
-Outside a character class, by default, the escape sequence `\R' matches
+
Outside a character class, by default, the escape sequence ‘\R’ matches
any Unicode newline sequence. This is a Perl 5.10 feature. In
-non-UTF-8 mode `\R' is equivalent to the following:
+non-UTF-8 mode ‘\R’ is equivalent to the following:
(?>\r\n|\n|\x0b|\f|\r|\x85)
@@ -9574,7 +9572,7 @@ and <PS> (paragraph separator, U+2028
)
and <PS> (paragraph separator, U+2029
).
- It is possible to change the meaning of `\R' by starting a +
It is possible to change the meaning of ‘\R’ by starting a pattern string with one of the following sequences:
Inside a character class, `\R' matches the letter `R'. +
Inside a character class, ‘\R’ matches the letter ‘R’.
The property names represented by xx above are limited to the -Unicode script names, the general category properties, and `Any', +Unicode script names, the general category properties, and ‘Any’, which matches any character (including newline). Other properties such -as `InMusicalSymbols' are not currently supported by PCRE. Note -that `\P{Any}' does not match any characters, so always causes +as ‘InMusicalSymbols’ are not currently supported by PCRE. Note +that ‘\P{Any}’ does not match any characters, so always causes a match failure.
Sets of Unicode characters are defined as belonging to certain @@ -9696,9 +9694,9 @@ by including a circumflex between the op
Each character has exactly one general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be specified by including a circumflex between the opening brace and the property name. For -example, `\p{^Lu}' is the same as `\P{Lu}'. +example, ‘\p{^Lu}’ is the same as ‘\P{Lu}’. -
If only one letter is specified with `\p' or `\P', it +
If only one letter is specified with ‘\p’ or ‘\P’, it includes all the general category properties that start with that letter. In this case, in the absence of negation, the curly brackets in the escape sequence are optional; these two examples have the same @@ -9757,28 +9755,28 @@ effect:
Zs
The special property `L&' is also supported: it matches a -character that has the `Lu', `Ll', or `Lt' property, in +
The special property ‘L&’ is also supported: it matches a +character that has the ‘Lu’, ‘Ll’, or ‘Lt’ property, in other words, a letter that is not classified as a modifier or “other.” -
The `Cs' (Surrogate) property applies only to characters in the +
The ‘Cs’ (Surrogate) property applies only to characters in the
range U+D800
to U+DFFF
. Such characters are not valid in
UTF-8 strings (see RFC 3629) and so cannot be tested by PCRE.
The long synonyms for these properties that Perl supports (such as -`\p{Letter}') are not supported by PCRE, nor is it permitted to -prefix any of these properties with `Is'. +‘\p{Letter}’) are not supported by PCRE, nor is it permitted to +prefix any of these properties with ‘Is’. -
No character that is in the Unicode table has the `Cn' +
No character that is in the Unicode table has the ‘Cn’ (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table.
Specifying caseless matching does not affect these escape sequences. For -example, `\p{Lu}' always matches only upper case letters. +example, ‘\p{Lu}’ always matches only upper case letters. -
The `\X' escape matches any number of Unicode characters that -form an extended Unicode sequence. `\X' is equivalent to +
The ‘\X’ escape matches any number of Unicode characters that +form an extended Unicode sequence. ‘\X’ is equivalent to
(?>\PM\pM*) @@ -9789,16 +9787,16 @@ character. None of them have codepoints treats the sequence as an atomic group (see below). Characters with the “mark” property are typically accents that affect the preceding character. None of them have codepoints less than 256, so in non-UTF-8 -mode `\X' matches any one character. +mode ‘\X’ matches any one character.-Matching characters by Unicode property is not fast, because PCRE has to search a structure that contains data for over fifteen thousand characters. That is why the traditional escape sequences such as -`\d' and `\w' do not use Unicode properties in PCRE. +‘\d’ and ‘\w’ do not use Unicode properties in PCRE.
Resetting the Match Start
-The escape sequence `\K', which is a Perl 5.10 feature, causes +
The escape sequence ‘\K’, which is a Perl 5.10 feature, causes any previously matched characters not to be included in the final matched sequence. For example, the pattern: @@ -9806,18 +9804,18 @@ matched sequence. For example, the patte foo\Kbar
matches `foobar', but reports that it has matched -`bar'. This feature is similar to a lookbehind assertion +
matches ‘foobar’, but reports that it has matched +‘bar’. This feature is similar to a lookbehind assertion (described below). However, in this case, the part of the subject before the real match does not have to be of fixed length, as -lookbehind assertions do. The use of `\K' does not interfere with the +lookbehind assertions do. The use of ‘\K’ does not interfere with the setting of captured substrings. For example, when the pattern
(foo)\Kbar-
matches `foobar', the first substring is still set to `foo'. +
matches ‘foobar’, the first substring is still set to ‘foo’.
These assertions may not appear in character classes (but note that -`\b' has a different meaning, namely the backspace character, +‘\b’ has a different meaning, namely the backspace character, inside a character class).
A word boundary is a position in the subject string where the current -character and the previous character do not both match `\w' or -`\W' (i.e. one matches `\w' and the other matches -`\W'), or the start or end of the string if the first or last -character matches `\w', respectively. +character and the previous character do not both match ‘\w’ or +‘\W’ (i.e. one matches ‘\w’ and the other matches +‘\W’), or the start or end of the string if the first or last +character matches ‘\w’, respectively. -
The `\A', `\Z', and `\z' assertions differ from the +
The ‘\A’, ‘\Z’, and ‘\z’ assertions differ from the traditional circumflex and dollar (described in the next section) in that they only ever match at the very start and end of the subject string, whatever options are set. Thus, they are independent of -multiline mode. The difference between `\Z' and `\z' is that -`\Z' matches before a newline at the end of the string as well as -at the very end, whereas `\z' matches only at the end. +multiline mode. The difference between ‘\Z’ and ‘\z’ is that +‘\Z’ matches before a newline at the end of the string as well as +at the very end, whereas ‘\z’ matches only at the end. -
The `\G' assertion is true only when the current matching +
The ‘\G’ assertion is true only when the current matching position is at the start point of the match. As used in Monotone, -`\G' is always equal to `\A'. +‘\G’ is always equal to ‘\A’.
Outside a character class, in the default matching mode, the -circumflex character, `^', is an assertion that is true only if +circumflex character, ‘^’, is an assertion that is true only if the current matching point is at the start of the subject string. Inside a character class, circumflex has an entirely different meaning (see below). @@ -9875,7 +9873,7 @@ other constructs that can cause a patter subject, it is said to be an “anchored” pattern. (There are also other constructs that can cause a pattern to be anchored.) -
A dollar character, `$', is an assertion that is true only if the +
A dollar character, ‘$’, is an assertion that is true only if the current matching point is at the end of the subject string, or immediately before a newline at the end of the string (by default). Dollar need not be the last character of the pattern if a @@ -9884,23 +9882,23 @@ character class. character class.
The meanings of the circumflex and dollar characters are changed if -the `(?m)' option is set. When this is the case, a circumflex +the ‘(?m)’ option is set. When this is the case, a circumflex matches immediately after internal newlines as well as at the start of the subject string. It does not match after a newline that ends the string. A dollar matches before any newlines in the string, as well as -at the very end, when `(?m)' is set. When newline is specified as +at the very end, when ‘(?m)’ is set. When newline is specified as the two-character sequence <CR><LF>, isolated <CR> and <LF> characters do not indicate newlines. -
For example, the pattern `^abc$' matches the subject string -`def\nabc' (where `\n' represents a newline) in multiline +
For example, the pattern ‘^abc$’ matches the subject string +‘def\nabc’ (where ‘\n’ represents a newline) in multiline mode, but not otherwise. Consequently, patterns that are anchored in single line mode because all branches start with ^ are not anchored in multiline mode. -
Note that the sequences `\A', `\Z', and `\z' can be +
Note that the sequences ‘\A’, ‘\Z’, and ‘\z’ can be used to match the start and end of the subject in both modes, and if -all branches of a pattern start with `\A' it is always anchored. +all branches of a pattern start with ‘\A’ it is always anchored.
The behaviour of dot with regard to newlines can be changed. If the -`(?s)' option is set, a dot matches any one character, without +‘(?s)’ option is set, a dot matches any one character, without exception. If the two-character sequence <CR><LF> is present in the subject string, it takes two dots to match it. @@ -9928,15 +9926,15 @@ special meaning in a character class.
Outside a character class, the escape sequence `\C' matches any +
Outside a character class, the escape sequence ‘\C’ matches any one byte, both in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending characters. The feature is provided in Perl in order to match individual bytes in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes, what remains in the string may -be a malformed UTF-8 string. For this reason, the `\C' escape +be a malformed UTF-8 string. For this reason, the ‘\C’ escape sequence is best avoided. -
PCRE does not allow `\C' to appear in lookbehind assertions +
PCRE does not allow ‘\C’ to appear in lookbehind assertions (described below), because in UTF-8 mode this would make it impossible to calculate the length of the lookbehind. @@ -9956,8 +9954,8 @@ is not the first character, or escape it a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. -
For example, the character class `[aeiou]' matches any lower case -vowel, while `[^aeiou]' matches any character that is not a lower +
For example, the character class ‘[aeiou]’ matches any lower case +vowel, while ‘[^aeiou]’ matches any character that is not a lower case vowel. Note that a circumflex is just a convenient notation for specifying the characters that are in the class by enumerating those that are not. A class that starts with a circumflex is not an @@ -9965,13 +9963,13 @@ therefore it fails if the current pointe therefore it fails if the current pointer is at the end of the string.
In UTF-8 mode, characters with values greater than 255 can be included -in a class as a literal string of bytes, or by using the `\x{' +in a class as a literal string of bytes, or by using the ‘\x{’ escaping mechanism.
When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless -`[aeiou]' matches `A' as well as `a', and a caseless [^aeiou] -does not match `A', whereas a caseful version would. In UTF-8 mode, +‘[aeiou]’ matches ‘A’ as well as ‘a’, and a caseless [^aeiou] +does not match ‘A’, whereas a caseful version would. In UTF-8 mode, PCRE always understands the concept of case for characters whose values are less than 128, so caseless matching is always possible. For characters with higher values, the concept of case is supported if @@ -9982,43 +9980,43 @@ special way when matching character clas
Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending -sequence is in use, and whatever setting of the `(?s)' and -`(?m)' options is used. A class such as `[^a]' always +sequence is in use, and whatever setting of the ‘(?s)’ and +‘(?m)’ options is used. A class such as ‘[^a]’ always matches one of these characters.
The minus (hyphen) character can be used to specify a range of -characters in a character class. For example, `[d-m]' matches any -letter between `d' and `m', inclusive. If a minus character +characters in a character class. For example, ‘[d-m]’ matches any +letter between ‘d’ and ‘m’, inclusive. If a minus character is required in a class, it must be escaped with a backslash or appear in a position where it cannot be interpreted as indicating a range, typically as the first or last character in the class. -
It is not possible to have the literal character `]' as the end -character of a range. A pattern such as `[W-]46]' is interpreted -as a class of two characters (`W' and `-') followed by a -literal string `46]', so it would match `W46]' or -`-46]'. However, if the `]' is escaped with a backslash it -is interpreted as the end of range, so `[W-\]46]' is interpreted +
It is not possible to have the literal character ‘]’ as the end +character of a range. A pattern such as ‘[W-]46]’ is interpreted +as a class of two characters (‘W’ and ‘-’) followed by a +literal string ‘46]’, so it would match ‘W46]’ or +‘-46]’. However, if the ‘]’ is escaped with a backslash it +is interpreted as the end of range, so ‘[W-\]46]’ is interpreted as a class containing a range followed by two other characters. The -octal or hexadecimal representation of `]' can also be used to +octal or hexadecimal representation of ‘]’ can also be used to end a range.
Ranges operate in the collating sequence of character values. They can also be used for characters specified numerically, for example -`[\000-\037]'. In UTF-8 mode, ranges can include characters whose -values are greater than 255, for example `[\x{100}-\x{2ff}]'. +‘[\000-\037]’. In UTF-8 mode, ranges can include characters whose +values are greater than 255, for example ‘[\x{100}-\x{2ff}]’.
If a range that includes letters is used when caseless matching is -set, it matches the letters in either case. For example, `[W-c]' -is equivalent to `[][\\^_`wxyzabc]', matched caselessly. +set, it matches the letters in either case. For example, ‘[W-c]’ +is equivalent to ‘[][\\^_`wxyzabc]’, matched caselessly. -
The character types `\d', `\D', `\p', `\P', -`\s', `\S', `\w', and `\W' may also appear in a +
The character types ‘\d’, ‘\D’, ‘\p’, ‘\P’, +‘\s’, ‘\S’, ‘\w’, and ‘\W’ may also appear in a character class, and add the characters that they match to the -class. For example, `[\dABCDEF]' matches any hexadecimal digit. A +class. For example, ‘[\dABCDEF]’ matches any hexadecimal digit. A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching -lower case type. For example, the class `[^\W_]' matches any +lower case type. For example, the class ‘[^\W_]’ matches any letter or digit, but not underscore.
The only metacharacters that are recognized in character classes are @@ -10031,14 +10029,14 @@ escaping other non-alphanumeric characte
Perl supports the POSIX notation for character classes. This uses -names enclosed by `[:' and `:]' within the enclosing square +names enclosed by ‘[:’ and ‘:]’ within the enclosing square brackets. PCRE also supports this notation. For example,
[01[:alpha:]%]-
matches `0', `1', any alphabetic character, or `%'. The +
matches ‘0’, ‘1’, any alphabetic character, or ‘%’. The supported class names are
ascii
blank
cntrl
digit
digit
graph
lower
print
punct
space
space
upper
word
word
xdigit
The “space” characters are <HT> (9), <LF> (10), <VT> (11), <FF> (12), <CR> (13), and space (32). Notice that this list includes the <VT> character (code 11). This makes "space" -different to `\s', which does not include <VT> (for Perl +different to ‘\s’, which does not include <VT> (for Perl compatibility).
The name “word” is a Perl extension, and “blank” is a GNU extension from Perl 5.8. Another Perl extension is negation, which is -indicated by a `^' character after the colon. For example, +indicated by a ‘^’ character after the colon. For example,
[12[:^digit:]]-
matches `1', `2', or any non-digit. PCRE (and Perl) also -recognize the POSIX syntax `[.ch.]' and `[=ch=]' +
matches ‘1’, ‘2’, or any non-digit. PCRE (and Perl) also +recognize the POSIX syntax ‘[.ch.]’ and ‘[=ch=]’ where ch is a “collating element,” but these are not supported, and an error is given if they are encountered. @@ -10089,7 +10087,7 @@ example, the pattern gilbert|sullivan -
matches either `gilbert' or `sullivan'. Any number of +
matches either ‘gilbert’ or ‘sullivan’. Any number of alternatives may appear, and an empty alternative is permitted (matching the empty string). The matching process tries each alternative in turn, from left to right, and the first one that @@ -10100,13 +10098,13 @@ as the alternative in the subpattern.
The behavior of the matching engine can be adjusted from within the -pattern by a sequence of option letters enclosed between `(?' and -`)'. The option letters are +pattern by a sequence of option letters enclosed between ‘(?’ and +‘)’. The option letters are
i
m
m
s
x
For example, `(?im)' sets caseless, multiline matching. It is +
For example, ‘(?im)’ sets caseless, multiline matching. It is also possible to unset these options by preceding the letters with a -hyphen, and a combined setting and unsetting such as `(?im-sx)' +hyphen, and a combined setting and unsetting such as ‘(?im-sx)’ is also permitted. (This would set the caseless and multiline options while unsetting the dotall and extended-syntax options.) If a letter appears both before and after the hyphen, the option is unset. The @@ -10136,7 +10134,7 @@ current pattern that follows it, so (a(?i)b)c -
matches `abc' and `aBc' and no other strings. By this +
matches ‘abc’ and ‘aBc’ and no other strings. By this means, options can be made to have different settings in different parts of the pattern. Any changes made in one alternative do carry on into subsequent branches within the same subpattern. For example, @@ -10145,14 +10143,14 @@ into subsequent branches within the same (a(?i)b|c) -
matches `ab', `aB', `c', and `C', even though when -matching `C' the first branch is abandoned before the option +
matches ‘ab’, ‘aB’, ‘c’, and ‘C’, even though when +matching ‘C’ the first branch is abandoned before the option setting. This is because the effects of option settings happen when the pattern is parsed. There would be some very weird behaviour otherwise.
Note: Unlike these options, the similar, PCRE-specific option -sequences that start with `(*' may appear only at the very +sequences that start with ‘(*’ may appear only at the very beginning of the pattern. Details of these sequences are given in the section entitled “Newline sequences,” above. @@ -10169,9 +10167,9 @@ things: cat(aract|erpillar|) -
matches one of the words `cat', `cataract', or -`caterpillar'. Without the parentheses, it would match -`cataract', `erpillar' or an empty string. +
matches one of the words ‘cat’, ‘cataract’, or +‘caterpillar’. Without the parentheses, it would match +‘cataract’, ‘erpillar’ or an empty string.
For example, if the string `the red king' is matched against the pattern +
For example, if the string ‘the red king’ is matched against the pattern
the ((red|white) (king|queen))-
the captured substrings are `red king', `red', and -`king', and are numbered 1, 2, and 3, respectively. +
the captured substrings are ‘red king’, ‘red’, and +‘king’, and are numbered 1, 2, and 3, respectively.
The fact that plain parentheses fulfil two functions is not always @@ -10196,20 +10194,20 @@ capturing, and is not counted when compu without a capturing requirement. If an opening parenthesis is followed by a question mark and a colon, the subpattern does not do any capturing, and is not counted when computing the number of any -subsequent capturing subpatterns. For example, if the string `the -white queen' is matched against the pattern +subsequent capturing subpatterns. For example, if the string ‘the +white queen’ is matched against the pattern
the ((?:red|white) (king|queen))-
the captured substrings are `white queen' and `queen', and +
the captured substrings are ‘white queen’ and ‘queen’, and are numbered 1 and 2. The maximum number of capturing subpatterns is 65535.
As a convenient shorthand, if any option settings are required at the start of a non-capturing subpattern, the option letters may appear -between the `?' and the `:'. Thus the two patterns +between the ‘?’ and the ‘:’. Thus the two patterns
(?i:saturday|sunday) @@ -10219,26 +10217,26 @@ of the subpattern is reached, an optionmatch exactly the same set of strings. Because alternative branches are tried from left to right, and options are not reset until the end of the subpattern is reached, an option setting in one branch does -affect subsequent branches, so the above patterns match `SUNDAY' -as well as `Saturday'. +affect subsequent branches, so the above patterns match ‘SUNDAY’ +as well as ‘Saturday’.
Duplicate Subpattern Numbers
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses the same numbers for its capturing parentheses. Such a -subpattern starts with `(?|' and is itself a non-capturing +subpattern starts with ‘(?|’ and is itself a non-capturing subpattern. For example, consider this pattern:
(?|(Sat)ur|(Sun))day-Because the two alternatives are inside a `(?|' group, both sets +
Because the two alternatives are inside a ‘(?|’ group, both sets of capturing parentheses are numbered one. Thus, when the pattern matches, you can look at captured substring number one, whichever alternative matched. This construct is useful when you want to capture part, but not all, of one of a number of alternatives. Inside a -`(?|' group, parentheses are numbered as usual, but the number is +‘(?|’ group, parentheses are numbered as usual, but the number is reset at the start of each branch. The numbers of any capturing buffers that follow the subpattern start after the highest number used in any branch. The following example is taken from the Perl @@ -10269,8 +10267,8 @@ and the Python syntax. and the Python syntax.
In PCRE, a subpattern can be named in one of three ways: -`(?<name>...)' or `(?'name'...)' as in Perl, or -`(?P<name>...)' as in Python. References to capturing +‘(?<name>...)’ or ‘(?'name'...)’ as in Perl, or +‘(?P<name>...)’ as in Python. References to capturing parentheses from other parts of the pattern, such as backreferences, recursion, and conditions, can be made by name as well as by number. @@ -10279,7 +10277,7 @@ as well as names, exactly as if the name as well as names, exactly as if the names were not present.
By default, a name must be unique within a pattern, but it is possible -to relax this constraint by setting the `(?J)' option. This can +to relax this constraint by setting the ‘(?J)’ option. This can be useful for patterns where only one instance of the named parentheses can match. Suppose you want to match the name of a weekday, either as a 3-letter abbreviation or as the full name, and in @@ -10307,10 +10305,10 @@ the following items:
matches `zz', `zzz', or `zzzz'. A closing brace on its +
matches ‘zz’, ‘zzz’, or ‘zzzz’. A closing brace on its own is not a special character. If the second number is omitted, but the comma is present, there is no upper limit; if the second number and the comma are both omitted, the quantifier specifies an exact @@ -10345,17 +10343,17 @@ the syntax of a quantifier, is taken as
matches exactly 8 digits. An opening curly bracket that appears in a position where a quantifier is not allowed, or one that does not match the syntax of a quantifier, is taken as a literal character. For -example, `{,6}' is not a quantifier, but a literal string of four +example, ‘{,6}’ is not a quantifier, but a literal string of four characters.
In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to -individual bytes. Thus, for example, `\x{100}{2}' matches two +individual bytes. Thus, for example, ‘\x{100}{2}’ matches two UTF-8 characters, each of which is represented by a two-byte -sequence. Similarly, `\X{3}' matches three Unicode extended +sequence. Similarly, ‘\X{3}’ matches three Unicode extended sequences, each of which may be several bytes long (and they may be of different lengths). -
The quantifier `{0}' is permitted, causing the expression to +
The quantifier ‘{0}’ is permitted, causing the expression to behave as if the previous item and the quantifier were not present.
For convenience, the three most common quantifiers have @@ -10384,8 +10382,8 @@ of where this gives problems is in tryin much as possible (up to the maximum number of permitted times), without causing the rest of the pattern to fail. The classic example of where this gives problems is in trying to match comments in C -programs. These appear between `/*' and `*/', and within the -comment, individual `*' and `/' characters may appear. An +programs. These appear between ‘/*’ and ‘*/’, and within the +comment, individual ‘*’ and ‘/’ characters may appear. An attempt to match C comments by applying the pattern
@@ -10399,7 +10397,7 @@ attempt to match C comments by applying
fails, because it matches the entire string owing to the greediness of -the `.*' item. +the ‘.*’ item.
However, if a quantifier is followed by a question mark, it ceases to be greedy, and instead matches the minimum number of times possible, @@ -10422,7 +10420,7 @@ way the rest of the pattern matches.
which matches one digit by preference, but can match two if that is the only way the rest of the pattern matches. -
If the `(?U)' option is set (an option that is not available in +
If the ‘(?U)’ option is set (an option that is not available in Perl), the quantifiers are not greedy by default, but individual ones can be made greedy by following them with a question mark. In other words, it inverts the default behaviour. @@ -10431,17 +10429,17 @@ compiled pattern, in proportion to the s is greater than 1 or with a limited maximum, more memory is required for the compiled pattern, in proportion to the size of the minimum or maximum. -
If a pattern starts with `.*' or `.{0,}' and the -`(?s)' option is set, thus allowing the dot to match newlines, +
If a pattern starts with ‘.*’ or ‘.{0,}’ and the +‘(?s)’ option is set, thus allowing the dot to match newlines, the pattern is implicitly anchored, because whatever follows will be tried against every character position in the subject string, so there is no point in retrying the overall match at any position after the first. PCRE normally treats such a pattern as though it were preceded -by `\A'. +by ‘\A’.
In cases where it is known that the subject string contains no -newlines, it is worth setting `(?s)' in order to obtain this -optimization, or alternatively using `^' or `\A' to indicate +newlines, it is worth setting ‘(?s)’ in order to obtain this +optimization, or alternatively using ‘^’ or ‘\A’ to indicate anchoring explicitly.
However, there is one situation where the optimization cannot be @@ -10453,7 +10451,7 @@ fail where a later one succeeds. Conside (.*)abc\1 -
If the subject is `xyz123abc123' the match point is the fourth +
If the subject is ‘xyz123abc123’ the match point is the fourth character. For this reason, such a pattern is not implicitly anchored.
When a capturing subpattern is repeated, the value captured is the @@ -10463,8 +10461,8 @@ substring that matched the final iterati (tweedle[dume]{3}\s*)+ -
has matched `tweedledum tweedledee' the value of the captured -substring is `tweedledee'. However, if there are nested capturing +
has matched ‘tweedledum tweedledee’ the value of the captured +substring is ‘tweedledee’. However, if there are nested capturing subpatterns, the corresponding captured values may have been set in previous iterations. For example, after @@ -10472,7 +10470,7 @@ previous iterations. For example, after (a|(b))+ -
matches `aba' the value of the second captured substring is `b'. +
matches ‘aba’ the value of the second captured substring is ‘b’.
Consider, for example, the pattern `\d+foo' when applied to the +
Consider, for example, the pattern ‘\d+foo’ when applied to the subject line
123456bar-
After matching all 6 digits and then failing to match `foo', the +
After matching all 6 digits and then failing to match ‘foo’, the normal action of the matcher is to try again with only 5 digits -matching the `\d+' item, and then with 4, and so on, before +matching the ‘\d+’ item, and then with 4, and so on, before ultimately failing. Atomic grouping (a term taken from Jeffrey Friedl's book) provides the means for specifying that once a subpattern has matched, it is not to be re-evaluated in this way.
If we use atomic grouping for the previous example, the matcher gives -up immediately on failing to match `foo' the first time. The -notation is a kind of special parenthesis, starting with `(?>' as in +up immediately on failing to match ‘foo’ the first time. The +notation is a kind of special parenthesis, starting with ‘(?>’ as in this example:
@@ -10519,15 +10517,15 @@ maximizing repeat that must swallow everSimple cases such as the above example can be thought of as a maximizing repeat that must swallow everything it can. So, while both -`\d+' and `\d+?' are prepared to adjust the number of digits +‘\d+’ and ‘\d+?’ are prepared to adjust the number of digits they match in order to make the rest of the pattern match, -`(?>\d+)' can only match an entire sequence of digits. +‘(?>\d+)’ can only match an entire sequence of digits.
Atomic groups in general can of course contain arbitrarily complicated subpatterns, and can be nested. However, when the subpattern for an atomic group is just a single repeated item, as in the example above, a simpler notation, called a possessive quantifier, can be -used. This consists of an additional `+' character following a +used. This consists of an additional ‘+’ character following a quantifier. Using this notation, the previous example can be rewritten as @@ -10543,7 +10541,7 @@ example:
Possessive quantifiers are always greedy; the setting of the -`(?U)' option is ignored. They are a convenient notation for the +‘(?U)’ option is ignored. They are a convenient notation for the simpler forms of atomic group. However, there is no difference in the meaning of a possessive quantifier and the equivalent atomic group, though there may be a performance difference; possessive quantifiers @@ -10556,9 +10554,9 @@ ultimately found its way into Perl at re ultimately found its way into Perl at release 5.10.
PCRE has an optimization that automatically “possessifies” certain -simple pattern constructs. For example, the sequence `A+B' is -treated as `A++B' because there is no point in backtracking into -a sequence of `A's when `B' must follow. +simple pattern constructs. For example, the sequence ‘A+B’ is +treated as ‘A++B’ because there is no point in backtracking into +a sequence of ‘A’s when ‘B’ must follow.
When a pattern contains an unlimited repeat inside a subpattern that can itself be repeated an unlimited number of times, the use of an @@ -10570,8 +10568,8 @@ very long time indeed. The pattern
matches an unlimited number of substrings that either consist of -non-digits, or digits enclosed in `<>', followed by either -`!' or `?'. When it matches, it runs quickly. However, if it +non-digits, or digits enclosed in ‘<>’, followed by either +‘!’ or ‘?’. When it matches, it runs quickly. However, if it is applied to
@@ -10579,9 +10577,9 @@ is applied to
it takes a long time before reporting failure. This is because the -string can be divided between the internal `\D+' repeat and the -external `*' repeat in a large number of ways, and all have to be -tried. (The example uses `[!?]' rather than a single character at +string can be divided between the internal ‘\D+’ repeat and the +external ‘*’ repeat in a large number of ways, and all have to be +tried. (The example uses ‘[!?]’ rather than a single character at the end, because both PCRE and Perl have an optimization that allows for fast failure when a single character is used. They remember the last single character that is required for a match, and fail early if @@ -10613,7 +10611,7 @@ subpattern whose number is 10 or more us
It is not possible to have a numerical “forward back reference” to a subpattern whose number is 10 or more using this syntax because a -sequence such as `\50' is interpreted as a character defined in +sequence such as ‘\50’ is interpreted as a character defined in octal. See the subsection entitled “Non-printing characters” above for further details of the handling of digits following a backslash. There is no such problem when named parentheses are used. A @@ -10621,7 +10619,7 @@ back reference to any subpattern is poss (see below).
Another way of avoiding the ambiguity inherent in the use of digits -following a backslash is to use the `\g' escape sequence, which +following a backslash is to use the ‘\g’ escape sequence, which is a feature introduced in Perl 5.10. This escape must be followed by an unsigned number or a negative number, optionally enclosed in braces. These examples are all identical: @@ -10641,10 +10639,10 @@ reference. Consider this example: (abc(def)ghi)\g{-1} -
The sequence `\g{-1}' is a reference to the most recently -started capturing subpattern before `\g', that is, is it -equivalent to `\2'. Similarly, `\g{-2}' would be -equivalent to `\1'. The use of relative references can be helpful +
The sequence ‘\g{-1}’ is a reference to the most recently +started capturing subpattern before ‘\g’, that is, it is +equivalent to ‘\2’. Similarly, ‘\g{-2}’ would be +equivalent to ‘\1’. The use of relative references can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves. @@ -10657,8 +10655,8 @@ for a way of doing that). So the pattern (sens|respons)e and \1ibility -
matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If caseful +
matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If caseful matching is in force at the time of the back reference, the case of letters is relevant. For example, @@ -10666,14 +10664,14 @@ letters is relevant. For example, ((?i)rah)\s+\1 -
matches `rah rah' and `RAH RAH', but not `RAH rah', +
matches ‘rah rah’ and ‘RAH RAH’, but not ‘RAH rah’, even though the original capturing subpattern is matched caselessly.
There are several different ways of writing back references to named -subpatterns. The .NET syntax `\k{name}' and the Perl syntax -`\k<name>' or `\k'name'' are supported, as is the Python +subpatterns. The .NET syntax ‘\k{name}’ and the Perl syntax +‘\k<name>’ or ‘\k'name'’ are supported, as is the Python syntax (?P=name). Perl 5.10's unified back reference syntax, in which -`\g' can be used for both numeric and named references, is also +‘\g’ can be used for both numeric and named references, is also supported. We could rewrite the above example in any of the following ways: @@ -10695,16 +10693,16 @@ references to it always fail. For exampl (a|(bc))\2 -
always fails if it starts to match `a' rather than -`bc'. Because there may be many capturing parentheses in a +
always fails if it starts to match ‘a’ rather than +‘bc’. Because there may be many capturing parentheses in a pattern, all digits following the backslash are taken as part of a potential back reference number. If the pattern continues with a digit character, some delimiter must be used to terminate the back -reference. If the `(?x)' option is set, this can be whitespace. +reference. If the ‘(?x)’ option is set, this can be whitespace. Otherwise an empty comment (see “Comments” below) can be used.
A back reference that occurs inside the parentheses to which it refers -fails when the subpattern is first used, so, for example, `(a\1)' +fails when the subpattern is first used, so, for example, ‘(a\1)’ never matches. However, such references can be useful inside repeated subpatterns. For example, the pattern @@ -10712,7 +10710,7 @@ subpatterns. For example, the pattern (a|b\1)+ -
matches any number of `a's and also `aba', `ababbaa' +
matches any number of ‘a’s and also ‘aba’, ‘ababbaa’ etc. At each iteration of the subpattern, the back reference matches the character string corresponding to the previous iteration. In order for this to work, the pattern must be such that the first iteration @@ -10724,8 +10722,8 @@ current matching point that does not act
An assertion is a test on the characters following or preceding the current matching point that does not actually consume any -characters. The simple assertions coded as `\b', `\B', -`\A', `\G', `\Z', `\z', `^' and `$' are +characters. The simple assertions coded as ‘\b’, ‘\B’, +‘\A’, ‘\G’, ‘\Z’, ‘\z’, ‘^’ and ‘$’ are described above.
More complicated assertions are coded as subpatterns. There are two @@ -10744,8 +10742,8 @@ sense for negative assertions.
Lookahead assertions start with `(?=' for positive assertions and -`(?!' for negative assertions. For example, +
Lookahead assertions start with ‘(?=’ for positive assertions and +‘(?!’ for negative assertions. For example,
\w+(?=;) @@ -10758,35 +10756,35 @@ the match, and foo(?!bar)-
matches any occurrence of `foo' that is not followed by -`bar'. Note that the apparently similar pattern +
matches any occurrence of ‘foo’ that is not followed by +‘bar’. Note that the apparently similar pattern
(?!foo)bar-
does not find an occurrence of `bar' that is preceded by -something other than `foo'; it finds any occurrence of `bar' -whatsoever, because the assertion `(?!foo)' is always true when -the next three characters are `bar'. A lookbehind assertion is +
does not find an occurrence of ‘bar’ that is preceded by +something other than ‘foo’; it finds any occurrence of ‘bar’ +whatsoever, because the assertion ‘(?!foo)’ is always true when +the next three characters are ‘bar’. A lookbehind assertion is needed to achieve the other effect.
If you want to force a matching failure at some point in a pattern, -the most convenient way to do it is with `(?!)' because an empty +the most convenient way to do it is with ‘(?!)’ because an empty string always matches, so an assertion that requires there not to be an empty string must always fail.
Lookbehind assertions start with `(?<=' for positive assertions -and `(?<!' for negative assertions. For example, +
Lookbehind assertions start with ‘(?<=’ for positive assertions +and ‘(?<!’ for negative assertions. For example,
(?<!foo)bar-
matches an occurrence of `bar' that is not preceded by -`foo'. The contents of a lookbehind assertion are restricted such +
matches an occurrence of ‘bar’ that is not preceded by +‘foo’. The contents of a lookbehind assertion are restricted such that all the strings it matches must have a fixed length. However, if there are several top-level alternatives, they do not all have to have the same fixed length. Thus @@ -10818,7 +10816,7 @@ lengths, but it is acceptable if rewritt (?<=abc|abde) -
In some cases, the Perl 5.10 escape sequence `\K' (see above) can +
In some cases, the Perl 5.10 escape sequence ‘\K’ (see above) can be used instead of a lookbehind assertion; this is not restricted to a fixed-length. @@ -10827,10 +10825,10 @@ current position, the assertion fails. then try to match. If there are insufficient characters before the current position, the assertion fails. -
PCRE does not allow the `\C' escape (which matches a single byte +
PCRE does not allow the ‘\C’ escape (which matches a single byte in UTF-8 mode) to appear in lookbehind assertions, because it makes it -impossible to calculate the length of the lookbehind. The `\X' -and `\R' escapes, which can match different numbers of bytes, are +impossible to calculate the length of the lookbehind. The ‘\X’ +and ‘\R’ escapes, which can match different numbers of bytes, are also not permitted.
Possessive quantifiers can be used in conjunction with lookbehind @@ -10842,7 +10840,7 @@ string. Consider a simple pattern such a
when applied to a long string that does not match. Because matching -proceeds from left to right, PCRE will look for each `a' in the +proceeds from left to right, PCRE will look for each ‘a’ in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as @@ -10850,10 +10848,10 @@ pattern. If the pattern is specified as ^.*abcd$ -
the initial `.*' matches the entire string at first, but when this fails -(because there is no following `a'), it backtracks to match all +
the initial ‘.*’ matches the entire string at first, but when this fails +(because there is no following ‘a’), it backtracks to match all but the last character, then all but the last two characters, and so -on. Once again the search for `a' covers the entire string, from +on. Once again the search for ‘a’ covers the entire string, from right to left, so we are no better off. However, if the pattern is written as @@ -10861,7 +10859,7 @@ written as ^.*+(?<=abcd) -
there can be no backtracking for the `.*+' item; it can match +
there can be no backtracking for the ‘.*+’ item; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -10875,14 +10873,14 @@ difference to the processing time. (?<=\d{3})(?<!999)foo -
matches `foo' preceded by three digits that are not -`999'. Notice that each of the assertions is applied +
matches ‘foo’ preceded by three digits that are not +‘999’. Notice that each of the assertions is applied independently at the same point in the subject string. First there is a check that the previous three characters are all digits, and then -there is a check that the same three characters are not `999'. -This pattern does not match `foo' preceded by six +there is a check that the same three characters are not ‘999’. +This pattern does not match ‘foo’ preceded by six characters, the first of which are digits and the last three of which -are not `999'. For example, it doesn't match `123abcfoo'. A +are not ‘999’. For example, it doesn't match ‘123abcfoo’. A pattern to do that is
@@ -10892,7 +10890,7 @@ assertion checks that the preceding thre-This time the first assertion looks at the preceding six characters, checking that the first three are digits, and then the second assertion checks that the preceding three characters are not -`999'. +‘999’.
Assertions can be nested in any combination. For example, @@ -10900,15 +10898,15 @@ assertion checks that the preceding thre (?<=(?<!foo)bar)baz
matches an occurrence of `baz' that is preceded by `bar' -which in turn is not preceded by `foo', while +
matches an occurrence of ‘baz’ that is preceded by ‘bar’ +which in turn is not preceded by ‘foo’, while
(?<=\d{3}(?!999)...)foo-
is another pattern that matches `foo' preceded by three digits -and any three characters that are not `999'. +
is another pattern that matches ‘foo’ preceded by three digits +and any three characters that are not ‘999’.
There are four kinds of condition: references to subpatterns, -references to recursion, a pseudo-condition called `DEFINE', and +references to recursion, a pseudo-condition called ‘DEFINE’, and assertions.
Consider the following pattern, which contains non-significant white space to make it more readable and to divide it into three parts for -ease of discussion (assume a preceding `(?x)'): +ease of discussion (assume a preceding ‘(?x)’):
( \( )? [^()]+ (?(1) \) ) @@ -10972,10 +10970,10 @@ relative reference:-Checking for a used subpattern by name
-Perl uses the syntax `(?(<name>)...)' or `(?('name')...)' to +
Perl uses the syntax ‘(?(<name>)...)’ or ‘(?('name')...)’ to test for a used subpattern by name. For compatibility with earlier versions of PCRE, which had this facility before Perl, the syntax -`(?(name)...)' is also recognized. However, there is a possible +‘(?(name)...)’ is also recognized. However, there is a possible ambiguity with this syntax, because subpattern names may consist entirely of digits. PCRE looks first for a named subpattern; if it cannot find one and the name consists entirely of digits, PCRE looks @@ -10991,10 +10989,10 @@ recommended.
Checking for pattern recursion
-If the condition is the string `(R)', and there is no subpattern -with the name `R', the condition is true if a recursive call to +
If the condition is the string ‘(R)’, and there is no subpattern +with the name ‘R’, the condition is true if a recursive call to the whole pattern or any subpattern has been made. If digits or a name -preceded by ampersand follow the letter `R', for example: +preceded by ampersand follow the letter ‘R’, for example:
(?(R3)...) or (?(R&name)...) @@ -11009,8 +11007,8 @@ patterns are described below.-Defining subpatterns for use by reference only
-If the condition is the string `(DEFINE)', and there is no -subpattern with the name `DEFINE', the condition is always +
If the condition is the string ‘(DEFINE)’, and there is no +subpattern with the name ‘DEFINE’, the condition is always false. In this case, there may be only one alternative in the subpattern. It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define @@ -11051,18 +11049,18 @@ otherwise it is matched against the seco it tests for the presence of at least one letter in the subject. If a letter is found, the subject is matched against the first alternative; otherwise it is matched against the second. This pattern matches -strings in one of the two forms `dd-aaa-dd' or -`dd-dd-dd', where aaa are letters and +strings in one of the two forms ‘dd-aaa-dd’ or +‘dd-dd-dd’, where aaa are letters and dd are digits.
Comments
-The sequence `(?#' marks the start of a comment that continues up +
The sequence ‘(?#’ marks the start of a comment that continues up to the next closing parenthesis. Nested parentheses are not permitted. The characters that make up a comment play no part in the pattern matching at all. -
If the `(?x)' option is set, an unescaped `#' character +
If the ‘(?x)’ option is set, an unescaped ‘#’ character outside a character class introduces a comment that continues to immediately after the next newline in the pattern. @@ -11079,11 +11077,11 @@ release 5.10. PCRE and Python, this kind of recursion was introduced into Perl at release 5.10. -
A special item that consists of `(?' followed by a number greater +
A special item that consists of ‘(?’ followed by a number greater than zero and a closing parenthesis is a recursive call of the subpattern of the given number, provided that it occurs inside that subpattern. (If not, it is a subroutine call, which is described in -the next section.) The special item `(?R)' or `(?0)' is a +the next section.) The special item ‘(?R)’ or ‘(?0)’ is a recursive call of the entire regular expression.
In PCRE (like Python, but unlike Perl), a recursive subpattern call is @@ -11115,21 +11113,21 @@ tricky. This is made easier by the use o
In a larger pattern, keeping track of parenthesis numbers can be tricky. This is made easier by the use of relative references. (A Perl -5.10 feature.) Instead of `(?1)' in the pattern above you can -write `(?-2)' to refer to the second most recently opened +5.10 feature.) Instead of ‘(?1)’ in the pattern above you can +write ‘(?-2)’ to refer to the second most recently opened parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered.
It is also possible to refer to subsequently opened parentheses, by -writing references such as `(?+2)'. However, these cannot be +writing references such as ‘(?+2)’. However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always subroutine calls, as described in the next section.
An alternative approach is to use named parentheses instead. The Perl -syntax for this is `(?&name)'; PCRE's earlier syntax -`(?P>name)' is also supported. We could rewrite the above example +syntax for this is ‘(?&name)’; PCRE's earlier syntax +‘(?P>name)’ is also supported. We could rewrite the above example as follows:
@@ -11151,7 +11149,7 @@ runs for a very long time indeed because-it fails quickly. However, if atomic grouping is not used, the match runs for a very long time indeed because there are so many different -ways the `+' and `*' repeats can carve up the subject, and +ways the ‘+’ and ‘*’ repeats can carve up the subject, and all have to be tested before failure can be reported.
At the end of a match, the values set for any capturing subpatterns @@ -11162,7 +11160,7 @@ subpattern value is set. If the pattern (ab(cd)ef)
the value for the capturing parentheses is `ef', which is the +
the value for the capturing parentheses is ‘ef’, which is the last value taken on at the top level. If additional parentheses are added, giving @@ -11185,9 +11183,9 @@ characters are permitted at the outer le < (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
In this pattern, `(?(R)' is the start of a conditional +
In this pattern, ‘(?(R)’ is the start of a conditional subpattern, with two different alternatives for the recursive and -non-recursive cases. The `(?R)' item is the actual recursive +non-recursive cases. The ‘(?R)’ item is the actual recursive call.
Subpatterns as Subroutines
@@ -11210,15 +11208,15 @@ reference can be absolute or relative, a (sens|respons)e and \1ibility
matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If instead +
matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If instead the pattern
(sens|respons)e and (?1)ibility-
is used, it does match `sense and responsibility' as well as the +
is used, it does match ‘sense and responsibility’ as well as the other two strings. Another example is given in the discussion of DEFINE above. @@ -11236,7 +11234,7 @@ pattern: (abc)(?i:(?-1)) -
It matches `abcabc'. It does not match `abcABC' because the +
It matches ‘abcabc’. It does not match ‘abcABC’ because the change of processing option does not affect the called subpattern.
The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. In Perl, they are -generally of the form `(*VERB:ARG)' but PCRE does not support the -use of arguments, so its general form is just `(*VERB)'. Any +generally of the form ‘(*VERB:ARG)’ but PCRE does not support the +use of arguments, so its general form is just ‘(*VERB)’. Any number of these verbs may occur in a pattern. There are two kinds:
A(A|B(*ACCEPT)|C)D-
This matches `AB', `AAD', or `ACD', but when it matches -`AB', no data is captured. +
This matches ‘AB’, ‘AAD’, or ‘ACD’, but when it matches +‘AB’, no data is captured.
(*FAIL)
or (*F)
This matches `xxaab' but not `aacaab'. It can be thought of +
This matches ‘xxaab’ but not ‘aacaab’. It can be thought of as a kind of dynamic anchor, or “I've started, so I must finish.”
(*PRUNE)
If the subject is `aaaac...', after the first match attempt fails +
If the subject is ‘aaaac...’, after the first match attempt fails (starting at the first character in the string), the starting point -skips on to start the next attempt at `c'. Note that a possessive +skips on to start the next attempt at ‘c’. Note that a possessive quantifer does not have the same effect in this example; although it would suppress backtracking during the first match attempt, the second attempt would start at the second character instead of skipping on to -`c'. +‘c’.
(*THEN)
If the `COND1' pattern matches, `FOO' is tried (and possibly -further items after the end of the group if `FOO' succeeds); on +
If the ‘COND1’ pattern matches, ‘FOO’ is tried (and possibly
+further items after the end of the group if ‘FOO’ succeeds); on
failure the matcher skips to the second alternative and tries
-`COND2', without backtracking into COND1. If (*THEN)
is used
+‘COND2’, without backtracking into COND1. If (*THEN)
is used
outside of any alternation, it acts exactly like (*PRUNE)
.
@@ -12131,7 +12129,7 @@ function get_preferred_merge3_command (t
-- If there wasn't any user-given merger, take the first that's available
-- and wanted.
for _,mkey in ipairs(default_order) do
- c = trymerger(mkey) ; if c then return c,nil end
+ c = trymerger(mkey) ; if c then return c,mkey end
end
end
@@ -12478,70 +12476,115 @@ end
return "socat"
end
--- Netsync notifiers are tables containing 5 functions:
--- start, revision_received, cert_received, pubkey_received and end
--- Those functions take exactly the same arguments as the corresponding
--- note_netsync functions, but return a different kind of value, a tuple
--- composed of a return code and a value to be returned back to monotone.
--- The codes are strings:
--- "continue" and "stop"
--- When the code "continue" is returned and there's another notifier, the
--- second value is ignored and the next notifier is called. Otherwise,
--- the second value is returned immediately.
-netsync_notifiers = {}
+do
+ -- A hook functions table may contain any of the following 6 keys, each
+ -- mapped to a function:
+ --
+ -- startup Corresponds to note_mtn_startup()
+ -- start Corresponds to note_netsync_start()
+ -- revision_received Corresponds to note_netsync_revision_received()
+ -- cert_received Corresponds to note_netsync_cert_received()
+ -- pubkey_received Corresponds to note_netsync_pubkey_received()
+ -- end Corresponds to note_netsync_end()
+ --
+ -- Those functions take exactly the same arguments as the corresponding
+ -- global functions, but return a different kind of value, a tuple
+ -- composed of a return code and a value to be returned back to monotone.
+ -- The codes are strings:
+ -- "continue" and "stop"
+ -- When the code "continue" is returned and another hook functions table is
+ -- registered, the second value is ignored and the next table is tried.
+ -- Otherwise, the second value is returned immediately.
+ local hook_functions = {}
+ local supported_items = {
+ "startup",
+ "start", "revision_received", "cert_received", "pubkey_received", "end"
+ }
-function _note_netsync_helper(f,...)
- local s = "continue"
- local v = nil
- for _,n in pairs(netsync_notifiers) do
- if n[f] then
- s,v = n[f](...)
+ function _hook_functions_helper(f,...)
+ local s = "continue"
+ local v = nil
+ for _,n in pairs(hook_functions) do
+ if n[f] then
+ s,v = n[f](...)
+ end
+ if s ~= "continue" then
+ break
+ end
end
- if s ~= "continue" then
- break
- end
+ return v
end
- return v
-end
-function note_netsync_start(...)
- return _note_netsync_helper("start",...)
-end
-function note_netsync_revision_received(...)
- return _note_netsync_helper("revision_received",...)
-end
-function note_netsync_cert_received(...)
- return _note_netsync_helper("cert_received",...)
-end
-function note_netsync_pubkey_received(...)
- return _note_netsync_helper("pubkey_received",...)
-end
-function note_netsync_end(...)
- return _note_netsync_helper("end",...)
-end
+ function note_mtn_startup(...)
+ return _hook_functions_helper("startup",...)
+ end
+ function note_netsync_start(...)
+ return _hook_functions_helper("start",...)
+ end
+ function note_netsync_revision_received(...)
+ return _hook_functions_helper("revision_received",...)
+ end
+ function note_netsync_cert_received(...)
+ return _hook_functions_helper("cert_received",...)
+ end
+ function note_netsync_pubkey_received(...)
+ return _hook_functions_helper("pubkey_received",...)
+ end
+ function note_netsync_end(...)
+ return _hook_functions_helper("end",...)
+ end
-function add_netsync_notifier(notifier, precedence)
- if type(notifier) ~= "table" or type(precedence) ~= "number" then
- return false, "Invalid tyoe"
+ function add_hook_functions(functions, precedence)
+ if type(functions) ~= "table" or type(precedence) ~= "number" then
+ return false, "Invalid type"
+ end
+ if hook_functions[precedence] then
+ return false, "Precedence already taken"
+ end
+
+ local unknown_items = ""
+ local warning = nil
+ local is_member =
+ function (s,t)
+ for k,v in pairs(t) do if s == v then return true end end
+ return false
+ end
+
+ for n,f in pairs(functions) do
+ if type(n) == "string" then
+ if not is_member(n, supported_items) then
+ if unknown_items ~= "" then
+ unknown_items = unknown_items .. ","
+ end
+ unknown_items = unknown_items .. n
+ end
+ if type(f) ~= "function" then
+ return false, "Value for functions item "..n.." isn't a function"
+ end
+ else
+ warning = "Non-string item keys found in functions table"
+ end
+ end
+
+ if warning == nil and unknown_items ~= "" then
+ warning = "Unknown item(s) " .. unknown_items .. " in functions table"
+ end
+
+ hook_functions[precedence] = functions
+ return true, warning
end
- if netsync_notifiers[precedence] then
- return false, "Precedence already taken"
+ function push_hook_functions(functions)
+ local n = table.maxn(hook_functions) + 1
+ return add_hook_functions(functions, n)
end
- local warning = nil
- for n,f in pairs(notifier) do
- if type(n) ~= "string" or n ~= "start"
- and n ~= "revision_received"
- and n ~= "cert_received"
- and n ~= "pubkey_received"
- and n ~= "end" then
- warning = "Unknown item found in notifier table"
- elseif type(f) ~= "function" then
- return false, "Value for notifier item "..n.." isn't a function"
- end
+
+ -- Kept for backward compatibility
+ function add_netsync_notifier(notifier, precedence)
+ return add_hook_functions(notifier, precedence)
end
- netsync_notifiers[precedence] = notifier
- return true, warning
-end
-
+ function push_netsync_notifier(notifier)
+ return push_hook_functions(notifier)
+ end
+end