# # # patch "NEWS" # from [7d6bb127f7ecd2dc50c01b9ecbf5a4d85841b0c2] # to [4102d7e399b0d44fa250e7110072f5dd049a903d] # # patch "UPGRADE" # from [b7e014bfa58db309cb29c06816070168e602fd14] # to [ef451e84d9d3c847f9d1fde2f470a9d20cdd19a9] # # patch "docs/Adding-Files.html" # from [5062910011a1a0943fdb10cb7d213251f97d9a86] # to [cc6c8724c9700e78116d914e8e9f39b414197aee] # # patch "docs/Additional-Lua-Functions.html" # from [846e1a5b8af06f43d8d8356df8ec75a7e91a6972] # to [e8f257471f9a6518560cba09bc3d3ab2a6143ba0] # # patch "docs/Advanced-Uses.html" # from [8321cf86cedd263616c8d270eec3749a75d7502a] # to [a5a5438f8a86bcbcdc677673000a05cfa29a5190] # # patch "docs/Automation.html" # from [356f171884468556f00bb7a1cf101d1a52e8d86c] # to [35bed8de0799c40422cc86ce6784e00e82d1667c] # # patch "docs/Basic-Network-Service.html" # from [085ca4c633152c1a4cda9919bc3632914b9f116b] # to [d9920baa4b979ac9355e1b376dd8f90cec9849b3] # # patch "docs/Branches.html" # from [6b3681a4cc9adbe57242405012580da70ce56ecf] # to [478cd3e4a9b9ff7ed91a3d32d88aaa05050e030b] # # patch "docs/Branching-and-Merging.html" # from [2f0bd679bd2111d317fc4bbea458a889d1e655c2] # to [21593f8879fda5188a67a83e55e4d055facf8361] # # patch "docs/CVS-Phrasebook.html" # from [e790524d54a2332bbf14e04fed7676ba6df56147] # to [f4e4781803808facf1795a2441e1fd6fff24918b] # # patch "docs/Certificate.html" # from [2565b478678bf30855d226b656a5abe73791ff6a] # to [f3837399865ff9faf2a0754b26cdd4f0dde955f2] # # patch "docs/Certificates.html" # from [051bd77b92a8d609761bba28d6f90f23a7d53811] # to [2cc0e4f319ed862ed12559a54f37658416a935d5] # # patch "docs/Command-Reference.html" # from [8be324c63b1adb3f71bb1ad0f96788af623c3a5c] # to [ac73891fbac9546afc4fcfdd53eaeca93f7f6c8b] # # patch "docs/Committing-Work.html" # from [0ac9fe948d6d6765c0b282198521f0d63122ae45] # to [e716cb842e741e8b617176249db01cbd2668f109] # # patch "docs/Concepts.html" # from [2b0ec10dfd1500b3852c2f97cf82bd2d940abed5] # to [0a0a28e84c6b2364c55a81596080a66438ee0498] # # 
patch "docs/Creating-a-Database.html" # from [4390285dd70f86c67db73ae6dc212d7324686222] # to [dd4d71928c1cfcd0bae494cf2f671ec5e14b7f10] # # patch "docs/Database.html" # from [c6e7c8ae101dfccc96542336f0696e5e03d4480c] # to [62ae58d646e0da9a8af14aa776c815c920d617ad] # # patch "docs/Dealing-with-a-Fork.html" # from [b677ba8ed57f4c2196ab5ba9c2d1144e022061ff] # to [f0d020a80d566fcce0e969526947d9bb04e11495] # # patch "docs/Default-hooks.html" # from [a3abe4ba183ea064970139d1e081b21c499f3d74] # to [426d6f8d291d2c1b66af91f7fc541cb4322b4c71] # # patch "docs/File-Attributes.html" # from [37e478866d1245c4b1c09cb8fdf0d64c1c1b94f0] # to [07a04b4dc5dd34222afc29ac756a3bee696cfe0c] # # patch "docs/Forks-and-merges.html" # from [caa0489f7797842f20be40b7f998afa5f84aeeb6] # to [1e26924b4bd9845021a4e3ad24c066a4b4caedee] # # patch "docs/General-Index.html" # from [d9860f42e76b6146e214ff8bd608285a2ba6f70a] # to [4935e1b875af4cd6dcc2d04dfd692887c69e4c0a] # # patch "docs/Generating-Keys.html" # from [f97c0998cf7d223ef4e2271f40b37b838a254bda] # to [7e8c77afdbf077663dbcee8f85126bf06e103734] # # patch "docs/Hash-Integrity.html" # from [3f9e7068d10c10ce601d14394277e31b9b983d4d] # to [e330839ef84c2902639d3e151c93338471aec15b] # # patch "docs/Historical-records.html" # from [90b70a4b56a763a70d864da898d6667b4c1826ba] # to [9937fdcabdc36c64c8e3e64a99de31dd3e4be343] # # patch "docs/Hook-Reference.html" # from [7a412358366c360d81a2e428b6b824cccaecb272] # to [ac7f505461c97439c2534cee928cf54168ef632b] # # patch "docs/Hooks.html" # from [673c5a499a3c72d3ef132f2470ae863c62d57ea4] # to [d32c0db23daf71b0884b9465b74fcd30f8e819de] # # patch "docs/Importing-from-CVS.html" # from [34ea9d99420562ab5f0a435386597101c3b6d98c] # to [f52c8fd7dd6784f50c00c456c74d6589a72ab15f] # # patch "docs/Informative.html" # from [ffe3b1df0339c4769f834001b639319d04ab2ea4] # to [52874d9861dc1742a759be4a260204be0cab01e9] # # patch "docs/Inodeprints.html" # from [00c8414b0b4d264a564ec486ca65a8e7dc48229d] # to 
[4e462bbef7cfa65afde05c0e51771a6c831ec672] # # patch "docs/Internationalization.html" # from [f743ea0d87237b4a0ba08b8e1eff8c2eda4b11e1] # to [20915ed9d3412b0f53b5a5b6c7e3c28f8586fd94] # # patch "docs/Key-and-Cert-Trust.html" # from [87061cac1bac7fc663f3b53fa7d2635f072e8083] # to [9a87d94eb16d0d44085edd2bd8339648e42aaa5b] # # patch "docs/Making-Changes.html" # from [7155a6d3952b2a8ef8a2eafa9549cbfd60ab89fb] # to [2a35cd7076ea9d81efcffddae77163d325fcff30] # # patch "docs/Mark_002dMerge.html" # from [7b95b1f2f7b6c7414d5523663e761de124a09529] # to [b392558ccae556635ed788648db245b9071d145a] # # patch "docs/Merging.html" # from [6e1b1cffbbfab2076cfe97d595dc3316b1ed4658] # to [a29585476ae412228693aadcdc3a54985e0b655d] # # patch "docs/Migrating-and-Dumping.html" # from [31cf38fd784584da6f9ea3ce119b8e8caffecc23] # to [10a5a0961bd67d70eb3ac97caf148bd39db6e06e] # # patch "docs/Naming-Conventions.html" # from [bf61294982536b75cc19ea1ae6aee8d5f0371c31] # to [e9dec1894d0cbd227f61f8500aff5f9934c6c784] # # patch "docs/Network-Service-Revisited.html" # from [2e8694d09a1dd96b0bf8d23b88887c651db4790d] # to [30ce6318069e4e82d3f49d573e9b10e964e232cf] # # patch "docs/Network.html" # from [ea1beb7ca4c48d2ccae750e1a01baa1143785849] # to [9b68ce1aadf777750f366aa0a356dd354c5d96ec] # # patch "docs/Other-Transports.html" # from [12d427aa02fa9c04751b31f614488da0dff04d51] # to [7e68d9cd7cf332f1193ece40eac4ff845325f176] # # patch "docs/Packet-I_002fO.html" # from [44d9a934e80c37a1bdd826fd86f9b5218563833f] # to [9737030371eae74dbe9ce2ce837de5d101fbd2fe] # # patch "docs/Quality-Assurance.html" # from [2ba8ffdc2456b9a53ca6bebe46aa1b956546433d] # to [a2c5b1325ef9d5176db2908061afa117f9239b14] # # patch "docs/RCS.html" # from [8e283147bdb9cd6822a2b262d73fef95d0ebf94b] # to [b0bb87a5300ed953a29c02f00b3534a85d43e6f1] # # patch "docs/Rebuilding-ancestry.html" # from [6424f9076b7d8ea298d9d616b003948e024624f4] # to [5b01226f5cc648bf08925e6f00b5fe84cf91014a] # # patch "docs/Regexp-Details.html" # from 
[292e1d632b1d603a8f6bdf8cf165954653b40511] # to [ea50c55e16d4127dd65b96ddb23b544aa956a032] # # patch "docs/Regexp-Summary.html" # from [3bd9b164c47cfebf0b14099e9ca05f3be4d2cb11] # to [cb5173340dc7d085bb9b31fee5beb7af838d7b23] # # patch "docs/Regexps.html" # from [815bd739ce0d1f2d8d11a5e170d0802c305147e5] # to [9d0e0674c0c926ccfe022142b805d810662a9284] # # patch "docs/Reserved-Certs.html" # from [d6a34981e320fd16d5a567b0dc2d5b3970490790] # to [d86b0c542ecf0fe616cfded24d4333b3c0dea012] # # patch "docs/Reserved-Files.html" # from [4b48d549c9e557f0a6de1f98adfba40bcc04baea] # to [dc28c3e5da252e42e708ade445183f430694b199] # # patch "docs/Restrictions.html" # from [452d413cdd9ca3b1fb4c7351d42a92e41a2588c4] # to [6e8d7a166765607b49f7724e5a1fbf22938bb4e7] # # patch "docs/Scripting.html" # from [a69b560f26880e3c1143a52f1c6e07aaf8b5f250] # to [30bea23402414b430800375915ca1647e5e9a5e0] # # patch "docs/Selectors.html" # from [e9adcb321fca68051ebe37e1f1d18894e0db656b] # to [27ff2f3960012335a26dfe8f355773418e1bb9ac] # # patch "docs/Special-Topics.html" # from [d54cb05bd92502bfe2a5c20a63e9c143f3709b36] # to [9705f227d829fe7eb1b89582c48a92e552e4465d] # # patch "docs/Starting-a-New-Project.html" # from [a3cbda53564080db304fd6237351ce954babea3c] # to [b4964123b9e63096310ca87c0b243c4b17718689] # # patch "docs/Storage-and-workflow.html" # from [69392c7d45bec0eaca2f17e9d2f92077c2ba4070] # to [caff1cbcb942d761fff3d5d66b9c46a97c4f3711] # # patch "docs/Synchronising-Databases.html" # from [d40f41860ab082e6da7fb16844afb19357fe55b9] # to [fb29388f7fabc7926e1357e321da9fafdc2c7e11] # # patch "docs/Tree.html" # from [b557ca7b886640909179345fba5187d52d49b94f] # to [b946d40b26991865e3db854d272f95addb6cd920] # # patch "docs/Tutorial.html" # from [5d27fde6085db7cb82e86332657d6e73de2dbf97] # to [28141be0386259fcd886a517f7de6b6c2a04af53] # # patch "docs/Using-packets.html" # from [4077ccda595a86dd82f1fb9a7da0468b69010b02] # to [9541f5784c2449819b1db89a826ea8917db3a416] # # patch "docs/Vars.html" # 
from [c7d3a44236435c862d6c7769d8a1b32fd57fd5fc] # to [7475e0311e505b2062aa865f3532d3ff83806b30] # # patch "docs/Versions-of-files.html" # from [ef657adad28e050daad568c6a40e338c8e7c2049] # to [f3bf46039cd6085320e20be0a3e6bd8656eca3fc] # # patch "docs/Versions-of-trees.html" # from [64531ae8cfd44ce9d5646702f3b65a74ea029e42] # to [ae0aaa5884c8701862f7545b8f48416bb4eec93c] # # patch "docs/Workspace-Collisions.html" # from [b1361bb5f32890fce1b8d3d6d62892b5dc882758] # to [ad276f0c48026052f87c36ebf66f04491fe2a5da] # # patch "docs/Workspace.html" # from [40b4543d5606e6701dace2e33f41aac6786bfd8c] # to [f20179119ab4fb1acf050df9897e87541c3711f7] # # patch "docs/index.html" # from [617ce735f037a9cd24f680774150561b890a2faa] # to [90d19f0dee2f65ff5a94c152a3f80558fa56e6ee] # # patch "monotone.html" # from [0c51e9b6862604f9724e8aadf48028c579da628f] # to [6fa6307fc17a8f9880a0e07f365d13e2180601a9] # # patch "monotone.pdf" # from [de109abbbb027f488eb8b286ed2cdc4a85737d37] # to [7704a142016b4a9be4b027a5eb5f7010a6422ca1] # ============================================================ --- NEWS 7d6bb127f7ecd2dc50c01b9ecbf5a4d85841b0c2 +++ NEWS 4102d7e399b0d44fa250e7110072f5dd049a903d @@ -1,3 +1,49 @@ +[ somewhen in the future ] + + 0.38 release. + + Changes + + - mtn log now prints a single dot for a project's root + directory instead of an empty string. + + - mtn now warns if changes to a file will be ignored because + the file has been deleted on one side of a merge. + + - mtn now errors if your chosen private key doesn't match the public + key of the same name in your database. 
+ + - mtn now checks for your key before a merge action takes place to + ensure that any manually merged file isn't lost in an error case + + Bugs fixed + + - a bug introduced in 0.37 prevented an external merger from being + executed unless the MTN_MERGE environment variable was set + + - mtn read successfully reads revision data, and cert packets again + + - mtn consistently supports certs with empty values + (fixed 'ls certs' and 'read') + + Internal + + - Update Botan to 1.7.2. + + - Moved the gzip implementation out of the Botan directory. + + Other + + - Added the scripts of the following Lua-based contributed + Monotone extension commands to contrib/command/: + "mtn base", "mtn fuse", "mtn revision", "mtn conflicts". + + - Added a hooks version of the contributed ciabot script, + contrib/ciabot_monotone_hookversion.lua + + - The monotone manual is now licensed under the GPL rather than + the GFDL. + Fri Oct 25 22:35:33 UTC 2007 0.37 release. ============================================================ --- UPGRADE b7e014bfa58db309cb29c06816070168e602fd14 +++ UPGRADE ef451e84d9d3c847f9d1fde2f470a9d20cdd19a9 @@ -1,4 +1,4 @@ -upgrading monotone to 0.37 +upgrading monotone to 0.38 ========================== How to read this file: ============================================================ --- docs/Adding-Files.html 5062910011a1a0943fdb10cb7d213251f97d9a86 +++ docs/Adding-Files.html cc6c8724c9700e78116d914e8e9f39b414197aee @@ -1,9 +1,9 @@ Adding Files - monotone documentation - + ============================================================ --- docs/Additional-Lua-Functions.html 846e1a5b8af06f43d8d8356df8ec75a7e91a6972 +++ docs/Additional-Lua-Functions.html e8f257471f9a6518560cba09bc3d3ab2a6143ba0 @@ -1,9 +1,9 @@ Additional Lua Functions - monotone documentation - + @@ -63,7 +63,7 @@ more of the following characters:
          0x00 thru 0x06
           0x0E thru 0x1a
           0x1c thru 0x1f
-     
+
include(scriptfile)
This function tries to load and execute the script contained into scriptfile. It returns true for success and false if there is an @@ -143,7 +143,7 @@ the arguments. thingy "baz" spork frob "oops" - +

The output table will be:

          {
              1 = { name = "thingy", values = { 1 = "foo", 2 = "bar" } },
@@ -151,7 +151,7 @@ the arguments.
              3 = { name = "spork", values = { } },
              4 = { name = "frob", values = { 1 = "oops" } }
           }
-     
+
regex.search(regexp, string)
Returns true if a match for regexp is found in str, return false otherwise. See Regexps, for the syntax of regexp. ============================================================ --- docs/Advanced-Uses.html 8321cf86cedd263616c8d270eec3749a75d7502a +++ docs/Advanced-Uses.html a5a5438f8a86bcbcdc677673000a05cfa29a5190 @@ -1,9 +1,9 @@ Advanced Uses - monotone documentation - + ============================================================ --- docs/Automation.html 356f171884468556f00bb7a1cf101d1a52e8d86c +++ docs/Automation.html 35bed8de0799c40422cc86ce6784e00e82d1667c @@ -1,9 +1,9 @@ Automation - monotone documentation - + @@ -1381,7 +1381,7 @@ 3.0
Added in:
3.0 -
Renamed from attributes to get_attributes in:
+
Renamed from attributes to get_attributes in:
5.0
Purpose:
@@ -2009,7 +2009,7 @@ Workspace-less commit. Normally used via add_file "foo" content [5bf1fd927dfb8679496a2e6cf00cbe50c1c87145] - +
Sample output:
          
           4c2c1d846fa561601254200918fba1fd71e6795d
============================================================
--- docs/Basic-Network-Service.html	085ca4c633152c1a4cda9919bc3632914b9f116b
+++ docs/Basic-Network-Service.html	d9920baa4b979ac9355e1b376dd8f90cec9849b3
@@ -1,9 +1,9 @@
 
 
 Basic Network Service - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Branches.html	6b3681a4cc9adbe57242405012580da70ce56ecf
+++ docs/Branches.html	478cd3e4a9b9ff7ed91a3d32d88aaa05050e030b
@@ -1,9 +1,9 @@
 
 
 Branches - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Branching-and-Merging.html	2f0bd679bd2111d317fc4bbea458a889d1e655c2
+++ docs/Branching-and-Merging.html	21593f8879fda5188a67a83e55e4d055facf8361
@@ -1,9 +1,9 @@
 
 
 Branching and Merging - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/CVS-Phrasebook.html	e790524d54a2332bbf14e04fed7676ba6df56147
+++ docs/CVS-Phrasebook.html	f4e4781803808facf1795a2441e1fd6fff24918b
@@ -1,9 +1,9 @@
 
 
 CVS Phrasebook - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Certificate.html	2565b478678bf30855d226b656a5abe73791ff6a
+++ docs/Certificate.html	f3837399865ff9faf2a0754b26cdd4f0dde955f2
@@ -1,9 +1,9 @@
 
 
 Certificate - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Certificates.html	051bd77b92a8d609761bba28d6f90f23a7d53811
+++ docs/Certificates.html	2cc0e4f319ed862ed12559a54f37658416a935d5
@@ -1,9 +1,9 @@
 
 
 Certificates - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Command-Reference.html	8be324c63b1adb3f71bb1ad0f96788af623c3a5c
+++ docs/Command-Reference.html	ac73891fbac9546afc4fcfdd53eaeca93f7f6c8b
@@ -1,9 +1,9 @@
 
 
 Command Reference - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Committing-Work.html	0ac9fe948d6d6765c0b282198521f0d63122ae45
+++ docs/Committing-Work.html	e716cb842e741e8b617176249db01cbd2668f109
@@ -1,9 +1,9 @@
 
 
 Committing Work - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Concepts.html	2b0ec10dfd1500b3852c2f97cf82bd2d940abed5
+++ docs/Concepts.html	0a0a28e84c6b2364c55a81596080a66438ee0498
@@ -1,9 +1,9 @@
 
 
 Concepts - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Creating-a-Database.html	4390285dd70f86c67db73ae6dc212d7324686222
+++ docs/Creating-a-Database.html	dd4d71928c1cfcd0bae494cf2f671ec5e14b7f10
@@ -1,9 +1,9 @@
 
 
 Creating a Database - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Database.html	c6e7c8ae101dfccc96542336f0696e5e03d4480c
+++ docs/Database.html	62ae58d646e0da9a8af14aa776c815c920d617ad
@@ -1,9 +1,9 @@
 
 
 Database - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Dealing-with-a-Fork.html	b677ba8ed57f4c2196ab5ba9c2d1144e022061ff
+++ docs/Dealing-with-a-Fork.html	f0d020a80d566fcce0e969526947d9bb04e11495
@@ -1,9 +1,9 @@
 
 
 Dealing with a Fork - monotone documentation
 
 
-
+
 
 
 
============================================================
--- docs/Default-hooks.html	a3abe4ba183ea064970139d1e081b21c499f3d74
+++ docs/Default-hooks.html	426d6f8d291d2c1b66af91f7fc541cb4322b4c71
@@ -1,9 +1,9 @@
 
 
 Default hooks - monotone documentation
 
 
-
+
 
 
 
@@ -805,7 +805,7 @@ function get_preferred_merge3_command (t
    -- If there wasn't any user-given merger, take the first that's available
    -- and wanted.
    for _,mkey in ipairs(default_order) do
-      c = trymerger(mkey) ; if c then return c,nil end
+      c = trymerger(mkey) ; if c then return c,mkey end
    end
 end
 
@@ -1152,70 +1152,115 @@ end
     return "socat"
 end
 
--- Netsync notifiers are tables containing 5 functions:
--- start, revision_received, cert_received, pubkey_received and end
--- Those functions take exactly the same arguments as the corresponding
--- note_netsync functions, but return a different kind of value, a tuple
--- composed of a return code and a value to be returned back to monotone.
--- The codes are strings:
--- "continue" and "stop"
--- When the code "continue" is returned and there's another notifier, the
--- second value is ignored and the next notifier is called.  Otherwise,
--- the second value is returned immediately.
-netsync_notifiers = {}
+do
+   -- Hook functions are tables containing any of the following 6 items
+   -- with associated functions:
+   --
+   --   startup			Corresponds to note_mtn_startup()
+   --   start			Corresponds to note_netsync_start()
+   --   revision_received	Corresponds to note_netsync_revision_received()
+   --   cert_received		Corresponds to note_netsync_cert_received()
+   --   pubkey_received		Corresponds to note_netsync_pubkey_received()
+   --   end			Corresponds to note_netsync_end()
+   --
+   -- Those functions take exactly the same arguments as the corresponding
+   -- global functions, but return a different kind of value, a tuple
+   -- composed of a return code and a value to be returned back to monotone.
+   -- The codes are strings:
+   -- "continue" and "stop"
+   -- When the code "continue" is returned and there's another notifier, the
+   -- second value is ignored and the next notifier is called.  Otherwise,
+   -- the second value is returned immediately.
+   local hook_functions = {}
+   local supported_items = {
+      "startup",
+      "start", "revision_received", "cert_received", "pubkey_received", "end"
+   }
 
-function _note_netsync_helper(f,...)
-   local s = "continue"
-   local v = nil
-   for _,n in pairs(netsync_notifiers) do
-      if n[f] then
-	 s,v = n[f](...)
+   function _hook_functions_helper(f,...)
+      local s = "continue"
+      local v = nil
+      for _,n in pairs(hook_functions) do
+	 if n[f] then
+	    s,v = n[f](...)
+	 end
+	 if s ~= "continue" then
+	    break
+	 end
       end
-      if s ~= "continue" then
-	 break
-      end
+      return v
    end
-   return v
-end
-function note_netsync_start(...)
-   return _note_netsync_helper("start",...)
-end
-function note_netsync_revision_received(...)
-   return _note_netsync_helper("revision_received",...)
-end
-function note_netsync_cert_received(...)
-   return _note_netsync_helper("cert_received",...)
-end
-function note_netsync_pubkey_received(...)
-   return _note_netsync_helper("pubkey_received",...)
-end
-function note_netsync_end(...)
-   return _note_netsync_helper("end",...)
-end
+   function note_mtn_startup(...)
+      return _hook_functions_helper("startup",...)
+   end
+   function note_netsync_start(...)
+      return _hook_functions_helper("start",...)
+   end
+   function note_netsync_revision_received(...)
+      return _hook_functions_helper("revision_received",...)
+   end
+   function note_netsync_cert_received(...)
+      return _hook_functions_helper("cert_received",...)
+   end
+   function note_netsync_pubkey_received(...)
+      return _hook_functions_helper("pubkey_received",...)
+   end
+   function note_netsync_end(...)
+      return _hook_functions_helper("end",...)
+   end
 
-function add_netsync_notifier(notifier, precedence)
-   if type(notifier) ~= "table" or type(precedence) ~= "number" then
-      return false, "Invalid tyoe"
+   function add_hook_functions(functions, precedence)
+      if type(functions) ~= "table" or type(precedence) ~= "number" then
+	 return false, "Invalid type"
+      end
+      if hook_functions[precedence] then
+	 return false, "Precedence already taken"
+      end
+
+      local unknown_items = ""
+      local warning = nil
+      local is_member =
+	 function (s,t)
+	    for k,v in pairs(t) do if s == v then return true end end
+	    return false
+	 end
+
+      for n,f in pairs(functions) do
+	 if type(n) == "string" then
+	    if not is_member(n, supported_items) then
+	       if unknown_items ~= "" then
+		  unknown_items = unknown_items .. ","
+	       end
+	       unknown_items = unknown_items .. n
+	    end
+	    if type(f) ~= "function" then
+	       return false, "Value for functions item "..n.." isn't a function"
+	    end
+	 else
+	    warning = "Non-string item keys found in functions table"
+	 end
+      end
+
+      if warning == nil and unknown_items ~= "" then
+	 warning = "Unknown item(s) " .. unknown_items .. " in functions table"
+      end
+
+      hook_functions[precedence] = functions
+      return true, warning
    end
-   if netsync_notifiers[precedence] then
-      return false, "Precedence already taken"
+   function push_hook_functions(functions)
+      local n = table.maxn(hook_functions) + 1
+      return add_hook_functions(functions, n)
    end
-   local warning = nil
-   for n,f in pairs(notifier) do
-      if type(n) ~= "string" or n ~= "start"
-	 and n ~= "revision_received"
-	 and n ~= "cert_received"
-	 and n ~= "pubkey_received"
-	 and n ~= "end" then
-	 warning = "Unknown item found in notifier table"
-      elseif type(f) ~= "function" then
-	 return false, "Value for notifier item "..n.." isn't a function"
-      end
+
+   -- Kept for backward compatibility
+   function add_netsync_notifier(notifier, precedence)
+      return add_hook_functions(notifier, precedence)
    end
-   netsync_notifiers[precedence] = notifier
-   return true, warning
-end
-
+ function push_netsync_notifier(notifier) + return push_hook_functions(notifier) + end +end ============================================================ --- docs/File-Attributes.html 37e478866d1245c4b1c09cb8fdf0d64c1c1b94f0 +++ docs/File-Attributes.html 07a04b4dc5dd34222afc29ac756a3bee696cfe0c @@ -1,9 +1,9 @@ File Attributes - monotone documentation - + ============================================================ --- docs/Forks-and-merges.html caa0489f7797842f20be40b7f998afa5f84aeeb6 +++ docs/Forks-and-merges.html 1e26924b4bd9845021a4e3ad24c066a4b4caedee @@ -1,9 +1,9 @@ Forks and merges - monotone documentation - + ============================================================ --- docs/General-Index.html d9860f42e76b6146e214ff8bd608285a2ba6f70a +++ docs/General-Index.html 4935e1b875af4cd6dcc2d04dfd692887c69e4c0a @@ -1,9 +1,9 @@ General Index - monotone documentation - + ============================================================ --- docs/Generating-Keys.html f97c0998cf7d223ef4e2271f40b37b838a254bda +++ docs/Generating-Keys.html 7e8c77afdbf077663dbcee8f85126bf06e103734 @@ -1,9 +1,9 @@ Generating Keys - monotone documentation - + ============================================================ --- docs/Hash-Integrity.html 3f9e7068d10c10ce601d14394277e31b9b983d4d +++ docs/Hash-Integrity.html e330839ef84c2902639d3e151c93338471aec15b @@ -1,9 +1,9 @@ Hash Integrity - monotone documentation - + ============================================================ --- docs/Historical-records.html 90b70a4b56a763a70d864da898d6667b4c1826ba +++ docs/Historical-records.html 9937fdcabdc36c64c8e3e64a99de31dd3e4be343 @@ -1,9 +1,9 @@ Historical records - monotone documentation - + ============================================================ --- docs/Hook-Reference.html 7a412358366c360d81a2e428b6b824cccaecb272 +++ docs/Hook-Reference.html ac7f505461c97439c2534cee928cf54168ef632b @@ -1,9 +1,9 @@ Hook Reference - monotone documentation - + 
============================================================ --- docs/Hooks.html 673c5a499a3c72d3ef132f2470ae863c62d57ea4 +++ docs/Hooks.html d32c0db23daf71b0884b9465b74fcd30f8e819de @@ -1,9 +1,9 @@ Hooks - monotone documentation - + @@ -185,7 +185,7 @@ variable as in this example: end print("Ending note_mtn_startup") end - +

6.1.2 User Defaults

@@ -241,7 +241,7 @@ definitions might be: if ((user == nil) or (host == nil)) then return nil end return string.format("address@hidden", user, host) end - +
          function get_author(branchname, keypair_id)
                   -- Branch name ignored.
                   if (keypair_id == "address@hidden") then
@@ -249,7 +249,7 @@ definitions might be:
                   end
                   return keypair_id
           end
-     
+
edit_comment (commentary, user_log_message)
Returns a log entry for a given set of changes, described in commentary. The commentary is identical to the output of @@ -279,7 +279,7 @@ probably want this hook to return
          function persist_phrase_ok()
                   return true
           end
-     
+
use_inodeprints ()
Returns true if you want monotone to automatically enable Inodeprints support in all workspaces. Only affects working @@ -289,7 +289,7 @@ copies created after you modify the hook
          function use_inodeprints()
                   return false
           end
-     
+
ignore_file (filename)
Returns true if filename should be ignored while adding, dropping, or moving files. Otherwise returns false. This is @@ -333,7 +333,7 @@ the configuration directory. This file l comment "everyone can read these branches" pattern "net.example.{public,project}*" allow "*" - +

This example allows everyone access to branches net.example.project and net.example.public and their sub-branches, except for the branches in net.example.project.security and net.example.project.private, @@ -479,7 +479,7 @@ components: end return argv end - +

use_transport_auth (uri)
Returns a boolean indicating whether monotone should use transport authentication mechanisms when communicating with uri. If this @@ -505,7 +505,7 @@ authentication assumptions. return true end end - +
get_mtn_command(host)
Returns a string containing the monotone command to be executed on host when communicating over ssh. The host @@ -517,7 +517,7 @@ monotone binary is not in the default pa
          function get_mtn_command(host)
           	return "mtn"
           end
-     
+

@@ -576,7 +576,7 @@ the intersection of tables) is the follo return false end end - +

In this example, any revision certificate is trusted if it is signed by at least one of three “trusted” keys, unless it is a branch certificate, in which case it must be signed by @@ -606,7 +606,7 @@ version carrying the old_results +

This definition accepts only those updates which preserve the set of true test results from update source to target. If no test results exist, this hook has no effect; but once a true test @@ -672,8 +672,7 @@ you have a tool specific to certain file you have a tool specific to certain file types.

- -
merge3 (ancestor_path, left_path, right_path, merged_path, ancestor_text, left_text, right_text)
+
merge3 (ancestor_path, left_path, right_path, merged_path, ancestor_text, left_text, right_text)
This hook is called to resolve merges that monotone could not resolve automatically. The actual ancestor, left, and right contents of the file are passed in the ancestor_text, left_text, and @@ -695,8 +694,7 @@ local system. For details, see the code and if not, then simply searches for whatever is installed on the local system. For details, see the code in Default hooks. -

-

get_preferred_merge3_command(tbl)
+


get_preferred_merge3_command(tbl)
Returns the results of running an external merge on three strings. tbl wraps up the various arguments for each merge command and is always provided by merge3. If there is a particular editor @@ -769,7 +767,7 @@ attribute. Its definition is: make_executable(filename) end end - +
attr_init_functions [attribute] (filename)
This is not a hook function, but a table of hook functions. Each entry in the table attr_init_functions, at @@ -798,7 +796,7 @@ definition is: return nil end end - +

The binary_file function is also defined as a Lua hook. See Default hooks. ============================================================ --- docs/Importing-from-CVS.html 34ea9d99420562ab5f0a435386597101c3b6d98c +++ docs/Importing-from-CVS.html f52c8fd7dd6784f50c00c456c74d6589a72ab15f @@ -1,9 +1,9 @@ Importing from CVS - monotone documentation - + ============================================================ --- docs/Informative.html ffe3b1df0339c4769f834001b639319d04ab2ea4 +++ docs/Informative.html 52874d9861dc1742a759be4a260204be0cab01e9 @@ -1,9 +1,9 @@ Informative - monotone documentation - + @@ -117,7 +117,7 @@ example, suppose you enter this command fa36deead87811b0e15208da2853c39d2f6ebe90 fa36b76dd0139177b28b379fe1d56b22342e5306 fa36965ec190bee14c5afcac235f1b8e2239bb2a - +

Then monotone is telling you that there are 3 revisions it knows about, in its database, which begin with the 4 hex digits fa36. This command is intended to be used by programmable @@ -132,7 +132,7 @@ present users with additional informatio 01f5da490941bee1f0000f0561fc62eabfb2fa23 address@hidden 2003-12-03T03:14:35 01f992577bd8bcdcade0f89e724fd5dc2d2bbe8a address@hidden 2005-05-11T05:19:29 01faad191d8d0474777c70b4d606782942333a78 address@hidden 2005-04-11T04:24:01 - +

mtn diff [--unified] [--show-encloser]
mtn diff --context [--show-encloser]
mtn diff --external [--diff-args=argstring]
mtn diff pathname...
mtn diff --revision=id
mtn diff --revision=id pathname...
mtn diff --revision=id1 --revision=id2
mtn diff --revision=id1 --revision=id2 pathname...
These commands print out GNU “unified diff format” textual difference listings between various manifest versions. With no --revision @@ -165,9 +165,9 @@ algorithm to produce a listing in &ldquo --unified, --context, --show-encloser, and --external. By default, monotone uses its built-in diff algorithm to produce a listing in “unified diff” format (analogous -to running the program diff -u); you can also explicitly +to running the program diff -u); you can also explicitly request this with --unified. The built-in diff algorithm can -also produce “context diff” format (analogous to diff -c), +also produce “context diff” format (analogous to diff -c), which you request by specifying --context. The short options that diff accepts for these modes, -u and -c, also work. @@ -186,7 +186,7 @@ syntax, See --unified requests the “unified diff” format, the default. --context requests the “context diff” format (analogous to -running the program diff -c). Both of these formats are +running the program diff -c). Both of these formats are generated directly by monotone, using its built-in diff algorithm.

Sometimes, you may want more flexibility in output formats; for these @@ -244,7 +244,7 @@ branch: : * tests/t_cross.at: New test for merging merges. : * testsuite.at: Call t_cross.at. : - +

mtn list keys
mtn ls keys
mtn list keys pattern
mtn ls keys pattern
These commands list rsa keys held in your keystore and current database. They do not print out any cryptographic information; they simply list the names of public and private keys you have on hand. ============================================================ --- docs/Inodeprints.html 00c8414b0b4d264a564ec486ca65a8e7dc48229d +++ docs/Inodeprints.html 4e462bbef7cfa65afde05c0e51771a6c831ec672 @@ -1,9 +1,9 @@ Inodeprints - monotone documentation - + ============================================================ --- docs/Internationalization.html f743ea0d87237b4a0ba08b8e1eff8c2eda4b11e1 +++ docs/Internationalization.html 20915ed9d3412b0f53b5a5b6c7e3c28f8586fd94 @@ -1,9 +1,9 @@ Internationalization - monotone documentation - + @@ -105,7 +105,7 @@ an IDNA string is this: an IDNA string is this:
                {ACE-prefix}{LDH-sanitized(punycode(nameprep(UTF-8-string)))}
-     
+

It is important to understand that IDNA encoding does not preserve the input string: it both prohibits a wide variety of possible strings and normalizes non-equal strings to supposedly ============================================================ --- docs/Key-and-Cert-Trust.html 87061cac1bac7fc663f3b53fa7d2635f072e8083 +++ docs/Key-and-Cert-Trust.html 9a87d94eb16d0d44085edd2bd8339648e42aaa5b @@ -1,9 +1,9 @@ Key and Cert Trust - monotone documentation - + @@ -85,11 +85,11 @@ following two examples are equivalent: enter passphrase for key ID address@hidden: $ mtn ci -m"Changed foo to bar" $ mtn push -k address@hidden - +

          $ mtn ci -m"Changed foo to bar"
           enter passphrase for key ID address@hidden:
           $ mtn push -k address@hidden
-     
+

In the second example, monotone automatically added the key to ssh-agent, making entering the passphrase not needed during the push. @@ -112,7 +112,7 @@ will cache the key for you. Identity added: /home/user/.ssh/id_monotone (/home/user/.ssh/id_monotone) $ mtn ci -m"Changed foo to bar" $ mtn push -k address@hidden - +

You can also use the --ssh-sign option to control whether ssh-agent will be used for signing. If set to yes, ssh-agent will be used to sign. If your key has not been added to ssh-agent monotone will fall back to its internal signing ============================================================ --- docs/Making-Changes.html 7155a6d3952b2a8ef8a2eafa9549cbfd60ab89fb +++ docs/Making-Changes.html 2a35cd7076ea9d81efcffddae77163d325fcff30 @@ -1,9 +1,9 @@ Making Changes - monotone documentation - + ============================================================ --- docs/Mark_002dMerge.html 7b95b1f2f7b6c7414d5523663e761de124a09529 +++ docs/Mark_002dMerge.html b392558ccae556635ed788648db245b9071d145a @@ -1,9 +1,9 @@ Mark-Merge - monotone documentation - + ============================================================ --- docs/Merging.html 6e1b1cffbbfab2076cfe97d595dc3316b1ed4658 +++ docs/Merging.html a29585476ae412228693aadcdc3a54985e0b655d @@ -1,9 +1,9 @@ Merging - monotone documentation - + ============================================================ --- docs/Migrating-and-Dumping.html 31cf38fd784584da6f9ea3ce119b8e8caffecc23 +++ docs/Migrating-and-Dumping.html 10a5a0961bd67d70eb3ac97caf148bd39db6e06e @@ -1,9 +1,9 @@ Migrating and Dumping - monotone documentation - + ============================================================ --- docs/Naming-Conventions.html bf61294982536b75cc19ea1ae6aee8d5f0371c31 +++ docs/Naming-Conventions.html e9dec1894d0cbd227f61f8500aff5f9934c6c784 @@ -1,9 +1,9 @@ Naming Conventions - monotone documentation - + ============================================================ --- docs/Network-Service-Revisited.html 2e8694d09a1dd96b0bf8d23b88887c651db4790d +++ docs/Network-Service-Revisited.html 30ce6318069e4e82d3f49d573e9b10e964e232cf @@ -1,9 +1,9 @@ Network Service Revisited - monotone documentation - + ============================================================ --- docs/Network.html ea1beb7ca4c48d2ccae750e1a01baa1143785849 +++ docs/Network.html 
9b68ce1aadf777750f366aa0a356dd354c5d96ec @@ -1,9 +1,9 @@ Network - monotone documentation - + @@ -80,11 +80,11 @@ branches. Supposing Alice's computer has alice.someisp.com, then Alice might run:

          $ mtn --bind=alice.someisp.com serve
-     
+

And Bob might run

          $ mtn sync alice.someisp.com "net.venge.monotone*"
-     
+

When the operation completes, all branches matching net.venge.monotone* will be synchronized between Alice and Bob's databases. @@ -96,7 +96,7 @@ with Alice again, he can simply run: with Alice again, he can simply run:

          $ mtn sync
-     
+

Of course, he can still sync with other people and other branches by passing an address or address plus globs on the command line; this will not affect his default affinity for Alice. If you ever ============================================================ --- docs/Other-Transports.html 12d427aa02fa9c04751b31f614488da0dff04d51 +++ docs/Other-Transports.html 7e68d9cd7cf332f1193ece40eac4ff845325f176 @@ -1,9 +1,9 @@ Other Transports - monotone documentation - + ============================================================ --- docs/Packet-I_002fO.html 44d9a934e80c37a1bdd826fd86f9b5218563833f +++ docs/Packet-I_002fO.html 9737030371eae74dbe9ce2ce837de5d101fbd2fe @@ -1,9 +1,9 @@ Packet I/O - monotone documentation - + ============================================================ --- docs/Quality-Assurance.html 2ba8ffdc2456b9a53ca6bebe46aa1b956546433d +++ docs/Quality-Assurance.html a2c5b1325ef9d5176db2908061afa117f9239b14 @@ -1,9 +1,9 @@ Quality Assurance - monotone documentation - + ============================================================ --- docs/RCS.html 8e283147bdb9cd6822a2b262d73fef95d0ebf94b +++ docs/RCS.html b0bb87a5300ed953a29c02f00b3534a85d43e6f1 @@ -1,9 +1,9 @@ RCS - monotone documentation - + ============================================================ --- docs/Rebuilding-ancestry.html 6424f9076b7d8ea298d9d616b003948e024624f4 +++ docs/Rebuilding-ancestry.html 5b01226f5cc648bf08925e6f00b5fe84cf91014a @@ -1,9 +1,9 @@ Rebuilding ancestry - monotone documentation - + @@ -160,7 +160,7 @@ be a rather serious security problem!

-


+
============================================================ --- docs/Regexp-Details.html 292e1d632b1d603a8f6bdf8cf165954653b40511 +++ docs/Regexp-Details.html ea50c55e16d4127dd65b96ddb23b544aa956a032 @@ -1,9 +1,9 @@ Regexp Details - monotone documentation - + @@ -73,7 +73,7 @@ brackets, the metacharacters are as foll
|
start of alternative branch
(
start subpattern
)
end subpattern -
?
extends the meaning of `(' +
?
extends the meaning of ‘(’ also 0 or 1 quantifier also quantifier minimizer
*
0 or more quantifier @@ -103,22 +103,22 @@ applies both inside and outside characte that character may have. This use of backslash as an escape character applies both inside and outside character classes. -

For example, if you want to match a `*' character, you write -`\*' in the pattern. This escaping action applies whether or not +

For example, if you want to match a ‘*’ character, you write +‘\*’ in the pattern. This escaping action applies whether or not the following character would otherwise be interpreted as a metacharacter, so it is always safe to precede a non-alphanumeric with backslash to specify that it stands for itself. In particular, if you -want to match a backslash, you write `\\'. +want to match a backslash, you write ‘\\’. -

If a pattern is compiled with the `(?x)' option, whitespace in +

If a pattern is compiled with the ‘(?x)’ option, whitespace in the pattern (other than in a character class) and characters between a -`#' outside a character class and the next newline are +‘#’ outside a character class and the next newline are ignored. An escaping backslash can be used to include a whitespace or -`#' character as part of the pattern. +‘#’ character as part of the pattern.

If you want to remove the special meaning from a sequence of -characters, you can do so by putting them between `\Q' and -`\E'. The `\Q...\E' sequence is recognized both inside and +characters, you can do so by putting them between ‘\Q’ and +‘\E’. The ‘\Q...\E’ sequence is recognized both inside and outside character classes.

Non-printing Characters
@@ -143,36 +143,36 @@ represents:
\x{hhh...}
character with hex code hhh...
-

The precise effect of `\cx' is as follows: if x is a lower +

The precise effect of ‘\cx’ is as follows: if x is a lower case letter, it is converted to upper case. Then bit 6 of the -character (hex 40) is inverted. Thus `\cz' becomes hex 1A (the -<SUB> control character, in ASCII), but `\c{' becomes hex 3B -(`;'), and `\c;' becomes hex 7B (`{'). +character (hex 40) is inverted. Thus ‘\cz’ becomes hex 1A (the +<SUB> control character, in ASCII), but ‘\c{’ becomes hex 3B +(‘;’), and ‘\c;’ becomes hex 7B (‘{’). -

After `\x', from zero to two hexadecimal digits are read (letters +

After ‘\x’, from zero to two hexadecimal digits are read (letters can be in upper or lower case). Any number of hexadecimal digits may -appear between `\x{' and `}', but the value of the +appear between ‘\x{’ and ‘}’, but the value of the character code must be less than 256 in non-UTF-8 mode, and less than 2^31 in UTF-8 mode. That is, the maximum value in hexadecimal is 7FFFFFFF. Note that this is bigger than the largest Unicode code point, which is 10FFFF. -

If characters other than hexadecimal digits appear between `\x{' -and `}', or if there is no terminating `}', this form of -escape is not recognized. Instead, the initial `\x' will be +

If characters other than hexadecimal digits appear between ‘\x{’ +and ‘}’, or if there is no terminating ‘}’, this form of +escape is not recognized. Instead, the initial ‘\x’ will be interpreted as a basic hexadecimal escape, with no following digits, giving a character whose value is zero.

Characters whose value is less than 256 can be defined by either of -the two syntaxes for `\x'. There is no difference in the way they -are handled. For example, `\xdc' is exactly the same as -`\x{dc}'. +the two syntaxes for ‘\x’. There is no difference in the way they +are handled. For example, ‘\xdc’ is exactly the same as +‘\x{dc}’. -

After `\0' up to two further octal digits are read. If there are +

After ‘\0’ up to two further octal digits are read. If there are fewer than two digits, just those that are present are used. Thus the -sequence `\0\x\07' specifies two binary zeros followed by a +sequence ‘\0\x\07’ specifies two binary zeros followed by a <BEL> character (octal 007). Make sure you supply two digits after the initial zero if the pattern character that follows is itself an octal digit. @@ -190,8 +190,8 @@ themselves. In non-UTF-8 mode, the value up to three octal digits following the backslash, and uses them to generate a data character. Any subsequent digits stand for themselves. In non-UTF-8 mode, the value of a character specified in -octal must be less than `\400'. In UTF-8 mode, values up to -`\777' are permitted. For example: +octal must be less than ‘\400’. In UTF-8 mode, values up to +‘\777’ are permitted. For example:

\040
is another way of writing a space @@ -201,13 +201,13 @@ octal must be less than `\11
might be a back reference, or another way of writing a tab
\011
is always a tab -
\0113
is a tab followed by the character `3' +
\0113
is a tab followed by the character ‘3’
\113
might be a back reference, otherwise the character with octal code 113
\377
might be a back reference, otherwise the byte consisting entirely of 1 bits
\81
is either a back reference, or a binary zero - followed by the two characters `8' and `1' + followed by the two characters ‘8’ and ‘1’

Note that octal values of 100 or greater must not be introduced by a @@ -215,18 +215,18 @@ both inside and outside character classe

All the sequences that define a single character value can be used both inside and outside character classes. In addition, inside a -character class, the sequence `\b' is interpreted as the <BS> -character (hex 08), and the sequences `\R' and `\X' are -interpreted as the characters `R' and `X', +character class, the sequence ‘\b’ is interpreted as the <BS> +character (hex 08), and the sequences ‘\R’ and ‘\X’ are +interpreted as the characters ‘R’ and ‘X’, respectively. Outside a character class, these sequences have different meanings (see below).

Absolute and Relative Back References
-

The sequence `\g' followed by an unsigned or a negative number, +

The sequence ‘\g’ followed by an unsigned or a negative number, optionally enclosed in braces, is an absolute or relative back reference. A named back reference can be coded as -`\g{name}'. Back references are discussed later, following the +‘\g{name}’. Back references are discussed later, following the discussion of parenthesized subpatterns.

Generic character types
@@ -256,18 +256,18 @@ string, all of them fail, since there is type. If the current matching point is at the end of the subject string, all of them fail, since there is no character to match. -

For compatibility with Perl, `\s' does not match the <VT> +

For compatibility with Perl, ‘\s’ does not match the <VT> character (code 11). This makes it different from the POSIX -“space” class. The `\s' characters are <TAB> (9), <LF> +“space” class. The ‘\s’ characters are <TAB> (9), <LF> (10), <FF> (12), <CR> (13), and <SPACE> (32).

In UTF-8 mode, characters with values greater than 128 never match -`\d', `\s', or `\w', and always match `\D', -`\S', and `\W'. These sequences retain their original +‘\d’, ‘\s’, or ‘\w’, and always match ‘\D’, +‘\S’, and ‘\W’. These sequences retain their original meanings from before UTF-8 support was available, mainly for efficiency reasons. -

The sequences `\h', `\H', `\v', and `\V' are Perl +

The sequences ‘\h’, ‘\H’, ‘\v’, and ‘\V’ are Perl 5.10 features. In contrast to the other sequences, these do match certain high-valued codepoints in UTF-8 mode. The horizontal space characters are: @@ -334,23 +334,23 @@ the following five sequences: (*CR)a.b -

changes the convention to CR. That pattern matches `a\nb' because +

changes the convention to CR. That pattern matches ‘a\nb’ because LF is no longer a newline. Note that these special settings, which are not Perl-compatible, are recognized only at the very start of a pattern, and that they must be in upper case. If more than one of them is present, the last one is used. -

The newline convention does not affect what the `\R' escape +

The newline convention does not affect what the ‘\R’ escape sequence matches. By default, this is any Unicode newline sequence, for Perl compatibility. However, this can be changed; see the -description of `\R' below. A change of `\R' setting can be +description of ‘\R’ below. A change of ‘\R’ setting can be combined with a change of newline convention.

Newline Sequences
-

Outside a character class, by default, the escape sequence `\R' matches +

Outside a character class, by default, the escape sequence ‘\R’ matches any Unicode newline sequence. This is a Perl 5.10 feature. In -non-UTF-8 mode `\R' is equivalent to the following: +non-UTF-8 mode ‘\R’ is equivalent to the following:

          (?>\r\n|\n|\x0b|\f|\r|\x85)
@@ -367,7 +367,7 @@ and <PS> (paragraph separator, U+2028)
 and <PS> (paragraph separator, U+2029).
 
-   

It is possible to change the meaning of `\R' by starting a +

It is possible to change the meaning of ‘\R’ by starting a pattern string with one of the following sequences:

@@ -385,7 +385,7 @@ example, a pattern can start with: (*ANY)(*BSR_ANYCRLF)
-

Inside a character class, `\R' matches the letter `R'. +

Inside a character class, ‘\R’ matches the letter ‘R’.

Unicode Character Properties
@@ -401,10 +401,10 @@ 256, but they do work in this mode. The

The property names represented by xx above are limited to the -Unicode script names, the general category properties, and `Any', +Unicode script names, the general category properties, and ‘Any’, which matches any character (including newline). Other properties such -as `InMusicalSymbols' are not currently supported by PCRE. Note -that `\P{Any}' does not match any characters, so always causes +as ‘InMusicalSymbols’ are not currently supported by PCRE. Note +that ‘\P{Any}’ does not match any characters, so always causes a match failure.

Sets of Unicode characters are defined as belonging to certain @@ -489,9 +489,9 @@ by including a circumflex between the op

Each character has exactly one general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be specified by including a circumflex between the opening brace and the property name. For -example, `\p{^Lu}' is the same as `\P{Lu}'. +example, ‘\p{^Lu}’ is the same as ‘\P{Lu}’. -

If only one letter is specified with `\p' or `\P', it +

If only one letter is specified with ‘\p’ or ‘\P’, it includes all the general category properties that start with that letter. In this case, in the absence of negation, the curly brackets in the escape sequence are optional; these two examples have the same @@ -550,28 +550,28 @@ effect:

Zs
Space separator -

The special property `L&' is also supported: it matches a -character that has the `Lu', `Ll', or `Lt' property, in +

The special property ‘L&’ is also supported: it matches a +character that has the ‘Lu’, ‘Ll’, or ‘Lt’ property, in other words, a letter that is not classified as a modifier or “other.” -

The `Cs' (Surrogate) property applies only to characters in the +

The ‘Cs’ (Surrogate) property applies only to characters in the range U+D800 to U+DFFF. Such characters are not valid in UTF-8 strings (see RFC 3629) and so cannot be tested by PCRE.

The long synonyms for these properties that Perl supports (such as -`\p{Letter}') are not supported by PCRE, nor is it permitted to -prefix any of these properties with `Is'. +‘\p{Letter}’) are not supported by PCRE, nor is it permitted to +prefix any of these properties with ‘Is’. -

No character that is in the Unicode table has the `Cn' +

No character that is in the Unicode table has the ‘Cn’ (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table.

Specifying caseless matching does not affect these escape sequences. For -example, `\p{Lu}' always matches only upper case letters. +example, ‘\p{Lu}’ always matches only upper case letters. -

The `\X' escape matches any number of Unicode characters that -form an extended Unicode sequence. `\X' is equivalent to +

The ‘\X’ escape matches any number of Unicode characters that +form an extended Unicode sequence. ‘\X’ is equivalent to

          (?>\PM\pM*)
@@ -582,16 +582,16 @@ character. None of them have codepoints 
 treats the sequence as an atomic group (see below).  Characters with
 the “mark” property are typically accents that affect the preceding
 character. None of them have codepoints less than 256, so in non-UTF-8
-mode `\X' matches any one character.
+mode ‘\X’ matches any one character.
 
    

Matching characters by Unicode property is not fast, because PCRE has to search a structure that contains data for over fifteen thousand characters. That is why the traditional escape sequences such as -`\d' and `\w' do not use Unicode properties in PCRE. +‘\d’ and ‘\w’ do not use Unicode properties in PCRE.

Resetting the Match Start
-

The escape sequence `\K', which is a Perl 5.10 feature, causes +

The escape sequence ‘\K’, which is a Perl 5.10 feature, causes any previously matched characters not to be included in the final matched sequence. For example, the pattern: @@ -599,18 +599,18 @@ matched sequence. For example, the patte foo\Kbar

-

matches `foobar', but reports that it has matched -`bar'. This feature is similar to a lookbehind assertion +

matches ‘foobar’, but reports that it has matched +‘bar’. This feature is similar to a lookbehind assertion (described below). However, in this case, the part of the subject before the real match does not have to be of fixed length, as -lookbehind assertions do. The use of `\K' does not interfere with the +lookbehind assertions do. The use of ‘\K’ does not interfere with the setting of captured substrings. For example, when the pattern

          (foo)\Kbar
 
-

matches `foobar', the first substring is still set to `foo'. +

matches ‘foobar’, the first substring is still set to ‘foo’.

Simple assertions
@@ -631,31 +631,31 @@ described below. The backslashed assert

These assertions may not appear in character classes (but note that -`\b' has a different meaning, namely the backspace character, +‘\b’ has a different meaning, namely the backspace character, inside a character class).

A word boundary is a position in the subject string where the current -character and the previous character do not both match `\w' or -`\W' (i.e. one matches `\w' and the other matches -`\W'), or the start or end of the string if the first or last -character matches `\w', respectively. +character and the previous character do not both match ‘\w’ or +‘\W’ (i.e. one matches ‘\w’ and the other matches +‘\W’), or the start or end of the string if the first or last +character matches ‘\w’, respectively. -

The `\A', `\Z', and `\z' assertions differ from the +

The ‘\A’, ‘\Z’, and ‘\z’ assertions differ from the traditional circumflex and dollar (described in the next section) in that they only ever match at the very start and end of the subject string, whatever options are set. Thus, they are independent of -multiline mode. The difference between `\Z' and `\z' is that -`\Z' matches before a newline at the end of the string as well as -at the very end, whereas `\z' matches only at the end. +multiline mode. The difference between ‘\Z’ and ‘\z’ is that +‘\Z’ matches before a newline at the end of the string as well as +at the very end, whereas ‘\z’ matches only at the end. -

The `\G' assertion is true only when the current matching +

The ‘\G’ assertion is true only when the current matching position is at the start point of the match. As used in Monotone, -`\G' is always equal to `\A'. +‘\G’ is always equal to ‘\A’.

Circumflex and Dollar

Outside a character class, in the default matching mode, the -circumflex character, `^', is an assertion that is true only if +circumflex character, ‘^’, is an assertion that is true only if the current matching point is at the start of the subject string. Inside a character class, circumflex has an entirely different meaning (see below). @@ -668,7 +668,7 @@ other constructs that can cause a patter subject, it is said to be an “anchored” pattern. (There are also other constructs that can cause a pattern to be anchored.) -

A dollar character, `$', is an assertion that is true only if the +

A dollar character, ‘$’, is an assertion that is true only if the current matching point is at the end of the subject string, or immediately before a newline at the end of the string (by default). Dollar need not be the last character of the pattern if a @@ -677,23 +677,23 @@ character class. character class.

The meanings of the circumflex and dollar characters are changed if -the `(?m)' option is set. When this is the case, a circumflex +the ‘(?m)’ option is set. When this is the case, a circumflex matches immediately after internal newlines as well as at the start of the subject string. It does not match after a newline that ends the string. A dollar matches before any newlines in the string, as well as -at the very end, when `(?m)' is set. When newline is specified as +at the very end, when ‘(?m)’ is set. When newline is specified as the two-character sequence <CR><LF>, isolated <CR> and <LF> characters do not indicate newlines. -

For example, the pattern `^abc$' matches the subject string -`def\nabc' (where `\n' represents a newline) in multiline +

For example, the pattern ‘^abc$’ matches the subject string +‘def\nabc’ (where ‘\n’ represents a newline) in multiline mode, but not otherwise. Consequently, patterns that are anchored in single line mode because all branches start with ^ are not anchored in multiline mode. -

Note that the sequences `\A', `\Z', and `\z' can be +

Note that the sequences ‘\A’, ‘\Z’, and ‘\z’ can be used to match the start and end of the subject in both modes, and if -all branches of a pattern start with `\A' it is always anchored. +all branches of a pattern start with ‘\A’ it is always anchored.

Full Stop (Period, Dot)
@@ -711,7 +711,7 @@ other line ending characters. other line ending characters.

The behaviour of dot with regard to newlines can be changed. If the -`(?s)' option is set, a dot matches any one character, without +‘(?s)’ option is set, a dot matches any one character, without exception. If the two-character sequence <CR><LF> is present in the subject string, it takes two dots to match it. @@ -721,15 +721,15 @@ special meaning in a character class.

Matching a Single Byte
-

Outside a character class, the escape sequence `\C' matches any +

Outside a character class, the escape sequence ‘\C’ matches any one byte, both in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending characters. The feature is provided in Perl in order to match individual bytes in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes, what remains in the string may -be a malformed UTF-8 string. For this reason, the `\C' escape +be a malformed UTF-8 string. For this reason, the ‘\C’ escape sequence is best avoided. -

PCRE does not allow `\C' to appear in lookbehind assertions +

PCRE does not allow ‘\C’ to appear in lookbehind assertions (described below), because in UTF-8 mode this would make it impossible to calculate the length of the lookbehind. @@ -749,8 +749,8 @@ is not the first character, or escape it a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. -

For example, the character class `[aeiou]' matches any lower case -vowel, while `[^aeiou]' matches any character that is not a lower +

For example, the character class ‘[aeiou]’ matches any lower case +vowel, while ‘[^aeiou]’ matches any character that is not a lower case vowel. Note that a circumflex is just a convenient notation for specifying the characters that are in the class by enumerating those that are not. A class that starts with a circumflex is not an @@ -758,13 +758,13 @@ therefore it fails if the current pointe therefore it fails if the current pointer is at the end of the string.

In UTF-8 mode, characters with values greater than 255 can be included -in a class as a literal string of bytes, or by using the `\x{' +in a class as a literal string of bytes, or by using the ‘\x{’ escaping mechanism.

When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless -`[aeiou]' matches `A' as well as `a', and a caseless [^aeiou] -does not match `A', whereas a caseful version would. In UTF-8 mode, +‘[aeiou]’ matches ‘A’ as well as ‘a’, and a caseless [^aeiou] +does not match ‘A’, whereas a caseful version would. In UTF-8 mode, PCRE always understands the concept of case for characters whose values are less than 128, so caseless matching is always possible. For characters with higher values, the concept of case is supported if @@ -775,43 +775,43 @@ special way when matching character clas

Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending -sequence is in use, and whatever setting of the `(?s)' and -`(?m)' options is used. A class such as `[^a]' always +sequence is in use, and whatever setting of the ‘(?s)’ and +‘(?m)’ options is used. A class such as ‘[^a]’ always matches one of these characters.

The minus (hyphen) character can be used to specify a range of -characters in a character class. For example, `[d-m]' matches any -letter between `d' and `m', inclusive. If a minus character +characters in a character class. For example, ‘[d-m]’ matches any +letter between ‘d’ and ‘m’, inclusive. If a minus character is required in a class, it must be escaped with a backslash or appear in a position where it cannot be interpreted as indicating a range, typically as the first or last character in the class. -

It is not possible to have the literal character `]' as the end -character of a range. A pattern such as `[W-]46]' is interpreted -as a class of two characters (`W' and `-') followed by a -literal string `46]', so it would match `W46]' or -`-46]'. However, if the `]' is escaped with a backslash it -is interpreted as the end of range, so `[W-\]46]' is interpreted +

It is not possible to have the literal character ‘]’ as the end +character of a range. A pattern such as ‘[W-]46]’ is interpreted +as a class of two characters (‘W’ and ‘-’) followed by a +literal string ‘46]’, so it would match ‘W46]’ or +‘-46]’. However, if the ‘]’ is escaped with a backslash it +is interpreted as the end of range, so ‘[W-\]46]’ is interpreted as a class containing a range followed by two other characters. The -octal or hexadecimal representation of `]' can also be used to +octal or hexadecimal representation of ‘]’ can also be used to end a range.

Ranges operate in the collating sequence of character values. They can also be used for characters specified numerically, for example -`[\000-\037]'. In UTF-8 mode, ranges can include characters whose -values are greater than 255, for example `[\x{100}-\x{2ff}]'. +‘[\000-\037]’. In UTF-8 mode, ranges can include characters whose +values are greater than 255, for example ‘[\x{100}-\x{2ff}]’.

If a range that includes letters is used when caseless matching is -set, it matches the letters in either case. For example, `[W-c]' -is equivalent to `[][\\^_`wxyzabc]', matched caselessly. +set, it matches the letters in either case. For example, ‘[W-c]’ +is equivalent to ‘[][\\^_`wxyzabc]’, matched caselessly. -

The character types `\d', `\D', `\p', `\P', -`\s', `\S', `\w', and `\W' may also appear in a +

The character types ‘\d’, ‘\D’, ‘\p’, ‘\P’, +‘\s’, ‘\S’, ‘\w’, and ‘\W’ may also appear in a character class, and add the characters that they match to the -class. For example, `[\dABCDEF]' matches any hexadecimal digit. A +class. For example, ‘[\dABCDEF]’ matches any hexadecimal digit. A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching -lower case type. For example, the class `[^\W_]' matches any +lower case type. For example, the class ‘[^\W_]’ matches any letter or digit, but not underscore.

The only metacharacters that are recognized in character classes are @@ -824,14 +824,14 @@ escaping other non-alphanumeric characte

POSIX Character Classes

Perl supports the POSIX notation for character classes. This uses -names enclosed by `[:' and `:]' within the enclosing square +names enclosed by ‘[:’ and ‘:]’ within the enclosing square brackets. PCRE also supports this notation. For example,

          [01[:alpha:]%]
 
-

matches `0', `1', any alphabetic character, or `%'. The +

matches ‘0’, ‘1’, any alphabetic character, or ‘%’. The supported class names are

@@ -840,33 +840,33 @@ supported class names are
ascii
character codes 0 – 127
blank
space or tab only
cntrl
control characters -
digit
decimal digits (same as `\d') +
digit
decimal digits (same as ‘\d’)
graph
printing characters, excluding space
lower
lower case letters
print
printing characters, including space
punct
printing characters, excluding letters and digits -
space
white space (not quite the same as `\s') +
space
white space (not quite the same as ‘\s’)
upper
upper case letters -
word
“word” characters (same as `\w') +
word
“word” characters (same as ‘\w’)
xdigit
hexadecimal digits

The “space” characters are <HT> (9), <LF> (10), <VT> (11), <FF> (12), <CR> (13), and space (32). Notice that this list includes the <VT> character (code 11). This makes "space" -different to `\s', which does not include <VT> (for Perl +different to ‘\s’, which does not include <VT> (for Perl compatibility).

The name “word” is a Perl extension, and “blank” is a GNU extension from Perl 5.8. Another Perl extension is negation, which is -indicated by a `^' character after the colon. For example, +indicated by a ‘^’ character after the colon. For example,

          [12[:^digit:]]
 
-

matches `1', `2', or any non-digit. PCRE (and Perl) also -recognize the POSIX syntax `[.ch.]' and `[=ch=]' +

matches ‘1’, ‘2’, or any non-digit. PCRE (and Perl) also +recognize the POSIX syntax ‘[.ch.]’ and ‘[=ch=]’ where ch is a “collating element,” but these are not supported, and an error is given if they are encountered. @@ -882,7 +882,7 @@ example, the pattern gilbert|sullivan -

matches either `gilbert' or `sullivan'. Any number of +

matches either ‘gilbert’ or ‘sullivan’. Any number of alternatives may appear, and an empty alternative is permitted (matching the empty string). The matching process tries each alternative in turn, from left to right, and the first one that @@ -893,13 +893,13 @@ as the alternative in the subpattern.

Internal Option Setting

The behavior of the matching engine can be adjusted from within the -pattern by a sequence of option letters enclosed between `(?' and -`)'. The option letters are +pattern by a sequence of option letters enclosed between ‘(?’ and +‘)’. The option letters are

i
Caseless: characters in one case match the corresponding characters in other cases as well. -
m
Multiline: `^' and `$' match at newlines +
m
Multiline: ‘^’ and ‘$’ match at newlines as well as at beginning and end of string.
s
Dotall: dot matches any character, including newline characters.
x
Extended syntax: unescaped white space is ignored and embedded @@ -910,9 +910,9 @@ with no defined meaning appears. with no defined meaning appears.
-

For example, `(?im)' sets caseless, multiline matching. It is +

For example, ‘(?im)’ sets caseless, multiline matching. It is also possible to unset these options by preceding the letters with a -hyphen, and a combined setting and unsetting such as `(?im-sx)' +hyphen, and a combined setting and unsetting such as ‘(?im-sx)’ is also permitted. (This would set the caseless and multiline options while unsetting the dotall and extended-syntax options.) If a letter appears both before and after the hyphen, the option is unset. The @@ -929,7 +929,7 @@ current pattern that follows it, so (a(?i)b)c -

matches `abc' and `aBc' and no other strings. By this +

matches ‘abc’ and ‘aBc’ and no other strings. By this means, options can be made to have different settings in different parts of the pattern. Any changes made in one alternative do carry on into subsequent branches within the same subpattern. For example, @@ -938,14 +938,14 @@ into subsequent branches within the same (a(?i)b|c) -

matches `ab', `aB', `c', and `C', even though when -matching `C' the first branch is abandoned before the option +

matches ‘ab’, ‘aB’, ‘c’, and ‘C’, even though when +matching ‘C’ the first branch is abandoned before the option setting. This is because the effects of option settings happen when the pattern is parsed. There would be some very weird behaviour otherwise.

Note: Unlike these options, the similar, PCRE-specific option -sequences that start with `(*' may appear only at the very +sequences that start with ‘(*’ may appear only at the very beginning of the pattern. Details of these sequences are given in the section entitled “Newline sequences,” above. @@ -962,9 +962,9 @@ things: cat(aract|erpillar|) -

matches one of the words `cat', `cataract', or -`caterpillar'. Without the parentheses, it would match -`cataract', `erpillar' or an empty string. +

matches one of the words ‘cat’, ‘cataract’, or +‘caterpillar’. Without the parentheses, it would match +‘cataract’, ‘erpillar’ or an empty string.

  • It sets up the subpattern as a capturing subpattern. As used in Monotone this only means that during matching, the portion of the @@ -974,14 +974,14 @@ subpatterns. left to right (starting from 1) to obtain numbers for the capturing subpatterns. -

    For example, if the string `the red king' is matched against the pattern +

    For example, if the string ‘the red king’ is matched against the pattern

         
                   the ((red|white) (king|queen))
     
    -

    the captured substrings are `red king', `red', and -`king', and are numbered 1, 2, and 3, respectively. +

    the captured substrings are ‘red king’, ‘red’, and +‘king’, and are numbered 1, 2, and 3, respectively.

    The fact that plain parentheses fulfil two functions is not always @@ -989,20 +989,20 @@ capturing, and is not counted when compu without a capturing requirement. If an opening parenthesis is followed by a question mark and a colon, the subpattern does not do any capturing, and is not counted when computing the number of any -subsequent capturing subpatterns. For example, if the string `the -white queen' is matched against the pattern +subsequent capturing subpatterns. For example, if the string ‘the +white queen’ is matched against the pattern

              the ((?:red|white) (king|queen))
     
    -

    the captured substrings are `white queen' and `queen', and +

    the captured substrings are ‘white queen’ and ‘queen’, and are numbered 1 and 2. The maximum number of capturing subpatterns is 65535.

    As a convenient shorthand, if any option settings are required at the start of a non-capturing subpattern, the option letters may appear -between the `?' and the `:'. Thus the two patterns +between the ‘?’ and the ‘:’. Thus the two patterns

              (?i:saturday|sunday)
    @@ -1012,26 +1012,26 @@ of the subpattern is reached, an option 
     

    match exactly the same set of strings. Because alternative branches are tried from left to right, and options are not reset until the end of the subpattern is reached, an option setting in one branch does -affect subsequent branches, so the above patterns match `SUNDAY' -as well as `Saturday'. +affect subsequent branches, so the above patterns match ‘SUNDAY’ +as well as ‘Saturday’.

    Duplicate Subpattern Numbers

    Perl 5.10 introduced a feature whereby each alternative in a subpattern uses the same numbers for its capturing parentheses. Such a -subpattern starts with `(?|' and is itself a non-capturing +subpattern starts with ‘(?|’ and is itself a non-capturing subpattern. For example, consider this pattern:

              (?|(Sat)ur|(Sun))day
     
    -

    Because the two alternatives are inside a `(?|' group, both sets +

    Because the two alternatives are inside a ‘(?|’ group, both sets of capturing parentheses are numbered one. Thus, when the pattern matches, you can look at captured substring number one, whichever alternative matched. This construct is useful when you want to capture part, but not all, of one of a number of alternatives. Inside a -`(?|' group, parentheses are numbered as usual, but the number is +‘(?|’ group, parentheses are numbered as usual, but the number is reset at the start of each branch. The numbers of any capturing buffers that follow the subpattern start after the highest number used in any branch. The following example is taken from the Perl @@ -1062,8 +1062,8 @@ and the Python syntax. and the Python syntax.

    In PCRE, a subpattern can be named in one of three ways: -`(?<name>...)' or `(?'name'...)' as in Perl, or -`(?P<name>...)' as in Python. References to capturing +‘(?<name>...)’ or ‘(?'name'...)’ as in Perl, or +‘(?P<name>...)’ as in Python. References to capturing parentheses from other parts of the pattern, such as backreferences, recursion, and conditions, can be made by name as well as by number. @@ -1072,7 +1072,7 @@ as well as names, exactly as if the name as well as names, exactly as if the names were not present.

    By default, a name must be unique within a pattern, but it is possible -to relax this constraint by setting the `(?J)' option. This can +to relax this constraint by setting the ‘(?J)’ option. This can be useful for patterns where only one instance of the named parentheses can match. Suppose you want to match the name of a weekday, either as a 3-letter abbreviation or as the full name, and in @@ -1100,10 +1100,10 @@ the following items:

    • a literal data character
    • the dot metacharacter -
    • the `\C' escape sequence -
    • the `\X' escape sequence (in UTF-8 mode with Unicode properties) -
    • the `\R' escape sequence -
    • an escape such as `\d' that matches a single character +
    • the ‘\C’ escape sequence +
    • the ‘\X’ escape sequence (in UTF-8 mode with Unicode properties) +
    • the ‘\R’ escape sequence +
    • an escape such as ‘\d’ that matches a single character
    • a character class
    • a back reference (see next section)
    • a parenthesized subpattern (unless it is an assertion) @@ -1119,7 +1119,7 @@ example: z{2,4}
    -

    matches `zz', `zzz', or `zzzz'. A closing brace on its +

    matches ‘zz’, ‘zzz’, or ‘zzzz’. A closing brace on its own is not a special character. If the second number is omitted, but the comma is present, there is no upper limit; if the second number and the comma are both omitted, the quantifier specifies an exact @@ -1138,17 +1138,17 @@ the syntax of a quantifier, is taken as

    matches exactly 8 digits. An opening curly bracket that appears in a position where a quantifier is not allowed, or one that does not match the syntax of a quantifier, is taken as a literal character. For -example, `{,6}' is not a quantifier, but a literal string of four +example, ‘{,6}’ is not a quantifier, but a literal string of four characters.

    In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to -individual bytes. Thus, for example, `\x{100}{2}' matches two +individual bytes. Thus, for example, ‘\x{100}{2}’ matches two UTF-8 characters, each of which is represented by a two-byte -sequence. Similarly, `\X{3}' matches three Unicode extended +sequence. Similarly, ‘\X{3}’ matches three Unicode extended sequences, each of which may be several bytes long (and they may be of different lengths). -

    The quantifier `{0}' is permitted, causing the expression to +

    The quantifier ‘{0}’ is permitted, causing the expression to behave as if the previous item and the quantifier were not present.

    For convenience, the three most common quantifiers have @@ -1177,8 +1177,8 @@ of where this gives problems is in tryin much as possible (up to the maximum number of permitted times), without causing the rest of the pattern to fail. The classic example of where this gives problems is in trying to match comments in C -programs. These appear between `/*' and `*/', and within the -comment, individual `*' and `/' characters may appear. An +programs. These appear between ‘/*’ and ‘*/’, and within the +comment, individual ‘*’ and ‘/’ characters may appear. An attempt to match C comments by applying the pattern

    @@ -1192,7 +1192,7 @@ attempt to match C comments by applying 
     

    fails, because it matches the entire string owing to the greediness of -the `.*' item. +the ‘.*’ item.

    However, if a quantifier is followed by a question mark, it ceases to be greedy, and instead matches the minimum number of times possible, @@ -1215,7 +1215,7 @@ way the rest of the pattern matches.

    which matches one digit by preference, but can match two if that is the only way the rest of the pattern matches. -

    If the `(?U)' option is set (an option that is not available in +

    If the ‘(?U)’ option is set (an option that is not available in Perl), the quantifiers are not greedy by default, but individual ones can be made greedy by following them with a question mark. In other words, it inverts the default behaviour. @@ -1224,17 +1224,17 @@ compiled pattern, in proportion to the s is greater than 1 or with a limited maximum, more memory is required for the compiled pattern, in proportion to the size of the minimum or maximum. -

    If a pattern starts with `.*' or `.{0,}' and the -`(?s)' option is set, thus allowing the dot to match newlines, +

    If a pattern starts with ‘.*’ or ‘.{0,}’ and the +‘(?s)’ option is set, thus allowing the dot to match newlines, the pattern is implicitly anchored, because whatever follows will be tried against every character position in the subject string, so there is no point in retrying the overall match at any position after the first. PCRE normally treats such a pattern as though it were preceded -by `\A'. +by ‘\A’.

    In cases where it is known that the subject string contains no -newlines, it is worth setting `(?s)' in order to obtain this -optimization, or alternatively using `^' or `\A' to indicate +newlines, it is worth setting ‘(?s)’ in order to obtain this +optimization, or alternatively using ‘^’ or ‘\A’ to indicate anchoring explicitly.

    However, there is one situation where the optimization cannot be @@ -1246,7 +1246,7 @@ fail where a later one succeeds. Conside (.*)abc\1 -

    If the subject is `xyz123abc123' the match point is the fourth +

    If the subject is ‘xyz123abc123’ the match point is the fourth character. For this reason, such a pattern is not implicitly anchored.

    When a capturing subpattern is repeated, the value captured is the @@ -1256,8 +1256,8 @@ substring that matched the final iterati (tweedle[dume]{3}\s*)+ -

    has matched `tweedledum tweedledee' the value of the captured -substring is `tweedledee'. However, if there are nested capturing +

    has matched ‘tweedledum tweedledee’ the value of the captured +substring is ‘tweedledee’. However, if there are nested capturing subpatterns, the corresponding captured values may have been set in previous iterations. For example, after @@ -1265,7 +1265,7 @@ previous iterations. For example, after (a|(b))+ -

    matches `aba' the value of the second captured substring is `b'. +

    matches ‘aba’ the value of the second captured substring is ‘b’.

    Atomic Grouping and Possessive Quantifiers
    @@ -1277,23 +1277,23 @@ the pattern knows there is no point in c to cause it fail earlier than it otherwise might, when the author of the pattern knows there is no point in carrying on. -

    Consider, for example, the pattern `\d+foo' when applied to the +

    Consider, for example, the pattern ‘\d+foo’ when applied to the subject line

              123456bar
     
    -

    After matching all 6 digits and then failing to match `foo', the +

    After matching all 6 digits and then failing to match ‘foo’, the normal action of the matcher is to try again with only 5 digits -matching the `\d+' item, and then with 4, and so on, before +matching the ‘\d+’ item, and then with 4, and so on, before ultimately failing. Atomic grouping (a term taken from Jeffrey Friedl's book) provides the means for specifying that once a subpattern has matched, it is not to be re-evaluated in this way.

    If we use atomic grouping for the previous example, the matcher gives -up immediately on failing to match `foo' the first time. The -notation is a kind of special parenthesis, starting with `(?>' as in +up immediately on failing to match ‘foo’ the first time. The +notation is a kind of special parenthesis, starting with ‘(?>’ as in this example:

    @@ -1312,15 +1312,15 @@ maximizing repeat that must swallow ever
     
        

    Simple cases such as the above example can be thought of as a maximizing repeat that must swallow everything it can. So, while both -`\d+' and `\d+?' are prepared to adjust the number of digits +‘\d+’ and ‘\d+?’ are prepared to adjust the number of digits they match in order to make the rest of the pattern match, -`(?>\d+)' can only match an entire sequence of digits. +‘(?>\d+)’ can only match an entire sequence of digits.

    Atomic groups in general can of course contain arbitrarily complicated subpatterns, and can be nested. However, when the subpattern for an atomic group is just a single repeated item, as in the example above, a simpler notation, called a possessive quantifier, can be -used. This consists of an additional `+' character following a +used. This consists of an additional ‘+’ character following a quantifier. Using this notation, the previous example can be rewritten as @@ -1336,7 +1336,7 @@ example:

    Possessive quantifiers are always greedy; the setting of the -`(?U)' option is ignored. They are a convenient notation for the +‘(?U)’ option is ignored. They are a convenient notation for the simpler forms of atomic group. However, there is no difference in the meaning of a possessive quantifier and the equivalent atomic group, though there may be a performance difference; possessive quantifiers @@ -1349,9 +1349,9 @@ ultimately found its way into Perl at re ultimately found its way into Perl at release 5.10.

    PCRE has an optimization that automatically “possessifies” certain -simple pattern constructs. For example, the sequence `A+B' is -treated as `A++B' because there is no point in backtracking into -a sequence of `A's when `B' must follow. +simple pattern constructs. For example, the sequence ‘A+B’ is +treated as ‘A++B’ because there is no point in backtracking into +a sequence of ‘A’s when ‘B’ must follow.

    When a pattern contains an unlimited repeat inside a subpattern that can itself be repeated an unlimited number of times, the use of an @@ -1363,8 +1363,8 @@ very long time indeed. The pattern

    matches an unlimited number of substrings that either consist of -non-digits, or digits enclosed in `<>', followed by either -`!' or `?'. When it matches, it runs quickly. However, if it +non-digits, or digits enclosed in ‘<>’, followed by either +‘!’ or ‘?’. When it matches, it runs quickly. However, if it is applied to

    @@ -1372,9 +1372,9 @@ is applied to
     

    it takes a long time before reporting failure. This is because the -string can be divided between the internal `\D+' repeat and the -external `*' repeat in a large number of ways, and all have to be -tried. (The example uses `[!?]' rather than a single character at +string can be divided between the internal ‘\D+’ repeat and the +external ‘*’ repeat in a large number of ways, and all have to be +tried. (The example uses ‘[!?]’ rather than a single character at the end, because both PCRE and Perl have an optimization that allows for fast failure when a single character is used. They remember the last single character that is required for a match, and fail early if @@ -1406,7 +1406,7 @@ subpattern whose number is 10 or more us

    It is not possible to have a numerical “forward back reference” to a subpattern whose number is 10 or more using this syntax because a -sequence such as `\50' is interpreted as a character defined in +sequence such as ‘\50’ is interpreted as a character defined in octal. See the subsection entitled “Non-printing characters” above for further details of the handling of digits following a backslash. There is no such problem when named parentheses are used. A @@ -1414,7 +1414,7 @@ back reference to any subpattern is poss (see below).

    Another way of avoiding the ambiguity inherent in the use of digits -following a backslash is to use the `\g' escape sequence, which +following a backslash is to use the ‘\g’ escape sequence, which is a feature introduced in Perl 5.10. This escape must be followed by an unsigned number or a negative number, optionally enclosed in braces. These examples are all identical: @@ -1434,10 +1434,10 @@ reference. Consider this example: (abc(def)ghi)\g{-1} -

    The sequence `\g{-1}' is a reference to the most recently -started capturing subpattern before `\g', that is, is it -equivalent to `\2'. Similarly, `\g{-2}' would be -equivalent to `\1'. The use of relative references can be helpful +

    The sequence ‘\g{-1}’ is a reference to the most recently -started capturing subpattern before `\g', that is, is it -equivalent to `\2'. Similarly, `\g{-2}' would be -equivalent to `\1'. The use of relative references can be helpful +started capturing subpattern before ‘\g’, that is, it is +equivalent to ‘\2’. Similarly, ‘\g{-2}’ would be +equivalent to ‘\1’. The use of relative references can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves. @@ -1450,8 +1450,8 @@ for a way of doing that). So the pattern (sens|respons)e and \1ibility -

    matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If caseful +

    matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If caseful matching is in force at the time of the back reference, the case of letters is relevant. For example, @@ -1459,14 +1459,14 @@ letters is relevant. For example, ((?i)rah)\s+\1 -

    matches `rah rah' and `RAH RAH', but not `RAH rah', +

    matches ‘rah rah’ and ‘RAH RAH’, but not ‘RAH rah’, even though the original capturing subpattern is matched caselessly.

    There are several different ways of writing back references to named -subpatterns. The .NET syntax `\k{name}' and the Perl syntax -`\k<name>' or `\k'name'' are supported, as is the Python +subpatterns. The .NET syntax ‘\k{name}’ and the Perl syntax +‘\k<name>’ or ‘\k'name'’ are supported, as is the Python syntax (?P=name). Perl 5.10's unified back reference syntax, in which -`\g' can be used for both numeric and named references, is also +‘\g’ can be used for both numeric and named references, is also supported. We could rewrite the above example in any of the following ways: @@ -1488,16 +1488,16 @@ references to it always fail. For exampl (a|(bc))\2 -

    always fails if it starts to match `a' rather than -`bc'. Because there may be many capturing parentheses in a +

    always fails if it starts to match ‘a’ rather than +‘bc’. Because there may be many capturing parentheses in a pattern, all digits following the backslash are taken as part of a potential back reference number. If the pattern continues with a digit character, some delimiter must be used to terminate the back -reference. If the `(?x)' option is set, this can be whitespace. +reference. If the ‘(?x)’ option is set, this can be whitespace. Otherwise an empty comment (see “Comments” below) can be used.

    A back reference that occurs inside the parentheses to which it refers -fails when the subpattern is first used, so, for example, `(a\1)' +fails when the subpattern is first used, so, for example, ‘(a\1)’ never matches. However, such references can be useful inside repeated subpatterns. For example, the pattern @@ -1505,7 +1505,7 @@ subpatterns. For example, the pattern (a|b\1)+ -

    matches any number of `a's and also `aba', `ababbaa' +

    matches any number of ‘a’s and also ‘aba’, ‘ababbaa’ etc. At each iteration of the subpattern, the back reference matches the character string corresponding to the previous iteration. In order for this to work, the pattern must be such that the first iteration @@ -1517,8 +1517,8 @@ current matching point that does not act

    An assertion is a test on the characters following or preceding the current matching point that does not actually consume any -characters. The simple assertions coded as `\b', `\B', -`\A', `\G', `\Z', `\z', `^' and `$' are +characters. The simple assertions coded as ‘\b’, ‘\B’, +‘\A’, ‘\G’, ‘\Z’, ‘\z’, ‘^’ and ‘$’ are described above.

    More complicated assertions are coded as subpatterns. There are two @@ -1537,8 +1537,8 @@ sense for negative assertions.

    Lookahead Assertions
    -

    Lookahead assertions start with `(?=' for positive assertions and -`(?!' for negative assertions. For example, +

    Lookahead assertions start with ‘(?=’ for positive assertions and +‘(?!’ for negative assertions. For example,

              \w+(?=;)
    @@ -1551,35 +1551,35 @@ the match, and
              foo(?!bar)
     
    -

    matches any occurrence of `foo' that is not followed by -`bar'. Note that the apparently similar pattern +

    matches any occurrence of ‘foo’ that is not followed by +‘bar’. Note that the apparently similar pattern

              (?!foo)bar
     
    -

    does not find an occurrence of `bar' that is preceded by -something other than `foo'; it finds any occurrence of `bar' -whatsoever, because the assertion `(?!foo)' is always true when -the next three characters are `bar'. A lookbehind assertion is +

    does not find an occurrence of ‘bar’ that is preceded by +something other than ‘foo’; it finds any occurrence of ‘bar’ +whatsoever, because the assertion ‘(?!foo)’ is always true when +the next three characters are ‘bar’. A lookbehind assertion is needed to achieve the other effect.

    If you want to force a matching failure at some point in a pattern, -the most convenient way to do it is with `(?!)' because an empty +the most convenient way to do it is with ‘(?!)’ because an empty string always matches, so an assertion that requires there not to be an empty string must always fail.

    Lookbehind Assertions
    -

    Lookbehind assertions start with `(?<=' for positive assertions -and `(?<!' for negative assertions. For example, +

    Lookbehind assertions start with ‘(?<=’ for positive assertions +and ‘(?<!’ for negative assertions. For example,

              (?<!foo)bar
     
    -

    matches an occurrence of `bar' that is not preceded by -`foo'. The contents of a lookbehind assertion are restricted such +

    matches an occurrence of ‘bar’ that is not preceded by +‘foo’. The contents of a lookbehind assertion are restricted such that all the strings it matches must have a fixed length. However, if there are several top-level alternatives, they do not all have to have the same fixed length. Thus @@ -1611,7 +1611,7 @@ lengths, but it is acceptable if rewritt (?<=abc|abde) -

    In some cases, the Perl 5.10 escape sequence `\K' (see above) can +

    In some cases, the Perl 5.10 escape sequence ‘\K’ (see above) can be used instead of a lookbehind assertion; this is not restricted to a fixed-length. @@ -1620,10 +1620,10 @@ current position, the assertion fails. then try to match. If there are insufficient characters before the current position, the assertion fails. -

    PCRE does not allow the `\C' escape (which matches a single byte +

    PCRE does not allow the ‘\C’ escape (which matches a single byte in UTF-8 mode) to appear in lookbehind assertions, because it makes it -impossible to calculate the length of the lookbehind. The `\X' -and `\R' escapes, which can match different numbers of bytes, are +impossible to calculate the length of the lookbehind. The ‘\X’ +and ‘\R’ escapes, which can match different numbers of bytes, are also not permitted.

    Possessive quantifiers can be used in conjunction with lookbehind @@ -1635,7 +1635,7 @@ string. Consider a simple pattern such a

    when applied to a long string that does not match. Because matching -proceeds from left to right, PCRE will look for each `a' in the +proceeds from left to right, PCRE will look for each ‘a’ in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as @@ -1643,10 +1643,10 @@ pattern. If the pattern is specified as ^.*abcd$ -

    the initial `.*' matches the entire string at first, but when this fails -(because there is no following `a'), it backtracks to match all +

    the initial ‘.*’ matches the entire string at first, but when this fails +(because there is no following ‘a’), it backtracks to match all but the last character, then all but the last two characters, and so -on. Once again the search for `a' covers the entire string, from +on. Once again the search for ‘a’ covers the entire string, from right to left, so we are no better off. However, if the pattern is written as @@ -1654,7 +1654,7 @@ written as ^.*+(?<=abcd) -

    there can be no backtracking for the `.*+' item; it can match +

    there can be no backtracking for the ‘.*+’ item; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -1668,14 +1668,14 @@ difference to the processing time. (?<=\d{3})(?<!999)foo -

    matches `foo' preceded by three digits that are not -`999'. Notice that each of the assertions is applied +

    matches ‘foo’ preceded by three digits that are not +‘999’. Notice that each of the assertions is applied independently at the same point in the subject string. First there is a check that the previous three characters are all digits, and then -there is a check that the same three characters are not `999'. -This pattern does not match `foo' preceded by six +there is a check that the same three characters are not ‘999’. +This pattern does not match ‘foo’ preceded by six characters, the first of which are digits and the last three of which -are not `999'. For example, it doesn't match `123abcfoo'. A +are not ‘999’. For example, it doesn't match ‘123abcfoo’. A pattern to do that is

    @@ -1685,7 +1685,7 @@ assertion checks that the preceding thre
     

    This time the first assertion looks at the preceding six characters, checking that the first three are digits, and then the second assertion checks that the preceding three characters are not -`999'. +‘999’.

    Assertions can be nested in any combination. For example, @@ -1693,15 +1693,15 @@ assertion checks that the preceding thre (?<=(?<!foo)bar)baz

    -

    matches an occurrence of `baz' that is preceded by `bar' -which in turn is not preceded by `foo', while +

    matches an occurrence of ‘baz’ that is preceded by ‘bar’ +which in turn is not preceded by ‘foo’, while

              (?<=\d{3}(?!999)...)foo
     
    -

    is another pattern that matches `foo' preceded by three digits -and any three characters that are not `999'. +

    is another pattern that matches ‘foo’ preceded by three digits +and any three characters that are not ‘999’.

    Conditional Subpatterns
    @@ -1721,7 +1721,7 @@ than two alternatives in the subpattern, than two alternatives in the subpattern, a compile-time error occurs.

    There are four kinds of condition: references to subpatterns, -references to recursion, a pseudo-condition called `DEFINE', and +references to recursion, a pseudo-condition called ‘DEFINE’, and assertions.

    Checking for a used subpattern by number
    @@ -1731,13 +1731,13 @@ relative rather than absolute. The most previously matched. An alternative notation is to precede the digits with a plus or minus sign. In this case, the subpattern number is relative rather than absolute. The most recently opened parentheses -can be referenced by `(?(-1)', the next most recent by -`(?(-2)', and so on. In looping constructs it can also make sense -to refer to subsequent groups with constructs such as `(?(+2)'. +can be referenced by ‘(?(-1)’, the next most recent by +‘(?(-2)’, and so on. In looping constructs it can also make sense +to refer to subsequent groups with constructs such as ‘(?(+2)’.

    Consider the following pattern, which contains non-significant white space to make it more readable and to divide it into three parts for -ease of discussion (assume a preceding `(?x)'): +ease of discussion (assume a preceding ‘(?x)’):

              ( \( )?    [^()]+    (?(1) \) )
    @@ -1765,10 +1765,10 @@ relative reference:
     
     
    Checking for a used subpattern by name
    -

    Perl uses the syntax `(?(<name>)...)' or `(?('name')...)' to +

    Perl uses the syntax ‘(?(<name>)...)’ or ‘(?('name')...)’ to test for a used subpattern by name. For compatibility with earlier versions of PCRE, which had this facility before Perl, the syntax -`(?(name)...)' is also recognized. However, there is a possible +‘(?(name)...)’ is also recognized. However, there is a possible ambiguity with this syntax, because subpattern names may consist entirely of digits. PCRE looks first for a named subpattern; if it cannot find one and the name consists entirely of digits, PCRE looks @@ -1784,10 +1784,10 @@ recommended.

    Checking for pattern recursion
    -

    If the condition is the string `(R)', and there is no subpattern -with the name `R', the condition is true if a recursive call to +

    If the condition is the string ‘(R)’, and there is no subpattern +with the name ‘R’, the condition is true if a recursive call to the whole pattern or any subpattern has been made. If digits or a name -preceded by ampersand follow the letter `R', for example: +preceded by ampersand follow the letter ‘R’, for example:

              (?(R3)...) or (?(R&name)...)
    @@ -1802,8 +1802,8 @@ patterns are described below.
     
     
    Defining subpatterns for use by reference only
    -

    If the condition is the string `(DEFINE)', and there is no -subpattern with the name `DEFINE', the condition is always +

    If the condition is the string ‘(DEFINE)’, and there is no +subpattern with the name ‘DEFINE’, the condition is always false. In this case, there may be only one alternative in the subpattern. It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define @@ -1844,18 +1844,18 @@ otherwise it is matched against the seco it tests for the presence of at least one letter in the subject. If a letter is found, the subject is matched against the first alternative; otherwise it is matched against the second. This pattern matches -strings in one of the two forms `dd-aaa-dd' or -`dd-dd-dd', where aaa are letters and +strings in one of the two forms ‘dd-aaa-dd’ or +‘dd-dd-dd’, where aaa are letters and dd are digits.

    Comments
    -

    The sequence `(?#' marks the start of a comment that continues up +

    The sequence ‘(?#’ marks the start of a comment that continues up to the next closing parenthesis. Nested parentheses are not permitted. The characters that make up a comment play no part in the pattern matching at all. -

    If the `(?x)' option is set, an unescaped `#' character +

    If the ‘(?x)’ option is set, an unescaped ‘#’ character outside a character class introduces a comment that continues to immediately after the next newline in the pattern. @@ -1872,11 +1872,11 @@ release 5.10. PCRE and Python, this kind of recursion was introduced into Perl at release 5.10. -

    A special item that consists of `(?' followed by a number greater +

    A special item that consists of ‘(?’ followed by a number greater than zero and a closing parenthesis is a recursive call of the subpattern of the given number, provided that it occurs inside that subpattern. (If not, it is a subroutine call, which is described in -the next section.) The special item `(?R)' or `(?0)' is a +the next section.) The special item ‘(?R)’ or ‘(?0)’ is a recursive call of the entire regular expression.

    In PCRE (like Python, but unlike Perl), a recursive subpattern call is @@ -1908,21 +1908,21 @@ tricky. This is made easier by the use o

    In a larger pattern, keeping track of parenthesis numbers can be tricky. This is made easier by the use of relative references. (A Perl -5.10 feature.) Instead of `(?1)' in the pattern above you can -write `(?-2)' to refer to the second most recently opened +5.10 feature.) Instead of ‘(?1)’ in the pattern above you can +write ‘(?-2)’ to refer to the second most recently opened parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered.

    It is also possible to refer to subsequently opened parentheses, by -writing references such as `(?+2)'. However, these cannot be +writing references such as ‘(?+2)’. However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always subroutine calls, as described in the next section.

    An alternative approach is to use named parentheses instead. The Perl -syntax for this is `(?&name)'; PCRE's earlier syntax -`(?P>name)' is also supported. We could rewrite the above example +syntax for this is ‘(?&name)’; PCRE's earlier syntax +‘(?P>name)’ is also supported. We could rewrite the above example as follows:

    @@ -1944,7 +1944,7 @@ runs for a very long time indeed because
     
     

    it fails quickly. However, if atomic grouping is not used, the match runs for a very long time indeed because there are so many different -ways the `+' and `*' repeats can carve up the subject, and +ways the ‘+’ and ‘*’ repeats can carve up the subject, and all have to be tested before failure can be reported.

    At the end of a match, the values set for any capturing subpatterns @@ -1955,7 +1955,7 @@ subpattern value is set. If the pattern (ab(cd)ef)

    -

    the value for the capturing parentheses is `ef', which is the +

    the value for the capturing parentheses is ‘ef’, which is the last value taken on at the top level. If additional parentheses are added, giving @@ -1978,9 +1978,9 @@ characters are permitted at the outer le < (?: (?(R) \d++ | [^<>]*+) | (?R)) * >

    -

    In this pattern, `(?(R)' is the start of a conditional +

    In this pattern, ‘(?(R)’ is the start of a conditional subpattern, with two different alternatives for the recursive and -non-recursive cases. The `(?R)' item is the actual recursive +non-recursive cases. The ‘(?R)’ item is the actual recursive call.

    Subpatterns as Subroutines
    @@ -2003,15 +2003,15 @@ reference can be absolute or relative, a (sens|respons)e and \1ibility
    -

    matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If instead +

    matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If instead the pattern

              (sens|respons)e and (?1)ibility
     
    -

    is used, it does match `sense and responsibility' as well as the +

    is used, it does match ‘sense and responsibility’ as well as the other two strings. Another example is given in the discussion of DEFINE above. @@ -2029,7 +2029,7 @@ pattern: (abc)(?i:(?-1)) -

    It matches `abcabc'. It does not match `abcABC' because the +

    It matches ‘abcabc’. It does not match ‘abcABC’ because the change of processing option does not affect the called subpattern.

    Backtracking Control
    @@ -2043,8 +2043,8 @@ opening parenthesis followed by an aster

    The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. In Perl, they are -generally of the form `(*VERB:ARG)' but PCRE does not support the -use of arguments, so its general form is just `(*VERB)'. Any +generally of the form ‘(*VERB:ARG)’ but PCRE does not support the +use of arguments, so its general form is just ‘(*VERB)’. Any number of these verbs may occur in a pattern. There are two kinds:

    Verbs that act immediately
    @@ -2056,19 +2056,19 @@ ended immediately. PCRE differs from Per This verb causes the match to end successfully, skipping the remainder of the pattern. When inside a recursion, only the innermost pattern is ended immediately. PCRE differs from Perl in what happens if the -`(*ACCEPT)' is inside capturing parentheses. In Perl, the data so +‘(*ACCEPT)’ is inside capturing parentheses. In Perl, the data so far is captured: in PCRE no data is captured. For example:
         
                   A(A|B(*ACCEPT)|C)D
     
    -

    This matches `AB', `AAD', or `ACD', but when it matches -`AB', no data is captured. +

    This matches ‘AB’, ‘AAD’, or ‘ACD’, but when it matches +‘AB’, no data is captured.

    (*FAIL) or (*F)
    This verb causes the match to fail, forcing backtracking to occur. It -is equivalent to `(?!)' but easier to read. It is not clear +is equivalent to ‘(?!)’ but easier to read. It is not clear whether there is any use for this without the ability to execute code in the middle of the pattern (which Perl has but PCRE in Monotone does not). @@ -2094,7 +2094,7 @@ all. For example: a+(*COMMIT)b -

    This matches `xxaab' but not `aacaab'. It can be thought of +

    This matches ‘xxaab’ but not ‘aacaab’. It can be thought of as a kind of dynamic anchor, or “I've started, so I must finish.”

    (*PRUNE)
    @@ -2120,13 +2120,13 @@ leading up to it cannot be part of a suc a+(*SKIP)b -

    If the subject is `aaaac...', after the first match attempt fails +

    If the subject is ‘aaaac...’, after the first match attempt fails (starting at the first character in the string), the starting point -skips on to start the next attempt at `c'. Note that a possessive +skips on to start the next attempt at ‘c’. Note that a possessive quantifier does not have the same effect in this example; although it would suppress backtracking during the first match attempt, the second attempt would start at the second character instead of skipping on to -`c'. +‘c’.

    (*THEN)
    This verb causes a skip to the next alternation if the rest of the @@ -2141,10 +2141,10 @@ block: | COND3 (*THEN) BAZ ) ... -

    If the `COND1' pattern matches, `FOO' is tried (and possibly -further items after the end of the group if `FOO' succeeds); on +

    If the ‘COND1’ pattern matches, ‘FOO’ is tried (and possibly +further items after the end of the group if ‘FOO’ succeeds); on failure the matcher skips to the second alternative and tries -`COND2', without backtracking into COND1. If (*THEN) is used +‘COND2’, without backtracking into COND1. If (*THEN) is used outside of any alternation, it acts exactly like (*PRUNE). ============================================================ --- docs/Regexp-Summary.html 3bd9b164c47cfebf0b14099e9ca05f3be4d2cb11 +++ docs/Regexp-Summary.html cb5173340dc7d085bb9b31fee5beb7af838d7b23 @@ -1,9 +1,9 @@ Regexp Summary - monotone documentation - + @@ -79,10 +79,10 @@ used in Monotone.

    \X
    an extended Unicode sequence -

    `\d', `\D', `\s', `\S', `\w', and `\W' +

    ‘\d’, ‘\D’, ‘\s’, ‘\S’, ‘\w’, and ‘\W’ recognize only ASCII characters. -

    General category property codes for `\p' and `\P'
    +
    General category property codes for ‘\p’ and ‘\P’
    C
    Other @@ -131,7 +131,7 @@ recognize only ASCII characters.
    Zs
    Space separator
    -
    Script names for `\p' and `\P'
    +
    Script names for ‘\p’ and ‘\P’

    Arabic, Armenian, @@ -221,12 +221,12 @@ Yi.

    punct
    printing, excluding alphanumeric
    space
    whitespace
    upper
    upper case letter -
    word
    same as `\w' +
    word
    same as ‘\w’
    xdigit
    hexadecimal digit

    In PCRE, POSIX character set names recognize only ASCII -characters. You can use `\Q...\E' inside a character class. +characters. You can use ‘\Q...\E’ inside a character class.

    Quantifiers
    @@ -372,7 +372,7 @@ characters. You can use `
    (*ACCEPT)
    force successful match -
    (*FAIL)
    force backtrack; synonym `(*F)' +
    (*FAIL)
    force backtrack; synonym ‘(*F)’

    The following act only when a subsequent match failure causes a backtrack to @@ -390,15 +390,15 @@ pattern is not anchored.

    Newline Conventions

    These are recognized only at the very start of the pattern or after a -`(*BSR_...)' option. +‘(*BSR_...)’ option.

    (*CR)
    (*LF)
    (*CRLF)
    (*ANYCRLF)
    (*ANY)
    -
    What `\R' Matches
    +
    What ‘\R’ Matches

    These are recognized only at the very start of the pattern or after a -`(*...)' option that sets the newline convention. +‘(*...)’ option that sets the newline convention.

    (*BSR_ANYCRLF)
    (*BSR_UNICODE)
    @@ -434,5 +434,5 @@ pattern is not anchored. - + ============================================================ --- docs/Regexps.html 815bd739ce0d1f2d8d11a5e170d0802c305147e5 +++ docs/Regexps.html 9d0e0674c0c926ccfe022142b805d810662a9284 @@ -1,9 +1,9 @@ Regexps - monotone documentation - + @@ -75,5 +75,5 @@ expected by the Perl-Compatible Regular - + ============================================================ --- docs/Reserved-Certs.html d6a34981e320fd16d5a567b0dc2d5b3970490790 +++ docs/Reserved-Certs.html d86b0c542ecf0fe616cfded24d4333b3c0dea012 @@ -1,9 +1,9 @@ Reserved Certs - monotone documentation - + ============================================================ --- docs/Reserved-Files.html 4b48d549c9e557f0a6de1f98adfba40bcc04baea +++ docs/Reserved-Files.html dc28c3e5da252e42e708ade445183f430694b199 @@ -1,9 +1,9 @@ Reserved Files - monotone documentation - + ============================================================ --- docs/Restrictions.html 452d413cdd9ca3b1fb4c7351d42a92e41a2588c4 +++ docs/Restrictions.html 6e8d7a166765607b49f7724e5a1fbf22938bb4e7 @@ -1,9 +1,9 @@ Restrictions - monotone documentation - + ============================================================ --- docs/Scripting.html a69b560f26880e3c1143a52f1c6e07aaf8b5f250 +++ docs/Scripting.html 30bea23402414b430800375915ca1647e5e9a5e0 @@ -1,9 +1,9 @@ Scripting - monotone documentation - + ============================================================ --- docs/Selectors.html e9adcb321fca68051ebe37e1f1d18894e0db656b +++ docs/Selectors.html 27ff2f3960012335a26dfe8f355773418e1bb9ac @@ -1,9 +1,9 @@ Selectors - monotone documentation - + ============================================================ --- docs/Special-Topics.html d54cb05bd92502bfe2a5c20a63e9c143f3709b36 +++ docs/Special-Topics.html 9705f227d829fe7eb1b89582c48a92e552e4465d @@ -1,9 +1,9 @@ Special Topics - monotone documentation - + ============================================================ --- docs/Starting-a-New-Project.html 
a3cbda53564080db304fd6237351ce954babea3c +++ docs/Starting-a-New-Project.html b4964123b9e63096310ca87c0b243c4b17718689 @@ -1,9 +1,9 @@ Starting a New Project - monotone documentation - + ============================================================ --- docs/Storage-and-workflow.html 69392c7d45bec0eaca2f17e9d2f92077c2ba4070 +++ docs/Storage-and-workflow.html caff1cbcb942d761fff3d5d66b9c46a97c4f3711 @@ -1,9 +1,9 @@ Storage and workflow - monotone documentation - + ============================================================ --- docs/Synchronising-Databases.html d40f41860ab082e6da7fb16844afb19357fe55b9 +++ docs/Synchronising-Databases.html fb29388f7fabc7926e1357e321da9fafdc2c7e11 @@ -1,9 +1,9 @@ Synchronising Databases - monotone documentation - + ============================================================ --- docs/Tree.html b557ca7b886640909179345fba5187d52d49b94f +++ docs/Tree.html b946d40b26991865e3db854d272f95addb6cd920 @@ -1,9 +1,9 @@ Tree - monotone documentation - + @@ -61,13 +61,13 @@ these entries: file "src/hello.c content [97dfc6fd4f486df95868d85b4b81197014ae2a84] - +

    Then the following files are created:

              directory/
               directory/Makefile
               directory/include/hello.h
               directory/src/hello.c
    -     
    +

    If you wish to checkout in the current directory, you can supply the special name . (a single period) for directory. When running checkout into an existing ============================================================ --- docs/Tutorial.html 5d27fde6085db7cb82e86332657d6e73de2dbf97 +++ docs/Tutorial.html 28141be0386259fcd886a517f7de6b6c2a04af53 @@ -1,9 +1,9 @@ Tutorial - monotone documentation - + ============================================================ --- docs/Using-packets.html 4077ccda595a86dd82f1fb9a7da0468b69010b02 +++ docs/Using-packets.html 9541f5784c2449819b1db89a826ea8917db3a416 @@ -1,9 +1,9 @@ Using packets - monotone documentation - + ============================================================ --- docs/Vars.html c7d3a44236435c862d6c7769d8a1b32fd57fd5fc +++ docs/Vars.html 7475e0311e505b2062aa865f3532d3ff83806b30 @@ -1,9 +1,9 @@ Vars - monotone documentation - + ============================================================ --- docs/Versions-of-files.html ef657adad28e050daad568c6a40e338c8e7c2049 +++ docs/Versions-of-files.html f3bf46039cd6085320e20be0a3e6bd8656eca3fc @@ -1,9 +1,9 @@ Versions of files - monotone documentation - + @@ -99,7 +99,7 @@ value. This probability is very small, s small probability of two different versions having the same sha1 value. This probability is very small, so we discount it.

    -


    +
    ============================================================ --- docs/Versions-of-trees.html 64531ae8cfd44ce9d5646702f3b65a74ea029e42 +++ docs/Versions-of-trees.html ae0aaa5884c8701862f7545b8f48416bb4eec93c @@ -1,9 +1,9 @@ Versions of trees - monotone documentation - + ============================================================ --- docs/Workspace-Collisions.html b1361bb5f32890fce1b8d3d6d62892b5dc882758 +++ docs/Workspace-Collisions.html ad276f0c48026052f87c36ebf66f04491fe2a5da @@ -1,9 +1,9 @@ Workspace Collisions - monotone documentation - + ============================================================ --- docs/Workspace.html 40b4543d5606e6701dace2e33f41aac6786bfd8c +++ docs/Workspace.html f20179119ab4fb1acf050df9897e87541c3711f7 @@ -1,9 +1,9 @@ Workspace - monotone documentation - + @@ -236,8 +236,8 @@ edit your current workspace to include t
    mtn pluck --revision=to
    mtn pluck --revision=from --revision=to
    This command takes changes made at any point in history, and attempts to edit your current workspace to include those changes. The end result is -identical to running mtn diff -r from --r to | patch -p0, except that this command +identical to running mtn diff -r from +-r to | patch -p0, except that this command uses monotone's merger, and thus intelligently handles renames, conflicts, and so on. @@ -287,7 +287,7 @@ workspace:
              $ mtn rename . new_root/put_old
               $ mtn rename new_root .
    -     
    +

    Except, of course, that these rename commands are illegal, because after the first command the tree has no root at all, and there is a directory loop. This illegality is the only reason for ============================================================ --- docs/index.html 617ce735f037a9cd24f680774150561b890a2faa +++ docs/index.html 90d19f0dee2f65ff5a94c152a3f80558fa56e6ee @@ -1,9 +1,9 @@ monotone documentation - + ============================================================ --- monotone.html 0c51e9b6862604f9724e8aadf48028c579da628f +++ monotone.html 6fa6307fc17a8f9880a0e07f365d13e2180601a9 @@ -1,9 +1,9 @@ monotone documentation - + @@ -3206,13 +3206,13 @@ these entries: file "src/hello.c content [97dfc6fd4f486df95868d85b4b81197014ae2a84] - +

    Then the following files are created:

              directory/
               directory/Makefile
               directory/include/hello.h
               directory/src/hello.c
    -     
    +

    If you wish to checkout in the current directory, you can supply the special name . (a single period) for directory. When running checkout into an existing @@ -3525,8 +3525,8 @@ edit your current workspace to include t

    mtn pluck --revision=to
    mtn pluck --revision=from --revision=to
    This command takes changes made at any point in history, and attempts to edit your current workspace to include those changes. The end result is -identical to running mtn diff -r from --r to | patch -p0, except that this command +identical to running mtn diff -r from +-r to | patch -p0, except that this command uses monotone's merger, and thus intelligently handles renames, conflicts, and so on. @@ -3576,7 +3576,7 @@ workspace:
              $ mtn rename . new_root/put_old
               $ mtn rename new_root .
    -     
    +

    Except, of course, that these rename commands are illegal, because after the first command the tree has no root at all, and there is a directory loop. This illegality is the only reason for @@ -3643,11 +3643,11 @@ branches. Supposing Alice's computer has alice.someisp.com, then Alice might run:

              $ mtn --bind=alice.someisp.com serve
    -     
    +

    And Bob might run

              $ mtn sync alice.someisp.com "net.venge.monotone*"
    -     
    +

    When the operation completes, all branches matching net.venge.monotone* will be synchronized between Alice and Bob's databases. @@ -3659,7 +3659,7 @@ with Alice again, he can simply run: with Alice again, he can simply run:

              $ mtn sync
    -     
    +

    Of course, he can still sync with other people and other branches by passing an address or address plus globs on the command line; this will not affect his default affinity for Alice. If you ever @@ -3773,7 +3773,7 @@ example, suppose you enter this command fa36deead87811b0e15208da2853c39d2f6ebe90 fa36b76dd0139177b28b379fe1d56b22342e5306 fa36965ec190bee14c5afcac235f1b8e2239bb2a - +

    Then monotone is telling you that there are 3 revisions it knows about, in its database, which begin with the 4 hex digits fa36. This command is intended to be used by programmable @@ -3788,7 +3788,7 @@ present users with additional informatio 01f5da490941bee1f0000f0561fc62eabfb2fa23 address@hidden 2003-12-03T03:14:35 01f992577bd8bcdcade0f89e724fd5dc2d2bbe8a address@hidden 2005-05-11T05:19:29 01faad191d8d0474777c70b4d606782942333a78 address@hidden 2005-04-11T04:24:01 - +

    mtn diff [--unified] [--show-encloser]
    mtn diff --context [--show-encloser]
    mtn diff --external [--diff-args=argstring]
    mtn diff pathname...
    mtn diff --revision=id
    mtn diff --revision=id pathname...
    mtn diff --revision=id1 --revision=id2
    mtn diff --revision=id1 --revision=id2 pathname...
    These commands print out GNU “unified diff format” textual difference listings between various manifest versions. With no --revision @@ -3821,9 +3821,9 @@ algorithm to produce a listing in &ldquo --unified, --context, --show-encloser, and --external. By default, monotone uses its built-in diff algorithm to produce a listing in “unified diff” format (analogous -to running the program diff -u); you can also explicitly +to running the program diff -u); you can also explicitly request this with --unified. The built-in diff algorithm can -also produce “context diff” format (analogous to diff -c), +also produce “context diff” format (analogous to diff -c), which you request by specifying --context. The short options that diff accepts for these modes, -u and -c, also work. @@ -3842,7 +3842,7 @@ syntax, See Regexps--unified requests the “unified diff” format, the default. --context requests the “context diff” format (analogous to -running the program diff -c). Both of these formats are +running the program diff -c). Both of these formats are generated directly by monotone, using its built-in diff algorithm.

    Sometimes, you may want more flexibility in output formats; for these @@ -3900,7 +3900,7 @@ branch: : * tests/t_cross.at: New test for merging merges. : * testsuite.at: Call t_cross.at. : - +

    mtn list keys
    mtn ls keys
    mtn list keys pattern
    mtn ls keys pattern
    These commands list rsa keys held in your keystore and current database. They do not print out any cryptographic information; they simply list the names of public and private keys you have on hand. @@ -4045,11 +4045,11 @@ following two examples are equivalent: enter passphrase for key ID address@hidden: $ mtn ci -m"Changed foo to bar" $ mtn push -k address@hidden - +
              $ mtn ci -m"Changed foo to bar"
               enter passphrase for key ID address@hidden:
               $ mtn push -k address@hidden
    -     
    +

    In the second example, monotone automatically added the key to ssh-agent, making entering the passphrase not needed during the push. @@ -4072,7 +4072,7 @@ will cache the key for you. Identity added: /home/user/.ssh/id_monotone (/home/user/.ssh/id_monotone) $ mtn ci -m"Changed foo to bar" $ mtn push -k address@hidden - +

    You can also use the --ssh-sign option to control whether ssh-agent will be used for signing. If set to yes, ssh-agent will be used to sign. If your key has not been added to ssh-agent monotone will fall back to its internal signing @@ -5780,7 +5780,7 @@ 3.0

    Added in:
    3.0 -
    Renamed from attributes to get_attributes in:
    +
    Renamed from attributes to get_attributes in:
    5.0
    Purpose:
    @@ -6408,7 +6408,7 @@ Workspace-less commit. Normally used via add_file "foo" content [5bf1fd927dfb8679496a2e6cf00cbe50c1c87145] - +
    Sample output:
              
               4c2c1d846fa561601254200918fba1fd71e6795d
    @@ -6684,7 +6684,7 @@ variable as in this example:
                   end
                   print("Ending note_mtn_startup")
               end
    -     
    +

    6.1.2 User Defaults

    @@ -6740,7 +6740,7 @@ definitions might be: if ((user == nil) or (host == nil)) then return nil end return string.format("address@hidden", user, host) end - +
              function get_author(branchname, keypair_id)
                       -- Branch name ignored.
                       if (keypair_id == "address@hidden") then
    @@ -6748,7 +6748,7 @@ definitions might be:
                       end
                       return keypair_id
               end
    -     
    +
    edit_comment (commentary, user_log_message)
    Returns a log entry for a given set of changes, described in commentary. The commentary is identical to the output of @@ -6778,7 +6778,7 @@ probably want this hook to return
              function persist_phrase_ok()
                       return true
               end
    -     
    +
    use_inodeprints ()
    Returns true if you want monotone to automatically enable Inodeprints support in all workspaces. Only affects working @@ -6788,7 +6788,7 @@ copies created after you modify the hook
              function use_inodeprints()
                       return false
               end
    -     
    +
    ignore_file (filename)
    Returns true if filename should be ignored while adding, dropping, or moving files. Otherwise returns false. This is @@ -6832,7 +6832,7 @@ the configuration directory. This file l comment "everyone can read these branches" pattern "net.example.{public,project}*" allow "*" - +

    This example allows everyone access to branches net.example.project and net.example.public and their sub-branches, except for the branches in net.example.project.security and net.example.project.private, @@ -6978,7 +6978,7 @@ components: end return argv end - +

    use_transport_auth (uri)
    Returns a boolean indicating whether monotone should use transport authentication mechanisms when communicating with uri. If this @@ -7004,7 +7004,7 @@ authentication assumptions. return true end end - +
    get_mtn_command(host)
    Returns a string containing the monotone command to be executed on host when communicating over ssh. The host @@ -7016,7 +7016,7 @@ monotone binary is not in the default pa
              function get_mtn_command(host)
               	return "mtn"
               end
    -     
    +

    @@ -7075,7 +7075,7 @@ the intersection of tables) is the follo return false end end - +

    In this example, any revision certificate is trusted if it is signed by at least one of three “trusted” keys, unless it is a branch certificate, in which case it must be signed by @@ -7105,7 +7105,7 @@ version carrying the old_results +

    This definition accepts only those updates which preserve the set of true test results from update source to target. If no test results exist, this hook has no effect; but once a true test @@ -7171,8 +7171,7 @@ you have a tool specific to certain file you have a tool specific to certain file types.

    - -
    merge3 (ancestor_path, left_path, right_path, merged_path, ancestor_text, left_text, right_text)
    +
    merge3 (ancestor_path, left_path, right_path, merged_path, ancestor_text, left_text, right_text)
    This hook is called to resolve merges that monotone could not resolve automatically. The actual ancestor, left, and right contents of the file are passed in the ancestor_text, left_text, and @@ -7194,8 +7193,7 @@ local system. For details, see the code and if not, then simply searches for whatever is installed on the local system. For details, see the code in Default hooks. -

    -

    get_preferred_merge3_command(tbl)
    +


    get_preferred_merge3_command(tbl)
    Returns the results of running an external merge on three strings. tbl wraps up the various arguments for each merge command and is always provided by merge3. If there is a particular editor @@ -7268,7 +7266,7 @@ attribute. Its definition is: make_executable(filename) end end - +
    attr_init_functions [attribute] (filename)
    This is not a hook function, but a table of hook functions. Each entry in the table attr_init_functions, at @@ -7297,7 +7295,7 @@ definition is: return nil end end - +

    The binary_file function is also defined as a Lua hook. See Default hooks. @@ -7358,7 +7356,7 @@ more of the following characters:

              0x00 thru 0x06
               0x0E thru 0x1a
               0x1c thru 0x1f
    -     
    +
    include(scriptfile)
    This function tries to load and execute the script contained in scriptfile. It returns true for success and false if there is an @@ -7438,7 +7436,7 @@ the arguments. thingy "baz" spork frob "oops" - +

    The output table will be:

              {
                  1 = { name = "thingy", values = { 1 = "foo", 2 = "bar" } },
    @@ -7446,7 +7444,7 @@ the arguments.
                  3 = { name = "spork", values = { } },
                  4 = { name = "frob", values = { 1 = "oops" } }
               }
    -     
    +
    regex.search(regexp, string)
    Returns true if a match for regexp is found in string, returns false otherwise. See Regexps, for the syntax of regexp. @@ -7584,7 +7582,7 @@ an IDNA string is this: an IDNA string is this:
                    {ACE-prefix}{LDH-sanitized(punycode(nameprep(UTF-8-string)))}
    -     
    +

    It is important to understand that IDNA encoding does not preserve the input string: it both prohibits a wide variety of possible strings and normalizes non-equal strings to supposedly @@ -8880,10 +8878,10 @@ used in Monotone.

    \X
    an extended Unicode sequence
    -

    `\d', `\D', `\s', `\S', `\w', and `\W' +

    ‘\d’, ‘\D’, ‘\s’, ‘\S’, ‘\w’, and ‘\W’ recognize only ASCII characters. -

    General category property codes for `\p' and `\P'
    +
    General category property codes for ‘\p’ and ‘\P’
    C
    Other @@ -8932,7 +8930,7 @@ recognize only ASCII characters.
    Zs
    Space separator
    -
    Script names for `\p' and `\P'
    +
    Script names for ‘\p’ and ‘\P’

    Arabic, Armenian, @@ -9022,12 +9020,12 @@ Yi.

    punct
    printing, excluding alphanumeric
    space
    whitespace
    upper
    upper case letter -
    word
    same as `\w' +
    word
    same as ‘\w’
    xdigit
    hexadecimal digit

    In PCRE, POSIX character set names recognize only ASCII -characters. You can use `\Q...\E' inside a character class. +characters. You can use ‘\Q...\E’ inside a character class.

    Quantifiers
    @@ -9173,7 +9171,7 @@ characters. You can use `
    (*ACCEPT)
    force successful match -
    (*FAIL)
    force backtrack; synonym `(*F)' +
    (*FAIL)
    force backtrack; synonym ‘(*F)’

    The following act only when a subsequent match failure causes a backtrack to @@ -9191,15 +9189,15 @@ pattern is not anchored.

    Newline Conventions

    These are recognized only at the very start of the pattern or after a -`(*BSR_...)' option. +‘(*BSR_...)’ option.

    (*CR)
    (*LF)
    (*CRLF)
    (*ANYCRLF)
    (*ANY)
    -
    What `\R' Matches
    +
    What ‘\R’ Matches

    These are recognized only at the very start of the pattern or after a -`(*...)' option that sets the newline convention. +‘(*...)’ option that sets the newline convention.

    (*BSR_ANYCRLF)
    (*BSR_UNICODE)
    @@ -9280,7 +9278,7 @@ brackets, the metacharacters are as foll
    |
    start of alternative branch
    (
    start subpattern
    )
    end subpattern -
    ?
    extends the meaning of `(' +
    ?
    extends the meaning of ‘(’ also 0 or 1 quantifier also quantifier minimizer
    *
    0 or more quantifier @@ -9310,22 +9308,22 @@ applies both inside and outside characte that character may have. This use of backslash as an escape character applies both inside and outside character classes. -

    For example, if you want to match a `*' character, you write -`\*' in the pattern. This escaping action applies whether or not +

    For example, if you want to match a ‘*’ character, you write +‘\*’ in the pattern. This escaping action applies whether or not the following character would otherwise be interpreted as a metacharacter, so it is always safe to precede a non-alphanumeric with backslash to specify that it stands for itself. In particular, if you -want to match a backslash, you write `\\'. +want to match a backslash, you write ‘\\’. -

    If a pattern is compiled with the `(?x)' option, whitespace in +

    If a pattern is compiled with the ‘(?x)’ option, whitespace in the pattern (other than in a character class) and characters between a -`#' outside a character class and the next newline are +‘#’ outside a character class and the next newline are ignored. An escaping backslash can be used to include a whitespace or -`#' character as part of the pattern. +‘#’ character as part of the pattern.

    If you want to remove the special meaning from a sequence of -characters, you can do so by putting them between `\Q' and -`\E'. The `\Q...\E' sequence is recognized both inside and +characters, you can do so by putting them between ‘\Q’ and +‘\E’. The ‘\Q...\E’ sequence is recognized both inside and outside character classes.

    Non-printing Characters
    @@ -9350,36 +9348,36 @@ represents:
    \x{hhh...}
    character with hex code hhh... -

    The precise effect of `\cx' is as follows: if x is a lower +

    The precise effect of ‘\cx’ is as follows: if x is a lower case letter, it is converted to upper case. Then bit 6 of the -character (hex 40) is inverted. Thus `\cz' becomes hex 1A (the -<SUB> control character, in ASCII), but `\c{' becomes hex 3B -(`;'), and `\c;' becomes hex 7B (`{'). +character (hex 40) is inverted. Thus ‘\cz’ becomes hex 1A (the +<SUB> control character, in ASCII), but ‘\c{’ becomes hex 3B +(‘;’), and ‘\c;’ becomes hex 7B (‘{’). -

    After `\x', from zero to two hexadecimal digits are read (letters +

    After ‘\x’, from zero to two hexadecimal digits are read (letters can be in upper or lower case). Any number of hexadecimal digits may -appear between `\x{' and `}', but the value of the +appear between ‘\x{’ and ‘}’, but the value of the character code must be less than 256 in non-UTF-8 mode, and less than 2^31 in UTF-8 mode. That is, the maximum value in hexadecimal is 7FFFFFFF. Note that this is bigger than the largest Unicode code point, which is 10FFFF. -

    If characters other than hexadecimal digits appear between `\x{' -and `}', or if there is no terminating `}', this form of -escape is not recognized. Instead, the initial `\x' will be +

    If characters other than hexadecimal digits appear between ‘\x{’ +and ‘}’, or if there is no terminating ‘}’, this form of +escape is not recognized. Instead, the initial ‘\x’ will be interpreted as a basic hexadecimal escape, with no following digits, giving a character whose value is zero.

    Characters whose value is less than 256 can be defined by either of -the two syntaxes for `\x'. There is no difference in the way they -are handled. For example, `\xdc' is exactly the same as -`\x{dc}'. +the two syntaxes for ‘\x’. There is no difference in the way they +are handled. For example, ‘\xdc’ is exactly the same as +‘\x{dc}’. -

    After `\0' up to two further octal digits are read. If there are +

    After ‘\0’ up to two further octal digits are read. If there are fewer than two digits, just those that are present are used. Thus the -sequence `\0\x\07' specifies two binary zeros followed by a +sequence ‘\0\x\07’ specifies two binary zeros followed by a <BEL> character (octal 007). Make sure you supply two digits after the initial zero if the pattern character that follows is itself an octal digit. @@ -9397,8 +9395,8 @@ themselves. In non-UTF-8 mode, the value up to three octal digits following the backslash, and uses them to generate a data character. Any subsequent digits stand for themselves. In non-UTF-8 mode, the value of a character specified in -octal must be less than `\400'. In UTF-8 mode, values up to -`\777' are permitted. For example: +octal must be less than ‘\400’. In UTF-8 mode, values up to +‘\777’ are permitted. For example:

    \040
    is another way of writing a space @@ -9408,13 +9406,13 @@ octal must be less than `\11
    might be a back reference, or another way of writing a tab
    \011
    is always a tab -
    \0113
    is a tab followed by the character `3' +
    \0113
    is a tab followed by the character ‘3’
    \113
    might be a back reference, otherwise the character with octal code 113
    \377
    might be a back reference, otherwise the byte consisting entirely of 1 bits
    \81
    is either a back reference, or a binary zero - followed by the two characters `8' and `1' + followed by the two characters ‘8’ and ‘1’

    Note that octal values of 100 or greater must not be introduced by a @@ -9422,18 +9420,18 @@ both inside and outside character classe

    All the sequences that define a single character value can be used both inside and outside character classes. In addition, inside a -character class, the sequence `\b' is interpreted as the <BS> -character (hex 08), and the sequences `\R' and `\X' are -interpreted as the characters `R' and `X', +character class, the sequence ‘\b’ is interpreted as the <BS> +character (hex 08), and the sequences ‘\R’ and ‘\X’ are +interpreted as the characters ‘R’ and ‘X’, respectively. Outside a character class, these sequences have different meanings (see below).

    Absolute and Relative Back References
    -

    The sequence `\g' followed by an unsigned or a negative number, +

    The sequence ‘\g’ followed by an unsigned or a negative number, optionally enclosed in braces, is an absolute or relative back reference. A named back reference can be coded as -`\g{name}'. Back references are discussed later, following the +‘\g{name}’. Back references are discussed later, following the discussion of parenthesized subpatterns.

    Generic character types
    @@ -9463,18 +9461,18 @@ string, all of them fail, since there is type. If the current matching point is at the end of the subject string, all of them fail, since there is no character to match. -

    For compatibility with Perl, `\s' does not match the <VT> +

    For compatibility with Perl, ‘\s’ does not match the <VT> character (code 11). This makes it different from the POSIX -“space” class. The `\s' characters are <TAB> (9), <LF> +“space” class. The ‘\s’ characters are <TAB> (9), <LF> (10), <FF> (12), <CR> (13), and <SPACE> (32).

    In UTF-8 mode, characters with values greater than 128 never match -`\d', `\s', or `\w', and always match `\D', -`\S', and `\W'. These sequences retain their original +‘\d’, ‘\s’, or ‘\w’, and always match ‘\D’, +‘\S’, and ‘\W’. These sequences retain their original meanings from before UTF-8 support was available, mainly for efficiency reasons. -

    The sequences `\h', `\H', `\v', and `\V' are Perl +

    The sequences ‘\h’, ‘\H’, ‘\v’, and ‘\V’ are Perl 5.10 features. In contrast to the other sequences, these do match certain high-valued codepoints in UTF-8 mode. The horizontal space characters are: @@ -9541,23 +9539,23 @@ the following five sequences: (*CR)a.b -

    changes the convention to CR. That pattern matches `a\nb' because +

    changes the convention to CR. That pattern matches ‘a\nb’ because LF is no longer a newline. Note that these special settings, which are not Perl-compatible, are recognized only at the very start of a pattern, and that they must be in upper case. If more than one of them is present, the last one is used. -

    The newline convention does not affect what the `\R' escape +

    The newline convention does not affect what the ‘\R’ escape sequence matches. By default, this is any Unicode newline sequence, for Perl compatibility. However, this can be changed; see the -description of `\R' below. A change of `\R' setting can be +description of ‘\R’ below. A change of ‘\R’ setting can be combined with a change of newline convention.

    Newline Sequences
    -

    Outside a character class, by default, the escape sequence `\R' matches +

    Outside a character class, by default, the escape sequence ‘\R’ matches any Unicode newline sequence. This is a Perl 5.10 feature. In -non-UTF-8 mode `\R' is equivalent to the following: +non-UTF-8 mode ‘\R’ is equivalent to the following:

              (?>\r\n|\n|\x0b|\f|\r|\x85)
    @@ -9574,7 +9572,7 @@ and <PS> (paragraph separator, U+2028)
     and <PS> (paragraph separator, U+2029).
     
    -   

    It is possible to change the meaning of `\R' by starting a +

    It is possible to change the meaning of ‘\R’ by starting a pattern string with one of the following sequences:

    @@ -9592,7 +9590,7 @@ example, a pattern can start with: (*ANY)(*BSR_ANYCRLF)
    -

    Inside a character class, `\R' matches the letter `R'. +

    Inside a character class, ‘\R’ matches the letter ‘R’.

    Unicode Character Properties
    @@ -9608,10 +9606,10 @@ 256, but they do work in this mode. The

    The property names represented by xx above are limited to the -Unicode script names, the general category properties, and `Any', +Unicode script names, the general category properties, and ‘Any’, which matches any character (including newline). Other properties such -as `InMusicalSymbols' are not currently supported by PCRE. Note -that `\P{Any}' does not match any characters, so always causes +as ‘InMusicalSymbols’ are not currently supported by PCRE. Note +that ‘\P{Any}’ does not match any characters, so always causes a match failure.

    Sets of Unicode characters are defined as belonging to certain @@ -9696,9 +9694,9 @@ by including a circumflex between the op

    Each character has exactly one general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be specified by including a circumflex between the opening brace and the property name. For -example, `\p{^Lu}' is the same as `\P{Lu}'. +example, ‘\p{^Lu}’ is the same as ‘\P{Lu}’. -

    If only one letter is specified with `\p' or `\P', it +

    If only one letter is specified with ‘\p’ or ‘\P’, it includes all the general category properties that start with that letter. In this case, in the absence of negation, the curly brackets in the escape sequence are optional; these two examples have the same @@ -9757,28 +9755,28 @@ effect:

    Zs
    Space separator -

    The special property `L&' is also supported: it matches a -character that has the `Lu', `Ll', or `Lt' property, in +

    The special property ‘L&’ is also supported: it matches a +character that has the ‘Lu’, ‘Ll’, or ‘Lt’ property, in other words, a letter that is not classified as a modifier or “other.” -

    The `Cs' (Surrogate) property applies only to characters in the +

    The ‘Cs’ (Surrogate) property applies only to characters in the range U+D800 to U+DFFF. Such characters are not valid in UTF-8 strings (see RFC 3629) and so cannot be tested by PCRE.

    The long synonyms for these properties that Perl supports (such as -`\p{Letter}') are not supported by PCRE, nor is it permitted to -prefix any of these properties with `Is'. +‘\p{Letter}’) are not supported by PCRE, nor is it permitted to +prefix any of these properties with ‘Is’. -

    No character that is in the Unicode table has the `Cn' +

    No character that is in the Unicode table has the ‘Cn’ (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table.

    Specifying caseless matching does not affect these escape sequences. For -example, `\p{Lu}' always matches only upper case letters. +example, ‘\p{Lu}’ always matches only upper case letters. -

    The `\X' escape matches any number of Unicode characters that -form an extended Unicode sequence. `\X' is equivalent to +

    The ‘\X’ escape matches any number of Unicode characters that +form an extended Unicode sequence. ‘\X’ is equivalent to

              (?>\PM\pM*)
    @@ -9789,16 +9787,16 @@ character. None of them have codepoints 
     treats the sequence as an atomic group (see below).  Characters with
     the “mark” property are typically accents that affect the preceding
     character. None of them have codepoints less than 256, so in non-UTF-8
    -mode `\X' matches any one character.
    +mode ‘\X’ matches any one character.
     
        

    Matching characters by Unicode property is not fast, because PCRE has to search a structure that contains data for over fifteen thousand characters. That is why the traditional escape sequences such as -`\d' and `\w' do not use Unicode properties in PCRE. +‘\d’ and ‘\w’ do not use Unicode properties in PCRE.

    Resetting the Match Start
    -

    The escape sequence `\K', which is a Perl 5.10 feature, causes +

    The escape sequence ‘\K’, which is a Perl 5.10 feature, causes any previously matched characters not to be included in the final matched sequence. For example, the pattern: @@ -9806,18 +9804,18 @@ matched sequence. For example, the patte foo\Kbar

    -

    matches `foobar', but reports that it has matched -`bar'. This feature is similar to a lookbehind assertion +

    matches ‘foobar’, but reports that it has matched +‘bar’. This feature is similar to a lookbehind assertion (described below). However, in this case, the part of the subject before the real match does not have to be of fixed length, as -lookbehind assertions do. The use of `\K' does not interfere with the +lookbehind assertions do. The use of ‘\K’ does not interfere with the setting of captured substrings. For example, when the pattern

              (foo)\Kbar
     
    -

    matches `foobar', the first substring is still set to `foo'. +

    matches ‘foobar’, the first substring is still set to ‘foo’.

    Simple assertions
    @@ -9838,31 +9836,31 @@ described below. The backslashed assert

    These assertions may not appear in character classes (but note that -`\b' has a different meaning, namely the backspace character, +‘\b’ has a different meaning, namely the backspace character, inside a character class).

    A word boundary is a position in the subject string where the current -character and the previous character do not both match `\w' or -`\W' (i.e. one matches `\w' and the other matches -`\W'), or the start or end of the string if the first or last -character matches `\w', respectively. +character and the previous character do not both match ‘\w’ or +‘\W’ (i.e. one matches ‘\w’ and the other matches +‘\W’), or the start or end of the string if the first or last +character matches ‘\w’, respectively. -

    The `\A', `\Z', and `\z' assertions differ from the +

    The ‘\A’, ‘\Z’, and ‘\z’ assertions differ from the traditional circumflex and dollar (described in the next section) in that they only ever match at the very start and end of the subject string, whatever options are set. Thus, they are independent of -multiline mode. The difference between `\Z' and `\z' is that -`\Z' matches before a newline at the end of the string as well as -at the very end, whereas `\z' matches only at the end. +multiline mode. The difference between ‘\Z’ and ‘\z’ is that +‘\Z’ matches before a newline at the end of the string as well as +at the very end, whereas ‘\z’ matches only at the end. -

    The `\G' assertion is true only when the current matching +

    The ‘\G’ assertion is true only when the current matching position is at the start point of the match. As used in Monotone, -`\G' is always equal to `\A'. +‘\G’ is always equal to ‘\A’.

    Circumflex and Dollar

    Outside a character class, in the default matching mode, the -circumflex character, `^', is an assertion that is true only if +circumflex character, ‘^’, is an assertion that is true only if the current matching point is at the start of the subject string. Inside a character class, circumflex has an entirely different meaning (see below). @@ -9875,7 +9873,7 @@ other constructs that can cause a patter subject, it is said to be an “anchored” pattern. (There are also other constructs that can cause a pattern to be anchored.) -

    A dollar character, `$', is an assertion that is true only if the +

    A dollar character, ‘$’, is an assertion that is true only if the current matching point is at the end of the subject string, or immediately before a newline at the end of the string (by default). Dollar need not be the last character of the pattern if a @@ -9884,23 +9882,23 @@ character class. character class.

    The meanings of the circumflex and dollar characters are changed if -the `(?m)' option is set. When this is the case, a circumflex +the ‘(?m)’ option is set. When this is the case, a circumflex matches immediately after internal newlines as well as at the start of the subject string. It does not match after a newline that ends the string. A dollar matches before any newlines in the string, as well as -at the very end, when `(?m)' is set. When newline is specified as +at the very end, when ‘(?m)’ is set. When newline is specified as the two-character sequence <CR><LF>, isolated <CR> and <LF> characters do not indicate newlines. -

    For example, the pattern `^abc$' matches the subject string -`def\nabc' (where `\n' represents a newline) in multiline +

    For example, the pattern ‘^abc$’ matches the subject string +‘def\nabc’ (where ‘\n’ represents a newline) in multiline mode, but not otherwise. Consequently, patterns that are anchored in single line mode because all branches start with ^ are not anchored in multiline mode. -

    Note that the sequences `\A', `\Z', and `\z' can be +

    Note that the sequences ‘\A’, ‘\Z’, and ‘\z’ can be used to match the start and end of the subject in both modes, and if -all branches of a pattern start with `\A' it is always anchored. +all branches of a pattern start with ‘\A’ it is always anchored.

    Full Stop (Period, Dot)
    @@ -9918,7 +9916,7 @@ other line ending characters. other line ending characters.

    The behaviour of dot with regard to newlines can be changed. If the -`(?s)' option is set, a dot matches any one character, without +‘(?s)’ option is set, a dot matches any one character, without exception. If the two-character sequence <CR><LF> is present in the subject string, it takes two dots to match it. @@ -9928,15 +9926,15 @@ special meaning in a character class.

    Matching a Single Byte
    -

    Outside a character class, the escape sequence `\C' matches any +

    Outside a character class, the escape sequence ‘\C’ matches any one byte, both in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending characters. The feature is provided in Perl in order to match individual bytes in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes, what remains in the string may -be a malformed UTF-8 string. For this reason, the `\C' escape +be a malformed UTF-8 string. For this reason, the ‘\C’ escape sequence is best avoided. -

    PCRE does not allow `\C' to appear in lookbehind assertions +

    PCRE does not allow ‘\C’ to appear in lookbehind assertions (described below), because in UTF-8 mode this would make it impossible to calculate the length of the lookbehind. @@ -9956,8 +9954,8 @@ is not the first character, or escape it a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. -

    For example, the character class `[aeiou]' matches any lower case -vowel, while `[^aeiou]' matches any character that is not a lower +

    For example, the character class ‘[aeiou]’ matches any lower case +vowel, while ‘[^aeiou]’ matches any character that is not a lower case vowel. Note that a circumflex is just a convenient notation for specifying the characters that are in the class by enumerating those that are not. A class that starts with a circumflex is not an @@ -9965,13 +9963,13 @@ therefore it fails if the current pointe therefore it fails if the current pointer is at the end of the string.

    In UTF-8 mode, characters with values greater than 255 can be included -in a class as a literal string of bytes, or by using the `\x{' +in a class as a literal string of bytes, or by using the ‘\x{’ escaping mechanism.

    When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless -`[aeiou]' matches `A' as well as `a', and a caseless [^aeiou] -does not match `A', whereas a caseful version would. In UTF-8 mode, +‘[aeiou]’ matches ‘A’ as well as ‘a’, and a caseless [^aeiou] +does not match ‘A’, whereas a caseful version would. In UTF-8 mode, PCRE always understands the concept of case for characters whose values are less than 128, so caseless matching is always possible. For characters with higher values, the concept of case is supported if @@ -9982,43 +9980,43 @@ special way when matching character clas

    Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending -sequence is in use, and whatever setting of the `(?s)' and -`(?m)' options is used. A class such as `[^a]' always +sequence is in use, and whatever setting of the ‘(?s)’ and +‘(?m)’ options is used. A class such as ‘[^a]’ always matches one of these characters.

    The minus (hyphen) character can be used to specify a range of -characters in a character class. For example, `[d-m]' matches any -letter between `d' and `m', inclusive. If a minus character +characters in a character class. For example, ‘[d-m]’ matches any +letter between ‘d’ and ‘m’, inclusive. If a minus character is required in a class, it must be escaped with a backslash or appear in a position where it cannot be interpreted as indicating a range, typically as the first or last character in the class. -

    It is not possible to have the literal character `]' as the end -character of a range. A pattern such as `[W-]46]' is interpreted -as a class of two characters (`W' and `-') followed by a -literal string `46]', so it would match `W46]' or -`-46]'. However, if the `]' is escaped with a backslash it -is interpreted as the end of range, so `[W-\]46]' is interpreted +

    It is not possible to have the literal character ‘]’ as the end +character of a range. A pattern such as ‘[W-]46]’ is interpreted +as a class of two characters (‘W’ and ‘-’) followed by a +literal string ‘46]’, so it would match ‘W46]’ or +‘-46]’. However, if the ‘]’ is escaped with a backslash it +is interpreted as the end of range, so ‘[W-\]46]’ is interpreted as a class containing a range followed by two other characters. The -octal or hexadecimal representation of `]' can also be used to +octal or hexadecimal representation of ‘]’ can also be used to end a range.

    Ranges operate in the collating sequence of character values. They can also be used for characters specified numerically, for example -`[\000-\037]'. In UTF-8 mode, ranges can include characters whose -values are greater than 255, for example `[\x{100}-\x{2ff}]'. +‘[\000-\037]’. In UTF-8 mode, ranges can include characters whose +values are greater than 255, for example ‘[\x{100}-\x{2ff}]’.

    If a range that includes letters is used when caseless matching is -set, it matches the letters in either case. For example, `[W-c]' -is equivalent to `[][\\^_`wxyzabc]', matched caselessly. +set, it matches the letters in either case. For example, ‘[W-c]’ +is equivalent to ‘[][\\^_`wxyzabc]’, matched caselessly. -

    The character types `\d', `\D', `\p', `\P', -`\s', `\S', `\w', and `\W' may also appear in a +

    The character types ‘\d’, ‘\D’, ‘\p’, ‘\P’, +‘\s’, ‘\S’, ‘\w’, and ‘\W’ may also appear in a character class, and add the characters that they match to the -class. For example, `[\dABCDEF]' matches any hexadecimal digit. A +class. For example, ‘[\dABCDEF]’ matches any hexadecimal digit. A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching -lower case type. For example, the class `[^\W_]' matches any +lower case type. For example, the class ‘[^\W_]’ matches any letter or digit, but not underscore.

    The only metacharacters that are recognized in character classes are @@ -10031,14 +10029,14 @@ escaping other non-alphanumeric characte

    POSIX Character Classes

    Perl supports the POSIX notation for character classes. This uses -names enclosed by `[:' and `:]' within the enclosing square +names enclosed by ‘[:’ and ‘:]’ within the enclosing square brackets. PCRE also supports this notation. For example,

              [01[:alpha:]%]
     
    -

    matches `0', `1', any alphabetic character, or `%'. The +

    matches ‘0’, ‘1’, any alphabetic character, or ‘%’. The supported class names are

    @@ -10047,33 +10045,33 @@ supported class names are
    ascii
    character codes 0 – 127
    blank
    space or tab only
    cntrl
    control characters -
    digit
    decimal digits (same as `\d') +
    digit
    decimal digits (same as ‘\d’)
    graph
    printing characters, excluding space
    lower
    lower case letters
    print
    printing characters, including space
    punct
    printing characters, excluding letters and digits -
    space
    white space (not quite the same as `\s') +
    space
    white space (not quite the same as ‘\s’)
    upper
    upper case letters -
    word
    “word” characters (same as `\w') +
    word
    “word” characters (same as ‘\w’)
    xdigit
    hexadecimal digits

    The “space” characters are <HT> (9), <LF> (10), <VT> (11), <FF> (12), <CR> (13), and space (32). Notice that this list includes the <VT> character (code 11). This makes "space" -different to `\s', which does not include <VT> (for Perl +different to ‘\s’, which does not include <VT> (for Perl compatibility).

    The name “word” is a Perl extension, and “blank” is a GNU extension from Perl 5.8. Another Perl extension is negation, which is -indicated by a `^' character after the colon. For example, +indicated by a ‘^’ character after the colon. For example,

              [12[:^digit:]]
     
    -

    matches `1', `2', or any non-digit. PCRE (and Perl) also -recognize the POSIX syntax `[.ch.]' and `[=ch=]' +

    matches ‘1’, ‘2’, or any non-digit. PCRE (and Perl) also +recognize the POSIX syntax ‘[.ch.]’ and ‘[=ch=]’ where ch is a “collating element,” but these are not supported, and an error is given if they are encountered. @@ -10089,7 +10087,7 @@ example, the pattern gilbert|sullivan -

    matches either `gilbert' or `sullivan'. Any number of +

    matches either ‘gilbert’ or ‘sullivan’. Any number of alternatives may appear, and an empty alternative is permitted (matching the empty string). The matching process tries each alternative in turn, from left to right, and the first one that @@ -10100,13 +10098,13 @@ as the alternative in the subpattern.

    Internal Option Setting

    The behavior of the matching engine can be adjusted from within the -pattern by a sequence of option letters enclosed between `(?' and -`)'. The option letters are +pattern by a sequence of option letters enclosed between ‘(?’ and +‘)’. The option letters are

    i
    Caseless: characters in one case match the corresponding characters in other cases as well. -
    m
    Multiline: `^' and `$' match at newlines +
    m
    Multiline: ‘^’ and ‘$’ match at newlines as well as at beginning and end of string.
    s
    Dotall: dot matches any character, including newline characters.
    x
    Extended syntax: unescaped white space is ignored and embedded @@ -10117,9 +10115,9 @@ with no defined meaning appears. with no defined meaning appears.
    -

    For example, `(?im)' sets caseless, multiline matching. It is +

    For example, ‘(?im)’ sets caseless, multiline matching. It is also possible to unset these options by preceding the letters with a -hyphen, and a combined setting and unsetting such as `(?im-sx)' +hyphen, and a combined setting and unsetting such as ‘(?im-sx)’ is also permitted. (This would set the caseless and multiline options while unsetting the dotall and extended-syntax options.) If a letter appears both before and after the hyphen, the option is unset. The @@ -10136,7 +10134,7 @@ current pattern that follows it, so (a(?i)b)c -

    matches `abc' and `aBc' and no other strings. By this +

    matches ‘abc’ and ‘aBc’ and no other strings. By this means, options can be made to have different settings in different parts of the pattern. Any changes made in one alternative do carry on into subsequent branches within the same subpattern. For example, @@ -10145,14 +10143,14 @@ into subsequent branches within the same (a(?i)b|c) -

    matches `ab', `aB', `c', and `C', even though when -matching `C' the first branch is abandoned before the option +

    matches ‘ab’, ‘aB’, ‘c’, and ‘C’, even though when +matching ‘C’ the first branch is abandoned before the option setting. This is because the effects of option settings happen when the pattern is parsed. There would be some very weird behaviour otherwise.

    Note: Unlike these options, the similar, PCRE-specific option -sequences that start with `(*' may appear only at the very +sequences that start with ‘(*’ may appear only at the very beginning of the pattern. Details of these sequences are given in the section entitled “Newline sequences,” above. @@ -10169,9 +10167,9 @@ things: cat(aract|erpillar|) -

    matches one of the words `cat', `cataract', or -`caterpillar'. Without the parentheses, it would match -`cataract', `erpillar' or an empty string. +

    matches one of the words ‘cat’, ‘cataract’, or +‘caterpillar’. Without the parentheses, it would match +‘cataract’, ‘erpillar’ or an empty string.

  • It sets up the subpattern as a capturing subpattern. As used in Monotone this only means that during matching, the portion of the @@ -10181,14 +10179,14 @@ subpatterns. left to right (starting from 1) to obtain numbers for the capturing subpatterns. -

    For example, if the string `the red king' is matched against the pattern +

    For example, if the string ‘the red king’ is matched against the pattern

         
                   the ((red|white) (king|queen))
     
    -

    the captured substrings are `red king', `red', and -`king', and are numbered 1, 2, and 3, respectively. +

    the captured substrings are ‘red king’, ‘red’, and +‘king’, and are numbered 1, 2, and 3, respectively.

    The fact that plain parentheses fulfil two functions is not always @@ -10196,20 +10194,20 @@ capturing, and is not counted when compu without a capturing requirement. If an opening parenthesis is followed by a question mark and a colon, the subpattern does not do any capturing, and is not counted when computing the number of any -subsequent capturing subpatterns. For example, if the string `the -white queen' is matched against the pattern +subsequent capturing subpatterns. For example, if the string ‘the +white queen’ is matched against the pattern

              the ((?:red|white) (king|queen))
     
    -

    the captured substrings are `white queen' and `queen', and +

    the captured substrings are ‘white queen’ and ‘queen’, and are numbered 1 and 2. The maximum number of capturing subpatterns is 65535.

    As a convenient shorthand, if any option settings are required at the start of a non-capturing subpattern, the option letters may appear -between the `?' and the `:'. Thus the two patterns +between the ‘?’ and the ‘:’. Thus the two patterns

              (?i:saturday|sunday)
    @@ -10219,26 +10217,26 @@ of the subpattern is reached, an option 
     

    match exactly the same set of strings. Because alternative branches are tried from left to right, and options are not reset until the end of the subpattern is reached, an option setting in one branch does -affect subsequent branches, so the above patterns match `SUNDAY' -as well as `Saturday'. +affect subsequent branches, so the above patterns match ‘SUNDAY’ +as well as ‘Saturday’.

    Duplicate Subpattern Numbers

    Perl 5.10 introduced a feature whereby each alternative in a subpattern uses the same numbers for its capturing parentheses. Such a -subpattern starts with `(?|' and is itself a non-capturing +subpattern starts with ‘(?|’ and is itself a non-capturing subpattern. For example, consider this pattern:

              (?|(Sat)ur|(Sun))day
     
    -

    Because the two alternatives are inside a `(?|' group, both sets +

    Because the two alternatives are inside a ‘(?|’ group, both sets of capturing parentheses are numbered one. Thus, when the pattern matches, you can look at captured substring number one, whichever alternative matched. This construct is useful when you want to capture part, but not all, of one of a number of alternatives. Inside a -`(?|' group, parentheses are numbered as usual, but the number is +‘(?|’ group, parentheses are numbered as usual, but the number is reset at the start of each branch. The numbers of any capturing buffers that follow the subpattern start after the highest number used in any branch. The following example is taken from the Perl @@ -10269,8 +10267,8 @@ and the Python syntax. and the Python syntax.

    In PCRE, a subpattern can be named in one of three ways: -`(?<name>...)' or `(?'name'...)' as in Perl, or -`(?P<name>...)' as in Python. References to capturing +‘(?<name>...)’ or ‘(?'name'...)’ as in Perl, or +‘(?P<name>...)’ as in Python. References to capturing parentheses from other parts of the pattern, such as backreferences, recursion, and conditions, can be made by name as well as by number. @@ -10279,7 +10277,7 @@ as well as names, exactly as if the name as well as names, exactly as if the names were not present.

    By default, a name must be unique within a pattern, but it is possible -to relax this constraint by setting the `(?J)' option. This can +to relax this constraint by setting the ‘(?J)’ option. This can be useful for patterns where only one instance of the named parentheses can match. Suppose you want to match the name of a weekday, either as a 3-letter abbreviation or as the full name, and in @@ -10307,10 +10305,10 @@ the following items:

    • a literal data character
    • the dot metacharacter -
    • the `\C' escape sequence -
    • the `\X' escape sequence (in UTF-8 mode with Unicode properties) -
    • the `\R' escape sequence -
    • an escape such as `\d' that matches a single character +
    • the ‘\C’ escape sequence +
    • the ‘\X’ escape sequence (in UTF-8 mode with Unicode properties) +
    • the ‘\R’ escape sequence +
    • an escape such as ‘\d’ that matches a single character
    • a character class
    • a back reference (see next section)
    • a parenthesized subpattern (unless it is an assertion) @@ -10326,7 +10324,7 @@ example: z{2,4}
    -

    matches `zz', `zzz', or `zzzz'. A closing brace on its +

    matches ‘zz’, ‘zzz’, or ‘zzzz’. A closing brace on its own is not a special character. If the second number is omitted, but the comma is present, there is no upper limit; if the second number and the comma are both omitted, the quantifier specifies an exact @@ -10345,17 +10343,17 @@ the syntax of a quantifier, is taken as

    matches exactly 8 digits. An opening curly bracket that appears in a position where a quantifier is not allowed, or one that does not match the syntax of a quantifier, is taken as a literal character. For -example, `{,6}' is not a quantifier, but a literal string of four +example, ‘{,6}’ is not a quantifier, but a literal string of four characters.

    In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to -individual bytes. Thus, for example, `\x{100}{2}' matches two +individual bytes. Thus, for example, ‘\x{100}{2}’ matches two UTF-8 characters, each of which is represented by a two-byte -sequence. Similarly, `\X{3}' matches three Unicode extended +sequence. Similarly, ‘\X{3}’ matches three Unicode extended sequences, each of which may be several bytes long (and they may be of different lengths). -

    The quantifier `{0}' is permitted, causing the expression to +

    The quantifier ‘{0}’ is permitted, causing the expression to behave as if the previous item and the quantifier were not present.

    For convenience, the three most common quantifiers have @@ -10384,8 +10382,8 @@ of where this gives problems is in tryin much as possible (up to the maximum number of permitted times), without causing the rest of the pattern to fail. The classic example of where this gives problems is in trying to match comments in C -programs. These appear between `/*' and `*/', and within the -comment, individual `*' and `/' characters may appear. An +programs. These appear between ‘/*’ and ‘*/’, and within the +comment, individual ‘*’ and ‘/’ characters may appear. An attempt to match C comments by applying the pattern

    @@ -10399,7 +10397,7 @@ attempt to match C comments by applying 
     

    fails, because it matches the entire string owing to the greediness of -the `.*' item. +the ‘.*’ item.

    However, if a quantifier is followed by a question mark, it ceases to be greedy, and instead matches the minimum number of times possible, @@ -10422,7 +10420,7 @@ way the rest of the pattern matches.

    which matches one digit by preference, but can match two if that is the only way the rest of the pattern matches. -

    If the `(?U)' option is set (an option that is not available in +

    If the ‘(?U)’ option is set (an option that is not available in Perl), the quantifiers are not greedy by default, but individual ones can be made greedy by following them with a question mark. In other words, it inverts the default behaviour. @@ -10431,17 +10429,17 @@ compiled pattern, in proportion to the s is greater than 1 or with a limited maximum, more memory is required for the compiled pattern, in proportion to the size of the minimum or maximum. -

    If a pattern starts with `.*' or `.{0,}' and the -`(?s)' option is set, thus allowing the dot to match newlines, +

    If a pattern starts with ‘.*’ or ‘.{0,}’ and the +‘(?s)’ option is set, thus allowing the dot to match newlines, the pattern is implicitly anchored, because whatever follows will be tried against every character position in the subject string, so there is no point in retrying the overall match at any position after the first. PCRE normally treats such a pattern as though it were preceded -by `\A'. +by ‘\A’.

    In cases where it is known that the subject string contains no -newlines, it is worth setting `(?s)' in order to obtain this -optimization, or alternatively using `^' or `\A' to indicate +newlines, it is worth setting ‘(?s)’ in order to obtain this +optimization, or alternatively using ‘^’ or ‘\A’ to indicate anchoring explicitly.

    However, there is one situation where the optimization cannot be @@ -10453,7 +10451,7 @@ fail where a later one succeeds. Conside (.*)abc\1 -

    If the subject is `xyz123abc123' the match point is the fourth +

    If the subject is ‘xyz123abc123’ the match point is the fourth character. For this reason, such a pattern is not implicitly anchored.

    When a capturing subpattern is repeated, the value captured is the @@ -10463,8 +10461,8 @@ substring that matched the final iterati (tweedle[dume]{3}\s*)+ -

    has matched `tweedledum tweedledee' the value of the captured -substring is `tweedledee'. However, if there are nested capturing +

    has matched ‘tweedledum tweedledee’ the value of the captured +substring is ‘tweedledee’. However, if there are nested capturing subpatterns, the corresponding captured values may have been set in previous iterations. For example, after @@ -10472,7 +10470,7 @@ previous iterations. For example, after (a|(b))+ -

    matches `aba' the value of the second captured substring is `b'. +

    matches ‘aba’ the value of the second captured substring is ‘b’.

    Atomic Grouping and Possessive Quantifiers
    @@ -10484,23 +10482,23 @@ the pattern knows there is no point in c to cause it to fail earlier than it otherwise might, when the author of the pattern knows there is no point in carrying on. -

    Consider, for example, the pattern `\d+foo' when applied to the +

    Consider, for example, the pattern ‘\d+foo’ when applied to the subject line

              123456bar
     
    -

    After matching all 6 digits and then failing to match `foo', the +

    After matching all 6 digits and then failing to match ‘foo’, the normal action of the matcher is to try again with only 5 digits -matching the `\d+' item, and then with 4, and so on, before +matching the ‘\d+’ item, and then with 4, and so on, before ultimately failing. Atomic grouping (a term taken from Jeffrey Friedl's book) provides the means for specifying that once a subpattern has matched, it is not to be re-evaluated in this way.

    If we use atomic grouping for the previous example, the matcher gives -up immediately on failing to match `foo' the first time. The -notation is a kind of special parenthesis, starting with `(?>' as in +up immediately on failing to match ‘foo’ the first time. The +notation is a kind of special parenthesis, starting with ‘(?>’ as in this example:

    @@ -10519,15 +10517,15 @@ maximizing repeat that must swallow ever
     
        

    Simple cases such as the above example can be thought of as a maximizing repeat that must swallow everything it can. So, while both -`\d+' and `\d+?' are prepared to adjust the number of digits +‘\d+’ and ‘\d+?’ are prepared to adjust the number of digits they match in order to make the rest of the pattern match, -`(?>\d+)' can only match an entire sequence of digits. +‘(?>\d+)’ can only match an entire sequence of digits.

    Atomic groups in general can of course contain arbitrarily complicated subpatterns, and can be nested. However, when the subpattern for an atomic group is just a single repeated item, as in the example above, a simpler notation, called a possessive quantifier, can be -used. This consists of an additional `+' character following a +used. This consists of an additional ‘+’ character following a quantifier. Using this notation, the previous example can be rewritten as @@ -10543,7 +10541,7 @@ example:

    Possessive quantifiers are always greedy; the setting of the -`(?U)' option is ignored. They are a convenient notation for the +‘(?U)’ option is ignored. They are a convenient notation for the simpler forms of atomic group. However, there is no difference in the meaning of a possessive quantifier and the equivalent atomic group, though there may be a performance difference; possessive quantifiers @@ -10556,9 +10554,9 @@ ultimately found its way into Perl at re ultimately found its way into Perl at release 5.10.

    PCRE has an optimization that automatically “possessifies” certain -simple pattern constructs. For example, the sequence `A+B' is -treated as `A++B' because there is no point in backtracking into -a sequence of `A's when `B' must follow. +simple pattern constructs. For example, the sequence ‘A+B’ is +treated as ‘A++B’ because there is no point in backtracking into +a sequence of ‘A’s when ‘B’ must follow.

    When a pattern contains an unlimited repeat inside a subpattern that can itself be repeated an unlimited number of times, the use of an @@ -10570,8 +10568,8 @@ very long time indeed. The pattern

    matches an unlimited number of substrings that either consist of -non-digits, or digits enclosed in `<>', followed by either -`!' or `?'. When it matches, it runs quickly. However, if it +non-digits, or digits enclosed in ‘<>’, followed by either +‘!’ or ‘?’. When it matches, it runs quickly. However, if it is applied to

    @@ -10579,9 +10577,9 @@ is applied to
     

    it takes a long time before reporting failure. This is because the -string can be divided between the internal `\D+' repeat and the -external `*' repeat in a large number of ways, and all have to be -tried. (The example uses `[!?]' rather than a single character at +string can be divided between the internal ‘\D+’ repeat and the +external ‘*’ repeat in a large number of ways, and all have to be +tried. (The example uses ‘[!?]’ rather than a single character at the end, because both PCRE and Perl have an optimization that allows for fast failure when a single character is used. They remember the last single character that is required for a match, and fail early if @@ -10613,7 +10611,7 @@ subpattern whose number is 10 or more us

    It is not possible to have a numerical “forward back reference” to a subpattern whose number is 10 or more using this syntax because a -sequence such as `\50' is interpreted as a character defined in +sequence such as ‘\50’ is interpreted as a character defined in octal. See the subsection entitled “Non-printing characters” above for further details of the handling of digits following a backslash. There is no such problem when named parentheses are used. A @@ -10621,7 +10619,7 @@ back reference to any subpattern is poss (see below).

    Another way of avoiding the ambiguity inherent in the use of digits -following a backslash is to use the `\g' escape sequence, which +following a backslash is to use the ‘\g’ escape sequence, which is a feature introduced in Perl 5.10. This escape must be followed by an unsigned number or a negative number, optionally enclosed in braces. These examples are all identical: @@ -10641,10 +10639,10 @@ reference. Consider this example: (abc(def)ghi)\g{-1} -

    The sequence `\g{-1}' is a reference to the most recently -started capturing subpattern before `\g', that is, is it -equivalent to `\2'. Similarly, `\g{-2}' would be -equivalent to `\1'. The use of relative references can be helpful +

    The sequence ‘\g{-1}’ is a reference to the most recently +started capturing subpattern before ‘\g’, that is, it is +equivalent to ‘\2’. Similarly, ‘\g{-2}’ would be +equivalent to ‘\1’. The use of relative references can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves. @@ -10657,8 +10655,8 @@ for a way of doing that). So the pattern (sens|respons)e and \1ibility -

    matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If caseful +

    matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If caseful matching is in force at the time of the back reference, the case of letters is relevant. For example, @@ -10666,14 +10664,14 @@ letters is relevant. For example, ((?i)rah)\s+\1 -

    matches `rah rah' and `RAH RAH', but not `RAH rah', +

    matches ‘rah rah’ and ‘RAH RAH’, but not ‘RAH rah’, even though the original capturing subpattern is matched caselessly.

    There are several different ways of writing back references to named -subpatterns. The .NET syntax `\k{name}' and the Perl syntax -`\k<name>' or `\k'name'' are supported, as is the Python +subpatterns. The .NET syntax ‘\k{name}’ and the Perl syntax +‘\k<name>’ or ‘\k'name'’ are supported, as is the Python syntax (?P=name). Perl 5.10's unified back reference syntax, in which -`\g' can be used for both numeric and named references, is also +‘\g’ can be used for both numeric and named references, is also supported. We could rewrite the above example in any of the following ways: @@ -10695,16 +10693,16 @@ references to it always fail. For exampl (a|(bc))\2 -

    always fails if it starts to match `a' rather than -`bc'. Because there may be many capturing parentheses in a +

    always fails if it starts to match ‘a’ rather than +‘bc’. Because there may be many capturing parentheses in a pattern, all digits following the backslash are taken as part of a potential back reference number. If the pattern continues with a digit character, some delimiter must be used to terminate the back -reference. If the `(?x)' option is set, this can be whitespace. +reference. If the ‘(?x)’ option is set, this can be whitespace. Otherwise an empty comment (see “Comments” below) can be used.

    A back reference that occurs inside the parentheses to which it refers -fails when the subpattern is first used, so, for example, `(a\1)' +fails when the subpattern is first used, so, for example, ‘(a\1)’ never matches. However, such references can be useful inside repeated subpatterns. For example, the pattern @@ -10712,7 +10710,7 @@ subpatterns. For example, the pattern (a|b\1)+ -

    matches any number of `a's and also `aba', `ababbaa' +

    matches any number of ‘a’s and also ‘aba’, ‘ababbaa’ etc. At each iteration of the subpattern, the back reference matches the character string corresponding to the previous iteration. In order for this to work, the pattern must be such that the first iteration @@ -10724,8 +10722,8 @@ current matching point that does not act

    An assertion is a test on the characters following or preceding the current matching point that does not actually consume any -characters. The simple assertions coded as `\b', `\B', -`\A', `\G', `\Z', `\z', `^' and `$' are +characters. The simple assertions coded as ‘\b’, ‘\B’, +‘\A’, ‘\G’, ‘\Z’, ‘\z’, ‘^’ and ‘$’ are described above.

    More complicated assertions are coded as subpatterns. There are two @@ -10744,8 +10742,8 @@ sense for negative assertions.

    Lookahead Assertions
    -

    Lookahead assertions start with `(?=' for positive assertions and -`(?!' for negative assertions. For example, +

    Lookahead assertions start with ‘(?=’ for positive assertions and +‘(?!’ for negative assertions. For example,

              \w+(?=;)
    @@ -10758,35 +10756,35 @@ the match, and
              foo(?!bar)
     
    -

    matches any occurrence of `foo' that is not followed by -`bar'. Note that the apparently similar pattern +

    matches any occurrence of ‘foo’ that is not followed by +‘bar’. Note that the apparently similar pattern

              (?!foo)bar
     
    -

    does not find an occurrence of `bar' that is preceded by -something other than `foo'; it finds any occurrence of `bar' -whatsoever, because the assertion `(?!foo)' is always true when -the next three characters are `bar'. A lookbehind assertion is +

    does not find an occurrence of ‘bar’ that is preceded by +something other than ‘foo’; it finds any occurrence of ‘bar’ +whatsoever, because the assertion ‘(?!foo)’ is always true when +the next three characters are ‘bar’. A lookbehind assertion is needed to achieve the other effect.

    If you want to force a matching failure at some point in a pattern, -the most convenient way to do it is with `(?!)' because an empty +the most convenient way to do it is with ‘(?!)’ because an empty string always matches, so an assertion that requires there not to be an empty string must always fail.

    Lookbehind Assertions
    -

    Lookbehind assertions start with `(?<=' for positive assertions -and `(?<!' for negative assertions. For example, +

    Lookbehind assertions start with ‘(?<=’ for positive assertions +and ‘(?<!’ for negative assertions. For example,

              (?<!foo)bar
     
    -

    matches an occurrence of `bar' that is not preceded by -`foo'. The contents of a lookbehind assertion are restricted such +

    matches an occurrence of ‘bar’ that is not preceded by +‘foo’. The contents of a lookbehind assertion are restricted such that all the strings it matches must have a fixed length. However, if there are several top-level alternatives, they do not all have to have the same fixed length. Thus @@ -10818,7 +10816,7 @@ lengths, but it is acceptable if rewritt (?<=abc|abde) -

    In some cases, the Perl 5.10 escape sequence `\K' (see above) can +

    In some cases, the Perl 5.10 escape sequence ‘\K’ (see above) can be used instead of a lookbehind assertion; this is not restricted to a fixed-length. @@ -10827,10 +10825,10 @@ current position, the assertion fails. then try to match. If there are insufficient characters before the current position, the assertion fails. -

    PCRE does not allow the `\C' escape (which matches a single byte +

    PCRE does not allow the ‘\C’ escape (which matches a single byte in UTF-8 mode) to appear in lookbehind assertions, because it makes it -impossible to calculate the length of the lookbehind. The `\X' -and `\R' escapes, which can match different numbers of bytes, are +impossible to calculate the length of the lookbehind. The ‘\X’ +and ‘\R’ escapes, which can match different numbers of bytes, are also not permitted.

    Possessive quantifiers can be used in conjunction with lookbehind @@ -10842,7 +10840,7 @@ string. Consider a simple pattern such a

    when applied to a long string that does not match. Because matching -proceeds from left to right, PCRE will look for each `a' in the +proceeds from left to right, PCRE will look for each ‘a’ in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as @@ -10850,10 +10848,10 @@ pattern. If the pattern is specified as ^.*abcd$ -

    the initial `.*' matches the entire string at first, but when this fails -(because there is no following `a'), it backtracks to match all +

    the initial ‘.*’ matches the entire string at first, but when this fails +(because there is no following ‘a’), it backtracks to match all but the last character, then all but the last two characters, and so -on. Once again the search for `a' covers the entire string, from +on. Once again the search for ‘a’ covers the entire string, from right to left, so we are no better off. However, if the pattern is written as @@ -10861,7 +10859,7 @@ written as ^.*+(?<=abcd) -

    there can be no backtracking for the `.*+' item; it can match +

    there can be no backtracking for the ‘.*+’ item; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -10875,14 +10873,14 @@ difference to the processing time. (?<=\d{3})(?<!999)foo -

    matches `foo' preceded by three digits that are not -`999'. Notice that each of the assertions is applied +

    matches ‘foo’ preceded by three digits that are not +‘999’. Notice that each of the assertions is applied independently at the same point in the subject string. First there is a check that the previous three characters are all digits, and then -there is a check that the same three characters are not `999'. -This pattern does not match `foo' preceded by six +there is a check that the same three characters are not ‘999’. +This pattern does not match ‘foo’ preceded by six characters, the first three of which are digits and the last three of which -are not `999'. For example, it doesn't match `123abcfoo'. A +are not ‘999’. For example, it doesn't match ‘123abcfoo’. A pattern to do that is

    @@ -10892,7 +10890,7 @@ assertion checks that the preceding thre
     

    This time the first assertion looks at the preceding six characters, checking that the first three are digits, and then the second assertion checks that the preceding three characters are not -`999'. +‘999’.

    Assertions can be nested in any combination. For example, @@ -10900,15 +10898,15 @@ assertion checks that the preceding thre (?<=(?<!foo)bar)baz

    -

    matches an occurrence of `baz' that is preceded by `bar' -which in turn is not preceded by `foo', while +

    matches an occurrence of ‘baz’ that is preceded by ‘bar’ +which in turn is not preceded by ‘foo’, while

              (?<=\d{3}(?!999)...)foo
     
    -

    is another pattern that matches `foo' preceded by three digits -and any three characters that are not `999'. +

    is another pattern that matches ‘foo’ preceded by three digits +and any three characters that are not ‘999’.

    Conditional Subpatterns
    @@ -10928,7 +10926,7 @@ than two alternatives in the subpattern, than two alternatives in the subpattern, a compile-time error occurs.

    There are four kinds of condition: references to subpatterns, -references to recursion, a pseudo-condition called `DEFINE', and +references to recursion, a pseudo-condition called ‘DEFINE’, and assertions.

    Checking for a used subpattern by number
    @@ -10938,13 +10936,13 @@ relative rather than absolute. The most previously matched. An alternative notation is to precede the digits with a plus or minus sign. In this case, the subpattern number is relative rather than absolute. The most recently opened parentheses -can be referenced by `(?(-1)', the next most recent by -`(?(-2)', and so on. In looping constructs it can also make sense -to refer to subsequent groups with constructs such as `(?(+2)'. +can be referenced by ‘(?(-1)’, the next most recent by +‘(?(-2)’, and so on. In looping constructs it can also make sense +to refer to subsequent groups with constructs such as ‘(?(+2)’.

    Consider the following pattern, which contains non-significant white space to make it more readable and to divide it into three parts for -ease of discussion (assume a preceding `(?x)'): +ease of discussion (assume a preceding ‘(?x)’):

              ( \( )?    [^()]+    (?(1) \) )
    @@ -10972,10 +10970,10 @@ relative reference:
     
     
    Checking for a used subpattern by name
    -

    Perl uses the syntax `(?(<name>)...)' or `(?('name')...)' to +

    Perl uses the syntax ‘(?(<name>)...)’ or ‘(?('name')...)’ to test for a used subpattern by name. For compatibility with earlier versions of PCRE, which had this facility before Perl, the syntax -`(?(name)...)' is also recognized. However, there is a possible +‘(?(name)...)’ is also recognized. However, there is a possible ambiguity with this syntax, because subpattern names may consist entirely of digits. PCRE looks first for a named subpattern; if it cannot find one and the name consists entirely of digits, PCRE looks @@ -10991,10 +10989,10 @@ recommended.

    Checking for pattern recursion
    -

    If the condition is the string `(R)', and there is no subpattern -with the name `R', the condition is true if a recursive call to +

    If the condition is the string ‘(R)’, and there is no subpattern +with the name ‘R’, the condition is true if a recursive call to the whole pattern or any subpattern has been made. If digits or a name -preceded by ampersand follow the letter `R', for example: +preceded by ampersand follow the letter ‘R’, for example:

              (?(R3)...) or (?(R&name)...)
    @@ -11009,8 +11007,8 @@ patterns are described below.
     
     
    Defining subpatterns for use by reference only
    -

    If the condition is the string `(DEFINE)', and there is no -subpattern with the name `DEFINE', the condition is always +

    If the condition is the string ‘(DEFINE)’, and there is no +subpattern with the name ‘DEFINE’, the condition is always false. In this case, there may be only one alternative in the subpattern. It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define @@ -11051,18 +11049,18 @@ otherwise it is matched against the seco it tests for the presence of at least one letter in the subject. If a letter is found, the subject is matched against the first alternative; otherwise it is matched against the second. This pattern matches -strings in one of the two forms `dd-aaa-dd' or -`dd-dd-dd', where aaa are letters and +strings in one of the two forms ‘dd-aaa-dd’ or +‘dd-dd-dd’, where aaa are letters and dd are digits.

    Comments
    -

    The sequence `(?#' marks the start of a comment that continues up +

    The sequence ‘(?#’ marks the start of a comment that continues up to the next closing parenthesis. Nested parentheses are not permitted. The characters that make up a comment play no part in the pattern matching at all. -

    If the `(?x)' option is set, an unescaped `#' character +

    If the ‘(?x)’ option is set, an unescaped ‘#’ character outside a character class introduces a comment that continues to immediately after the next newline in the pattern. @@ -11079,11 +11077,11 @@ release 5.10. PCRE and Python, this kind of recursion was introduced into Perl at release 5.10. -

    A special item that consists of `(?' followed by a number greater +

    A special item that consists of ‘(?’ followed by a number greater than zero and a closing parenthesis is a recursive call of the subpattern of the given number, provided that it occurs inside that subpattern. (If not, it is a subroutine call, which is described in -the next section.) The special item `(?R)' or `(?0)' is a +the next section.) The special item ‘(?R)’ or ‘(?0)’ is a recursive call of the entire regular expression.

    In PCRE (like Python, but unlike Perl), a recursive subpattern call is @@ -11115,21 +11113,21 @@ tricky. This is made easier by the use o

    In a larger pattern, keeping track of parenthesis numbers can be tricky. This is made easier by the use of relative references. (A Perl -5.10 feature.) Instead of `(?1)' in the pattern above you can -write `(?-2)' to refer to the second most recently opened +5.10 feature.) Instead of ‘(?1)’ in the pattern above you can +write ‘(?-2)’ to refer to the second most recently opened parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered.

    It is also possible to refer to subsequently opened parentheses, by -writing references such as `(?+2)'. However, these cannot be +writing references such as ‘(?+2)’. However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always subroutine calls, as described in the next section.

    An alternative approach is to use named parentheses instead. The Perl -syntax for this is `(?&name)'; PCRE's earlier syntax -`(?P>name)' is also supported. We could rewrite the above example +syntax for this is ‘(?&name)’; PCRE's earlier syntax +‘(?P>name)’ is also supported. We could rewrite the above example as follows:

    @@ -11151,7 +11149,7 @@ runs for a very long time indeed because
     
     

    it fails quickly. However, if atomic grouping is not used, the match runs for a very long time indeed because there are so many different -ways the `+' and `*' repeats can carve up the subject, and +ways the ‘+’ and ‘*’ repeats can carve up the subject, and all have to be tested before failure can be reported.

    At the end of a match, the values set for any capturing subpatterns @@ -11162,7 +11160,7 @@ subpattern value is set. If the pattern (ab(cd)ef)

    -

    the value for the capturing parentheses is `ef', which is the +

    the value for the capturing parentheses is ‘ef’, which is the last value taken on at the top level. If additional parentheses are added, giving @@ -11185,9 +11183,9 @@ characters are permitted at the outer le < (?: (?(R) \d++ | [^<>]*+) | (?R)) * >

    -

    In this pattern, `(?(R)' is the start of a conditional +

    In this pattern, ‘(?(R)’ is the start of a conditional subpattern, with two different alternatives for the recursive and -non-recursive cases. The `(?R)' item is the actual recursive +non-recursive cases. The ‘(?R)’ item is the actual recursive call.

    Subpatterns as Subroutines
    @@ -11210,15 +11208,15 @@ reference can be absolute or relative, a (sens|respons)e and \1ibility
    -

    matches `sense and sensibility' and `response and -responsibility', but not `sense and responsibility'. If instead +

    matches ‘sense and sensibility’ and ‘response and +responsibility’, but not ‘sense and responsibility’. If instead the pattern

              (sens|respons)e and (?1)ibility
     
    -

    is used, it does match `sense and responsibility' as well as the +

    is used, it does match ‘sense and responsibility’ as well as the other two strings. Another example is given in the discussion of DEFINE above. @@ -11236,7 +11234,7 @@ pattern: (abc)(?i:(?-1)) -

    It matches `abcabc'. It does not match `abcABC' because the +

    It matches ‘abcabc’. It does not match ‘abcABC’ because the change of processing option does not affect the called subpattern.

    Backtracking Control
    @@ -11250,8 +11248,8 @@ opening parenthesis followed by an aster

    The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. In Perl, they are -generally of the form `(*VERB:ARG)' but PCRE does not support the -use of arguments, so its general form is just `(*VERB)'. Any +generally of the form ‘(*VERB:ARG)’ but PCRE does not support the +use of arguments, so its general form is just ‘(*VERB)’. Any number of these verbs may occur in a pattern. There are two kinds:

    Verbs that act immediately
    @@ -11263,19 +11261,19 @@ ended immediately. PCRE differs from Per This verb causes the match to end successfully, skipping the remainder of the pattern. When inside a recursion, only the innermost pattern is ended immediately. PCRE differs from Perl in what happens if the -`(*ACCEPT)' is inside capturing parentheses. In Perl, the data so +‘(*ACCEPT)’ is inside capturing parentheses. In Perl, the data so far is captured: in PCRE no data is captured. For example:
         
                   A(A|B(*ACCEPT)|C)D
     
    -

    This matches `AB', `AAD', or `ACD', but when it matches -`AB', no data is captured. +

    This matches ‘AB’, ‘AAD’, or ‘ACD’, but when it matches +‘AB’, no data is captured.

    (*FAIL) or (*F)
    This verb causes the match to fail, forcing backtracking to occur. It -is equivalent to `(?!)' but easier to read. It is not clear +is equivalent to ‘(?!)’ but easier to read. It is not clear whether there is any use for this without the ability to execute code in the middle of the pattern (which Perl has but PCRE in Monotone does not). @@ -11301,7 +11299,7 @@ all. For example: a+(*COMMIT)b -

    This matches `xxaab' but not `aacaab'. It can be thought of +

    This matches ‘xxaab’ but not ‘aacaab’. It can be thought of as a kind of dynamic anchor, or “I've started, so I must finish.”

    (*PRUNE)
    @@ -11327,13 +11325,13 @@ leading up to it cannot be part of a suc a+(*SKIP)b -

    If the subject is `aaaac...', after the first match attempt fails +

    If the subject is ‘aaaac...’, after the first match attempt fails (starting at the first character in the string), the starting point -skips on to start the next attempt at `c'. Note that a possessive +skips on to start the next attempt at ‘c’. Note that a possessive quantifier does not have the same effect in this example; although it would suppress backtracking during the first match attempt, the second attempt would start at the second character instead of skipping on to -`c'. +‘c’.

    (*THEN)
    This verb causes a skip to the next alternation if the rest of the @@ -11348,10 +11346,10 @@ block: | COND3 (*THEN) BAZ ) ... -

    If the `COND1' pattern matches, `FOO' is tried (and possibly -further items after the end of the group if `FOO' succeeds); on +

    If the ‘COND1’ pattern matches, ‘FOO’ is tried (and possibly +further items after the end of the group if ‘FOO’ succeeds); on failure the matcher skips to the second alternative and tries -`COND2', without backtracking into COND1. If (*THEN) is used +‘COND2’, without backtracking into COND1. If (*THEN) is used outside of any alternation, it acts exactly like (*PRUNE). @@ -12131,7 +12129,7 @@ function get_preferred_merge3_command (t -- If there wasn't any user-given merger, take the first that's available -- and wanted. for _,mkey in ipairs(default_order) do - c = trymerger(mkey) ; if c then return c,nil end + c = trymerger(mkey) ; if c then return c,mkey end end end @@ -12478,70 +12476,115 @@ end return "socat" end --- Netsync notifiers are tables containing 5 functions: --- start, revision_received, cert_received, pubkey_received and end --- Those functions take exactly the same arguments as the corresponding --- note_netsync functions, but return a different kind of value, a tuple --- composed of a return code and a value to be returned back to monotone. --- The codes are strings: --- "continue" and "stop" --- When the code "continue" is returned and there's another notifier, the --- second value is ignored and the next notifier is called. Otherwise, --- the second value is returned immediately. 
-netsync_notifiers = {} +do + -- Hook functions are tables containing any of the following 6 items + -- with associated functions: + -- + -- startup Corresponds to note_mtn_startup() + -- start Corresponds to note_netsync_start() + -- revision_received Corresponds to note_netsync_revision_received() + -- cert_received Corresponds to note_netsync_cert_received() + -- pubkey_received Corresponds to note_netsync_pubkey_received() + -- end Corresponds to note_netsync_end() + -- + -- Those functions take exactly the same arguments as the corresponding + -- global functions, but return a different kind of value, a tuple + -- composed of a return code and a value to be returned back to monotone. + -- The codes are strings: + -- "continue" and "stop" + -- When the code "continue" is returned and there's another notifier, the + -- second value is ignored and the next notifier is called. Otherwise, + -- the second value is returned immediately. + local hook_functions = {} + local supported_items = { + "startup", + "start", "revision_received", "cert_received", "pubkey_received", "end" + } -function _note_netsync_helper(f,...) - local s = "continue" - local v = nil - for _,n in pairs(netsync_notifiers) do - if n[f] then - s,v = n[f](...) + function _hook_functions_helper(f,...) + local s = "continue" + local v = nil + for _,n in pairs(hook_functions) do + if n[f] then + s,v = n[f](...) + end + if s ~= "continue" then + break + end end - if s ~= "continue" then - break - end + return v end - return v -end -function note_netsync_start(...) - return _note_netsync_helper("start",...) -end -function note_netsync_revision_received(...) - return _note_netsync_helper("revision_received",...) -end -function note_netsync_cert_received(...) - return _note_netsync_helper("cert_received",...) -end -function note_netsync_pubkey_received(...) - return _note_netsync_helper("pubkey_received",...) -end -function note_netsync_end(...) - return _note_netsync_helper("end",...) 
-end + function note_mtn_startup(...) + return _hook_functions_helper("startup",...) + end + function note_netsync_start(...) + return _hook_functions_helper("start",...) + end + function note_netsync_revision_received(...) + return _hook_functions_helper("revision_received",...) + end + function note_netsync_cert_received(...) + return _hook_functions_helper("cert_received",...) + end + function note_netsync_pubkey_received(...) + return _hook_functions_helper("pubkey_received",...) + end + function note_netsync_end(...) + return _hook_functions_helper("end",...) + end -function add_netsync_notifier(notifier, precedence) - if type(notifier) ~= "table" or type(precedence) ~= "number" then - return false, "Invalid tyoe" + function add_hook_functions(functions, precedence) + if type(functions) ~= "table" or type(precedence) ~= "number" then + return false, "Invalid type" + end + if hook_functions[precedence] then + return false, "Precedence already taken" + end + + local unknown_items = "" + local warning = nil + local is_member = + function (s,t) + for k,v in pairs(t) do if s == v then return true end end + return false + end + + for n,f in pairs(functions) do + if type(n) == "string" then + if not is_member(n, supported_items) then + if unknown_items ~= "" then + unknown_items = unknown_items .. "," + end + unknown_items = unknown_items .. n + end + if type(f) ~= "function" then + return false, "Value for functions item "..n.." isn't a function" + end + else + warning = "Non-string item keys found in functions table" + end + end + + if warning == nil and unknown_items ~= "" then + warning = "Unknown item(s) " .. unknown_items .. 
" in functions table" + end + + hook_functions[precedence] = functions + return true, warning end - if netsync_notifiers[precedence] then - return false, "Precedence already taken" + function push_hook_functions(functions) + local n = table.maxn(hook_functions) + 1 + return add_hook_functions(functions, n) end - local warning = nil - for n,f in pairs(notifier) do - if type(n) ~= "string" or n ~= "start" - and n ~= "revision_received" - and n ~= "cert_received" - and n ~= "pubkey_received" - and n ~= "end" then - warning = "Unknown item found in notifier table" - elseif type(f) ~= "function" then - return false, "Value for notifier item "..n.." isn't a function" - end + + -- Kept for backward compatibility + function add_netsync_notifier(notifier, precedence) + return add_hook_functions(notifier, precedence) end - netsync_notifiers[precedence] = notifier - return true, warning -end - + function push_netsync_notifier(notifier) + return push_hook_functions(notifier) + end +end

    General Index

    @@ -12800,7 +12843,7 @@ be a rather serious security problem!

    -


    +
    ============================================================ # monotone.pdf is binary