# (Be in -*- mode: python; coding: utf-8 -*- mode.)
# The program that is run to convert from CVS to git is called
# cvs2git. Run it with the --options option, passing it this file
# like this:
#
#     cvs2git --options=cvs2git-example.options
#
# The output of cvs2git is a blob file and a dump file that can be
# loaded into git using the "git fast-import" command. Please read
# www/cvs2git.html for more information.
#
# Many options do not have defaults, so it is easier to copy this file
# and modify what you need rather than creating a new options file
# from scratch. This file is in Python syntax, but you don't need to
# know Python to modify it. But if you *do* know Python, then you
# will be happy to know that you can use arbitrary Python constructs to
# do fancy configuration tricks.
#
# But please be aware of the following:
#
# * In many places, leading whitespace is significant in Python (it is
#   used instead of curly braces to group statements together).
#   Therefore, if you don't know what you are doing, it is best to
#   leave the whitespace as it is.
#
# * In normal strings, Python treats a backslash ("\") as an escape
#   character. Therefore, if you want to specify a string that
#   contains a backslash, you need either to escape the backslash with
#   another backslash ("\\"), or use a "raw string", as in one of the
#   following equivalent examples:
#
#       ctx.sort_executable = 'c:\\windows\\system32\\sort.exe'
#       ctx.sort_executable = r'c:\windows\system32\sort.exe'
#
#   See http://docs.python.org/tutorial/introduction.html#strings for
#   more information.
#
# Two identifiers will have been defined before this file is executed,
# and can be used freely within this file:
#
#     ctx -- a Ctx object (see cvs2svn_lib/context.py), which holds
#         many configuration options
#
#     run_options -- an instance of the GitRunOptions class (see
#         cvs2svn_lib/git_run_options.py), which holds some variables
#         governing how cvs2git is run


# Import some modules that are used in setting the options:
import re

from cvs2svn_lib import config
from cvs2svn_lib import changeset_database
from cvs2svn_lib.common import CVSTextDecoder
from cvs2svn_lib.log import Log
from cvs2svn_lib.project import Project
from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder
from cvs2svn_lib.git_output_option import GitRevisionMarkWriter
from cvs2svn_lib.git_output_option import GitOutputOption
from cvs2svn_lib.revision_manager import NullRevisionRecorder
from cvs2svn_lib.revision_manager import NullRevisionExcluder
from cvs2svn_lib.fulltext_revision_recorder \
     import SimpleFulltextRevisionRecorderAdapter
from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
from cvs2svn_lib.checkout_internal import InternalRevisionRecorder
from cvs2svn_lib.checkout_internal import InternalRevisionExcluder
from cvs2svn_lib.checkout_internal import InternalRevisionReader
from cvs2svn_lib.symbol_strategy import AllBranchRule
from cvs2svn_lib.symbol_strategy import AllTagRule
from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule
from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule
from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule
from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule
from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule
from cvs2svn_lib.symbol_strategy import ExcludeVendorBranchRule
from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule
from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule
from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule
from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule
from cvs2svn_lib.symbol_transform import SymbolTransform
from cvs2svn_lib.symbol_transform import RegexpSymbolTransform
from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform
from cvs2svn_lib.symbol_transform import IgnoreSymbolTransform
from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform
from cvs2svn_lib.property_setters import AutoPropsPropertySetter
from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter
from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter
from cvs2svn_lib.property_setters import CVSRevisionNumberSetter
from cvs2svn_lib.property_setters import DefaultEOLStyleSetter
from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter
from cvs2svn_lib.property_setters import ExecutablePropertySetter
from cvs2svn_lib.property_setters import KeywordsPropertySetter
from cvs2svn_lib.property_setters import MimeMapper
from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter


# Disabled example of a project-specific symbol transform.  Its
# transform() returns None for one exact (file, symbol, revision)
# combination and the unchanged symbol name otherwise.  It is only
# referenced by the commented-out PickyIgnoreSymbolTransform(...) entry
# in symbol_transforms at the end of this file.
#class PickyIgnoreSymbolTransform(SymbolTransform):
#    def __init__(self, cvs_path, symbol_name, revision):
#        self.cvs_path = cvs_path
#        self.symbol_name = symbol_name
#        self.revision = revision
#
#    def transform(self, cvs_file, symbol_name, revision):
#        if cvs_file.cvs_path == self.cvs_path \
#              and symbol_name == self.symbol_name \
#              and revision == self.revision:
#            return None
#        else:
#            return symbol_name


# To choose the level of logging output, uncomment one of the
# following lines:
#Log().log_level = Log.WARN
#Log().log_level = Log.QUIET
#Log().log_level = Log.NORMAL
#Log().log_level = Log.VERBOSE
Log().log_level = Log.DEBUG


# During CollectRevsPass, cvs2git records the contents of file
# revisions into a "blob" file in git-fast-import format. This option
# configures that process:
ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter(
    # The following option specifies how the revision contents of the RCS
    # files should be read.
    #
    # RCSRevisionReader uses RCS's "co" program to extract the revision
    # contents of the RCS files during CollectRevsPass. The constructor
    # argument specifies how to invoke the "co" executable.
    #
    # CVSRevisionReader uses the "cvs" program to extract the revision
    # contents out of the RCS files during OutputPass. This option is
    # considerably slower than RCSRevisionReader because "cvs" is
    # considerably slower than "co". However, it works in some situations
    # where RCSRevisionReader fails; see the HTML documentation of the
    # "--use-cvs" option for details. The constructor argument specifies
    # how to invoke the "cvs" executable.
    #
    # Uncomment one of the two following lines:
    #RCSRevisionReader(co_executable=r'co'),
    CVSRevisionReader(cvs_executable=r'cvs'),

    # The file in which to write the git-fast-import stream that
    # contains the file revision contents:
    GitRevisionRecorder('cvs2svn-tmp/git-blob.dat'),
    )

# cvs2git does not need to keep track of what revisions will be
# excluded, so leave this option unchanged:
ctx.revision_excluder = NullRevisionExcluder()

# cvs2git doesn't need a revision reader because OutputPass only
# refers to blobs that were output during CollectRevsPass, so leave
# this option set to None.
ctx.revision_reader = None

# Set the name (and optionally the path) of some other executables
# required by cvs2svn:
ctx.sort_executable = r'sort'

# Change the following line to True if the conversion should only
# include the trunk of the repository (i.e., all branches and tags
# should be omitted from the conversion):
ctx.trunk_only = False

# How to convert CVS author names, log messages, and filenames to
# Unicode. The first argument to CVSTextDecoder is a list of encodings
# that are tried in order in 'strict' mode until one of them succeeds.
# If none of those succeeds, then fallback_encoding (if it is
# specified) is used in lossy 'replace' mode. Setting a fallback
# encoding ensures that the decoder always succeeds, but it can cause
# information loss.
#
# Order matters: 'latin1' maps every possible byte to a character, so
# it never fails in strict mode and any encoding listed after it is
# never tried. It must therefore come last; listing it first would
# silently decode UTF-8 text as Latin-1 mojibake. For the same reason
# fallback_encoding is never reached while 'latin1' is in the list; it
# is kept so that removing 'latin1' still leaves a working setup.
ctx.cvs_author_decoder = CVSTextDecoder(
    [
        'ascii',
        'utf8',
        'latin1',
        ],
    fallback_encoding='ascii'
    )
ctx.cvs_log_decoder = CVSTextDecoder(
    [
        'ascii',
        'utf8',
        'latin1',
        ],
    fallback_encoding='ascii'
    )
# You might want to be especially strict when converting filenames to
# Unicode (e.g., maybe not specify a fallback_encoding).
ctx.cvs_filename_decoder = CVSTextDecoder(
    [
        #'latin1',
        #'utf8',
        'ascii',
        ],
    #fallback_encoding='ascii'
    )

# Template for the commit message to be used for initial project
# commits.
ctx.initial_project_commit_message = (
    'Standard project directories initialized by cvs2svn.'
    )

# Template for the commit message to be used for post commits, in
# which modifications to a vendor branch are copied back to trunk.
# This message can use '%(revnum)d' to include the SVN revision number
# of the revision that included the change to the vendor branch
# (admittedly rather pointless in a cvs2git conversion).
ctx.post_commit_message = (
    'This commit was generated by cvs2svn to track changes on a CVS '
    'vendor branch.'
    )

# Template for the commit message to be used for commits in which
# symbols are created. This message can use '%(symbol_type)s' to
# include the type of the symbol ('branch' or 'tag') or
# '%(symbol_name)s' to include the name of the symbol.
ctx.symbol_commit_message = (
    "This commit was manufactured by cvs2svn to create %(symbol_type)s "
    "'%(symbol_name)s'."
    )

# Some CVS clients for MacOS store resource fork data into CVS along
# with the file contents itself by wrapping it all up in a container
# format called "AppleSingle". Subversion currently does not support
# MacOS resource forks. Nevertheless, sometimes the resource fork
# information is not necessary and can be discarded. Set the
# following option to True if you would like cvs2svn to identify files
# whose contents are encoded in AppleSingle format, and discard all
# but the data fork for such files before committing them to
# Subversion. (Please note that AppleSingle contents are identified
# by the AppleSingle magic number as the first four bytes of the file.
# This check is not failproof, so only set this option if you think
# you need it.)
ctx.decode_apple_single = False

# This option can be set to the name of a filename to which are stored
# statistics and conversion decisions about the CVS symbols.
ctx.symbol_info_filename = None
#ctx.symbol_info_filename = 'symbol-info.txt'

# cvs2svn uses "symbol strategy rules" to help decide how to handle
# CVS symbols. The rules in a project's symbol_strategy_rules are
# applied in order, and each rule is allowed to modify the symbol.
# The result (after each of the rules has been applied) is used for
# the conversion.
#
# 1. A CVS symbol might be used as a tag in one file and as a branch
#    in another file. cvs2svn has to decide whether to convert such a
#    symbol as a tag or as a branch. cvs2svn uses a series of
#    heuristic rules to decide how to convert a symbol. The user can
#    override the default rules for specific symbols or symbols
#    matching regular expressions.
#
# 2. cvs2svn is also capable of excluding symbols from the conversion
#    (provided no other symbols depend on them).
#
# 3. CVS does not record unambiguously the line of development from
#    which a symbol sprouted. cvs2svn uses a heuristic to choose a
#    symbol's "preferred parents".
#
# The standard branch/tag/exclude StrategyRules do not change a symbol
# that has already been processed by an earlier rule, so in effect the
# first matching rule is the one that is used.

global_symbol_strategy_rules = [
    # It is possible to specify manually exactly how symbols should be
    # converted and what line of development should be used as the
    # preferred parent. To do so, create a file containing the symbol
    # hints and enable the following option.
    #
    # The format of the hints file is described in the documentation
    # for the --symbol-hints command-line option. The file output by
    # the --write-symbol-info (i.e., ctx.symbol_info_filename) option
    # is in the same format. The simplest way to use this option is
    # to run the conversion through CollateSymbolsPass with
    # --write-symbol-info option, copy the symbol info and edit it to
    # create a hints file, then re-start the conversion at
    # CollateSymbolsPass with this option enabled.
    #SymbolHintsFileRule('symbol-hints.txt'),

    # To force all symbols matching a regular expression to be
    # converted as branches, add rules like the following:
    #ForceBranchRegexpStrategyRule(r'branch.*'),

    # To force all symbols matching a regular expression to be
    # converted as tags, add rules like the following:
    #ForceTagRegexpStrategyRule(r'tag.*'),

    # To force all symbols matching a regular expression to be
    # excluded from the conversion, add rules like the following:
    #ExcludeRegexpStrategyRule(r'unknown-.*'),

    # Sometimes people use "cvs import" to get their own source code
    # into CVS. This practice creates a vendor branch 1.1.1 and
    # imports the code onto the vendor branch as 1.1.1.1, then copies
    # the same content to the trunk as version 1.1. Normally, such
    # vendor branches are useless and they complicate the SVN history
    # unnecessarily. The following rule excludes any branches that
    # only existed as a vendor branch with a single import (leaving
    # only the 1.1 revision). If you want to retain such branches,
    # comment out the following line. (Please note that this rule
    # does not exclude vendor *tags*, as they are not so easy to
    # identify.)
    ExcludeTrivialImportBranchRule(),

    # To exclude all vendor branches (branches that had "cvs import"s
    # on them but no other kinds of commits), uncomment the following
    # line:
    #ExcludeVendorBranchRule(),

    # Usually you want this rule, to convert unambiguous symbols
    # (symbols that were only ever used as tags or only ever used as
    # branches in CVS) the same way they were used in CVS:
    UnambiguousUsageRule(),

    # If there was ever a commit on a symbol, then it cannot be
    # converted as a tag. This rule causes all such symbols to be
    # converted as branches. If you would like to resolve such
    # ambiguities manually, comment out the following line:
    BranchIfCommitsRule(),

    # Last in the list can be a catch-all rule that is used for
    # symbols that were not matched by any of the more specific rules
    # above. (Assuming that BranchIfCommitsRule() was included above,
    # then the symbols that are still indeterminate at this point can
    # sensibly be converted as branches or tags.) Include at most one
    # of these lines. If none of these catch-all rules are included,
    # then the presence of any ambiguous symbols (that haven't been
    # disambiguated above) is an error:

    # Convert ambiguous symbols based on whether they were used more
    # often as branches or as tags:
    HeuristicStrategyRule(),
    # Convert all ambiguous symbols as branches:
    #AllBranchRule(),
    # Convert all ambiguous symbols as tags:
    #AllTagRule(),

    # The last rule is here to choose the preferred parent of branches
    # and tags, that is, the line of development from which the symbol
    # sprouts.
    HeuristicPreferredParentRule(),
    ]

# Specify a username to be used for commits for which CVS doesn't
# record the original author (for example, the creation of a branch).
# This should be a simple (unix-style) username, but it can be
# translated into a git-style name by the author_transforms map.
ctx.username = 'cvs2git'

# ctx.svn_property_setters contains a list of rules used to set the
# svn properties on files in the converted archive. For each file,
# the rules are tried one by one. Any rule can add or suppress one or
# more svn properties. Typically the rules will not overwrite
# properties set by a previous rule (though they are free to do so).
#
# Obviously, SVN properties per se are not interesting for a cvs2git
# conversion, but some of these properties have side-effects that do
# affect the git output. FIXME: Document this in more detail.
ctx.svn_property_setters.extend([
    # To read auto-props rules from a file, uncomment the following line
    # and specify a filename. The boolean argument specifies whether
    # case should be ignored when matching filenames to the filename
    # patterns found in the auto-props file:
    #AutoPropsPropertySetter(
    #    r'/home/username/.subversion/config',
    #    ignore_case=True,
    #    ),

    # To read mime types from a file, uncomment the following line and
    # specify a filename:
    #MimeMapper(r'/etc/mime.types'),

    # Omit the svn:eol-style property from any files that are listed
    # as binary (i.e., mode '-kb') in CVS:
    CVSBinaryFileEOLStyleSetter(),

    # If the file is binary and its svn:mime-type property is not yet
    # set, set svn:mime-type to 'application/octet-stream'.
    CVSBinaryFileDefaultMimeTypeSetter(),

    # To try to determine the eol-style from the mime type, uncomment
    # the following line:
    #EOLStyleFromMimeTypeSetter(),

    # Choose one of the following lines to set the default
    # svn:eol-style if none of the above rules applied. The argument
    # is the svn:eol-style that should be applied, or None if no
    # svn:eol-style should be set (i.e., the file should be treated as
    # binary).
    #
    # The default is to treat all files as binary unless one of the
    # previous rules has determined otherwise, because this is the
    # safest approach. However, if you have been diligent about
    # marking binary files with -kb in CVS and/or you have used the
    # above rules to definitely mark binary files as binary, then you
    # might prefer to use 'native' as the default, as it is usually
    # the most convenient setting for text files. Other possible
    # options: 'CRLF', 'CR', 'LF'.
    DefaultEOLStyleSetter(None),
    #DefaultEOLStyleSetter('native'),

    # Prevent svn:keywords from being set on files that have
    # svn:eol-style unset.
    SVNBinaryFileKeywordsPropertySetter(),

    # If svn:keywords has not been set yet, set it based on the file's
    # CVS mode:
    KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE),

    # Set the svn:executable flag on any files that are marked in CVS as
    # being executable:
    ExecutablePropertySetter(),

    ])

# The directory to use for temporary files:
ctx.tmpdir = r'cvs2svn-tmp'

# To skip the cleanup of temporary files, uncomment the following
# option:
#ctx.skip_cleanup = True


# In CVS, it is perfectly possible to make a single commit that
# affects more than one project or more than one branch of a single
# project. Subversion also allows such commits. Therefore, by
# default, when cvs2svn sees what looks like a cross-project or
# cross-branch CVS commit, it converts it into a
# cross-project/cross-branch Subversion commit.
#
# However, other tools and SCMs have trouble representing
# cross-project or cross-branch commits. (For example, Trac's Revtree
# plugin, http://www.trac-hacks.org/wiki/RevtreePlugin is confused by
# such commits.) Therefore, we provide the following two options to
# allow cross-project/cross-branch commits to be suppressed.

# cvs2git only supports single-project conversions (multiple-project
# conversions wouldn't really make sense for git anyway). So this
# option must be set to False:
ctx.cross_project_commits = False

# git itself doesn't allow commits that affect more than one branch,
# so this option must be set to False:
ctx.cross_branch_commits = False

# cvs2git does not yet handle translating .cvsignore files into
# .gitignore files, so by default, the .cvsignore files are included
# in the conversion output. If you would like to omit the .cvsignore
# files from the output, set this option to False:
ctx.keep_cvsignore = True

# By default, it is a fatal error for a CVS ",v" file to appear both
# inside and outside of an "Attic" subdirectory (this should never
# happen, but frequently occurs due to botched repository
# administration). If you would like to retain both versions of such
# files, change the following option to True, and the attic version of
# the file will be written to a subdirectory called "Attic" in the
# output repository:
ctx.retain_conflicting_attic_files = False

# CVS uses unix login names as author names whereas git requires
# author names to be of the form "foo <bar>". The default is to set
# the git author to "cvsauthor <cvsauthor>". author_transforms can be
# used to map cvsauthor names (e.g., "jrandom") to a true name and
# email address (e.g., "J. Random <jrandom@example.com>" for the
# example shown). All values should be either Unicode strings (i.e.,
# with "u" as a prefix) or 8-bit strings in the utf-8 encoding.
# Please substitute your own project's usernames here to use with the
# author_transforms option of GitOutputOption below.
author_transforms={
    'abraham' : ('Per Abrahamsen', 'address@hidden'),
    'angeli' : ('Ralf Angeli', 'address@hidden'),
    'arne' : (u'Arne Jørgensen', 'address@hidden'),
    'ataka' : ('Masayuki Ataka', 'address@hidden'),
    'berenddeboer' : ('Berend de Boer', 'address@hidden'),
    'dak' : ('David Kastrup', 'address@hidden'),
    'dash' : ('Dag Asheim', 'address@hidden'),
    'frasson' : ('Miguel V. S. Frasson', 'address@hidden'),
    'hsparr' : ('Holger Sparr', 'address@hidden'),
    'jalar' : (u'Jan-Åke Larsson', 'address@hidden'),
    'levana' : ('Patrick Gundlach', 'address@hidden'),
    'philkime' : ('Philip Kime', 'address@hidden'),
    'psg' : ('Peter Galbraith', 'address@hidden'),
    'pvoostrum' : ('Piet van Oostrum', 'address@hidden'),
    'rsteib' : ('Reiner Steib', 'address@hidden'),
    'salve' : ('Davide Giovanni Maria Salvetti', 'address@hidden'),
    'sperber' : ('Mike Sperber', 'address@hidden'),
    'tsdh' : ('Tassilo Horn', 'address@hidden'),
    'alanshutko': ('Alan Shutko', 'address@hidden'),
    'amanda': ('Per Abrahamsen', 'address@hidden'),
    'dakas': ('David Kastrup', 'address@hidden'),
    'dominik': ('Carsten Dominik', 'address@hidden'),
    'krab': ('Kresten Krab Thorup', 'address@hidden'),
    'nixsf': ('Nicholas Alcock', 'address@hidden'),
#    'arafune': ('', ''),
#    'kurt': ('', ''),
#    'nabe': ('', ''),

    # This one will be used for commits for which CVS doesn't record
    # the original author, as explained above.
    'cvs2git' : ('cvs2git converter', 'address@hidden'),
    }

# This is the main option that causes cvs2svn to output to a
# "fastimport"-format dumpfile rather than to Subversion:
ctx.output_option = GitOutputOption(
    # The file in which to write the git-fast-import stream that
    # contains the changesets and branch/tag information:
    'cvs2svn-tmp/git-dump.dat',

    # The blobs will be written via the revision recorder, so in
    # OutputPass we only have to emit references to the blob marks:
    GitRevisionMarkWriter(),

    # This option can be set to an integer to limit the number of
    # revisions that are merged with the main parent in any commit.
    # For git output, this can be set to None (unlimited), though due
    # to the limitations of other tools you might want to set it to a
    # smaller number (e.g., 16). For Mercurial output, this should be
    # set to 1.
    max_merges=None,
    #max_merges=1,

    # Optional map from CVS author names to git author names:
    author_transforms=author_transforms,
    )

# Change this option to True to turn on profiling of cvs2svn (for
# debugging purposes):
run_options.profiling = False


# Should CVSItem -> Changeset database files be memory mapped? In
# some tests, using memory mapping speeded up the overall conversion
# by about 5%. But this option can cause the conversion to fail with
# an out of memory error if the conversion computer runs out of
# virtual address space (e.g., when running a very large conversion on
# a 32-bit operating system). Therefore it is disabled by default.
# Uncomment the following line to allow these database files to be
# memory mapped.
#changeset_database.use_mmap_for_cvs_item_to_changeset_table = True

# Now set the project to be converted to git. cvs2git only supports
# single-project conversions, so this method must only be called
# once:
run_options.set_project(
    # The filesystem path to the part of the CVS repository (*not* a
    # CVS working copy) that should be converted. This may be a
    # subdirectory (i.e., a module) within a larger CVS repository.
    'auctex/auctex',

    # A list of symbol transformations that can be used to rename
    # symbols in this project.
    symbol_transforms=[
        # Use IgnoreSymbolTransforms like the following to completely
        # ignore symbols matching a regular expression when parsing
        # the CVS repository, for example to avoid warnings about
        # branches with two names and to choose the preferred name.
        # It is *not* recommended to use this instead of
        # ExcludeRegexpStrategyRule; though more efficient,
        # IgnoreSymbolTransforms are less flexible and don't exclude
        # branches correctly. The argument is a Python-style regular
        # expression that has to match the *whole* CVS symbol name:
        #IgnoreSymbolTransform(r'nightly-build-tag-.*'),

        # RegexpSymbolTransforms transform symbols textually using a
        # regular expression. The first argument is a Python regular
        # expression pattern and the second is a replacement pattern.
        # The pattern is matched against each symbol name. If it
        # matches the whole symbol name, then the symbol name is
        # replaced with the corresponding replacement text. The
        # replacement can include substitution patterns (e.g., r'\1'
        # or r'\g<name>'). Typically you will want to use raw strings
        # (strings with a preceding 'r', like shown in the examples)
        # for the regexp and its replacement to avoid backslash
        # substitution within those strings.
        #RegexpSymbolTransform(r'release-(\d+)_(\d+)',
        #                      r'release-\1.\2'),
        #RegexpSymbolTransform(r'release-(\d+)_(\d+)_(\d+)',
        #                      r'release-\1.\2.\3'),

        # Simple 1:1 character replacements can also be done. The
        # following transform, which converts backslashes into forward
        # slashes, should usually be included:
        ReplaceSubstringsSymbolTransform('\\','/'),

        # This last rule eliminates leading, trailing, and repeated
        # slashes within the output symbol names:
        NormalizePathsSymbolTransform(),

        # Project-specific example; requires uncommenting the
        # PickyIgnoreSymbolTransform class near the top of this file:
        #PickyIgnoreSymbolTransform('auc-tex.el', 'release_5_2', '5.3'),
        ],

    # See the definition of global_symbol_strategy_rules above for a
    # description of this option:
    symbol_strategy_rules=global_symbol_strategy_rules,
    )