[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[be] [ bibledit-Feature Requests-1764744 ] integrate checks from Umhloli
From: |
SourceForge.net |
Subject: |
[be] [ bibledit-Feature Requests-1764744 ] integrate checks from Umhloli |
Date: |
Thu, 17 Apr 2008 09:45:09 -0700 |
Feature Requests item #1764744, was opened at 2007-07-31 17:40
Message generated for change (Settings changed) made by teus
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=658624&aid=1764744&group_id=111189
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Pending
Resolution: None
Priority: 5
Private: No
Submitted By: Teus Benschop (teus)
Assigned to: Nobody/Anonymous (nobody)
Summary: integrate checks from Umhloli
Initial Comment:
Some checks from Umhloli.
procedure CheckAllowedCharacters (USFM : TUSFM; Curr : char);
procedure CheckSentenceStructure (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
procedure CheckBracketStructure (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
procedure CheckAdjacentCharacters (USFM : TUSFM; Curr : char);
procedure InitializeQuotationMarks;
procedure CheckQuotationMarks (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
procedure CheckForAndRemoveTabsAndDoubleSpaces (Name : string);
procedure InitializeCharacterSets;
procedure CheckCommaSemicolonColon (USFM : TUSFM; Curr : char);
procedure InitializeCommaSemicolonColon;
procedure InitializeSentenceStructure (InitIt : boolean);
procedure InitializeUnwantedWords;
procedure CheckUnwantedWords (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
procedure CheckLeadingSpaces (USFM : TUSFM; Curr : char);
const
NumberChars : set of char = ['0' .. '9'];
var
SentenceEnders : set of char;
SentenceIndifference : set of char;
SmallLetters : set of char;
Capitals : set of char;
NonAdjacentChars : set of char;
RightOfQuoteChars : set of char;
AllowedCharacters : set of char;
SentenceCutterFound : boolean;
FilterOutChars : set of char;
PreviousSFMHeading : boolean;
PreviousSFMHeadingNumber : integer;
HeadingText : string;
SpaceFound : boolean;
implementation
uses
MyFeedback, Utilities, MyRegistry, UnitMain, SysUtils,
Classes, NamesList, UnwantedWordsUnit, TypesConstants, ScripturesUnit;
type
BF = record
Opener : char;
Closer : char;
Place : string;
end;
BC = record
Beginning : char;
Ending : char;
end;
const
SentenceStarters = ['A'..'Z'];
MaxBracketLevel = 5;
var
SentenceHasStarted : boolean;
BracketFound : array [1..MaxBracketLevel] of BF;
BracketLevel : integer;
BracketChars : array [1..10] of BC;
NumberOfBracketsToCheck : integer;
OpeningBracketWasThere : boolean;
QuotationMarksFound : integer;
CommaSemicolonColonFound : boolean;
NamesStringList : TStringList;
CapitalsBuildingUp : boolean;
CapitalizedWord : string;
SentenceEndedAtPreviousCharacter : boolean;
UnwantedWordsList, UnwantedTicksList, UnwantedCommentsList : TStringList;
UnwantedSearchPhrase, UnwantedSearchWord : string;
procedure InitializeSentenceStructure (InitIt : boolean);
var
Scripture : TScripture;
begin
if InitIt then
begin
SentenceHasStarted := false;
CapitalsBuildingUp := false;
CapitalizedWord := '';
NamesStringList := TStringList.Create;
NamesStringList.Sorted := true;
NamesStringList.CaseSensitive := true;
Scripture := TScripture.Create(nil, -1);
if FileExists (Scripture.UmhloliDataPath + ApprovedNamesTxt) then
NamesStringList.LoadFromFile(Scripture.UmhloliDataPath +
ApprovedNamesTxt);
Scripture.Free;
PreviousSFMHeading := false;
PreviousSFMHeadingNumber := 0;
SentenceEndedAtPreviousCharacter := false;
end
else
begin
NamesStringList.Free;
end;
end;
procedure CheckSentenceStructure (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
var
i : integer;
begin
// No checking of sentence structure when an indifferent character is found.
if not (Curr in SentenceIndifference) then
begin
if (Curr in SentenceEnders) and (not SentenceHasStarted) then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A character ' + Curr + '
ends the sentence which hasn''t been started yet.');
end;
if (not (Curr in Capitals)) and (Curr <> ' ') and (not SentenceHasStarted)
then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A sentence should not
start with ' + Curr + ' but with a capital.');
SentenceHasStarted := true;
end;
// Solve problems like characters following immediately after a
// sentence ender.
if SentenceEndedAtPreviousCharacter then
begin
if Curr <> ' ' then
AddResultMessage (USFM, USFM.PreviousChars + ' - Invalid character (' +
Curr + ') found straight after a sentence closer.');
SentenceEndedAtPreviousCharacter := false;
end;
// No spaces allowed before a sentence ender.
if Curr in SentenceEnders then
begin
if USFM.PreviousCharacter (1) = ' ' then
AddResultMessage (USFM, USFM.PreviousChars + ' - No space allowed
before a character ending a sentence.');
end;
if USFM.HeadingNow then
if Curr <> ' ' then HeadingText := HeadingText + Curr;
if (USFM.HeadingNow <> PreviousSFMHeading)
or (USFM.HeadingNumber <> PreviousSFMHeadingNumber) then
begin
if USFM.HeadingNow then
// Heading starts here.
begin
if SentenceHasStarted then
AddResultMessage (USFM, USFM.PreviousChars + ' - A heading started
whereas the running sentence has not yet been finished.');
end
else
// Heading ends here.
begin
if HeadingText <> '' then
if not SentenceHasStarted then
AddResultMessage (USFM, USFM.PreviousChars + ' - Heading should
start with a capital, and not end with a full stop.');
SentenceHasStarted := false;
end;
PreviousSFMHeading := USFM.HeadingNow;
PreviousSFMHeadingNumber := USFM.HeadingNumber;
end;
if not USFM.HeadingNow then HeadingText := '';
if USFM.NewParagraphStarted then
begin
if SentenceHasStarted then
AddResultMessage (USFM, USFM.PreviousChars + ' - A new paragraph
started whereas the running sentence has not yet been finished.');
USFM.NewParagraphStarted := false;
end;
end;
// Capitals are only allowed in ALL CAPS words and in names.
if (Curr in Capitals) and SentenceHasStarted then CapitalsBuildingUp := true;
if CapitalsBuildingUp then
begin
if ((Curr in Capitals) or (Curr in SmallLetters) or (Curr in
FilterOutChars))
and (Curr <> '-') then
CapitalizedWord := CapitalizedWord + Curr
else
begin
// Word with capital(s) found.
CapitalsBuildingUp := false;
// Skip ALL CAPS words.
if UpperCase (CapitalizedWord) <> CapitalizedWord then
begin
if CapitalizedWord [Length (CapitalizedWord)] in FilterOutChars then
Delete (CapitalizedWord, Length (CapitalizedWord), 1);
if not NamesStringList.Find (CapitalizedWord, i) then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A capitalized word
(' + CapitalizedWord + ') was found within a sentence.');
AddResultMessage (USFM, 'Hint: If this capitalized word is a name,
add it to the names list.');
end;
end;
CapitalizedWord := '';
end;
end;
if not (Curr in SentenceIndifference) then
begin
if Curr in SentenceEnders then
begin
SentenceHasStarted := false;
SentenceEndedAtPreviousCharacter := true;
end;
if (Curr in SentenceStarters) and (not USFM.TitleNow) then
SentenceHasStarted := true;
end;
// End-of-book check.
if TidyUp and SentenceHasStarted then
AddResultMessage (USFM, USFM.PreviousChars + ' - The sentence has not been
ended properly at the end of the book.');
end;
procedure CheckBracketStructure (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
var
C : integer;
begin
If OpeningBracketWasThere then
begin
OpeningBracketWasThere := false;
if Curr = ' ' then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A space has been found
right after an opening bracket.');
end;
end;
For C := 1 to NumberOfBracketsToCheck do
begin
if Curr = BracketChars [C].Beginning then
begin
OpeningBracketWasThere := true;
// Next line comes in useful when lots of wild brackets are in texts
// being checked which are not yet proofread.
if BracketLevel < 0 then BracketLevel := 0;
Inc (BracketLevel);
if (BracketLevel <= MaxBracketLevel) and (BracketLevel > 0) then
begin
BracketFound [BracketLevel].Opener := Curr;
BracketFound [BracketLevel].Closer := BracketChars [C].Ending;
BracketFound [BracketLevel].Place := USFM.CurrentPlace +
USFM.PreviousChars;
end
else
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - Too many opening
brackets nested: ' + Curr);
end;
end;
end;
For C := 1 to NumberOfBracketsToCheck do
begin
if Curr = BracketChars [C].Ending then
begin
if USFM.PreviousCharacter (1) = ' ' then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A space has been found
right before a closing bracket.');
end;
if (BracketLevel <= MaxBracketLevel) and (BracketLevel > 0) then
begin
if Curr = BracketFound [BracketLevel].Closer then
begin
end
else
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - Found a closing
bracket ' + Curr + ' which does not match its opener...');
AddResultMessage (USFM, BracketFound [BracketLevel].Place + ' - ...
and here is the mismatched opener talked about in the previous message.',
false);
end;
end
else
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A closing bracket
found without matching opener: ' + Curr);
end;
Dec (BracketLevel);
// Next line makes sure that wild numbers of brackets don't disturb the
// checking too much.
if BracketLevel < 0 then BracketLevel := 0;
end;
end;
if TidyUp then
begin
if BracketLevel > MaxBracketLevel then BracketLevel := MaxBracketLevel;
For C := 1 to BracketLevel do
begin
AddResultMessage (USFM, BracketFound [C].Place + ' - An opening bracket
without matching closer has been found: ' + BracketFound [C].Opener, false);
end;
BracketLevel := 0;
end;
end;
procedure CheckAdjacentCharacters (USFM : TUSFM; Curr : char);
begin
if Curr in NonAdjacentChars then
if USFM.PreviousCharacter (1) in NonAdjacentChars then
AddResultMessage (USFM, USFM.PreviousChars + ' - Two characters found
which are not supposed to be adjacent to each other: ' + USFM.PreviousCharacter
(1) + ' and ' + Curr);
end;
procedure InitializeQuotationMarks;
begin
QuotationMarksFound := 0;
end;
procedure CheckQuotationMarks (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
begin
if Curr = '"' then
begin
Inc (QuotationMarksFound);
if (QuotationMarksFound mod 2) = 1 then
begin
if USFM.PreviousCharacter (1) <> ' ' then
begin
AddResultMessage (USFM, USFM.PreviousChars + ' - A space is supposed to
be found left to an opening quotation mark, but another character was found: '
+ USFM.PreviousCharacter (1));
end;
end;
end;
if (not (Curr in RightOfQuoteChars))
and (Curr <> ' ')
and ((QuotationMarksFound mod 2) = 0)
and (USFM.PreviousCharacter (1) = '"') then
AddResultMessage (USFM, USFM.PreviousChars + ' - An invalid character is
found next to a closing quotation mark: ' + Curr);
if TidyUp then
begin
if (QuotationMarksFound mod 2) <> 0 then
begin
AddResultMessage (USFM, 'Having reached the end of the book, it appears
that one or more quotation marks have not been closed.');
end;
end;
end;
procedure CheckCommaSemicolonColon (USFM : TUSFM; Curr : char);
{ After a comma, semicolon or colon, these rules apply:
- there should be one of the following characters:
1. a closing bracket
2. a space
3. a quotation mark
- if not, a message is written.
}
var
TheRightCharFound : boolean;
i : integer;
begin
if SentenceCutterFound then
begin
TheRightCharFound := false;
for i := 1 to NumberOfBracketsToCheck do
if Curr = BracketChars [i].Ending then TheRightCharFound := true;
if Curr = ' ' then TheRightCharFound := true;
if Curr = '"' then TheRightCharFound := true;
if not TheRightCharFound then
AddResultMessage (USFM, USFM.PreviousChars + ' - A sentence divider is
followed by an invalid character: ' + Curr);
SentenceCutterFound := false;
end;
if Curr in [',', ';', ':'] then
begin
SentenceCutterFound := true;
end;
end;
procedure CheckUnwantedWords (USFM : TUSFM; Curr : char; TidyUp : boolean =
false);
// There are two types of checks. 1. For words. 2. For phrases.
var
i, i2 : integer;
S : string;
begin
// Check for complete words.
if (Curr in Capitals) or (Curr in SmallLetters) or (Curr in FilterOutChars)
then
UnwantedSearchWord := UnwantedSearchWord + Curr
else
begin
if UnwantedSearchWord <> '' then
begin
i := UnwantedWordsList.IndexOf (LowerCase (UnwantedSearchWord));
if i >= 0 then
begin
if UnwantedTicksList.Strings [i] = UnwantedWordSearch then
AddResultMessage (USFM, USFM.PreviousChars + ' - An unwanted word ('
+ UnwantedSearchWord + ') has been found. Comment: ' +
UnwantedCommentsList.Strings [i] + '.');
end;
UnwantedSearchWord := '';
end;
end;
// Fill the search string and the reference array.
UnwantedSearchPhrase := UnwantedSearchPhrase + LowerCase (Curr);
Delete (UnwantedSearchPhrase, 1, 1);
UnwantedRefs [UnwantedRefsPointer] := USFM.CurrentPlace;
UnwantedPrevs [UnwantedRefsPointer] := USFM.PreviousChars;
Inc (UnwantedRefsPointer);
// If the limit has been reached, check for search phrases, if they exist.
if (UnwantedRefsPointer > UnwantedRefsSize) or TidyUp then
begin
for i := 0 to UnwantedWordsList.Count - 1 do
begin
if UnwantedTicksList.Strings [i] = UnwantedPhraseSearch then
begin
if Pos (LowerCase (UnwantedWordsList [i]), UnwantedSearchPhrase) > 0
then
begin
for i2 := 1 to UnwantedRefsPointer - 1 do
begin
S := Copy (UnwantedSearchPhrase, UnwantedPhraseMaxLength -
UnwantedRefsPointer + 1 + i2 - Length (UnwantedWordsList [i]) + 1, Length
(UnwantedWordsList [i]));
if S = UnwantedWordsList [i] then
begin
AddResultMessage (USFM, UnwantedRefs [i2] + ' ' + UnwantedPrevs
[i2] + ' - An unwanted phrase (' + S + ') has been found. Comment: ' +
UnwantedCommentsList.Strings [i] + '.', false);
end;
end;
end;
end;
end;
UnwantedRefsPointer := 1;
end;
// Initialize system for next search.
if TidyUp then InitializeUnwantedWords;
end;
procedure InitializeCommaSemicolonColon;
begin
CommaSemicolonColonFound := false;
end;
procedure CheckForAndRemoveTabsAndDoubleSpaces (Name : string);
var
F1, F2 : Textfile;
S : String;
i : integer;
Trimmed, DoubleSpaceFound, TabCharFound : boolean;
begin
Trimmed := false;
DoubleSpaceFound := false;
TabCharFound := false;
AssignFile(F1, Name);
Reset(F1);
while not Eof(F1) do
begin
Readln (F1, S);
if Trim (S) <> S then Trimmed := true;
if Pos (' ', S) > 0 then DoubleSpaceFound := true;
if Pos (TabChar, S) > 0 then TabCharFound := true;
end;
CloseFile(F1);
if Trimmed then
AddFeedbackMessage ('Unnecessary characters at the beginning or the end of
the line have been removed from ' + Name);
if DoublespaceFound then
AddFeedbackMessage ('Double spaces have been removed from ' + Name);
if TabCharFound then
AddFeedbackMessage ('Tabs have been removed from ' + Name);
if Trimmed or DoublespaceFound or TabCharFound then
begin
AssignFile(F1, Name);
Reset(F1);
AssignFile (F2, ProgramPath + 'tabspace.tmp');
Rewrite (F2);
while not Eof(F1) do
begin
Readln (F1, S);
S := Trim (S);
repeat
i := Pos (TabChar, S);
if i > 0 then
begin
Delete (S, i, 1);
Insert (' ', S, i);
end;
until i = 0;
repeat
i := Pos (' ', S);
if i > 0 then Delete (S, i, 1);
until i = 0;
WriteLn (F2, S);
end;
CloseFile(F1);
CloseFile (F2);
DeleteFile (Name);
RenameFile (ProgramPath + 'tabspace.tmp', Name);
end;
end;
procedure InitializeCharacterSets;
var
F1 : Textfile;
S : String;
i : integer;
begin
SentenceEnders := StringToSet (RegistryReadKey (SentenceEndersKey));
SentenceIndifference := StringToSet (RegistryReadKey
(SentenceIndifferenceKey));
SmallLetters := StringToSet (RegistryReadKey (SmallLettersKey));
Capitals := StringToSet (RegistryReadKey (CapitalsKey));
NonAdjacentChars := StringToSet (RegistryReadKey (NotAdjacentKey));
RightOfQuoteChars := StringtoSet (RegistryReadKey (RightQuotesKey));
FilterOutChars := StringToSet (RegistryReadKey (CharsFilteredOutKey));
AllowedCharacters := [];
for i := 1 to NumberOfBracketsToCheck do
begin
AllowedCharacters := AllowedCharacters
+ [BracketChars [i].Beginning] + [BracketChars [i].Ending];
end;
AllowedCharacters := AllowedCharacters + ['"'];
AllowedCharacters := AllowedCharacters + SentenceEnders
+ SentenceIndifference + SmallLetters + Capitals
+ NonAdjacentChars + RightOfQuoteChars + ['\'] + [' ']
+ StringToSet (RegistryReadKey (ExtraCharsInTextKey));
SentenceCutterFound := false;
// Initialize brackets.
BracketLevel := 0;
NumberOfBracketsToCheck := 0;
OpeningBracketWasThere := false;
AssignFile(F1, ProgramPath + 'brackets.txt');
Reset(F1);
while not Eof(F1) do
begin
Readln (F1, S);
if Length (S) = 2 then
begin
Inc (NumberOfBracketsToCheck);
BracketChars [NumberOfBracketsToCheck].Beginning := S[1];
BracketChars [NumberOfBracketsToCheck].Ending := S[2];
end
else AddFeedbackMessage ('While reading the brackets to check for, an
invalid set of brackets has been found: ' + S);
end;
CloseFile(F1);
SpaceFound := false;
end;
procedure InitializeUnwantedWords;
var
TList : TStringList;
i, i2 : integer;
S, S2 : string;
Scripture : TScripture;
begin
UnwantedSearchWord := '';
UnwantedSearchPhrase := StringOfChar(' ', UnwantedPhraseMaxLength);
TList := TStringList.Create;
Scripture := TScripture.Create(Nil, -1);
if FileExists (Scripture.UmhloliDataPath + UnwantedWordsTxt) then
TList.LoadFromFile (Scripture.UmhloliDataPath + UnwantedWordsTxt);
Scripture.Free;
UnwantedWordsList.Clear;
UnwantedTicksList.Clear;
UnwantedCommentsList.Clear;
for i := 0 to TList.Count - 1 do
begin
S := TList.Strings [i];
i2 := Pos (' (', S);
S2 := Copy (S, 1, i2 - 1);
UnwantedWordsList.Add (S2);
S := Copy (S, i2, 1000);
i2 := Pos (') (', S);
S2 := Copy (S, 3, i2 - 3);
UnwantedTicksList.Add (S2);
S2 := Copy (S, i2 + 3, 1000);
Delete (S2, Length (S2), 1);
UnwantedCommentsList.Add (S2);
end;
TList.Free;
for i := 1 to UnwantedRefsSize do
begin
UnwantedRefs [i] := '';
UnwantedPrevs [i] := '';
end;
UnwantedRefsPointer := 1;
end;
procedure CheckLeadingSpaces (USFM : TUSFM; Curr : char);
{ Certain punctuation cannot have a leading space. E.g. the following sentence
has a problem:
And he said to her : How do you do?
There is a space before a colon.
This procedure checks this type of mistakes.
}
begin
if SpaceFound then
begin
if Curr in [':', ',', ';', '?', '!'] then
AddResultMessage (USFM, USFM.PreviousChars + ' - A space precedes this
punctuation character: ' + Curr);
SpaceFound := false;
end;
if Curr = ' ' then
begin
SpaceFound := true;
end;
end;
initialization
UnwantedWordsList := TStringList.Create;
UnwantedTicksList := TStringList.Create;
UnwantedCommentsList := TStringList.Create;
finalization
UnwantedWordsList.Free;
UnwantedTicksList.Free;
UnwantedCommentsList.Free;
end.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=658624&aid=1764744&group_id=111189
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [be] [ bibledit-Feature Requests-1764744 ] integrate checks from Umhloli,
SourceForge.net <=