Download text file
#if 0
---------------------------- PowerBASIC/cc v2.0
---| DASoft |------------------------------------------
---------------------------- Code DATE: 2000-04-08
| FILE NAME SndEXkey.bas | by
---------------------------- Don Schullian, Jr.
This code is released into the Public Domain
----------------------------------------------------------
No guarantee as to the viability, accuracy, or safety of
use of this code is implied, warranted, or guaranteed
----------------------------------------------------------
Use at your own risk!
----------------------------------------------------------
CONTACT AUTHOR AT d83@DASoftVSS.com
-------------------------------------------------------------------------
PURPOSE: Create & maintain a SoundEX key file
-------------------------------------------------------------------------
The theory behind all this is that the user requires to search on a
given data set for one or more keywords that are NOT included in any of
the other indexed fields of the record. These keywords are stored and
sorted in a separate file as SoundEX values along with the parent record
number.
There may be more than one record associated with any one keyword but in
the event that two or more keywords create identical SoundEX values then
only one of the values will be stored as multiple equal values linked to
the same record would be superfluous.
Also, if/when the user inputs duplicate keywords (humanly speaking)
these will be removed from the string to allow more room for other key
words and to remove confusion. (Users are generally confused enough
without further assistance;) Conversely, in the event that two or more of
the unique key words create identical SoundEX values they will remain in
the user inputted string to, once again, stave off confusion.
eg: USER INPUT "RED, BLUE,, GREEN, GREEN, GRIN"
RETURNed "BLUE,GREEN,GRIN,RED"
Three keys will be created for "BLUE", "GREEN", and "RED" as
"GRIN" and "GREEN" create equal SoundEX values
The series of routines, below, ass/u/me that the comma (,) is used to
parse the individual key words in the string. If you wish to use something
other than that character then you'll have to insert that value into all
instances of PARSE$ and PARSECOUNT.
The file buffer is currently set to 512,000 bytes. If more or less is
required the instance of the value can be changed in KeyFileMove.
Also, please note that the incoming OldKeys string is assumed to have
already been formatted!
There is test code at the bottom of the file.
'-------------------------------------------------------------------------------
'----- -----
'-------------------------------------------------------------------------------
fKeyFileOpen (BYVAL FileSpec AS STRING,BYVAL FileNo AS LONG) AS LONG
FileSpec = drive:\path\filename.ext of file to open
FileNo = if > 0 then this number is used else FREEFILE will be called
RETURNS: ZERO if file was opened without error ELSE ERR
KeyFileClose ()
Closes file, releases g_KeyFile value
KeyFileInsert (BYVAL TheWord AS STRING, BYVAL RecNo AS LONG)
KeyFileDelete (BYVAL TheWord AS STRING, BYVAL RecNo AS LONG)
TheWord = the word to be processed
RecNo = the associated record number
NOTE: These two SUBs allow the programmer access to the data base on
the level of the individual item.
KeyFileUpdate (OldKeys AS STRING, NewKeys AS STRING, RecNo AS LONG)
OldKeys = Previously formatted string of key words
RecNo = associated record number for these keys
NewKeys INCOMING: unformatted user input
RETURNING: formatted string
NOTE: This is the only required entry point for parent routines
fKeyFileSearch (BYVAL TheWord AS STRING, SEG RecNo() AS LONG) AS LONG
TheWord = The word/phrase to be searched for
RecNo() = RETURNING: a list of all record numbers that match the
SoundEX value for TheWord
RETURNS: The number of elements used in RecNo() (BASE 1)
#endif
'
'-------------------------------------------------------------------------------
'----- START OF CODE -----
'-------------------------------------------------------------------------------
'
TYPE KeyWordTYPE
RecNo AS LONG
SoundEX AS LONG
END TYPE
UNION KeyWordUNION
tKey AS KeyWordTYPE
Qkey AS QUAD
END UNION
GLOBAL g_KeyLast AS LONG
GLOBAL g_KeyFile AS LONG
DECLARE FUNCTION fKeyFileOpen (BYVAL FileSpec AS STRING,BYVAL FileNo AS LONG) AS LONG
DECLARE FUNCTION fKeyFileSearch (BYVAL TheWord AS STRING,SEG RecNo() AS LONG) AS LONG
DECLARE SUB KeyFileClose ()
DECLARE SUB KeyFileUpdate (BYVAL OldKeys AS STRING,SEG NewKeys AS STRING,BYVAL RecNo AS LONG)
DECLARE SUB KeyFileInsert (BYVAL TheWord AS STRING,BYVAL RecNo AS LONG)
DECLARE SUB KeyFileDelete (BYVAL TheWord AS STRING,BYVAL RecNo AS LONG)
'-------------------------------------------------------------------------------
'----------- PRIVATE ROUTINES --------------------------------------------------
'-------------------------------------------------------------------------------
DECLARE FUNCTION fKeyFileSoundEX (SEG TheWord AS STRING) AS LONG
DECLARE FUNCTION fKeys2Array (SEG TheKeys AS STRING,SEG Keys() AS LONG) AS LONG
DECLARE FUNCTION fKeyFileSeek (SEG uKey AS KeyWordUNION,BYVAL Unique AS LONG) AS LONG
DECLARE SUB KeyStringFormat (SEG TheKeys AS STRING )
DECLARE SUB KeyFileMove (BYVAL Offset AS LONG,BYVAL Bytes AS LONG)
'
'-------------------------------------------------------------------------------
'
FUNCTION fKeyFileOpen ( BYVAL FileSpec AS STRING, _
BYVAL FileNo AS LONG ) AS LONG
ON ERROR GOTO Oops ' set local error trap
'
IF FileNo = 0 THEN FileNo = FREEFILE ' get next unused number
'
OPEN FileSpec FOR BINARY AS #FileNo BASE = 0 ' open file
g_KeyLast = LOF(FileNo) \ 8 ' compute nbr of keys
'
ExitFunction: ' Exit point
g_KeyFile = FileNo ' set global file number
EXIT FUNCTION ' We're outta here
'
Oops: ' ERROR TRAP
IF FILEATTR(FileNo,0) THEN CLOSE #FileNo ' close opened file
FileNo = 0 ' clear globals
g_KeyLast = 0 '
FUNCTION = ERR ' RETURN ERRor code
RESUME ExitFunction ' jmp to exit point
'
END FUNCTION
'
'-------------------------------------------------------------------------------
'
SUB KeyFileClose () EXPORT
IF FILEATTR(g_KeyFile,0) THEN CLOSE g_KeyFile ' if file is open
g_KeyFile = 0 '
g_KeyLast = 0 '
'
END SUB '
'
'-------------------------------------------------------------------------------
'
SUB KeyFileInsert( BYVAL TheWord AS STRING, _
BYVAL RecNo AS LONG ) EXPORT
DIM uKey AS LOCAL KeyWordUNION ' working record union
DIM Offset AS LOCAL LONG ' file offset
'
uKey.tKey.RecNo = RecNo ' set union values
uKey.tKey.SoundEX = fKeyFileSoundEX(UCASE$(TheWord)) ' create SoundEX value
Offset = fKeyFileSeek(uKey,-1) ' find unique key
IF Offset < 0 THEN ' if it doesn't exist
Offset = -Offset ' reverse sign on offset
KeyFileMove Offset, 8 ' open a hole for key
PUT g_KeyFile, Offset, uKey ' stuff the key
INCR g_KeyLast ' increase the last #
END IF '
'
END SUB
'
'-------------------------------------------------------------------------------
'
SUB KeyFileDelete ( BYVAL TheWord AS STRING, _
BYVAL RecNo AS LONG ) EXPORT
DIM uKey AS LOCAL KeyWordUNION ' working record union
DIM Offset AS LOCAL LONG ' file offset
'
IF g_KeyLast < 1 THEN EXIT SUB ' oops! noting in the file
'
uKey.tKey.RecNo = RecNo ' set union values
uKey.tKey.SoundEX = fKeyFileSoundEX(UCASE$(TheWord)) ' create SoundEX value
Offset = fKeyFileSeek(uKey,-1) ' find unique key
'
IF Offset < 0 THEN EXIT SUB ' not there! nothing to remove
'
KeyFileMove Offset, -8 ' close up the file
DECR g_KeyLast ' decrease the record count
SEEK g_KeyFile, (g_KeyLast * 8) ' set EOF of file
SETEOF g_KeyFile '
END SUB
'
'-------------------------------------------------------------------------------
'
SUB KeyFileUpdate ( BYVAL OldKeys AS STRING, _
SEG NewKeys AS STRING, _
BYVAL RecNo AS LONG ) EXPORT
DIM uKey AS KeyWordUNION ' record union
DIM N AS LOCAL LONG ' new keys temp
DIM Ncount AS LOCAL LONG ' # of individual new keys
DIM Nkeys(0) AS LOCAL LONG ' array for new keys
DIM O AS LOCAL LONG ' old keys temp
DIM Ocount AS LOCAL LONG ' # of old/existing keys
DIM Okeys(0) AS LOCAL LONG ' array for old keys
DIM Offset AS LOCAL LONG ' file offset
DIM OldLast AS LOCAL LONG ' current last key #
'
KeyStringFormat NewKeys ' format the new key string
IF OldKeys = NewKeys THEN EXIT SUB ' same old, same old
uKey.tKey.RecNo = RecNo ' only need this once
OldLast = g_KeyLast ' current last record
'
Ocount = fKeys2Array( OldKeys, Okeys() ) ' fill old keys array
Ncount = fKeys2Array( NewKeys, Nkeys() ) ' fill new keys array
'
FOR O = Ocount TO 1 STEP -1 ' once for each old key
IF Ncount > 0 THEN ' if there are any new keys
ARRAY SCAN Nkeys(1) FOR Ncount, = Okeys(O), TO N ' scan for match with old
IF N > 0 THEN ' if we've found a match
ARRAY DELETE Nkeys(N) ' delete from new list
DECR Ncount ' decrease new count
ITERATE ' loop it
END IF '
END IF '
IF g_KeyLast > 0 THEN ' this should be redundant!
uKey.tKey.SoundEX = Okeys(O) ' set union SoundEX value
Offset = fKeyFileSeek(uKey,-1) ' seek existing key offset
IF Offset => 0 THEN ' got it!
KeyFileMove Offset, -8 ' close up file
DECR g_KeyLast ' decrease last record #
END IF '
END IF '
NEXT '
'
WHILE Ncount > 0 ' if any new keys left in list
uKey.tKey.SoundEX = Nkeys(Ncount) ' set union SoundEX value
Offset = fKeyFileSeek(uKey,0) ' seek file offset
KeyFileMove Offset, 8 ' open file
PUT g_KeyFile, Offset, uKey ' stuff new key
INCR g_KeyLast ' increase last record #
DECR Ncount ' previous new key
WEND '
'
IF OldLast > g_KeyLast THEN ' if file is shorter then
SEEK #g_KeyFile, g_KeyLast * 8 ' truncate file
SETEOF #g_KeyFile '
END IF '
'
END SUB '
'
'-------------------------------------------------------------------------------
'
FUNCTION fKeyFileSearch ( BYVAL TheWord AS STRING, _
SEG RecNo() AS LONG ) EXPORT AS LONG
DIM Count AS LOCAL LONG ' found keys count
DIM uKey AS LOCAL KeyWordUNION ' record union
DIM Last AS LOCAL LONG ' last ubound for RecNo array
DIM Offset AS LOCAL LONG ' file offset
DIM SoundEX AS LOCAL LONG ' sought after SoundEX value
'
TheWord = UCASE$(TRIM$(TheWord)) ' strip and ucase key word
SoundEX = fKeyFileSoundEX(TheWord) ' set SoundEX value
uKey.tKey.RecNo = -1 ' create a SoundEX record 1 less
uKey.tKey.SoundEX = SoundEX - 1 ' than the one we want
Offset = fKeyFileSeek(uKey,0) ' find starting file offset
'
SEEK g_KeyFile, Offset ' set file pointer
FOR Offset = (Offset \ 8) TO g_KeyLast ' loop until end of file
GET g_KeyFile, , uKey ' read next record
IF uKey.tKey.SoundEX <> SoundEX THEN EXIT FOR ' if SoundEX values don't match
INCR Count ' bump the counter
IF Count > Last THEN ' if counter > ubound of array
Last = Last + 10 ' add a few new elements to array
REDIM PRESERVE RecNo(Last) ' redim holding array
END IF '
RecNo(Count) = uKey.tKey.RecNo ' stuff item
NEXT '
'
IF Count > 1 THEN ARRAY SORT RecNo(1) FOR Count ' if there's anyhting to sort
'
FUNCTION = Count ' RETURN number of found items
END FUNCTION
'
'-------------------------------------------------------------------------------
'-------------- SUPPORTING ROUTINES --------------------------------------------
'-------------------------------------------------------------------------------
'
SUB KeyFileMove ( BYVAL Offset AS LONG, _
BYVAL Bytes AS LONG )
DIM BytesLeft AS LOCAL LONG ' bytes left to be moved
DIM Buf AS LOCAL STRING ' buffer to hold bytes being moved
DIM Chunk AS LOCAL LONG ' size of current bytes being moved
DIM L AS LOCAL LONG ' temp variable
'
L = LOF( g_KeyFile ) ' length of file
BytesLeft = L - Offset '
'
IF Bytes > 0 THEN ' opening a space
Offset = L '
ELSE ' closing up a gap
Offset = Offset - Bytes '
BytesLeft = BytesLeft + Bytes '
END IF '
'
DO '
Chunk = MIN( 512000, BytesLeft ) ' size of chunk
IF Bytes > 0 THEN Offset = Offset - Chunk ' fix seek position
SEEK #g_KeyFile, Offset ' set get position
GET$ #g_KeyFile, Chunk, Buf ' take a bite
SEEK #g_KeyFile, Offset + Bytes ' set put position
PUT$ #g_KeyFile, Buf ' spit it out
IF Bytes < 0 THEN Offset = Offset + Chunk ' fix seek position
BytesLeft = BytesLeft - Chunk ' decr work load
LOOP UNTIL BytesLeft =< 0 '
END SUB
'
'-------------------------------------------------------------------------------
'
FUNCTION fKeyFileSoundEX ( SEG TheWord AS STRING ) AS LONG
REGISTER SndX AS LONG '
REGISTER Char AS LONG ' character value
'
DIM Last AS LOCAL LONG ' last character processed
DIM Letters AS LOCAL LONG '
DIM Vals AS LOCAL STRING * 27 ' SndX values
DIM V_ptr AS LOCAL BYTE PTR ' pointer to values
DIM W_ptr AS LOCAL BYTE PTR ' pointer to incoming data
DIM Widx AS LOCAL LONG ' index to incoming data pointer
'
Vals = CHR$(0,0,1,2,3,0,1,2,0,0,2,2,4,5) & _ ' SndX values from _A -> M
CHR$(5,0,1,2,6,2,3,0,1,0,2,0,2) ' N -> Z
V_ptr = VARPTR(Vals) ' pointer to above
W_ptr = STRPTR(TheWord) ' set ptr to incoming string
Letters = 9 ' 1 letter & 9 numbers
'
FOR Widx = 0 TO LEN(TheWord)-1 ' once for each letter
Char = @W_ptr[Widx] ' draw off the letter
IF Char < 65 THEN ITERATE ' can't be a valid letter
Char = ( Char AND &b11111 ) ' ucase & subtract 64
IF Char > 26 THEN ITERATE ' oops! this ain't a letter either
IF SndX <> 0 THEN '
Char = @V_ptr[Char] '
IF Char = Last THEN ITERATE ' same as last char
Last = Char '
IF Char = 0 THEN ITERATE '
SndX = (SndX OR Char) ' place value into buffer
IF Letters = 0 THEN EXIT FOR ' check if buffer is full
ELSE '
SndX = Char ' A = 1 Z = 26
Last = @V_ptr[Char] '
END IF '
SHIFT LEFT SndX, 3 '
DECR Letters ' decr buffer counter
NEXT '
'
IF Letters > 0 THEN ' add trailing zeros
SHIFT LEFT SndX, ( Letters * 3 ) '
END IF '
'
FUNCTION = SndX 'RETURN SOUNDEX VALUE
END FUNCTION
'
'-------------------------------------------------------------------------------
'
SUB KeyStringFormat ( SEG KeyWords AS STRING )
DIM Count AS LOCAL LONG ' # of keywords in string
DIM Keys(25) AS LOCAL STRING ' individual keywords
DIM X AS LOCAL LONG ' loop counter
'
IF LEN(KeyWords) = 0 THEN EXIT SUB ' nothing to do here
'
KeyWords = UCASE$(KeyWords) ' convert to all uppercase
'
Count = 1 ' assume at least 1 keyword
FOR X = 1 TO PARSECOUNT(KeyWords) ' loop through pieces of $
Keys(Count) = PARSE$(KeyWords,X) ' set new word value
Keys(Count) = TRIM$(Keys(Count)) ' strip any outside spaces
IF LEN(Keys(Count)) > 0 THEN INCR Count ' ok! we've got a word
NEXT '
DECR Count ' back off by one
KeyWords = "" ' reset incoming/returning $
'
IF Count = 0 THEN EXIT SUB ' no keywords found
'
ARRAY SORT Keys(1) FOR Count '
'
KeyWords = Keys(Count) ' start at the end
DECR Count '
FOR X = Count TO 1 STEP -1 ' rebuild KeyWords string
IF Keys(X) <> Keys(X+1) THEN ' if this word not already
KeyWords = Keys(X) & "," & KeyWords ' in the string then add it
END IF '
NEXT '
END SUB
'
'-------------------------------------------------------------------------------
'
FUNCTION fKeys2Array ( SEG KeyWords AS STRING, _
SEG Keys() AS LONG ) AS LONG
DIM Count AS LOCAL LONG ' # of keywords in string
DIM Temp AS LOCAL STRING ' temp string
DIM X AS LOCAL LONG ' loop counter
'
Count = PARSECOUNT(KeyWords) ' count the individual words
REDIM Keys(Count) ' establish SoundEX array
FOR X = 1 TO Count ' fill array
Temp = PARSE$(KeyWords,X) ' get individual word
Keys(X) = fKeyFileSoundEX(Temp) ' set/create SoundEX value
NEXT '
'
ARRAY SORT Keys(1) FOR Count, DESCEND ' sort SoundEx values
ARRAY SCAN Keys(1) FOR Count, = 0, TO X ' find any zero values
IF X > 0 THEN Count = X -1 ' if any 0s found set new count
IF Count = 0 THEN EXIT FUNCTION ' nothing left to do
'
X = 2 ' remove any double values
WHILE Count > 1 '
IF Keys(X-1) = Keys(X) THEN ' if same SoundEx value
ARRAY DELETE Keys(X) ' remove it from the array
DECR Count ' one less in the mess
ELSE '
INCR X ' next value
END IF '
IF X > Count THEN EXIT LOOP ' all done
WEND '
'
FUNCTION = Count ' RETURN number of SoundExs
END FUNCTION
'
'-------------------------------------------------------------------------------
'
FUNCTION fKeyFileSeek( SEG uKey AS KeyWordUNION, _
BYVAL Unique AS LONG ) AS LONG
DIM F AS LOCAL LONG ' top record #
DIM L AS LOCAL LONG ' bottom record #
DIM M AS LOCAL LONG ' middle record #
DIM Qval AS LOCAL QUAD ' middle record value
'
IF g_KeyLast = 0 THEN EXIT FUNCTION ' oops! nothing to do here
F = 0 ' first record #
L = g_KeyLast -1 ' last record #
DO ' start search
M = ( F + L ) \ 2 ' compute half way mark
GET g_KeyFile, M*8, Qval ' read record
IF M = F THEN EXIT LOOP ' if we've hit the mean record
IF uKey.Qkey =< Qval THEN L = M ELSE F = M ' compute new half of list
LOOP '
'
WHILE uKey.Qkey > Qval ' if we've not quite there yet
INCR M ' next record #
IF M => g_KeyLast THEN EXIT LOOP ' oops! run out of records!
GET g_KeyFile, M*8, Qval ' read record
WEND '
SHIFT LEFT M, 3 ' compute file offset
'
IF (Unique <> 0) AND (uKey.Qkey <> Qval) THEN M = -M ' if not unique or exact match
'
FUNCTION = M ' RETURN Offset value
'
END FUNCTION
'
'-------------------------------------------------------------------------------
'------------ TEST MESS! -------------------------------------------------------
'-------------------------------------------------------------------------------
'
#IF 1
FUNCTION PBmain ()
DIM FileSpec AS LOCAL STRING
DIM uKey AS LOCAL KEYWORDUNION
DIM L AS LOCAL LONG
DIM NewKeys AS LOCAL STRING
DIM OldKeys AS LOCAL STRING
DIM R(0) AS LOCAL LONG
DIM RecNo AS LOCAL LONG
DIM X AS LOCAL LONG
FileSpec = "KEYTEST.DAT" ' test file
OldKeys = "BLACK,GREEN,ORANGE,RED" ' existing formatted data
NewKeys = "RED,GREEN,BLUE,BLACK,,GREEN,black,PURPLE ,RED" ' user input
'
IF LEN(DIR$(FileSpec)) > 0 THEN KILL FileSpec ' kill test file
X = fKeyFileOpen( FileSpec, 0 ) '
IF X <> 0 THEN '
PRINT "ERROR"; X '
WAITKEY$ '
END IF '
'
FOR RecNo = 1 to 4 step 1 ' load up some data
IF RecNo = 3 THEN ITERATE '
KeyFileUpdate "" , OldKeys, RecNo '
KeyFileUpdate OldKeys, NewKeys, RecNo '
NEXT '
KeyFileInsert "Fire-Engine Red", 3 ' load individual stuff
KeyFileInsert "Blue", 3 '
KeyFileDelete "Blue", 2 ' delete one
'
PRINT NewKeys ' returned goodies
PRINT STRING$(79,45) '
PRINT g_KeyLast '
FOR X = 0 TO g_KeyLast -1 '
GET #g_KeyFile, x*8, uKey '
PRINT uKey.tKey.SoundEx, uKey.tKey.RecNo, '
PRINT FORMAT$(uKey.Qkey,"##################") '
NEXT '
PRINT STRING$(79,45) '
L = fKeyFileSearch("purple",R()) ' find all the matches for 'GRIN'
FOR X = 1 TO L ' these will be the same as for
PRINT R(X) ' GREEN
NEXT '
KeyFileClose '
WAITKEY$ '
END FUNCTION
#ENDIF