691 lines
17 KiB
C++
691 lines
17 KiB
C++
|
#include "resourceDatabase.h"
|
||
|
#include "StringTree.h"
|
||
|
|
||
|
|
||
|
// naive implementation for now with no intelligent index
|
||
|
// as performance issues grow, see this article:
|
||
|
|
||
|
// http://en.wikipedia.org/wiki/Substring_index
|
||
|
|
||
|
#include "minorGems/util/SimpleVector.h"
|
||
|
#include "minorGems/system/Time.h"
|
||
|
|
||
|
#include "stdio.h"
|
||
|
#include "minorGems/io/file/File.h"
|
||
|
|
||
|
#include "minorGems/util/log/AppLog.h"
|
||
|
|
||
|
|
||
|
// Builds a File object naming "stringDatabase.txt" inside the
// "resourceCache" directory.
// The result is allocated with new; every caller in this file deletes it
// when done.
static File *getFullDBFile() {
    char *cacheDirSteps[1] = { (char*)"resourceCache" };

    Path *cacheDir = new Path( cacheDirSteps, 1, false );

    return new File( cacheDir, "stringDatabase.txt" );
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
typedef struct resourceRecord {
|
||
|
const char *type;
|
||
|
char *wordString;
|
||
|
uniqueID id;
|
||
|
} resourceRecord;
|
||
|
|
||
|
|
||
|
void printResourceRecord( void *inR ) {
|
||
|
resourceRecord *r = (resourceRecord*)inR;
|
||
|
char *idString = getHumanReadableString( r->id );
|
||
|
|
||
|
printf( "Type %s, keyword %s, ID %s\n",
|
||
|
r->type, r->wordString, idString );
|
||
|
delete [] idString;
|
||
|
}
|
||
|
|
||
|
|
||
|
// text lines of the database file, kept so that the file can be rewritten
// when an entry is removed
static SimpleVector<char*> dataLines;


// every record currently in the database
static SimpleVector<resourceRecord *> records;


// resource types stored in each tree
// one tree per resource type:  recordTreeTypes[i] names the type whose
// records are indexed by recordTrees[i]
static SimpleVector<char *> recordTreeTypes;
static SimpleVector<StringTree *> recordTrees;
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
// hash table for quick check for duplicates on addData call
|
||
|
|
||
|
// number of bins
|
||
|
#define B 2000
|
||
|
|
||
|
|
||
|
|
||
|
SimpleVector<resourceRecord *> hashBins[B];
|
||
|
|
||
|
// Maps an ID to a bin index in the range [0, B) by summing the ID's U bytes
// and reducing the sum modulo the number of bins.
int getHashKey( uniqueID inID ) {
    unsigned int byteSum = 0;

    int b = 0;
    while( b < U ) {
        byteSum += inID.bytes[b];
        b++;
    }

    return byteSum % B;
}
|
||
|
|
||
|
|
||
|
|
||
|
// Adds a record pointer to the bin selected by the record's ID.
// No duplicate check is made here.
void hashInsert( resourceRecord *inRecord ) {
    int binIndex = getHashKey( inRecord->id );

    hashBins[ binIndex ].push_back( inRecord );
}
|
||
|
|
||
|
// Removes from the hash table the first entry whose ID equals the ID of
// inRecord.  The record itself is not freed.
// Does nothing if no entry matches.
void hashRemove( resourceRecord *inRecord ) {
    uniqueID targetID = inRecord->id;

    SimpleVector<resourceRecord *> &bin = hashBins[ getHashKey( targetID ) ];

    int binSize = bin.size();

    for( int e=0; e<binSize; e++ ) {
        resourceRecord *candidate = *( bin.getElement( e ) );

        if( ! equal( candidate->id, targetID ) ) {
            continue;
        }

        bin.deleteElement( e );
        return;
    }
}
|
||
|
|
||
|
// Returns true if some record with the given ID is in the hash table,
// false otherwise.
char hashExists( uniqueID inID ) {
    SimpleVector<resourceRecord *> &bin = hashBins[ getHashKey( inID ) ];

    int binSize = bin.size();

    char found = false;

    for( int e=0; e<binSize && !found; e++ ) {
        resourceRecord *candidate = *( bin.getElement( e ) );

        if( equal( candidate->id, inID ) ) {
            found = true;
        }
    }

    return found;
}
|
||
|
|
||
|
|
||
|
// Finds the record with the given ID in the hash table.
// Returns the first matching record in the ID's bin, or NULL if none
// matches.  The returned record is not copied.
resourceRecord *hashLookup( uniqueID inID ) {
    SimpleVector<resourceRecord *> &bin = hashBins[ getHashKey( inID ) ];

    int binSize = bin.size();

    resourceRecord *match = NULL;

    int e = 0;
    while( match == NULL && e < binSize ) {
        resourceRecord *candidate = *( bin.getElement( e ) );

        if( equal( candidate->id, inID ) ) {
            match = candidate;
        }
        e++;
    }

    return match;
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
// Returns the string tree that indexes records of the given resource type.
// If no tree exists yet for inType, an empty one is created, registered
// under a copy of the type name, and returned, so the result is never NULL.
StringTree *getTreeForType( const char *inType ) {
    int treeCount = recordTrees.size();

    for( int t=0; t<treeCount; t++ ) {
        const char *treeType = *( recordTreeTypes.getElement( t ) );

        if( strcmp( inType, treeType ) != 0 ) {
            continue;
        }

        // existing tree for this type
        return *( recordTrees.getElement( t ) );
    }

    // first request for this type:  register the name and a fresh tree
    // at the same index in the two parallel vectors
    recordTreeTypes.push_back( stringDuplicate( inType ) );

    StringTree *freshTree = new StringTree();
    recordTrees.push_back( freshTree );

    return freshTree;
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
// when true, addData appends each newly added entry to the database file
// initDatabase turns this off while it replays entries read from that file
static char writeNewEntriesToFile = true;
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
static char **getDataFileLines( int *outNumLines ) {
|
||
|
File *fullDBFile = getFullDBFile();
|
||
|
|
||
|
if( fullDBFile->exists() ) {
|
||
|
|
||
|
char *fileContents = fullDBFile->readFileContents();
|
||
|
|
||
|
delete fullDBFile;
|
||
|
|
||
|
|
||
|
if( fileContents == NULL ) {
|
||
|
AppLog::error(
|
||
|
"Error: failed to read from stringDatabase.txt\n" );
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
int numLines;
|
||
|
char **lines = split( fileContents, "\n", &numLines );
|
||
|
|
||
|
delete [] fileContents;
|
||
|
|
||
|
*outNumLines = numLines;
|
||
|
return lines;
|
||
|
}
|
||
|
else {
|
||
|
AppLog::error( "Error: stringDatabase.txt does not exist\n" );
|
||
|
}
|
||
|
|
||
|
delete fullDBFile;
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
void initDatabase() {
|
||
|
|
||
|
|
||
|
|
||
|
double start = Time::getCurrentTime();
|
||
|
|
||
|
|
||
|
writeNewEntriesToFile = false;
|
||
|
|
||
|
int numLines;
|
||
|
char **lines = getDataFileLines( &numLines );
|
||
|
|
||
|
|
||
|
if( lines != NULL ) {
|
||
|
|
||
|
AppLog::getLog()->logPrintf(
|
||
|
Log::INFO_LEVEL, "Splitting %d lines took %d ms\n",
|
||
|
numLines, (int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
|
||
|
|
||
|
for( int i=0; i<numLines; i++ ) {
|
||
|
|
||
|
char *line = lines[i];
|
||
|
|
||
|
dataLines.push_back( line );
|
||
|
|
||
|
int charsLeft = strlen( line );
|
||
|
|
||
|
if( charsLeft < U * 2 ) {
|
||
|
AppLog::getLog()->logPrintf(
|
||
|
Log::ERROR_LEVEL,
|
||
|
"Failed to read unique ID from line %d of string DB\n",
|
||
|
i );
|
||
|
}
|
||
|
else {
|
||
|
|
||
|
// first 12 chars is unique ID
|
||
|
|
||
|
char *idString = new char[ U * 2 + 1 ];
|
||
|
|
||
|
memcpy( idString, line, U * 2 );
|
||
|
|
||
|
idString[ U * 2 ] = '\0';
|
||
|
|
||
|
uniqueID id;
|
||
|
char result = parseHumanReadableString( idString, &id );
|
||
|
|
||
|
if( !result ) {
|
||
|
AppLog::getLog()->logPrintf(
|
||
|
Log::ERROR_LEVEL,
|
||
|
"Failed to read unique ID from "
|
||
|
"line %d of string DB\n", i );
|
||
|
}
|
||
|
else {
|
||
|
|
||
|
// skip ID and space
|
||
|
char *substring = &( line[ U * 2 + 1 ] );
|
||
|
|
||
|
|
||
|
char typeString[100];
|
||
|
|
||
|
int numRead = sscanf( substring, "%99s", typeString );
|
||
|
|
||
|
if( numRead == 1 ) {
|
||
|
char *skipPointer = &substring[ strlen( typeString )
|
||
|
+ 1 ];
|
||
|
|
||
|
// rest of contents is word string
|
||
|
addData( typeString, id, skipPointer );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
delete [] idString;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
delete [] lines;
|
||
|
}
|
||
|
|
||
|
writeNewEntriesToFile = true;
|
||
|
|
||
|
|
||
|
AppLog::getLog()->logPrintf(
|
||
|
Log::INFO_LEVEL,
|
||
|
"Loading database cache from disk took %d ms\n",
|
||
|
(int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
void freeDatabase() {
|
||
|
for( int i=0; i<records.size(); i++ ) {
|
||
|
resourceRecord *r = *( records.getElement(i) );
|
||
|
|
||
|
delete [] r->type;
|
||
|
delete [] r->wordString;
|
||
|
|
||
|
delete r;
|
||
|
}
|
||
|
records.deleteAll();
|
||
|
|
||
|
for( int i=0; i<dataLines.size(); i++ ) {
|
||
|
delete [] *( dataLines.getElement( i ) );
|
||
|
}
|
||
|
dataLines.deleteAll();
|
||
|
|
||
|
for( int i=0; i<recordTrees.size(); i++ ) {
|
||
|
delete [] *( recordTreeTypes.getElement( i ) );
|
||
|
|
||
|
delete *( recordTrees.getElement( i ) );
|
||
|
}
|
||
|
|
||
|
recordTreeTypes.deleteAll();
|
||
|
recordTrees.deleteAll();
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
void addData( const char *inResourceType,
|
||
|
uniqueID inID,
|
||
|
const char *inWordString ) {
|
||
|
|
||
|
//double start = Time::getCurrentTime();
|
||
|
|
||
|
//printf( "Adding data to database for %s,%s\n",
|
||
|
// inResourceType, inWordString );
|
||
|
|
||
|
|
||
|
// make sure there's no collision with existing data
|
||
|
if( hashExists( inID ) ) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
resourceRecord *r = new resourceRecord;
|
||
|
|
||
|
r->type = stringDuplicate( inResourceType );
|
||
|
|
||
|
r->wordString = stringDuplicate( inWordString );
|
||
|
r->id = inID;
|
||
|
|
||
|
|
||
|
records.push_back( r );
|
||
|
hashInsert( r );
|
||
|
|
||
|
|
||
|
|
||
|
// New code: string trees
|
||
|
StringTree *t = getTreeForType( inResourceType );
|
||
|
|
||
|
// lower case to make searches case-insensitive
|
||
|
char *lowerCase = stringToLowerCase( r->wordString );
|
||
|
t->insert( lowerCase, (void *)r );
|
||
|
delete [] lowerCase;
|
||
|
|
||
|
|
||
|
if( writeNewEntriesToFile ) {
|
||
|
|
||
|
File *fullDBFile = getFullDBFile();
|
||
|
|
||
|
char *fullFileName = fullDBFile->getFullFileName();
|
||
|
|
||
|
delete fullDBFile;
|
||
|
|
||
|
|
||
|
FILE *f = fopen( fullFileName, "a" );
|
||
|
|
||
|
delete [] fullFileName;
|
||
|
|
||
|
char *idString = getHumanReadableString( inID );
|
||
|
|
||
|
char *line = autoSprintf( "%s %s %s",
|
||
|
idString, inResourceType, inWordString );
|
||
|
|
||
|
dataLines.push_back( line );
|
||
|
|
||
|
|
||
|
fprintf( f, "\n%s", line );
|
||
|
fclose( f );
|
||
|
|
||
|
delete [] idString;
|
||
|
|
||
|
/*
|
||
|
printf( "Adding data took %d ms\n",
|
||
|
(int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
*/
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
void removeData( const char *inResourceType,
|
||
|
uniqueID inID ) {
|
||
|
|
||
|
//double start = Time::getCurrentTime();
|
||
|
|
||
|
char found = false;
|
||
|
|
||
|
for( int i=0; i<records.size() && !found; i++ ) {
|
||
|
resourceRecord *r = *( records.getElement( i ) );
|
||
|
|
||
|
if( equal( inID, r->id ) &&
|
||
|
strcmp( inResourceType, r->type ) == 0 ) {
|
||
|
|
||
|
// New code: string trees
|
||
|
StringTree *t = getTreeForType( inResourceType );
|
||
|
|
||
|
|
||
|
// lower case for case-insensitive
|
||
|
char *lowerCase = stringToLowerCase( r->wordString );
|
||
|
t->remove( lowerCase, (void *)r );
|
||
|
|
||
|
delete [] lowerCase;
|
||
|
|
||
|
records.deleteElement( i );
|
||
|
hashRemove( r );
|
||
|
|
||
|
delete [] r->type;
|
||
|
delete [] r->wordString;
|
||
|
delete r;
|
||
|
|
||
|
found = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
File *fullDBFile = getFullDBFile();
|
||
|
|
||
|
char *fullFileName = fullDBFile->getFullFileName();
|
||
|
delete fullDBFile;
|
||
|
|
||
|
|
||
|
FILE *f = fopen( fullFileName, "w" );
|
||
|
|
||
|
delete [] fullFileName;
|
||
|
|
||
|
|
||
|
|
||
|
char *idString = getHumanReadableString( inID );
|
||
|
char doneSkipping = false;
|
||
|
|
||
|
char someLinesWritten = false;
|
||
|
|
||
|
for( int i=0; i<dataLines.size(); i++ ) {
|
||
|
char *line = *( dataLines.getElement( i ) );
|
||
|
|
||
|
if( doneSkipping || strncmp( idString, line, 12 ) != 0 ) {
|
||
|
if( someLinesWritten ) {
|
||
|
fprintf( f, "\n%s", line );
|
||
|
}
|
||
|
else {
|
||
|
// first line, no newline
|
||
|
fprintf( f, "%s", line );
|
||
|
someLinesWritten = true;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
// skipped one!
|
||
|
doneSkipping = true;
|
||
|
delete [] line;
|
||
|
dataLines.deleteElement( i );
|
||
|
i--;
|
||
|
}
|
||
|
}
|
||
|
delete [] idString;
|
||
|
|
||
|
|
||
|
fclose( f );
|
||
|
|
||
|
|
||
|
/*
|
||
|
printf( "Removing data took %d ms\n",
|
||
|
(int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
// Finds the records of the given type that match EVERY word in inWords
// (despite the name, the result is the intersection of the per-word match
// sets, not their union).
//
// Returns a newly allocated vector that the caller must delete.  The
// records it points to are not copied.  Surviving records keep the order
// in which the tree returned them for the first word.
static SimpleVector< resourceRecord *> *getUnionOfWordMatches(
    SimpleVector<char *> *inWords, const char *inResourceType ) {

    StringTree *tree = getTreeForType( inResourceType );

    SimpleVector< resourceRecord *> *survivors =
        new SimpleVector< resourceRecord *>();

    for( int i=0; i<inWords->size(); i++ ) {
        char *word = *( inWords->getElement(i) );

        int matchCount = tree->countMatches( word );

        resourceRecord **wordMatches = new resourceRecord *[matchCount];

        // skip none, fetch up to matchCount matches for this word
        tree->getMatches( word, 0, matchCount, (void **)wordMatches );

        if( i == 0 ) {
            // first word:  every one of its matches is a candidate
            for( int m=0; m<matchCount; m++ ) {
                survivors->push_back( wordMatches[m] );
            }
        }
        else {
            // later words:  drop candidates that this word does not match
            // (walking backward so deletions do not disturb the index)
            for( int s = survivors->size() - 1; s >= 0; s-- ) {
                resourceRecord *candidate = *( survivors->getElement( s ) );

                char inThisSet = false;

                for( int m=0; m<matchCount; m++ ) {
                    if( wordMatches[m] == candidate ) {
                        inThisSet = true;
                        break;
                    }
                }

                if( !inThisSet ) {
                    survivors->deleteElement( s );
                }
            }
        }

        delete [] wordMatches;
    }

    return survivors;
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
int countSearchResults( const char *inResourceType,
|
||
|
const char *inSearchString ) {
|
||
|
|
||
|
//double start = Time::getCurrentTime();
|
||
|
|
||
|
int count = 0;
|
||
|
|
||
|
// New code: string trees
|
||
|
StringTree *t = getTreeForType( (char*)inResourceType );
|
||
|
|
||
|
// lower-case in tree
|
||
|
char *lowerSearchString = stringToLowerCase( inSearchString );
|
||
|
|
||
|
|
||
|
|
||
|
if( lowerSearchString[0] == '\0' ) {
|
||
|
// empty search, count all
|
||
|
count = t->countMatches( lowerSearchString );
|
||
|
}
|
||
|
else {
|
||
|
// how many words?
|
||
|
SimpleVector<char *> *words = tokenizeString( lowerSearchString );
|
||
|
|
||
|
if( words->size() == 0 ) {
|
||
|
// ignore whitespace, count everything
|
||
|
count = t->countMatches( "" );
|
||
|
}
|
||
|
else if( words->size() == 1 ) {
|
||
|
count = t->countMatches( *( words->getElement(0) ) );
|
||
|
}
|
||
|
else {
|
||
|
// multi-word, take union of results
|
||
|
|
||
|
SimpleVector< resourceRecord *> *unionMatches =
|
||
|
getUnionOfWordMatches( words, inResourceType );
|
||
|
|
||
|
// union now contains only element that match every word
|
||
|
count = unionMatches->size();
|
||
|
|
||
|
delete unionMatches;
|
||
|
}
|
||
|
|
||
|
for( int i=0; i<words->size(); i++ ) {
|
||
|
delete [] *( words->getElement( i ) );
|
||
|
}
|
||
|
delete words;
|
||
|
}
|
||
|
|
||
|
|
||
|
delete [] lowerSearchString;
|
||
|
|
||
|
/*
|
||
|
printf( "Counting %d results took %d ms\n",
|
||
|
count, (int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
*/
|
||
|
|
||
|
return count;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
// caller allocates spaces for inNumToGet and passes pointer as outIDs
|
||
|
int getSearchResults( const char *inResourceType,
|
||
|
const char *inSearchString,
|
||
|
int inNumToSkip,
|
||
|
int inNumToGet,
|
||
|
uniqueID *outIDs ) {
|
||
|
|
||
|
//double start = Time::getCurrentTime();
|
||
|
|
||
|
int numGotten = 0;
|
||
|
|
||
|
// New code: string trees
|
||
|
StringTree *t = getTreeForType( (char*)inResourceType );
|
||
|
|
||
|
resourceRecord **values = new resourceRecord *[inNumToGet];
|
||
|
|
||
|
// lower-case in tree
|
||
|
char *lowerSearchString = stringToLowerCase( inSearchString );
|
||
|
|
||
|
|
||
|
if( lowerSearchString == '\0' ) {
|
||
|
// empty search, consider everything
|
||
|
numGotten = t->getMatches( lowerSearchString, inNumToSkip, inNumToGet,
|
||
|
(void **)values );
|
||
|
}
|
||
|
else {
|
||
|
// how many words?
|
||
|
SimpleVector<char *> *words = tokenizeString( lowerSearchString );
|
||
|
|
||
|
if( words->size() == 0 ) {
|
||
|
// ignore spaces, show everything
|
||
|
numGotten = t->getMatches( "", inNumToSkip, inNumToGet,
|
||
|
(void **)values );
|
||
|
}
|
||
|
else if( words->size() == 1 ) {
|
||
|
// just this word (no whitespace
|
||
|
numGotten = t->getMatches( *( words->getElement(0) ),
|
||
|
inNumToSkip, inNumToGet,
|
||
|
(void **)values );
|
||
|
}
|
||
|
else {
|
||
|
// take union of results from multiple words, THEN
|
||
|
// apply skip and limit
|
||
|
SimpleVector< resourceRecord *> *unionMatches =
|
||
|
getUnionOfWordMatches( words, inResourceType );
|
||
|
|
||
|
int count = unionMatches->size();
|
||
|
|
||
|
numGotten = 0;
|
||
|
|
||
|
for( int i=0; i<inNumToGet; i++ ) {
|
||
|
if( i + inNumToSkip < count ) {
|
||
|
values[i] =
|
||
|
*( unionMatches->getElement( i + inNumToSkip ) );
|
||
|
|
||
|
numGotten ++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
delete unionMatches;
|
||
|
}
|
||
|
|
||
|
for( int i=0; i<words->size(); i++ ) {
|
||
|
delete [] *( words->getElement( i ) );
|
||
|
}
|
||
|
delete words;
|
||
|
}
|
||
|
|
||
|
delete [] lowerSearchString;
|
||
|
|
||
|
for( int i=0; i<numGotten; i++ ) {
|
||
|
outIDs[i] = values[i]->id;
|
||
|
}
|
||
|
delete [] values;
|
||
|
|
||
|
/*
|
||
|
printf( "Gettingresults took %d ms\n",
|
||
|
(int)( 1000 * (Time::getCurrentTime() - start ) ) );
|
||
|
*/
|
||
|
|
||
|
return numGotten;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
// Looks up the word string of the resource with the given ID.
//
// Returns the string stored in the record itself (not a copy), or NULL if
// no record has that ID.
char *getResourceName( uniqueID inID ) {
    resourceRecord *match = hashLookup( inID );

    if( match == NULL ) {
        return NULL;
    }

    return match->wordString;
}
|
||
|
|
||
|
|