SleepIsDeath/gameSource/resourceDatabase.cpp

#include "resourceDatabase.h"
#include "StringTree.h"


// naive implementation for now with no intelligent index
// as performance issues grow, see this article:

// http://en.wikipedia.org/wiki/Substring_index

#include "minorGems/util/SimpleVector.h"
#include "minorGems/system/Time.h"

#include "stdio.h"
#include "minorGems/io/file/File.h"

#include "minorGems/util/log/AppLog.h"


static File *getFullDBFile() {
    char *pathSteps[1];

    pathSteps[0] = (char*)"resourceCache";


    File *dbFile = new File( new Path( pathSteps, 1, false ),
                             "stringDatabase.txt" );

    return dbFile;
    }


typedef struct resourceRecord {
        const char *type;
        char *wordString;
        uniqueID id;
    } resourceRecord;


void printResourceRecord( void *inR ) {
    resourceRecord *r = (resourceRecord*)inR;
    char *idString = getHumanReadableString( r->id );

    printf( "Type %s, keyword %s, ID %s\n",
            r->type, r->wordString, idString );
    delete [] idString;
    }


static SimpleVector<char*> dataLines;


static SimpleVector<resourceRecord *> records;


// resource types stored in each tree
// one tree
static SimpleVector<char *> recordTreeTypes;
static SimpleVector<StringTree *> recordTrees;


// hash table for quick check for duplicates on addData call

// number of bins
#define B 2000


SimpleVector<resourceRecord *> hashBins[B];

int getHashKey( uniqueID inID ) {
    unsigned int sum = 0;
    for( int i=0; i<U; i++ ) {

        sum += inID.bytes[i];
        }
    return sum % B;
    }


void hashInsert( resourceRecord *inRecord ) {
    hashBins[ getHashKey( inRecord->id ) ].push_back( inRecord );
    }

void hashRemove( resourceRecord *inRecord ) {
    uniqueID id = inRecord->id;
    SimpleVector<resourceRecord *> *bin = &( hashBins[ getHashKey( id ) ] );

    int numEntries = bin->size();
    for( int i=0; i<numEntries; i++ ) {
        resourceRecord *r = *( bin->getElement( i ) );
        if( equal( r->id, id ) ) {
            bin->deleteElement( i );
            return;
            }
        }
    }

char hashExists( uniqueID inID ) {
    SimpleVector<resourceRecord *> *bin = &( hashBins[ getHashKey( inID ) ] );

    int numEntries = bin->size();
    for( int i=0; i<numEntries; i++ ) {
        resourceRecord *r = *( bin->getElement( i ) );
        if( equal( r->id, inID ) ) {
            return true;
            }
        }
    return false;
    }


resourceRecord *hashLookup( uniqueID inID ) {
    SimpleVector<resourceRecord *> *bin = &( hashBins[ getHashKey( inID ) ] );

    int numEntries = bin->size();
    for( int i=0; i<numEntries; i++ ) {
        resourceRecord *r = *( bin->getElement( i ) );
        if( equal( r->id, inID ) ) {
            return r;
            }
        }
    return NULL;
    }


StringTree *getTreeForType( const char *inType ) {
    int numTrees = recordTrees.size();
    for( int i=0; i<numTrees; i++ ) {
        if( strcmp( inType, *( recordTreeTypes.getElement( i ) ) ) == 0 ) {
            // match
            return *( recordTrees.getElement( i ) );
            }
        }
    // no match

    // create a new one
    recordTreeTypes.push_back( stringDuplicate( inType ) );

    StringTree *returnVal = new StringTree();
    recordTrees.push_back( returnVal );

    return returnVal;
    }


static char writeNewEntriesToFile = true;


static char **getDataFileLines( int *outNumLines ) {
    File *fullDBFile = getFullDBFile();

    if( fullDBFile->exists() ) {

        char *fileContents = fullDBFile->readFileContents();

        delete fullDBFile;


        if( fileContents == NULL ) {
            AppLog::error(
                "Error:  failed to read from stringDatabase.txt\n" );
            return NULL;
            }

        int numLines;
        char **lines = split( fileContents, "\n", &numLines );

        delete [] fileContents;

        *outNumLines = numLines;
        return lines;
        }
    else {
        AppLog::error( "Error:  stringDatabase.txt does not exist\n" );
        }

    delete fullDBFile;
    return NULL;
    }


void initDatabase() {


    double start = Time::getCurrentTime();


    writeNewEntriesToFile = false;

    int numLines;
    char **lines = getDataFileLines( &numLines );


    if( lines != NULL ) {

        AppLog::getLog()->logPrintf(
            Log::INFO_LEVEL, "Splitting %d lines took %d ms\n",
            numLines, (int)( 1000 * (Time::getCurrentTime() - start ) ) );


        for( int i=0; i<numLines; i++ ) {

            char *line = lines[i];

            dataLines.push_back( line );

            int charsLeft = strlen( line );

            if( charsLeft < U * 2 ) {
                AppLog::getLog()->logPrintf(
                    Log::ERROR_LEVEL,
                    "Failed to read unique ID from line %d of string DB\n",
                    i );
                }
            else {

                // first 12 chars is unique ID

                char *idString = new char[ U * 2 + 1 ];

                memcpy( idString, line, U * 2 );

                idString[ U * 2 ] = '\0';

                uniqueID id;
                char result = parseHumanReadableString( idString, &id );

                if( !result ) {
                    AppLog::getLog()->logPrintf(
                        Log::ERROR_LEVEL,
                        "Failed to read unique ID from "
                        "line %d of string DB\n", i );
                    }
                else {

                    // skip ID and space
                    char *substring = &( line[ U * 2 + 1 ] );


                    char typeString[100];

                    int numRead = sscanf( substring, "%99s", typeString );

                    if( numRead == 1 ) {
                        char *skipPointer = &substring[ strlen( typeString )
                                                        + 1 ];

                        // rest of contents is word string
                        addData( typeString, id, skipPointer );
                        }
                    }


                delete [] idString;
                }

            }
        delete [] lines;
        }

    writeNewEntriesToFile = true;


    AppLog::getLog()->logPrintf(
        Log::INFO_LEVEL,
        "Loading database cache from disk took %d ms\n",
        (int)( 1000 * (Time::getCurrentTime() - start ) ) );
    }


void freeDatabase() {
    for( int i=0; i<records.size(); i++ ) {
        resourceRecord *r = *( records.getElement(i) );

        delete [] r->type;
        delete [] r->wordString;

        delete r;
        }
    records.deleteAll();

    for( int i=0; i<dataLines.size(); i++ ) {
        delete [] *( dataLines.getElement( i ) );
        }
    dataLines.deleteAll();

    for( int i=0; i<recordTrees.size(); i++ ) {
        delete [] *( recordTreeTypes.getElement( i ) );

        delete *( recordTrees.getElement( i ) );
        }

    recordTreeTypes.deleteAll();
    recordTrees.deleteAll();
    }


void addData( const char *inResourceType,
              uniqueID inID,
              const char *inWordString ) {

    //double start = Time::getCurrentTime();

    //printf( "Adding data to database for %s,%s\n",
    //        inResourceType, inWordString );


    // make sure there's no collision with existing data
    if( hashExists( inID ) ) {
        return;
        }


    resourceRecord *r = new resourceRecord;

    r->type = stringDuplicate( inResourceType );

    r->wordString = stringDuplicate( inWordString );
    r->id = inID;


    records.push_back( r );
    hashInsert( r );


    // New code:  string trees
    StringTree *t = getTreeForType( inResourceType );

    // lower case to make searches case-insensitive
    char *lowerCase = stringToLowerCase( r->wordString );
    t->insert( lowerCase, (void *)r );
    delete [] lowerCase;


    if( writeNewEntriesToFile ) {

        File *fullDBFile = getFullDBFile();

        char *fullFileName = fullDBFile->getFullFileName();

        delete fullDBFile;


        FILE *f = fopen( fullFileName, "a" );

        delete [] fullFileName;

        char *idString = getHumanReadableString( inID );

        char *line = autoSprintf( "%s %s %s",
                                  idString, inResourceType, inWordString );

        dataLines.push_back( line );


        fprintf( f, "\n%s", line );
        fclose( f );

        delete [] idString;

        /*
        printf( "Adding data took %d ms\n",
            (int)( 1000 * (Time::getCurrentTime() - start ) ) );
        */
        }
    }


void removeData( const char *inResourceType,
                 uniqueID inID ) {

    //double start = Time::getCurrentTime();

    char found = false;

    for( int i=0; i<records.size() && !found; i++ ) {
        resourceRecord *r = *( records.getElement( i ) );

        if( equal( inID, r->id ) &&
            strcmp( inResourceType, r->type ) == 0 ) {

            // New code:  string trees
            StringTree *t = getTreeForType( inResourceType );


            // lower case for case-insensitive
            char *lowerCase = stringToLowerCase( r->wordString );
            t->remove( lowerCase, (void *)r );

            delete [] lowerCase;

            records.deleteElement( i );
            hashRemove( r );

            delete [] r->type;
            delete [] r->wordString;
            delete r;

            found = true;
            }
        }


    File *fullDBFile = getFullDBFile();

    char *fullFileName = fullDBFile->getFullFileName();
    delete fullDBFile;


    FILE *f = fopen( fullFileName, "w" );

    delete [] fullFileName;


    char *idString = getHumanReadableString( inID );
    char doneSkipping = false;

    char someLinesWritten = false;

    for( int i=0; i<dataLines.size(); i++ ) {
        char *line = *( dataLines.getElement( i ) );

        if( doneSkipping || strncmp( idString, line, 12 ) != 0 ) {
            if( someLinesWritten ) {
                fprintf( f, "\n%s", line );
                }
            else {
                // first line, no newline
                fprintf( f, "%s", line );
                someLinesWritten = true;
                }
            }
        else {
            // skipped one!
            doneSkipping = true;
            delete [] line;
            dataLines.deleteElement( i );
            i--;
            }
        }
    delete [] idString;


    fclose( f );


    /*
    printf( "Removing data took %d ms\n",
            (int)( 1000 * (Time::getCurrentTime() - start ) ) );
    */
    }


static SimpleVector< resourceRecord *> *getUnionOfWordMatches(
    SimpleVector<char *> *inWords, const char *inResourceType ) {

    StringTree *t = getTreeForType( (char*)inResourceType );


    SimpleVector< resourceRecord *> *unionMatches =
        new SimpleVector< resourceRecord *>();


    for( int i=0; i<inWords->size(); i++ ) {
        int wordCount =
            t->countMatches( *( inWords->getElement(i) ) );

        resourceRecord **values = new resourceRecord *[wordCount];

        // -1 means get all
        t->getMatches( *( inWords->getElement(i) ), 0, wordCount,
                       (void **)values );

        if( i == 0 ) {
            // populate union with first set
            for( int w=0; w<wordCount; w++ ) {
                unionMatches->push_back( values[w] );
                }
            }
        else {
            for( int u=0; u<unionMatches->size(); u++ ) {
                // is this member of union in next set?
                resourceRecord *unionMember =
                    *( unionMatches->getElement( u ) );

                char found = false;

                for( int w=0; w<wordCount && !found; w++ ) {
                    if( unionMember == values[w] ) {
                        found = true;
                        }
                    }

                if( !found ) {
                    unionMatches->deleteElement( u );
                    u--;
                    }
                }
            }
        delete [] values;
        }

    return unionMatches;
    }


int countSearchResults( const char *inResourceType,
                        const char *inSearchString ) {

    //double start = Time::getCurrentTime();

    int count = 0;

    // New code:  string trees
    StringTree *t = getTreeForType( (char*)inResourceType );

    // lower-case in tree
    char *lowerSearchString = stringToLowerCase( inSearchString );


    if( lowerSearchString[0] == '\0' ) {
        // empty search, count all
        count = t->countMatches( lowerSearchString );
        }
    else {
        // how many words?
        SimpleVector<char *> *words = tokenizeString( lowerSearchString );

        if( words->size() == 0 ) {
            // ignore whitespace, count everything
            count = t->countMatches( "" );
            }
        else if( words->size() == 1 ) {
            count = t->countMatches( *( words->getElement(0) ) );
            }
        else {
            // multi-word, take union of results

            SimpleVector< resourceRecord *> *unionMatches =
                getUnionOfWordMatches( words, inResourceType );

            // union now contains only element that match every word
            count = unionMatches->size();

            delete unionMatches;
            }

        for( int i=0; i<words->size(); i++ ) {
            delete [] *( words->getElement( i ) );
            }
        delete words;
        }


    delete [] lowerSearchString;

    /*
    printf( "Counting %d results took %d ms\n",
            count, (int)( 1000 * (Time::getCurrentTime() - start ) ) );
    */

    return count;
    }


// caller allocates spaces for inNumToGet and passes pointer as outIDs
int getSearchResults(  const char *inResourceType,
                       const char *inSearchString,
                       int inNumToSkip,
                       int inNumToGet,
                       uniqueID *outIDs ) {

    //double start = Time::getCurrentTime();

    int numGotten = 0;

    // New code:  string trees
    StringTree *t = getTreeForType( (char*)inResourceType );

    resourceRecord **values = new resourceRecord *[inNumToGet];

    // lower-case in tree
    char *lowerSearchString = stringToLowerCase( inSearchString );


    if( lowerSearchString == '\0' ) {
        // empty search, consider everything
        numGotten = t->getMatches( lowerSearchString, inNumToSkip, inNumToGet,
                                   (void **)values );
        }
    else {
        // how many words?
        SimpleVector<char *> *words = tokenizeString( lowerSearchString );

        if( words->size() == 0 ) {
            // ignore spaces, show everything
            numGotten = t->getMatches( "", inNumToSkip, inNumToGet,
                                       (void **)values );
            }
        else if( words->size() == 1 ) {
            // just this word (no whitespace
            numGotten = t->getMatches( *( words->getElement(0) ),
                                       inNumToSkip, inNumToGet,
                                       (void **)values );
            }
        else {
            // take union of results from multiple words, THEN
            // apply skip and limit
            SimpleVector< resourceRecord *> *unionMatches =
                getUnionOfWordMatches( words, inResourceType );

            int count = unionMatches->size();

            numGotten = 0;

            for( int i=0; i<inNumToGet; i++ ) {
                if( i + inNumToSkip < count ) {
                    values[i] =
                        *( unionMatches->getElement( i + inNumToSkip ) );

                    numGotten ++;
                    }
                }

            delete unionMatches;
            }

        for( int i=0; i<words->size(); i++ ) {
            delete [] *( words->getElement( i ) );
            }
        delete words;
        }

    delete [] lowerSearchString;

    for( int i=0; i<numGotten; i++ ) {
        outIDs[i] = values[i]->id;
        }
    delete [] values;

    /*
    printf( "Gettingresults took %d ms\n",
            (int)( 1000 * (Time::getCurrentTime() - start ) ) );
    */

    return numGotten;
    }


char *getResourceName( uniqueID inID ) {
    resourceRecord *r = hashLookup( inID );

    if( r != NULL ) {
        return r->wordString;
        }
    else {
        return NULL;
        }
    }