A C++ Class for Editing Large Files

Spring 1997, Dr. Dobb's Journal

by Vincent Van Den Berghe


Listing One

#include <stddef.h>
#include <io.h>
#include <errno.h>

#include "fildelta.h"
#include "editable.h"
#include "sendelta.h"

// public functions /////////////////////////////////////////////////////////

/* Constructor */
TEditableFile::TEditableFile(int aHFile)
    :hFile(aHFile),deltaList(new TSentinelDelta),currentPos(0)
    {
    if(deltaList==NULL) return;
    cancelChanges();    // cancelChanges will build the original delta list
    }

/* Destructor */
TEditableFile::~TEditableFile()
    {
    flush();
    delete deltaList;
    }


/* Did constructor fail? */
int TEditableFile::failed() const
    {
    return deltaList==NULL || (filelength(hFile)>0 && 
                                                 deltaList->next==deltaList);
    }
/* Roll back to original file data, or file data when last flush occurred */
int TEditableFile::cancelChanges()
    {
    deleteDeltas();
    if(filelength(hFile)!=0) {    // if this is a non-zero-length file, 
                                  //                      insert a file delta
        TDelta *c=new TFileDelta(hFile,0,filelength(hFile));
        if(c==NULL) return 0;
    c->insertBefore(deltaList);
        }
    return 1;
    }
/* Does position 'pos' hold original or modified data?
   *endPos will contain the last position with the same status.
*/
int TEditableFile::isUnmodified(long pos,long *endPos) const
    {
    long originalPos=pos;
    TDelta *c=getDelta(&pos);
    int status=c->unmodified();
    if(endPos!=NULL) {
        *endPos=originalPos-pos;
        while((c!=deltaList) &&
          ((status && c->unmodified()) || (!status && !c->unmodified()))) {
            *endPos+=c->size();
            c=c->next;
            }
        --*endPos;
        }
    return status;
    }
/* total size in bytes of the edited file */
long TEditableFile::size() const
    {
    long deltaSize=0;
    TDelta *c=deltaList->next;
    while(c!=deltaList) {
        deltaSize+=c->size();
        c=c->next;
        }
    return deltaSize;
    }
/* same as lseek(): set the new current file position */
long TEditableFile::seek(long offset,int fromwhere)
    {
    switch(fromwhere) {
        case 0: currentPos=offset; break; // SEEK_SET
        case 1: currentPos+=offset; break; // SEEK_CUR
        case 2: currentPos=size()-offset; break; // SEEK_END
        }
    return currentPos;
/* The current position in bytes measured from the beginning of the file. */
long TEditableFile::tell() const
    {
    return currentPos;

/* Are we at end of file? */
int TEditableFile::eof() const
    {
    return tell()==size();
    }
/* Insert 'length' bytes from buffer at position 'pos' in the file. */
size_t TEditableFile::insert(const void *buffer,size_t length)
    {
    long pos=currentPos;
    TDelta *c=getDelta(&pos);
    return c->insert(pos,buffer,length);
    }
/* Write 'length' bytes from buffer at position 'pos' in the file. */
size_t TEditableFile::write(const void *buffer,size_t length)
    {
    long pos=currentPos;
    TDelta *c=getDelta(&pos);
    size_t bytesWritten=c->write(pos,buffer,length);
    currentPos+=bytesWritten;
    return bytesWritten;
    }
/* Read 'length' bytes position 'pos' in the file into buffer. */
size_t TEditableFile::read(void *buffer,size_t length)
    {
    long pos=currentPos;
    TDelta *c=getDelta(&pos);
    size_t bytesRead=c->read(pos,buffer,length);
    currentPos+=bytesRead;
    return bytesRead;
    }
/* Remove 'length' bytes from position 'pos'. */
size_t TEditableFile::remove(size_t length)
    {
    long pos=currentPos;
    TDelta *c=getDelta(&pos);
    return c->remove(pos,length);
    }
/* Flush the current file (commit all changes). */
void TEditableFile::flush()
    {
    flush(hFile);
    }
void TEditableFile::flush(int aHFile)
    {
    TDelta *c=deltaList->prev;  // flush all file deltas that moved up, 
                                //                            in reverse order
    long offset=0;
    while(c!=deltaList) {
        if(c->unmodified())
            c->flush(aHFile,offset,1);
        offset+=c->size();
        c=c->prev;
        }
    c=deltaList->next;    // flush all file deltas that were 
                          //                         moved down, sequentially
    offset=0;
    while(c!=deltaList) {
        if(c->unmodified())
            c->flush(aHFile,offset,0);
        offset+=c->size();
        c=c->next;
        }
    c=deltaList->next;    // move all other (edit) deltas
    offset=0;
    while(c!=deltaList) {
        if(!c->unmodified())
            c->flush(aHFile,offset,-1);
        offset+=c->size();
        c=c->next;
    // if we were writing on the original file, cancel all edits (since they
    // were written) and start with a new deltaList
    if(aHFile==hFile) cancelChanges();
    }
// private functions ////////////////////////////////////////////////////////
/* Find the delta containing file position 'pos'. */
TDelta *TEditableFile::getDelta(long *pos) const
{
    TDelta *c=deltaList->next;
    while(c!=deltaList && *pos>=c->size()) {     // find delta containing pos
        *pos-=c->size();
        c=c->next;
    }
    return c;
}
/* Delete the delta list, i.e. only one sentinel delta remains. */
void TEditableFile::deleteDeltas()
{
    TDelta *c=deltaList->next;
    while(c!=deltaList) {
        TDelta *old=c;
        c=c->next;
        delete old;
    }
}


Listing Two

#include <stddef.h>
#include <assert.h>
#include <io.h>

#include "fildelta.h"
#include "edtdelta.h"

/* Constructor. */
TFileDelta::TFileDelta(int aHFile,long aStartPos,long aSize)
    :TDelta(),hFile(aHFile),startPos(aStartPos),byteSize(aSize)
    {
    }
/* Return false, since TFileDelta constructor always succeeds. */
int TFileDelta::failed() const
    {
    return 0;
    }
/* Return size of this delta in bytes. */
long TFileDelta::size() const
    {
    return byteSize;
    }
/* File deltas have no bytes left by definition (the data is not editable). */
size_t TFileDelta::bytesLeft() const
    {
    return 0;
    }
/* Return 1, since a file delta always contains unmodified file data. */
int TFileDelta::unmodified() const
    {
    return 1;
    }
/* Insert 'length' bytes from 'buffer' at 'offset' within this delta. */
size_t TFileDelta::insert(long offset,const void *buffer,size_t length)
    {
    if(offset==0) { // Inserting at start of this delta
        size_t toCopy;
        if((toCopy=prev->bytesLeft())!=0) { // Can previous delta absorb some?
            if(toCopy>length) toCopy=length;
            prev->insert(prev->size(),buffer,toCopy);
            buffer=(const char *)buffer+toCopy;
            length-=toCopy;
            if(length==0) return toCopy;
            }
        // create a new edit delta, holding what's left to insert
        TDelta *c=TEditDelta::createNew(buffer,length);
        if(c==NULL) return toCopy;  // if creation failed, 
                                    //             return what we could insert
        c->insertBefore(this);
        return toCopy+length;
        }
    else {
        TDelta *c;
        if(offset==byteSize)  // insertion at end: pass request on 
                              //                              to next delta
            c=next;
        else { // split this file delta in 2 non-empty file deltas
            assert(offset<byteSize);
            c=new TFileDelta(hFile,startPos+offset,byteSize-offset);
            if(c==NULL) return 0;
            byteSize=offset;
            c->insertAfter(this);
            }
        c->insert(0,buffer,length);
        return length;
        }
    }
/* Write 'length' bytes from 'buffer' at 'offset' within this delta. */
size_t TFileDelta::write(long offset,const void *buffer,size_t length)
    {
    size_t toWrite;    // toWrite will be the max # of bytes 
                       //                           that can be overwritten
    if(byteSize-offset>length) toWrite=length;
    else toWrite=(size_t)(byteSize-offset);
    TDelta *c=TEditDelta::createNew(buffer,toWrite);
    if(c==NULL) return 0;    // if edit delta creation failed, 
                             //                        we're out of luck
    if(toWrite==byteSize) {
        c->next=next;   // the entire file delta is overwritten,
        c->prev=prev;   //  so it has to disappear
        next->prev=c;   // The newly created edit delta 'c' will replace it.
        prev->next=c;
        delete this;
        }
    else if(offset==0) { // the beginning of the file delta is overwritten, the
        c->insertBefore(this); // edit delta is inserted before "this" delta
        startPos+=toWrite;     // adjust position and size of what remains 
        byteSize-=toWrite;     // of this file delta
        }
    else if(offset+toWrite==byteSize) {   // the end of the file delta is 
        byteSize=offset;                  //    overwritten, the edit delta is 
                                          //    inserted after "this" delta
        c->insertAfter(this);             // adjust size of what remains of 
                                          //    this file delta
        }
    else {          // split the file delta.
        assert(offset+toWrite<byteSize);
        TDelta *f=new TFileDelta(hFile,startPos+offset+toWrite,
                                                   byteSize-(offset+toWrite));
        if(f==NULL) { delete c;  return 0; }
        f->insertAfter(this);
        c->insertAfter(this);
        }
    if(length!=toWrite) {  // if something remains, write at the end 
                           //          of the new delta
        assert(length>toWrite);
        return toWrite+c->write(toWrite,(char *)buffer+toWrite,length-toWrite);
        }
    return toWrite;
    }
/* Read 'length' bytes at 'offset' within this delta to 'buffer'. */
size_t TFileDelta::read(long offset,void *buffer,size_t length)
    {
    size_t toRead;
    if(byteSize-offset>length) toRead=length;
    else      toRead=(size_t)(byteSize-offset);
    lseek(hFile,startPos+offset,SEEK_SET);
    ::read(hFile,buffer,toRead);
    if(length!=toRead) {
        assert(length>toRead);
        return toRead+next->read(0,(char *)buffer+toRead,length-toRead);
        }
    return toRead;
    }
/* Remove 'length' bytes at 'offset'. */
size_t TFileDelta::remove(long offset,size_t length)
    {
    if(byteSize==0) return 0;  // zero-size delta means "nothing to remove"
    assert(offset<=byteSize);
    // compute number of bytes to remove. Cannot remove more than size of delta
    size_t toRemove;
    if(byteSize-offset>length)  toRemove=length;
    else    toRemove=(size_t)(byteSize-offset);
    length-=toRemove;    // adjust delta size and total bytes left to remove
    byteSize-=toRemove;
    if(length!=0) toRemove+=next->remove(0,length); // Remove remainder 
                                                    //   from the next delta
    if(byteSize==0) {     // if the size of this delta dropped to 0, we have 
        prev->next=next;  //   to remove it from the chain.
        next->prev=prev;
        delete this;
        }
    return toRemove;
    }
/* Flush the delta data to aHFile. */
int TFileDelta::flush(int aHFile,long offset,int up)
    {
    // if delta did not move in the same file
    if(hFile==aHFile && startPos==offset) return 1; 
    // check for proper direction request
    if(!((up && offset<=startPos) || (!up && offset>startPos)))  return 1;
    const size_t MAX_TRANSFER=65500u; // Maximum size of read/write operation
    size_t bufferSize=(MAX_TRANSFER<byteSize)?MAX_TRANSFER:(size_t)byteSize;

    char *b;
    while((b=new char[bufferSize])==NULL && b>0)
        bufferSize/=2;
    if(b==NULL) return 0;
    long toCopy=byteSize;
    if(offset<=startPos) { // Move data down in file
        lseek(hFile,startPos,SEEK_SET); // Where to copy from
        lseek(aHFile,offset,SEEK_SET); // Where to copy to

        while(toCopy!=0) {
            size_t transferred;
            if(toCopy<=bufferSize)  transferred=(size_t)toCopy;
            else   transferred=bufferSize;
            ::read(hFile,b,transferred);
            ::write(aHFile,b,transferred);
            toCopy-=transferred;
            }
        }
    else { // Move data up in file
        while(toCopy!=0) {
            size_t transferred;
            if(toCopy<=bufferSize) transferred=(size_t)toCopy;
            else   transferred=bufferSize;
            toCopy-=transferred;
            lseek(hFile,startPos+toCopy,SEEK_SET);
            ::read(hFile,b,transferred);
            lseek(aHFile,offset+toCopy,SEEK_SET);
            ::write(aHFile,b,transferred);
            }
        }
    delete [] b;
    return 1;
    }

End Listings