A C++ Class for Editing Large Files
Spring 1997, Dr. Dobb's Journal
by Vincent Van Den Berghe
#include <stddef.h>
#include <io.h>
#include <errno.h>
#include "fildelta.h"
#include "editable.h"
#include "sendelta.h"
// public functions /////////////////////////////////////////////////////////
/* Constructor */
TEditableFile::TEditableFile(int aHFile)
:hFile(aHFile),deltaList(new TSentinelDelta),currentPos(0)
{
if(deltaList==NULL) return;
cancelChanges(); // cancelChanges will build the original delta list
}
/* Destructor */
TEditableFile::~TEditableFile()
{
flush();
delete deltaList;
}
/* Did constructor fail? */
int TEditableFile::failed() const
{
return deltaList==NULL || (filelength(hFile)>0 &&
deltaList->next==deltaList);
}
/* Roll back to original file data, or file data when last flush occurred */
int TEditableFile::cancelChanges()
{
deleteDeltas();
if(filelength(hFile)!=0) { // if this is a non-zero-length file,
// insert a file delta
TDelta *c=new TFileDelta(hFile,0,filelength(hFile));
if(c==NULL) return 0;
c->insertBefore(deltaList);
}
return 1;
}
/* Does position 'pos' hold original or modified data?
*endPos will contain the last position with the same status.
*/
int TEditableFile::isUnmodified(long pos,long *endPos) const
{
long originalPos=pos;
TDelta *c=getDelta(&pos);
int status=c->unmodified();
if(endPos!=NULL) {
*endPos=originalPos-pos;
while((c!=deltaList) &&
((status && c->unmodified()) || (!status && !c->unmodified()))) {
*endPos+=c->size();
c=c->next;
}
--*endPos;
}
return status;
}
/* total size in bytes of the edited file */
long TEditableFile::size() const
{
long deltaSize=0;
TDelta *c=deltaList->next;
while(c!=deltaList) {
deltaSize+=c->size();
c=c->next;
}
return deltaSize;
}
/* same as lseek(): set the new current file position */
long TEditableFile::seek(long offset,int fromwhere)
{
switch(fromwhere) {
case 0: currentPos=offset; break; // SEEK_SET
case 1: currentPos+=offset; break; // SEEK_CUR
case 2: currentPos=size()-offset; break; // SEEK_END
}
return currentPos;
/* The current position in bytes measured from the beginning of the file. */
long TEditableFile::tell() const
{
return currentPos;
/* Are we at end of file? */
int TEditableFile::eof() const
{
return tell()==size();
}
/* Insert 'length' bytes from buffer at position 'pos' in the file. */
size_t TEditableFile::insert(const void *buffer,size_t length)
{
long pos=currentPos;
TDelta *c=getDelta(&pos);
return c->insert(pos,buffer,length);
}
/* Write 'length' bytes from buffer at position 'pos' in the file. */
size_t TEditableFile::write(const void *buffer,size_t length)
{
long pos=currentPos;
TDelta *c=getDelta(&pos);
size_t bytesWritten=c->write(pos,buffer,length);
currentPos+=bytesWritten;
return bytesWritten;
}
/* Read 'length' bytes position 'pos' in the file into buffer. */
size_t TEditableFile::read(void *buffer,size_t length)
{
long pos=currentPos;
TDelta *c=getDelta(&pos);
size_t bytesRead=c->read(pos,buffer,length);
currentPos+=bytesRead;
return bytesRead;
}
/* Remove 'length' bytes from position 'pos'. */
size_t TEditableFile::remove(size_t length)
{
long pos=currentPos;
TDelta *c=getDelta(&pos);
return c->remove(pos,length);
}
/* Flush the current file (commit all changes). */
void TEditableFile::flush()
{
flush(hFile);
}
void TEditableFile::flush(int aHFile)
{
TDelta *c=deltaList->prev; // flush all file deltas that moved up,
// in reverse order
long offset=0;
while(c!=deltaList) {
if(c->unmodified())
c->flush(aHFile,offset,1);
offset+=c->size();
c=c->prev;
}
c=deltaList->next; // flush all file deltas that were
// moved down, sequentially
offset=0;
while(c!=deltaList) {
if(c->unmodified())
c->flush(aHFile,offset,0);
offset+=c->size();
c=c->next;
}
c=deltaList->next; // move all other (edit) deltas
offset=0;
while(c!=deltaList) {
if(!c->unmodified())
c->flush(aHFile,offset,-1);
offset+=c->size();
c=c->next;
// if we were writing on the original file, cancel all edits (since they
// were written) and start with a new deltaList
if(aHFile==hFile) cancelChanges();
}
// private functions ////////////////////////////////////////////////////////
/* Find the delta containing file position 'pos'. */
TDelta *TEditableFile::getDelta(long *pos) const
{
TDelta *c=deltaList->next;
while(c!=deltaList && *pos>=c->size()) { // find delta containing pos
*pos-=c->size();
c=c->next;
}
return c;
}
/* Delete the delta list, i.e. only one sentinel delta remains. */
void TEditableFile::deleteDeltas()
{
TDelta *c=deltaList->next;
while(c!=deltaList) {
TDelta *old=c;
c=c->next;
delete old;
}
}
#include <stddef.h>
#include <assert.h>
#include <io.h>
#include "fildelta.h"
#include "edtdelta.h"
/* Constructor. */
TFileDelta::TFileDelta(int aHFile,long aStartPos,long aSize)
:TDelta(),hFile(aHFile),startPos(aStartPos),byteSize(aSize)
{
}
/* Return false, since TFileDelta constructor always succeeds. */
int TFileDelta::failed() const
{
return 0;
}
/* Return size of this delta in bytes. */
long TFileDelta::size() const
{
return byteSize;
}
/* File deltas have no bytes left by definition (the data is not editable). */
size_t TFileDelta::bytesLeft() const
{
return 0;
}
/* Return 1, since a file delta always contains unmodified file data. */
int TFileDelta::unmodified() const
{
return 1;
}
/* Insert 'length' bytes from 'buffer' at 'offset' within this delta. */
size_t TFileDelta::insert(long offset,const void *buffer,size_t length)
{
if(offset==0) { // Inserting at start of this delta
size_t toCopy;
if((toCopy=prev->bytesLeft())!=0) { // Can previous delta absorb some?
if(toCopy>length) toCopy=length;
prev->insert(prev->size(),buffer,toCopy);
buffer=(const char *)buffer+toCopy;
length-=toCopy;
if(length==0) return toCopy;
}
// create a new edit delta, holding what's left to insert
TDelta *c=TEditDelta::createNew(buffer,length);
if(c==NULL) return toCopy; // if creation failed,
// return what we could insert
c->insertBefore(this);
return toCopy+length;
}
else {
TDelta *c;
if(offset==byteSize) // insertion at end: pass request on
// to next delta
c=next;
else { // split this file delta in 2 non-empty file deltas
assert(offset<byteSize);
c=new TFileDelta(hFile,startPos+offset,byteSize-offset);
if(c==NULL) return 0;
byteSize=offset;
c->insertAfter(this);
}
c->insert(0,buffer,length);
return length;
}
}
/* Write 'length' bytes from 'buffer' at 'offset' within this delta. */
size_t TFileDelta::write(long offset,const void *buffer,size_t length)
{
size_t toWrite; // toWrite will be the max # of bytes
// that can be overwritten
if(byteSize-offset>length) toWrite=length;
else toWrite=(size_t)(byteSize-offset);
TDelta *c=TEditDelta::createNew(buffer,toWrite);
if(c==NULL) return 0; // if edit delta creation failed,
// we're out of luck
if(toWrite==byteSize) {
c->next=next; // the entire file delta is overwritten,
c->prev=prev; // so it has to disappear
next->prev=c; // The newly created edit delta 'c' will replace it.
prev->next=c;
delete this;
}
else if(offset==0) { // the beginning of the file delta is overwritten, the
c->insertBefore(this); // edit delta is inserted before "this" delta
startPos+=toWrite; // adjust position and size of what remains
byteSize-=toWrite; // of this file delta
}
else if(offset+toWrite==byteSize) { // the end of the file delta is
byteSize=offset; // overwritten, the edit delta is
// inserted after "this" delta
c->insertAfter(this); // adjust size of what remains of
// this file delta
}
else { // split the file delta.
assert(offset+toWrite<byteSize);
TDelta *f=new TFileDelta(hFile,startPos+offset+toWrite,
byteSize-(offset+toWrite));
if(f==NULL) { delete c; return 0; }
f->insertAfter(this);
c->insertAfter(this);
}
if(length!=toWrite) { // if something remains, write at the end
// of the new delta
assert(length>toWrite);
return toWrite+c->write(toWrite,(char *)buffer+toWrite,length-toWrite);
}
return toWrite;
}
/* Read 'length' bytes at 'offset' within this delta to 'buffer'. */
size_t TFileDelta::read(long offset,void *buffer,size_t length)
{
size_t toRead;
if(byteSize-offset>length) toRead=length;
else toRead=(size_t)(byteSize-offset);
lseek(hFile,startPos+offset,SEEK_SET);
::read(hFile,buffer,toRead);
if(length!=toRead) {
assert(length>toRead);
return toRead+next->read(0,(char *)buffer+toRead,length-toRead);
}
return toRead;
}
/* Remove 'length' bytes at 'offset'. */
size_t TFileDelta::remove(long offset,size_t length)
{
if(byteSize==0) return 0; // zero-size delta means "nothing to remove"
assert(offset<=byteSize);
// compute number of bytes to remove. Cannot remove more than size of delta
size_t toRemove;
if(byteSize-offset>length) toRemove=length;
else toRemove=(size_t)(byteSize-offset);
length-=toRemove; // adjust delta size and total bytes left to remove
byteSize-=toRemove;
if(length!=0) toRemove+=next->remove(0,length); // Remove remainder
// from the next delta
if(byteSize==0) { // if the size of this delta dropped to 0, we have
prev->next=next; // to remove it from the chain.
next->prev=prev;
delete this;
}
return toRemove;
}
/* Flush the delta data to aHFile. */
int TFileDelta::flush(int aHFile,long offset,int up)
{
// if delta did not move in the same file
if(hFile==aHFile && startPos==offset) return 1;
// check for proper direction request
if(!((up && offset<=startPos) || (!up && offset>startPos))) return 1;
const size_t MAX_TRANSFER=65500u; // Maximum size of read/write operation
size_t bufferSize=(MAX_TRANSFER<byteSize)?MAX_TRANSFER:(size_t)byteSize;
char *b;
while((b=new char[bufferSize])==NULL && b>0)
bufferSize/=2;
if(b==NULL) return 0;
long toCopy=byteSize;
if(offset<=startPos) { // Move data down in file
lseek(hFile,startPos,SEEK_SET); // Where to copy from
lseek(aHFile,offset,SEEK_SET); // Where to copy to
while(toCopy!=0) {
size_t transferred;
if(toCopy<=bufferSize) transferred=(size_t)toCopy;
else transferred=bufferSize;
::read(hFile,b,transferred);
::write(aHFile,b,transferred);
toCopy-=transferred;
}
}
else { // Move data up in file
while(toCopy!=0) {
size_t transferred;
if(toCopy<=bufferSize) transferred=(size_t)toCopy;
else transferred=bufferSize;
toCopy-=transferred;
lseek(hFile,startPos+toCopy,SEEK_SET);
::read(hFile,b,transferred);
lseek(aHFile,offset+toCopy,SEEK_SET);
::write(aHFile,b,transferred);
}
}
delete [] b;
return 1;
}
End Listings