Back to index

lightning-sunbird  0.9+nobinonly
pager.c
Go to the documentation of this file.
00001 /*
00002 ** 2001 September 15
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** This is the implementation of the page cache subsystem or "pager".
00013 ** 
00014 ** The pager is used to access a database disk file.  It implements
00015 ** atomic commit and rollback through the use of a journal file that
00016 ** is separate from the database file.  The pager also implements file
00017 ** locking to prevent two processes from writing the same database
00018 ** file simultaneously, or one process from reading the database while
00019 ** another is writing.
00020 **
00021 ** @(#) $Id: pager.c,v 1.268 2006/05/07 17:49:39 drh Exp $
00022 */
00023 #ifndef SQLITE_OMIT_DISKIO
00024 #include "sqliteInt.h"
00025 #include "os.h"
00026 #include "pager.h"
00027 #include <assert.h>
00028 #include <string.h>
00029 
/*
** Macros for troubleshooting.  Normally turned off.
**
** When the "#if 0" below is changed to "#if 1", each TRACEn() macro
** forwards its n arguments to sqlite3DebugPrintf().  Otherwise every
** TRACEn() invocation expands to nothing.
*/
#if 0
#define TRACE1(X)       sqlite3DebugPrintf(X)
#define TRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
#define TRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
#define TRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V)
#endif
00046 
/*
** The following two macros are used within the TRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as its argument and
** evaluates to that pager's fd member cast to int.  FILEHANDLEID() takes
** an OsFile handle as its argument and evaluates to it cast to int.
**
** Both macro parameters are parenthesized so that an expression argument
** (for example "p+0" or "a+b") is cast as a whole rather than having the
** cast or the "->" bind to only part of it.
*/
#define PAGERID(p) ((int)((p)->fd))
#define FILEHANDLEID(fd) ((int)(fd))
00057 
/*
** The page cache as a whole is always in one of the following
** states:
**
**   PAGER_UNLOCK        The page cache is not currently reading or 
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      This process has reserved the database for writing
**                       but has not yet made any changes.  Only one process
**                       at a time can reserve the database.  The original
**                       database file has not been modified so other
**                       processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
**                       after all dirty pages have been written to the
**                       database file and the file has been synced to
**                       disk. All that remains to do is to remove the
**                       journal file and the transaction will be
**                       committed.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite3pager_write() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file.  After an sqlite3pager_rollback()
** or sqlite_pager_commit(), the state goes back to PAGER_SHARED.
**
** The numeric values are ordered so that a "larger" state is a more
** restrictive one.  Note that the value 3 is not used.
*/
#define PAGER_UNLOCK      0
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5
00108 
/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
** 
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
**
** The default of 0 is only applied when the macro has not already been
** defined, so it can be overridden from the compiler command line.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
00125 
/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
**
** A value that is already a multiple of 8 is returned unchanged; any
** other value is rounded up to the next multiple of 8 (e.g. 9 -> 16).
*/
#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)
00131 
/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlite3pager_write() on a page prior to making
** any modifications to that page.  The first time sqlite3pager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.  The modified page cannot be written back into the original
** database file until the journal pages have been synced to disk and
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll;               /* A list of all pages */
  PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inStmt;                     /* TRUE if in the statement subjournal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  short int nRef;                /* Number of users of this page */
  PgHdr *pDirty, *pPrevDirty;    /* Dirty pages sorted by PgHdr.pgno */
  u32 notUsed;                   /* Buffer space */
#ifdef SQLITE_CHECK_PAGES
  u32 pageHash;                  /* Hash of page data; verified by checkPage() */
#endif
  /* pPager->pageSize bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};
00172 
/*
** For an in-memory only database, some extra information is recorded about
** each page so that changes can be rolled back.  (Journal files are not
** used for in-memory databases.)  The following information is added to
** the end of every EXTRA block for in-memory databases.
**
** This information could have been added directly to the PgHdr structure.
** But then it would take up an extra 8 bytes of storage on every PgHdr
** even for disk-based databases.  Splitting it out saves 8 bytes.  This
** is only a savings of 0.8% but those percentages add up.
**
** The PGHDR_TO_HIST() macro locates this structure for a given page.
*/
typedef struct PgHistory PgHistory;
struct PgHistory {
  u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
  u8 *pStmt;     /* Text as it was at the beginning of the current statement */
};
00189 
/*
** Macros used for invoking the codec if there is one.
**
** CODEC1(P,D,N,X) calls P->xCodec(P->pCodecArg,D,N,X) when a codec is
** installed on pager P and discards the result.  CODEC2(P,D,N,X) evaluates
** to the codec's return value cast to (char*), or to D cast to (char*)
** when no codec is installed.  When SQLITE_HAS_CODEC is not defined,
** CODEC1 expands to nothing and CODEC2 is simply D cast to (char*).
**
** Every macro parameter is parenthesized so that expression arguments
** (e.g. "buf+1") are evaluated as a unit before the cast or call.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) \
    if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
# define CODEC2(P,D,N,X) \
    ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)(D))
#endif
00200 
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)      Address immediately after header P.
**   DATA_TO_PGHDR(D)      Header that immediately precedes data pointer D.
**   PGHDR_TO_EXTRA(G,P)   Address pageSize bytes past the data of header G,
**                         where P is the pager supplying pageSize.
**   PGHDR_TO_HIST(P,PGR)  PgHistory located pageSize+nExtra bytes past the
**                         data of header P, where PGR is the pager.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
00210 
/*
** An open page cache is an instance of the following structure.
**
** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, SQLITE_PROTOCOL
** or SQLITE_FULL. Once one of the first three errors occurs, it persists
** and is returned as the result of every major pager API call.  The
** SQLITE_FULL return code is slightly different. It persists only until the
** next successful rollback is performed on the pager cache. Also,
** SQLITE_FULL does not affect the sqlite3pager_get() and sqlite3pager_lookup()
** APIs, they may still be used successfully.
*/
struct Pager {
  u8 journalOpen;             /* True if journal file descriptor is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True if we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 full_fsync;              /* Use F_FULLFSYNC when available */
  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
  u8 errCode;                 /* One of several kinds of errors */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 setMaster;               /* True if a m-j name has been written to jrnl */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  int pageSize;               /* Number of bytes in a page */
  int nPage;                  /* Total number of in-memory pages */
  int nMaxPage;               /* High water mark of nPage */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  OsFile *fd, *jfd;           /* File descriptors for database and journal */
  OsFile *stfd;               /* File descriptor for the statement subjournal*/
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  PgHdr *pDirty;              /* List of all dirty pages */
  i64 journalOff;             /* Current byte offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  i64 stmtHdrOff;             /* First journal header written this statement */
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  int nRead,nWrite;           /* Database pages read/written */
#endif
  void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
  void (*xReiniter)(void*,int);   /* Call this routine when reloading pages */
  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  int nHash;                  /* Size of the pager hash table */
  PgHdr **aHash;              /* Hash table to map page number to PgHdr */
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  Pager *pNext;               /* Linked list of pagers in this thread */
#endif
};
00287 
/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument.  Otherwise TEST_INCR() expands to nothing, so the
** argument is not evaluated at all.
*/
#ifdef SQLITE_TEST
# define TEST_INCR(x)  x++
#else
# define TEST_INCR(x)
#endif
00297 
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information.  If the power fails while the journal is being
** written, semi-random garbage data might appear in the journal
** file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};
00324 
/*
** The size of the header and of each page in the journal is determined
** by the following macros.
**
** JOURNAL_PG_SZ() is the pager's page size plus 8 bytes of per-record
** overhead.  The macro parameter is parenthesized so that an expression
** argument is dereferenced as a whole.
*/
#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
00337 
/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
**
** Note that, unless SQLITE_OMIT_MEMORYDB is defined, MEMDB expands to
** "pPager->memDb" and therefore can only be used where a variable named
** pPager is in scope.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif
00349 
/*
** The default size of a disk sector.
** NOTE(review): presumably in bytes and used to seed Pager.sectorSize;
** confirm against the code that opens the pager.
*/
#define PAGER_SECTOR_SIZE 512
00354 
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file 
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
**
** The argument x is a pointer to the Pager; the value is computed from
** PENDING_BYTE and the pager's page size.
*/
/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
00364 
/*
** The maximum legal page number is (2^31 - 1), i.e. the largest value
** that fits in a signed 32-bit integer.
*/
#define PAGER_MAX_PGNO 2147483647
00369 
/*
** Enable reference count tracking (for debugging) here:
**
** When SQLITE_DEBUG is defined, REFINFO(X) calls pager_refinfo(X), which
** prints the page number, data address and reference count of page X via
** sqlite3DebugPrintf() -- but only while the global variable
** pager3_refinfo_enable is non-zero.  Without SQLITE_DEBUG, REFINFO(X)
** expands to nothing.
*/
#ifdef SQLITE_DEBUG
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;   /* Counts reports made; see breakpoint note below */
    if( !pager3_refinfo_enable ) return;
    sqlite3DebugPrintf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
00388 
00389 
00390 /*
00391 ** Change the size of the pager hash table to N.  N must be a power
00392 ** of two.
00393 */
00394 static void pager_resize_hash_table(Pager *pPager, int N){
00395   PgHdr **aHash, *pPg;
00396   assert( N>0 && (N&(N-1))==0 );
00397   aHash = sqliteMalloc( sizeof(aHash[0])*N );
00398   if( aHash==0 ){
00399     /* Failure to rehash is not an error.  It is only a performance hit. */
00400     return;
00401   }
00402   sqliteFree(pPager->aHash);
00403   pPager->nHash = N;
00404   pPager->aHash = aHash;
00405   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
00406     int h = pPg->pgno & (N-1);
00407     pPg->pNextHash = aHash[h];
00408     if( aHash[h] ){
00409       aHash[h]->pPrevHash = pPg;
00410     }
00411     aHash[h] = pPg;
00412     pPg->pPrevHash = 0;
00413   }
00414 }
00415 
00416 /*
00417 ** Read a 32-bit integer from the given file descriptor.  Store the integer
00418 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
00419 ** error code is something goes wrong.
00420 **
00421 ** All values are stored on disk as big-endian.
00422 */
00423 static int read32bits(OsFile *fd, u32 *pRes){
00424   unsigned char ac[4];
00425   int rc = sqlite3OsRead(fd, ac, sizeof(ac));
00426   if( rc==SQLITE_OK ){
00427     *pRes = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
00428   }
00429   return rc;
00430 }
00431 
00432 /*
00433 ** Write a 32-bit integer into a string buffer in big-endian byte order.
00434 */
00435 static void put32bits(char *ac, u32 val){
00436   ac[0] = (val>>24) & 0xff;
00437   ac[1] = (val>>16) & 0xff;
00438   ac[2] = (val>>8) & 0xff;
00439   ac[3] = val & 0xff;
00440 }
00441 
00442 /*
00443 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
00444 ** on success or an error code is something goes wrong.
00445 */
00446 static int write32bits(OsFile *fd, u32 val){
00447   char ac[4];
00448   put32bits(ac, val);
00449   return sqlite3OsWrite(fd, ac, 4);
00450 }
00451 
00452 /*
00453 ** Read a 32-bit integer at offset 'offset' from the page identified by
00454 ** page header 'p'.
00455 */
00456 static u32 retrieve32bits(PgHdr *p, int offset){
00457   unsigned char *ac;
00458   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
00459   return (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
00460 }
00461 
00462 
00463 /*
00464 ** This function should be called when an error occurs within the pager
00465 ** code. The first argument is a pointer to the pager structure, the
00466 ** second the error-code about to be returned by a pager API function. 
00467 ** The value returned is a copy of the second argument to this function. 
00468 **
00469 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT or SQLITE_PROTOCOL,
00470 ** the error becomes persistent. All subsequent API calls on this Pager
00471 ** will immediately return the same error code.
00472 */
00473 static int pager_error(Pager *pPager, int rc){
00474   assert( pPager->errCode==SQLITE_FULL || pPager->errCode==SQLITE_OK );
00475   if( 
00476     rc==SQLITE_FULL ||
00477     rc==SQLITE_IOERR ||
00478     rc==SQLITE_CORRUPT ||
00479     rc==SQLITE_PROTOCOL
00480   ){
00481     pPager->errCode = rc;
00482   }
00483   return rc;
00484 }
00485 
#ifdef SQLITE_CHECK_PAGES
/*
** Compute and return a 32-bit hash over the pageSize bytes of data that
** belong to page pPage.
*/
static u32 pager_pagehash(PgHdr *pPage){
  const unsigned char *aData = (unsigned char *)PGHDR_TO_DATA(pPage);
  int nByte = pPage->pPager->pageSize;
  u32 hash = 0;
  int i = 0;
  while( i<nByte ){
    hash = (hash+i)^aData[i];
    i++;
  }
  return hash;
}

/*
** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
** is defined, and NDEBUG is not defined, an assert() statement checks
** that one of the following holds: no hash has been recorded for the page,
** the pager is in an error state, the database is in-memory, the page is
** dirty, or the page data still matches the recorded hash.
**
** Without SQLITE_CHECK_PAGES, CHECK_PAGE() expands to nothing.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;   /* Also needed by the MEMDB macro */
  assert( pPg->pageHash==0
       || pPager->errCode
       || MEMDB
       || pPg->dirty
       || pPg->pageHash==pager_pagehash(pPg) );
}

#else
#define CHECK_PAGE(x)
#endif
00515 
00516 /*
00517 ** When this is called the journal file for pager pPager must be open.
00518 ** The master journal file name is read from the end of the file and 
00519 ** written into memory obtained from sqliteMalloc(). *pzMaster is
00520 ** set to point at the memory and SQLITE_OK returned. The caller must
00521 ** sqliteFree() *pzMaster.
00522 **
00523 ** If no master journal file name is present *pzMaster is set to 0 and
00524 ** SQLITE_OK returned.
00525 */
00526 static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
00527   int rc;
00528   u32 len;
00529   i64 szJ;
00530   u32 cksum;
00531   int i;
00532   unsigned char aMagic[8]; /* A buffer to hold the magic header */
00533 
00534   *pzMaster = 0;
00535 
00536   rc = sqlite3OsFileSize(pJrnl, &szJ);
00537   if( rc!=SQLITE_OK || szJ<16 ) return rc;
00538 
00539   rc = sqlite3OsSeek(pJrnl, szJ-16);
00540   if( rc!=SQLITE_OK ) return rc;
00541  
00542   rc = read32bits(pJrnl, &len);
00543   if( rc!=SQLITE_OK ) return rc;
00544 
00545   rc = read32bits(pJrnl, &cksum);
00546   if( rc!=SQLITE_OK ) return rc;
00547 
00548   rc = sqlite3OsRead(pJrnl, aMagic, 8);
00549   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
00550 
00551   rc = sqlite3OsSeek(pJrnl, szJ-16-len);
00552   if( rc!=SQLITE_OK ) return rc;
00553 
00554   *pzMaster = (char *)sqliteMalloc(len+1);
00555   if( !*pzMaster ){
00556     return SQLITE_NOMEM;
00557   }
00558   rc = sqlite3OsRead(pJrnl, *pzMaster, len);
00559   if( rc!=SQLITE_OK ){
00560     sqliteFree(*pzMaster);
00561     *pzMaster = 0;
00562     return rc;
00563   }
00564 
00565   /* See if the checksum matches the master journal name */
00566   for(i=0; i<len; i++){
00567     cksum -= (*pzMaster)[i];
00568   }
00569   if( cksum ){
00570     /* If the checksum doesn't add up, then one or more of the disk sectors
00571     ** containing the master journal filename is corrupted. This means
00572     ** definitely roll back, so just return SQLITE_OK and report a (nul)
00573     ** master-journal filename.
00574     */
00575     sqliteFree(*pzMaster);
00576     *pzMaster = 0;
00577   }else{
00578     (*pzMaster)[len] = '\0';
00579   }
00580    
00581   return SQLITE_OK;
00582 }
00583 
00584 /*
00585 ** Seek the journal file descriptor to the next sector boundary where a
00586 ** journal header may be read or written. Pager.journalOff is updated with
00587 ** the new seek offset.
00588 **
00589 ** i.e for a sector size of 512:
00590 **
00591 ** Input Offset              Output Offset
00592 ** ---------------------------------------
00593 ** 0                         0
00594 ** 512                       512
00595 ** 100                       512
00596 ** 2000                      2048
00597 ** 
00598 */
00599 static int seekJournalHdr(Pager *pPager){
00600   i64 offset = 0;
00601   i64 c = pPager->journalOff;
00602   if( c ){
00603     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
00604   }
00605   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
00606   assert( offset>=c );
00607   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
00608   pPager->journalOff = offset;
00609   return sqlite3OsSeek(pPager->jfd, pPager->journalOff);
00610 }
00611 
00612 /*
00613 ** The journal file must be open when this routine is called. A journal
00614 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
00615 ** current location.
00616 **
00617 ** The format for the journal header is as follows:
00618 ** - 8 bytes: Magic identifying journal format.
00619 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
00620 ** - 4 bytes: Random number used for page hash.
00621 ** - 4 bytes: Initial database page count.
00622 ** - 4 bytes: Sector size used by the process that wrote this journal.
00623 ** 
00624 ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
00625 */
00626 static int writeJournalHdr(Pager *pPager){
00627   char zHeader[sizeof(aJournalMagic)+16];
00628 
00629   int rc = seekJournalHdr(pPager);
00630   if( rc ) return rc;
00631 
00632   pPager->journalHdr = pPager->journalOff;
00633   if( pPager->stmtHdrOff==0 ){
00634     pPager->stmtHdrOff = pPager->journalHdr;
00635   }
00636   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
00637 
00638   /* FIX ME: 
00639   **
00640   ** Possibly for a pager not in no-sync mode, the journal magic should not
00641   ** be written until nRec is filled in as part of next syncJournal(). 
00642   **
00643   ** Actually maybe the whole journal header should be delayed until that
00644   ** point. Think about this.
00645   */
00646   memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
00647   /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
00648   put32bits(&zHeader[sizeof(aJournalMagic)], pPager->noSync ? 0xffffffff : 0);
00649   /* The random check-hash initialiser */ 
00650   sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
00651   put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
00652   /* The initial database size */
00653   put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
00654   /* The assumed sector size for this process */
00655   put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
00656   rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader));
00657 
00658   /* The journal header has been written successfully. Seek the journal
00659   ** file descriptor to the end of the journal header sector.
00660   */
00661   if( rc==SQLITE_OK ){
00662     rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff-1);
00663     if( rc==SQLITE_OK ){
00664       rc = sqlite3OsWrite(pPager->jfd, "\000", 1);
00665     }
00666   }
00667   return rc;
00668 }
00669 
00670 /*
00671 ** The journal file must be open when this is called. A journal header file
00672 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
00673 ** file. See comments above function writeJournalHdr() for a description of
00674 ** the journal header format.
00675 **
00676 ** If the header is read successfully, *nRec is set to the number of
00677 ** page records following this header and *dbSize is set to the size of the
00678 ** database before the transaction began, in pages. Also, pPager->cksumInit
00679 ** is set to the value read from the journal header. SQLITE_OK is returned
00680 ** in this case.
00681 **
00682 ** If the journal header file appears to be corrupted, SQLITE_DONE is
00683 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
00684 ** cannot be read from the journal file an error code is returned.
00685 */
00686 static int readJournalHdr(
00687   Pager *pPager, 
00688   i64 journalSize,
00689   u32 *pNRec, 
00690   u32 *pDbSize
00691 ){
00692   int rc;
00693   unsigned char aMagic[8]; /* A buffer to hold the magic header */
00694 
00695   rc = seekJournalHdr(pPager);
00696   if( rc ) return rc;
00697 
00698   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
00699     return SQLITE_DONE;
00700   }
00701 
00702   rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic));
00703   if( rc ) return rc;
00704 
00705   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
00706     return SQLITE_DONE;
00707   }
00708 
00709   rc = read32bits(pPager->jfd, pNRec);
00710   if( rc ) return rc;
00711 
00712   rc = read32bits(pPager->jfd, &pPager->cksumInit);
00713   if( rc ) return rc;
00714 
00715   rc = read32bits(pPager->jfd, pDbSize);
00716   if( rc ) return rc;
00717 
00718   /* Update the assumed sector-size to match the value used by 
00719   ** the process that created this journal. If this journal was
00720   ** created by a process other than this one, then this routine
00721   ** is being called from within pager_playback(). The local value
00722   ** of Pager.sectorSize is restored at the end of that routine.
00723   */
00724   rc = read32bits(pPager->jfd, (u32 *)&pPager->sectorSize);
00725   if( rc ) return rc;
00726 
00727   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
00728   rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
00729   return rc;
00730 }
00731 
00732 
00733 /*
00734 ** Write the supplied master journal name into the journal file for pager
00735 ** pPager at the current location. The master journal name must be the last
00736 ** thing written to a journal file. If the pager is in full-sync mode, the
00737 ** journal file descriptor is advanced to the next sector boundary before
00738 ** anything is written. The format is:
00739 **
00740 ** + 4 bytes: PAGER_MJ_PGNO.
00741 ** + N bytes: length of master journal name.
00742 ** + 4 bytes: N
00743 ** + 4 bytes: Master journal name checksum.
00744 ** + 8 bytes: aJournalMagic[].
00745 **
00746 ** The master journal page checksum is the sum of the bytes in the master
00747 ** journal name.
00748 **
00749 ** If zMaster is a NULL pointer (occurs for a single database transaction), 
00750 ** this call is a no-op.
00751 */
00752 static int writeMasterJournal(Pager *pPager, const char *zMaster){
00753   int rc;
00754   int len; 
00755   int i; 
00756   u32 cksum = 0;
00757   char zBuf[sizeof(aJournalMagic)+2*4];
00758 
00759   if( !zMaster || pPager->setMaster) return SQLITE_OK;
00760   pPager->setMaster = 1;
00761 
00762   len = strlen(zMaster);
00763   for(i=0; i<len; i++){
00764     cksum += zMaster[i];
00765   }
00766 
00767   /* If in full-sync mode, advance to the next disk sector before writing
00768   ** the master journal name. This is in case the previous page written to
00769   ** the journal has already been synced.
00770   */
00771   if( pPager->fullSync ){
00772     rc = seekJournalHdr(pPager);
00773     if( rc!=SQLITE_OK ) return rc;
00774   }
00775   pPager->journalOff += (len+20);
00776 
00777   rc = write32bits(pPager->jfd, PAGER_MJ_PGNO(pPager));
00778   if( rc!=SQLITE_OK ) return rc;
00779 
00780   rc = sqlite3OsWrite(pPager->jfd, zMaster, len);
00781   if( rc!=SQLITE_OK ) return rc;
00782 
00783   put32bits(zBuf, len);
00784   put32bits(&zBuf[4], cksum);
00785   memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
00786   rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic));
00787   pPager->needSync = !pPager->noSync;
00788   return rc;
00789 }
00790 
00791 /*
00792 ** Add or remove a page from the list of all pages that are in the
00793 ** statement journal.
00794 **
00795 ** The Pager keeps a separate list of pages that are currently in
00796 ** the statement journal.  This helps the sqlite3pager_stmt_commit()
00797 ** routine run MUCH faster for the common case where there are many
00798 ** pages in memory but only a few are in the statement journal.
00799 */
00800 static void page_add_to_stmt_list(PgHdr *pPg){
00801   Pager *pPager = pPg->pPager;
00802   if( pPg->inStmt ) return;
00803   assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
00804   pPg->pPrevStmt = 0;
00805   if( pPager->pStmt ){
00806     pPager->pStmt->pPrevStmt = pPg;
00807   }
00808   pPg->pNextStmt = pPager->pStmt;
00809   pPager->pStmt = pPg;
00810   pPg->inStmt = 1;
00811 }
00812 static void page_remove_from_stmt_list(PgHdr *pPg){
00813   if( !pPg->inStmt ) return;
00814   if( pPg->pPrevStmt ){
00815     assert( pPg->pPrevStmt->pNextStmt==pPg );
00816     pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
00817   }else{
00818     assert( pPg->pPager->pStmt==pPg );
00819     pPg->pPager->pStmt = pPg->pNextStmt;
00820   }
00821   if( pPg->pNextStmt ){
00822     assert( pPg->pNextStmt->pPrevStmt==pPg );
00823     pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
00824   }
00825   pPg->pNextStmt = 0;
00826   pPg->pPrevStmt = 0;
00827   pPg->inStmt = 0;
00828 }
00829 
00830 /*
00831 ** Find a page in the hash table given its page number.  Return
00832 ** a pointer to the page or NULL if not found.
00833 */
00834 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
00835   PgHdr *p;
00836   if( pPager->aHash==0 ) return 0;
00837   p = pPager->aHash[pgno & (pPager->nHash-1)];
00838   while( p && p->pgno!=pgno ){
00839     p = p->pNextHash;
00840   }
00841   return p;
00842 }
00843 
00844 /*
00845 ** Unlock the database and clear the in-memory cache.  This routine
00846 ** sets the state of the pager back to what it was when it was first
00847 ** opened.  Any outstanding pages are invalidated and subsequent attempts
00848 ** to access those pages will likely result in a coredump.
00849 */
00850 static void pager_reset(Pager *pPager){
00851   PgHdr *pPg, *pNext;
00852   if( pPager->errCode ) return;
00853   for(pPg=pPager->pAll; pPg; pPg=pNext){
00854     pNext = pPg->pNextAll;
00855     sqliteFree(pPg);
00856   }
00857   pPager->pFirst = 0;
00858   pPager->pFirstSynced = 0;
00859   pPager->pLast = 0;
00860   pPager->pAll = 0;
00861   pPager->nHash = 0;
00862   sqliteFree(pPager->aHash);
00863   pPager->nPage = 0;
00864   pPager->aHash = 0;
00865   if( pPager->state>=PAGER_RESERVED ){
00866     sqlite3pager_rollback(pPager);
00867   }
00868   sqlite3OsUnlock(pPager->fd, NO_LOCK);
00869   pPager->state = PAGER_UNLOCK;
00870   pPager->dbSize = -1;
00871   pPager->nRef = 0;
00872   assert( pPager->journalOpen==0 );
00873 }
00874 
00875 /*
00876 ** When this routine is called, the pager has the journal file open and
00877 ** a RESERVED or EXCLUSIVE lock on the database.  This routine releases
00878 ** the database lock and acquires a SHARED lock in its place.  The journal
00879 ** file is deleted and closed.
00880 **
00881 ** TODO: Consider keeping the journal file open for temporary databases.
00882 ** This might give a performance improvement on windows where opening
00883 ** a file is an expensive operation.
00884 */
00885 static int pager_unwritelock(Pager *pPager){
00886   PgHdr *pPg;
00887   int rc;
00888   assert( !MEMDB );
00889   if( pPager->state<PAGER_RESERVED ){
00890     return SQLITE_OK;
00891   }
00892   sqlite3pager_stmt_commit(pPager);
00893   if( pPager->stmtOpen ){
00894     sqlite3OsClose(&pPager->stfd);
00895     pPager->stmtOpen = 0;
00896   }
00897   if( pPager->journalOpen ){
00898     sqlite3OsClose(&pPager->jfd);
00899     pPager->journalOpen = 0;
00900     sqlite3OsDelete(pPager->zJournal);
00901     sqliteFree( pPager->aInJournal );
00902     pPager->aInJournal = 0;
00903     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
00904       pPg->inJournal = 0;
00905       pPg->dirty = 0;
00906       pPg->needSync = 0;
00907 #ifdef SQLITE_CHECK_PAGES
00908       pPg->pageHash = pager_pagehash(pPg);
00909 #endif
00910     }
00911     pPager->pDirty = 0;
00912     pPager->dirtyCache = 0;
00913     pPager->nRec = 0;
00914   }else{
00915     assert( pPager->aInJournal==0 );
00916     assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
00917   }
00918   rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
00919   pPager->state = PAGER_SHARED;
00920   pPager->origDbSize = 0;
00921   pPager->setMaster = 0;
00922   pPager->needSync = 0;
00923   pPager->pFirstSynced = pPager->pFirst;
00924   return rc;
00925 }
00926 
00927 /*
00928 ** Compute and return a checksum for the page of data.
00929 **
00930 ** This is not a real checksum.  It is really just the sum of the 
00931 ** random initial value and the page number.  We experimented with
00932 ** a checksum of the entire data, but that was found to be too slow.
00933 **
00934 ** Note that the page number is stored at the beginning of data and
00935 ** the checksum is stored at the end.  This is important.  If journal
00936 ** corruption occurs due to a power failure, the most likely scenario
00937 ** is that one end or the other of the record will be changed.  It is
00938 ** much less likely that the two ends of the journal record will be
00939 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
00940 ** though fast and simple, catches the mostly likely kind of corruption.
00941 **
00942 ** FIX ME:  Consider adding every 200th (or so) byte of the data to the
00943 ** checksum.  That way if a single page spans 3 or more disk sectors and
00944 ** only the middle sector is corrupt, we will still have a reasonable
00945 ** chance of failing the checksum and thus detecting the problem.
00946 */
00947 static u32 pager_cksum(Pager *pPager, const u8 *aData){
00948   u32 cksum = pPager->cksumInit;
00949   int i = pPager->pageSize-200;
00950   while( i>0 ){
00951     cksum += aData[i];
00952     i -= 200;
00953   }
00954   return cksum;
00955 }
00956 
/* Forward declaration.  makeClean() is called by
** pager_playback_one_page() below, ahead of its definition.
*/
static void makeClean(PgHdr*);
00959 
00960 /*
00961 ** Read a single page from the journal file opened on file descriptor
00962 ** jfd.  Playback this one page.
00963 **
00964 ** If useCksum==0 it means this journal does not use checksums.  Checksums
00965 ** are not used in statement journals because statement journals do not
00966 ** need to survive power failures.
00967 */
00968 static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
00969   int rc;
00970   PgHdr *pPg;                   /* An existing page in the cache */
00971   Pgno pgno;                    /* The page number of a page in journal */
00972   u32 cksum;                    /* Checksum used for sanity checking */
00973   u8 aData[SQLITE_MAX_PAGE_SIZE];  /* Temp storage for a page */
00974 
00975   /* useCksum should be true for the main journal and false for
00976   ** statement journals.  Verify that this is always the case
00977   */
00978   assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
00979 
00980 
00981   rc = read32bits(jfd, &pgno);
00982   if( rc!=SQLITE_OK ) return rc;
00983   rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
00984   if( rc!=SQLITE_OK ) return rc;
00985   pPager->journalOff += pPager->pageSize + 4;
00986 
00987   /* Sanity checking on the page.  This is more important that I originally
00988   ** thought.  If a power failure occurs while the journal is being written,
00989   ** it could cause invalid data to be written into the journal.  We need to
00990   ** detect this invalid data (with high probability) and ignore it.
00991   */
00992   if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
00993     return SQLITE_DONE;
00994   }
00995   if( pgno>(unsigned)pPager->dbSize ){
00996     return SQLITE_OK;
00997   }
00998   if( useCksum ){
00999     rc = read32bits(jfd, &cksum);
01000     if( rc ) return rc;
01001     pPager->journalOff += 4;
01002     if( pager_cksum(pPager, aData)!=cksum ){
01003       return SQLITE_DONE;
01004     }
01005   }
01006 
01007   assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
01008 
01009   /* If the pager is in RESERVED state, then there must be a copy of this
01010   ** page in the pager cache. In this case just update the pager cache,
01011   ** not the database file. The page is left marked dirty in this case.
01012   **
01013   ** If in EXCLUSIVE state, then we update the pager cache if it exists
01014   ** and the main file. The page is then marked not dirty.
01015   **
01016   ** Ticket #1171:  The statement journal might contain page content that is
01017   ** different from the page content at the start of the transaction.
01018   ** This occurs when a page is changed prior to the start of a statement
01019   ** then changed again within the statement.  When rolling back such a
01020   ** statement we must not write to the original database unless we know
01021   ** for certain that original page contents are in the main rollback
01022   ** journal.  Otherwise, if a full ROLLBACK occurs after the statement
01023   ** rollback the full ROLLBACK will not restore the page to its original
01024   ** content.  Two conditions must be met before writing to the database
01025   ** files. (1) the database must be locked.  (2) we know that the original
01026   ** page content is in the main journal either because the page is not in
01027   ** cache or else it is marked as needSync==0.
01028   */
01029   pPg = pager_lookup(pPager, pgno);
01030   assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 );
01031   TRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno);
01032   if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
01033     rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
01034     if( rc==SQLITE_OK ){
01035       rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize);
01036     }
01037     if( pPg ){
01038       makeClean(pPg);
01039     }
01040   }
01041   if( pPg ){
01042     /* No page should ever be explicitly rolled back that is in use, except
01043     ** for page 1 which is held in use in order to keep the lock on the
01044     ** database active. However such a page may be rolled back as a result
01045     ** of an internal error resulting in an automatic call to
01046     ** sqlite3pager_rollback().
01047     */
01048     void *pData;
01049     /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
01050     pData = PGHDR_TO_DATA(pPg);
01051     memcpy(pData, aData, pPager->pageSize);
01052     if( pPager->xDestructor ){  /*** FIX ME:  Should this be xReinit? ***/
01053       pPager->xDestructor(pData, pPager->pageSize);
01054     }
01055 #ifdef SQLITE_CHECK_PAGES
01056     pPg->pageHash = pager_pagehash(pPg);
01057 #endif
01058     CODEC1(pPager, pData, pPg->pgno, 3);
01059   }
01060   return rc;
01061 }
01062 
01063 /*
01064 ** Parameter zMaster is the name of a master journal file. A single journal
01065 ** file that referred to the master journal file has just been rolled back.
01066 ** This routine checks if it is possible to delete the master journal file,
01067 ** and does so if it is.
01068 **
01069 ** The master journal file contains the names of all child journals.
01070 ** To tell if a master journal can be deleted, check to each of the
01071 ** children.  If all children are either missing or do not refer to
01072 ** a different master journal, then this master journal can be deleted.
01073 */
01074 static int pager_delmaster(const char *zMaster){
01075   int rc;
01076   int master_open = 0;
01077   OsFile *master = 0;
01078   char *zMasterJournal = 0; /* Contents of master journal file */
01079   i64 nMasterJournal;       /* Size of master journal file */
01080 
01081   /* Open the master journal file exclusively in case some other process
01082   ** is running this routine also. Not that it makes too much difference.
01083   */
01084   rc = sqlite3OsOpenReadOnly(zMaster, &master);
01085   if( rc!=SQLITE_OK ) goto delmaster_out;
01086   master_open = 1;
01087   rc = sqlite3OsFileSize(master, &nMasterJournal);
01088   if( rc!=SQLITE_OK ) goto delmaster_out;
01089 
01090   if( nMasterJournal>0 ){
01091     char *zJournal;
01092     char *zMasterPtr = 0;
01093 
01094     /* Load the entire master journal file into space obtained from
01095     ** sqliteMalloc() and pointed to by zMasterJournal. 
01096     */
01097     zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
01098     if( !zMasterJournal ){
01099       rc = SQLITE_NOMEM;
01100       goto delmaster_out;
01101     }
01102     rc = sqlite3OsRead(master, zMasterJournal, nMasterJournal);
01103     if( rc!=SQLITE_OK ) goto delmaster_out;
01104 
01105     zJournal = zMasterJournal;
01106     while( (zJournal-zMasterJournal)<nMasterJournal ){
01107       if( sqlite3OsFileExists(zJournal) ){
01108         /* One of the journals pointed to by the master journal exists.
01109         ** Open it and check if it points at the master journal. If
01110         ** so, return without deleting the master journal file.
01111         */
01112         OsFile *journal = 0;
01113         int c;
01114 
01115         rc = sqlite3OsOpenReadOnly(zJournal, &journal);
01116         if( rc!=SQLITE_OK ){
01117           goto delmaster_out;
01118         }
01119 
01120         rc = readMasterJournal(journal, &zMasterPtr);
01121         sqlite3OsClose(&journal);
01122         if( rc!=SQLITE_OK ){
01123           goto delmaster_out;
01124         }
01125 
01126         c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0;
01127         sqliteFree(zMasterPtr);
01128         if( c ){
01129           /* We have a match. Do not delete the master journal file. */
01130           goto delmaster_out;
01131         }
01132       }
01133       zJournal += (strlen(zJournal)+1);
01134     }
01135   }
01136   
01137   sqlite3OsDelete(zMaster);
01138 
01139 delmaster_out:
01140   if( zMasterJournal ){
01141     sqliteFree(zMasterJournal);
01142   }  
01143   if( master_open ){
01144     sqlite3OsClose(&master);
01145   }
01146   return rc;
01147 }
01148 
01149 /*
01150 ** Make every page in the cache agree with what is on disk.  In other words,
01151 ** reread the disk to reset the state of the cache.
01152 **
01153 ** This routine is called after a rollback in which some of the dirty cache
01154 ** pages had never been written out to disk.  We need to roll back the
01155 ** cache content and the easiest way to do that is to reread the old content
01156 ** back from the disk.
01157 */
01158 static int pager_reload_cache(Pager *pPager){
01159   PgHdr *pPg;
01160   int rc = SQLITE_OK;
01161   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
01162     char zBuf[SQLITE_MAX_PAGE_SIZE];
01163     if( !pPg->dirty ) continue;
01164     if( (int)pPg->pgno <= pPager->origDbSize ){
01165       rc = sqlite3OsSeek(pPager->fd, pPager->pageSize*(i64)(pPg->pgno-1));
01166       if( rc==SQLITE_OK ){
01167         rc = sqlite3OsRead(pPager->fd, zBuf, pPager->pageSize);
01168       }
01169       TRACE3("REFETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
01170       if( rc ) break;
01171       CODEC1(pPager, zBuf, pPg->pgno, 2);
01172     }else{
01173       memset(zBuf, 0, pPager->pageSize);
01174     }
01175     if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), pPager->pageSize) ){
01176       memcpy(PGHDR_TO_DATA(pPg), zBuf, pPager->pageSize);
01177       if( pPager->xReiniter ){
01178         pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize);
01179       }else{
01180         memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
01181       }
01182     }
01183     pPg->needSync = 0;
01184     pPg->dirty = 0;
01185 #ifdef SQLITE_CHECK_PAGES
01186     pPg->pageHash = pager_pagehash(pPg);
01187 #endif
01188   }
01189   pPager->pDirty = 0;
01190   return rc;
01191 }
01192 
01193 /*
01194 ** Truncate the main file of the given pager to the number of pages
01195 ** indicated.
01196 */
01197 static int pager_truncate(Pager *pPager, int nPage){
01198   assert( pPager->state>=PAGER_EXCLUSIVE );
01199   return sqlite3OsTruncate(pPager->fd, pPager->pageSize*(i64)nPage);
01200 }
01201 
01202 /*
01203 ** Playback the journal and thus restore the database file to
01204 ** the state it was in before we started making changes.  
01205 **
01206 ** The journal file format is as follows: 
01207 **
01208 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
01209 **  (2)  4 byte big-endian integer which is the number of valid page records
01210 **       in the journal.  If this value is 0xffffffff, then compute the
01211 **       number of page records from the journal size.
01212 **  (3)  4 byte big-endian integer which is the initial value for the 
01213 **       sanity checksum.
01214 **  (4)  4 byte integer which is the number of pages to truncate the
01215 **       database to during a rollback.
01216 **  (5)  4 byte integer which is the number of bytes in the master journal
01217 **       name.  The value may be zero (indicate that there is no master
01218 **       journal.)
01219 **  (6)  N bytes of the master journal name.  The name will be nul-terminated
01220 **       and might be shorter than the value read from (5).  If the first byte
01221 **       of the name is \000 then there is no master journal.  The master
01222 **       journal name is stored in UTF-8.
01223 **  (7)  Zero or more pages instances, each as follows:
01224 **        +  4 byte page number.
01225 **        +  pPager->pageSize bytes of data.
01226 **        +  4 byte checksum
01227 **
01228 ** When we speak of the journal header, we mean the first 6 items above.
01229 ** Each entry in the journal is an instance of the 7th item.
01230 **
01231 ** Call the value from the second bullet "nRec".  nRec is the number of
01232 ** valid page entries in the journal.  In most cases, you can compute the
01233 ** value of nRec from the size of the journal file.  But if a power
01234 ** failure occurred while the journal was being written, it could be the
01235 ** case that the size of the journal file had already been increased but
01236 ** the extra entries had not yet made it safely to disk.  In such a case,
01237 ** the value of nRec computed from the file size would be too large.  For
01238 ** that reason, we always use the nRec value in the header.
01239 **
01240 ** If the nRec value is 0xffffffff it means that nRec should be computed
01241 ** from the file size.  This value is used when the user selects the
01242 ** no-sync option for the journal.  A power failure could lead to corruption
01243 ** in this case.  But for things like temporary table (which will be
01244 ** deleted when the power is restored) we don't care.  
01245 **
01246 ** If the file opened as the journal file is not a well-formed
01247 ** journal file then all pages up to the first corrupted page are rolled
01248 ** back (or no pages if the journal header is corrupted). The journal file
01249 ** is then deleted and SQLITE_OK returned, just as if no corruption had
01250 ** been encountered.
01251 **
01252 ** If an I/O or malloc() error occurs, the journal-file is not deleted
01253 ** and an error code is returned.
01254 */
01255 static int pager_playback(Pager *pPager){
01256   i64 szJ;                 /* Size of the journal file in bytes */
01257   u32 nRec;                /* Number of Records in the journal */
01258   int i;                   /* Loop counter */
01259   Pgno mxPg = 0;           /* Size of the original file in pages */
01260   int rc;                  /* Result code of a subroutine */
01261   char *zMaster = 0;       /* Name of master journal file if any */
01262 
01263   /* Figure out how many records are in the journal.  Abort early if
01264   ** the journal is empty.
01265   */
01266   assert( pPager->journalOpen );
01267   rc = sqlite3OsFileSize(pPager->jfd, &szJ);
01268   if( rc!=SQLITE_OK ){
01269     goto end_playback;
01270   }
01271 
01272   /* Read the master journal name from the journal, if it is present.
01273   ** If a master journal file name is specified, but the file is not
01274   ** present on disk, then the journal is not hot and does not need to be
01275   ** played back.
01276   */
01277   rc = readMasterJournal(pPager->jfd, &zMaster);
01278   assert( rc!=SQLITE_DONE );
01279   if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
01280     sqliteFree(zMaster);
01281     zMaster = 0;
01282     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
01283     goto end_playback;
01284   }
01285   sqlite3OsSeek(pPager->jfd, 0);
01286   pPager->journalOff = 0;
01287 
01288   /* This loop terminates either when the readJournalHdr() call returns
01289   ** SQLITE_DONE or an IO error occurs. */
01290   while( 1 ){
01291 
01292     /* Read the next journal header from the journal file.  If there are
01293     ** not enough bytes left in the journal file for a complete header, or
01294     ** it is corrupted, then a process must of failed while writing it.
01295     ** This indicates nothing more needs to be rolled back.
01296     */
01297     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
01298     if( rc!=SQLITE_OK ){ 
01299       if( rc==SQLITE_DONE ){
01300         rc = SQLITE_OK;
01301       }
01302       goto end_playback;
01303     }
01304 
01305     /* If nRec is 0xffffffff, then this journal was created by a process
01306     ** working in no-sync mode. This means that the rest of the journal
01307     ** file consists of pages, there are no more journal headers. Compute
01308     ** the value of nRec based on this assumption.
01309     */
01310     if( nRec==0xffffffff ){
01311       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
01312       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
01313     }
01314 
01315     /* If this is the first header read from the journal, truncate the
01316     ** database file back to it's original size.
01317     */
01318     if( pPager->state>=PAGER_EXCLUSIVE && 
01319         pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
01320       assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
01321       rc = pager_truncate(pPager, mxPg);
01322       if( rc!=SQLITE_OK ){
01323         goto end_playback;
01324       }
01325       pPager->dbSize = mxPg;
01326     }
01327 
01328     /* Copy original pages out of the journal and back into the database file.
01329     */
01330     for(i=0; i<nRec; i++){
01331       rc = pager_playback_one_page(pPager, pPager->jfd, 1);
01332       if( rc!=SQLITE_OK ){
01333         if( rc==SQLITE_DONE ){
01334           rc = SQLITE_OK;
01335           pPager->journalOff = szJ;
01336           break;
01337         }else{
01338           goto end_playback;
01339         }
01340       }
01341     }
01342   }
01343   /*NOTREACHED*/
01344   assert( 0 );
01345 
01346 end_playback:
01347   if( rc==SQLITE_OK ){
01348     rc = pager_unwritelock(pPager);
01349   }
01350   if( zMaster ){
01351     /* If there was a master journal and this routine will return true,
01352     ** see if it is possible to delete the master journal.
01353     */
01354     if( rc==SQLITE_OK ){
01355       rc = pager_delmaster(zMaster);
01356     }
01357     sqliteFree(zMaster);
01358   }
01359 
01360   /* The Pager.sectorSize variable may have been updated while rolling
01361   ** back a journal created by a process with a different PAGER_SECTOR_SIZE
01362   ** value. Reset it to the correct value for this process.
01363   */
01364   pPager->sectorSize = PAGER_SECTOR_SIZE;
01365   return rc;
01366 }
01367 
01368 /*
01369 ** Playback the statement journal.
01370 **
01371 ** This is similar to playing back the transaction journal but with
01372 ** a few extra twists.
01373 **
01374 **    (1)  The number of pages in the database file at the start of
01375 **         the statement is stored in pPager->stmtSize, not in the
01376 **         journal file itself.
01377 **
01378 **    (2)  In addition to playing back the statement journal, also
01379 **         playback all pages of the transaction journal beginning
01380 **         at offset pPager->stmtJSize.
01381 */
01382 static int pager_stmt_playback(Pager *pPager){
01383   i64 szJ;                 /* Size of the full journal */
01384   i64 hdrOff;
01385   int nRec;                /* Number of Records */
01386   int i;                   /* Loop counter */
01387   int rc;
01388 
01389   szJ = pPager->journalOff;
01390 #ifndef NDEBUG 
01391   {
01392     i64 os_szJ;
01393     rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
01394     if( rc!=SQLITE_OK ) return rc;
01395     assert( szJ==os_szJ );
01396   }
01397 #endif
01398 
01399   /* Set hdrOff to be the offset to the first journal header written
01400   ** this statement transaction, or the end of the file if no journal
01401   ** header was written.
01402   */
01403   hdrOff = pPager->stmtHdrOff;
01404   assert( pPager->fullSync || !hdrOff );
01405   if( !hdrOff ){
01406     hdrOff = szJ;
01407   }
01408   
01409   /* Truncate the database back to its original size.
01410   */
01411   if( pPager->state>=PAGER_EXCLUSIVE ){
01412     rc = pager_truncate(pPager, pPager->stmtSize);
01413   }
01414   pPager->dbSize = pPager->stmtSize;
01415 
01416   /* Figure out how many records are in the statement journal.
01417   */
01418   assert( pPager->stmtInUse && pPager->journalOpen );
01419   sqlite3OsSeek(pPager->stfd, 0);
01420   nRec = pPager->stmtNRec;
01421   
01422   /* Copy original pages out of the statement journal and back into the
01423   ** database file.  Note that the statement journal omits checksums from
01424   ** each record since power-failure recovery is not important to statement
01425   ** journals.
01426   */
01427   for(i=nRec-1; i>=0; i--){
01428     rc = pager_playback_one_page(pPager, pPager->stfd, 0);
01429     assert( rc!=SQLITE_DONE );
01430     if( rc!=SQLITE_OK ) goto end_stmt_playback;
01431   }
01432 
01433   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
01434   ** was the size of the journal file when this statement was started, so
01435   ** everything after that needs to be rolled back, either into the
01436   ** database, the memory cache, or both.
01437   **
01438   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
01439   ** of the first journal header written during this statement transaction.
01440   */
01441   rc = sqlite3OsSeek(pPager->jfd, pPager->stmtJSize);
01442   if( rc!=SQLITE_OK ){
01443     goto end_stmt_playback;
01444   }
01445   pPager->journalOff = pPager->stmtJSize;
01446   pPager->cksumInit = pPager->stmtCksum;
01447   assert( JOURNAL_HDR_SZ(pPager)<(pPager->pageSize+8) );
01448   while( pPager->journalOff <= (hdrOff-(pPager->pageSize+8)) ){
01449     rc = pager_playback_one_page(pPager, pPager->jfd, 1);
01450     assert( rc!=SQLITE_DONE );
01451     if( rc!=SQLITE_OK ) goto end_stmt_playback;
01452   }
01453 
01454   while( pPager->journalOff < szJ ){
01455     u32 nJRec;         /* Number of Journal Records */
01456     u32 dummy;
01457     rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
01458     if( rc!=SQLITE_OK ){
01459       assert( rc!=SQLITE_DONE );
01460       goto end_stmt_playback;
01461     }
01462     if( nJRec==0 ){
01463       nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
01464     }
01465     for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
01466       rc = pager_playback_one_page(pPager, pPager->jfd, 1);
01467       assert( rc!=SQLITE_DONE );
01468       if( rc!=SQLITE_OK ) goto end_stmt_playback;
01469     }
01470   }
01471 
01472   pPager->journalOff = szJ;
01473   
01474 end_stmt_playback:
01475   if( rc==SQLITE_OK) {
01476     pPager->journalOff = szJ;
01477     /* pager_reload_cache(pPager); */
01478   }
01479   return rc;
01480 }
01481 
01482 /*
01483 ** Change the maximum number of in-memory pages that are allowed.
01484 */
01485 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
01486   if( mxPage>10 ){
01487     pPager->mxPage = mxPage;
01488   }else{
01489     pPager->mxPage = 10;
01490   }
01491 }
01492 
01493 /*
01494 ** Adjust the robustness of the database to damage due to OS crashes
01495 ** or power failures by changing the number of syncs()s when writing
01496 ** the rollback journal.  There are three levels:
01497 **
01498 **    OFF       sqlite3OsSync() is never called.  This is the default
01499 **              for temporary and transient files.
01500 **
01501 **    NORMAL    The journal is synced once before writes begin on the
01502 **              database.  This is normally adequate protection, but
01503 **              it is theoretically possible, though very unlikely,
01504 **              that an inopertune power failure could leave the journal
01505 **              in a state which would cause damage to the database
01506 **              when it is rolled back.
01507 **
01508 **    FULL      The journal is synced twice before writes begin on the
01509 **              database (with some additional information - the nRec field
01510 **              of the journal header - being written in between the two
01511 **              syncs).  If we assume that writing a
01512 **              single disk sector is atomic, then this mode provides
01513 **              assurance that the journal will not be corrupted to the
01514 **              point of causing damage to the database during rollback.
01515 **
01516 ** Numeric values associated with these states are OFF==1, NORMAL=2,
01517 ** and FULL=3.
01518 */
01519 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
01520 void sqlite3pager_set_safety_level(Pager *pPager, int level, int full_fsync){
01521   pPager->noSync =  level==1 || pPager->tempFile;
01522   pPager->fullSync = level==3 && !pPager->tempFile;
01523   pPager->full_fsync = full_fsync;
01524   if( pPager->noSync ) pPager->needSync = 0;
01525 }
01526 #endif
01527 
01528 /*
01529 ** The following global variable is incremented whenever the library
01530 ** attempts to open a temporary file.  This information is used for
01531 ** testing and analysis only.  
01532 */
01533 int sqlite3_opentemp_count = 0;
01534 
01535 /*
01536 ** Open a temporary file.  Write the name of the file into zFile
01537 ** (zFile must be at least SQLITE_TEMPNAME_SIZE bytes long.)  Write
01538 ** the file descriptor into *fd.  Return SQLITE_OK on success or some
01539 ** other error code if we fail.
01540 **
01541 ** The OS will automatically delete the temporary file when it is
01542 ** closed.
01543 */
01544 static int sqlite3pager_opentemp(char *zFile, OsFile **pFd){
01545   int cnt = 8;
01546   int rc;
01547   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
01548   do{
01549     cnt--;
01550     sqlite3OsTempFileName(zFile);
01551     rc = sqlite3OsOpenExclusive(zFile, pFd, 1);
01552   }while( cnt>0 && rc!=SQLITE_OK && rc!=SQLITE_NOMEM );
01553   return rc;
01554 }
01555 
01556 /*
01557 ** Create a new page cache and put a pointer to the page cache in *ppPager.
01558 ** The file to be cached need not exist.  The file is not locked until
01559 ** the first call to sqlite3pager_get() and is only held open until the
01560 ** last page is released using sqlite3pager_unref().
01561 **
01562 ** If zFilename is NULL then a randomly-named temporary file is created
01563 ** and used as the file to be cached.  The file will be deleted
01564 ** automatically when it is closed.
01565 **
01566 ** If zFilename is ":memory:" then all information is held in cache.
01567 ** It is never written to disk.  This can be used to implement an
01568 ** in-memory database.
01569 */
01570 int sqlite3pager_open(
01571   Pager **ppPager,         /* Return the Pager structure here */
01572   const char *zFilename,   /* Name of the database file to open */
01573   int nExtra,              /* Extra bytes append to each in-memory page */
01574   int flags                /* flags controlling this file */
01575 ){
01576   Pager *pPager = 0;
01577   char *zFullPathname = 0;
01578   int nameLen;  /* Compiler is wrong. This is always initialized before use */
01579   OsFile *fd;
01580   int rc = SQLITE_OK;
01581   int i;
01582   int tempFile = 0;
01583   int memDb = 0;
01584   int readOnly = 0;
01585   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
01586   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
01587   char zTemp[SQLITE_TEMPNAME_SIZE];
01588 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
01589   /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to 
01590   ** malloc() must have already been made by this thread before it gets
01591   ** to this point. This means the ThreadData must have been allocated already
01592   ** so that ThreadData.nAlloc can be set. It would be nice to assert
01593   ** that ThreadData.nAlloc is non-zero, but alas this breaks test cases 
01594   ** written to invoke the pager directly.
01595   */
01596   ThreadData *pTsd = sqlite3ThreadData();
01597   assert( pTsd );
01598 #endif
01599 
01600   /* If malloc() has already failed return SQLITE_NOMEM. Before even
01601   ** testing for this, set *ppPager to NULL so the caller knows the pager
01602   ** structure was never allocated. 
01603   */
01604   *ppPager = 0;
01605   if( sqlite3MallocFailed() ){
01606     return SQLITE_NOMEM;
01607   }
01608   memset(&fd, 0, sizeof(fd));
01609 
01610   /* Open the pager file and set zFullPathname to point at malloc()ed 
01611   ** memory containing the complete filename (i.e. including the directory).
01612   */
01613   if( zFilename && zFilename[0] ){
01614 #ifndef SQLITE_OMIT_MEMORYDB
01615     if( strcmp(zFilename,":memory:")==0 ){
01616       memDb = 1;
01617       zFullPathname = sqliteStrDup("");
01618     }else
01619 #endif
01620     {
01621       zFullPathname = sqlite3OsFullPathname(zFilename);
01622       if( zFullPathname ){
01623         rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
01624       }
01625     }
01626   }else{
01627     rc = sqlite3pager_opentemp(zTemp, &fd);
01628     zFilename = zTemp;
01629     zFullPathname = sqlite3OsFullPathname(zFilename);
01630     if( rc==SQLITE_OK ){
01631       tempFile = 1;
01632     }
01633   }
01634 
01635   /* Allocate the Pager structure. As part of the same allocation, allocate
01636   ** space for the full paths of the file, directory and journal 
01637   ** (Pager.zFilename, Pager.zDirectory and Pager.zJournal).
01638   */
01639   if( zFullPathname ){
01640     nameLen = strlen(zFullPathname);
01641     pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
01642   }
01643 
01644   /* If an error occured in either of the blocks above, free the memory 
01645   ** pointed to by zFullPathname, free the Pager structure and close the 
01646   ** file. Since the pager is not allocated there is no need to set 
01647   ** any Pager.errMask variables.
01648   */
01649   if( !pPager || !zFullPathname || rc!=SQLITE_OK ){
01650     sqlite3OsClose(&fd);
01651     sqliteFree(zFullPathname);
01652     sqliteFree(pPager);
01653     return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
01654   }
01655 
01656   TRACE3("OPEN %d %s\n", FILEHANDLEID(fd), zFullPathname);
01657   pPager->zFilename = (char*)&pPager[1];
01658   pPager->zDirectory = &pPager->zFilename[nameLen+1];
01659   pPager->zJournal = &pPager->zDirectory[nameLen+1];
01660   strcpy(pPager->zFilename, zFullPathname);
01661   strcpy(pPager->zDirectory, zFullPathname);
01662 
01663   for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
01664   if( i>0 ) pPager->zDirectory[i-1] = 0;
01665   strcpy(pPager->zJournal, zFullPathname);
01666   sqliteFree(zFullPathname);
01667   strcpy(&pPager->zJournal[nameLen], "-journal");
01668   pPager->fd = fd;
01669   /* pPager->journalOpen = 0; */
01670   pPager->useJournal = useJournal && !memDb;
01671   pPager->noReadlock = noReadlock && readOnly;
01672   /* pPager->stmtOpen = 0; */
01673   /* pPager->stmtInUse = 0; */
01674   /* pPager->nRef = 0; */
01675   pPager->dbSize = memDb-1;
01676   pPager->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
01677   /* pPager->stmtSize = 0; */
01678   /* pPager->stmtJSize = 0; */
01679   /* pPager->nPage = 0; */
01680   /* pPager->nMaxPage = 0; */
01681   pPager->mxPage = 100;
01682   assert( PAGER_UNLOCK==0 );
01683   /* pPager->state = PAGER_UNLOCK; */
01684   /* pPager->errMask = 0; */
01685   pPager->tempFile = tempFile;
01686   pPager->memDb = memDb;
01687   pPager->readOnly = readOnly;
01688   /* pPager->needSync = 0; */
01689   pPager->noSync = pPager->tempFile || !useJournal;
01690   pPager->fullSync = (pPager->noSync?0:1);
01691   /* pPager->pFirst = 0; */
01692   /* pPager->pFirstSynced = 0; */
01693   /* pPager->pLast = 0; */
01694   pPager->nExtra = FORCE_ALIGNMENT(nExtra);
01695   pPager->sectorSize = PAGER_SECTOR_SIZE;
01696   /* pPager->pBusyHandler = 0; */
01697   /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
01698   *ppPager = pPager;
01699 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
01700   pPager->pNext = pTsd->pPager;
01701   pTsd->pPager = pPager;
01702 #endif
01703   return SQLITE_OK;
01704 }
01705 
01706 /*
01707 ** Set the busy handler function.
01708 */
01709 void sqlite3pager_set_busyhandler(Pager *pPager, BusyHandler *pBusyHandler){
01710   pPager->pBusyHandler = pBusyHandler;
01711 }
01712 
01713 /*
01714 ** Set the destructor for this pager.  If not NULL, the destructor is called
01715 ** when the reference count on each page reaches zero.  The destructor can
01716 ** be used to clean up information in the extra segment appended to each page.
01717 **
01718 ** The destructor is not called as a result sqlite3pager_close().  
01719 ** Destructors are only called by sqlite3pager_unref().
01720 */
01721 void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
01722   pPager->xDestructor = xDesc;
01723 }
01724 
01725 /*
01726 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
01727 ** is called when the content of a page in cache is restored to its original
01728 ** value as a result of a rollback.  The callback gives higher-level code
01729 ** an opportunity to restore the EXTRA section to agree with the restored
01730 ** page data.
01731 */
01732 void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){
01733   pPager->xReiniter = xReinit;
01734 }
01735 
01736 /*
01737 ** Set the page size.  Return the new size.  If the suggest new page
01738 ** size is inappropriate, then an alternative page size is selected
01739 ** and returned.
01740 */
01741 int sqlite3pager_set_pagesize(Pager *pPager, int pageSize){
01742   assert( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE );
01743   if( !pPager->memDb ){
01744     pPager->pageSize = pageSize;
01745   }
01746   return pPager->pageSize;
01747 }
01748 
/*
** Helpers that control the simulated I/O error mechanism used by the
** test harness.  They let the pager suppress simulated errors in places
** where errors are deliberately ignored.
**
**   clear_simulated_io_error()     Reset the "an error was hit" flag.
**   disable_simulated_io_errors()  Remember the pending-error counter and
**                                  set it to -1.
**   enable_simulated_io_errors()   Restore the counter saved by the most
**                                  recent disable call.  Only one value
**                                  is saved, so calls do not nest.
**
** Unless compiled with -DSQLITE_TEST=1, all three are empty macros and
** generate no code.
*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;   /* Counter value saved by disable_simulated_io_errors() */
void clear_simulated_io_error(void){
  sqlite3_io_error_hit = 0;
}
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
# define clear_simulated_io_error()
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
01776 
01777 /*
01778 ** Read the first N bytes from the beginning of the file into memory
01779 ** that pDest points to. 
01780 **
01781 ** No error checking is done. The rational for this is that this function 
01782 ** may be called even if the file does not exist or contain a header. In 
01783 ** these cases sqlite3OsRead() will return an error, to which the correct 
01784 ** response is to zero the memory at pDest and continue.  A real IO error 
01785 ** will presumably recur and be picked up later (Todo: Think about this).
01786 */
01787 void sqlite3pager_read_fileheader(Pager *pPager, int N, unsigned char *pDest){
01788   memset(pDest, 0, N);
01789   if( MEMDB==0 ){
01790     disable_simulated_io_errors();
01791     sqlite3OsSeek(pPager->fd, 0);
01792     sqlite3OsRead(pPager->fd, pDest, N);
01793     enable_simulated_io_errors();
01794   }
01795 }
01796 
01797 /*
01798 ** Return the total number of pages in the disk file associated with
01799 ** pPager. 
01800 **
01801 ** If the PENDING_BYTE lies on the page directly after the end of the
01802 ** file, then consider this page part of the file too. For example, if
01803 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
01804 ** file is 4096 bytes, 5 is returned instead of 4.
01805 */
01806 int sqlite3pager_pagecount(Pager *pPager){
01807   i64 n;
01808   assert( pPager!=0 );
01809   if( pPager->dbSize>=0 ){
01810     n = pPager->dbSize;
01811   } else {
01812     if( sqlite3OsFileSize(pPager->fd, &n)!=SQLITE_OK ){
01813       pager_error(pPager, SQLITE_IOERR);
01814       return 0;
01815     }
01816     if( n>0 && n<pPager->pageSize ){
01817       n = 1;
01818     }else{
01819       n /= pPager->pageSize;
01820     }
01821     if( pPager->state!=PAGER_UNLOCK ){
01822       pPager->dbSize = n;
01823     }
01824   }
01825   if( n==(PENDING_BYTE/pPager->pageSize) ){
01826     n++;
01827   }
01828   return n;
01829 }
01830 
01831 
01832 #ifndef SQLITE_OMIT_MEMORYDB
01833 /*
01834 ** Clear a PgHistory block
01835 */
01836 static void clearHistory(PgHistory *pHist){
01837   sqliteFree(pHist->pOrig);
01838   sqliteFree(pHist->pStmt);
01839   pHist->pOrig = 0;
01840   pHist->pStmt = 0;
01841 }
01842 #else
01843 #define clearHistory(x)
01844 #endif
01845 
01846 /*
01847 ** Forward declaration
01848 */
01849 static int syncJournal(Pager*);
01850 
01851 /*
01852 ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate
01853 ** that the page is not part of any hash chain. This is required because the
01854 ** sqlite3pager_movepage() routine can leave a page in the 
01855 ** pNextFree/pPrevFree list that is not a part of any hash-chain.
01856 */
01857 static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
01858   if( pPg->pgno==0 ){
01859     /* If the page number is zero, then this page is not in any hash chain. */
01860     return;
01861   }
01862   if( pPg->pNextHash ){
01863     pPg->pNextHash->pPrevHash = pPg->pPrevHash;
01864   }
01865   if( pPg->pPrevHash ){
01866     assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
01867     pPg->pPrevHash->pNextHash = pPg->pNextHash;
01868   }else{
01869     int h = pPg->pgno & (pPager->nHash-1);
01870     assert( pPager->aHash[h]==pPg );
01871     pPager->aHash[h] = pPg->pNextHash;
01872   }
01873   if( MEMDB ){
01874     clearHistory(PGHDR_TO_HIST(pPg, pPager));
01875   }
01876   pPg->pgno = 0;
01877   pPg->pNextHash = pPg->pPrevHash = 0;
01878 }
01879 
01880 /*
01881 ** Unlink a page from the free list (the list of all pages where nRef==0)
01882 ** and from its hash collision chain.
01883 */
01884 static void unlinkPage(PgHdr *pPg){
01885   Pager *pPager = pPg->pPager;
01886 
01887   /* Keep the pFirstSynced pointer pointing at the first synchronized page */
01888   if( pPg==pPager->pFirstSynced ){
01889     PgHdr *p = pPg->pNextFree;
01890     while( p && p->needSync ){ p = p->pNextFree; }
01891     pPager->pFirstSynced = p;
01892   }
01893 
01894   /* Unlink from the freelist */
01895   if( pPg->pPrevFree ){
01896     pPg->pPrevFree->pNextFree = pPg->pNextFree;
01897   }else{
01898     assert( pPager->pFirst==pPg );
01899     pPager->pFirst = pPg->pNextFree;
01900   }
01901   if( pPg->pNextFree ){
01902     pPg->pNextFree->pPrevFree = pPg->pPrevFree;
01903   }else{
01904     assert( pPager->pLast==pPg );
01905     pPager->pLast = pPg->pPrevFree;
01906   }
01907   pPg->pNextFree = pPg->pPrevFree = 0;
01908 
01909   /* Unlink from the pgno hash table */
01910   unlinkHashChain(pPager, pPg);
01911 }
01912 
01913 #ifndef SQLITE_OMIT_MEMORYDB
01914 /*
01915 ** This routine is used to truncate an in-memory database.  Delete
01916 ** all pages whose pgno is larger than pPager->dbSize and is unreferenced.
01917 ** Referenced pages larger than pPager->dbSize are zeroed.
01918 */
01919 static void memoryTruncate(Pager *pPager){
01920   PgHdr *pPg;
01921   PgHdr **ppPg;
01922   int dbSize = pPager->dbSize;
01923 
01924   ppPg = &pPager->pAll;
01925   while( (pPg = *ppPg)!=0 ){
01926     if( pPg->pgno<=dbSize ){
01927       ppPg = &pPg->pNextAll;
01928     }else if( pPg->nRef>0 ){
01929       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
01930       ppPg = &pPg->pNextAll;
01931     }else{
01932       *ppPg = pPg->pNextAll;
01933       unlinkPage(pPg);
01934       makeClean(pPg);
01935       sqliteFree(pPg);
01936       pPager->nPage--;
01937     }
01938   }
01939 }
01940 #else
01941 #define memoryTruncate(p)
01942 #endif
01943 
01944 /*
01945 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
01946 ** is currently not available.  Repeat until the busy callback returns
01947 ** false or until the lock succeeds.
01948 **
01949 ** Return SQLITE_OK on success and an error code if we cannot obtain
01950 ** the lock.
01951 */
01952 static int pager_wait_on_lock(Pager *pPager, int locktype){
01953   int rc;
01954   assert( PAGER_SHARED==SHARED_LOCK );
01955   assert( PAGER_RESERVED==RESERVED_LOCK );
01956   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
01957   if( pPager->state>=locktype ){
01958     rc = SQLITE_OK;
01959   }else{
01960     do {
01961       rc = sqlite3OsLock(pPager->fd, locktype);
01962     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
01963     if( rc==SQLITE_OK ){
01964       pPager->state = locktype;
01965     }
01966   }
01967   return rc;
01968 }
01969 
01970 /*
01971 ** Truncate the file to the number of pages specified.
01972 */
01973 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
01974   int rc;
01975   sqlite3pager_pagecount(pPager);
01976   if( pPager->errCode ){
01977     rc = pPager->errCode;
01978     return rc;
01979   }
01980   if( nPage>=(unsigned)pPager->dbSize ){
01981     return SQLITE_OK;
01982   }
01983   if( MEMDB ){
01984     pPager->dbSize = nPage;
01985     memoryTruncate(pPager);
01986     return SQLITE_OK;
01987   }
01988   rc = syncJournal(pPager);
01989   if( rc!=SQLITE_OK ){
01990     return rc;
01991   }
01992 
01993   /* Get an exclusive lock on the database before truncating. */
01994   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
01995   if( rc!=SQLITE_OK ){
01996     return rc;
01997   }
01998 
01999   rc = pager_truncate(pPager, nPage);
02000   if( rc==SQLITE_OK ){
02001     pPager->dbSize = nPage;
02002   }
02003   return rc;
02004 }
02005 
02006 /*
02007 ** Shutdown the page cache.  Free all memory and close all files.
02008 **
02009 ** If a transaction was in progress when this routine is called, that
02010 ** transaction is rolled back.  All outstanding pages are invalidated
02011 ** and their memory is freed.  Any attempt to use a page associated
02012 ** with this page cache after this function returns will likely
02013 ** result in a coredump.
02014 **
02015 ** This function always succeeds. If a transaction is active an attempt
02016 ** is made to roll it back. If an error occurs during the rollback 
02017 ** a hot journal may be left in the filesystem but no error is returned
02018 ** to the caller.
02019 */
02020 int sqlite3pager_close(Pager *pPager){
02021   PgHdr *pPg, *pNext;
02022 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
02023   /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to 
02024   ** malloc() must have already been made by this thread before it gets
02025   ** to this point. This means the ThreadData must have been allocated already
02026   ** so that ThreadData.nAlloc can be set.
02027   */
02028   ThreadData *pTsd = sqlite3ThreadData();
02029   assert( pPager );
02030   assert( pTsd && pTsd->nAlloc );
02031 #endif
02032 
02033   switch( pPager->state ){
02034     case PAGER_RESERVED:
02035     case PAGER_SYNCED: 
02036     case PAGER_EXCLUSIVE: {
02037       /* We ignore any IO errors that occur during the rollback
02038       ** operation. So disable IO error simulation so that testing
02039       ** works more easily.
02040       */
02041       disable_simulated_io_errors();
02042       sqlite3pager_rollback(pPager);
02043       enable_simulated_io_errors();
02044       if( !MEMDB ){
02045         sqlite3OsUnlock(pPager->fd, NO_LOCK);
02046       }
02047       assert( pPager->errCode || pPager->journalOpen==0 );
02048       break;
02049     }
02050     case PAGER_SHARED: {
02051       if( !MEMDB ){
02052         sqlite3OsUnlock(pPager->fd, NO_LOCK);
02053       }
02054       break;
02055     }
02056     default: {
02057       /* Do nothing */
02058       break;
02059     }
02060   }
02061   for(pPg=pPager->pAll; pPg; pPg=pNext){
02062 #ifndef NDEBUG
02063     if( MEMDB ){
02064       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
02065       assert( !pPg->alwaysRollback );
02066       assert( !pHist->pOrig );
02067       assert( !pHist->pStmt );
02068     }
02069 #endif
02070     pNext = pPg->pNextAll;
02071     sqliteFree(pPg);
02072   }
02073   TRACE2("CLOSE %d\n", PAGERID(pPager));
02074   assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
02075   if( pPager->journalOpen ){
02076     sqlite3OsClose(&pPager->jfd);
02077   }
02078   sqliteFree(pPager->aInJournal);
02079   if( pPager->stmtOpen ){
02080     sqlite3OsClose(&pPager->stfd);
02081   }
02082   sqlite3OsClose(&pPager->fd);
02083   /* Temp files are automatically deleted by the OS
02084   ** if( pPager->tempFile ){
02085   **   sqlite3OsDelete(pPager->zFilename);
02086   ** }
02087   */
02088 
02089 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
02090   /* Remove the pager from the linked list of pagers starting at 
02091   ** ThreadData.pPager if memory-management is enabled.
02092   */
02093   if( pPager==pTsd->pPager ){
02094     pTsd->pPager = pPager->pNext;
02095   }else{
02096     Pager *pTmp;
02097     for(pTmp = pTsd->pPager; pTmp->pNext!=pPager; pTmp=pTmp->pNext){}
02098     pTmp->pNext = pPager->pNext;
02099   }
02100 #endif
02101   sqliteFree(pPager->aHash);
02102   sqliteFree(pPager);
02103   return SQLITE_OK;
02104 }
02105 
02106 /*
02107 ** Return the page number for the given page data.
02108 */
02109 Pgno sqlite3pager_pagenumber(void *pData){
02110   PgHdr *p = DATA_TO_PGHDR(pData);
02111   return p->pgno;
02112 }
02113 
02114 /*
02115 ** The page_ref() function increments the reference count for a page.
02116 ** If the page is currently on the freelist (the reference count is zero) then
02117 ** remove it from the freelist.
02118 **
02119 ** For non-test systems, page_ref() is a macro that calls _page_ref()
02120 ** online of the reference count is zero.  For test systems, page_ref()
02121 ** is a real function so that we can set breakpoints and trace it.
02122 */
02123 static void _page_ref(PgHdr *pPg){
02124   if( pPg->nRef==0 ){
02125     /* The page is currently on the freelist.  Remove it. */
02126     if( pPg==pPg->pPager->pFirstSynced ){
02127       PgHdr *p = pPg->pNextFree;
02128       while( p && p->needSync ){ p = p->pNextFree; }
02129       pPg->pPager->pFirstSynced = p;
02130     }
02131     if( pPg->pPrevFree ){
02132       pPg->pPrevFree->pNextFree = pPg->pNextFree;
02133     }else{
02134       pPg->pPager->pFirst = pPg->pNextFree;
02135     }
02136     if( pPg->pNextFree ){
02137       pPg->pNextFree->pPrevFree = pPg->pPrevFree;
02138     }else{
02139       pPg->pPager->pLast = pPg->pPrevFree;
02140     }
02141     pPg->pPager->nRef++;
02142   }
02143   pPg->nRef++;
02144   REFINFO(pPg);
02145 }
02146 #ifdef SQLITE_DEBUG
02147   static void page_ref(PgHdr *pPg){
02148     if( pPg->nRef==0 ){
02149       _page_ref(pPg);
02150     }else{
02151       pPg->nRef++;
02152       REFINFO(pPg);
02153     }
02154   }
02155 #else
02156 # define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
02157 #endif
02158 
02159 /*
02160 ** Increment the reference count for a page.  The input pointer is
02161 ** a reference to the page data.
02162 */
02163 int sqlite3pager_ref(void *pData){
02164   PgHdr *pPg = DATA_TO_PGHDR(pData);
02165   page_ref(pPg);
02166   return SQLITE_OK;
02167 }
02168 
02169 /*
02170 ** Sync the journal.  In other words, make sure all the pages that have
02171 ** been written to the journal have actually reached the surface of the
02172 ** disk.  It is not safe to modify the original database file until after
02173 ** the journal has been synced.  If the original database is modified before
02174 ** the journal is synced and a power failure occurs, the unsynced journal
02175 ** data would be lost and we would be unable to completely rollback the
02176 ** database changes.  Database corruption would occur.
02177 ** 
02178 ** This routine also updates the nRec field in the header of the journal.
02179 ** (See comments on the pager_playback() routine for additional information.)
02180 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
02181 ** is synced, then the nRec field is updated, then a second sync occurs.
02182 **
02183 ** For temporary databases, we do not care if we are able to rollback
02184 ** after a power failure, so sync occurs.
02185 **
02186 ** This routine clears the needSync field of every page current held in
02187 ** memory.
02188 */
02189 static int syncJournal(Pager *pPager){
02190   PgHdr *pPg;
02191   int rc = SQLITE_OK;
02192 
02193   /* Sync the journal before modifying the main database
02194   ** (assuming there is a journal and it needs to be synced.)
02195   */
02196   if( pPager->needSync ){
02197     if( !pPager->tempFile ){
02198       assert( pPager->journalOpen );
02199       /* assert( !pPager->noSync ); // noSync might be set if synchronous
02200       ** was turned off after the transaction was started.  Ticket #615 */
02201 #ifndef NDEBUG
02202       {
02203         /* Make sure the pPager->nRec counter we are keeping agrees
02204         ** with the nRec computed from the size of the journal file.
02205         */
02206         i64 jSz;
02207         rc = sqlite3OsFileSize(pPager->jfd, &jSz);
02208         if( rc!=0 ) return rc;
02209         assert( pPager->journalOff==jSz );
02210       }
02211 #endif
02212       {
02213         /* Write the nRec value into the journal file header. If in
02214         ** full-synchronous mode, sync the journal first. This ensures that
02215         ** all data has really hit the disk before nRec is updated to mark
02216         ** it as a candidate for rollback. 
02217         */
02218         if( pPager->fullSync ){
02219           TRACE2("SYNC journal of %d\n", PAGERID(pPager));
02220           rc = sqlite3OsSync(pPager->jfd, 0);
02221           if( rc!=0 ) return rc;
02222         }
02223         rc = sqlite3OsSeek(pPager->jfd,
02224                            pPager->journalHdr + sizeof(aJournalMagic));
02225         if( rc ) return rc;
02226         rc = write32bits(pPager->jfd, pPager->nRec);
02227         if( rc ) return rc;
02228 
02229         rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
02230         if( rc ) return rc;
02231       }
02232       TRACE2("SYNC journal of %d\n", PAGERID(pPager));
02233       rc = sqlite3OsSync(pPager->jfd, pPager->full_fsync);
02234       if( rc!=0 ) return rc;
02235       pPager->journalStarted = 1;
02236     }
02237     pPager->needSync = 0;
02238 
02239     /* Erase the needSync flag from every page.
02240     */
02241     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
02242       pPg->needSync = 0;
02243     }
02244     pPager->pFirstSynced = pPager->pFirst;
02245   }
02246 
02247 #ifndef NDEBUG
02248   /* If the Pager.needSync flag is clear then the PgHdr.needSync
02249   ** flag must also be clear for all pages.  Verify that this
02250   ** invariant is true.
02251   */
02252   else{
02253     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
02254       assert( pPg->needSync==0 );
02255     }
02256     assert( pPager->pFirstSynced==pPager->pFirst );
02257   }
02258 #endif
02259 
02260   return rc;
02261 }
02262 
02263 /*
02264 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
02265 ** every one of those pages out to the database file and mark them all
02266 ** as clean.
02267 */
02268 static int pager_write_pagelist(PgHdr *pList){
02269   Pager *pPager;
02270   int rc;
02271 
02272   if( pList==0 ) return SQLITE_OK;
02273   pPager = pList->pPager;
02274 
02275   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
02276   ** database file. If there is already an EXCLUSIVE lock, the following
02277   ** calls to sqlite3OsLock() are no-ops.
02278   **
02279   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
02280   ** through an intermediate state PENDING.   A PENDING lock prevents new
02281   ** readers from attaching to the database but is unsufficient for us to
02282   ** write.  The idea of a PENDING lock is to prevent new readers from
02283   ** coming in while we wait for existing readers to clear.
02284   **
02285   ** While the pager is in the RESERVED state, the original database file
02286   ** is unchanged and we can rollback without having to playback the
02287   ** journal into the original database file.  Once we transition to
02288   ** EXCLUSIVE, it means the database file has been changed and any rollback
02289   ** will require a journal playback.
02290   */
02291   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
02292   if( rc!=SQLITE_OK ){
02293     return rc;
02294   }
02295 
02296   while( pList ){
02297     assert( pList->dirty );
02298     rc = sqlite3OsSeek(pPager->fd, (pList->pgno-1)*(i64)pPager->pageSize);
02299     if( rc ) return rc;
02300     /* If there are dirty pages in the page cache with page numbers greater
02301     ** than Pager.dbSize, this means sqlite3pager_truncate() was called to
02302     ** make the file smaller (presumably by auto-vacuum code). Do not write
02303     ** any such pages to the file.
02304     */
02305     if( pList->pgno<=pPager->dbSize ){
02306       char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
02307       TRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno);
02308       rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize);
02309       TEST_INCR(pPager->nWrite);
02310     }
02311 #ifndef NDEBUG
02312     else{
02313       TRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
02314     }
02315 #endif
02316     if( rc ) return rc;
02317     pList->dirty = 0;
02318 #ifdef SQLITE_CHECK_PAGES
02319     pList->pageHash = pager_pagehash(pList);
02320 #endif
02321     pList = pList->pDirty;
02322   }
02323   return SQLITE_OK;
02324 }
02325 
02326 /*
02327 ** Collect every dirty page into a dirty list and
02328 ** return a pointer to the head of that list.  All pages are
02329 ** collected even if they are still in use.
02330 */
02331 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
02332   return pPager->pDirty;
02333 }
02334 
02335 /*
02336 ** Return TRUE if there is a hot journal on the given pager.
02337 ** A hot journal is one that needs to be played back.
02338 **
02339 ** If the current size of the database file is 0 but a journal file
02340 ** exists, that is probably an old journal left over from a prior
02341 ** database with the same name.  Just delete the journal.
02342 */
02343 static int hasHotJournal(Pager *pPager){
02344   if( !pPager->useJournal ) return 0;
02345   if( !sqlite3OsFileExists(pPager->zJournal) ) return 0;
02346   if( sqlite3OsCheckReservedLock(pPager->fd) ) return 0;
02347   if( sqlite3pager_pagecount(pPager)==0 ){
02348     sqlite3OsDelete(pPager->zJournal);
02349     return 0;
02350   }else{
02351     return 1;
02352   }
02353 }
02354 
02355 /*
02356 ** Try to find a page in the cache that can be recycled. 
02357 **
02358 ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It 
02359 ** does not set the pPager->errCode variable.
02360 */
02361 static int pager_recycle(Pager *pPager, int syncOk, PgHdr **ppPg){
02362   PgHdr *pPg;
02363   *ppPg = 0;  /* Stays NULL unless a page is handed back at the bottom */
02364 
02365   /* Find a page to recycle.  Try to locate a page that does not
02366   ** require us to do an fsync() on the journal.
02367   */
02368   pPg = pPager->pFirstSynced;
02369 
02370   /* If we could not find a page that does not require an fsync()
02371   ** on the journal file then fsync the journal file.  This is a
02372   ** very slow operation, so we work hard to avoid it.  But sometimes
02373   ** it can't be helped.  It is only attempted when syncOk is nonzero.
02374   */
02375   if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){
02376     int rc = syncJournal(pPager);
02377     if( rc!=0 ){
02378       return rc;  /* *ppPg is still NULL */
02379     }
02380     if( pPager->fullSync ){
02381       /* If in full-sync mode, write a new journal header into the
02382       ** journal file. This is done to avoid ever modifying a journal
02383       ** header that is involved in the rollback of pages that have
02384       ** already been written to the database (in case the header is
02385       ** trashed when the nRec field is updated).
02386       */
02387       pPager->nRec = 0;
02388       assert( pPager->journalOff > 0 );
02389       rc = writeJournalHdr(pPager);
02390       if( rc!=0 ){
02391         return rc;
02392       }
02393     }
02394     pPg = pPager->pFirst;  /* After the sync, take the head of the free list */
02395   }
02396   if( pPg==0 ){
02397     return SQLITE_OK;  /* Nothing can be recycled; not an error */
02398   }
02399 
02400   assert( pPg->nRef==0 );
02401 
02402   /* Write the page to the database file if it is dirty.  The page is taken
02403   ** off the dirty list, then re-flagged dirty with a NULL pDirty link. */
02404   if( pPg->dirty ){
02405     int rc;
02406     assert( pPg->needSync==0 );
02407     makeClean(pPg);
02408     pPg->dirty = 1;
02409     pPg->pDirty = 0;
02410     rc = pager_write_pagelist( pPg );  /* presumably clears pPg->dirty; see assert below */
02411     if( rc!=SQLITE_OK ){
02412       return rc;
02413     }
02414   }
02415   assert( pPg->dirty==0 );
02416 
02417   /* If the page we are recycling is marked as alwaysRollback, then
02418   ** set the global alwaysRollback flag, thus disabling the
02419   ** sqlite_dont_rollback() optimization for the rest of this transaction.
02420   ** It is necessary to do this because the page marked alwaysRollback
02421   ** might be reloaded at a later time but at that point we won't remember
02422   ** that it was marked alwaysRollback.  This means that all pages must
02423   ** be marked as alwaysRollback from here on out.
02424   */
02425   if( pPg->alwaysRollback ){
02426     pPager->alwaysRollback = 1;
02427   }
02428 
02429   /* Unlink the old page from the free list and the hash table
02430   */
02431   unlinkPage(pPg);
02432   TEST_INCR(pPager->nOvfl);
02433 
02434   *ppPg = pPg;  /* Hand the unlinked page to the caller */
02435   return SQLITE_OK;
02436 }
02437 
02438 /*
02439 ** This function is called to free superfluous dynamically allocated memory
02440 ** held by the pager system. Memory in use by any SQLite pager allocated
02441 ** by the current thread may be sqliteFree()ed.
02442 **
02443 ** nReq is the number of bytes of memory required. Once this much has
02444 ** been released, the function returns. A negative value for nReq means
02445 ** free as much memory as possible. The return value is the total number 
02446 ** of bytes of memory released.
02447 */
02448 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
02449 int sqlite3pager_release_memory(int nReq){
02450   const ThreadData *pTsdro = sqlite3ThreadDataReadOnly();
02451   Pager *p;
02452   int nReleased = 0;  /* Running total of bytes freed; the return value */
02453   int i;              /* 0: no fsync() allowed, 1: fsync() allowed */
02454 
02455   /* If the global mutex is held, this subroutine becomes a
02456   ** no-op; zero bytes of memory are freed.  This is because
02457   ** some of the code invoked by this function may also
02458   ** try to obtain the mutex, resulting in a deadlock.
02459   */
02460   if( sqlite3OsInMutex(0) ){
02461     return 0;
02462   }
02463 
02464   /* Outermost loop runs for at most two iterations. First iteration we
02465   ** try to find memory that can be released without calling fsync(). Second
02466   ** iteration (which only runs if the first failed to free nReq bytes of
02467   ** memory) is permitted to call fsync(). This is of course much more 
02468   ** expensive.
02469   */
02470   for(i=0; i<=1; i++){
02471 
02472     /* Loop through all the SQLite pagers opened by the current thread. */
02473     for(p=pTsdro->pPager; p && (nReq<0 || nReleased<nReq); p=p->pNext){
02474       PgHdr *pPg;
02475       int rc;
02476 
02477       /* For each pager, try to free as many pages as possible (without 
02478       ** calling fsync() if this is the first iteration of the outermost 
02479       ** loop).
02480       */
02481       while( SQLITE_OK==(rc = pager_recycle(p, i, &pPg)) && pPg) {
02482         /* We've found a page to free. At this point the page has been
02483         ** removed from the page hash-table, free-list and synced-list
02484         ** (pFirstSynced). It is still in the all pages (pAll) list and
02485         ** still counted in Pager.nPage; fix both before freeing.
02486         **
02487         ** Todo: Check the Pager.pStmt list to make sure this is Ok. It
02488         ** probably is though. */
02489         PgHdr *pTmp;
02490         assert( pPg );
02491         page_remove_from_stmt_list(pPg);
02492         if( pPg==p->pAll ){
02493            p->pAll = pPg->pNextAll;
02494         }else{
02495           for( pTmp=p->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){}
02496           pTmp->pNextAll = pPg->pNextAll;
02497         }
02498         nReleased += sqliteAllocSize(pPg);
02499         sqliteFree(pPg);
02500         p->nPage--;  /* Keep the page count in step with the pAll list */
02501       }
02502 
02503       if( rc!=SQLITE_OK ){
02504         /* An error occurred whilst writing to the database file or 
02505         ** journal in pager_recycle(). The error is not returned to the 
02506         ** caller of this function. Instead, set the Pager.errCode variable.
02507         ** The error will be returned to the user (or users, in the case 
02508         ** of a shared pager cache) of the pager for which the error occurred.
02509         */
02510         assert( rc==SQLITE_IOERR || rc==SQLITE_FULL );
02511         assert( p->state>=PAGER_RESERVED );
02512         pager_error(p, rc);
02513       }
02514     }
02515   }
02516 
02517   return nReleased;
02518 }
02519 #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
02520 
02521 /*
02522 ** Acquire a page.
02523 **
02524 ** A read lock on the disk file is obtained when the first page is acquired. 
02525 ** This read lock is dropped when the last page is released.
02526 **
02527 ** A _get works for any page number greater than 0.  If the database
02528 ** file is smaller than the requested page, then no actual disk
02529 ** read occurs and the memory image of the page is initialized to
02530 ** all zeros.  The extra data appended to a page is always initialized
02531 ** to zeros the first time a page is loaded into memory.
02532 **
02533 ** The acquisition might fail for several reasons.  In all cases,
02534 ** an appropriate error code is returned and *ppPage is set to NULL.
02535 **
02536 ** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
02537 ** to find a page in the in-memory cache first.  If the page is not already
02538 ** in memory, this routine goes to disk to read it in whereas _lookup()
02539 ** just returns 0.  This routine acquires a read-lock the first time it
02540 ** has to go to disk, and could also playback an old journal if necessary.
02541 ** Since _lookup() never goes to disk, it never has to deal with locks
02542 ** or journal files.
02543 */
02544 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
02545   unsigned char *pNoPreload = 0;  /* NULL data pointer: no preloaded content */
02546   return sqlite3pager_get2(pPager, pgno, ppPage, pNoPreload);
02547 }
02548 
02549 
02550 /*
02551 ** This is an internal version of pager_get that takes an extra parameter of
02552 ** data to use to fill the page with. This allows more efficient filling for
02553 ** preloaded data. If this extra parameter is NULL, we'll go to the file.
02554 **
02555 ** See sqlite3pager_loadall which uses this function.
02556 */
02557 int sqlite3pager_get2(Pager *pPager, Pgno pgno, void **ppPage,
02558                       unsigned char* pDataToFill) {
02559   PgHdr *pPg;
02560   int rc;
02561 
02562   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
02563   ** number greater than this, or zero, is requested.
02564   */
02565   if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
02566     return SQLITE_CORRUPT_BKPT;
02567   }
02568 
02569   /* Make sure we have not hit any critical errors.
02570   */ 
02571   assert( pPager!=0 );
02572   *ppPage = 0;
02573   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
02574     return pPager->errCode;
02575   }
02576 
02577   /* If this is the first page accessed, then get a SHARED lock
02578   ** on the database file.
02579   */
02580   if( pPager->nRef==0 && !MEMDB ){
02581     if( !pPager->noReadlock ){
02582       rc = pager_wait_on_lock(pPager, SHARED_LOCK);
02583       if( rc!=SQLITE_OK ){
02584         return pager_error(pPager, rc);
02585       }
02586     }
02587 
02588     /* If a journal file exists, and there is no RESERVED lock on the
02589     ** database file, then it either needs to be played back or deleted.
02590     */
02591     if( hasHotJournal(pPager) ){
02592        /* Get an EXCLUSIVE lock on the database file. At this point it is
02593        ** important that a RESERVED lock is not obtained on the way to the
02594        ** EXCLUSIVE lock. If it were, another process might open the
02595        ** database file, detect the RESERVED lock, and conclude that the
02596        ** database is safe to read while this process is still rolling it 
02597        ** back.
02598        ** 
02599        ** Because the intermediate RESERVED lock is not requested, the
02600        ** second process will get to this point in the code and fail to
02601        ** obtain it's own EXCLUSIVE lock on the database file.
02602        */
02603        rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
02604        if( rc!=SQLITE_OK ){
02605          sqlite3OsUnlock(pPager->fd, NO_LOCK);
02606          pPager->state = PAGER_UNLOCK;
02607          return pager_error(pPager, rc);
02608        }
02609        pPager->state = PAGER_EXCLUSIVE;
02610 
02611        /* Open the journal for reading only.  Return SQLITE_BUSY if
02612        ** we are unable to open the journal file. 
02613        **
02614        ** The journal file does not need to be locked itself.  The
02615        ** journal file is never open unless the main database file holds
02616        ** a write lock, so there is never any chance of two or more
02617        ** processes opening the journal at the same time.
02618        */
02619        rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
02620        if( rc!=SQLITE_OK ){
02621          sqlite3OsUnlock(pPager->fd, NO_LOCK);
02622          pPager->state = PAGER_UNLOCK;
02623          return SQLITE_BUSY;
02624        }
02625        pPager->journalOpen = 1;
02626        pPager->journalStarted = 0;
02627        pPager->journalOff = 0;
02628        pPager->setMaster = 0;
02629        pPager->journalHdr = 0;
02630 
02631        /* Playback and delete the journal.  Drop the database write
02632        ** lock and reacquire the read lock.
02633        */
02634        rc = pager_playback(pPager);
02635        if( rc!=SQLITE_OK ){
02636          return pager_error(pPager, rc);
02637        }
02638     }
02639     pPg = 0;
02640   }else{
02641     /* Search for page in cache */
02642     pPg = pager_lookup(pPager, pgno);
02643     if( MEMDB && pPager->state==PAGER_UNLOCK ){
02644       pPager->state = PAGER_SHARED;
02645     }
02646   }
02647   if( pPg==0 ){
02648     /* The requested page is not in the page cache. */
02649     int h;
02650     TEST_INCR(pPager->nMiss);
02651     if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ){
02652       /* Create a new page */
02653       if( pPager->nPage>=pPager->nHash ){
02654         pager_resize_hash_table(pPager,
02655            pPager->nHash<256 ? 256 : pPager->nHash*2);
02656         if( pPager->nHash==0 ){
02657           return SQLITE_NOMEM;
02658         }
02659       }
02660       pPg = sqliteMallocRaw( sizeof(*pPg) + pPager->pageSize
02661                               + sizeof(u32) + pPager->nExtra
02662                               + MEMDB*sizeof(PgHistory) );
02663       if( pPg==0 ){
02664         return SQLITE_NOMEM;
02665       }
02666       memset(pPg, 0, sizeof(*pPg));
02667       if( MEMDB ){
02668         memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
02669       }
02670       pPg->pPager = pPager;
02671       pPg->pNextAll = pPager->pAll;
02672       pPager->pAll = pPg;
02673       pPager->nPage++;
02674       if( pPager->nPage>pPager->nMaxPage ){
02675         assert( pPager->nMaxPage==(pPager->nPage-1) );
02676         pPager->nMaxPage++;
02677       }
02678     }else{
02679       rc = pager_recycle(pPager, 1, &pPg);
02680       if( rc!=SQLITE_OK ){
02681         return rc;
02682       }
02683       assert(pPg) ;
02684     }
02685     pPg->pgno = pgno;
02686     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
02687       sqlite3CheckMemory(pPager->aInJournal, pgno/8);
02688       assert( pPager->journalOpen );
02689       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
02690       pPg->needSync = 0;
02691     }else{
02692       pPg->inJournal = 0;
02693       pPg->needSync = 0;
02694     }
02695     if( pPager->aInStmt && (int)pgno<=pPager->stmtSize
02696              && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){
02697       page_add_to_stmt_list(pPg);
02698     }else{
02699       page_remove_from_stmt_list(pPg);
02700     }
02701     makeClean(pPg);
02702     pPg->nRef = 1;
02703     REFINFO(pPg);
02704 
02705     pPager->nRef++;
02706     if( pPager->nExtra>0 ){
02707       memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
02708     }
02709     if( pPager->errCode ){
02710       sqlite3pager_unref(PGHDR_TO_DATA(pPg));
02711       rc = pPager->errCode;
02712       return rc;
02713     }
02714 
02715     /* Populate the page with data, either by reading from the database
02716     ** file, or by setting the entire page to zero.
02717     */
02718     if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB ){
02719       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
02720     }else{
02721       if (pDataToFill) {
02722         /* Just copy from the given memory */
02723         memcpy(PGHDR_TO_DATA(pPg), pDataToFill, pPager->pageSize);
02724         CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
02725       } else {
02726         /* Load from disk */
02727         assert( MEMDB==0 );
02728         rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
02729         if( rc==SQLITE_OK ){
02730           rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg),
02731                                 pPager->pageSize);
02732         }
02733         TRACE3("FETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
02734         CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
02735         if( rc!=SQLITE_OK ){
02736           i64 fileSize;
02737           int rc2 = sqlite3OsFileSize(pPager->fd, &fileSize);
02738           if( rc2!=SQLITE_OK || fileSize>=pgno*pPager->pageSize ){
02739             /* An IO error occured in one of the the sqlite3OsSeek() or
02740             ** sqlite3OsRead() calls above. */
02741             pPg->pgno = 0;
02742             sqlite3pager_unref(PGHDR_TO_DATA(pPg));
02743             return rc;
02744           }else{
02745             clear_simulated_io_error();
02746             memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
02747           }
02748         }else{
02749           TEST_INCR(pPager->nRead);
02750         }
02751       }
02752     }
02753 
02754     /* Link the page into the page hash table */
02755     h = pgno & (pPager->nHash-1);
02756     pPg->pNextHash = pPager->aHash[h];
02757     pPager->aHash[h] = pPg;
02758     if( pPg->pNextHash ){
02759       assert( pPg->pNextHash->pPrevHash==0 );
02760       pPg->pNextHash->pPrevHash = pPg;
02761     }
02762 
02763 #ifdef SQLITE_CHECK_PAGES
02764     pPg->pageHash = pager_pagehash(pPg);
02765 #endif
02766   }else{
02767     /* The requested page is in the page cache. */
02768     TEST_INCR(pPager->nHit);
02769     page_ref(pPg);
02770   }
02771   *ppPage = PGHDR_TO_DATA(pPg);
02772   return SQLITE_OK;
02773 }
02774 
02775 /*
02776 ** Acquire a page if it is already in the in-memory cache.  Do
02777 ** not read the page from disk.  Return a pointer to the page,
02778 ** or 0 if the page is not in cache.
02779 **
02780 ** See also sqlite3pager_get().  The difference between this routine
02781 ** and sqlite3pager_get() is that _get() will go to the disk and read
02782 ** in the page if the page is not already in cache.  This routine
02783 ** returns NULL if the page is not in cache or if a disk I/O error 
02784 ** has ever happened.
02785 */
02786 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
02787   PgHdr *pPg;
02788 
02789   assert( pPager!=0 );
02790   assert( pgno!=0 );
02791   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
02792     return 0;
02793   }
02794   pPg = pager_lookup(pPager, pgno);
02795   if( pPg==0 ) return 0;
02796   page_ref(pPg);
02797   return PGHDR_TO_DATA(pPg);
02798 }
02799 
02800 /*
02801 ** Release a page.
02802 **
02803 ** If the number of references to the page drop to zero, then the
02804 ** page is added to the LRU list.  When all references to all pages
02805 ** are released, a rollback occurs and the lock on the database is
02806 ** removed.
02807 */
02808 int sqlite3pager_unref(void *pData){
02809   PgHdr *pPg;
02810 
02811   /* Decrement the reference count for this page
02812   */
02813   pPg = DATA_TO_PGHDR(pData);
02814   assert( pPg->nRef>0 );
02815   pPg->nRef--;
02816   REFINFO(pPg);
02817 
02818   CHECK_PAGE(pPg);
02819 
02820   /* When the number of references to a page reach 0, add the page to
02821   ** the tail of the freelist and then call the destructor.
02822   */
02823   if( pPg->nRef==0 ){
02824     Pager *pPager;
02825     pPager = pPg->pPager;
02826     pPg->pNextFree = 0;
02827     pPg->pPrevFree = pPager->pLast;
02828     pPager->pLast = pPg;
02829     if( pPg->pPrevFree ){
02830       pPg->pPrevFree->pNextFree = pPg;
02831     }else{
02832       pPager->pFirst = pPg;  /* The freelist was empty */
02833     }
02834     if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
02835       pPager->pFirstSynced = pPg;  /* First free page that needs no sync */
02836     }
02837     if( pPager->xDestructor ){
02838       pPager->xDestructor(pData, pPager->pageSize);
02839     }
02840   
02841     /* When all pages reach the freelist, drop the read lock from
02842     ** the database file.
02843     */
02844     pPager->nRef--;
02845     assert( pPager->nRef>=0 );
02846     if( pPager->nRef==0 && !MEMDB ){
02847       pager_reset(pPager);
02848     }
02849   }
02850   return SQLITE_OK;  /* This routine always reports success */
02851 }
02852 
02853 /*
02854 ** Create a journal file for pPager.  There should already be a RESERVED
02855 ** or EXCLUSIVE lock on the database file when this routine is called.
02856 **
02857 ** Return SQLITE_OK if everything succeeds.  Return an error code and
02858 ** release the write lock if anything goes wrong.
02859 */
02860 static int pager_open_journal(Pager *pPager){
02861   int rc;
02862   assert( !MEMDB );
02863   assert( pPager->state>=PAGER_RESERVED );
02864   assert( pPager->journalOpen==0 );
02865   assert( pPager->useJournal );
02866   assert( pPager->aInJournal==0 );
02867   sqlite3pager_pagecount(pPager);  /* presumably refreshes dbSize, read next */
02868   pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );  /* 1 bit per page */
02869   if( pPager->aInJournal==0 ){
02870     rc = SQLITE_NOMEM;
02871     goto failed_to_open_journal;
02872   }
02873   rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,
02874                                  pPager->tempFile);
02875   pPager->journalOff = 0;
02876   pPager->setMaster = 0;
02877   pPager->journalHdr = 0;
02878   if( rc!=SQLITE_OK ){
02879     goto failed_to_open_journal;
02880   }
02881   sqlite3OsSetFullSync(pPager->jfd, pPager->full_fsync);
02882   sqlite3OsSetFullSync(pPager->fd, pPager->full_fsync);
02883   sqlite3OsOpenDirectory(pPager->jfd, pPager->zDirectory);
02884   pPager->journalOpen = 1;
02885   pPager->journalStarted = 0;
02886   pPager->needSync = 0;
02887   pPager->alwaysRollback = 0;
02888   pPager->nRec = 0;
02889   if( pPager->errCode ){
02890     rc = pPager->errCode;
02891     goto failed_to_open_journal;
02892   }
02893   pPager->origDbSize = pPager->dbSize;  /* Database size at the start of the journal */
02894 
02895   rc = writeJournalHdr(pPager);
02896 
02897   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
02898     rc = sqlite3pager_stmt_begin(pPager);
02899   }
02900   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
02901     rc = pager_unwritelock(pPager);
02902     if( rc==SQLITE_OK ){
02903       rc = SQLITE_FULL;  /* Original error is replaced once the unlock succeeds */
02904     }
02905   }
02906   return rc;
02907 
02908 failed_to_open_journal:
02909   sqliteFree(pPager->aInJournal);
02910   pPager->aInJournal = 0;
02911   if( rc==SQLITE_NOMEM ){
02912     /* If this was a malloc() failure, then we will not be closing the pager
02913     ** file. So delete any journal file we may have just created. Otherwise,
02914     ** the system will get confused, we have a read-lock on the file and a
02915     ** mysterious journal has appeared in the filesystem.
02916     */
02917     sqlite3OsDelete(pPager->zJournal);
02918   }else{
02919     sqlite3OsUnlock(pPager->fd, NO_LOCK);  /* Give up every lock on the database */
02920     pPager->state = PAGER_UNLOCK;
02921   }
02922   return rc;
02923 }
02924 
02925 /*
02926 ** Acquire a write-lock on the database.  The lock is removed when
02927 ** any of the following happen:
02928 **
02929 **   *  sqlite3pager_commit() is called.
02930 **   *  sqlite3pager_rollback() is called.
02931 **   *  sqlite3pager_close() is called.
02932 **   *  sqlite3pager_unref() is called on every outstanding page.
02933 **
02934 ** The first parameter to this routine is a pointer to any open page of the
02935 ** database file.  Nothing changes about the page - it is used merely to
02936 ** acquire a pointer to the Pager structure and as proof that there is
02937 ** already a read-lock on the database.
02938 **
02939 ** The second parameter indicates how much space in bytes to reserve for a
02940 ** master journal file-name at the start of the journal when it is created.
02941 **
02942 ** A journal file is opened if this is not a temporary file.  For temporary
02943 ** files, the opening of the journal file is deferred until there is an
02944 ** actual need to write to the journal.
02945 **
02946 ** If the database is already reserved for writing, this routine is a no-op.
02947 **
02948 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
02949 ** immediately instead of waiting until we try to flush the cache.  The
02950 ** exFlag is ignored if a transaction is already active.
02951 */
02952 int sqlite3pager_begin(void *pData, int exFlag){
02953   PgHdr *pPg = DATA_TO_PGHDR(pData);
02954   Pager *pPager = pPg->pPager;
02955   int rc;
02956   assert( pPg->nRef>0 );
02957   assert( pPager->state!=PAGER_UNLOCK );
02958 
02959   /* Anything beyond a SHARED lock means the database is already
02960   ** reserved for writing, so this call is a no-op. */
02961   if( pPager->state!=PAGER_SHARED ) return SQLITE_OK;
02962   assert( pPager->aInJournal==0 );
02963 
02964   /* MEMDB pagers skip the file lock and the journal entirely. */
02965   if( MEMDB ){
02966     pPager->state = PAGER_EXCLUSIVE;
02967     pPager->origDbSize = pPager->dbSize;
02968     return SQLITE_OK;
02969   }
02970 
02971   /* Take the RESERVED lock, then go on to EXCLUSIVE if exFlag is set. */
02972   rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
02973   if( rc!=SQLITE_OK ) return rc;
02974   pPager->state = PAGER_RESERVED;
02975   if( exFlag ){
02976     rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
02977     if( rc!=SQLITE_OK ) return rc;
02978   }
02979   pPager->dirtyCache = 0;
02980   TRACE2("TRANSACTION %d\n", PAGERID(pPager));
02981   return (pPager->useJournal && !pPager->tempFile) ? pager_open_journal(pPager) : rc;
02982 }
02983 
02984 /*
02985 ** Make a page dirty.  Set its dirty flag and add it to the dirty
02986 ** page list.
02987 */
02988 static void makeDirty(PgHdr *pPg){
02989   Pager *pOwner;
02990   PgHdr *pOldHead;
02991   if( pPg->dirty ) return;      /* already on the dirty list */
02992   pOwner = pPg->pPager;
02993   pOldHead = pOwner->pDirty;
02994   pPg->dirty = 1;
02995   pPg->pDirty = pOldHead;       /* push pPg on the front of the list */
02996   if( pOldHead ) pOldHead->pPrevDirty = pPg;
02997   pPg->pPrevDirty = 0;
02998   pOwner->pDirty = pPg;
02999 }
03000 
03001 /*
03002 ** Make a page clean.  Clear its dirty bit and remove it from the
03003 ** dirty page list.
03004 */
03005 static void makeClean(PgHdr *pPg){
03006   PgHdr *pAfter, *pBefore;
03007   if( pPg->dirty==0 ) return;   /* not on the dirty list */
03008   pPg->dirty = 0;
03009   /* Splice pPg out by joining its two neighbours on the dirty list. */
03010   pAfter = pPg->pDirty;
03011   pBefore = pPg->pPrevDirty;
03012   if( pAfter ) pAfter->pPrevDirty = pBefore;
03013   if( pBefore ){
03014     pBefore->pDirty = pAfter;
03015   }else{
03016     pPg->pPager->pDirty = pAfter;  /* pPg was the head of the list */
03017   }
03018 }
03018 
03019 
03020 /*
03021 ** Mark a data page as writeable.  The page is written into the journal 
03022 ** if it is not there already.  This routine must be called before making
03023 ** changes to a page.
03024 **
03025 ** The first time this routine is called, the pager creates a new
03026 ** journal and acquires a RESERVED lock on the database.  If the RESERVED
03027 ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
03028 ** calling routine must check for that return value and be careful not to
03029 ** change any page data until this routine returns SQLITE_OK.
03030 **
03031 ** If the journal file could not be written because the disk is full,
03032 ** then this routine returns SQLITE_FULL and does an immediate rollback.
03033 ** All subsequent write attempts also return SQLITE_FULL until there
03034 ** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
03035 ** reset.
03036 */
03037 int sqlite3pager_write(void *pData){
03038   PgHdr *pPg = DATA_TO_PGHDR(pData);
03039   Pager *pPager = pPg->pPager;
03040   int rc = SQLITE_OK;
03041 
03042   /* Check for errors
03043   */
03044   if( pPager->errCode ){ 
03045     return pPager->errCode;
03046   }
03047   if( pPager->readOnly ){
03048     return SQLITE_PERM;
03049   }
03050 
03051   assert( !pPager->setMaster );
03052 
03053   CHECK_PAGE(pPg);
03054 
03055   /* Mark the page as dirty.  If the page has already been written
03056   ** to the journal then we can return right away.
03057   */
03058   makeDirty(pPg);
03059   if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
03060     pPager->dirtyCache = 1;
03061   }else{
03062 
03063     /* If we get this far, it means that the page needs to be
03064     ** written to the transaction journal or the checkpoint journal
03065     ** or both.
03066     **
03067     ** First check to see that the transaction journal exists and
03068     ** create it if it does not.
03069     */
03070     assert( pPager->state!=PAGER_UNLOCK );
03071     rc = sqlite3pager_begin(pData, 0);
03072     if( rc!=SQLITE_OK ){
03073       return rc;
03074     }
03075     assert( pPager->state>=PAGER_RESERVED );
03076     if( !pPager->journalOpen && pPager->useJournal ){
03077       rc = pager_open_journal(pPager);
03078       if( rc!=SQLITE_OK ) return rc;
03079     }
03080     assert( pPager->journalOpen || !pPager->useJournal );
03081     pPager->dirtyCache = 1;
03082   
03083     /* The transaction journal now exists and we have a RESERVED or an
03084     ** EXCLUSIVE lock on the main database file.  Write the current page to
03085     ** the transaction journal if it is not there already.
03086     */
03087     if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
03088       if( (int)pPg->pgno <= pPager->origDbSize ){
03089         int szPg;
03090         if( MEMDB ){
03091           PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
03092           TRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
03093           assert( pHist->pOrig==0 );
03094           pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
03095           if( pHist->pOrig ){  /* NOTE(review): allocation failure is not reported here */
03096             memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
03097           }
03098         }else{
03099           u32 cksum, saved;
03100           char *pData2, *pEnd;
03101           /* We should never write to the journal file the page that
03102           ** contains the database locks.  The following assert verifies
03103           ** that we do not. */
03104           assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
03105           pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
03106           cksum = pager_cksum(pPager, (u8*)pData2);
03107           pEnd = pData2 + pPager->pageSize;
03108           pData2 -= 4;  /* The record starts with the 4-byte page number */
03109           saved = *(u32*)pEnd;  /* Preserve the 4 bytes the checksum overwrites */
03110           put32bits(pEnd, cksum);
03111           szPg = pPager->pageSize+8;  /* pgno + page content + checksum */
03112           put32bits(pData2, pPg->pgno);
03113           rc = sqlite3OsWrite(pPager->jfd, pData2, szPg);
03114           pPager->journalOff += szPg;
03115           TRACE4("JOURNAL %d page %d needSync=%d\n",
03116                   PAGERID(pPager), pPg->pgno, pPg->needSync);
03117           *(u32*)pEnd = saved;
03118 
03119           /* An error has occurred writing to the journal file. The 
03120           ** transaction will be rolled back by the layer above.
03121           */
03122           if( rc!=SQLITE_OK ){
03123             return rc;
03124           }
03125 
03126           pPager->nRec++;
03127           assert( pPager->aInJournal!=0 );
03128           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03129           pPg->needSync = !pPager->noSync;
03130           if( pPager->stmtInUse ){
03131             pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03132             page_add_to_stmt_list(pPg);
03133           }
03134         }
03135       }else{
03136         /* Page lies beyond the original database size: nothing is journalled */
03137         pPg->needSync = !pPager->journalStarted && !pPager->noSync;
03138         TRACE4("APPEND %d page %d needSync=%d\n",
03139                 PAGERID(pPager), pPg->pgno, pPg->needSync);
03140       }
03141       if( pPg->needSync ){
03142         pPager->needSync = 1;
03143       }
03144       pPg->inJournal = 1;
03145     }
03146     /* If the statement journal is open and the page is not in it,
03147     ** then write the current page to the statement journal.  Note that
03148     ** the statement journal format differs from the standard journal format
03149     ** in that it omits the checksums and the header.
03150     */
03151     if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
03152       assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
03153       if( MEMDB ){
03154         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
03155         assert( pHist->pStmt==0 );
03156         pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
03157         if( pHist->pStmt ){
03158           memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
03159         }
03160         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
03161       }else{
03162         char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7)-4;
03163         put32bits(pData2, pPg->pgno);
03164         rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize+4);
03165         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
03166         if( rc!=SQLITE_OK ){
03167           return rc;
03168         }
03169         pPager->stmtNRec++;
03170         assert( pPager->aInStmt!=0 );
03171         pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03172       }
03173       page_add_to_stmt_list(pPg);
03174     }
03175   }
03176 
03177   /* Update the database size and return.
03178   */
03179   if( pPager->dbSize<(int)pPg->pgno ){
03180     pPager->dbSize = pPg->pgno;
03181     if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
03182       pPager->dbSize++;
03183     }
03184   }
03185   return rc;
03186 }
03187 
03188 /*
03189 ** Return TRUE if the page given in the argument was previously passed
03190 ** to sqlite3pager_write().  In other words, return TRUE if it is ok
03191 ** to change the content of the page.
03192 */
03193 #ifndef NDEBUG
03194 int sqlite3pager_iswriteable(void *pData){
03195   /* Report the dirty flag, which sqlite3pager_write() sets via makeDirty(). */
03196   return DATA_TO_PGHDR(pData)->dirty;
03197 }
03198 #endif
03199 
03200 #ifndef SQLITE_OMIT_VACUUM
03201 /*
03202 ** Replace the content of a single page with the information in the third
03203 ** argument.
03204 */
03205 int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
03206   void *pTarget;
03207   int rc = sqlite3pager_get(pPager, pgno, &pTarget);
03208   if( rc!=SQLITE_OK ) return rc;   /* no page was obtained */
03209 
03210   /* Mark the page writeable before replacing its content. */
03211   rc = sqlite3pager_write(pTarget);
03212   if( rc==SQLITE_OK ){
03213     memcpy(pTarget, pData, pPager->pageSize);
03214   }
03215   /* Release the page whether or not the write succeeded. */
03216   sqlite3pager_unref(pTarget);
03217   return rc;
03218 }
03219 #endif
03220 
03221 /*
03222 ** A call to this routine tells the pager that it is not necessary to
03223 ** write the information on page "pgno" back to the disk, even though
03224 ** that page might be marked as dirty.
03225 **
03226 ** The overlying software layer calls this routine when all of the data
03227 ** on the given page is unused.  The pager marks the page as clean so
03228 ** that it does not get written to disk.
03229 **
03230 ** Tests show that this optimization, together with the
03231 ** sqlite3pager_dont_rollback() below, more than double the speed
03232 ** of large INSERT operations and quadruple the speed of large DELETEs.
03233 **
03234 ** When this routine is called, set the alwaysRollback flag to true.
03235 ** Subsequent calls to sqlite3pager_dont_rollback() for the same page
03236 ** will thereafter be ignored.  This is necessary to avoid a problem
03237 ** where a page with data is added to the freelist during one part of
03238 ** a transaction then removed from the freelist during a later part
03239 ** of the same transaction and reused for some other purpose.  When it
03240 ** is first added to the freelist, this routine is called.  When reused,
03241 ** the dont_rollback() routine is called.  But because the page contains
03242 ** critical data, we still need to be sure it gets rolled back in spite
03243 ** of the dont_rollback() call.
03244 */
03245 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
03246   PgHdr *pPg;
03247 
03248   if( MEMDB ) return;
03249 
03250   pPg = pager_lookup(pPager, pgno);
03251   assert( pPg!=0 );  /* We never call _dont_write unless the page is in mem */
03252   pPg->alwaysRollback = 1;
03253   if( pPg->dirty && !pPager->stmtInUse ){
03254     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
03255       /* If this pages is the last page in the file and the file has grown
03256       ** during the current transaction, then do NOT mark the page as clean.
03257       ** When the database file grows, we must make sure that the last page
03258       ** gets written at least once so that the disk file will be the correct
03259       ** size. If you do not write this page and the size of the file
03260       ** on the disk ends up being too small, that can lead to database
03261       ** corruption during the next transaction.
03262       */
03263     }else{
03264       TRACE3("DONT_WRITE page %d of %d\n", pgno, PAGERID(pPager));
03265       makeClean(pPg);
03266 #ifdef SQLITE_CHECK_PAGES
03267       pPg->pageHash = pager_pagehash(pPg);
03268 #endif
03269     }
03270   }
03271 }
03272 
03273 /*
03274 ** A call to this routine tells the pager that if a rollback occurs,
03275 ** it is not necessary to restore the data on the given page.  This
03276 ** means that the pager does not have to record the given page in the
03277 ** rollback journal.
03278 */
03279 void sqlite3pager_dont_rollback(void *pData){
03280   PgHdr *pPg = DATA_TO_PGHDR(pData);
03281   Pager *pPager = pPg->pPager;
03282 
03283   if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return;
03284   if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
03285   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
03286     assert( pPager->aInJournal!=0 );
03287     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03288     pPg->inJournal = 1;
03289     if( pPager->stmtInUse ){
03290       pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03291       page_add_to_stmt_list(pPg);
03292     }
03293     TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
03294   }
03295   if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
03296     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
03297     assert( pPager->aInStmt!=0 );
03298     pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
03299     page_add_to_stmt_list(pPg);
03300   }
03301 }
03302 
03303 
03304 /*
03305 ** Commit all changes to the database and release the write lock.
03306 **
03307 ** If the commit fails for any reason, a rollback attempt is made
03308 ** and an error code is returned.  If the commit worked, SQLITE_OK
03309 ** is returned.
03310 */
03311 int sqlite3pager_commit(Pager *pPager){
03312   int rc;
03313   PgHdr *pPg;
03314 
03315   if( pPager->errCode ){
03316     return pPager->errCode;
03317   }
03318   if( pPager->state<PAGER_RESERVED ){
03319     return SQLITE_ERROR;
03320   }
03321   TRACE2("COMMIT %d\n", PAGERID(pPager));
03322   if( MEMDB ){
03323     pPg = pager_get_all_dirty_pages(pPager);
03324     while( pPg ){
03325       clearHistory(PGHDR_TO_HIST(pPg, pPager));
03326       pPg->dirty = 0;
03327       pPg->inJournal = 0;
03328       pPg->inStmt = 0;
03329       pPg->needSync = 0;
03330       pPg->pPrevStmt = pPg->pNextStmt = 0;
03331       pPg = pPg->pDirty;
03332     }
03333     pPager->pDirty = 0;
03334 #ifndef NDEBUG
03335     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
03336       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
03337       assert( !pPg->alwaysRollback );
03338       assert( !pHist->pOrig );
03339       assert( !pHist->pStmt );
03340     }
03341 #endif
03342     pPager->pStmt = 0;
03343     pPager->state = PAGER_SHARED;
03344     return SQLITE_OK;
03345   }
03346   if( pPager->dirtyCache==0 ){
03347     /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
03348     ** if there have been no changes to the database file. */
03349     assert( pPager->needSync==0 );
03350     rc = pager_unwritelock(pPager);
03351     pPager->dbSize = -1;
03352     return rc;
03353   }
03354   assert( pPager->journalOpen );
03355   rc = sqlite3pager_sync(pPager, 0, 0);
03356   if( rc==SQLITE_OK ){
03357     rc = pager_unwritelock(pPager);
03358     pPager->dbSize = -1;
03359   }
03360   return rc;
03361 }
03362 
03363 /*
03364 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
03365 ** All in-memory cache pages revert to their original data contents.
03366 ** The journal is deleted.
03367 **
03368 ** This routine cannot fail unless some other process is not following
03369 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
03370 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
03371 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
03372 ** codes are returned for all these occasions.  Otherwise,
03373 ** SQLITE_OK is returned.
03374 */
03375 int sqlite3pager_rollback(Pager *pPager){
03376   int rc;
03377   TRACE2("ROLLBACK %d\n", PAGERID(pPager));
03378   if( MEMDB ){
03379     PgHdr *p;
03380     for(p=pPager->pAll; p; p=p->pNextAll){
03381       PgHistory *pHist;
03382       assert( !p->alwaysRollback );
03383       if( !p->dirty ){
03384         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
03385         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
03386         continue;
03387       }
03388 
03389       pHist = PGHDR_TO_HIST(p, pPager);
03390       if( pHist->pOrig ){
03391         memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
03392         TRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
03393       }else{
03394         TRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
03395       }
03396       clearHistory(pHist);
03397       p->dirty = 0;
03398       p->inJournal = 0;
03399       p->inStmt = 0;
03400       p->pPrevStmt = p->pNextStmt = 0;
03401       if( pPager->xReiniter ){
03402         pPager->xReiniter(PGHDR_TO_DATA(p), pPager->pageSize);
03403       }
03404     }
03405     pPager->pDirty = 0;
03406     pPager->pStmt = 0;
03407     pPager->dbSize = pPager->origDbSize;
03408     memoryTruncate(pPager);
03409     pPager->stmtInUse = 0;
03410     pPager->state = PAGER_SHARED;
03411     return SQLITE_OK;
03412   }
03413 
03414   if( !pPager->dirtyCache || !pPager->journalOpen ){
03415     rc = pager_unwritelock(pPager);
03416     pPager->dbSize = -1;
03417     return rc;
03418   }
03419 
03420   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
03421     if( pPager->state>=PAGER_EXCLUSIVE ){
03422       pager_playback(pPager);
03423     }
03424     return pPager->errCode;
03425   }
03426   if( pPager->state==PAGER_RESERVED ){
03427     int rc2;
03428     rc = pager_reload_cache(pPager);
03429     rc2 = pager_unwritelock(pPager);
03430     if( rc==SQLITE_OK ){
03431       rc = rc2;
03432     }
03433   }else{
03434     rc = pager_playback(pPager);
03435   }
03436   pPager->dbSize = -1;
03437 
03438   /* If an error occurs during a ROLLBACK, we can no longer trust the pager
03439   ** cache. So call pager_error() on the way out to make any error 
03440   ** persistent.
03441   */
03442   return pager_error(pPager, rc);
03443 }
03444 
03445 /*
03446 ** Return TRUE if the database file is opened read-only.  Return FALSE
03447 ** if the database is (in theory) writable.
03448 */
03449 int sqlite3pager_isreadonly(Pager *pPager){
03450   return pPager->readOnly;
03451 }
03452 
03453 /*
03454 ** This routine is used for testing and analysis only.
03455 */
03456 int *sqlite3pager_stats(Pager *pPager){
03457   static int a[11];
03458   a[0] = pPager->nRef;
03459   a[1] = pPager->nPage;
03460   a[2] = pPager->mxPage;
03461   a[3] = pPager->dbSize;
03462   a[4] = pPager->state;
03463   a[5] = pPager->errCode;
03464 #ifdef SQLITE_TEST
03465   a[6] = pPager->nHit;
03466   a[7] = pPager->nMiss;
03467   a[8] = pPager->nOvfl;
03468   a[9] = pPager->nRead;
03469   a[10] = pPager->nWrite;
03470 #endif
03471   return a;
03472 }
03473 
03474 /*
03475 ** Set the statement rollback point.
03476 **
03477 ** This routine should be called with the transaction journal already
03478 ** open.  A new statement journal is created that can be used to rollback
03479 ** changes of a single SQL command within a larger transaction.
03480 */
03481 int sqlite3pager_stmt_begin(Pager *pPager){
03482   int rc;
03483   char zTemp[SQLITE_TEMPNAME_SIZE];
03484   assert( !pPager->stmtInUse );
03485   assert( pPager->dbSize>=0 );
03486   TRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
03487   if( MEMDB ){
03488     pPager->stmtInUse = 1;
03489     pPager->stmtSize = pPager->dbSize;
03490     return SQLITE_OK;
03491   }
03492   if( !pPager->journalOpen ){
03493     pPager->stmtAutoopen = 1;
03494     return SQLITE_OK;
03495   }
03496   assert( pPager->journalOpen );
03497   pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
03498   if( pPager->aInStmt==0 ){
03499     /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
03500     return SQLITE_NOMEM;
03501   }
03502 #ifndef NDEBUG
03503   rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize);
03504   if( rc ) goto stmt_begin_failed;
03505   assert( pPager->stmtJSize == pPager->journalOff );
03506 #endif
03507   pPager->stmtJSize = pPager->journalOff;
03508   pPager->stmtSize = pPager->dbSize;
03509   pPager->stmtHdrOff = 0;
03510   pPager->stmtCksum = pPager->cksumInit;
03511   if( !pPager->stmtOpen ){
03512     rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
03513     if( rc ) goto stmt_begin_failed;
03514     pPager->stmtOpen = 1;
03515     pPager->stmtNRec = 0;
03516   }
03517   pPager->stmtInUse = 1;
03518   return SQLITE_OK;
03519  
03520 stmt_begin_failed:
03521   if( pPager->aInStmt ){
03522     sqliteFree(pPager->aInStmt);
03523     pPager->aInStmt = 0;
03524   }
03525   return rc;
03526 }
03527 
03528 /*
03529 ** Commit a statement.
03530 */
03531 int sqlite3pager_stmt_commit(Pager *pPager){
03532   if( pPager->stmtInUse ){
03533     PgHdr *pPg, *pNext;
03534     TRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
03535     if( !MEMDB ){
03536       sqlite3OsSeek(pPager->stfd, 0);
03537       /* sqlite3OsTruncate(pPager->stfd, 0); */
03538       sqliteFree( pPager->aInStmt );
03539       pPager->aInStmt = 0;
03540     }
03541     for(pPg=pPager->pStmt; pPg; pPg=pNext){
03542       pNext = pPg->pNextStmt;
03543       assert( pPg->inStmt );
03544       pPg->inStmt = 0;
03545       pPg->pPrevStmt = pPg->pNextStmt = 0;
03546       if( MEMDB ){
03547         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
03548         sqliteFree(pHist->pStmt);
03549         pHist->pStmt = 0;
03550       }
03551     }
03552     pPager->stmtNRec = 0;
03553     pPager->stmtInUse = 0;
03554     pPager->pStmt = 0;
03555   }
03556   pPager->stmtAutoopen = 0;
03557   return SQLITE_OK;
03558 }
03559 
03560 /*
03561 ** Rollback a statement.
03562 */
03563 int sqlite3pager_stmt_rollback(Pager *pPager){
03564   int rc;
03565   if( pPager->stmtInUse ){
03566     TRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
03567     if( MEMDB ){
03568       PgHdr *pPg;
03569       for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
03570         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
03571         if( pHist->pStmt ){
03572           memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
03573           sqliteFree(pHist->pStmt);
03574           pHist->pStmt = 0;
03575         }
03576       }
03577       pPager->dbSize = pPager->stmtSize;
03578       memoryTruncate(pPager);
03579       rc = SQLITE_OK;
03580     }else{
03581       rc = pager_stmt_playback(pPager);
03582     }
03583     sqlite3pager_stmt_commit(pPager);
03584   }else{
03585     rc = SQLITE_OK;
03586   }
03587   pPager->stmtAutoopen = 0;
03588   return rc;
03589 }
03590 
03591 /*
03592 ** Return the full pathname of the database file.
03593 */
03594 const char *sqlite3pager_filename(Pager *pPager){
03595   return pPager->zFilename;
03596 }
03597 
03598 /*
03599 ** Return the directory of the database file.
03600 */
03601 const char *sqlite3pager_dirname(Pager *pPager){
03602   return pPager->zDirectory;
03603 }
03604 
03605 /*
03606 ** Return the full pathname of the journal file.
03607 */
03608 const char *sqlite3pager_journalname(Pager *pPager){
03609   return pPager->zJournal;
03610 }
03611 
03612 /*
03613 ** Return true if fsync() calls are disabled for this pager.  Return FALSE
03614 ** if fsync()s are executed normally.
03615 */
03616 int sqlite3pager_nosync(Pager *pPager){
03617   return pPager->noSync;
03618 }
03619 
03620 /*
03621 ** Set the codec for this pager
03622 */
03623 void sqlite3pager_set_codec(
03624   Pager *pPager,
03625   void *(*xCodec)(void*,void*,Pgno,int),
03626   void *pCodecArg
03627 ){
03628   pPager->xCodec = xCodec;
03629   pPager->pCodecArg = pCodecArg;
03630 }
03631 
03632 /*
03633 ** This routine is called to increment the database file change-counter,
03634 ** stored at byte 24 of the pager file.
03635 */
03636 static int pager_incr_changecounter(Pager *pPager){
03637   void *pPage;
03638   PgHdr *pPgHdr;
03639   u32 change_counter;
03640   int rc;
03641 
03642   /* Open page 1 of the file for writing. */
03643   rc = sqlite3pager_get(pPager, 1, &pPage);
03644   if( rc!=SQLITE_OK ) return rc;
03645   rc = sqlite3pager_write(pPage);
03646   if( rc!=SQLITE_OK ) return rc;
03647 
03648   /* Read the current value at byte 24. */
03649   pPgHdr = DATA_TO_PGHDR(pPage);
03650   change_counter = retrieve32bits(pPgHdr, 24);
03651 
03652   /* Increment the value just read and write it back to byte 24. */
03653   change_counter++;
03654   put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
03655 
03656   /* Release the page reference. */
03657   sqlite3pager_unref(pPage);
03658   return SQLITE_OK;
03659 }
03660 
03661 /*
03662 ** Sync the database file for the pager pPager. zMaster points to the name
03663 ** of a master journal file that should be written into the individual
03664 ** journal file. zMaster may be NULL, which is interpreted as no master
03665 ** journal (a single database transaction).
03666 **
03667 ** This routine ensures that the journal is synced, all dirty pages written
03668 ** to the database file and the database file synced. The only thing that
03669 ** remains to commit the transaction is to delete the journal file (or
03670 ** master journal file if specified).
03671 **
03672 ** Note that if zMaster==NULL, this does not overwrite a previous value
03673 ** passed to an sqlite3pager_sync() call.
03674 **
03675 ** If parameter nTrunc is non-zero, then the pager file is truncated to
03676 ** nTrunc pages (this is used by auto-vacuum databases).
03677 */
03678 int sqlite3pager_sync(Pager *pPager, const char *zMaster, Pgno nTrunc){
03679   int rc = SQLITE_OK;
03680 
03681   TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
03682       pPager->zFilename, zMaster, nTrunc);
03683 
03684   /* If this is an in-memory db, or no pages have been written to, or this
03685   ** function has already been called, it is a no-op.
03686   */
03687   if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
03688     PgHdr *pPg;
03689     assert( pPager->journalOpen );
03690 
03691     /* If a master journal file name has already been written to the
03692     ** journal file, then no sync is required. This happens when it is
03693     ** written, then the process fails to upgrade from a RESERVED to an
03694     ** EXCLUSIVE lock. The next time the process tries to commit the
03695     ** transaction the m-j name will have already been written.
03696     */
03697     if( !pPager->setMaster ){
03698       rc = pager_incr_changecounter(pPager);
03699       if( rc!=SQLITE_OK ) goto sync_exit;
03700 #ifndef SQLITE_OMIT_AUTOVACUUM
03701       if( nTrunc!=0 ){
03702         /* If this transaction has made the database smaller, then all pages
03703         ** being discarded by the truncation must be written to the journal
03704         ** file.
03705         */
03706         Pgno i;
03707         void *pPage;
03708         int iSkip = PAGER_MJ_PGNO(pPager);
03709         for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
03710           if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
03711             rc = sqlite3pager_get(pPager, i, &pPage);
03712             if( rc!=SQLITE_OK ) goto sync_exit;
03713             rc = sqlite3pager_write(pPage);
03714             sqlite3pager_unref(pPage);
03715             if( rc!=SQLITE_OK ) goto sync_exit;
03716           }
03717         } 
03718       }
03719 #endif
03720       rc = writeMasterJournal(pPager, zMaster);
03721       if( rc!=SQLITE_OK ) goto sync_exit;
03722       rc = syncJournal(pPager);
03723       if( rc!=SQLITE_OK ) goto sync_exit;
03724     }
03725 
03726 #ifndef SQLITE_OMIT_AUTOVACUUM
03727     if( nTrunc!=0 ){
03728       rc = sqlite3pager_truncate(pPager, nTrunc);
03729       if( rc!=SQLITE_OK ) goto sync_exit;
03730     }
03731 #endif
03732 
03733     /* Write all dirty pages to the database file */
03734     pPg = pager_get_all_dirty_pages(pPager);
03735     rc = pager_write_pagelist(pPg);
03736     if( rc!=SQLITE_OK ) goto sync_exit;
03737 
03738     /* Sync the database file. */
03739     if( !pPager->noSync ){
03740       rc = sqlite3OsSync(pPager->fd, 0);
03741     }
03742 
03743     pPager->state = PAGER_SYNCED;
03744   }else if( MEMDB && nTrunc!=0 ){
03745     rc = sqlite3pager_truncate(pPager, nTrunc);
03746   }
03747 
03748 sync_exit:
03749   return rc;
03750 }
03751 
03752 #ifndef SQLITE_OMIT_AUTOVACUUM
03753 /*
03754 ** Move the page identified by pData to location pgno in the file. 
03755 **
03756 ** There must be no references to the current page pgno. If current page
03757 ** pgno is not already in the rollback journal, it is not written there by
03758 ** by this routine. The same applies to the page pData refers to on entry to
03759 ** this routine.
03760 **
03761 ** References to the page refered to by pData remain valid. Updating any
03762 ** meta-data associated with page pData (i.e. data stored in the nExtra bytes
03763 ** allocated along with the page) is the responsibility of the caller.
03764 **
03765 ** A transaction must be active when this routine is called. It used to be
03766 ** required that a statement transaction was not active, but this restriction
03767 ** has been removed (CREATE INDEX needs to move a page when a statement
03768 ** transaction is active).
03769 */
03770 int sqlite3pager_movepage(Pager *pPager, void *pData, Pgno pgno){
03771   PgHdr *pPg = DATA_TO_PGHDR(pData);
03772   PgHdr *pPgOld; 
03773   int h;
03774   Pgno needSyncPgno = 0;
03775 
03776   assert( pPg->nRef>0 );
03777 
03778   TRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
03779       PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
03780 
03781   if( pPg->needSync ){
03782     needSyncPgno = pPg->pgno;
03783     assert( pPg->inJournal );
03784     assert( pPg->dirty );
03785     assert( pPager->needSync );
03786   }
03787 
03788   /* Unlink pPg from it's hash-chain */
03789   unlinkHashChain(pPager, pPg);
03790 
03791   /* If the cache contains a page with page-number pgno, remove it
03792   ** from it's hash chain. Also, if the PgHdr.needSync was set for 
03793   ** page pgno before the 'move' operation, it needs to be retained 
03794   ** for the page moved there.
03795   */
03796   pPgOld = pager_lookup(pPager, pgno);
03797   if( pPgOld ){
03798     assert( pPgOld->nRef==0 );
03799     unlinkHashChain(pPager, pPgOld);
03800     makeClean(pPgOld);
03801     if( pPgOld->needSync ){
03802       assert( pPgOld->inJournal );
03803       pPg->inJournal = 1;
03804       pPg->needSync = 1;
03805       assert( pPager->needSync );
03806     }
03807   }
03808 
03809   /* Change the page number for pPg and insert it into the new hash-chain. */
03810   pPg->pgno = pgno;
03811   h = pgno & (pPager->nHash-1);
03812   if( pPager->aHash[h] ){
03813     assert( pPager->aHash[h]->pPrevHash==0 );
03814     pPager->aHash[h]->pPrevHash = pPg;
03815   }
03816   pPg->pNextHash = pPager->aHash[h];
03817   pPager->aHash[h] = pPg;
03818   pPg->pPrevHash = 0;
03819 
03820   makeDirty(pPg);
03821   pPager->dirtyCache = 1;
03822 
03823   if( needSyncPgno ){
03824     /* If needSyncPgno is non-zero, then the journal file needs to be 
03825     ** sync()ed before any data is written to database file page needSyncPgno.
03826     ** Currently, no such page exists in the page-cache and the 
03827     ** Pager.aInJournal bit has been set. This needs to be remedied by loading
03828     ** the page into the pager-cache and setting the PgHdr.needSync flag.
03829     **
03830     ** The sqlite3pager_get() call may cause the journal to sync. So make
03831     ** sure the Pager.needSync flag is set too.
03832     */
03833     int rc;
03834     void *pNeedSync;
03835     assert( pPager->needSync );
03836     rc = sqlite3pager_get(pPager, needSyncPgno, &pNeedSync);
03837     if( rc!=SQLITE_OK ) return rc;
03838     pPager->needSync = 1;
03839     DATA_TO_PGHDR(pNeedSync)->needSync = 1;
03840     DATA_TO_PGHDR(pNeedSync)->inJournal = 1;
03841     makeDirty(DATA_TO_PGHDR(pNeedSync));
03842     sqlite3pager_unref(pNeedSync);
03843   }
03844 
03845   return SQLITE_OK;
03846 }
03847 #endif
03848 
03849 
03860 int sqlite3pager_loadall(Pager* pPager)
03861 {
03862   int i;
03863   int rc;
03864   int loadSize;
03865   int loadPages;
03866   unsigned char* fileData;
03867 
03868   if (pPager->dbSize < 0 || pPager->pageSize < 0) {
03869     /* pager not initialized, this means a statement is not open */
03870     return SQLITE_MISUSE;
03871   }
03872 
03873   /* compute sizes */
03874   if (pPager->mxPage < pPager->dbSize)
03875     loadPages = pPager->mxPage;
03876   else
03877     loadPages = pPager->dbSize;
03878   loadSize = loadPages * pPager->pageSize;
03879 
03880   rc = sqlite3OsSeek(pPager->fd, 0);
03881   if (rc != SQLITE_OK)
03882     return rc;
03883 
03884   /* load the file as one chunk */
03885   fileData = sqliteMallocRaw(loadSize);
03886   if (! fileData)
03887     return SQLITE_NOMEM;
03888   rc = sqlite3OsRead(pPager->fd, fileData, loadSize);
03889   if (rc != SQLITE_OK) {
03890     sqliteFree(fileData);
03891     return rc;
03892   }
03893 
03894   /* Copy the data to each page. Note that the page numbers we pass to _get
03895    * are one-based, 0 is a marker for no page. We also need to check that we
03896    * haven't loaded more pages than the cache can hold total. There may have
03897    * already been a few pages loaded before, so we may fill the cache before
03898    * loading all of the pages we want to.
03899    */
03900   for (i = 1; i <= loadPages && pPager->nPage < pPager->mxPage; i ++) {
03901     void *pPage;
03902     rc = sqlite3pager_get2(pPager, 1, &pPage,
03903                            &fileData[(i-1)*(i64)pPager->pageSize]);
03904     if (rc != SQLITE_OK)
03905       break;
03906     sqlite3pager_unref(pPage);
03907   }
03908   sqliteFree(fileData);
03909   return SQLITE_OK;
03910 }
03911 
03912 
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
/*
** Debug/test builds only.
**
** Return the current state of the file lock for the given pager, as
** reported by sqlite3OsLockState() for the database file handle.  The
** return value is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK,
** PENDING_LOCK, or EXCLUSIVE_LOCK.
*/
int sqlite3pager_lockstate(Pager *pPager){
  int eLock = sqlite3OsLockState(pPager->fd);
  return eLock;
}
#endif
03923 
#ifdef SQLITE_DEBUG
/*
** Debug builds only.
**
** Walk the list of all pages held by the pager and print one line, via
** sqlite3DebugPrintf(), for each page whose reference count is greater
** than zero.  Each line shows the page number, the address of the page
** data and the reference count.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pCur = pPager->pAll;
  while( pCur ){
    if( pCur->nRef>0 ){
      sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n", 
         pCur->pgno, PGHDR_TO_DATA(pCur), pCur->nRef);
    }
    pCur = pCur->pNextAll;
  }
}
#endif
03937 
03938 #endif /* SQLITE_OMIT_DISKIO */