Back to index

lightning-sunbird  0.9+nobinonly
btree.c
Go to the documentation of this file.
00001 /*
00002 ** 2004 April 6
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** $Id: btree.c,v 1.324 2006/04/04 01:54:55 drh Exp $
00013 **
00014 ** This file implements a external (disk-based) database using BTrees.
00015 ** For a detailed discussion of BTrees, refer to
00016 **
00017 **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
00018 **     "Sorting And Searching", pages 473-480. Addison-Wesley
00019 **     Publishing Company, Reading, Massachusetts.
00020 **
00021 ** The basic idea is that each page of the file contains N database
00022 ** entries and N+1 pointers to subpages.
00023 **
00024 **   ----------------------------------------------------------------
00025 **   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
00026 **   ----------------------------------------------------------------
00027 **
00028 ** All of the keys on the page that Ptr(0) points to have values less
00029 ** than Key(0).  All of the keys on page Ptr(1) and its subpages have
00030 ** values greater than Key(0) and less than Key(1).  All of the keys
00031 ** on Ptr(N+1) and its subpages have values greater than Key(N).  And
00032 ** so forth.
00033 **
00034 ** Finding a particular key requires reading O(log(M)) pages from the 
00035 ** disk where M is the number of entries in the tree.
00036 **
00037 ** In this implementation, a single file can hold one or more separate 
00038 ** BTrees.  Each BTree is identified by the index of its root page.  The
00039 ** key and data for any entry are combined to form the "payload".  A
00040 ** fixed amount of payload can be carried directly on the database
00041 ** page.  If the payload is larger than the preset amount then surplus
00042 ** bytes are stored on overflow pages.  The payload for an entry
00043 ** and the preceding pointer are combined to form a "Cell".  Each 
00044 ** page has a small header which contains the Ptr(N+1) pointer and other
00045 ** information such as the size of key and data.
00046 **
00047 ** FORMAT DETAILS
00048 **
00049 ** The file is divided into pages.  The first page is called page 1,
00050 ** the second is page 2, and so forth.  A page number of zero indicates
00051 ** "no such page".  The page size can be anything between 512 and 65536.
00052 ** Each page can be either a btree page, a freelist page or an overflow
00053 ** page.
00054 **
00055 ** The first page is always a btree page.  The first 100 bytes of the first
00056 ** page contain a special header (the "file header") that describes the file.
00057 ** The format of the file header is as follows:
00058 **
00059 **   OFFSET   SIZE    DESCRIPTION
00060 **      0      16     Header string: "SQLite format 3\000"
00061 **     16       2     Page size in bytes.  
00062 **     18       1     File format write version
00063 **     19       1     File format read version
00064 **     20       1     Bytes of unused space at the end of each page
00065 **     21       1     Max embedded payload fraction
00066 **     22       1     Min embedded payload fraction
00067 **     23       1     Min leaf payload fraction
00068 **     24       4     File change counter
00069 **     28       4     Reserved for future use
00070 **     32       4     First freelist page
00071 **     36       4     Number of freelist pages in the file
00072 **     40      60     15 4-byte meta values passed to higher layers
00073 **
00074 ** All of the integer values are big-endian (most significant byte first).
00075 **
00076 ** The file change counter is incremented when the database is changed more
00077 ** than once within the same second.  This counter, together with the
00078 ** modification time of the file, allows other processes to know
00079 ** when the file has changed and thus when they need to flush their
00080 ** cache.
00081 **
00082 ** The max embedded payload fraction is the amount of the total usable
00083 ** space in a page that can be consumed by a single cell for standard
00084 ** B-tree (non-LEAFDATA) tables.  A value of 255 means 100%.  The default
00085 ** is to limit the maximum cell size so that at least 4 cells will fit
00086 ** on one page.  Thus the default max embedded payload fraction is 64.
00087 **
00088 ** If the payload for a cell is larger than the max payload, then extra
00089 ** payload is spilled to overflow pages.  Once an overflow page is allocated,
00090 ** as many bytes as possible are moved into the overflow pages without letting
00091 ** the cell size drop below the min embedded payload fraction.
00092 **
00093 ** The min leaf payload fraction is like the min embedded payload fraction
00094 ** except that it applies to leaf nodes in a LEAFDATA tree.  The maximum
00095 ** payload fraction for a LEAFDATA tree is always 100% (or 255) and it
00096 ** not specified in the header.
00097 **
00098 ** Each btree pages is divided into three sections:  The header, the
00099 ** cell pointer array, and the cell area area.  Page 1 also has a 100-byte
00100 ** file header that occurs before the page header.
00101 **
00102 **      |----------------|
00103 **      | file header    |   100 bytes.  Page 1 only.
00104 **      |----------------|
00105 **      | page header    |   8 bytes for leaves.  12 bytes for interior nodes
00106 **      |----------------|
00107 **      | cell pointer   |   |  2 bytes per cell.  Sorted order.
00108 **      | array          |   |  Grows downward
00109 **      |                |   v
00110 **      |----------------|
00111 **      | unallocated    |
00112 **      | space          |
00113 **      |----------------|   ^  Grows upwards
00114 **      | cell content   |   |  Arbitrary order interspersed with freeblocks.
00115 **      | area           |   |  and free space fragments.
00116 **      |----------------|
00117 **
00118 ** The page headers looks like this:
00119 **
00120 **   OFFSET   SIZE     DESCRIPTION
00121 **      0       1      Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
00122 **      1       2      byte offset to the first freeblock
00123 **      3       2      number of cells on this page
00124 **      5       2      first byte of the cell content area
00125 **      7       1      number of fragmented free bytes
00126 **      8       4      Right child (the Ptr(N+1) value).  Omitted on leaves.
00127 **
00128 ** The flags define the format of this btree page.  The leaf flag means that
00129 ** this page has no children.  The zerodata flag means that this page carries
00130 ** only keys and no data.  The intkey flag means that the key is a integer
00131 ** which is stored in the key size entry of the cell header rather than in
00132 ** the payload area.
00133 **
00134 ** The cell pointer array begins on the first byte after the page header.
00135 ** The cell pointer array contains zero or more 2-byte numbers which are
00136 ** offsets from the beginning of the page to the cell content in the cell
00137 ** content area.  The cell pointers occur in sorted order.  The system strives
00138 ** to keep free space after the last cell pointer so that new cells can
00139 ** be easily added without having to defragment the page.
00140 **
00141 ** Cell content is stored at the very end of the page and grows toward the
00142 ** beginning of the page.
00143 **
00144 ** Unused space within the cell content area is collected into a linked list of
00145 ** freeblocks.  Each freeblock is at least 4 bytes in size.  The byte offset
00146 ** to the first freeblock is given in the header.  Freeblocks occur in
00147 ** increasing order.  Because a freeblock must be at least 4 bytes in size,
00148 ** any group of 3 or fewer unused bytes in the cell content area cannot
00149 ** exist on the freeblock chain.  A group of 3 or fewer free bytes is called
00150 ** a fragment.  The total number of bytes in all fragments is recorded.
00151 ** in the page header at offset 7.
00152 **
00153 **    SIZE    DESCRIPTION
00154 **      2     Byte offset of the next freeblock
00155 **      2     Bytes in this freeblock
00156 **
00157 ** Cells are of variable length.  Cells are stored in the cell content area at
00158 ** the end of the page.  Pointers to the cells are in the cell pointer array
00159 ** that immediately follows the page header.  Cells is not necessarily
00160 ** contiguous or in order, but cell pointers are contiguous and in order.
00161 **
00162 ** Cell content makes use of variable length integers.  A variable
00163 ** length integer is 1 to 9 bytes where the lower 7 bits of each 
00164 ** byte are used.  The integer consists of all bytes that have bit 8 set and
00165 ** the first byte with bit 8 clear.  The most significant byte of the integer
00166 ** appears first.  A variable-length integer may not be more than 9 bytes long.
00167 ** As a special case, all 8 bytes of the 9th byte are used as data.  This
00168 ** allows a 64-bit integer to be encoded in 9 bytes.
00169 **
00170 **    0x00                      becomes  0x00000000
00171 **    0x7f                      becomes  0x0000007f
00172 **    0x81 0x00                 becomes  0x00000080
00173 **    0x82 0x00                 becomes  0x00000100
00174 **    0x80 0x7f                 becomes  0x0000007f
00175 **    0x8a 0x91 0xd1 0xac 0x78  becomes  0x12345678
00176 **    0x81 0x81 0x81 0x81 0x01  becomes  0x10204081
00177 **
00178 ** Variable length integers are used for rowids and to hold the number of
00179 ** bytes of key and data in a btree cell.
00180 **
00181 ** The content of a cell looks like this:
00182 **
00183 **    SIZE    DESCRIPTION
00184 **      4     Page number of the left child. Omitted if leaf flag is set.
00185 **     var    Number of bytes of data. Omitted if the zerodata flag is set.
00186 **     var    Number of bytes of key. Or the key itself if intkey flag is set.
00187 **      *     Payload
00188 **      4     First page of the overflow chain.  Omitted if no overflow
00189 **
00190 ** Overflow pages form a linked list.  Each page except the last is completely
00191 ** filled with data (pagesize - 4 bytes).  The last page can have as little
00192 ** as 1 byte of data.
00193 **
00194 **    SIZE    DESCRIPTION
00195 **      4     Page number of next overflow page
00196 **      *     Data
00197 **
00198 ** Freelist pages come in two subtypes: trunk pages and leaf pages.  The
00199 ** file header points to first in a linked list of trunk page.  Each trunk
00200 ** page points to multiple leaf pages.  The content of a leaf page is
00201 ** unspecified.  A trunk page looks like this:
00202 **
00203 **    SIZE    DESCRIPTION
00204 **      4     Page number of next trunk page
00205 **      4     Number of leaf pointers on this page
00206 **      *     zero or more pages numbers of leaves
00207 */
00208 #include "sqliteInt.h"
00209 #include "pager.h"
00210 #include "btree.h"
00211 #include "os.h"
00212 #include <assert.h>
00213 
00214 /* Round up a number to the next larger multiple of 8.  This is used
00215 ** to force 8-byte alignment on 64-bit architectures.
00216 */
00217 #define ROUND8(x)   ((x+7)&~7)
00218 
00219 
00220 /* The following value is the maximum cell size assuming a maximum page
00221 ** size give above.
00222 */
00223 #define MX_CELL_SIZE(pBt)  (pBt->pageSize-8)
00224 
00225 /* The maximum number of cells on a single page of the database.  This
00226 ** assumes a minimum cell size of 3 bytes.  Such small cells will be
00227 ** exceedingly rare, but they are possible.
00228 */
00229 #define MX_CELL(pBt) ((pBt->pageSize-8)/3)
00230 
00231 /* Forward declarations */
00232 typedef struct MemPage MemPage;
00233 typedef struct BtLock BtLock;
00234 
00235 /*
00236 ** This is a magic string that appears at the beginning of every
00237 ** SQLite database in order to identify the file as a real database.
00238 **
00239 ** You can change this value at compile-time by specifying a
00240 ** -DSQLITE_FILE_HEADER="..." on the compiler command-line.  The
00241 ** header must be exactly 16 bytes including the zero-terminator so
00242 ** the string itself should be 15 characters long.  If you change
00243 ** the header, then your custom library will not be able to read 
00244 ** databases generated by the standard tools and the standard tools
00245 ** will not be able to read databases created by your custom library.
00246 */
00247 #ifndef SQLITE_FILE_HEADER /* 123456789 123456 */
00248 #  define SQLITE_FILE_HEADER "SQLite format 3"
00249 #endif
00250 static const char zMagicHeader[] = SQLITE_FILE_HEADER;
00251 
00252 /*
00253 ** Page type flags.  An ORed combination of these flags appear as the
00254 ** first byte of every BTree page.
00255 */
00256 #define PTF_INTKEY    0x01
00257 #define PTF_ZERODATA  0x02
00258 #define PTF_LEAFDATA  0x04
00259 #define PTF_LEAF      0x08
00260 
00261 /*
00262 ** As each page of the file is loaded into memory, an instance of the following
00263 ** structure is appended and initialized to zero.  This structure stores
00264 ** information about the page that is decoded from the raw file page.
00265 **
00266 ** The pParent field points back to the parent page.  This allows us to
00267 ** walk up the BTree from any leaf to the root.  Care must be taken to
00268 ** unref() the parent page pointer when this page is no longer referenced.
00269 ** The pageDestructor() routine handles that chore.
00270 */
00271 struct MemPage {
00272   u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
00273   u8 idxShift;         /* True if Cell indices have changed */
00274   u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
00275   u8 intKey;           /* True if intkey flag is set */
00276   u8 leaf;             /* True if leaf flag is set */
00277   u8 zeroData;         /* True if table stores keys only */
00278   u8 leafData;         /* True if tables stores data on leaves only */
00279   u8 hasData;          /* True if this page stores data */
00280   u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
00281   u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
00282   u16 maxLocal;        /* Copy of Btree.maxLocal or Btree.maxLeaf */
00283   u16 minLocal;        /* Copy of Btree.minLocal or Btree.minLeaf */
00284   u16 cellOffset;      /* Index in aData of first cell pointer */
00285   u16 idxParent;       /* Index in parent of this node */
00286   u16 nFree;           /* Number of free bytes on the page */
00287   u16 nCell;           /* Number of cells on this page, local and ovfl */
00288   struct _OvflCell {   /* Cells that will not fit on aData[] */
00289     u8 *pCell;          /* Pointers to the body of the overflow cell */
00290     u16 idx;            /* Insert this cell before idx-th non-overflow cell */
00291   } aOvfl[5];
00292   BtShared *pBt;       /* Pointer back to BTree structure */
00293   u8 *aData;           /* Pointer back to the start of the page */
00294   Pgno pgno;           /* Page number for this page */
00295   MemPage *pParent;    /* The parent of this page.  NULL for root */
00296 };
00297 
00298 /*
00299 ** The in-memory image of a disk page has the auxiliary information appended
00300 ** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
00301 ** that extra information.
00302 */
00303 #define EXTRA_SIZE sizeof(MemPage)
00304 
00305 /* Btree handle */
00306 struct Btree {
00307   sqlite3 *pSqlite;
00308   BtShared *pBt;
00309   u8 inTrans;            /* TRANS_NONE, TRANS_READ or TRANS_WRITE */
00310 };
00311 
00312 /*
00313 ** Btree.inTrans may take one of the following values.
00314 **
00315 ** If the shared-data extension is enabled, there may be multiple users
00316 ** of the Btree structure. At most one of these may open a write transaction,
00317 ** but any number may have active read transactions. Variable Btree.pDb 
00318 ** points to the handle that owns any current write-transaction.
00319 */
00320 #define TRANS_NONE  0
00321 #define TRANS_READ  1
00322 #define TRANS_WRITE 2
00323 
00324 /*
00325 ** Everything we need to know about an open database
00326 */
00327 struct BtShared {
00328   Pager *pPager;        /* The page cache */
00329   BtCursor *pCursor;    /* A list of all open cursors */
00330   MemPage *pPage1;      /* First page of the database */
00331   u8 inStmt;            /* True if we are in a statement subtransaction */
00332   u8 readOnly;          /* True if the underlying file is readonly */
00333   u8 maxEmbedFrac;      /* Maximum payload as % of total page size */
00334   u8 minEmbedFrac;      /* Minimum payload as % of total page size */
00335   u8 minLeafFrac;       /* Minimum leaf payload as % of total page size */
00336   u8 pageSizeFixed;     /* True if the page size can no longer be changed */
00337 #ifndef SQLITE_OMIT_AUTOVACUUM
00338   u8 autoVacuum;        /* True if database supports auto-vacuum */
00339 #endif
00340   u16 pageSize;         /* Total number of bytes on a page */
00341   u16 usableSize;       /* Number of usable bytes on each page */
00342   int maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
00343   int minLocal;         /* Minimum local payload in non-LEAFDATA tables */
00344   int maxLeaf;          /* Maximum local payload in a LEAFDATA table */
00345   int minLeaf;          /* Minimum local payload in a LEAFDATA table */
00346   BusyHandler *pBusyHandler;   /* Callback for when there is lock contention */
00347   u8 inTransaction;     /* Transaction state */
00348   int nRef;             /* Number of references to this structure */
00349   int nTransaction;     /* Number of open transactions (read + write) */
00350   void *pSchema;        /* Pointer to space allocated by sqlite3BtreeSchema() */
00351   void (*xFreeSchema)(void*);  /* Destructor for BtShared.pSchema */
00352 #ifndef SQLITE_OMIT_SHARED_CACHE
00353   BtLock *pLock;        /* List of locks held on this shared-btree struct */
00354   BtShared *pNext;      /* Next in ThreadData.pBtree linked list */
00355 #endif
00356 };
00357 
00358 /*
00359 ** An instance of the following structure is used to hold information
00360 ** about a cell.  The parseCellPtr() function fills in this structure
00361 ** based on information extract from the raw disk page.
00362 */
00363 typedef struct CellInfo CellInfo;
00364 struct CellInfo {
00365   u8 *pCell;     /* Pointer to the start of cell content */
00366   i64 nKey;      /* The key for INTKEY tables, or number of bytes in key */
00367   u32 nData;     /* Number of bytes of data */
00368   u16 nHeader;   /* Size of the cell content header in bytes */
00369   u16 nLocal;    /* Amount of payload held locally */
00370   u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
00371   u16 nSize;     /* Size of the cell content on the main b-tree page */
00372 };
00373 
00374 /*
00375 ** A cursor is a pointer to a particular entry in the BTree.
00376 ** The entry is identified by its MemPage and the index in
00377 ** MemPage.aCell[] of the entry.
00378 */
00379 struct BtCursor {
00380   Btree *pBtree;            /* The Btree to which this cursor belongs */
00381   BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
00382   int (*xCompare)(void*,int,const void*,int,const void*); /* Key comp func */
00383   void *pArg;               /* First arg to xCompare() */
00384   Pgno pgnoRoot;            /* The root page of this tree */
00385   MemPage *pPage;           /* Page that contains the entry */
00386   int idx;                  /* Index of the entry in pPage->aCell[] */
00387   CellInfo info;            /* A parse of the cell we are pointing at */
00388   u8 wrFlag;                /* True if writable */
00389   u8 eState;                /* One of the CURSOR_XXX constants (see below) */
00390 #ifndef SQLITE_OMIT_SHARED_CACHE
00391   void *pKey;      /* Saved key that was cursor's last known position */
00392   i64 nKey;        /* Size of pKey, or last integer key */
00393   int skip;        /* (skip<0) -> Prev() is a no-op. (skip>0) -> Next() is */
00394 #endif
00395 };
00396 
00397 /*
00398 ** Potential values for BtCursor.eState. The first two values (VALID and 
00399 ** INVALID) may occur in any build. The third (REQUIRESEEK) may only occur 
00400 ** if sqlite was compiled without the OMIT_SHARED_CACHE symbol defined.
00401 **
00402 ** CURSOR_VALID:
00403 **   Cursor points to a valid entry. getPayload() etc. may be called.
00404 **
00405 ** CURSOR_INVALID:
00406 **   Cursor does not point to a valid entry. This can happen (for example) 
00407 **   because the table is empty or because BtreeCursorFirst() has not been
00408 **   called.
00409 **
00410 ** CURSOR_REQUIRESEEK:
00411 **   The table that this cursor was opened on still exists, but has been 
00412 **   modified since the cursor was last used. The cursor position is saved
00413 **   in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in 
00414 **   this state, restoreOrClearCursorPosition() can be called to attempt to
00415 **   seek the cursor to the saved position.
00416 */
00417 #define CURSOR_INVALID           0
00418 #define CURSOR_VALID             1
00419 #define CURSOR_REQUIRESEEK       2
00420 
00421 /*
00422 ** The TRACE macro will print high-level status information about the
00423 ** btree operation when the global variable sqlite3_btree_trace is
00424 ** enabled.
00425 */
00426 #if SQLITE_TEST
00427 # define TRACE(X)   if( sqlite3_btree_trace )\
00428                         { sqlite3DebugPrintf X; fflush(stdout); }
00429 #else
00430 # define TRACE(X)
00431 #endif
00432 int sqlite3_btree_trace=0;  /* True to enable tracing */
00433 
00434 /*
00435 ** Forward declaration
00436 */
00437 static int checkReadLocks(BtShared*,Pgno,BtCursor*);
00438 
00439 /*
00440 ** Read or write a two- and four-byte big-endian integer values.
00441 */
00442 static u32 get2byte(unsigned char *p){
00443   return (p[0]<<8) | p[1];
00444 }
00445 static u32 get4byte(unsigned char *p){
00446   return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
00447 }
00448 static void put2byte(unsigned char *p, u32 v){
00449   p[0] = v>>8;
00450   p[1] = v;
00451 }
00452 static void put4byte(unsigned char *p, u32 v){
00453   p[0] = v>>24;
00454   p[1] = v>>16;
00455   p[2] = v>>8;
00456   p[3] = v;
00457 }
00458 
00459 /*
00460 ** Routines to read and write variable-length integers.  These used to
00461 ** be defined locally, but now we use the varint routines in the util.c
00462 ** file.
00463 */
00464 #define getVarint    sqlite3GetVarint
00465 /* #define getVarint32  sqlite3GetVarint32 */
00466 #define getVarint32(A,B)  ((*B=*(A))<=0x7f?1:sqlite3GetVarint32(A,B))
00467 #define putVarint    sqlite3PutVarint
00468 
00469 /* The database page the PENDING_BYTE occupies. This page is never used.
00470 ** TODO: This macro is very similary to PAGER_MJ_PGNO() in pager.c. They
00471 ** should possibly be consolidated (presumably in pager.h).
00472 **
00473 ** If disk I/O is omitted (meaning that the database is stored purely
00474 ** in memory) then there is no pending byte.
00475 */
00476 #ifdef SQLITE_OMIT_DISKIO
00477 # define PENDING_BYTE_PAGE(pBt)  0x7fffffff
00478 #else
00479 # define PENDING_BYTE_PAGE(pBt) ((PENDING_BYTE/(pBt)->pageSize)+1)
00480 #endif
00481 
00482 /*
00483 ** A linked list of the following structures is stored at BtShared.pLock.
00484 ** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor 
00485 ** is opened on the table with root page BtShared.iTable. Locks are removed
00486 ** from this list when a transaction is committed or rolled back, or when
00487 ** a btree handle is closed.
00488 */
00489 struct BtLock {
00490   Btree *pBtree;        /* Btree handle holding this lock */
00491   Pgno iTable;          /* Root page of table */
00492   u8 eLock;             /* READ_LOCK or WRITE_LOCK */
00493   BtLock *pNext;        /* Next in BtShared.pLock list */
00494 };
00495 
00496 /* Candidate values for BtLock.eLock */
00497 #define READ_LOCK     1
00498 #define WRITE_LOCK    2
00499 
00500 #ifdef SQLITE_OMIT_SHARED_CACHE
00501   /*
00502   ** The functions queryTableLock(), lockTable() and unlockAllTables()
00503   ** manipulate entries in the BtShared.pLock linked list used to store
00504   ** shared-cache table level locks. If the library is compiled with the
00505   ** shared-cache feature disabled, then there is only ever one user
00506   ** of each BtShared structure and so this locking is not necessary. 
00507   ** So define the lock related functions as no-ops.
00508   */
00509   #define queryTableLock(a,b,c) SQLITE_OK
00510   #define lockTable(a,b,c) SQLITE_OK
00511   #define unlockAllTables(a)
00512   #define restoreOrClearCursorPosition(a,b) SQLITE_OK
00513   #define saveAllCursors(a,b,c) SQLITE_OK
00514 
00515 #else
00516 
00517 static void releasePage(MemPage *pPage);
00518 
00519 /*
00520 ** Save the current cursor position in the variables BtCursor.nKey 
00521 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
00522 */
00523 static int saveCursorPosition(BtCursor *pCur){
00524   int rc;
00525 
00526   assert( CURSOR_VALID==pCur->eState );
00527   assert( 0==pCur->pKey );
00528 
00529   rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
00530 
00531   /* If this is an intKey table, then the above call to BtreeKeySize()
00532   ** stores the integer key in pCur->nKey. In this case this value is
00533   ** all that is required. Otherwise, if pCur is not open on an intKey
00534   ** table, then malloc space for and store the pCur->nKey bytes of key 
00535   ** data.
00536   */
00537   if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
00538     void *pKey = sqliteMalloc(pCur->nKey);
00539     if( pKey ){
00540       rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
00541       if( rc==SQLITE_OK ){
00542         pCur->pKey = pKey;
00543       }else{
00544         sqliteFree(pKey);
00545       }
00546     }else{
00547       rc = SQLITE_NOMEM;
00548     }
00549   }
00550   assert( !pCur->pPage->intKey || !pCur->pKey );
00551 
00552   if( rc==SQLITE_OK ){
00553     releasePage(pCur->pPage);
00554     pCur->pPage = 0;
00555     pCur->eState = CURSOR_REQUIRESEEK;
00556   }
00557 
00558   return rc;
00559 }
00560 
00561 /*
00562 ** Save the positions of all cursors except pExcept open on the table 
00563 ** with root-page iRoot. Usually, this is called just before cursor
00564 ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()).
00565 */
00566 static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
00567   BtCursor *p;
00568   if( sqlite3ThreadDataReadOnly()->useSharedData ){
00569     for(p=pBt->pCursor; p; p=p->pNext){
00570       if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) && 
00571           p->eState==CURSOR_VALID ){
00572         int rc = saveCursorPosition(p);
00573         if( SQLITE_OK!=rc ){
00574           return rc;
00575         }
00576       }
00577     }
00578   }
00579   return SQLITE_OK;
00580 }
00581 
00582 /*
00583 ** Restore the cursor to the position it was in (or as close to as possible)
00584 ** when saveCursorPosition() was called. Note that this call deletes the 
00585 ** saved position info stored by saveCursorPosition(), so there can be
00586 ** at most one effective restoreOrClearCursorPosition() call after each 
00587 ** saveCursorPosition().
00588 **
00589 ** If the second argument argument - doSeek - is false, then instead of 
00590 ** returning the cursor to it's saved position, any saved position is deleted
00591 ** and the cursor state set to CURSOR_INVALID.
00592 */
00593 static int restoreOrClearCursorPositionX(BtCursor *pCur, int doSeek){
00594   int rc = SQLITE_OK;
00595   assert( sqlite3ThreadDataReadOnly()->useSharedData );
00596   assert( pCur->eState==CURSOR_REQUIRESEEK );
00597   pCur->eState = CURSOR_INVALID;
00598   if( doSeek ){
00599     rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, &pCur->skip);
00600   }
00601   if( rc==SQLITE_OK ){
00602     sqliteFree(pCur->pKey);
00603     pCur->pKey = 0;
00604     assert( CURSOR_VALID==pCur->eState || CURSOR_INVALID==pCur->eState );
00605   }
00606   return rc;
00607 }
00608 
00609 #define restoreOrClearCursorPosition(p,x) \
00610   (p->eState==CURSOR_REQUIRESEEK?restoreOrClearCursorPositionX(p,x):SQLITE_OK)
00611 
00612 /*
00613 ** Query to see if btree handle p may obtain a lock of type eLock 
00614 ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
00615 ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
00616 ** SQLITE_LOCKED if not.
00617 */
00618 static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
00619   BtShared *pBt = p->pBt;
00620   BtLock *pIter;
00621 
00622   /* This is a no-op if the shared-cache is not enabled */
00623   if( 0==sqlite3ThreadDataReadOnly()->useSharedData ){
00624     return SQLITE_OK;
00625   }
00626 
00627   /* This (along with lockTable()) is where the ReadUncommitted flag is
00628   ** dealt with. If the caller is querying for a read-lock and the flag is
00629   ** set, it is unconditionally granted - even if there are write-locks
00630   ** on the table. If a write-lock is requested, the ReadUncommitted flag
00631   ** is not considered.
00632   **
00633   ** In function lockTable(), if a read-lock is demanded and the 
00634   ** ReadUncommitted flag is set, no entry is added to the locks list 
00635   ** (BtShared.pLock).
00636   **
00637   ** To summarize: If the ReadUncommitted flag is set, then read cursors do
00638   ** not create or respect table locks. The locking procedure for a 
00639   ** write-cursor does not change.
00640   */
00641   if( 
00642     !p->pSqlite || 
00643     0==(p->pSqlite->flags&SQLITE_ReadUncommitted) || 
00644     eLock==WRITE_LOCK ||
00645     iTab==MASTER_ROOT
00646   ){
00647     for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
00648       if( pIter->pBtree!=p && pIter->iTable==iTab && 
00649           (pIter->eLock!=eLock || eLock!=READ_LOCK) ){
00650         return SQLITE_LOCKED;
00651       }
00652     }
00653   }
00654   return SQLITE_OK;
00655 }
00656 
00657 /*
00658 ** Add a lock on the table with root-page iTable to the shared-btree used
00659 ** by Btree handle p. Parameter eLock must be either READ_LOCK or 
00660 ** WRITE_LOCK.
00661 **
00662 ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and
00663 ** SQLITE_NOMEM may also be returned.
00664 */
00665 static int lockTable(Btree *p, Pgno iTable, u8 eLock){
00666   BtShared *pBt = p->pBt;
00667   BtLock *pLock = 0;
00668   BtLock *pIter;
00669 
00670   /* This is a no-op if the shared-cache is not enabled */
00671   if( 0==sqlite3ThreadDataReadOnly()->useSharedData ){
00672     return SQLITE_OK;
00673   }
00674 
00675   assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
00676 
00677   /* If the read-uncommitted flag is set and a read-lock is requested,
00678   ** return early without adding an entry to the BtShared.pLock list. See
00679   ** comment in function queryTableLock() for more info on handling 
00680   ** the ReadUncommitted flag.
00681   */
00682   if( 
00683     (p->pSqlite) && 
00684     (p->pSqlite->flags&SQLITE_ReadUncommitted) && 
00685     (eLock==READ_LOCK) &&
00686     iTable!=MASTER_ROOT
00687   ){
00688     return SQLITE_OK;
00689   }
00690 
00691   /* First search the list for an existing lock on this table. */
00692   for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
00693     if( pIter->iTable==iTable && pIter->pBtree==p ){
00694       pLock = pIter;
00695       break;
00696     }
00697   }
00698 
00699   /* If the above search did not find a BtLock struct associating Btree p
00700   ** with table iTable, allocate one and link it into the list.
00701   */
00702   if( !pLock ){
00703     pLock = (BtLock *)sqliteMalloc(sizeof(BtLock));
00704     if( !pLock ){
00705       return SQLITE_NOMEM;
00706     }
00707     pLock->iTable = iTable;
00708     pLock->pBtree = p;
00709     pLock->pNext = pBt->pLock;
00710     pBt->pLock = pLock;
00711   }
00712 
00713   /* Set the BtLock.eLock variable to the maximum of the current lock
00714   ** and the requested lock. This means if a write-lock was already held
00715   ** and a read-lock requested, we don't incorrectly downgrade the lock.
00716   */
00717   assert( WRITE_LOCK>READ_LOCK );
00718   if( eLock>pLock->eLock ){
00719     pLock->eLock = eLock;
00720   }
00721 
00722   return SQLITE_OK;
00723 }
00724 
00725 /*
00726 ** Release all the table locks (locks obtained via calls to the lockTable()
00727 ** procedure) held by Btree handle p.
00728 */
00729 static void unlockAllTables(Btree *p){
00730   BtLock **ppIter = &p->pBt->pLock;
00731 
00732   /* If the shared-cache extension is not enabled, there should be no
00733   ** locks in the BtShared.pLock list, making this procedure a no-op. Assert
00734   ** that this is the case.
00735   */
00736   assert( sqlite3ThreadDataReadOnly()->useSharedData || 0==*ppIter );
00737 
00738   while( *ppIter ){
00739     BtLock *pLock = *ppIter;
00740     if( pLock->pBtree==p ){
00741       *ppIter = pLock->pNext;
00742       sqliteFree(pLock);
00743     }else{
00744       ppIter = &pLock->pNext;
00745     }
00746   }
00747 }
00748 #endif /* SQLITE_OMIT_SHARED_CACHE */
00749 
00750 #ifndef SQLITE_OMIT_AUTOVACUUM
00751 /*
00752 ** These macros define the location of the pointer-map entry for a 
00753 ** database page. The first argument to each is the number of usable
00754 ** bytes on each page of the database (often 1024). The second is the
00755 ** page number to look up in the pointer map.
00756 **
00757 ** PTRMAP_PAGENO returns the database page number of the pointer-map
00758 ** page that stores the required pointer. PTRMAP_PTROFFSET returns
00759 ** the offset of the requested map entry.
00760 **
00761 ** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page,
00762 ** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be
00763 ** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements
00764 ** this test.
00765 */
00766 #define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno)
00767 #define PTRMAP_PTROFFSET(pBt, pgno) (5*(pgno-ptrmapPageno(pBt, pgno)-1))
00768 #define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno))
00769 
00770 static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
00771   int nPagesPerMapPage = (pBt->usableSize/5)+1;
00772   int iPtrMap = (pgno-2)/nPagesPerMapPage;
00773   int ret = (iPtrMap*nPagesPerMapPage) + 2; 
00774   if( ret==PENDING_BYTE_PAGE(pBt) ){
00775     ret++;
00776   }
00777   return ret;
00778 }
00779 
00780 /*
00781 ** The pointer map is a lookup table that identifies the parent page for
00782 ** each child page in the database file.  The parent page is the page that
00783 ** contains a pointer to the child.  Every page in the database contains
00784 ** 0 or 1 parent pages.  (In this context 'database page' refers
00785 ** to any page that is not part of the pointer map itself.)  Each pointer map
00786 ** entry consists of a single byte 'type' and a 4 byte parent page number.
00787 ** The PTRMAP_XXX identifiers below are the valid types.
00788 **
00789 ** The purpose of the pointer map is to facility moving pages from one
00790 ** position in the file to another as part of autovacuum.  When a page
00791 ** is moved, the pointer in its parent must be updated to point to the
00792 ** new location.  The pointer map is used to locate the parent page quickly.
00793 **
00794 ** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not
00795 **                  used in this case.
00796 **
00797 ** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number 
00798 **                  is not used in this case.
00799 **
00800 ** PTRMAP_OVERFLOW1: The database page is the first page in a list of 
00801 **                   overflow pages. The page number identifies the page that
00802 **                   contains the cell with a pointer to this overflow page.
00803 **
00804 ** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of
00805 **                   overflow pages. The page-number identifies the previous
00806 **                   page in the overflow page list.
00807 **
00808 ** PTRMAP_BTREE: The database page is a non-root btree page. The page number
00809 **               identifies the parent page in the btree.
00810 */
00811 #define PTRMAP_ROOTPAGE 1
00812 #define PTRMAP_FREEPAGE 2
00813 #define PTRMAP_OVERFLOW1 3
00814 #define PTRMAP_OVERFLOW2 4
00815 #define PTRMAP_BTREE 5
00816 
00817 /*
00818 ** Write an entry into the pointer map.
00819 **
00820 ** This routine updates the pointer map entry for page number 'key'
00821 ** so that it maps to type 'eType' and parent page number 'pgno'.
00822 ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
00823 */
00824 static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
00825   u8 *pPtrmap;    /* The pointer map page */
00826   Pgno iPtrmap;   /* The pointer map page number */
00827   int offset;     /* Offset in pointer map page */
00828   int rc;
00829 
00830   /* The master-journal page number must never be used as a pointer map page */
00831   assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
00832 
00833   assert( pBt->autoVacuum );
00834   if( key==0 ){
00835     return SQLITE_CORRUPT_BKPT;
00836   }
00837   iPtrmap = PTRMAP_PAGENO(pBt, key);
00838   rc = sqlite3pager_get(pBt->pPager, iPtrmap, (void **)&pPtrmap);
00839   if( rc!=SQLITE_OK ){
00840     return rc;
00841   }
00842   offset = PTRMAP_PTROFFSET(pBt, key);
00843 
00844   if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
00845     TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
00846     rc = sqlite3pager_write(pPtrmap);
00847     if( rc==SQLITE_OK ){
00848       pPtrmap[offset] = eType;
00849       put4byte(&pPtrmap[offset+1], parent);
00850     }
00851   }
00852 
00853   sqlite3pager_unref(pPtrmap);
00854   return rc;
00855 }
00856 
00857 /*
00858 ** Read an entry from the pointer map.
00859 **
00860 ** This routine retrieves the pointer map entry for page 'key', writing
00861 ** the type and parent page number to *pEType and *pPgno respectively.
00862 ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
00863 */
00864 static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
00865   int iPtrmap;       /* Pointer map page index */
00866   u8 *pPtrmap;       /* Pointer map page data */
00867   int offset;        /* Offset of entry in pointer map */
00868   int rc;
00869 
00870   iPtrmap = PTRMAP_PAGENO(pBt, key);
00871   rc = sqlite3pager_get(pBt->pPager, iPtrmap, (void **)&pPtrmap);
00872   if( rc!=0 ){
00873     return rc;
00874   }
00875 
00876   offset = PTRMAP_PTROFFSET(pBt, key);
00877   assert( pEType!=0 );
00878   *pEType = pPtrmap[offset];
00879   if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
00880 
00881   sqlite3pager_unref(pPtrmap);
00882   if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
00883   return SQLITE_OK;
00884 }
00885 
00886 #endif /* SQLITE_OMIT_AUTOVACUUM */
00887 
00888 /*
00889 ** Given a btree page and a cell index (0 means the first cell on
00890 ** the page, 1 means the second cell, and so forth) return a pointer
00891 ** to the cell content.
00892 **
00893 ** This routine works only for pages that do not contain overflow cells.
00894 */
00895 static u8 *findCell(MemPage *pPage, int iCell){
00896   u8 *data = pPage->aData;
00897   assert( iCell>=0 );
00898   assert( iCell<get2byte(&data[pPage->hdrOffset+3]) );
00899   return data + get2byte(&data[pPage->cellOffset+2*iCell]);
00900 }
00901 
00902 /*
00903 ** This a more complex version of findCell() that works for
00904 ** pages that do contain overflow cells.  See insert
00905 */
00906 static u8 *findOverflowCell(MemPage *pPage, int iCell){
00907   int i;
00908   for(i=pPage->nOverflow-1; i>=0; i--){
00909     int k;
00910     struct _OvflCell *pOvfl;
00911     pOvfl = &pPage->aOvfl[i];
00912     k = pOvfl->idx;
00913     if( k<=iCell ){
00914       if( k==iCell ){
00915         return pOvfl->pCell;
00916       }
00917       iCell--;
00918     }
00919   }
00920   return findCell(pPage, iCell);
00921 }
00922 
00923 /*
00924 ** Parse a cell content block and fill in the CellInfo structure.  There
00925 ** are two versions of this function.  parseCell() takes a cell index
00926 ** as the second argument and parseCellPtr() takes a pointer to the
00927 ** body of the cell as its second argument.
00928 */
00929 static void parseCellPtr(
00930   MemPage *pPage,         /* Page containing the cell */
00931   u8 *pCell,              /* Pointer to the cell text. */
00932   CellInfo *pInfo         /* Fill in this structure */
00933 ){
00934   int n;                  /* Number bytes in cell content header */
00935   u32 nPayload;           /* Number of bytes of cell payload */
00936 
00937   pInfo->pCell = pCell;
00938   assert( pPage->leaf==0 || pPage->leaf==1 );
00939   n = pPage->childPtrSize;
00940   assert( n==4-4*pPage->leaf );
00941   if( pPage->hasData ){
00942     n += getVarint32(&pCell[n], &nPayload);
00943   }else{
00944     nPayload = 0;
00945   }
00946   pInfo->nData = nPayload;
00947   if( pPage->intKey ){
00948     n += getVarint(&pCell[n], (u64 *)&pInfo->nKey);
00949   }else{
00950     u32 x;
00951     n += getVarint32(&pCell[n], &x);
00952     pInfo->nKey = x;
00953     nPayload += x;
00954   }
00955   pInfo->nHeader = n;
00956   if( nPayload<=pPage->maxLocal ){
00957     /* This is the (easy) common case where the entire payload fits
00958     ** on the local page.  No overflow is required.
00959     */
00960     int nSize;          /* Total size of cell content in bytes */
00961     pInfo->nLocal = nPayload;
00962     pInfo->iOverflow = 0;
00963     nSize = nPayload + n;
00964     if( nSize<4 ){
00965       nSize = 4;        /* Minimum cell size is 4 */
00966     }
00967     pInfo->nSize = nSize;
00968   }else{
00969     /* If the payload will not fit completely on the local page, we have
00970     ** to decide how much to store locally and how much to spill onto
00971     ** overflow pages.  The strategy is to minimize the amount of unused
00972     ** space on overflow pages while keeping the amount of local storage
00973     ** in between minLocal and maxLocal.
00974     **
00975     ** Warning:  changing the way overflow payload is distributed in any
00976     ** way will result in an incompatible file format.
00977     */
00978     int minLocal;  /* Minimum amount of payload held locally */
00979     int maxLocal;  /* Maximum amount of payload held locally */
00980     int surplus;   /* Overflow payload available for local storage */
00981 
00982     minLocal = pPage->minLocal;
00983     maxLocal = pPage->maxLocal;
00984     surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
00985     if( surplus <= maxLocal ){
00986       pInfo->nLocal = surplus;
00987     }else{
00988       pInfo->nLocal = minLocal;
00989     }
00990     pInfo->iOverflow = pInfo->nLocal + n;
00991     pInfo->nSize = pInfo->iOverflow + 4;
00992   }
00993 }
00994 static void parseCell(
00995   MemPage *pPage,         /* Page containing the cell */
00996   int iCell,              /* The cell index.  First cell is 0 */
00997   CellInfo *pInfo         /* Fill in this structure */
00998 ){
00999   parseCellPtr(pPage, findCell(pPage, iCell), pInfo);
01000 }
01001 
01002 /*
01003 ** Compute the total number of bytes that a Cell needs in the cell
01004 ** data area of the btree-page.  The return number includes the cell
01005 ** data header and the local payload, but not any overflow page or
01006 ** the space used by the cell pointer.
01007 */
01008 #ifndef NDEBUG
01009 static int cellSize(MemPage *pPage, int iCell){
01010   CellInfo info;
01011   parseCell(pPage, iCell, &info);
01012   return info.nSize;
01013 }
01014 #endif
01015 static int cellSizePtr(MemPage *pPage, u8 *pCell){
01016   CellInfo info;
01017   parseCellPtr(pPage, pCell, &info);
01018   return info.nSize;
01019 }
01020 
01021 #ifndef SQLITE_OMIT_AUTOVACUUM
01022 /*
01023 ** If the cell pCell, part of page pPage contains a pointer
01024 ** to an overflow page, insert an entry into the pointer-map
01025 ** for the overflow page.
01026 */
01027 static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
01028   if( pCell ){
01029     CellInfo info;
01030     parseCellPtr(pPage, pCell, &info);
01031     if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
01032       Pgno ovfl = get4byte(&pCell[info.iOverflow]);
01033       return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
01034     }
01035   }
01036   return SQLITE_OK;
01037 }
01038 /*
01039 ** If the cell with index iCell on page pPage contains a pointer
01040 ** to an overflow page, insert an entry into the pointer-map
01041 ** for the overflow page.
01042 */
01043 static int ptrmapPutOvfl(MemPage *pPage, int iCell){
01044   u8 *pCell;
01045   pCell = findOverflowCell(pPage, iCell);
01046   return ptrmapPutOvflPtr(pPage, pCell);
01047 }
01048 #endif
01049 
01050 
01051 /*
01052 ** Do sanity checking on a page.  Throw an exception if anything is
01053 ** not right.
01054 **
01055 ** This routine is used for internal error checking only.  It is omitted
01056 ** from most builds.
01057 */
01058 #if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
01059 static void _pageIntegrity(MemPage *pPage){
01060   int usableSize;
01061   u8 *data;
01062   int i, j, idx, c, pc, hdr, nFree;
01063   int cellOffset;
01064   int nCell, cellLimit;
01065   u8 *used;
01066 
01067   used = sqliteMallocRaw( pPage->pBt->pageSize );
01068   if( used==0 ) return;
01069   usableSize = pPage->pBt->usableSize;
01070   assert( pPage->aData==&((unsigned char*)pPage)[-pPage->pBt->pageSize] );
01071   hdr = pPage->hdrOffset;
01072   assert( hdr==(pPage->pgno==1 ? 100 : 0) );
01073   assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
01074   c = pPage->aData[hdr];
01075   if( pPage->isInit ){
01076     assert( pPage->leaf == ((c & PTF_LEAF)!=0) );
01077     assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) );
01078     assert( pPage->leafData == ((c & PTF_LEAFDATA)!=0) );
01079     assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) );
01080     assert( pPage->hasData ==
01081              !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) );
01082     assert( pPage->cellOffset==pPage->hdrOffset+12-4*pPage->leaf );
01083     assert( pPage->nCell = get2byte(&pPage->aData[hdr+3]) );
01084   }
01085   data = pPage->aData;
01086   memset(used, 0, usableSize);
01087   for(i=0; i<hdr+10-pPage->leaf*4; i++) used[i] = 1;
01088   nFree = 0;
01089   pc = get2byte(&data[hdr+1]);
01090   while( pc ){
01091     int size;
01092     assert( pc>0 && pc<usableSize-4 );
01093     size = get2byte(&data[pc+2]);
01094     assert( pc+size<=usableSize );
01095     nFree += size;
01096     for(i=pc; i<pc+size; i++){
01097       assert( used[i]==0 );
01098       used[i] = 1;
01099     }
01100     pc = get2byte(&data[pc]);
01101   }
01102   idx = 0;
01103   nCell = get2byte(&data[hdr+3]);
01104   cellLimit = get2byte(&data[hdr+5]);
01105   assert( pPage->isInit==0 
01106          || pPage->nFree==nFree+data[hdr+7]+cellLimit-(cellOffset+2*nCell) );
01107   cellOffset = pPage->cellOffset;
01108   for(i=0; i<nCell; i++){
01109     int size;
01110     pc = get2byte(&data[cellOffset+2*i]);
01111     assert( pc>0 && pc<usableSize-4 );
01112     size = cellSize(pPage, &data[pc]);
01113     assert( pc+size<=usableSize );
01114     for(j=pc; j<pc+size; j++){
01115       assert( used[j]==0 );
01116       used[j] = 1;
01117     }
01118   }
01119   for(i=cellOffset+2*nCell; i<cellimit; i++){
01120     assert( used[i]==0 );
01121     used[i] = 1;
01122   }
01123   nFree = 0;
01124   for(i=0; i<usableSize; i++){
01125     assert( used[i]<=1 );
01126     if( used[i]==0 ) nFree++;
01127   }
01128   assert( nFree==data[hdr+7] );
01129   sqliteFree(used);
01130 }
01131 #define pageIntegrity(X) _pageIntegrity(X)
01132 #else
01133 # define pageIntegrity(X)
01134 #endif
01135 
01136 /* A bunch of assert() statements to check the transaction state variables
01137 ** of handle p (type Btree*) are internally consistent.
01138 */
01139 #define btreeIntegrity(p) \
01140   assert( p->inTrans!=TRANS_NONE || p->pBt->nTransaction<p->pBt->nRef ); \
01141   assert( p->pBt->nTransaction<=p->pBt->nRef ); \
01142   assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \
01143   assert( p->pBt->inTransaction>=p->inTrans ); 
01144 
01145 /*
01146 ** Defragment the page given.  All Cells are moved to the
01147 ** end of the page and all free space is collected into one
01148 ** big FreeBlk that occurs in between the header and cell
01149 ** pointer array and the cell content area.
01150 */
01151 static int defragmentPage(MemPage *pPage){
01152   int i;                     /* Loop counter */
01153   int pc;                    /* Address of a i-th cell */
01154   int addr;                  /* Offset of first byte after cell pointer array */
01155   int hdr;                   /* Offset to the page header */
01156   int size;                  /* Size of a cell */
01157   int usableSize;            /* Number of usable bytes on a page */
01158   int cellOffset;            /* Offset to the cell pointer array */
01159   int brk;                   /* Offset to the cell content area */
01160   int nCell;                 /* Number of cells on the page */
01161   unsigned char *data;       /* The page data */
01162   unsigned char *temp;       /* Temp area for cell content */
01163 
01164   assert( sqlite3pager_iswriteable(pPage->aData) );
01165   assert( pPage->pBt!=0 );
01166   assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
01167   assert( pPage->nOverflow==0 );
01168   temp = sqliteMalloc( pPage->pBt->pageSize );
01169   if( temp==0 ) return SQLITE_NOMEM;
01170   data = pPage->aData;
01171   hdr = pPage->hdrOffset;
01172   cellOffset = pPage->cellOffset;
01173   nCell = pPage->nCell;
01174   assert( nCell==get2byte(&data[hdr+3]) );
01175   usableSize = pPage->pBt->usableSize;
01176   brk = get2byte(&data[hdr+5]);
01177   memcpy(&temp[brk], &data[brk], usableSize - brk);
01178   brk = usableSize;
01179   for(i=0; i<nCell; i++){
01180     u8 *pAddr;     /* The i-th cell pointer */
01181     pAddr = &data[cellOffset + i*2];
01182     pc = get2byte(pAddr);
01183     assert( pc<pPage->pBt->usableSize );
01184     size = cellSizePtr(pPage, &temp[pc]);
01185     brk -= size;
01186     memcpy(&data[brk], &temp[pc], size);
01187     put2byte(pAddr, brk);
01188   }
01189   assert( brk>=cellOffset+2*nCell );
01190   put2byte(&data[hdr+5], brk);
01191   data[hdr+1] = 0;
01192   data[hdr+2] = 0;
01193   data[hdr+7] = 0;
01194   addr = cellOffset+2*nCell;
01195   memset(&data[addr], 0, brk-addr);
01196   sqliteFree(temp);
01197   return SQLITE_OK;
01198 }
01199 
01200 /*
01201 ** Allocate nByte bytes of space on a page.
01202 **
01203 ** Return the index into pPage->aData[] of the first byte of
01204 ** the new allocation. Or return 0 if there is not enough free
01205 ** space on the page to satisfy the allocation request.
01206 **
01207 ** If the page contains nBytes of free space but does not contain
01208 ** nBytes of contiguous free space, then this routine automatically
01209 ** calls defragementPage() to consolidate all free space before 
01210 ** allocating the new chunk.
01211 */
01212 static int allocateSpace(MemPage *pPage, int nByte){
01213   int addr, pc, hdr;
01214   int size;
01215   int nFrag;
01216   int top;
01217   int nCell;
01218   int cellOffset;
01219   unsigned char *data;
01220   
01221   data = pPage->aData;
01222   assert( sqlite3pager_iswriteable(data) );
01223   assert( pPage->pBt );
01224   if( nByte<4 ) nByte = 4;
01225   if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
01226   pPage->nFree -= nByte;
01227   hdr = pPage->hdrOffset;
01228 
01229   nFrag = data[hdr+7];
01230   if( nFrag<60 ){
01231     /* Search the freelist looking for a slot big enough to satisfy the
01232     ** space request. */
01233     addr = hdr+1;
01234     while( (pc = get2byte(&data[addr]))>0 ){
01235       size = get2byte(&data[pc+2]);
01236       if( size>=nByte ){
01237         if( size<nByte+4 ){
01238           memcpy(&data[addr], &data[pc], 2);
01239           data[hdr+7] = nFrag + size - nByte;
01240           return pc;
01241         }else{
01242           put2byte(&data[pc+2], size-nByte);
01243           return pc + size - nByte;
01244         }
01245       }
01246       addr = pc;
01247     }
01248   }
01249 
01250   /* Allocate memory from the gap in between the cell pointer array
01251   ** and the cell content area.
01252   */
01253   top = get2byte(&data[hdr+5]);
01254   nCell = get2byte(&data[hdr+3]);
01255   cellOffset = pPage->cellOffset;
01256   if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
01257     if( defragmentPage(pPage) ) return 0;
01258     top = get2byte(&data[hdr+5]);
01259   }
01260   top -= nByte;
01261   assert( cellOffset + 2*nCell <= top );
01262   put2byte(&data[hdr+5], top);
01263   return top;
01264 }
01265 
01266 /*
01267 ** Return a section of the pPage->aData to the freelist.
01268 ** The first byte of the new free block is pPage->aDisk[start]
01269 ** and the size of the block is "size" bytes.
01270 **
01271 ** Most of the effort here is involved in coalesing adjacent
01272 ** free blocks into a single big free block.
01273 */
01274 static void freeSpace(MemPage *pPage, int start, int size){
01275   int addr, pbegin, hdr;
01276   unsigned char *data = pPage->aData;
01277 
01278   assert( pPage->pBt!=0 );
01279   assert( sqlite3pager_iswriteable(data) );
01280   assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
01281   assert( (start + size)<=pPage->pBt->usableSize );
01282   if( size<4 ) size = 4;
01283 
01284 #ifdef SQLITE_SECURE_DELETE
01285   /* Overwrite deleted information with zeros when the SECURE_DELETE 
01286   ** option is enabled at compile-time */
01287   memset(&data[start], 0, size);
01288 #endif
01289 
01290   /* Add the space back into the linked list of freeblocks */
01291   hdr = pPage->hdrOffset;
01292   addr = hdr + 1;
01293   while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
01294     assert( pbegin<=pPage->pBt->usableSize-4 );
01295     assert( pbegin>addr );
01296     addr = pbegin;
01297   }
01298   assert( pbegin<=pPage->pBt->usableSize-4 );
01299   assert( pbegin>addr || pbegin==0 );
01300   put2byte(&data[addr], start);
01301   put2byte(&data[start], pbegin);
01302   put2byte(&data[start+2], size);
01303   pPage->nFree += size;
01304 
01305   /* Coalesce adjacent free blocks */
01306   addr = pPage->hdrOffset + 1;
01307   while( (pbegin = get2byte(&data[addr]))>0 ){
01308     int pnext, psize;
01309     assert( pbegin>addr );
01310     assert( pbegin<=pPage->pBt->usableSize-4 );
01311     pnext = get2byte(&data[pbegin]);
01312     psize = get2byte(&data[pbegin+2]);
01313     if( pbegin + psize + 3 >= pnext && pnext>0 ){
01314       int frag = pnext - (pbegin+psize);
01315       assert( frag<=data[pPage->hdrOffset+7] );
01316       data[pPage->hdrOffset+7] -= frag;
01317       put2byte(&data[pbegin], get2byte(&data[pnext]));
01318       put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
01319     }else{
01320       addr = pbegin;
01321     }
01322   }
01323 
01324   /* If the cell content area begins with a freeblock, remove it. */
01325   if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
01326     int top;
01327     pbegin = get2byte(&data[hdr+1]);
01328     memcpy(&data[hdr+1], &data[pbegin], 2);
01329     top = get2byte(&data[hdr+5]);
01330     put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
01331   }
01332 }
01333 
01334 /*
01335 ** Decode the flags byte (the first byte of the header) for a page
01336 ** and initialize fields of the MemPage structure accordingly.
01337 */
01338 static void decodeFlags(MemPage *pPage, int flagByte){
01339   BtShared *pBt;     /* A copy of pPage->pBt */
01340 
01341   assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
01342   pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
01343   pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
01344   pPage->leaf = (flagByte & PTF_LEAF)!=0;
01345   pPage->childPtrSize = 4*(pPage->leaf==0);
01346   pBt = pPage->pBt;
01347   if( flagByte & PTF_LEAFDATA ){
01348     pPage->leafData = 1;
01349     pPage->maxLocal = pBt->maxLeaf;
01350     pPage->minLocal = pBt->minLeaf;
01351   }else{
01352     pPage->leafData = 0;
01353     pPage->maxLocal = pBt->maxLocal;
01354     pPage->minLocal = pBt->minLocal;
01355   }
01356   pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
01357 }
01358 
01359 /*
01360 ** Initialize the auxiliary information for a disk block.
01361 **
01362 ** The pParent parameter must be a pointer to the MemPage which
01363 ** is the parent of the page being initialized.  The root of a
01364 ** BTree has no parent and so for that page, pParent==NULL.
01365 **
01366 ** Return SQLITE_OK on success.  If we see that the page does
01367 ** not contain a well-formed database page, then return 
01368 ** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
01369 ** guarantee that the page is well-formed.  It only shows that
01370 ** we failed to detect any corruption.
01371 */
01372 static int initPage(
01373   MemPage *pPage,        /* The page to be initialized */
01374   MemPage *pParent       /* The parent.  Might be NULL */
01375 ){
01376   int pc;            /* Address of a freeblock within pPage->aData[] */
01377   int hdr;           /* Offset to beginning of page header */
01378   u8 *data;          /* Equal to pPage->aData */
01379   BtShared *pBt;        /* The main btree structure */
01380   int usableSize;    /* Amount of usable space on each page */
01381   int cellOffset;    /* Offset from start of page to first cell pointer */
01382   int nFree;         /* Number of unused bytes on the page */
01383   int top;           /* First byte of the cell content area */
01384 
01385   pBt = pPage->pBt;
01386   assert( pBt!=0 );
01387   assert( pParent==0 || pParent->pBt==pBt );
01388   assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
01389   assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] );
01390   if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
01391     /* The parent page should never change unless the file is corrupt */
01392     return SQLITE_CORRUPT_BKPT;
01393   }
01394   if( pPage->isInit ) return SQLITE_OK;
01395   if( pPage->pParent==0 && pParent!=0 ){
01396     pPage->pParent = pParent;
01397     sqlite3pager_ref(pParent->aData);
01398   }
01399   hdr = pPage->hdrOffset;
01400   data = pPage->aData;
01401   decodeFlags(pPage, data[hdr]);
01402   pPage->nOverflow = 0;
01403   pPage->idxShift = 0;
01404   usableSize = pBt->usableSize;
01405   pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
01406   top = get2byte(&data[hdr+5]);
01407   pPage->nCell = get2byte(&data[hdr+3]);
01408   if( pPage->nCell>MX_CELL(pBt) ){
01409     /* To many cells for a single page.  The page must be corrupt */
01410     return SQLITE_CORRUPT_BKPT;
01411   }
01412   if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
01413     /* All pages must have at least one cell, except for root pages */
01414     return SQLITE_CORRUPT_BKPT;
01415   }
01416 
01417   /* Compute the total free space on the page */
01418   pc = get2byte(&data[hdr+1]);
01419   nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
01420   while( pc>0 ){
01421     int next, size;
01422     if( pc>usableSize-4 ){
01423       /* Free block is off the page */
01424       return SQLITE_CORRUPT_BKPT; 
01425     }
01426     next = get2byte(&data[pc]);
01427     size = get2byte(&data[pc+2]);
01428     if( next>0 && next<=pc+size+3 ){
01429       /* Free blocks must be in accending order */
01430       return SQLITE_CORRUPT_BKPT; 
01431     }
01432     nFree += size;
01433     pc = next;
01434   }
01435   pPage->nFree = nFree;
01436   if( nFree>=usableSize ){
01437     /* Free space cannot exceed total page size */
01438     return SQLITE_CORRUPT_BKPT; 
01439   }
01440 
01441   pPage->isInit = 1;
01442   pageIntegrity(pPage);
01443   return SQLITE_OK;
01444 }
01445 
01446 /*
01447 ** Set up a raw page so that it looks like a database page holding
01448 ** no entries.
01449 */
01450 static void zeroPage(MemPage *pPage, int flags){
01451   unsigned char *data = pPage->aData;
01452   BtShared *pBt = pPage->pBt;
01453   int hdr = pPage->hdrOffset;
01454   int first;
01455 
01456   assert( sqlite3pager_pagenumber(data)==pPage->pgno );
01457   assert( &data[pBt->pageSize] == (unsigned char*)pPage );
01458   assert( sqlite3pager_iswriteable(data) );
01459   memset(&data[hdr], 0, pBt->usableSize - hdr);
01460   data[hdr] = flags;
01461   first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
01462   memset(&data[hdr+1], 0, 4);
01463   data[hdr+7] = 0;
01464   put2byte(&data[hdr+5], pBt->usableSize);
01465   pPage->nFree = pBt->usableSize - first;
01466   decodeFlags(pPage, flags);
01467   pPage->hdrOffset = hdr;
01468   pPage->cellOffset = first;
01469   pPage->nOverflow = 0;
01470   pPage->idxShift = 0;
01471   pPage->nCell = 0;
01472   pPage->isInit = 1;
01473   pageIntegrity(pPage);
01474 }
01475 
01476 /*
01477 ** Get a page from the pager.  Initialize the MemPage.pBt and
01478 ** MemPage.aData elements if needed.
01479 */
01480 static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage){
01481   int rc;
01482   unsigned char *aData;
01483   MemPage *pPage;
01484   rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData);
01485   if( rc ) return rc;
01486   pPage = (MemPage*)&aData[pBt->pageSize];
01487   pPage->aData = aData;
01488   pPage->pBt = pBt;
01489   pPage->pgno = pgno;
01490   pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
01491   *ppPage = pPage;
01492   return SQLITE_OK;
01493 }
01494 
01495 /*
01496 ** Get a page from the pager and initialize it.  This routine
01497 ** is just a convenience wrapper around separate calls to
01498 ** getPage() and initPage().
01499 */
01500 static int getAndInitPage(
01501   BtShared *pBt,          /* The database file */
01502   Pgno pgno,           /* Number of the page to get */
01503   MemPage **ppPage,    /* Write the page pointer here */
01504   MemPage *pParent     /* Parent of the page */
01505 ){
01506   int rc;
01507   if( pgno==0 ){
01508     return SQLITE_CORRUPT_BKPT; 
01509   }
01510   rc = getPage(pBt, pgno, ppPage);
01511   if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
01512     rc = initPage(*ppPage, pParent);
01513   }
01514   return rc;
01515 }
01516 
01517 /*
01518 ** Release a MemPage.  This should be called once for each prior
01519 ** call to getPage.
01520 */
01521 static void releasePage(MemPage *pPage){
01522   if( pPage ){
01523     assert( pPage->aData );
01524     assert( pPage->pBt );
01525     assert( &pPage->aData[pPage->pBt->pageSize]==(unsigned char*)pPage );
01526     sqlite3pager_unref(pPage->aData);
01527   }
01528 }
01529 
01530 /*
01531 ** This routine is called when the reference count for a page
01532 ** reaches zero.  We need to unref the pParent pointer when that
01533 ** happens.
01534 */
01535 static void pageDestructor(void *pData, int pageSize){
01536   MemPage *pPage;
01537   assert( (pageSize & 7)==0 );
01538   pPage = (MemPage*)&((char*)pData)[pageSize];
01539   if( pPage->pParent ){
01540     MemPage *pParent = pPage->pParent;
01541     pPage->pParent = 0;
01542     releasePage(pParent);
01543   }
01544   pPage->isInit = 0;
01545 }
01546 
01547 /*
01548 ** During a rollback, when the pager reloads information into the cache
01549 ** so that the cache is restored to its original state at the start of
01550 ** the transaction, for each page restored this routine is called.
01551 **
01552 ** This routine needs to reset the extra data section at the end of the
01553 ** page to agree with the restored data.
01554 */
01555 static void pageReinit(void *pData, int pageSize){
01556   MemPage *pPage;
01557   assert( (pageSize & 7)==0 );
01558   pPage = (MemPage*)&((char*)pData)[pageSize];
01559   if( pPage->isInit ){
01560     pPage->isInit = 0;
01561     initPage(pPage, pPage->pParent);
01562   }
01563 }
01564 
01565 /*
01566 ** Open a database file.
01567 ** 
01568 ** zFilename is the name of the database file.  If zFilename is NULL
01569 ** a new database with a random name is created.  This randomly named
01570 ** database file will be deleted when sqlite3BtreeClose() is called.
01571 */
01572 int sqlite3BtreeOpen(
01573   const char *zFilename,  /* Name of the file containing the BTree database */
01574   sqlite3 *pSqlite,       /* Associated database handle */
01575   Btree **ppBtree,        /* Pointer to new Btree object written here */
01576   int flags               /* Options */
01577 ){
01578   BtShared *pBt;          /* Shared part of btree structure */
01579   Btree *p;               /* Handle to return */
01580   int rc;
01581   int nReserve;
01582   unsigned char zDbHeader[100];
01583 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
01584   const ThreadData *pTsdro;
01585 #endif
01586 
01587   /* Set the variable isMemdb to true for an in-memory database, or 
01588   ** false for a file-based database. This symbol is only required if
01589   ** either of the shared-data or autovacuum features are compiled 
01590   ** into the library.
01591   */
01592 #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
01593   #ifdef SQLITE_OMIT_MEMORYDB
01594   const int isMemdb = !zFilename;
01595   #else
01596   const int isMemdb = !zFilename || (strcmp(zFilename, ":memory:")?0:1);
01597   #endif
01598 #endif
01599 
01600   p = sqliteMalloc(sizeof(Btree));
01601   if( !p ){
01602     return SQLITE_NOMEM;
01603   }
01604   p->inTrans = TRANS_NONE;
01605   p->pSqlite = pSqlite;
01606 
01607   /* Try to find an existing Btree structure opened on zFilename. */
01608 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
01609   pTsdro = sqlite3ThreadDataReadOnly();
01610   if( pTsdro->useSharedData && zFilename && !isMemdb ){
01611     char *zFullPathname = sqlite3OsFullPathname(zFilename);
01612     if( !zFullPathname ){
01613       sqliteFree(p);
01614       return SQLITE_NOMEM;
01615     }
01616     for(pBt=pTsdro->pBtree; pBt; pBt=pBt->pNext){
01617       assert( pBt->nRef>0 );
01618       if( 0==strcmp(zFullPathname, sqlite3pager_filename(pBt->pPager)) ){
01619         p->pBt = pBt;
01620         *ppBtree = p;
01621         pBt->nRef++;
01622         sqliteFree(zFullPathname);
01623         return SQLITE_OK;
01624       }
01625     }
01626     sqliteFree(zFullPathname);
01627   }
01628 #endif
01629 
01630   /*
01631   ** The following asserts make sure that structures used by the btree are
01632   ** the right size.  This is to guard against size changes that result
01633   ** when compiling on a different architecture.
01634   */
01635   assert( sizeof(i64)==8 || sizeof(i64)==4 );
01636   assert( sizeof(u64)==8 || sizeof(u64)==4 );
01637   assert( sizeof(u32)==4 );
01638   assert( sizeof(u16)==2 );
01639   assert( sizeof(Pgno)==4 );
01640 
01641   pBt = sqliteMalloc( sizeof(*pBt) );
01642   if( pBt==0 ){
01643     *ppBtree = 0;
01644     sqliteFree(p);
01645     return SQLITE_NOMEM;
01646   }
01647   rc = sqlite3pager_open(&pBt->pPager, zFilename, EXTRA_SIZE, flags);
01648   if( rc!=SQLITE_OK ){
01649     if( pBt->pPager ) sqlite3pager_close(pBt->pPager);
01650     sqliteFree(pBt);
01651     sqliteFree(p);
01652     *ppBtree = 0;
01653     return rc;
01654   }
01655   p->pBt = pBt;
01656 
01657   sqlite3pager_set_destructor(pBt->pPager, pageDestructor);
01658   sqlite3pager_set_reiniter(pBt->pPager, pageReinit);
01659   pBt->pCursor = 0;
01660   pBt->pPage1 = 0;
01661   pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager);
01662   sqlite3pager_read_fileheader(pBt->pPager, sizeof(zDbHeader), zDbHeader);
01663   pBt->pageSize = get2byte(&zDbHeader[16]);
01664   if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
01665        || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
01666     pBt->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
01667     pBt->maxEmbedFrac = 64;   /* 25% */
01668     pBt->minEmbedFrac = 32;   /* 12.5% */
01669     pBt->minLeafFrac = 32;    /* 12.5% */
01670 #ifndef SQLITE_OMIT_AUTOVACUUM
01671     /* If the magic name ":memory:" will create an in-memory database, then
01672     ** do not set the auto-vacuum flag, even if SQLITE_DEFAULT_AUTOVACUUM
01673     ** is true. On the other hand, if SQLITE_OMIT_MEMORYDB has been defined,
01674     ** then ":memory:" is just a regular file-name. Respect the auto-vacuum
01675     ** default in this case.
01676     */
01677     if( zFilename && !isMemdb ){
01678       pBt->autoVacuum = SQLITE_DEFAULT_AUTOVACUUM;
01679     }
01680 #endif
01681     nReserve = 0;
01682   }else{
01683     nReserve = zDbHeader[20];
01684     pBt->maxEmbedFrac = zDbHeader[21];
01685     pBt->minEmbedFrac = zDbHeader[22];
01686     pBt->minLeafFrac = zDbHeader[23];
01687     pBt->pageSizeFixed = 1;
01688 #ifndef SQLITE_OMIT_AUTOVACUUM
01689     pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
01690 #endif
01691   }
01692   pBt->usableSize = pBt->pageSize - nReserve;
01693   assert( (pBt->pageSize & 7)==0 );  /* 8-byte alignment of pageSize */
01694   sqlite3pager_set_pagesize(pBt->pPager, pBt->pageSize);
01695 
01696 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
01697   /* Add the new btree to the linked list starting at ThreadData.pBtree.
01698   ** There is no chance that a malloc() may fail inside of the 
01699   ** sqlite3ThreadData() call, as the ThreadData structure must have already
01700   ** been allocated for pTsdro->useSharedData to be non-zero.
01701   */
01702   if( pTsdro->useSharedData && zFilename && !isMemdb ){
01703     pBt->pNext = pTsdro->pBtree;
01704     sqlite3ThreadData()->pBtree = pBt;
01705   }
01706 #endif
01707   pBt->nRef = 1;
01708   *ppBtree = p;
01709   return SQLITE_OK;
01710 }
01711 
01712 /*
01713 ** Close an open database and invalidate all cursors.
01714 */
01715 int sqlite3BtreeClose(Btree *p){
01716   BtShared *pBt = p->pBt;
01717   BtCursor *pCur;
01718 
01719 #ifndef SQLITE_OMIT_SHARED_CACHE
01720   ThreadData *pTsd;
01721 #endif
01722 
01723   /* Close all cursors opened via this handle.  */
01724   pCur = pBt->pCursor;
01725   while( pCur ){
01726     BtCursor *pTmp = pCur;
01727     pCur = pCur->pNext;
01728     if( pTmp->pBtree==p ){
01729       sqlite3BtreeCloseCursor(pTmp);
01730     }
01731   }
01732 
01733   /* Rollback any active transaction and free the handle structure.
01734   ** The call to sqlite3BtreeRollback() drops any table-locks held by
01735   ** this handle.
01736   */
01737   sqlite3BtreeRollback(p);
01738   sqliteFree(p);
01739 
01740 #ifndef SQLITE_OMIT_SHARED_CACHE
01741   /* If there are still other outstanding references to the shared-btree
01742   ** structure, return now. The remainder of this procedure cleans 
01743   ** up the shared-btree.
01744   */
01745   assert( pBt->nRef>0 );
01746   pBt->nRef--;
01747   if( pBt->nRef ){
01748     return SQLITE_OK;
01749   }
01750 
01751   /* Remove the shared-btree from the thread wide list. Call 
01752   ** ThreadDataReadOnly() and then cast away the const property of the 
01753   ** pointer to avoid allocating thread data if it is not really required.
01754   */
01755   pTsd = (ThreadData *)sqlite3ThreadDataReadOnly();
01756   if( pTsd->pBtree==pBt ){
01757     assert( pTsd==sqlite3ThreadData() );
01758     pTsd->pBtree = pBt->pNext;
01759   }else{
01760     BtShared *pPrev;
01761     for(pPrev=pTsd->pBtree; pPrev && pPrev->pNext!=pBt; pPrev=pPrev->pNext){}
01762     if( pPrev ){
01763       assert( pTsd==sqlite3ThreadData() );
01764       pPrev->pNext = pBt->pNext;
01765     }
01766   }
01767 #endif
01768 
01769   /* Close the pager and free the shared-btree structure */
01770   assert( !pBt->pCursor );
01771   sqlite3pager_close(pBt->pPager);
01772   if( pBt->xFreeSchema && pBt->pSchema ){
01773     pBt->xFreeSchema(pBt->pSchema);
01774   }
01775   sqliteFree(pBt->pSchema);
01776   sqliteFree(pBt);
01777   return SQLITE_OK;
01778 }
01779 
01780 /*
01781 ** Change the busy handler callback function.
01782 */
01783 int sqlite3BtreeSetBusyHandler(Btree *p, BusyHandler *pHandler){
01784   BtShared *pBt = p->pBt;
01785   pBt->pBusyHandler = pHandler;
01786   sqlite3pager_set_busyhandler(pBt->pPager, pHandler);
01787   return SQLITE_OK;
01788 }
01789 
01790 /*
01791 ** Change the limit on the number of pages allowed in the cache.
01792 **
01793 ** The maximum number of cache pages is set to the absolute
01794 ** value of mxPage.  If mxPage is negative, the pager will
01795 ** operate asynchronously - it will not stop to do fsync()s
01796 ** to insure data is written to the disk surface before
01797 ** continuing.  Transactions still work if synchronous is off,
01798 ** and the database cannot be corrupted if this program
01799 ** crashes.  But if the operating system crashes or there is
01800 ** an abrupt power failure when synchronous is off, the database
01801 ** could be left in an inconsistent and unrecoverable state.
01802 ** Synchronous is on by default so database corruption is not
01803 ** normally a worry.
01804 */
01805 int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
01806   BtShared *pBt = p->pBt;
01807   sqlite3pager_set_cachesize(pBt->pPager, mxPage);
01808   return SQLITE_OK;
01809 }
01810 
01811 /*
01812 ** Change the way data is synced to disk in order to increase or decrease
01813 ** how well the database resists damage due to OS crashes and power
01814 ** failures.  Level 1 is the same as asynchronous (no syncs() occur and
01815 ** there is a high probability of damage)  Level 2 is the default.  There
01816 ** is a very low but non-zero probability of damage.  Level 3 reduces the
01817 ** probability of damage to near zero but with a write performance reduction.
01818 */
01819 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
01820 int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
01821   BtShared *pBt = p->pBt;
01822   sqlite3pager_set_safety_level(pBt->pPager, level, fullSync);
01823   return SQLITE_OK;
01824 }
01825 #endif
01826 
01827 /*
01828 ** Return TRUE if the given btree is set to safety level 1.  In other
01829 ** words, return TRUE if no sync() occurs on the disk files.
01830 */
01831 int sqlite3BtreeSyncDisabled(Btree *p){
01832   BtShared *pBt = p->pBt;
01833   assert( pBt && pBt->pPager );
01834   return sqlite3pager_nosync(pBt->pPager);
01835 }
01836 
01837 #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
01838 /*
01839 ** Change the default pages size and the number of reserved bytes per page.
01840 **
01841 ** The page size must be a power of 2 between 512 and 65536.  If the page
01842 ** size supplied does not meet this constraint then the page size is not
01843 ** changed.
01844 **
01845 ** Page sizes are constrained to be a power of two so that the region
01846 ** of the database file used for locking (beginning at PENDING_BYTE,
01847 ** the first byte past the 1GB boundary, 0x40000000) needs to occur
01848 ** at the beginning of a page.
01849 **
01850 ** If parameter nReserve is less than zero, then the number of reserved
01851 ** bytes per page is left unchanged.
01852 */
01853 int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
01854   BtShared *pBt = p->pBt;
01855   if( pBt->pageSizeFixed ){
01856     return SQLITE_READONLY;
01857   }
01858   if( nReserve<0 ){
01859     nReserve = pBt->pageSize - pBt->usableSize;
01860   }
01861   if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
01862         ((pageSize-1)&pageSize)==0 ){
01863     assert( (pageSize & 7)==0 );
01864     assert( !pBt->pPage1 && !pBt->pCursor );
01865     pBt->pageSize = sqlite3pager_set_pagesize(pBt->pPager, pageSize);
01866   }
01867   pBt->usableSize = pBt->pageSize - nReserve;
01868   return SQLITE_OK;
01869 }
01870 
01871 /*
01872 ** Return the currently defined page size
01873 */
01874 int sqlite3BtreeGetPageSize(Btree *p){
01875   return p->pBt->pageSize;
01876 }
01877 int sqlite3BtreeGetReserve(Btree *p){
01878   return p->pBt->pageSize - p->pBt->usableSize;
01879 }
01880 #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
01881 
01882 /*
01883 ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
01884 ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
01885 ** is disabled. The default value for the auto-vacuum property is 
01886 ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
01887 */
01888 int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
01889   BtShared *pBt = p->pBt;;
01890 #ifdef SQLITE_OMIT_AUTOVACUUM
01891   return SQLITE_READONLY;
01892 #else
01893   if( pBt->pageSizeFixed ){
01894     return SQLITE_READONLY;
01895   }
01896   pBt->autoVacuum = (autoVacuum?1:0);
01897   return SQLITE_OK;
01898 #endif
01899 }
01900 
01901 /*
01902 ** Return the value of the 'auto-vacuum' property. If auto-vacuum is 
01903 ** enabled 1 is returned. Otherwise 0.
01904 */
01905 int sqlite3BtreeGetAutoVacuum(Btree *p){
01906 #ifdef SQLITE_OMIT_AUTOVACUUM
01907   return 0;
01908 #else
01909   return p->pBt->autoVacuum;
01910 #endif
01911 }
01912 
01913 
01914 /*
01915 ** Get a reference to pPage1 of the database file.  This will
01916 ** also acquire a readlock on that file.
01917 **
01918 ** SQLITE_OK is returned on success.  If the file is not a
01919 ** well-formed database file, then SQLITE_CORRUPT is returned.
01920 ** SQLITE_BUSY is returned if the database is locked.  SQLITE_NOMEM
01921 ** is returned if we run out of memory.  SQLITE_PROTOCOL is returned
01922 ** if there is a locking protocol violation.
01923 */
01924 static int lockBtree(BtShared *pBt){
01925   int rc, pageSize;
01926   MemPage *pPage1;
01927   if( pBt->pPage1 ) return SQLITE_OK;
01928   rc = getPage(pBt, 1, &pPage1);
01929   if( rc!=SQLITE_OK ) return rc;
01930   
01931 
01932   /* Do some checking to help insure the file we opened really is
01933   ** a valid database file. 
01934   */
01935   rc = SQLITE_NOTADB;
01936   if( sqlite3pager_pagecount(pBt->pPager)>0 ){
01937     u8 *page1 = pPage1->aData;
01938     if( memcmp(page1, zMagicHeader, 16)!=0 ){
01939       goto page1_init_failed;
01940     }
01941     if( page1[18]>1 || page1[19]>1 ){
01942       goto page1_init_failed;
01943     }
01944     pageSize = get2byte(&page1[16]);
01945     if( ((pageSize-1)&pageSize)!=0 ){
01946       goto page1_init_failed;
01947     }
01948     assert( (pageSize & 7)==0 );
01949     pBt->pageSize = pageSize;
01950     pBt->usableSize = pageSize - page1[20];
01951     if( pBt->usableSize<500 ){
01952       goto page1_init_failed;
01953     }
01954     pBt->maxEmbedFrac = page1[21];
01955     pBt->minEmbedFrac = page1[22];
01956     pBt->minLeafFrac = page1[23];
01957 #ifndef SQLITE_OMIT_AUTOVACUUM
01958     pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
01959 #endif
01960   }
01961 
01962   /* maxLocal is the maximum amount of payload to store locally for
01963   ** a cell.  Make sure it is small enough so that at least minFanout
01964   ** cells can will fit on one page.  We assume a 10-byte page header.
01965   ** Besides the payload, the cell must store:
01966   **     2-byte pointer to the cell
01967   **     4-byte child pointer
01968   **     9-byte nKey value
01969   **     4-byte nData value
01970   **     4-byte overflow page pointer
01971   ** So a cell consists of a 2-byte poiner, a header which is as much as
01972   ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
01973   ** page pointer.
01974   */
01975   pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
01976   pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
01977   pBt->maxLeaf = pBt->usableSize - 35;
01978   pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
01979   if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
01980     goto page1_init_failed;
01981   }
01982   assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
01983   pBt->pPage1 = pPage1;
01984   return SQLITE_OK;
01985 
01986 page1_init_failed:
01987   releasePage(pPage1);
01988   pBt->pPage1 = 0;
01989   return rc;
01990 }
01991 
01992 /*
01993 ** This routine works like lockBtree() except that it also invokes the
01994 ** busy callback if there is lock contention.
01995 */
01996 static int lockBtreeWithRetry(Btree *pRef){
01997   int rc = SQLITE_OK;
01998   if( pRef->inTrans==TRANS_NONE ){
01999     u8 inTransaction = pRef->pBt->inTransaction;
02000     btreeIntegrity(pRef);
02001     rc = sqlite3BtreeBeginTrans(pRef, 0);
02002     pRef->pBt->inTransaction = inTransaction;
02003     pRef->inTrans = TRANS_NONE;
02004     if( rc==SQLITE_OK ){
02005       pRef->pBt->nTransaction--;
02006     }
02007     btreeIntegrity(pRef);
02008   }
02009   return rc;
02010 }
02011        
02012 
02013 /*
02014 ** If there are no outstanding cursors and we are not in the middle
02015 ** of a transaction but there is a read lock on the database, then
02016 ** this routine unrefs the first page of the database file which 
02017 ** has the effect of releasing the read lock.
02018 **
02019 ** If there are any outstanding cursors, this routine is a no-op.
02020 **
02021 ** If there is a transaction in progress, this routine is a no-op.
02022 */
02023 static void unlockBtreeIfUnused(BtShared *pBt){
02024   if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
02025     if( pBt->pPage1->aData==0 ){
02026       MemPage *pPage = pBt->pPage1;
02027       pPage->aData = &((u8*)pPage)[-pBt->pageSize];
02028       pPage->pBt = pBt;
02029       pPage->pgno = 1;
02030     }
02031     releasePage(pBt->pPage1);
02032     pBt->pPage1 = 0;
02033     pBt->inStmt = 0;
02034   }
02035 }
02036 
02037 /*
02038 ** Create a new database by initializing the first page of the
02039 ** file.
02040 */
02041 static int newDatabase(BtShared *pBt){
02042   MemPage *pP1;
02043   unsigned char *data;
02044   int rc;
02045   if( sqlite3pager_pagecount(pBt->pPager)>0 ) return SQLITE_OK;
02046   pP1 = pBt->pPage1;
02047   assert( pP1!=0 );
02048   data = pP1->aData;
02049   rc = sqlite3pager_write(data);
02050   if( rc ) return rc;
02051   memcpy(data, zMagicHeader, sizeof(zMagicHeader));
02052   assert( sizeof(zMagicHeader)==16 );
02053   put2byte(&data[16], pBt->pageSize);
02054   data[18] = 1;
02055   data[19] = 1;
02056   data[20] = pBt->pageSize - pBt->usableSize;
02057   data[21] = pBt->maxEmbedFrac;
02058   data[22] = pBt->minEmbedFrac;
02059   data[23] = pBt->minLeafFrac;
02060   memset(&data[24], 0, 100-24);
02061   zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
02062   pBt->pageSizeFixed = 1;
02063 #ifndef SQLITE_OMIT_AUTOVACUUM
02064   if( pBt->autoVacuum ){
02065     put4byte(&data[36 + 4*4], 1);
02066   }
02067 #endif
02068   return SQLITE_OK;
02069 }
02070 
02071 /*
02072 ** Attempt to start a new transaction. A write-transaction
02073 ** is started if the second argument is nonzero, otherwise a read-
02074 ** transaction.  If the second argument is 2 or more and exclusive
02075 ** transaction is started, meaning that no other process is allowed
02076 ** to access the database.  A preexisting transaction may not be
02077 ** upgraded to exclusive by calling this routine a second time - the
02078 ** exclusivity flag only works for a new transaction.
02079 **
02080 ** A write-transaction must be started before attempting any 
02081 ** changes to the database.  None of the following routines 
02082 ** will work unless a transaction is started first:
02083 **
02084 **      sqlite3BtreeCreateTable()
02085 **      sqlite3BtreeCreateIndex()
02086 **      sqlite3BtreeClearTable()
02087 **      sqlite3BtreeDropTable()
02088 **      sqlite3BtreeInsert()
02089 **      sqlite3BtreeDelete()
02090 **      sqlite3BtreeUpdateMeta()
02091 **
02092 ** If an initial attempt to acquire the lock fails because of lock contention
02093 ** and the database was previously unlocked, then invoke the busy handler
02094 ** if there is one.  But if there was previously a read-lock, do not
02095 ** invoke the busy handler - just return SQLITE_BUSY.  SQLITE_BUSY is 
02096 ** returned when there is already a read-lock in order to avoid a deadlock.
02097 **
02098 ** Suppose there are two processes A and B.  A has a read lock and B has
02099 ** a reserved lock.  B tries to promote to exclusive but is blocked because
02100 ** of A's read lock.  A tries to promote to reserved but is blocked by B.
02101 ** One or the other of the two processes must give way or there can be
02102 ** no progress.  By returning SQLITE_BUSY and not invoking the busy callback
02103 ** when A already has a read lock, we encourage A to give up and let B
02104 ** proceed.
02105 */
02106 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
02107   BtShared *pBt = p->pBt;
02108   int rc = SQLITE_OK;
02109 
02110   btreeIntegrity(p);
02111 
02112   /* If the btree is already in a write-transaction, or it
02113   ** is already in a read-transaction and a read-transaction
02114   ** is requested, this is a no-op.
02115   */
02116   if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
02117     return SQLITE_OK;
02118   }
02119 
02120   /* Write transactions are not possible on a read-only database */
02121   if( pBt->readOnly && wrflag ){
02122     return SQLITE_READONLY;
02123   }
02124 
02125   /* If another database handle has already opened a write transaction 
02126   ** on this shared-btree structure and a second write transaction is
02127   ** requested, return SQLITE_BUSY.
02128   */
02129   if( pBt->inTransaction==TRANS_WRITE && wrflag ){
02130     return SQLITE_BUSY;
02131   }
02132 
02133   do {
02134     if( pBt->pPage1==0 ){
02135       rc = lockBtree(pBt);
02136     }
02137   
02138     if( rc==SQLITE_OK && wrflag ){
02139       rc = sqlite3pager_begin(pBt->pPage1->aData, wrflag>1);
02140       if( rc==SQLITE_OK ){
02141         rc = newDatabase(pBt);
02142       }
02143     }
02144   
02145     if( rc==SQLITE_OK ){
02146       if( wrflag ) pBt->inStmt = 0;
02147     }else{
02148       unlockBtreeIfUnused(pBt);
02149     }
02150   }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
02151           sqlite3InvokeBusyHandler(pBt->pBusyHandler) );
02152 
02153   if( rc==SQLITE_OK ){
02154     if( p->inTrans==TRANS_NONE ){
02155       pBt->nTransaction++;
02156     }
02157     p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
02158     if( p->inTrans>pBt->inTransaction ){
02159       pBt->inTransaction = p->inTrans;
02160     }
02161   }
02162 
02163   btreeIntegrity(p);
02164   return rc;
02165 }
02166 
02167 #ifndef SQLITE_OMIT_AUTOVACUUM
02168 
02169 /*
02170 ** Set the pointer-map entries for all children of page pPage. Also, if
02171 ** pPage contains cells that point to overflow pages, set the pointer
02172 ** map entries for the overflow pages as well.
02173 */
02174 static int setChildPtrmaps(MemPage *pPage){
02175   int i;                             /* Counter variable */
02176   int nCell;                         /* Number of cells in page pPage */
02177   int rc = SQLITE_OK;                /* Return code */
02178   BtShared *pBt = pPage->pBt;
02179   int isInitOrig = pPage->isInit;
02180   Pgno pgno = pPage->pgno;
02181 
02182   initPage(pPage, 0);
02183   nCell = pPage->nCell;
02184 
02185   for(i=0; i<nCell; i++){
02186     u8 *pCell = findCell(pPage, i);
02187 
02188     rc = ptrmapPutOvflPtr(pPage, pCell);
02189     if( rc!=SQLITE_OK ){
02190       goto set_child_ptrmaps_out;
02191     }
02192 
02193     if( !pPage->leaf ){
02194       Pgno childPgno = get4byte(pCell);
02195       rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
02196       if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
02197     }
02198   }
02199 
02200   if( !pPage->leaf ){
02201     Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
02202     rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
02203   }
02204 
02205 set_child_ptrmaps_out:
02206   pPage->isInit = isInitOrig;
02207   return rc;
02208 }
02209 
02210 /*
02211 ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
02212 ** page, is a pointer to page iFrom. Modify this pointer so that it points to
02213 ** iTo. Parameter eType describes the type of pointer to be modified, as 
02214 ** follows:
02215 **
02216 ** PTRMAP_BTREE:     pPage is a btree-page. The pointer points at a child 
02217 **                   page of pPage.
02218 **
02219 ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
02220 **                   page pointed to by one of the cells on pPage.
02221 **
02222 ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
02223 **                   overflow page in the list.
02224 */
02225 static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
02226   if( eType==PTRMAP_OVERFLOW2 ){
02227     /* The pointer is always the first 4 bytes of the page in this case.  */
02228     if( get4byte(pPage->aData)!=iFrom ){
02229       return SQLITE_CORRUPT_BKPT;
02230     }
02231     put4byte(pPage->aData, iTo);
02232   }else{
02233     int isInitOrig = pPage->isInit;
02234     int i;
02235     int nCell;
02236 
02237     initPage(pPage, 0);
02238     nCell = pPage->nCell;
02239 
02240     for(i=0; i<nCell; i++){
02241       u8 *pCell = findCell(pPage, i);
02242       if( eType==PTRMAP_OVERFLOW1 ){
02243         CellInfo info;
02244         parseCellPtr(pPage, pCell, &info);
02245         if( info.iOverflow ){
02246           if( iFrom==get4byte(&pCell[info.iOverflow]) ){
02247             put4byte(&pCell[info.iOverflow], iTo);
02248             break;
02249           }
02250         }
02251       }else{
02252         if( get4byte(pCell)==iFrom ){
02253           put4byte(pCell, iTo);
02254           break;
02255         }
02256       }
02257     }
02258   
02259     if( i==nCell ){
02260       if( eType!=PTRMAP_BTREE || 
02261           get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
02262         return SQLITE_CORRUPT_BKPT;
02263       }
02264       put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
02265     }
02266 
02267     pPage->isInit = isInitOrig;
02268   }
02269   return SQLITE_OK;
02270 }
02271 
02272 
02273 /*
02274 ** Move the open database page pDbPage to location iFreePage in the 
02275 ** database. The pDbPage reference remains valid.
02276 */
02277 static int relocatePage(
02278   BtShared *pBt,           /* Btree */
02279   MemPage *pDbPage,        /* Open page to move */
02280   u8 eType,                /* Pointer map 'type' entry for pDbPage */
02281   Pgno iPtrPage,           /* Pointer map 'page-no' entry for pDbPage */
02282   Pgno iFreePage           /* The location to move pDbPage to */
02283 ){
02284   MemPage *pPtrPage;   /* The page that contains a pointer to pDbPage */
02285   Pgno iDbPage = pDbPage->pgno;
02286   Pager *pPager = pBt->pPager;
02287   int rc;
02288 
02289   assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || 
02290       eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
02291 
02292   /* Move page iDbPage from it's current location to page number iFreePage */
02293   TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", 
02294       iDbPage, iFreePage, iPtrPage, eType));
02295   rc = sqlite3pager_movepage(pPager, pDbPage->aData, iFreePage);
02296   if( rc!=SQLITE_OK ){
02297     return rc;
02298   }
02299   pDbPage->pgno = iFreePage;
02300 
02301   /* If pDbPage was a btree-page, then it may have child pages and/or cells
02302   ** that point to overflow pages. The pointer map entries for all these
02303   ** pages need to be changed.
02304   **
02305   ** If pDbPage is an overflow page, then the first 4 bytes may store a
02306   ** pointer to a subsequent overflow page. If this is the case, then
02307   ** the pointer map needs to be updated for the subsequent overflow page.
02308   */
02309   if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
02310     rc = setChildPtrmaps(pDbPage);
02311     if( rc!=SQLITE_OK ){
02312       return rc;
02313     }
02314   }else{
02315     Pgno nextOvfl = get4byte(pDbPage->aData);
02316     if( nextOvfl!=0 ){
02317       rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
02318       if( rc!=SQLITE_OK ){
02319         return rc;
02320       }
02321     }
02322   }
02323 
02324   /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
02325   ** that it points at iFreePage. Also fix the pointer map entry for
02326   ** iPtrPage.
02327   */
02328   if( eType!=PTRMAP_ROOTPAGE ){
02329     rc = getPage(pBt, iPtrPage, &pPtrPage);
02330     if( rc!=SQLITE_OK ){
02331       return rc;
02332     }
02333     rc = sqlite3pager_write(pPtrPage->aData);
02334     if( rc!=SQLITE_OK ){
02335       releasePage(pPtrPage);
02336       return rc;
02337     }
02338     rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
02339     releasePage(pPtrPage);
02340     if( rc==SQLITE_OK ){
02341       rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
02342     }
02343   }
02344   return rc;
02345 }
02346 
02347 /* Forward declaration required by autoVacuumCommit(). */
02348 static int allocatePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
02349 
02350 /*
02351 ** This routine is called prior to sqlite3pager_commit when a transaction
02352 ** is commited for an auto-vacuum database.
02353 */
02354 static int autoVacuumCommit(BtShared *pBt, Pgno *nTrunc){
02355   Pager *pPager = pBt->pPager;
02356   Pgno nFreeList;            /* Number of pages remaining on the free-list. */
02357   int nPtrMap;               /* Number of pointer-map pages deallocated */
02358   Pgno origSize;             /* Pages in the database file */
02359   Pgno finSize;              /* Pages in the database file after truncation */
02360   int rc;                    /* Return code */
02361   u8 eType;
02362   int pgsz = pBt->pageSize;  /* Page size for this database */
02363   Pgno iDbPage;              /* The database page to move */
02364   MemPage *pDbMemPage = 0;   /* "" */
02365   Pgno iPtrPage;             /* The page that contains a pointer to iDbPage */
02366   Pgno iFreePage;            /* The free-list page to move iDbPage to */
02367   MemPage *pFreeMemPage = 0; /* "" */
02368 
02369 #ifndef NDEBUG
02370   int nRef = *sqlite3pager_stats(pPager);
02371 #endif
02372 
02373   assert( pBt->autoVacuum );
02374   if( PTRMAP_ISPAGE(pBt, sqlite3pager_pagecount(pPager)) ){
02375     return SQLITE_CORRUPT_BKPT;
02376   }
02377 
02378   /* Figure out how many free-pages are in the database. If there are no
02379   ** free pages, then auto-vacuum is a no-op.
02380   */
02381   nFreeList = get4byte(&pBt->pPage1->aData[36]);
02382   if( nFreeList==0 ){
02383     *nTrunc = 0;
02384     return SQLITE_OK;
02385   }
02386 
02387   /* This block figures out how many pages there are in the database
02388   ** now (variable origSize), and how many there will be after the
02389   ** truncation (variable finSize).
02390   **
02391   ** The final size is the original size, less the number of free pages
02392   ** in the database, less any pointer-map pages that will no longer
02393   ** be required, less 1 if the pending-byte page was part of the database
02394   ** but is not after the truncation.
02395   **/
02396   origSize = sqlite3pager_pagecount(pPager);
02397   if( origSize==PENDING_BYTE_PAGE(pBt) ){
02398     origSize--;
02399   }
02400   nPtrMap = (nFreeList-origSize+PTRMAP_PAGENO(pBt, origSize)+pgsz/5)/(pgsz/5);
02401   finSize = origSize - nFreeList - nPtrMap;
02402   if( origSize>PENDING_BYTE_PAGE(pBt) && finSize<=PENDING_BYTE_PAGE(pBt) ){
02403     finSize--;
02404   }
02405   while( PTRMAP_ISPAGE(pBt, finSize) || finSize==PENDING_BYTE_PAGE(pBt) ){
02406     finSize--;
02407   }
02408   TRACE(("AUTOVACUUM: Begin (db size %d->%d)\n", origSize, finSize));
02409 
02410   /* Variable 'finSize' will be the size of the file in pages after
02411   ** the auto-vacuum has completed (the current file size minus the number
02412   ** of pages on the free list). Loop through the pages that lie beyond
02413   ** this mark, and if they are not already on the free list, move them
02414   ** to a free page earlier in the file (somewhere before finSize).
02415   */
02416   for( iDbPage=finSize+1; iDbPage<=origSize; iDbPage++ ){
02417     /* If iDbPage is a pointer map page, or the pending-byte page, skip it. */
02418     if( PTRMAP_ISPAGE(pBt, iDbPage) || iDbPage==PENDING_BYTE_PAGE(pBt) ){
02419       continue;
02420     }
02421 
02422     rc = ptrmapGet(pBt, iDbPage, &eType, &iPtrPage);
02423     if( rc!=SQLITE_OK ) goto autovacuum_out;
02424     if( eType==PTRMAP_ROOTPAGE ){
02425       rc = SQLITE_CORRUPT_BKPT;
02426       goto autovacuum_out;
02427     }
02428 
02429     /* If iDbPage is free, do not swap it.  */
02430     if( eType==PTRMAP_FREEPAGE ){
02431       continue;
02432     }
02433     rc = getPage(pBt, iDbPage, &pDbMemPage);
02434     if( rc!=SQLITE_OK ) goto autovacuum_out;
02435 
02436     /* Find the next page in the free-list that is not already at the end 
02437     ** of the file. A page can be pulled off the free list using the 
02438     ** allocatePage() routine.
02439     */
02440     do{
02441       if( pFreeMemPage ){
02442         releasePage(pFreeMemPage);
02443         pFreeMemPage = 0;
02444       }
02445       rc = allocatePage(pBt, &pFreeMemPage, &iFreePage, 0, 0);
02446       if( rc!=SQLITE_OK ){
02447         releasePage(pDbMemPage);
02448         goto autovacuum_out;
02449       }
02450       assert( iFreePage<=origSize );
02451     }while( iFreePage>finSize );
02452     releasePage(pFreeMemPage);
02453     pFreeMemPage = 0;
02454 
02455     /* Relocate the page into the body of the file. Note that although the 
02456     ** page has moved within the database file, the pDbMemPage pointer 
02457     ** remains valid. This means that this function can run without
02458     ** invalidating cursors open on the btree. This is important in 
02459     ** shared-cache mode.
02460     */
02461     rc = relocatePage(pBt, pDbMemPage, eType, iPtrPage, iFreePage);
02462     releasePage(pDbMemPage);
02463     if( rc!=SQLITE_OK ) goto autovacuum_out;
02464   }
02465 
02466   /* The entire free-list has been swapped to the end of the file. So
02467   ** truncate the database file to finSize pages and consider the
02468   ** free-list empty.
02469   */
02470   rc = sqlite3pager_write(pBt->pPage1->aData);
02471   if( rc!=SQLITE_OK ) goto autovacuum_out;
02472   put4byte(&pBt->pPage1->aData[32], 0);
02473   put4byte(&pBt->pPage1->aData[36], 0);
02474   *nTrunc = finSize;
02475   assert( finSize!=PENDING_BYTE_PAGE(pBt) );
02476 
02477 autovacuum_out:
02478   assert( nRef==*sqlite3pager_stats(pPager) );
02479   if( rc!=SQLITE_OK ){
02480     sqlite3pager_rollback(pPager);
02481   }
02482   return rc;
02483 }
02484 #endif
02485 
02486 /*
02487 ** Commit the transaction currently in progress.
02488 **
02489 ** This will release the write lock on the database file.  If there
02490 ** are no active cursors, it also releases the read lock.
02491 */
02492 int sqlite3BtreeCommit(Btree *p){
02493   BtShared *pBt = p->pBt;
02494 
02495   btreeIntegrity(p);
02496 
02497   /* If the handle has a write-transaction open, commit the shared-btrees 
02498   ** transaction and set the shared state to TRANS_READ.
02499   */
02500   if( p->inTrans==TRANS_WRITE ){
02501     int rc;
02502     assert( pBt->inTransaction==TRANS_WRITE );
02503     assert( pBt->nTransaction>0 );
02504     rc = sqlite3pager_commit(pBt->pPager);
02505     if( rc!=SQLITE_OK ){
02506       return rc;
02507     }
02508     pBt->inTransaction = TRANS_READ;
02509     pBt->inStmt = 0;
02510   }
02511   unlockAllTables(p);
02512 
02513   /* If the handle has any kind of transaction open, decrement the transaction
02514   ** count of the shared btree. If the transaction count reaches 0, set
02515   ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
02516   ** will unlock the pager.
02517   */
02518   if( p->inTrans!=TRANS_NONE ){
02519     pBt->nTransaction--;
02520     if( 0==pBt->nTransaction ){
02521       pBt->inTransaction = TRANS_NONE;
02522     }
02523   }
02524 
02525   /* Set the handles current transaction state to TRANS_NONE and unlock
02526   ** the pager if this call closed the only read or write transaction.
02527   */
02528   p->inTrans = TRANS_NONE;
02529   unlockBtreeIfUnused(pBt);
02530 
02531   btreeIntegrity(p);
02532   return SQLITE_OK;
02533 }
02534 
02535 #ifndef NDEBUG
02536 /*
02537 ** Return the number of write-cursors open on this handle. This is for use
02538 ** in assert() expressions, so it is only compiled if NDEBUG is not
02539 ** defined.
02540 */
02541 static int countWriteCursors(BtShared *pBt){
02542   BtCursor *pCur;
02543   int r = 0;
02544   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
02545     if( pCur->wrFlag ) r++; 
02546   }
02547   return r;
02548 }
02549 #endif
02550 
02551 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
02552 /*
02553 ** Print debugging information about all cursors to standard output.
02554 */
02555 void sqlite3BtreeCursorList(Btree *p){
02556   BtCursor *pCur;
02557   BtShared *pBt = p->pBt;
02558   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
02559     MemPage *pPage = pCur->pPage;
02560     char *zMode = pCur->wrFlag ? "rw" : "ro";
02561     sqlite3DebugPrintf("CURSOR %p rooted at %4d(%s) currently at %d.%d%s\n",
02562        pCur, pCur->pgnoRoot, zMode,
02563        pPage ? pPage->pgno : 0, pCur->idx,
02564        (pCur->eState==CURSOR_VALID) ? "" : " eof"
02565     );
02566   }
02567 }
02568 #endif
02569 
02570 /*
02571 ** Rollback the transaction in progress.  All cursors will be
02572 ** invalided by this operation.  Any attempt to use a cursor
02573 ** that was open at the beginning of this operation will result
02574 ** in an error.
02575 **
02576 ** This will release the write lock on the database file.  If there
02577 ** are no active cursors, it also releases the read lock.
02578 */
02579 int sqlite3BtreeRollback(Btree *p){
02580   int rc;
02581   BtShared *pBt = p->pBt;
02582   MemPage *pPage1;
02583 
02584   rc = saveAllCursors(pBt, 0, 0);
02585 #ifndef SQLITE_OMIT_SHARED_CACHE
02586   if( rc!=SQLITE_OK ){
02587     /* This is a horrible situation. An IO or malloc() error occured whilst
02588     ** trying to save cursor positions. If this is an automatic rollback (as
02589     ** the result of a constraint, malloc() failure or IO error) then 
02590     ** the cache may be internally inconsistent (not contain valid trees) so
02591     ** we cannot simply return the error to the caller. Instead, abort 
02592     ** all queries that may be using any of the cursors that failed to save.
02593     */
02594     while( pBt->pCursor ){
02595       sqlite3 *db = pBt->pCursor->pBtree->pSqlite;
02596       if( db ){
02597         sqlite3AbortOtherActiveVdbes(db, 0);
02598       }
02599     }
02600   }
02601 #endif
02602   btreeIntegrity(p);
02603   unlockAllTables(p);
02604 
02605   if( p->inTrans==TRANS_WRITE ){
02606     int rc2;
02607 
02608     assert( TRANS_WRITE==pBt->inTransaction );
02609     rc2 = sqlite3pager_rollback(pBt->pPager);
02610     if( rc2!=SQLITE_OK ){
02611       rc = rc2;
02612     }
02613 
02614     /* The rollback may have destroyed the pPage1->aData value.  So
02615     ** call getPage() on page 1 again to make sure pPage1->aData is
02616     ** set correctly. */
02617     if( getPage(pBt, 1, &pPage1)==SQLITE_OK ){
02618       releasePage(pPage1);
02619     }
02620     assert( countWriteCursors(pBt)==0 );
02621     pBt->inTransaction = TRANS_READ;
02622   }
02623 
02624   if( p->inTrans!=TRANS_NONE ){
02625     assert( pBt->nTransaction>0 );
02626     pBt->nTransaction--;
02627     if( 0==pBt->nTransaction ){
02628       pBt->inTransaction = TRANS_NONE;
02629     }
02630   }
02631 
02632   p->inTrans = TRANS_NONE;
02633   pBt->inStmt = 0;
02634   unlockBtreeIfUnused(pBt);
02635 
02636   btreeIntegrity(p);
02637   return rc;
02638 }
02639 
02640 /*
02641 ** Start a statement subtransaction.  The subtransaction can
02642 ** can be rolled back independently of the main transaction.
02643 ** You must start a transaction before starting a subtransaction.
02644 ** The subtransaction is ended automatically if the main transaction
02645 ** commits or rolls back.
02646 **
02647 ** Only one subtransaction may be active at a time.  It is an error to try
02648 ** to start a new subtransaction if another subtransaction is already active.
02649 **
02650 ** Statement subtransactions are used around individual SQL statements
02651 ** that are contained within a BEGIN...COMMIT block.  If a constraint
02652 ** error occurs within the statement, the effect of that one statement
02653 ** can be rolled back without having to rollback the entire transaction.
02654 */
02655 int sqlite3BtreeBeginStmt(Btree *p){
02656   int rc;
02657   BtShared *pBt = p->pBt;
02658   if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){
02659     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
02660   }
02661   assert( pBt->inTransaction==TRANS_WRITE );
02662   rc = pBt->readOnly ? SQLITE_OK : sqlite3pager_stmt_begin(pBt->pPager);
02663   pBt->inStmt = 1;
02664   return rc;
02665 }
02666 
02667 
02668 /*
02669 ** Commit the statment subtransaction currently in progress.  If no
02670 ** subtransaction is active, this is a no-op.
02671 */
02672 int sqlite3BtreeCommitStmt(Btree *p){
02673   int rc;
02674   BtShared *pBt = p->pBt;
02675   if( pBt->inStmt && !pBt->readOnly ){
02676     rc = sqlite3pager_stmt_commit(pBt->pPager);
02677   }else{
02678     rc = SQLITE_OK;
02679   }
02680   pBt->inStmt = 0;
02681   return rc;
02682 }
02683 
02684 /*
02685 ** Rollback the active statement subtransaction.  If no subtransaction
02686 ** is active this routine is a no-op.
02687 **
02688 ** All cursors will be invalidated by this operation.  Any attempt
02689 ** to use a cursor that was open at the beginning of this operation
02690 ** will result in an error.
02691 */
02692 int sqlite3BtreeRollbackStmt(Btree *p){
02693   int rc = SQLITE_OK;
02694   BtShared *pBt = p->pBt;
02695   sqlite3MallocDisallow();
02696   if( pBt->inStmt && !pBt->readOnly ){
02697     rc = sqlite3pager_stmt_rollback(pBt->pPager);
02698     assert( countWriteCursors(pBt)==0 );
02699     pBt->inStmt = 0;
02700   }
02701   sqlite3MallocAllow();
02702   return rc;
02703 }
02704 
02705 /*
02706 ** Default key comparison function to be used if no comparison function
02707 ** is specified on the sqlite3BtreeCursor() call.
02708 */
02709 static int dfltCompare(
02710   void *NotUsed,             /* User data is not used */
02711   int n1, const void *p1,    /* First key to compare */
02712   int n2, const void *p2     /* Second key to compare */
02713 ){
02714   int c;
02715   c = memcmp(p1, p2, n1<n2 ? n1 : n2);
02716   if( c==0 ){
02717     c = n1 - n2;
02718   }
02719   return c;
02720 }
02721 
02722 /*
02723 ** Create a new cursor for the BTree whose root is on the page
02724 ** iTable.  The act of acquiring a cursor gets a read lock on 
02725 ** the database file.
02726 **
02727 ** If wrFlag==0, then the cursor can only be used for reading.
02728 ** If wrFlag==1, then the cursor can be used for reading or for
02729 ** writing if other conditions for writing are also met.  These
02730 ** are the conditions that must be met in order for writing to
02731 ** be allowed:
02732 **
02733 ** 1:  The cursor must have been opened with wrFlag==1
02734 **
02735 ** 2:  No other cursors may be open with wrFlag==0 on the same table
02736 **
02737 ** 3:  The database must be writable (not on read-only media)
02738 **
02739 ** 4:  There must be an active transaction.
02740 **
02741 ** Condition 2 warrants further discussion.  If any cursor is opened
02742 ** on a table with wrFlag==0, that prevents all other cursors from
02743 ** writing to that table.  This is a kind of "read-lock".  When a cursor
02744 ** is opened with wrFlag==0 it is guaranteed that the table will not
02745 ** change as long as the cursor is open.  This allows the cursor to
02746 ** do a sequential scan of the table without having to worry about
02747 ** entries being inserted or deleted during the scan.  Cursors should
02748 ** be opened with wrFlag==0 only if this read-lock property is needed.
02749 ** That is to say, cursors should be opened with wrFlag==0 only if they
02750 ** intend to use the sqlite3BtreeNext() system call.  All other cursors
02751 ** should be opened with wrFlag==1 even if they never really intend
02752 ** to write.
02753 ** 
02754 ** No checking is done to make sure that page iTable really is the
02755 ** root page of a b-tree.  If it is not, then the cursor acquired
02756 ** will not work correctly.
02757 **
02758 ** The comparison function must be logically the same for every cursor
02759 ** on a particular table.  Changing the comparison function will result
02760 ** in incorrect operations.  If the comparison function is NULL, a
02761 ** default comparison function is used.  The comparison function is
02762 ** always ignored for INTKEY tables.
02763 */
02764 int sqlite3BtreeCursor(
02765   Btree *p,                                   /* The btree */
02766   int iTable,                                 /* Root page of table to open */
02767   int wrFlag,                                 /* 1 to write. 0 read-only */
02768   int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
02769   void *pArg,                                 /* First arg to xCompare() */
02770   BtCursor **ppCur                            /* Write new cursor here */
02771 ){
02772   int rc;
02773   BtCursor *pCur;
02774   BtShared *pBt = p->pBt;
02775 
02776   *ppCur = 0;
02777   if( wrFlag ){
02778     if( pBt->readOnly ){
02779       return SQLITE_READONLY;
02780     }
02781     if( checkReadLocks(pBt, iTable, 0) ){
02782       return SQLITE_LOCKED;
02783     }
02784   }
02785 
02786   if( pBt->pPage1==0 ){
02787     rc = lockBtreeWithRetry(p);
02788     if( rc!=SQLITE_OK ){
02789       return rc;
02790     }
02791   }
02792   pCur = sqliteMalloc( sizeof(*pCur) );
02793   if( pCur==0 ){
02794     rc = SQLITE_NOMEM;
02795     goto create_cursor_exception;
02796   }
02797   pCur->pgnoRoot = (Pgno)iTable;
02798   if( iTable==1 && sqlite3pager_pagecount(pBt->pPager)==0 ){
02799     rc = SQLITE_EMPTY;
02800     goto create_cursor_exception;
02801   }
02802   rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
02803   if( rc!=SQLITE_OK ){
02804     goto create_cursor_exception;
02805   }
02806 
02807   /* Now that no other errors can occur, finish filling in the BtCursor
02808   ** variables, link the cursor into the BtShared list and set *ppCur (the
02809   ** output argument to this function).
02810   */
02811   pCur->xCompare = xCmp ? xCmp : dfltCompare;
02812   pCur->pArg = pArg;
02813   pCur->pBtree = p;
02814   pCur->wrFlag = wrFlag;
02815   pCur->pNext = pBt->pCursor;
02816   if( pCur->pNext ){
02817     pCur->pNext->pPrev = pCur;
02818   }
02819   pBt->pCursor = pCur;
02820   pCur->eState = CURSOR_INVALID;
02821   *ppCur = pCur;
02822 
02823   return SQLITE_OK;
02824 create_cursor_exception:
02825   if( pCur ){
02826     releasePage(pCur->pPage);
02827     sqliteFree(pCur);
02828   }
02829   unlockBtreeIfUnused(pBt);
02830   return rc;
02831 }
02832 
02833 #if 0  /* Not Used */
02834 /*
02835 ** Change the value of the comparison function used by a cursor.
02836 */
02837 void sqlite3BtreeSetCompare(
02838   BtCursor *pCur,     /* The cursor to whose comparison function is changed */
02839   int(*xCmp)(void*,int,const void*,int,const void*), /* New comparison func */
02840   void *pArg          /* First argument to xCmp() */
02841 ){
02842   pCur->xCompare = xCmp ? xCmp : dfltCompare;
02843   pCur->pArg = pArg;
02844 }
02845 #endif
02846 
02847 /*
02848 ** Close a cursor.  The read lock on the database file is released
02849 ** when the last cursor is closed.
02850 */
02851 int sqlite3BtreeCloseCursor(BtCursor *pCur){
02852   BtShared *pBt = pCur->pBtree->pBt;
02853   restoreOrClearCursorPosition(pCur, 0);
02854   if( pCur->pPrev ){
02855     pCur->pPrev->pNext = pCur->pNext;
02856   }else{
02857     pBt->pCursor = pCur->pNext;
02858   }
02859   if( pCur->pNext ){
02860     pCur->pNext->pPrev = pCur->pPrev;
02861   }
02862   releasePage(pCur->pPage);
02863   unlockBtreeIfUnused(pBt);
02864   sqliteFree(pCur);
02865   return SQLITE_OK;
02866 }
02867 
02868 /*
02869 ** Make a temporary cursor by filling in the fields of pTempCur.
02870 ** The temporary cursor is not on the cursor list for the Btree.
02871 */
02872 static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
02873   memcpy(pTempCur, pCur, sizeof(*pCur));
02874   pTempCur->pNext = 0;
02875   pTempCur->pPrev = 0;
02876   if( pTempCur->pPage ){
02877     sqlite3pager_ref(pTempCur->pPage->aData);
02878   }
02879 }
02880 
02881 /*
02882 ** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
02883 ** function above.
02884 */
02885 static void releaseTempCursor(BtCursor *pCur){
02886   if( pCur->pPage ){
02887     sqlite3pager_unref(pCur->pPage->aData);
02888   }
02889 }
02890 
02891 /*
02892 ** Make sure the BtCursor.info field of the given cursor is valid.
02893 ** If it is not already valid, call parseCell() to fill it in.
02894 **
02895 ** BtCursor.info is a cache of the information in the current cell.
02896 ** Using this cache reduces the number of calls to parseCell().
02897 */
02898 static void getCellInfo(BtCursor *pCur){
02899   if( pCur->info.nSize==0 ){
02900     parseCell(pCur->pPage, pCur->idx, &pCur->info);
02901   }else{
02902 #ifndef NDEBUG
02903     CellInfo info;
02904     memset(&info, 0, sizeof(info));
02905     parseCell(pCur->pPage, pCur->idx, &info);
02906     assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
02907 #endif
02908   }
02909 }
02910 
02911 /*
02912 ** Set *pSize to the size of the buffer needed to hold the value of
02913 ** the key for the current entry.  If the cursor is not pointing
02914 ** to a valid entry, *pSize is set to 0. 
02915 **
02916 ** For a table with the INTKEY flag set, this routine returns the key
02917 ** itself, not the number of bytes in the key.
02918 */
02919 int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
02920   int rc = restoreOrClearCursorPosition(pCur, 1);
02921   if( rc==SQLITE_OK ){
02922     assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
02923     if( pCur->eState==CURSOR_INVALID ){
02924       *pSize = 0;
02925     }else{
02926       getCellInfo(pCur);
02927       *pSize = pCur->info.nKey;
02928     }
02929   }
02930   return rc;
02931 }
02932 
02933 /*
02934 ** Set *pSize to the number of bytes of data in the entry the
02935 ** cursor currently points to.  Always return SQLITE_OK.
02936 ** Failure is not possible.  If the cursor is not currently
02937 ** pointing to an entry (which can happen, for example, if
02938 ** the database is empty) then *pSize is set to 0.
02939 */
02940 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
02941   int rc = restoreOrClearCursorPosition(pCur, 1);
02942   if( rc==SQLITE_OK ){
02943     assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
02944     if( pCur->eState==CURSOR_INVALID ){
02945       /* Not pointing at a valid entry - set *pSize to 0. */
02946       *pSize = 0;
02947     }else{
02948       getCellInfo(pCur);
02949       *pSize = pCur->info.nData;
02950     }
02951   }
02952   return rc;
02953 }
02954 
02955 /*
02956 ** Read payload information from the entry that the pCur cursor is
02957 ** pointing to.  Begin reading the payload at "offset" and read
02958 ** a total of "amt" bytes.  Put the result in zBuf.
02959 **
02960 ** This routine does not make a distinction between key and data.
02961 ** It just reads bytes from the payload area.  Data might appear
02962 ** on the main page or be scattered out on multiple overflow pages.
02963 */
02964 static int getPayload(
02965   BtCursor *pCur,      /* Cursor pointing to entry to read from */
02966   int offset,          /* Begin reading this far into payload */
02967   int amt,             /* Read this many bytes */
02968   unsigned char *pBuf, /* Write the bytes into this buffer */ 
02969   int skipKey          /* offset begins at data if this is true */
02970 ){
02971   unsigned char *aPayload;
02972   Pgno nextPage;
02973   int rc;
02974   MemPage *pPage;
02975   BtShared *pBt;
02976   int ovflSize;
02977   u32 nKey;
02978 
02979   assert( pCur!=0 && pCur->pPage!=0 );
02980   assert( pCur->eState==CURSOR_VALID );
02981   pBt = pCur->pBtree->pBt;
02982   pPage = pCur->pPage;
02983   pageIntegrity(pPage);
02984   assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
02985   getCellInfo(pCur);
02986   aPayload = pCur->info.pCell + pCur->info.nHeader;
02987   if( pPage->intKey ){
02988     nKey = 0;
02989   }else{
02990     nKey = pCur->info.nKey;
02991   }
02992   assert( offset>=0 );
02993   if( skipKey ){
02994     offset += nKey;
02995   }
02996   if( offset+amt > nKey+pCur->info.nData ){
02997     return SQLITE_ERROR;
02998   }
02999   if( offset<pCur->info.nLocal ){
03000     int a = amt;
03001     if( a+offset>pCur->info.nLocal ){
03002       a = pCur->info.nLocal - offset;
03003     }
03004     memcpy(pBuf, &aPayload[offset], a);
03005     if( a==amt ){
03006       return SQLITE_OK;
03007     }
03008     offset = 0;
03009     pBuf += a;
03010     amt -= a;
03011   }else{
03012     offset -= pCur->info.nLocal;
03013   }
03014   ovflSize = pBt->usableSize - 4;
03015   if( amt>0 ){
03016     nextPage = get4byte(&aPayload[pCur->info.nLocal]);
03017     while( amt>0 && nextPage ){
03018       rc = sqlite3pager_get(pBt->pPager, nextPage, (void**)&aPayload);
03019       if( rc!=0 ){
03020         return rc;
03021       }
03022       nextPage = get4byte(aPayload);
03023       if( offset<ovflSize ){
03024         int a = amt;
03025         if( a + offset > ovflSize ){
03026           a = ovflSize - offset;
03027         }
03028         memcpy(pBuf, &aPayload[offset+4], a);
03029         offset = 0;
03030         amt -= a;
03031         pBuf += a;
03032       }else{
03033         offset -= ovflSize;
03034       }
03035       sqlite3pager_unref(aPayload);
03036     }
03037   }
03038 
03039   if( amt>0 ){
03040     return SQLITE_CORRUPT_BKPT;
03041   }
03042   return SQLITE_OK;
03043 }
03044 
03045 /*
03046 ** Read part of the key associated with cursor pCur.  Exactly
03047 ** "amt" bytes will be transfered into pBuf[].  The transfer
03048 ** begins at "offset".
03049 **
03050 ** Return SQLITE_OK on success or an error code if anything goes
03051 ** wrong.  An error is returned if "offset+amt" is larger than
03052 ** the available payload.
03053 */
03054 int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
03055   int rc = restoreOrClearCursorPosition(pCur, 1);
03056   if( rc==SQLITE_OK ){
03057     assert( pCur->eState==CURSOR_VALID );
03058     assert( pCur->pPage!=0 );
03059     if( pCur->pPage->intKey ){
03060       return SQLITE_CORRUPT_BKPT;
03061     }
03062     assert( pCur->pPage->intKey==0 );
03063     assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
03064     rc = getPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
03065   }
03066   return rc;
03067 }
03068 
03069 /*
03070 ** Read part of the data associated with cursor pCur.  Exactly
03071 ** "amt" bytes will be transfered into pBuf[].  The transfer
03072 ** begins at "offset".
03073 **
03074 ** Return SQLITE_OK on success or an error code if anything goes
03075 ** wrong.  An error is returned if "offset+amt" is larger than
03076 ** the available payload.
03077 */
03078 int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
03079   int rc = restoreOrClearCursorPosition(pCur, 1);
03080   if( rc==SQLITE_OK ){
03081     assert( pCur->eState==CURSOR_VALID );
03082     assert( pCur->pPage!=0 );
03083     assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
03084     rc = getPayload(pCur, offset, amt, pBuf, 1);
03085   }
03086   return rc;
03087 }
03088 
03089 /*
03090 ** Return a pointer to payload information from the entry that the 
03091 ** pCur cursor is pointing to.  The pointer is to the beginning of
03092 ** the key if skipKey==0 and it points to the beginning of data if
03093 ** skipKey==1.  The number of bytes of available key/data is written
03094 ** into *pAmt.  If *pAmt==0, then the value returned will not be
03095 ** a valid pointer.
03096 **
03097 ** This routine is an optimization.  It is common for the entire key
03098 ** and data to fit on the local page and for there to be no overflow
03099 ** pages.  When that is so, this routine can be used to access the
03100 ** key and data without making a copy.  If the key and/or data spills
03101 ** onto overflow pages, then getPayload() must be used to reassembly
03102 ** the key/data and copy it into a preallocated buffer.
03103 **
03104 ** The pointer returned by this routine looks directly into the cached
03105 ** page of the database.  The data might change or move the next time
03106 ** any btree routine is called.
03107 */
03108 static const unsigned char *fetchPayload(
03109   BtCursor *pCur,      /* Cursor pointing to entry to read from */
03110   int *pAmt,           /* Write the number of available bytes here */
03111   int skipKey          /* read beginning at data if this is true */
03112 ){
03113   unsigned char *aPayload;
03114   MemPage *pPage;
03115   u32 nKey;
03116   int nLocal;
03117 
03118   assert( pCur!=0 && pCur->pPage!=0 );
03119   assert( pCur->eState==CURSOR_VALID );
03120   pPage = pCur->pPage;
03121   pageIntegrity(pPage);
03122   assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
03123   getCellInfo(pCur);
03124   aPayload = pCur->info.pCell;
03125   aPayload += pCur->info.nHeader;
03126   if( pPage->intKey ){
03127     nKey = 0;
03128   }else{
03129     nKey = pCur->info.nKey;
03130   }
03131   if( skipKey ){
03132     aPayload += nKey;
03133     nLocal = pCur->info.nLocal - nKey;
03134   }else{
03135     nLocal = pCur->info.nLocal;
03136     if( nLocal>nKey ){
03137       nLocal = nKey;
03138     }
03139   }
03140   *pAmt = nLocal;
03141   return aPayload;
03142 }
03143 
03144 
03145 /*
03146 ** For the entry that cursor pCur is point to, return as
03147 ** many bytes of the key or data as are available on the local
03148 ** b-tree page.  Write the number of available bytes into *pAmt.
03149 **
03150 ** The pointer returned is ephemeral.  The key/data may move
03151 ** or be destroyed on the next call to any Btree routine.
03152 **
03153 ** These routines is used to get quick access to key and data
03154 ** in the common case where no overflow pages are used.
03155 */
03156 const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
03157   if( pCur->eState==CURSOR_VALID ){
03158     return (const void*)fetchPayload(pCur, pAmt, 0);
03159   }
03160   return 0;
03161 }
03162 const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
03163   if( pCur->eState==CURSOR_VALID ){
03164     return (const void*)fetchPayload(pCur, pAmt, 1);
03165   }
03166   return 0;
03167 }
03168 
03169 
03170 /*
03171 ** Move the cursor down to a new child page.  The newPgno argument is the
03172 ** page number of the child page to move to.
03173 */
03174 static int moveToChild(BtCursor *pCur, u32 newPgno){
03175   int rc;
03176   MemPage *pNewPage;
03177   MemPage *pOldPage;
03178   BtShared *pBt = pCur->pBtree->pBt;
03179 
03180   assert( pCur->eState==CURSOR_VALID );
03181   rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
03182   if( rc ) return rc;
03183   pageIntegrity(pNewPage);
03184   pNewPage->idxParent = pCur->idx;
03185   pOldPage = pCur->pPage;
03186   pOldPage->idxShift = 0;
03187   releasePage(pOldPage);
03188   pCur->pPage = pNewPage;
03189   pCur->idx = 0;
03190   pCur->info.nSize = 0;
03191   if( pNewPage->nCell<1 ){
03192     return SQLITE_CORRUPT_BKPT;
03193   }
03194   return SQLITE_OK;
03195 }
03196 
03197 /*
03198 ** Return true if the page is the virtual root of its table.
03199 **
03200 ** The virtual root page is the root page for most tables.  But
03201 ** for the table rooted on page 1, sometime the real root page
03202 ** is empty except for the right-pointer.  In such cases the
03203 ** virtual root page is the page that the right-pointer of page
03204 ** 1 is pointing to.
03205 */
03206 static int isRootPage(MemPage *pPage){
03207   MemPage *pParent = pPage->pParent;
03208   if( pParent==0 ) return 1;
03209   if( pParent->pgno>1 ) return 0;
03210   if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
03211   return 0;
03212 }
03213 
03214 /*
03215 ** Move the cursor up to the parent page.
03216 **
03217 ** pCur->idx is set to the cell index that contains the pointer
03218 ** to the page we are coming from.  If we are coming from the
03219 ** right-most child page then pCur->idx is set to one more than
03220 ** the largest cell index.
03221 */
03222 static void moveToParent(BtCursor *pCur){
03223   MemPage *pParent;
03224   MemPage *pPage;
03225   int idxParent;
03226 
03227   assert( pCur->eState==CURSOR_VALID );
03228   pPage = pCur->pPage;
03229   assert( pPage!=0 );
03230   assert( !isRootPage(pPage) );
03231   pageIntegrity(pPage);
03232   pParent = pPage->pParent;
03233   assert( pParent!=0 );
03234   pageIntegrity(pParent);
03235   idxParent = pPage->idxParent;
03236   sqlite3pager_ref(pParent->aData);
03237   releasePage(pPage);
03238   pCur->pPage = pParent;
03239   pCur->info.nSize = 0;
03240   assert( pParent->idxShift==0 );
03241   pCur->idx = idxParent;
03242 }
03243 
03244 /*
03245 ** Move the cursor to the root page
03246 */
03247 static int moveToRoot(BtCursor *pCur){
03248   MemPage *pRoot;
03249   int rc = SQLITE_OK;
03250   BtShared *pBt = pCur->pBtree->pBt;
03251 
03252   restoreOrClearCursorPosition(pCur, 0);
03253   pRoot = pCur->pPage;
03254   if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
03255     assert( pRoot->isInit );
03256   }else{
03257     if( 
03258       SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
03259     ){
03260       pCur->eState = CURSOR_INVALID;
03261       return rc;
03262     }
03263     releasePage(pCur->pPage);
03264     pageIntegrity(pRoot);
03265     pCur->pPage = pRoot;
03266   }
03267   pCur->idx = 0;
03268   pCur->info.nSize = 0;
03269   if( pRoot->nCell==0 && !pRoot->leaf ){
03270     Pgno subpage;
03271     assert( pRoot->pgno==1 );
03272     subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
03273     assert( subpage>0 );
03274     pCur->eState = CURSOR_VALID;
03275     rc = moveToChild(pCur, subpage);
03276   }
03277   pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
03278   return rc;
03279 }
03280 
03281 /*
03282 ** Move the cursor down to the left-most leaf entry beneath the
03283 ** entry to which it is currently pointing.
03284 **
03285 ** The left-most leaf is the one with the smallest key - the first
03286 ** in ascending order.
03287 */
03288 static int moveToLeftmost(BtCursor *pCur){
03289   Pgno pgno;
03290   int rc;
03291   MemPage *pPage;
03292 
03293   assert( pCur->eState==CURSOR_VALID );
03294   while( !(pPage = pCur->pPage)->leaf ){
03295     assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
03296     pgno = get4byte(findCell(pPage, pCur->idx));
03297     rc = moveToChild(pCur, pgno);
03298     if( rc ) return rc;
03299   }
03300   return SQLITE_OK;
03301 }
03302 
03303 /*
03304 ** Move the cursor down to the right-most leaf entry beneath the
03305 ** page to which it is currently pointing.  Notice the difference
03306 ** between moveToLeftmost() and moveToRightmost().  moveToLeftmost()
03307 ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
03308 ** finds the right-most entry beneath the *page*.
03309 **
03310 ** The right-most entry is the one with the largest key - the last
03311 ** key in ascending order.
03312 */
03313 static int moveToRightmost(BtCursor *pCur){
03314   Pgno pgno;
03315   int rc;
03316   MemPage *pPage;
03317 
03318   assert( pCur->eState==CURSOR_VALID );
03319   while( !(pPage = pCur->pPage)->leaf ){
03320     pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
03321     pCur->idx = pPage->nCell;
03322     rc = moveToChild(pCur, pgno);
03323     if( rc ) return rc;
03324   }
03325   pCur->idx = pPage->nCell - 1;
03326   pCur->info.nSize = 0;
03327   return SQLITE_OK;
03328 }
03329 
03330 /* Move the cursor to the first entry in the table.  Return SQLITE_OK
03331 ** on success.  Set *pRes to 0 if the cursor actually points to something
03332 ** or set *pRes to 1 if the table is empty.
03333 */
03334 int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
03335   int rc;
03336   rc = moveToRoot(pCur);
03337   if( rc ) return rc;
03338   if( pCur->eState==CURSOR_INVALID ){
03339     assert( pCur->pPage->nCell==0 );
03340     *pRes = 1;
03341     return SQLITE_OK;
03342   }
03343   assert( pCur->pPage->nCell>0 );
03344   *pRes = 0;
03345   rc = moveToLeftmost(pCur);
03346   return rc;
03347 }
03348 
03349 /* Move the cursor to the last entry in the table.  Return SQLITE_OK
03350 ** on success.  Set *pRes to 0 if the cursor actually points to something
03351 ** or set *pRes to 1 if the table is empty.
03352 */
03353 int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
03354   int rc;
03355   rc = moveToRoot(pCur);
03356   if( rc ) return rc;
03357   if( CURSOR_INVALID==pCur->eState ){
03358     assert( pCur->pPage->nCell==0 );
03359     *pRes = 1;
03360     return SQLITE_OK;
03361   }
03362   assert( pCur->eState==CURSOR_VALID );
03363   *pRes = 0;
03364   rc = moveToRightmost(pCur);
03365   return rc;
03366 }
03367 
03368 /* Move the cursor so that it points to an entry near pKey/nKey.
03369 ** Return a success code.
03370 **
03371 ** For INTKEY tables, only the nKey parameter is used.  pKey is
03372 ** ignored.  For other tables, nKey is the number of bytes of data
03373 ** in pKey.  The comparison function specified when the cursor was
03374 ** created is used to compare keys.
03375 **
03376 ** If an exact match is not found, then the cursor is always
03377 ** left pointing at a leaf page which would hold the entry if it
03378 ** were present.  The cursor might point to an entry that comes
03379 ** before or after the key.
03380 **
03381 ** The result of comparing the key with the entry to which the
03382 ** cursor is written to *pRes if pRes!=NULL.  The meaning of
03383 ** this value is as follows:
03384 **
03385 **     *pRes<0      The cursor is left pointing at an entry that
03386 **                  is smaller than pKey or if the table is empty
03387 **                  and the cursor is therefore left point to nothing.
03388 **
03389 **     *pRes==0     The cursor is left pointing at an entry that
03390 **                  exactly matches pKey.
03391 **
03392 **     *pRes>0      The cursor is left pointing at an entry that
03393 **                  is larger than pKey.
03394 */
03395 int sqlite3BtreeMoveto(BtCursor *pCur, const void *pKey, i64 nKey, int *pRes){
03396   int rc;
03397   int tryRightmost;
03398   rc = moveToRoot(pCur);
03399   if( rc ) return rc;
03400   assert( pCur->pPage );
03401   assert( pCur->pPage->isInit );
03402   tryRightmost = pCur->pPage->intKey;
03403   if( pCur->eState==CURSOR_INVALID ){
03404     *pRes = -1;
03405     assert( pCur->pPage->nCell==0 );
03406     return SQLITE_OK;
03407   }
03408    for(;;){
03409     int lwr, upr;
03410     Pgno chldPg;
03411     MemPage *pPage = pCur->pPage;
03412     int c = -1;  /* pRes return if table is empty must be -1 */
03413     lwr = 0;
03414     upr = pPage->nCell-1;
03415     if( !pPage->intKey && pKey==0 ){
03416       return SQLITE_CORRUPT_BKPT;
03417     }
03418     pageIntegrity(pPage);
03419     while( lwr<=upr ){
03420       void *pCellKey;
03421       i64 nCellKey;
03422       pCur->idx = (lwr+upr)/2;
03423       pCur->info.nSize = 0;
03424       if( pPage->intKey ){
03425         u8 *pCell;
03426         if( tryRightmost ){
03427           pCur->idx = upr;
03428         }
03429         pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
03430         if( pPage->hasData ){
03431           u32 dummy;
03432           pCell += getVarint32(pCell, &dummy);
03433         }
03434         getVarint(pCell, (u64 *)&nCellKey);
03435         if( nCellKey<nKey ){
03436           c = -1;
03437         }else if( nCellKey>nKey ){
03438           c = +1;
03439           tryRightmost = 0;
03440         }else{
03441           c = 0;
03442         }
03443       }else{
03444         int available;
03445         pCellKey = (void *)fetchPayload(pCur, &available, 0);
03446         nCellKey = pCur->info.nKey;
03447         if( available>=nCellKey ){
03448           c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
03449         }else{
03450           pCellKey = sqliteMallocRaw( nCellKey );
03451           if( pCellKey==0 ) return SQLITE_NOMEM;
03452           rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
03453           c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
03454           sqliteFree(pCellKey);
03455           if( rc ) return rc;
03456         }
03457       }
03458       if( c==0 ){
03459         if( pPage->leafData && !pPage->leaf ){
03460           lwr = pCur->idx;
03461           upr = lwr - 1;
03462           break;
03463         }else{
03464           if( pRes ) *pRes = 0;
03465           return SQLITE_OK;
03466         }
03467       }
03468       if( c<0 ){
03469         lwr = pCur->idx+1;
03470       }else{
03471         upr = pCur->idx-1;
03472       }
03473     }
03474     assert( lwr==upr+1 );
03475     assert( pPage->isInit );
03476     if( pPage->leaf ){
03477       chldPg = 0;
03478     }else if( lwr>=pPage->nCell ){
03479       chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
03480     }else{
03481       chldPg = get4byte(findCell(pPage, lwr));
03482     }
03483     if( chldPg==0 ){
03484       assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
03485       if( pRes ) *pRes = c;
03486       return SQLITE_OK;
03487     }
03488     pCur->idx = lwr;
03489     pCur->info.nSize = 0;
03490     rc = moveToChild(pCur, chldPg);
03491     if( rc ){
03492       return rc;
03493     }
03494   }
03495   /* NOT REACHED */
03496 }
03497 
03498 /*
03499 ** Return TRUE if the cursor is not pointing at an entry of the table.
03500 **
03501 ** TRUE will be returned after a call to sqlite3BtreeNext() moves
03502 ** past the last entry in the table or sqlite3BtreePrev() moves past
03503 ** the first entry.  TRUE is also returned if the table is empty.
03504 */
03505 int sqlite3BtreeEof(BtCursor *pCur){
03506   /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
03507   ** have been deleted? This API will need to change to return an error code
03508   ** as well as the boolean result value.
03509   */
03510   return (CURSOR_VALID!=pCur->eState);
03511 }
03512 
03513 /*
03514 ** Advance the cursor to the next entry in the database.  If
03515 ** successful then set *pRes=0.  If the cursor
03516 ** was already pointing to the last entry in the database before
03517 ** this routine was called, then set *pRes=1.
03518 */
03519 int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
03520   int rc;
03521   MemPage *pPage;
03522 
03523 #ifndef SQLITE_OMIT_SHARED_CACHE
03524   rc = restoreOrClearCursorPosition(pCur, 1);
03525   if( rc!=SQLITE_OK ){
03526     return rc;
03527   }
03528   if( pCur->skip>0 ){
03529     pCur->skip = 0;
03530     *pRes = 0;
03531     return SQLITE_OK;
03532   }
03533   pCur->skip = 0;
03534 #endif 
03535 
03536   assert( pRes!=0 );
03537   pPage = pCur->pPage;
03538   if( CURSOR_INVALID==pCur->eState ){
03539     *pRes = 1;
03540     return SQLITE_OK;
03541   }
03542   assert( pPage->isInit );
03543   assert( pCur->idx<pPage->nCell );
03544 
03545   pCur->idx++;
03546   pCur->info.nSize = 0;
03547   if( pCur->idx>=pPage->nCell ){
03548     if( !pPage->leaf ){
03549       rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
03550       if( rc ) return rc;
03551       rc = moveToLeftmost(pCur);
03552       *pRes = 0;
03553       return rc;
03554     }
03555     do{
03556       if( isRootPage(pPage) ){
03557         *pRes = 1;
03558         pCur->eState = CURSOR_INVALID;
03559         return SQLITE_OK;
03560       }
03561       moveToParent(pCur);
03562       pPage = pCur->pPage;
03563     }while( pCur->idx>=pPage->nCell );
03564     *pRes = 0;
03565     if( pPage->leafData ){
03566       rc = sqlite3BtreeNext(pCur, pRes);
03567     }else{
03568       rc = SQLITE_OK;
03569     }
03570     return rc;
03571   }
03572   *pRes = 0;
03573   if( pPage->leaf ){
03574     return SQLITE_OK;
03575   }
03576   rc = moveToLeftmost(pCur);
03577   return rc;
03578 }
03579 
03580 /*
03581 ** Step the cursor to the back to the previous entry in the database.  If
03582 ** successful then set *pRes=0.  If the cursor
03583 ** was already pointing to the first entry in the database before
03584 ** this routine was called, then set *pRes=1.
03585 */
03586 int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
03587   int rc;
03588   Pgno pgno;
03589   MemPage *pPage;
03590 
03591 #ifndef SQLITE_OMIT_SHARED_CACHE
03592   rc = restoreOrClearCursorPosition(pCur, 1);
03593   if( rc!=SQLITE_OK ){
03594     return rc;
03595   }
03596   if( pCur->skip<0 ){
03597     pCur->skip = 0;
03598     *pRes = 0;
03599     return SQLITE_OK;
03600   }
03601   pCur->skip = 0;
03602 #endif
03603 
03604   if( CURSOR_INVALID==pCur->eState ){
03605     *pRes = 1;
03606     return SQLITE_OK;
03607   }
03608 
03609   pPage = pCur->pPage;
03610   assert( pPage->isInit );
03611   assert( pCur->idx>=0 );
03612   if( !pPage->leaf ){
03613     pgno = get4byte( findCell(pPage, pCur->idx) );
03614     rc = moveToChild(pCur, pgno);
03615     if( rc ) return rc;
03616     rc = moveToRightmost(pCur);
03617   }else{
03618     while( pCur->idx==0 ){
03619       if( isRootPage(pPage) ){
03620         pCur->eState = CURSOR_INVALID;
03621         *pRes = 1;
03622         return SQLITE_OK;
03623       }
03624       moveToParent(pCur);
03625       pPage = pCur->pPage;
03626     }
03627     pCur->idx--;
03628     pCur->info.nSize = 0;
03629     if( pPage->leafData && !pPage->leaf ){
03630       rc = sqlite3BtreePrevious(pCur, pRes);
03631     }else{
03632       rc = SQLITE_OK;
03633     }
03634   }
03635   *pRes = 0;
03636   return rc;
03637 }
03638 
03639 /*
03640 ** Allocate a new page from the database file.
03641 **
03642 ** The new page is marked as dirty.  (In other words, sqlite3pager_write()
03643 ** has already been called on the new page.)  The new page has also
03644 ** been referenced and the calling routine is responsible for calling
03645 ** sqlite3pager_unref() on the new page when it is done.
03646 **
03647 ** SQLITE_OK is returned on success.  Any other return value indicates
03648 ** an error.  *ppPage and *pPgno are undefined in the event of an error.
03649 ** Do not invoke sqlite3pager_unref() on *ppPage if an error is returned.
03650 **
03651 ** If the "nearby" parameter is not 0, then a (feeble) effort is made to 
03652 ** locate a page close to the page number "nearby".  This can be used in an
03653 ** attempt to keep related pages close to each other in the database file,
03654 ** which in turn can make database access faster.
03655 **
03656 ** If the "exact" parameter is not 0, and the page-number nearby exists 
03657 ** anywhere on the free-list, then it is guarenteed to be returned. This
03658 ** is only used by auto-vacuum databases when allocating a new table.
03659 */
03660 static int allocatePage(
03661   BtShared *pBt, 
03662   MemPage **ppPage, 
03663   Pgno *pPgno, 
03664   Pgno nearby,
03665   u8 exact
03666 ){
03667   MemPage *pPage1;
03668   int rc;
03669   int n;     /* Number of pages on the freelist */
03670   int k;     /* Number of leaves on the trunk of the freelist */
03671 
03672   pPage1 = pBt->pPage1;
03673   n = get4byte(&pPage1->aData[36]);
03674   if( n>0 ){
03675     /* There are pages on the freelist.  Reuse one of those pages. */
03676     MemPage *pTrunk = 0;
03677     Pgno iTrunk;
03678     MemPage *pPrevTrunk = 0;
03679     u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
03680     
03681     /* If the 'exact' parameter was true and a query of the pointer-map
03682     ** shows that the page 'nearby' is somewhere on the free-list, then
03683     ** the entire-list will be searched for that page.
03684     */
03685 #ifndef SQLITE_OMIT_AUTOVACUUM
03686     if( exact ){
03687       u8 eType;
03688       assert( nearby>0 );
03689       assert( pBt->autoVacuum );
03690       rc = ptrmapGet(pBt, nearby, &eType, 0);
03691       if( rc ) return rc;
03692       if( eType==PTRMAP_FREEPAGE ){
03693         searchList = 1;
03694       }
03695       *pPgno = nearby;
03696     }
03697 #endif
03698 
03699     /* Decrement the free-list count by 1. Set iTrunk to the index of the
03700     ** first free-list trunk page. iPrevTrunk is initially 1.
03701     */
03702     rc = sqlite3pager_write(pPage1->aData);
03703     if( rc ) return rc;
03704     put4byte(&pPage1->aData[36], n-1);
03705 
03706     /* The code within this loop is run only once if the 'searchList' variable
03707     ** is not true. Otherwise, it runs once for each trunk-page on the
03708     ** free-list until the page 'nearby' is located.
03709     */
03710     do {
03711       pPrevTrunk = pTrunk;
03712       if( pPrevTrunk ){
03713         iTrunk = get4byte(&pPrevTrunk->aData[0]);
03714       }else{
03715         iTrunk = get4byte(&pPage1->aData[32]);
03716       }
03717       rc = getPage(pBt, iTrunk, &pTrunk);
03718       if( rc ){
03719         releasePage(pPrevTrunk);
03720         return rc;
03721       }
03722 
03723       /* TODO: This should move to after the loop? */
03724       rc = sqlite3pager_write(pTrunk->aData);
03725       if( rc ){
03726         releasePage(pTrunk);
03727         releasePage(pPrevTrunk);
03728         return rc;
03729       }
03730 
03731       k = get4byte(&pTrunk->aData[4]);
03732       if( k==0 && !searchList ){
03733         /* The trunk has no leaves and the list is not being searched. 
03734         ** So extract the trunk page itself and use it as the newly 
03735         ** allocated page */
03736         assert( pPrevTrunk==0 );
03737         *pPgno = iTrunk;
03738         memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
03739         *ppPage = pTrunk;
03740         pTrunk = 0;
03741         TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
03742       }else if( k>pBt->usableSize/4 - 8 ){
03743         /* Value of k is out of range.  Database corruption */
03744         return SQLITE_CORRUPT_BKPT;
03745 #ifndef SQLITE_OMIT_AUTOVACUUM
03746       }else if( searchList && nearby==iTrunk ){
03747         /* The list is being searched and this trunk page is the page
03748         ** to allocate, regardless of whether it has leaves.
03749         */
03750         assert( *pPgno==iTrunk );
03751         *ppPage = pTrunk;
03752         searchList = 0;
03753         if( k==0 ){
03754           if( !pPrevTrunk ){
03755             memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
03756           }else{
03757             memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
03758           }
03759         }else{
03760           /* The trunk page is required by the caller but it contains 
03761           ** pointers to free-list leaves. The first leaf becomes a trunk
03762           ** page in this case.
03763           */
03764           MemPage *pNewTrunk;
03765           Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
03766           rc = getPage(pBt, iNewTrunk, &pNewTrunk);
03767           if( rc!=SQLITE_OK ){
03768             releasePage(pTrunk);
03769             releasePage(pPrevTrunk);
03770             return rc;
03771           }
03772           rc = sqlite3pager_write(pNewTrunk->aData);
03773           if( rc!=SQLITE_OK ){
03774             releasePage(pNewTrunk);
03775             releasePage(pTrunk);
03776             releasePage(pPrevTrunk);
03777             return rc;
03778           }
03779           memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
03780           put4byte(&pNewTrunk->aData[4], k-1);
03781           memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
03782           if( !pPrevTrunk ){
03783             put4byte(&pPage1->aData[32], iNewTrunk);
03784           }else{
03785             put4byte(&pPrevTrunk->aData[0], iNewTrunk);
03786           }
03787           releasePage(pNewTrunk);
03788         }
03789         pTrunk = 0;
03790         TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
03791 #endif
03792       }else{
03793         /* Extract a leaf from the trunk */
03794         int closest;
03795         Pgno iPage;
03796         unsigned char *aData = pTrunk->aData;
03797         if( nearby>0 ){
03798           int i, dist;
03799           closest = 0;
03800           dist = get4byte(&aData[8]) - nearby;
03801           if( dist<0 ) dist = -dist;
03802           for(i=1; i<k; i++){
03803             int d2 = get4byte(&aData[8+i*4]) - nearby;
03804             if( d2<0 ) d2 = -d2;
03805             if( d2<dist ){
03806               closest = i;
03807               dist = d2;
03808             }
03809           }
03810         }else{
03811           closest = 0;
03812         }
03813 
03814         iPage = get4byte(&aData[8+closest*4]);
03815         if( !searchList || iPage==nearby ){
03816           *pPgno = iPage;
03817           if( *pPgno>sqlite3pager_pagecount(pBt->pPager) ){
03818             /* Free page off the end of the file */
03819             return SQLITE_CORRUPT_BKPT;
03820           }
03821           TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
03822                  ": %d more free pages\n",
03823                  *pPgno, closest+1, k, pTrunk->pgno, n-1));
03824           if( closest<k-1 ){
03825             memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
03826           }
03827           put4byte(&aData[4], k-1);
03828           rc = getPage(pBt, *pPgno, ppPage);
03829           if( rc==SQLITE_OK ){
03830             sqlite3pager_dont_rollback((*ppPage)->aData);
03831             rc = sqlite3pager_write((*ppPage)->aData);
03832             if( rc!=SQLITE_OK ){
03833               releasePage(*ppPage);
03834             }
03835           }
03836           searchList = 0;
03837         }
03838       }
03839       releasePage(pPrevTrunk);
03840     }while( searchList );
03841     releasePage(pTrunk);
03842   }else{
03843     /* There are no pages on the freelist, so create a new page at the
03844     ** end of the file */
03845     *pPgno = sqlite3pager_pagecount(pBt->pPager) + 1;
03846 
03847 #ifndef SQLITE_OMIT_AUTOVACUUM
03848     if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
03849       /* If *pPgno refers to a pointer-map page, allocate two new pages
03850       ** at the end of the file instead of one. The first allocated page
03851       ** becomes a new pointer-map page, the second is used by the caller.
03852       */
03853       TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
03854       assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
03855       (*pPgno)++;
03856     }
03857 #endif
03858 
03859     assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
03860     rc = getPage(pBt, *pPgno, ppPage);
03861     if( rc ) return rc;
03862     rc = sqlite3pager_write((*ppPage)->aData);
03863     if( rc!=SQLITE_OK ){
03864       releasePage(*ppPage);
03865     }
03866     TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
03867   }
03868 
03869   assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
03870   return rc;
03871 }
03872 
03873 /*
03874 ** Add a page of the database file to the freelist.
03875 **
03876 ** sqlite3pager_unref() is NOT called for pPage.
03877 */
03878 static int freePage(MemPage *pPage){
03879   BtShared *pBt = pPage->pBt;
03880   MemPage *pPage1 = pBt->pPage1;
03881   int rc, n, k;
03882 
03883   /* Prepare the page for freeing */
03884   assert( pPage->pgno>1 );
03885   pPage->isInit = 0;
03886   releasePage(pPage->pParent);
03887   pPage->pParent = 0;
03888 
03889   /* Increment the free page count on pPage1 */
03890   rc = sqlite3pager_write(pPage1->aData);
03891   if( rc ) return rc;
03892   n = get4byte(&pPage1->aData[36]);
03893   put4byte(&pPage1->aData[36], n+1);
03894 
03895 #ifdef SQLITE_SECURE_DELETE
03896   /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
03897   ** always fully overwrite deleted information with zeros.
03898   */
03899   rc = sqlite3pager_write(pPage->aData);
03900   if( rc ) return rc;
03901   memset(pPage->aData, 0, pPage->pBt->pageSize);
03902 #endif
03903 
03904 #ifndef SQLITE_OMIT_AUTOVACUUM
03905   /* If the database supports auto-vacuum, write an entry in the pointer-map
03906   ** to indicate that the page is free.
03907   */
03908   if( pBt->autoVacuum ){
03909     rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
03910     if( rc ) return rc;
03911   }
03912 #endif
03913 
03914   if( n==0 ){
03915     /* This is the first free page */
03916     rc = sqlite3pager_write(pPage->aData);
03917     if( rc ) return rc;
03918     memset(pPage->aData, 0, 8);
03919     put4byte(&pPage1->aData[32], pPage->pgno);
03920     TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
03921   }else{
03922     /* Other free pages already exist.  Retrive the first trunk page
03923     ** of the freelist and find out how many leaves it has. */
03924     MemPage *pTrunk;
03925     rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
03926     if( rc ) return rc;
03927     k = get4byte(&pTrunk->aData[4]);
03928     if( k>=pBt->usableSize/4 - 8 ){
03929       /* The trunk is full.  Turn the page being freed into a new
03930       ** trunk page with no leaves. */
03931       rc = sqlite3pager_write(pPage->aData);
03932       if( rc ) return rc;
03933       put4byte(pPage->aData, pTrunk->pgno);
03934       put4byte(&pPage->aData[4], 0);
03935       put4byte(&pPage1->aData[32], pPage->pgno);
03936       TRACE(("FREE-PAGE: %d new trunk page replacing %d\n",
03937               pPage->pgno, pTrunk->pgno));
03938     }else{
03939       /* Add the newly freed page as a leaf on the current trunk */
03940       rc = sqlite3pager_write(pTrunk->aData);
03941       if( rc ) return rc;
03942       put4byte(&pTrunk->aData[4], k+1);
03943       put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
03944 #ifndef SQLITE_SECURE_DELETE
03945       sqlite3pager_dont_write(pBt->pPager, pPage->pgno);
03946 #endif
03947       TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
03948     }
03949     releasePage(pTrunk);
03950   }
03951   return rc;
03952 }
03953 
03954 /*
03955 ** Free any overflow pages associated with the given Cell.
03956 */
03957 static int clearCell(MemPage *pPage, unsigned char *pCell){
03958   BtShared *pBt = pPage->pBt;
03959   CellInfo info;
03960   Pgno ovflPgno;
03961   int rc;
03962 
03963   parseCellPtr(pPage, pCell, &info);
03964   if( info.iOverflow==0 ){
03965     return SQLITE_OK;  /* No overflow pages. Return without doing anything */
03966   }
03967   ovflPgno = get4byte(&pCell[info.iOverflow]);
03968   while( ovflPgno!=0 ){
03969     MemPage *pOvfl;
03970     if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
03971       return SQLITE_CORRUPT_BKPT;
03972     }
03973     rc = getPage(pBt, ovflPgno, &pOvfl);
03974     if( rc ) return rc;
03975     ovflPgno = get4byte(pOvfl->aData);
03976     rc = freePage(pOvfl);
03977     sqlite3pager_unref(pOvfl->aData);
03978     if( rc ) return rc;
03979   }
03980   return SQLITE_OK;
03981 }
03982 
03983 /*
03984 ** Create the byte sequence used to represent a cell on page pPage
03985 ** and write that byte sequence into pCell[].  Overflow pages are
03986 ** allocated and filled in as necessary.  The calling procedure
03987 ** is responsible for making sure sufficient space has been allocated
03988 ** for pCell[].
03989 **
03990 ** Note that pCell does not necessary need to point to the pPage->aData
03991 ** area.  pCell might point to some temporary storage.  The cell will
03992 ** be constructed in this temporary area then copied into pPage->aData
03993 ** later.
03994 */
03995 static int fillInCell(
03996   MemPage *pPage,                /* The page that contains the cell */
03997   unsigned char *pCell,          /* Complete text of the cell */
03998   const void *pKey, i64 nKey,    /* The key */
03999   const void *pData,int nData,   /* The data */
04000   int *pnSize                    /* Write cell size here */
04001 ){
04002   int nPayload;
04003   const u8 *pSrc;
04004   int nSrc, n, rc;
04005   int spaceLeft;
04006   MemPage *pOvfl = 0;
04007   MemPage *pToRelease = 0;
04008   unsigned char *pPrior;
04009   unsigned char *pPayload;
04010   BtShared *pBt = pPage->pBt;
04011   Pgno pgnoOvfl = 0;
04012   int nHeader;
04013   CellInfo info;
04014 
04015   /* Fill in the header. */
04016   nHeader = 0;
04017   if( !pPage->leaf ){
04018     nHeader += 4;
04019   }
04020   if( pPage->hasData ){
04021     nHeader += putVarint(&pCell[nHeader], nData);
04022   }else{
04023     nData = 0;
04024   }
04025   nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
04026   parseCellPtr(pPage, pCell, &info);
04027   assert( info.nHeader==nHeader );
04028   assert( info.nKey==nKey );
04029   assert( info.nData==nData );
04030   
04031   /* Fill in the payload */
04032   nPayload = nData;
04033   if( pPage->intKey ){
04034     pSrc = pData;
04035     nSrc = nData;
04036     nData = 0;
04037   }else{
04038     nPayload += nKey;
04039     pSrc = pKey;
04040     nSrc = nKey;
04041   }
04042   *pnSize = info.nSize;
04043   spaceLeft = info.nLocal;
04044   pPayload = &pCell[nHeader];
04045   pPrior = &pCell[info.iOverflow];
04046 
04047   while( nPayload>0 ){
04048     if( spaceLeft==0 ){
04049 #ifndef SQLITE_OMIT_AUTOVACUUM
04050       Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
04051 #endif
04052       rc = allocatePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0);
04053 #ifndef SQLITE_OMIT_AUTOVACUUM
04054       /* If the database supports auto-vacuum, and the second or subsequent
04055       ** overflow page is being allocated, add an entry to the pointer-map
04056       ** for that page now. The entry for the first overflow page will be
04057       ** added later, by the insertCell() routine.
04058       */
04059       if( pBt->autoVacuum && pgnoPtrmap!=0 && rc==SQLITE_OK ){
04060         rc = ptrmapPut(pBt, pgnoOvfl, PTRMAP_OVERFLOW2, pgnoPtrmap);
04061       }
04062 #endif
04063       if( rc ){
04064         releasePage(pToRelease);
04065         /* clearCell(pPage, pCell); */
04066         return rc;
04067       }
04068       put4byte(pPrior, pgnoOvfl);
04069       releasePage(pToRelease);
04070       pToRelease = pOvfl;
04071       pPrior = pOvfl->aData;
04072       put4byte(pPrior, 0);
04073       pPayload = &pOvfl->aData[4];
04074       spaceLeft = pBt->usableSize - 4;
04075     }
04076     n = nPayload;
04077     if( n>spaceLeft ) n = spaceLeft;
04078     if( n>nSrc ) n = nSrc;
04079     assert( pSrc );
04080     memcpy(pPayload, pSrc, n);
04081     nPayload -= n;
04082     pPayload += n;
04083     pSrc += n;
04084     nSrc -= n;
04085     spaceLeft -= n;
04086     if( nSrc==0 ){
04087       nSrc = nData;
04088       pSrc = pData;
04089     }
04090   }
04091   releasePage(pToRelease);
04092   return SQLITE_OK;
04093 }
04094 
04095 /*
04096 ** Change the MemPage.pParent pointer on the page whose number is
04097 ** given in the second argument so that MemPage.pParent holds the
04098 ** pointer in the third argument.
04099 */
04100 static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){
04101   MemPage *pThis;
04102   unsigned char *aData;
04103 
04104   assert( pNewParent!=0 );
04105   if( pgno==0 ) return SQLITE_OK;
04106   assert( pBt->pPager!=0 );
04107   aData = sqlite3pager_lookup(pBt->pPager, pgno);
04108   if( aData ){
04109     pThis = (MemPage*)&aData[pBt->pageSize];
04110     assert( pThis->aData==aData );
04111     if( pThis->isInit ){
04112       if( pThis->pParent!=pNewParent ){
04113         if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
04114         pThis->pParent = pNewParent;
04115         sqlite3pager_ref(pNewParent->aData);
04116       }
04117       pThis->idxParent = idx;
04118     }
04119     sqlite3pager_unref(aData);
04120   }
04121 
04122 #ifndef SQLITE_OMIT_AUTOVACUUM
04123   if( pBt->autoVacuum ){
04124     return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
04125   }
04126 #endif
04127   return SQLITE_OK;
04128 }
04129 
04130 
04131 
04132 /*
04133 ** Change the pParent pointer of all children of pPage to point back
04134 ** to pPage.
04135 **
04136 ** In other words, for every child of pPage, invoke reparentPage()
04137 ** to make sure that each child knows that pPage is its parent.
04138 **
04139 ** This routine gets called after you memcpy() one page into
04140 ** another.
04141 */
04142 static int reparentChildPages(MemPage *pPage){
04143   int i;
04144   BtShared *pBt = pPage->pBt;
04145   int rc = SQLITE_OK;
04146 
04147   if( pPage->leaf ) return SQLITE_OK;
04148 
04149   for(i=0; i<pPage->nCell; i++){
04150     u8 *pCell = findCell(pPage, i);
04151     if( !pPage->leaf ){
04152       rc = reparentPage(pBt, get4byte(pCell), pPage, i);
04153       if( rc!=SQLITE_OK ) return rc;
04154     }
04155   }
04156   if( !pPage->leaf ){
04157     rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]), 
04158        pPage, i);
04159     pPage->idxShift = 0;
04160   }
04161   return rc;
04162 }
04163 
04164 /*
04165 ** Remove the i-th cell from pPage.  This routine effects pPage only.
04166 ** The cell content is not freed or deallocated.  It is assumed that
04167 ** the cell content has been copied someplace else.  This routine just
04168 ** removes the reference to the cell from pPage.
04169 **
04170 ** "sz" must be the number of bytes in the cell.
04171 */
04172 static void dropCell(MemPage *pPage, int idx, int sz){
04173   int i;          /* Loop counter */
04174   int pc;         /* Offset to cell content of cell being deleted */
04175   u8 *data;       /* pPage->aData */
04176   u8 *ptr;        /* Used to move bytes around within data[] */
04177 
04178   assert( idx>=0 && idx<pPage->nCell );
04179   assert( sz==cellSize(pPage, idx) );
04180   assert( sqlite3pager_iswriteable(pPage->aData) );
04181   data = pPage->aData;
04182   ptr = &data[pPage->cellOffset + 2*idx];
04183   pc = get2byte(ptr);
04184   assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
04185   freeSpace(pPage, pc, sz);
04186   for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
04187     ptr[0] = ptr[2];
04188     ptr[1] = ptr[3];
04189   }
04190   pPage->nCell--;
04191   put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
04192   pPage->nFree += 2;
04193   pPage->idxShift = 1;
04194 }
04195 
04196 /*
04197 ** Insert a new cell on pPage at cell index "i".  pCell points to the
04198 ** content of the cell.
04199 **
04200 ** If the cell content will fit on the page, then put it there.  If it
04201 ** will not fit, then make a copy of the cell content into pTemp if
04202 ** pTemp is not null.  Regardless of pTemp, allocate a new entry
04203 ** in pPage->aOvfl[] and make it point to the cell content (either
04204 ** in pTemp or the original pCell) and also record its index. 
04205 ** Allocating a new entry in pPage->aCell[] implies that 
04206 ** pPage->nOverflow is incremented.
04207 **
04208 ** If nSkip is non-zero, then do not copy the first nSkip bytes of the
04209 ** cell. The caller will overwrite them after this function returns. If
04210 ** nSkip is non-zero, then pCell may not point to an invalid memory location 
04211 ** (but pCell+nSkip is always valid).
04212 */
04213 static int insertCell(
04214   MemPage *pPage,   /* Page into which we are copying */
04215   int i,            /* New cell becomes the i-th cell of the page */
04216   u8 *pCell,        /* Content of the new cell */
04217   int sz,           /* Bytes of content in pCell */
04218   u8 *pTemp,        /* Temp storage space for pCell, if needed */
04219   u8 nSkip          /* Do not write the first nSkip bytes of the cell */
04220 ){
04221   int idx;          /* Where to write new cell content in data[] */
04222   int j;            /* Loop counter */
04223   int top;          /* First byte of content for any cell in data[] */
04224   int end;          /* First byte past the last cell pointer in data[] */
04225   int ins;          /* Index in data[] where new cell pointer is inserted */
04226   int hdr;          /* Offset into data[] of the page header */
04227   int cellOffset;   /* Address of first cell pointer in data[] */
04228   u8 *data;         /* The content of the whole page */
04229   u8 *ptr;          /* Used for moving information around in data[] */
04230 
04231   assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
04232   assert( sz==cellSizePtr(pPage, pCell) );
04233   assert( sqlite3pager_iswriteable(pPage->aData) );
04234   if( pPage->nOverflow || sz+2>pPage->nFree ){
04235     if( pTemp ){
04236       memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
04237       pCell = pTemp;
04238     }
04239     j = pPage->nOverflow++;
04240     assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
04241     pPage->aOvfl[j].pCell = pCell;
04242     pPage->aOvfl[j].idx = i;
04243     pPage->nFree = 0;
04244   }else{
04245     data = pPage->aData;
04246     hdr = pPage->hdrOffset;
04247     top = get2byte(&data[hdr+5]);
04248     cellOffset = pPage->cellOffset;
04249     end = cellOffset + 2*pPage->nCell + 2;
04250     ins = cellOffset + 2*i;
04251     if( end > top - sz ){
04252       int rc = defragmentPage(pPage);
04253       if( rc!=SQLITE_OK ) return rc;
04254       top = get2byte(&data[hdr+5]);
04255       assert( end + sz <= top );
04256     }
04257     idx = allocateSpace(pPage, sz);
04258     assert( idx>0 );
04259     assert( end <= get2byte(&data[hdr+5]) );
04260     pPage->nCell++;
04261     pPage->nFree -= 2;
04262     memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
04263     for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
04264       ptr[0] = ptr[-2];
04265       ptr[1] = ptr[-1];
04266     }
04267     put2byte(&data[ins], idx);
04268     put2byte(&data[hdr+3], pPage->nCell);
04269     pPage->idxShift = 1;
04270     pageIntegrity(pPage);
04271 #ifndef SQLITE_OMIT_AUTOVACUUM
04272     if( pPage->pBt->autoVacuum ){
04273       /* The cell may contain a pointer to an overflow page. If so, write
04274       ** the entry for the overflow page into the pointer map.
04275       */
04276       CellInfo info;
04277       parseCellPtr(pPage, pCell, &info);
04278       if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
04279         Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
04280         int rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
04281         if( rc!=SQLITE_OK ) return rc;
04282       }
04283     }
04284 #endif
04285   }
04286 
04287   return SQLITE_OK;
04288 }
04289 
04290 /*
04291 ** Add a list of cells to a page.  The page should be initially empty.
04292 ** The cells are guaranteed to fit on the page.
04293 */
04294 static void assemblePage(
04295   MemPage *pPage,   /* The page to be assemblied */
04296   int nCell,        /* The number of cells to add to this page */
04297   u8 **apCell,      /* Pointers to cell bodies */
04298   int *aSize        /* Sizes of the cells */
04299 ){
04300   int i;            /* Loop counter */
04301   int totalSize;    /* Total size of all cells */
04302   int hdr;          /* Index of page header */
04303   int cellptr;      /* Address of next cell pointer */
04304   int cellbody;     /* Address of next cell body */
04305   u8 *data;         /* Data for the page */
04306 
04307   assert( pPage->nOverflow==0 );
04308   totalSize = 0;
04309   for(i=0; i<nCell; i++){
04310     totalSize += aSize[i];
04311   }
04312   assert( totalSize+2*nCell<=pPage->nFree );
04313   assert( pPage->nCell==0 );
04314   cellptr = pPage->cellOffset;
04315   data = pPage->aData;
04316   hdr = pPage->hdrOffset;
04317   put2byte(&data[hdr+3], nCell);
04318   if( nCell ){
04319     cellbody = allocateSpace(pPage, totalSize);
04320     assert( cellbody>0 );
04321     assert( pPage->nFree >= 2*nCell );
04322     pPage->nFree -= 2*nCell;
04323     for(i=0; i<nCell; i++){
04324       put2byte(&data[cellptr], cellbody);
04325       memcpy(&data[cellbody], apCell[i], aSize[i]);
04326       cellptr += 2;
04327       cellbody += aSize[i];
04328     }
04329     assert( cellbody==pPage->pBt->usableSize );
04330   }
04331   pPage->nCell = nCell;
04332 }
04333 
04334 /*
04335 ** The following parameters determine how many adjacent pages get involved
04336 ** in a balancing operation.  NN is the number of neighbors on either side
04337 ** of the page that participate in the balancing operation.  NB is the
04338 ** total number of pages that participate, including the target page and
04339 ** NN neighbors on either side.
04340 **
04341 ** The minimum value of NN is 1 (of course).  Increasing NN above 1
04342 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
04343 ** in exchange for a larger degradation in INSERT and UPDATE performance.
04344 ** The value of NN appears to give the best results overall.
04345 */
04346 #define NN 1             /* Number of neighbors on either side of pPage */
04347 #define NB (NN*2+1)      /* Total pages involved in the balance */
04348 
04349 /* Forward reference */
04350 static int balance(MemPage*, int);
04351 
04352 #ifndef SQLITE_OMIT_QUICKBALANCE
04353 /*
04354 ** This version of balance() handles the common special case where
04355 ** a new entry is being inserted on the extreme right-end of the
04356 ** tree, in other words, when the new entry will become the largest
04357 ** entry in the tree.
04358 **
04359 ** Instead of trying balance the 3 right-most leaf pages, just add
04360 ** a new page to the right-hand side and put the one new entry in
04361 ** that page.  This leaves the right side of the tree somewhat
04362 ** unbalanced.  But odds are that we will be inserting new entries
04363 ** at the end soon afterwards so the nearly empty page will quickly
04364 ** fill up.  On average.
04365 **
04366 ** pPage is the leaf page which is the right-most page in the tree.
04367 ** pParent is its parent.  pPage must have a single overflow entry
04368 ** which is also the right-most entry on the page.
04369 */
04370 static int balance_quick(MemPage *pPage, MemPage *pParent){
04371   int rc;
04372   MemPage *pNew;
04373   Pgno pgnoNew;
04374   u8 *pCell;
04375   int szCell;
04376   CellInfo info;
04377   BtShared *pBt = pPage->pBt;
04378   int parentIdx = pParent->nCell;   /* pParent new divider cell index */
04379   int parentSize;                   /* Size of new divider cell */
04380   u8 parentCell[64];                /* Space for the new divider cell */
04381 
04382   /* Allocate a new page. Insert the overflow cell from pPage
04383   ** into it. Then remove the overflow cell from pPage.
04384   */
04385   rc = allocatePage(pBt, &pNew, &pgnoNew, 0, 0);
04386   if( rc!=SQLITE_OK ){
04387     return rc;
04388   }
04389   pCell = pPage->aOvfl[0].pCell;
04390   szCell = cellSizePtr(pPage, pCell);
04391   zeroPage(pNew, pPage->aData[0]);
04392   assemblePage(pNew, 1, &pCell, &szCell);
04393   pPage->nOverflow = 0;
04394 
04395   /* Set the parent of the newly allocated page to pParent. */
04396   pNew->pParent = pParent;
04397   sqlite3pager_ref(pParent->aData);
04398 
04399   /* pPage is currently the right-child of pParent. Change this
04400   ** so that the right-child is the new page allocated above and
04401   ** pPage is the next-to-right child. 
04402   */
04403   assert( pPage->nCell>0 );
04404   parseCellPtr(pPage, findCell(pPage, pPage->nCell-1), &info);
04405   rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, &parentSize);
04406   if( rc!=SQLITE_OK ){
04407     return rc;
04408   }
04409   assert( parentSize<64 );
04410   rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
04411   if( rc!=SQLITE_OK ){
04412     return rc;
04413   }
04414   put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
04415   put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
04416 
04417 #ifndef SQLITE_OMIT_AUTOVACUUM
04418   /* If this is an auto-vacuum database, update the pointer map
04419   ** with entries for the new page, and any pointer from the 
04420   ** cell on the page to an overflow page.
04421   */
04422   if( pBt->autoVacuum ){
04423     rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
04424     if( rc!=SQLITE_OK ){
04425       return rc;
04426     }
04427     rc = ptrmapPutOvfl(pNew, 0);
04428     if( rc!=SQLITE_OK ){
04429       return rc;
04430     }
04431   }
04432 #endif
04433 
04434   /* Release the reference to the new page and balance the parent page,
04435   ** in case the divider cell inserted caused it to become overfull.
04436   */
04437   releasePage(pNew);
04438   return balance(pParent, 0);
04439 }
04440 #endif /* SQLITE_OMIT_QUICKBALANCE */
04441 
04442 /*
04443 ** The ISAUTOVACUUM macro is used within balance_nonroot() to determine
04444 ** if the database supports auto-vacuum or not. Because it is used
04445 ** within an expression that is an argument to another macro 
04446 ** (sqliteMallocRaw), it is not possible to use conditional compilation.
04447 ** So, this macro is defined instead.
04448 */
04449 #ifndef SQLITE_OMIT_AUTOVACUUM
04450 #define ISAUTOVACUUM (pBt->autoVacuum)
04451 #else
04452 #define ISAUTOVACUUM 0
04453 #endif
04454 
04455 /*
04456 ** This routine redistributes Cells on pPage and up to NN*2 siblings
04457 ** of pPage so that all pages have about the same amount of free space.
04458 ** Usually NN siblings on either side of pPage is used in the balancing,
04459 ** though more siblings might come from one side if pPage is the first
04460 ** or last child of its parent.  If pPage has fewer than 2*NN siblings
04461 ** (something which can only happen if pPage is the root page or a 
04462 ** child of root) then all available siblings participate in the balancing.
04463 **
04464 ** The number of siblings of pPage might be increased or decreased by one or
04465 ** two in an effort to keep pages nearly full but not over full. The root page
04466 ** is special and is allowed to be nearly empty. If pPage is 
04467 ** the root page, then the depth of the tree might be increased
04468 ** or decreased by one, as necessary, to keep the root page from being
04469 ** overfull or completely empty.
04470 **
04471 ** Note that when this routine is called, some of the Cells on pPage
04472 ** might not actually be stored in pPage->aData[].  This can happen
04473 ** if the page is overfull.  Part of the job of this routine is to
04474 ** make sure all Cells for pPage once again fit in pPage->aData[].
04475 **
04476 ** In the course of balancing the siblings of pPage, the parent of pPage
04477 ** might become overfull or underfull.  If that happens, then this routine
04478 ** is called recursively on the parent.
04479 **
04480 ** If this routine fails for any reason, it might leave the database
04481 ** in a corrupted state.  So if this routine fails, the database should
04482 ** be rolled back.
04483 */
04484 static int balance_nonroot(MemPage *pPage){
04485   MemPage *pParent;            /* The parent of pPage */
04486   BtShared *pBt;                  /* The whole database */
04487   int nCell = 0;               /* Number of cells in apCell[] */
04488   int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
04489   int nOld;                    /* Number of pages in apOld[] */
04490   int nNew;                    /* Number of pages in apNew[] */
04491   int nDiv;                    /* Number of cells in apDiv[] */
04492   int i, j, k;                 /* Loop counters */
04493   int idx;                     /* Index of pPage in pParent->aCell[] */
04494   int nxDiv;                   /* Next divider slot in pParent->aCell[] */
04495   int rc;                      /* The return code */
04496   int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
04497   int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
04498   int usableSpace;             /* Bytes in pPage beyond the header */
04499   int pageFlags;               /* Value of pPage->aData[0] */
04500   int subtotal;                /* Subtotal of bytes in cells on one page */
04501   int iSpace = 0;              /* First unused byte of aSpace[] */
04502   MemPage *apOld[NB];          /* pPage and up to two siblings */
04503   Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
04504   MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
04505   MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
04506   Pgno pgnoNew[NB+2];          /* Page numbers for each page in apNew[] */
04507   u8 *apDiv[NB];               /* Divider cells in pParent */
04508   int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */
04509   int szNew[NB+2];             /* Combined size of cells place on i-th page */
04510   u8 **apCell = 0;             /* All cells begin balanced */
04511   int *szCell;                 /* Local size of all cells in apCell[] */
04512   u8 *aCopy[NB];               /* Space for holding data of apCopy[] */
04513   u8 *aSpace;                  /* Space to hold copies of dividers cells */
04514 #ifndef SQLITE_OMIT_AUTOVACUUM
04515   u8 *aFrom = 0;
04516 #endif
04517 
04518   /* 
04519   ** Find the parent page.
04520   */
04521   assert( pPage->isInit );
04522   assert( sqlite3pager_iswriteable(pPage->aData) );
04523   pBt = pPage->pBt;
04524   pParent = pPage->pParent;
04525   assert( pParent );
04526   if( SQLITE_OK!=(rc = sqlite3pager_write(pParent->aData)) ){
04527     return rc;
04528   }
04529   TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
04530 
04531 #ifndef SQLITE_OMIT_QUICKBALANCE
04532   /*
04533   ** A special case:  If a new entry has just been inserted into a
04534   ** table (that is, a btree with integer keys and all data at the leaves)
04535   ** and the new entry is the right-most entry in the tree (it has the
04536   ** largest key) then use the special balance_quick() routine for
04537   ** balancing.  balance_quick() is much faster and results in a tighter
04538   ** packing of data in the common case.
04539   */
04540   if( pPage->leaf &&
04541       pPage->intKey &&
04542       pPage->leafData &&
04543       pPage->nOverflow==1 &&
04544       pPage->aOvfl[0].idx==pPage->nCell &&
04545       pPage->pParent->pgno!=1 &&
04546       get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
04547   ){
04548     /*
04549     ** TODO: Check the siblings to the left of pPage. It may be that
04550     ** they are not full and no new page is required.
04551     */
04552     return balance_quick(pPage, pParent);
04553   }
04554 #endif
04555 
04556   /*
04557   ** Find the cell in the parent page whose left child points back
04558   ** to pPage.  The "idx" variable is the index of that cell.  If pPage
04559   ** is the rightmost child of pParent then set idx to pParent->nCell 
04560   */
04561   if( pParent->idxShift ){
04562     Pgno pgno;
04563     pgno = pPage->pgno;
04564     assert( pgno==sqlite3pager_pagenumber(pPage->aData) );
04565     for(idx=0; idx<pParent->nCell; idx++){
04566       if( get4byte(findCell(pParent, idx))==pgno ){
04567         break;
04568       }
04569     }
04570     assert( idx<pParent->nCell
04571              || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
04572   }else{
04573     idx = pPage->idxParent;
04574   }
04575 
04576   /*
04577   ** Initialize variables so that it will be safe to jump
04578   ** directly to balance_cleanup at any moment.
04579   */
04580   nOld = nNew = 0;
04581   sqlite3pager_ref(pParent->aData);
04582 
04583   /*
04584   ** Find sibling pages to pPage and the cells in pParent that divide
04585   ** the siblings.  An attempt is made to find NN siblings on either
04586   ** side of pPage.  More siblings are taken from one side, however, if
04587   ** pPage there are fewer than NN siblings on the other side.  If pParent
04588   ** has NB or fewer children then all children of pParent are taken.
04589   */
04590   nxDiv = idx - NN;
04591   if( nxDiv + NB > pParent->nCell ){
04592     nxDiv = pParent->nCell - NB + 1;
04593   }
04594   if( nxDiv<0 ){
04595     nxDiv = 0;
04596   }
04597   nDiv = 0;
04598   for(i=0, k=nxDiv; i<NB; i++, k++){
04599     if( k<pParent->nCell ){
04600       apDiv[i] = findCell(pParent, k);
04601       nDiv++;
04602       assert( !pParent->leaf );
04603       pgnoOld[i] = get4byte(apDiv[i]);
04604     }else if( k==pParent->nCell ){
04605       pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
04606     }else{
04607       break;
04608     }
04609     rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
04610     if( rc ) goto balance_cleanup;
04611     apOld[i]->idxParent = k;
04612     apCopy[i] = 0;
04613     assert( i==nOld );
04614     nOld++;
04615     nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
04616   }
04617 
04618   /* Make nMaxCells a multiple of 2 in order to preserve 8-byte
04619   ** alignment */
04620   nMaxCells = (nMaxCells + 1)&~1;
04621 
04622   /*
04623   ** Allocate space for memory structures
04624   */
04625   apCell = sqliteMallocRaw( 
04626        nMaxCells*sizeof(u8*)                           /* apCell */
04627      + nMaxCells*sizeof(int)                           /* szCell */
04628      + ROUND8(sizeof(MemPage))*NB                      /* aCopy */
04629      + pBt->pageSize*(5+NB)                            /* aSpace */
04630      + (ISAUTOVACUUM ? nMaxCells : 0)                  /* aFrom */
04631   );
04632   if( apCell==0 ){
04633     rc = SQLITE_NOMEM;
04634     goto balance_cleanup;
04635   }
04636   szCell = (int*)&apCell[nMaxCells];
04637   aCopy[0] = (u8*)&szCell[nMaxCells];
04638   assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
04639   for(i=1; i<NB; i++){
04640     aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
04641     assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
04642   }
04643   aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
04644   assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
04645 #ifndef SQLITE_OMIT_AUTOVACUUM
04646   if( pBt->autoVacuum ){
04647     aFrom = &aSpace[5*pBt->pageSize];
04648   }
04649 #endif
04650   
04651   /*
04652   ** Make copies of the content of pPage and its siblings into aOld[].
04653   ** The rest of this function will use data from the copies rather
04654   ** that the original pages since the original pages will be in the
04655   ** process of being overwritten.
04656   */
04657   for(i=0; i<nOld; i++){
04658     MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize];
04659     p->aData = &((u8*)p)[-pBt->pageSize];
04660     memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
04661     /* The memcpy() above changes the value of p->aData so we have to
04662     ** set it again. */
04663     p->aData = &((u8*)p)[-pBt->pageSize];
04664   }
04665 
04666   /*
04667   ** Load pointers to all cells on sibling pages and the divider cells
04668   ** into the local apCell[] array.  Make copies of the divider cells
04669   ** into space obtained form aSpace[] and remove the the divider Cells
04670   ** from pParent.
04671   **
04672   ** If the siblings are on leaf pages, then the child pointers of the
04673   ** divider cells are stripped from the cells before they are copied
04674   ** into aSpace[].  In this way, all cells in apCell[] are without
04675   ** child pointers.  If siblings are not leaves, then all cell in
04676   ** apCell[] include child pointers.  Either way, all cells in apCell[]
04677   ** are alike.
04678   **
04679   ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
04680   **       leafData:  1 if pPage holds key+data and pParent holds only keys.
04681   */
04682   nCell = 0;
04683   leafCorrection = pPage->leaf*4;
04684   leafData = pPage->leafData && pPage->leaf;
04685   for(i=0; i<nOld; i++){
04686     MemPage *pOld = apCopy[i];
04687     int limit = pOld->nCell+pOld->nOverflow;
04688     for(j=0; j<limit; j++){
04689       assert( nCell<nMaxCells );
04690       apCell[nCell] = findOverflowCell(pOld, j);
04691       szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
04692 #ifndef SQLITE_OMIT_AUTOVACUUM
04693       if( pBt->autoVacuum ){
04694         int a;
04695         aFrom[nCell] = i;
04696         for(a=0; a<pOld->nOverflow; a++){
04697           if( pOld->aOvfl[a].pCell==apCell[nCell] ){
04698             aFrom[nCell] = 0xFF;
04699             break;
04700           }
04701         }
04702       }
04703 #endif
04704       nCell++;
04705     }
04706     if( i<nOld-1 ){
04707       int sz = cellSizePtr(pParent, apDiv[i]);
04708       if( leafData ){
04709         /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
04710         ** are duplicates of keys on the child pages.  We need to remove
04711         ** the divider cells from pParent, but the dividers cells are not
04712         ** added to apCell[] because they are duplicates of child cells.
04713         */
04714         dropCell(pParent, nxDiv, sz);
04715       }else{
04716         u8 *pTemp;
04717         assert( nCell<nMaxCells );
04718         szCell[nCell] = sz;
04719         pTemp = &aSpace[iSpace];
04720         iSpace += sz;
04721         assert( iSpace<=pBt->pageSize*5 );
04722         memcpy(pTemp, apDiv[i], sz);
04723         apCell[nCell] = pTemp+leafCorrection;
04724 #ifndef SQLITE_OMIT_AUTOVACUUM
04725         if( pBt->autoVacuum ){
04726           aFrom[nCell] = 0xFF;
04727         }
04728 #endif
04729         dropCell(pParent, nxDiv, sz);
04730         szCell[nCell] -= leafCorrection;
04731         assert( get4byte(pTemp)==pgnoOld[i] );
04732         if( !pOld->leaf ){
04733           assert( leafCorrection==0 );
04734           /* The right pointer of the child page pOld becomes the left
04735           ** pointer of the divider cell */
04736           memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
04737         }else{
04738           assert( leafCorrection==4 );
04739         }
04740         nCell++;
04741       }
04742     }
04743   }
04744 
04745   /*
04746   ** Figure out the number of pages needed to hold all nCell cells.
04747   ** Store this number in "k".  Also compute szNew[] which is the total
04748   ** size of all cells on the i-th page and cntNew[] which is the index
04749   ** in apCell[] of the cell that divides page i from page i+1.  
04750   ** cntNew[k] should equal nCell.
04751   **
04752   ** Values computed by this block:
04753   **
04754   **           k: The total number of sibling pages
04755   **    szNew[i]: Spaced used on the i-th sibling page.
04756   **   cntNew[i]: Index in apCell[] and szCell[] for the first cell to
04757   **              the right of the i-th sibling page.
04758   ** usableSpace: Number of bytes of space available on each sibling.
04759   ** 
04760   */
04761   usableSpace = pBt->usableSize - 12 + leafCorrection;
04762   for(subtotal=k=i=0; i<nCell; i++){
04763     assert( i<nMaxCells );
04764     subtotal += szCell[i] + 2;
04765     if( subtotal > usableSpace ){
04766       szNew[k] = subtotal - szCell[i];
04767       cntNew[k] = i;
04768       if( leafData ){ i--; }
04769       subtotal = 0;
04770       k++;
04771     }
04772   }
04773   szNew[k] = subtotal;
04774   cntNew[k] = nCell;
04775   k++;
04776 
04777   /*
04778   ** The packing computed by the previous block is biased toward the siblings
04779   ** on the left side.  The left siblings are always nearly full, while the
04780   ** right-most sibling might be nearly empty.  This block of code attempts
04781   ** to adjust the packing of siblings to get a better balance.
04782   **
04783   ** This adjustment is more than an optimization.  The packing above might
04784   ** be so out of balance as to be illegal.  For example, the right-most
04785   ** sibling might be completely empty.  This adjustment is not optional.
04786   */
04787   for(i=k-1; i>0; i--){
04788     int szRight = szNew[i];  /* Size of sibling on the right */
04789     int szLeft = szNew[i-1]; /* Size of sibling on the left */
04790     int r;              /* Index of right-most cell in left sibling */
04791     int d;              /* Index of first cell to the left of right sibling */
04792 
04793     r = cntNew[i-1] - 1;
04794     d = r + 1 - leafData;
04795     assert( d<nMaxCells );
04796     assert( r<nMaxCells );
04797     while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
04798       szRight += szCell[d] + 2;
04799       szLeft -= szCell[r] + 2;
04800       cntNew[i-1]--;
04801       r = cntNew[i-1] - 1;
04802       d = r + 1 - leafData;
04803     }
04804     szNew[i] = szRight;
04805     szNew[i-1] = szLeft;
04806   }
04807 
04808   /* Either we found one or more cells (cntnew[0])>0) or we are the
04809   ** a virtual root page.  A virtual root page is when the real root
04810   ** page is page 1 and we are the only child of that page.
04811   */
04812   assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
04813 
04814   /*
04815   ** Allocate k new pages.  Reuse old pages where possible.
04816   */
04817   assert( pPage->pgno>1 );
04818   pageFlags = pPage->aData[0];
04819   for(i=0; i<k; i++){
04820     MemPage *pNew;
04821     if( i<nOld ){
04822       pNew = apNew[i] = apOld[i];
04823       pgnoNew[i] = pgnoOld[i];
04824       apOld[i] = 0;
04825       rc = sqlite3pager_write(pNew->aData);
04826       if( rc ) goto balance_cleanup;
04827     }else{
04828       assert( i>0 );
04829       rc = allocatePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
04830       if( rc ) goto balance_cleanup;
04831       apNew[i] = pNew;
04832     }
04833     nNew++;
04834     zeroPage(pNew, pageFlags);
04835   }
04836 
04837   /* Free any old pages that were not reused as new pages.
04838   */
04839   while( i<nOld ){
04840     rc = freePage(apOld[i]);
04841     if( rc ) goto balance_cleanup;
04842     releasePage(apOld[i]);
04843     apOld[i] = 0;
04844     i++;
04845   }
04846 
04847   /*
04848   ** Put the new pages in accending order.  This helps to
04849   ** keep entries in the disk file in order so that a scan
04850   ** of the table is a linear scan through the file.  That
04851   ** in turn helps the operating system to deliver pages
04852   ** from the disk more rapidly.
04853   **
04854   ** An O(n^2) insertion sort algorithm is used, but since
04855   ** n is never more than NB (a small constant), that should
04856   ** not be a problem.
04857   **
04858   ** When NB==3, this one optimization makes the database
04859   ** about 25% faster for large insertions and deletions.
04860   */
04861   for(i=0; i<k-1; i++){
04862     int minV = pgnoNew[i];
04863     int minI = i;
04864     for(j=i+1; j<k; j++){
04865       if( pgnoNew[j]<(unsigned)minV ){
04866         minI = j;
04867         minV = pgnoNew[j];
04868       }
04869     }
04870     if( minI>i ){
04871       int t;
04872       MemPage *pT;
04873       t = pgnoNew[i];
04874       pT = apNew[i];
04875       pgnoNew[i] = pgnoNew[minI];
04876       apNew[i] = apNew[minI];
04877       pgnoNew[minI] = t;
04878       apNew[minI] = pT;
04879     }
04880   }
04881   TRACE(("BALANCE: old: %d %d %d  new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
04882     pgnoOld[0], 
04883     nOld>=2 ? pgnoOld[1] : 0,
04884     nOld>=3 ? pgnoOld[2] : 0,
04885     pgnoNew[0], szNew[0],
04886     nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
04887     nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
04888     nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
04889     nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
04890 
04891   /*
04892   ** Evenly distribute the data in apCell[] across the new pages.
04893   ** Insert divider cells into pParent as necessary.
04894   */
04895   j = 0;
04896   for(i=0; i<nNew; i++){
04897     /* Assemble the new sibling page. */
04898     MemPage *pNew = apNew[i];
04899     assert( j<nMaxCells );
04900     assert( pNew->pgno==pgnoNew[i] );
04901     assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
04902     assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
04903     assert( pNew->nOverflow==0 );
04904 
04905 #ifndef SQLITE_OMIT_AUTOVACUUM
04906     /* If this is an auto-vacuum database, update the pointer map entries
04907     ** that point to the siblings that were rearranged. These can be: left
04908     ** children of cells, the right-child of the page, or overflow pages
04909     ** pointed to by cells.
04910     */
04911     if( pBt->autoVacuum ){
04912       for(k=j; k<cntNew[i]; k++){
04913         assert( k<nMaxCells );
04914         if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
04915           rc = ptrmapPutOvfl(pNew, k-j);
04916           if( rc!=SQLITE_OK ){
04917             goto balance_cleanup;
04918           }
04919         }
04920       }
04921     }
04922 #endif
04923 
04924     j = cntNew[i];
04925 
04926     /* If the sibling page assembled above was not the right-most sibling,
04927     ** insert a divider cell into the parent page.
04928     */
04929     if( i<nNew-1 && j<nCell ){
04930       u8 *pCell;
04931       u8 *pTemp;
04932       int sz;
04933 
04934       assert( j<nMaxCells );
04935       pCell = apCell[j];
04936       sz = szCell[j] + leafCorrection;
04937       if( !pNew->leaf ){
04938         memcpy(&pNew->aData[8], pCell, 4);
04939         pTemp = 0;
04940       }else if( leafData ){
04941        /* If the tree is a leaf-data tree, and the siblings are leaves, 
04942         ** then there is no divider cell in apCell[]. Instead, the divider 
04943         ** cell consists of the integer key for the right-most cell of 
04944         ** the sibling-page assembled above only.
04945         */
04946         CellInfo info;
04947         j--;
04948         parseCellPtr(pNew, apCell[j], &info);
04949         pCell = &aSpace[iSpace];
04950         fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
04951         iSpace += sz;
04952         assert( iSpace<=pBt->pageSize*5 );
04953         pTemp = 0;
04954       }else{
04955         pCell -= 4;
04956         pTemp = &aSpace[iSpace];
04957         iSpace += sz;
04958         assert( iSpace<=pBt->pageSize*5 );
04959       }
04960       rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
04961       if( rc!=SQLITE_OK ) goto balance_cleanup;
04962       put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
04963 #ifndef SQLITE_OMIT_AUTOVACUUM
04964       /* If this is an auto-vacuum database, and not a leaf-data tree,
04965       ** then update the pointer map with an entry for the overflow page
04966       ** that the cell just inserted points to (if any).
04967       */
04968       if( pBt->autoVacuum && !leafData ){
04969         rc = ptrmapPutOvfl(pParent, nxDiv);
04970         if( rc!=SQLITE_OK ){
04971           goto balance_cleanup;
04972         }
04973       }
04974 #endif
04975       j++;
04976       nxDiv++;
04977     }
04978   }
04979   assert( j==nCell );
04980   assert( nOld>0 );
04981   assert( nNew>0 );
04982   if( (pageFlags & PTF_LEAF)==0 ){
04983     memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
04984   }
04985   if( nxDiv==pParent->nCell+pParent->nOverflow ){
04986     /* Right-most sibling is the right-most child of pParent */
04987     put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
04988   }else{
04989     /* Right-most sibling is the left child of the first entry in pParent
04990     ** past the right-most divider entry */
04991     put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
04992   }
04993 
04994   /*
04995   ** Reparent children of all cells.
04996   */
04997   for(i=0; i<nNew; i++){
04998     rc = reparentChildPages(apNew[i]);
04999     if( rc!=SQLITE_OK ) goto balance_cleanup;
05000   }
05001   rc = reparentChildPages(pParent);
05002   if( rc!=SQLITE_OK ) goto balance_cleanup;
05003 
05004   /*
05005   ** Balance the parent page.  Note that the current page (pPage) might
05006   ** have been added to the freelist so it might no longer be initialized.
05007   ** But the parent page will always be initialized.
05008   */
05009   assert( pParent->isInit );
05010   /* assert( pPage->isInit ); // No! pPage might have been added to freelist */
05011   /* pageIntegrity(pPage);    // No! pPage might have been added to freelist */ 
05012   rc = balance(pParent, 0);
05013   
05014   /*
05015   ** Cleanup before returning.
05016   */
05017 balance_cleanup:
05018   sqliteFree(apCell);
05019   for(i=0; i<nOld; i++){
05020     releasePage(apOld[i]);
05021   }
05022   for(i=0; i<nNew; i++){
05023     releasePage(apNew[i]);
05024   }
05025   releasePage(pParent);
05026   TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
05027           pPage->pgno, nOld, nNew, nCell));
05028   return rc;
05029 }
05030 
05031 /*
05032 ** This routine is called for the root page of a btree when the root
05033 ** page contains no cells.  This is an opportunity to make the tree
05034 ** shallower by one level.
05035 */
05036 static int balance_shallower(MemPage *pPage){
05037   MemPage *pChild;             /* The only child page of pPage */
05038   Pgno pgnoChild;              /* Page number for pChild */
05039   int rc = SQLITE_OK;          /* Return code from subprocedures */
05040   BtShared *pBt;                  /* The main BTree structure */
05041   int mxCellPerPage;           /* Maximum number of cells per page */
05042   u8 **apCell;                 /* All cells from pages being balanced */
05043   int *szCell;                 /* Local size of all cells */
05044 
05045   assert( pPage->pParent==0 );
05046   assert( pPage->nCell==0 );
05047   pBt = pPage->pBt;
05048   mxCellPerPage = MX_CELL(pBt);
05049   apCell = sqliteMallocRaw( mxCellPerPage*(sizeof(u8*)+sizeof(int)) );
05050   if( apCell==0 ) return SQLITE_NOMEM;
05051   szCell = (int*)&apCell[mxCellPerPage];
05052   if( pPage->leaf ){
05053     /* The table is completely empty */
05054     TRACE(("BALANCE: empty table %d\n", pPage->pgno));
05055   }else{
05056     /* The root page is empty but has one child.  Transfer the
05057     ** information from that one child into the root page if it 
05058     ** will fit.  This reduces the depth of the tree by one.
05059     **
05060     ** If the root page is page 1, it has less space available than
05061     ** its child (due to the 100 byte header that occurs at the beginning
05062     ** of the database fle), so it might not be able to hold all of the 
05063     ** information currently contained in the child.  If this is the 
05064     ** case, then do not do the transfer.  Leave page 1 empty except
05065     ** for the right-pointer to the child page.  The child page becomes
05066     ** the virtual root of the tree.
05067     */
05068     pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
05069     assert( pgnoChild>0 );
05070     assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
05071     rc = getPage(pPage->pBt, pgnoChild, &pChild);
05072     if( rc ) goto end_shallow_balance;
05073     if( pPage->pgno==1 ){
05074       rc = initPage(pChild, pPage);
05075       if( rc ) goto end_shallow_balance;
05076       assert( pChild->nOverflow==0 );
05077       if( pChild->nFree>=100 ){
05078         /* The child information will fit on the root page, so do the
05079         ** copy */
05080         int i;
05081         zeroPage(pPage, pChild->aData[0]);
05082         for(i=0; i<pChild->nCell; i++){
05083           apCell[i] = findCell(pChild,i);
05084           szCell[i] = cellSizePtr(pChild, apCell[i]);
05085         }
05086         assemblePage(pPage, pChild->nCell, apCell, szCell);
05087         /* Copy the right-pointer of the child to the parent. */
05088         put4byte(&pPage->aData[pPage->hdrOffset+8], 
05089             get4byte(&pChild->aData[pChild->hdrOffset+8]));
05090         freePage(pChild);
05091         TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
05092       }else{
05093         /* The child has more information that will fit on the root.
05094         ** The tree is already balanced.  Do nothing. */
05095         TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
05096       }
05097     }else{
05098       memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
05099       pPage->isInit = 0;
05100       pPage->pParent = 0;
05101       rc = initPage(pPage, 0);
05102       assert( rc==SQLITE_OK );
05103       freePage(pChild);
05104       TRACE(("BALANCE: transfer child %d into root %d\n",
05105               pChild->pgno, pPage->pgno));
05106     }
05107     rc = reparentChildPages(pPage);
05108     assert( pPage->nOverflow==0 );
05109 #ifndef SQLITE_OMIT_AUTOVACUUM
05110     if( pBt->autoVacuum ){
05111       int i;
05112       for(i=0; i<pPage->nCell; i++){ 
05113         rc = ptrmapPutOvfl(pPage, i);
05114         if( rc!=SQLITE_OK ){
05115           goto end_shallow_balance;
05116         }
05117       }
05118     }
05119 #endif
05120     if( rc!=SQLITE_OK ) goto end_shallow_balance;
05121     releasePage(pChild);
05122   }
05123 end_shallow_balance:
05124   sqliteFree(apCell);
05125   return rc;
05126 }
05127 
05128 
05129 /*
05130 ** The root page is overfull
05131 **
05132 ** When this happens, Create a new child page and copy the
05133 ** contents of the root into the child.  Then make the root
05134 ** page an empty page with rightChild pointing to the new
05135 ** child.   Finally, call balance_internal() on the new child
05136 ** to cause it to split.
05137 */
05138 static int balance_deeper(MemPage *pPage){
05139   int rc;             /* Return value from subprocedures */
05140   MemPage *pChild;    /* Pointer to a new child page */
05141   Pgno pgnoChild;     /* Page number of the new child page */
05142   BtShared *pBt;         /* The BTree */
05143   int usableSize;     /* Total usable size of a page */
05144   u8 *data;           /* Content of the parent page */
05145   u8 *cdata;          /* Content of the child page */
05146   int hdr;            /* Offset to page header in parent */
05147   int brk;            /* Offset to content of first cell in parent */
05148 
05149   assert( pPage->pParent==0 );
05150   assert( pPage->nOverflow>0 );
05151   pBt = pPage->pBt;
05152   rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
05153   if( rc ) return rc;
05154   assert( sqlite3pager_iswriteable(pChild->aData) );
05155   usableSize = pBt->usableSize;
05156   data = pPage->aData;
05157   hdr = pPage->hdrOffset;
05158   brk = get2byte(&data[hdr+5]);
05159   cdata = pChild->aData;
05160   memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
05161   memcpy(&cdata[brk], &data[brk], usableSize-brk);
05162   assert( pChild->isInit==0 );
05163   rc = initPage(pChild, pPage);
05164   if( rc ) goto balancedeeper_out;
05165   memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
05166   pChild->nOverflow = pPage->nOverflow;
05167   if( pChild->nOverflow ){
05168     pChild->nFree = 0;
05169   }
05170   assert( pChild->nCell==pPage->nCell );
05171   zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
05172   put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
05173   TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
05174 #ifndef SQLITE_OMIT_AUTOVACUUM
05175   if( pBt->autoVacuum ){
05176     int i;
05177     rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
05178     if( rc ) goto balancedeeper_out;
05179     for(i=0; i<pChild->nCell; i++){
05180       rc = ptrmapPutOvfl(pChild, i);
05181       if( rc!=SQLITE_OK ){
05182         return rc;
05183       }
05184     }
05185   }
05186 #endif
05187   rc = balance_nonroot(pChild);
05188 
05189 balancedeeper_out:
05190   releasePage(pChild);
05191   return rc;
05192 }
05193 
05194 /*
05195 ** Decide if the page pPage needs to be balanced.  If balancing is
05196 ** required, call the appropriate balancing routine.
05197 */
05198 static int balance(MemPage *pPage, int insert){
05199   int rc = SQLITE_OK;
05200   if( pPage->pParent==0 ){
05201     if( pPage->nOverflow>0 ){
05202       rc = balance_deeper(pPage);
05203     }
05204     if( rc==SQLITE_OK && pPage->nCell==0 ){
05205       rc = balance_shallower(pPage);
05206     }
05207   }else{
05208     if( pPage->nOverflow>0 || 
05209         (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
05210       rc = balance_nonroot(pPage);
05211     }
05212   }
05213   return rc;
05214 }
05215 
05216 /*
05217 ** This routine checks all cursors that point to table pgnoRoot.
05218 ** If any of those cursors other than pExclude were opened with 
05219 ** wrFlag==0 then this routine returns SQLITE_LOCKED.  If all
05220 ** cursors that point to pgnoRoot were opened with wrFlag==1
05221 ** then this routine returns SQLITE_OK.
05222 **
05223 ** In addition to checking for read-locks (where a read-lock 
05224 ** means a cursor opened with wrFlag==0) this routine also moves
05225 ** all cursors other than pExclude so that they are pointing to the 
05226 ** first Cell on root page.  This is necessary because an insert 
05227 ** or delete might change the number of cells on a page or delete
05228 ** a page entirely and we do not want to leave any cursors 
05229 ** pointing to non-existant pages or cells.
05230 */
05231 static int checkReadLocks(BtShared *pBt, Pgno pgnoRoot, BtCursor *pExclude){
05232   BtCursor *p;
05233   for(p=pBt->pCursor; p; p=p->pNext){
05234     u32 flags = (p->pBtree->pSqlite ? p->pBtree->pSqlite->flags : 0);
05235     if( p->pgnoRoot!=pgnoRoot || p==pExclude ) continue;
05236     if( p->wrFlag==0 && flags&SQLITE_ReadUncommitted ) continue;
05237     if( p->wrFlag==0 ) return SQLITE_LOCKED;
05238     if( p->pPage->pgno!=p->pgnoRoot ){
05239       moveToRoot(p);
05240     }
05241   }
05242   return SQLITE_OK;
05243 }
05244 
05245 /*
05246 ** Insert a new record into the BTree.  The key is given by (pKey,nKey)
05247 ** and the data is given by (pData,nData).  The cursor is used only to
05248 ** define what table the record should be inserted into.  The cursor
05249 ** is left pointing at a random location.
05250 **
05251 ** For an INTKEY table, only the nKey value of the key is used.  pKey is
05252 ** ignored.  For a ZERODATA table, the pData and nData are both ignored.
05253 */
05254 int sqlite3BtreeInsert(
05255   BtCursor *pCur,                /* Insert data into the table of this cursor */
05256   const void *pKey, i64 nKey,    /* The key of the new record */
05257   const void *pData, int nData   /* The data of the new record */
05258 ){
05259   int rc;
05260   int loc;
05261   int szNew;
05262   MemPage *pPage;
05263   BtShared *pBt = pCur->pBtree->pBt;
05264   unsigned char *oldCell;
05265   unsigned char *newCell = 0;
05266 
05267   if( pBt->inTransaction!=TRANS_WRITE ){
05268     /* Must start a transaction before doing an insert */
05269     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05270   }
05271   assert( !pBt->readOnly );
05272   if( !pCur->wrFlag ){
05273     return SQLITE_PERM;   /* Cursor not open for writing */
05274   }
05275   if( checkReadLocks(pBt, pCur->pgnoRoot, pCur) ){
05276     return SQLITE_LOCKED; /* The table pCur points to has a read lock */
05277   }
05278 
05279   /* Save the positions of any other cursors open on this table */
05280   restoreOrClearCursorPosition(pCur, 0);
05281   if( 
05282     SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
05283     SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc))
05284   ){
05285     return rc;
05286   }
05287 
05288   pPage = pCur->pPage;
05289   assert( pPage->intKey || nKey>=0 );
05290   assert( pPage->leaf || !pPage->leafData );
05291   TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
05292           pCur->pgnoRoot, nKey, nData, pPage->pgno,
05293           loc==0 ? "overwrite" : "new entry"));
05294   assert( pPage->isInit );
05295   rc = sqlite3pager_write(pPage->aData);
05296   if( rc ) return rc;
05297   newCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
05298   if( newCell==0 ) return SQLITE_NOMEM;
05299   rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, &szNew);
05300   if( rc ) goto end_insert;
05301   assert( szNew==cellSizePtr(pPage, newCell) );
05302   assert( szNew<=MX_CELL_SIZE(pBt) );
05303   if( loc==0 && CURSOR_VALID==pCur->eState ){
05304     int szOld;
05305     assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
05306     oldCell = findCell(pPage, pCur->idx);
05307     if( !pPage->leaf ){
05308       memcpy(newCell, oldCell, 4);
05309     }
05310     szOld = cellSizePtr(pPage, oldCell);
05311     rc = clearCell(pPage, oldCell);
05312     if( rc ) goto end_insert;
05313     dropCell(pPage, pCur->idx, szOld);
05314   }else if( loc<0 && pPage->nCell>0 ){
05315     assert( pPage->leaf );
05316     pCur->idx++;
05317     pCur->info.nSize = 0;
05318   }else{
05319     assert( pPage->leaf );
05320   }
05321   rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
05322   if( rc!=SQLITE_OK ) goto end_insert;
05323   rc = balance(pPage, 1);
05324   /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
05325   /* fflush(stdout); */
05326   if( rc==SQLITE_OK ){
05327     moveToRoot(pCur);
05328   }
05329 end_insert:
05330   sqliteFree(newCell);
05331   return rc;
05332 }
05333 
05334 /*
05335 ** Delete the entry that the cursor is pointing to.  The cursor
05336 ** is left pointing at a random location.
05337 */
05338 int sqlite3BtreeDelete(BtCursor *pCur){
05339   MemPage *pPage = pCur->pPage;
05340   unsigned char *pCell;
05341   int rc;
05342   Pgno pgnoChild = 0;
05343   BtShared *pBt = pCur->pBtree->pBt;
05344 
05345   assert( pPage->isInit );
05346   if( pBt->inTransaction!=TRANS_WRITE ){
05347     /* Must start a transaction before doing a delete */
05348     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05349   }
05350   assert( !pBt->readOnly );
05351   if( pCur->idx >= pPage->nCell ){
05352     return SQLITE_ERROR;  /* The cursor is not pointing to anything */
05353   }
05354   if( !pCur->wrFlag ){
05355     return SQLITE_PERM;   /* Did not open this cursor for writing */
05356   }
05357   if( checkReadLocks(pBt, pCur->pgnoRoot, pCur) ){
05358     return SQLITE_LOCKED; /* The table pCur points to has a read lock */
05359   }
05360 
05361   /* Restore the current cursor position (a no-op if the cursor is not in 
05362   ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors 
05363   ** open on the same table. Then call sqlite3pager_write() on the page
05364   ** that the entry will be deleted from.
05365   */
05366   if( 
05367     (rc = restoreOrClearCursorPosition(pCur, 1))!=0 ||
05368     (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
05369     (rc = sqlite3pager_write(pPage->aData))!=0
05370   ){
05371     return rc;
05372   }
05373 
05374   /* Locate the cell within it's page and leave pCell pointing to the
05375   ** data. The clearCell() call frees any overflow pages associated with the
05376   ** cell. The cell itself is still intact.
05377   */
05378   pCell = findCell(pPage, pCur->idx);
05379   if( !pPage->leaf ){
05380     pgnoChild = get4byte(pCell);
05381   }
05382   rc = clearCell(pPage, pCell);
05383   if( rc ) return rc;
05384 
05385   if( !pPage->leaf ){
05386     /*
05387     ** The entry we are about to delete is not a leaf so if we do not
05388     ** do something we will leave a hole on an internal page.
05389     ** We have to fill the hole by moving in a cell from a leaf.  The
05390     ** next Cell after the one to be deleted is guaranteed to exist and
05391     ** to be a leaf so we can use it.
05392     */
05393     BtCursor leafCur;
05394     unsigned char *pNext;
05395     int szNext;  /* The compiler warning is wrong: szNext is always 
05396                  ** initialized before use.  Adding an extra initialization
05397                  ** to silence the compiler slows down the code. */
05398     int notUsed;
05399     unsigned char *tempCell = 0;
05400     assert( !pPage->leafData );
05401     getTempCursor(pCur, &leafCur);
05402     rc = sqlite3BtreeNext(&leafCur, &notUsed);
05403     if( rc!=SQLITE_OK ){
05404       if( rc!=SQLITE_NOMEM ){
05405         rc = SQLITE_CORRUPT_BKPT; 
05406       }
05407     }
05408     if( rc==SQLITE_OK ){
05409       rc = sqlite3pager_write(leafCur.pPage->aData);
05410     }
05411     if( rc==SQLITE_OK ){
05412       TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
05413          pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
05414       dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
05415       pNext = findCell(leafCur.pPage, leafCur.idx);
05416       szNext = cellSizePtr(leafCur.pPage, pNext);
05417       assert( MX_CELL_SIZE(pBt)>=szNext+4 );
05418       tempCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
05419       if( tempCell==0 ){
05420         rc = SQLITE_NOMEM;
05421       }
05422     }
05423     if( rc==SQLITE_OK ){
05424       rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
05425     }
05426     if( rc==SQLITE_OK ){
05427       put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
05428       rc = balance(pPage, 0);
05429     }
05430     if( rc==SQLITE_OK ){
05431       dropCell(leafCur.pPage, leafCur.idx, szNext);
05432       rc = balance(leafCur.pPage, 0);
05433     }
05434     sqliteFree(tempCell);
05435     releaseTempCursor(&leafCur);
05436   }else{
05437     TRACE(("DELETE: table=%d delete from leaf %d\n",
05438        pCur->pgnoRoot, pPage->pgno));
05439     dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
05440     rc = balance(pPage, 0);
05441   }
05442   if( rc==SQLITE_OK ){
05443     moveToRoot(pCur);
05444   }
05445   return rc;
05446 }
05447 
05448 /*
05449 ** Create a new BTree table.  Write into *piTable the page
05450 ** number for the root page of the new table.
05451 **
05452 ** The type of type is determined by the flags parameter.  Only the
05453 ** following values of flags are currently in use.  Other values for
05454 ** flags might not work:
05455 **
05456 **     BTREE_INTKEY|BTREE_LEAFDATA     Used for SQL tables with rowid keys
05457 **     BTREE_ZERODATA                  Used for SQL indices
05458 */
05459 int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
05460   BtShared *pBt = p->pBt;
05461   MemPage *pRoot;
05462   Pgno pgnoRoot;
05463   int rc;
05464   if( pBt->inTransaction!=TRANS_WRITE ){
05465     /* Must start a transaction first */
05466     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05467   }
05468   assert( !pBt->readOnly );
05469 
05470   /* It is illegal to create a table if any cursors are open on the
05471   ** database. This is because in auto-vacuum mode the backend may
05472   ** need to move a database page to make room for the new root-page.
05473   ** If an open cursor was using the page a problem would occur.
05474   */
05475   if( pBt->pCursor ){
05476     return SQLITE_LOCKED;
05477   }
05478 
05479 #ifdef SQLITE_OMIT_AUTOVACUUM
05480   rc = allocatePage(pBt, &pRoot, &pgnoRoot, 1, 0);
05481   if( rc ) return rc;
05482 #else
05483   if( pBt->autoVacuum ){
05484     Pgno pgnoMove;      /* Move a page here to make room for the root-page */
05485     MemPage *pPageMove; /* The page to move to. */
05486 
05487     /* Read the value of meta[3] from the database to determine where the
05488     ** root page of the new table should go. meta[3] is the largest root-page
05489     ** created so far, so the new root-page is (meta[3]+1).
05490     */
05491     rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
05492     if( rc!=SQLITE_OK ) return rc;
05493     pgnoRoot++;
05494 
05495     /* The new root-page may not be allocated on a pointer-map page, or the
05496     ** PENDING_BYTE page.
05497     */
05498     if( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
05499         pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
05500       pgnoRoot++;
05501     }
05502     assert( pgnoRoot>=3 );
05503 
05504     /* Allocate a page. The page that currently resides at pgnoRoot will
05505     ** be moved to the allocated page (unless the allocated page happens
05506     ** to reside at pgnoRoot).
05507     */
05508     rc = allocatePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
05509     if( rc!=SQLITE_OK ){
05510       return rc;
05511     }
05512 
05513     if( pgnoMove!=pgnoRoot ){
05514       u8 eType;
05515       Pgno iPtrPage;
05516 
05517       releasePage(pPageMove);
05518       rc = getPage(pBt, pgnoRoot, &pRoot);
05519       if( rc!=SQLITE_OK ){
05520         return rc;
05521       }
05522       rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
05523       if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
05524         releasePage(pRoot);
05525         return rc;
05526       }
05527       assert( eType!=PTRMAP_ROOTPAGE );
05528       assert( eType!=PTRMAP_FREEPAGE );
05529       rc = sqlite3pager_write(pRoot->aData);
05530       if( rc!=SQLITE_OK ){
05531         releasePage(pRoot);
05532         return rc;
05533       }
05534       rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
05535       releasePage(pRoot);
05536       if( rc!=SQLITE_OK ){
05537         return rc;
05538       }
05539       rc = getPage(pBt, pgnoRoot, &pRoot);
05540       if( rc!=SQLITE_OK ){
05541         return rc;
05542       }
05543       rc = sqlite3pager_write(pRoot->aData);
05544       if( rc!=SQLITE_OK ){
05545         releasePage(pRoot);
05546         return rc;
05547       }
05548     }else{
05549       pRoot = pPageMove;
05550     } 
05551 
05552     /* Update the pointer-map and meta-data with the new root-page number. */
05553     rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
05554     if( rc ){
05555       releasePage(pRoot);
05556       return rc;
05557     }
05558     rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
05559     if( rc ){
05560       releasePage(pRoot);
05561       return rc;
05562     }
05563 
05564   }else{
05565     rc = allocatePage(pBt, &pRoot, &pgnoRoot, 1, 0);
05566     if( rc ) return rc;
05567   }
05568 #endif
05569   assert( sqlite3pager_iswriteable(pRoot->aData) );
05570   zeroPage(pRoot, flags | PTF_LEAF);
05571   sqlite3pager_unref(pRoot->aData);
05572   *piTable = (int)pgnoRoot;
05573   return SQLITE_OK;
05574 }
05575 
05576 /*
05577 ** Erase the given database page and all its children.  Return
05578 ** the page to the freelist.
05579 */
05580 static int clearDatabasePage(
05581   BtShared *pBt,           /* The BTree that contains the table */
05582   Pgno pgno,            /* Page number to clear */
05583   MemPage *pParent,     /* Parent page.  NULL for the root */
05584   int freePageFlag      /* Deallocate page if true */
05585 ){
05586   MemPage *pPage = 0;
05587   int rc;
05588   unsigned char *pCell;
05589   int i;
05590 
05591   if( pgno>sqlite3pager_pagecount(pBt->pPager) ){
05592     return SQLITE_CORRUPT_BKPT;
05593   }
05594 
05595   rc = getAndInitPage(pBt, pgno, &pPage, pParent);
05596   if( rc ) goto cleardatabasepage_out;
05597   rc = sqlite3pager_write(pPage->aData);
05598   if( rc ) goto cleardatabasepage_out;
05599   for(i=0; i<pPage->nCell; i++){
05600     pCell = findCell(pPage, i);
05601     if( !pPage->leaf ){
05602       rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
05603       if( rc ) goto cleardatabasepage_out;
05604     }
05605     rc = clearCell(pPage, pCell);
05606     if( rc ) goto cleardatabasepage_out;
05607   }
05608   if( !pPage->leaf ){
05609     rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
05610     if( rc ) goto cleardatabasepage_out;
05611   }
05612   if( freePageFlag ){
05613     rc = freePage(pPage);
05614   }else{
05615     zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
05616   }
05617 
05618 cleardatabasepage_out:
05619   releasePage(pPage);
05620   return rc;
05621 }
05622 
05623 /*
05624 ** Delete all information from a single table in the database.  iTable is
05625 ** the page number of the root of the table.  After this routine returns,
05626 ** the root page is empty, but still exists.
05627 **
05628 ** This routine will fail with SQLITE_LOCKED if there are any open
05629 ** read cursors on the table.  Open write cursors are moved to the
05630 ** root of the table.
05631 */
05632 int sqlite3BtreeClearTable(Btree *p, int iTable){
05633   int rc;
05634   BtCursor *pCur;
05635   BtShared *pBt = p->pBt;
05636   sqlite3 *db = p->pSqlite;
05637   if( p->inTrans!=TRANS_WRITE ){
05638     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05639   }
05640 
05641   /* If this connection is not in read-uncommitted mode and currently has
05642   ** a read-cursor open on the table being cleared, return SQLITE_LOCKED.
05643   */
05644   if( 0==db || 0==(db->flags&SQLITE_ReadUncommitted) ){
05645     for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
05646       if( pCur->pBtree==p && pCur->pgnoRoot==(Pgno)iTable ){
05647         if( 0==pCur->wrFlag ){
05648           return SQLITE_LOCKED;
05649         }
05650         moveToRoot(pCur);
05651       }
05652     }
05653   }
05654 
05655   /* Save the position of all cursors open on this table */
05656   if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
05657     return rc;
05658   }
05659 
05660   return clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
05661 }
05662 
05663 /*
05664 ** Erase all information in a table and add the root of the table to
05665 ** the freelist.  Except, the root of the principle table (the one on
05666 ** page 1) is never added to the freelist.
05667 **
05668 ** This routine will fail with SQLITE_LOCKED if there are any open
05669 ** cursors on the table.
05670 **
05671 ** If AUTOVACUUM is enabled and the page at iTable is not the last
05672 ** root page in the database file, then the last root page 
05673 ** in the database file is moved into the slot formerly occupied by
05674 ** iTable and that last slot formerly occupied by the last root page
05675 ** is added to the freelist instead of iTable.  In this say, all
05676 ** root pages are kept at the beginning of the database file, which
05677 ** is necessary for AUTOVACUUM to work right.  *piMoved is set to the 
05678 ** page number that used to be the last root page in the file before
05679 ** the move.  If no page gets moved, *piMoved is set to 0.
05680 ** The last root page is recorded in meta[3] and the value of
05681 ** meta[3] is updated by this procedure.
05682 */
05683 int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
05684   int rc;
05685   MemPage *pPage = 0;
05686   BtShared *pBt = p->pBt;
05687 
05688   if( p->inTrans!=TRANS_WRITE ){
05689     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05690   }
05691 
05692   /* It is illegal to drop a table if any cursors are open on the
05693   ** database. This is because in auto-vacuum mode the backend may
05694   ** need to move another root-page to fill a gap left by the deleted
05695   ** root page. If an open cursor was using this page a problem would 
05696   ** occur.
05697   */
05698   if( pBt->pCursor ){
05699     return SQLITE_LOCKED;
05700   }
05701 
05702   rc = getPage(pBt, (Pgno)iTable, &pPage);
05703   if( rc ) return rc;
05704   rc = sqlite3BtreeClearTable(p, iTable);
05705   if( rc ){
05706     releasePage(pPage);
05707     return rc;
05708   }
05709 
05710   *piMoved = 0;
05711 
05712   if( iTable>1 ){
05713 #ifdef SQLITE_OMIT_AUTOVACUUM
05714     rc = freePage(pPage);
05715     releasePage(pPage);
05716 #else
05717     if( pBt->autoVacuum ){
05718       Pgno maxRootPgno;
05719       rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
05720       if( rc!=SQLITE_OK ){
05721         releasePage(pPage);
05722         return rc;
05723       }
05724 
05725       if( iTable==maxRootPgno ){
05726         /* If the table being dropped is the table with the largest root-page
05727         ** number in the database, put the root page on the free list. 
05728         */
05729         rc = freePage(pPage);
05730         releasePage(pPage);
05731         if( rc!=SQLITE_OK ){
05732           return rc;
05733         }
05734       }else{
05735         /* The table being dropped does not have the largest root-page
05736         ** number in the database. So move the page that does into the 
05737         ** gap left by the deleted root-page.
05738         */
05739         MemPage *pMove;
05740         releasePage(pPage);
05741         rc = getPage(pBt, maxRootPgno, &pMove);
05742         if( rc!=SQLITE_OK ){
05743           return rc;
05744         }
05745         rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
05746         releasePage(pMove);
05747         if( rc!=SQLITE_OK ){
05748           return rc;
05749         }
05750         rc = getPage(pBt, maxRootPgno, &pMove);
05751         if( rc!=SQLITE_OK ){
05752           return rc;
05753         }
05754         rc = freePage(pMove);
05755         releasePage(pMove);
05756         if( rc!=SQLITE_OK ){
05757           return rc;
05758         }
05759         *piMoved = maxRootPgno;
05760       }
05761 
05762       /* Set the new 'max-root-page' value in the database header. This
05763       ** is the old value less one, less one more if that happens to
05764       ** be a root-page number, less one again if that is the
05765       ** PENDING_BYTE_PAGE.
05766       */
05767       maxRootPgno--;
05768       if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
05769         maxRootPgno--;
05770       }
05771       if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
05772         maxRootPgno--;
05773       }
05774       assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
05775 
05776       rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
05777     }else{
05778       rc = freePage(pPage);
05779       releasePage(pPage);
05780     }
05781 #endif
05782   }else{
05783     /* If sqlite3BtreeDropTable was called on page 1. */
05784     zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
05785     releasePage(pPage);
05786   }
05787   return rc;  
05788 }
05789 
05790 
05791 /*
05792 ** Read the meta-information out of a database file.  Meta[0]
05793 ** is the number of free pages currently in the database.  Meta[1]
05794 ** through meta[15] are available for use by higher layers.  Meta[0]
05795 ** is read-only, the others are read/write.
05796 ** 
05797 ** The schema layer numbers meta values differently.  At the schema
05798 ** layer (and the SetCookie and ReadCookie opcodes) the number of
05799 ** free pages is not visible.  So Cookie[0] is the same as Meta[1].
05800 */
05801 int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
05802   int rc;
05803   unsigned char *pP1;
05804   BtShared *pBt = p->pBt;
05805 
05806   /* Reading a meta-data value requires a read-lock on page 1 (and hence
05807   ** the sqlite_master table. We grab this lock regardless of whether or
05808   ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
05809   ** 1 is treated as a special case by queryTableLock() and lockTable()).
05810   */
05811   rc = queryTableLock(p, 1, READ_LOCK);
05812   if( rc!=SQLITE_OK ){
05813     return rc;
05814   }
05815 
05816   assert( idx>=0 && idx<=15 );
05817   rc = sqlite3pager_get(pBt->pPager, 1, (void**)&pP1);
05818   if( rc ) return rc;
05819   *pMeta = get4byte(&pP1[36 + idx*4]);
05820   sqlite3pager_unref(pP1);
05821 
05822   /* If autovacuumed is disabled in this build but we are trying to 
05823   ** access an autovacuumed database, then make the database readonly. 
05824   */
05825 #ifdef SQLITE_OMIT_AUTOVACUUM
05826   if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
05827 #endif
05828 
05829   /* Grab the read-lock on page 1. */
05830   rc = lockTable(p, 1, READ_LOCK);
05831   return rc;
05832 }
05833 
05834 /*
05835 ** Write meta-information back into the database.  Meta[0] is
05836 ** read-only and may not be written.
05837 */
05838 int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
05839   BtShared *pBt = p->pBt;
05840   unsigned char *pP1;
05841   int rc;
05842   assert( idx>=1 && idx<=15 );
05843   if( p->inTrans!=TRANS_WRITE ){
05844     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
05845   }
05846   assert( pBt->pPage1!=0 );
05847   pP1 = pBt->pPage1->aData;
05848   rc = sqlite3pager_write(pP1);
05849   if( rc ) return rc;
05850   put4byte(&pP1[36 + idx*4], iMeta);
05851   return SQLITE_OK;
05852 }
05853 
05854 /*
05855 ** Return the flag byte at the beginning of the page that the cursor
05856 ** is currently pointing to.
05857 */
05858 int sqlite3BtreeFlags(BtCursor *pCur){
05859   /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
05860   ** restoreOrClearCursorPosition() here.
05861   */
05862   MemPage *pPage = pCur->pPage;
05863   return pPage ? pPage->aData[pPage->hdrOffset] : 0;
05864 }
05865 
05866 #ifdef SQLITE_DEBUG
05867 /*
05868 ** Print a disassembly of the given page on standard output.  This routine
05869 ** is used for debugging and testing only.
05870 */
05871 static int btreePageDump(BtShared *pBt, int pgno, int recursive, MemPage *pParent){
05872   int rc;
05873   MemPage *pPage;
05874   int i, j, c;
05875   int nFree;
05876   u16 idx;
05877   int hdr;
05878   int nCell;
05879   int isInit;
05880   unsigned char *data;
05881   char range[20];
05882   unsigned char payload[20];
05883 
05884   rc = getPage(pBt, (Pgno)pgno, &pPage);
05885   isInit = pPage->isInit;
05886   if( pPage->isInit==0 ){
05887     initPage(pPage, pParent);
05888   }
05889   if( rc ){
05890     return rc;
05891   }
05892   hdr = pPage->hdrOffset;
05893   data = pPage->aData;
05894   c = data[hdr];
05895   pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0;
05896   pPage->zeroData = (c & PTF_ZERODATA)!=0;
05897   pPage->leafData = (c & PTF_LEAFDATA)!=0;
05898   pPage->leaf = (c & PTF_LEAF)!=0;
05899   pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
05900   nCell = get2byte(&data[hdr+3]);
05901   sqlite3DebugPrintf("PAGE %d:  flags=0x%02x  frag=%d   parent=%d\n", pgno,
05902     data[hdr], data[hdr+7], 
05903     (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
05904   assert( hdr == (pgno==1 ? 100 : 0) );
05905   idx = hdr + 12 - pPage->leaf*4;
05906   for(i=0; i<nCell; i++){
05907     CellInfo info;
05908     Pgno child;
05909     unsigned char *pCell;
05910     int sz;
05911     int addr;
05912 
05913     addr = get2byte(&data[idx + 2*i]);
05914     pCell = &data[addr];
05915     parseCellPtr(pPage, pCell, &info);
05916     sz = info.nSize;
05917     sprintf(range,"%d..%d", addr, addr+sz-1);
05918     if( pPage->leaf ){
05919       child = 0;
05920     }else{
05921       child = get4byte(pCell);
05922     }
05923     sz = info.nData;
05924     if( !pPage->intKey ) sz += info.nKey;
05925     if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
05926     memcpy(payload, &pCell[info.nHeader], sz);
05927     for(j=0; j<sz; j++){
05928       if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
05929     }
05930     payload[sz] = 0;
05931     sqlite3DebugPrintf(
05932       "cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4d payload=%s\n",
05933       i, range, child, info.nKey, info.nData, payload
05934     );
05935   }
05936   if( !pPage->leaf ){
05937     sqlite3DebugPrintf("right_child: %d\n", get4byte(&data[hdr+8]));
05938   }
05939   nFree = 0;
05940   i = 0;
05941   idx = get2byte(&data[hdr+1]);
05942   while( idx>0 && idx<pPage->pBt->usableSize ){
05943     int sz = get2byte(&data[idx+2]);
05944     sprintf(range,"%d..%d", idx, idx+sz-1);
05945     nFree += sz;
05946     sqlite3DebugPrintf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
05947        i, range, sz, nFree);
05948     idx = get2byte(&data[idx]);
05949     i++;
05950   }
05951   if( idx!=0 ){
05952     sqlite3DebugPrintf("ERROR: next freeblock index out of range: %d\n", idx);
05953   }
05954   if( recursive && !pPage->leaf ){
05955     for(i=0; i<nCell; i++){
05956       unsigned char *pCell = findCell(pPage, i);
05957       btreePageDump(pBt, get4byte(pCell), 1, pPage);
05958       idx = get2byte(pCell);
05959     }
05960     btreePageDump(pBt, get4byte(&data[hdr+8]), 1, pPage);
05961   }
05962   pPage->isInit = isInit;
05963   sqlite3pager_unref(data);
05964   fflush(stdout);
05965   return SQLITE_OK;
05966 }
05967 int sqlite3BtreePageDump(Btree *p, int pgno, int recursive){
05968   return btreePageDump(p->pBt, pgno, recursive, 0);
05969 }
05970 #endif
05971 
05972 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
05973 /*
05974 ** Fill aResult[] with information about the entry and page that the
05975 ** cursor is pointing to.
05976 ** 
05977 **   aResult[0] =  The page number
05978 **   aResult[1] =  The entry number
05979 **   aResult[2] =  Total number of entries on this page
05980 **   aResult[3] =  Cell size (local payload + header)
05981 **   aResult[4] =  Number of free bytes on this page
05982 **   aResult[5] =  Number of free blocks on the page
05983 **   aResult[6] =  Total payload size (local + overflow)
05984 **   aResult[7] =  Header size in bytes
05985 **   aResult[8] =  Local payload size
05986 **   aResult[9] =  Parent page number
05987 **
05988 ** This routine is used for testing and debugging only.
05989 */
05990 int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){
05991   int cnt, idx;
05992   MemPage *pPage = pCur->pPage;
05993   BtCursor tmpCur;
05994 
05995   int rc = restoreOrClearCursorPosition(pCur, 1);
05996   if( rc!=SQLITE_OK ){
05997     return rc;
05998   }
05999 
06000   pageIntegrity(pPage);
06001   assert( pPage->isInit );
06002   getTempCursor(pCur, &tmpCur);
06003   while( upCnt-- ){
06004     moveToParent(&tmpCur);
06005   }
06006   pPage = tmpCur.pPage;
06007   pageIntegrity(pPage);
06008   aResult[0] = sqlite3pager_pagenumber(pPage->aData);
06009   assert( aResult[0]==pPage->pgno );
06010   aResult[1] = tmpCur.idx;
06011   aResult[2] = pPage->nCell;
06012   if( tmpCur.idx>=0 && tmpCur.idx<pPage->nCell ){
06013     getCellInfo(&tmpCur);
06014     aResult[3] = tmpCur.info.nSize;
06015     aResult[6] = tmpCur.info.nData;
06016     aResult[7] = tmpCur.info.nHeader;
06017     aResult[8] = tmpCur.info.nLocal;
06018   }else{
06019     aResult[3] = 0;
06020     aResult[6] = 0;
06021     aResult[7] = 0;
06022     aResult[8] = 0;
06023   }
06024   aResult[4] = pPage->nFree;
06025   cnt = 0;
06026   idx = get2byte(&pPage->aData[pPage->hdrOffset+1]);
06027   while( idx>0 && idx<pPage->pBt->usableSize ){
06028     cnt++;
06029     idx = get2byte(&pPage->aData[idx]);
06030   }
06031   aResult[5] = cnt;
06032   if( pPage->pParent==0 || isRootPage(pPage) ){
06033     aResult[9] = 0;
06034   }else{
06035     aResult[9] = pPage->pParent->pgno;
06036   }
06037   releaseTempCursor(&tmpCur);
06038   return SQLITE_OK;
06039 }
06040 #endif
06041 
06042 /*
06043 ** Return the pager associated with a BTree.  This routine is used for
06044 ** testing and debugging only.
06045 */
06046 Pager *sqlite3BtreePager(Btree *p){
06047   return p->pBt->pPager;
06048 }
06049 
06050 /*
06051 ** This structure is passed around through all the sanity checking routines
06052 ** in order to keep track of some global state information.
06053 */
06054 typedef struct IntegrityCk IntegrityCk;
06055 struct IntegrityCk {
06056   BtShared *pBt;    /* The tree being checked out */
06057   Pager *pPager; /* The associated pager.  Also accessible by pBt->pPager */
06058   int nPage;     /* Number of pages in the database */
06059   int *anRef;    /* Number of times each page is referenced */
06060   char *zErrMsg; /* An error message.  NULL of no errors seen. */
06061 };
06062 
06063 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
06064 /*
06065 ** Append a message to the error message string.
06066 */
06067 static void checkAppendMsg(
06068   IntegrityCk *pCheck,
06069   char *zMsg1,
06070   const char *zFormat,
06071   ...
06072 ){
06073   va_list ap;
06074   char *zMsg2;
06075   va_start(ap, zFormat);
06076   zMsg2 = sqlite3VMPrintf(zFormat, ap);
06077   va_end(ap);
06078   if( zMsg1==0 ) zMsg1 = "";
06079   if( pCheck->zErrMsg ){
06080     char *zOld = pCheck->zErrMsg;
06081     pCheck->zErrMsg = 0;
06082     sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
06083     sqliteFree(zOld);
06084   }else{
06085     sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
06086   }
06087   sqliteFree(zMsg2);
06088 }
06089 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
06090 
06091 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
06092 /*
06093 ** Add 1 to the reference count for page iPage.  If this is the second
06094 ** reference to the page, add an error message to pCheck->zErrMsg.
06095 ** Return 1 if there are 2 ore more references to the page and 0 if
06096 ** if this is the first reference to the page.
06097 **
06098 ** Also check that the page number is in bounds.
06099 */
06100 static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
06101   if( iPage==0 ) return 1;
06102   if( iPage>pCheck->nPage || iPage<0 ){
06103     checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
06104     return 1;
06105   }
06106   if( pCheck->anRef[iPage]==1 ){
06107     checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
06108     return 1;
06109   }
06110   return  (pCheck->anRef[iPage]++)>1;
06111 }
06112 
06113 #ifndef SQLITE_OMIT_AUTOVACUUM
06114 /*
06115 ** Check that the entry in the pointer-map for page iChild maps to 
06116 ** page iParent, pointer type ptrType. If not, append an error message
06117 ** to pCheck.
06118 */
06119 static void checkPtrmap(
06120   IntegrityCk *pCheck,   /* Integrity check context */
06121   Pgno iChild,           /* Child page number */
06122   u8 eType,              /* Expected pointer map type */
06123   Pgno iParent,          /* Expected pointer map parent page number */
06124   char *zContext         /* Context description (used for error msg) */
06125 ){
06126   int rc;
06127   u8 ePtrmapType;
06128   Pgno iPtrmapParent;
06129 
06130   rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
06131   if( rc!=SQLITE_OK ){
06132     checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
06133     return;
06134   }
06135 
06136   if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
06137     checkAppendMsg(pCheck, zContext, 
06138       "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", 
06139       iChild, eType, iParent, ePtrmapType, iPtrmapParent);
06140   }
06141 }
06142 #endif
06143 
06144 /*
06145 ** Check the integrity of the freelist or of an overflow page list.
06146 ** Verify that the number of pages on the list is N.
06147 */
06148 static void checkList(
06149   IntegrityCk *pCheck,  /* Integrity checking context */
06150   int isFreeList,       /* True for a freelist.  False for overflow page list */
06151   int iPage,            /* Page number for first page in the list */
06152   int N,                /* Expected number of pages in the list */
06153   char *zContext        /* Context for error messages */
06154 ){
06155   int i;
06156   int expected = N;
06157   int iFirst = iPage;
06158   while( N-- > 0 ){
06159     unsigned char *pOvfl;
06160     if( iPage<1 ){
06161       checkAppendMsg(pCheck, zContext,
06162          "%d of %d pages missing from overflow list starting at %d",
06163           N+1, expected, iFirst);
06164       break;
06165     }
06166     if( checkRef(pCheck, iPage, zContext) ) break;
06167     if( sqlite3pager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
06168       checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
06169       break;
06170     }
06171     if( isFreeList ){
06172       int n = get4byte(&pOvfl[4]);
06173 #ifndef SQLITE_OMIT_AUTOVACUUM
06174       if( pCheck->pBt->autoVacuum ){
06175         checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
06176       }
06177 #endif
06178       if( n>pCheck->pBt->usableSize/4-8 ){
06179         checkAppendMsg(pCheck, zContext,
06180            "freelist leaf count too big on page %d", iPage);
06181         N--;
06182       }else{
06183         for(i=0; i<n; i++){
06184           Pgno iFreePage = get4byte(&pOvfl[8+i*4]);
06185 #ifndef SQLITE_OMIT_AUTOVACUUM
06186           if( pCheck->pBt->autoVacuum ){
06187             checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
06188           }
06189 #endif
06190           checkRef(pCheck, iFreePage, zContext);
06191         }
06192         N -= n;
06193       }
06194     }
06195 #ifndef SQLITE_OMIT_AUTOVACUUM
06196     else{
06197       /* If this database supports auto-vacuum and iPage is not the last
06198       ** page in this overflow list, check that the pointer-map entry for
06199       ** the following page matches iPage.
06200       */
06201       if( pCheck->pBt->autoVacuum && N>0 ){
06202         i = get4byte(pOvfl);
06203         checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
06204       }
06205     }
06206 #endif
06207     iPage = get4byte(pOvfl);
06208     sqlite3pager_unref(pOvfl);
06209   }
06210 }
06211 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
06212 
06213 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
06214 /*
06215 ** Do various sanity checks on a single page of a tree.  Return
06216 ** the tree depth.  Root pages return 0.  Parents of root pages
06217 ** return 1, and so forth.
06218 ** 
06219 ** These checks are done:
06220 **
06221 **      1.  Make sure that cells and freeblocks do not overlap
06222 **          but combine to completely cover the page.
06223 **  NO  2.  Make sure cell keys are in order.
06224 **  NO  3.  Make sure no key is less than or equal to zLowerBound.
06225 **  NO  4.  Make sure no key is greater than or equal to zUpperBound.
06226 **      5.  Check the integrity of overflow pages.
06227 **      6.  Recursively call checkTreePage on all children.
06228 **      7.  Verify that the depth of all children is the same.
06229 **      8.  Make sure this page is at least 33% full or else it is
06230 **          the root of the tree.
06231 */
06232 static int checkTreePage(
06233   IntegrityCk *pCheck,  /* Context for the sanity check */
06234   int iPage,            /* Page number of the page to check */
06235   MemPage *pParent,     /* Parent page */
06236   char *zParentContext  /* Parent context */
06237 ){
06238   MemPage *pPage;
06239   int i, rc, depth, d2, pgno, cnt;
06240   int hdr, cellStart;
06241   int nCell;
06242   u8 *data;
06243   BtShared *pBt;
06244   int usableSize;
06245   char zContext[100];
06246   char *hit;
06247 
06248   sprintf(zContext, "Page %d: ", iPage);
06249 
06250   /* Check that the page exists
06251   */
06252   pBt = pCheck->pBt;
06253   usableSize = pBt->usableSize;
06254   if( iPage==0 ) return 0;
06255   if( checkRef(pCheck, iPage, zParentContext) ) return 0;
06256   if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
06257     checkAppendMsg(pCheck, zContext,
06258        "unable to get the page. error code=%d", rc);
06259     return 0;
06260   }
06261   if( (rc = initPage(pPage, pParent))!=0 ){
06262     checkAppendMsg(pCheck, zContext, "initPage() returns error code %d", rc);
06263     releasePage(pPage);
06264     return 0;
06265   }
06266 
06267   /* Check out all the cells.
06268   */
06269   depth = 0;
06270   for(i=0; i<pPage->nCell; i++){
06271     u8 *pCell;
06272     int sz;
06273     CellInfo info;
06274 
06275     /* Check payload overflow pages
06276     */
06277     sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
06278     pCell = findCell(pPage,i);
06279     parseCellPtr(pPage, pCell, &info);
06280     sz = info.nData;
06281     if( !pPage->intKey ) sz += info.nKey;
06282     if( sz>info.nLocal ){
06283       int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
06284       Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
06285 #ifndef SQLITE_OMIT_AUTOVACUUM
06286       if( pBt->autoVacuum ){
06287         checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
06288       }
06289 #endif
06290       checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
06291     }
06292 
06293     /* Check sanity of left child page.
06294     */
06295     if( !pPage->leaf ){
06296       pgno = get4byte(pCell);
06297 #ifndef SQLITE_OMIT_AUTOVACUUM
06298       if( pBt->autoVacuum ){
06299         checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
06300       }
06301 #endif
06302       d2 = checkTreePage(pCheck,pgno,pPage,zContext);
06303       if( i>0 && d2!=depth ){
06304         checkAppendMsg(pCheck, zContext, "Child page depth differs");
06305       }
06306       depth = d2;
06307     }
06308   }
06309   if( !pPage->leaf ){
06310     pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
06311     sprintf(zContext, "On page %d at right child: ", iPage);
06312 #ifndef SQLITE_OMIT_AUTOVACUUM
06313     if( pBt->autoVacuum ){
06314       checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
06315     }
06316 #endif
06317     checkTreePage(pCheck, pgno, pPage, zContext);
06318   }
06319  
06320   /* Check for complete coverage of the page
06321   */
06322   data = pPage->aData;
06323   hdr = pPage->hdrOffset;
06324   hit = sqliteMalloc( usableSize );
06325   if( hit ){
06326     memset(hit, 1, get2byte(&data[hdr+5]));
06327     nCell = get2byte(&data[hdr+3]);
06328     cellStart = hdr + 12 - 4*pPage->leaf;
06329     for(i=0; i<nCell; i++){
06330       int pc = get2byte(&data[cellStart+i*2]);
06331       int size = cellSizePtr(pPage, &data[pc]);
06332       int j;
06333       if( (pc+size-1)>=usableSize || pc<0 ){
06334         checkAppendMsg(pCheck, 0, 
06335             "Corruption detected in cell %d on page %d",i,iPage,0);
06336       }else{
06337         for(j=pc+size-1; j>=pc; j--) hit[j]++;
06338       }
06339     }
06340     for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; 
06341            cnt++){
06342       int size = get2byte(&data[i+2]);
06343       int j;
06344       if( (i+size-1)>=usableSize || i<0 ){
06345         checkAppendMsg(pCheck, 0,  
06346             "Corruption detected in cell %d on page %d",i,iPage,0);
06347       }else{
06348         for(j=i+size-1; j>=i; j--) hit[j]++;
06349       }
06350       i = get2byte(&data[i]);
06351     }
06352     for(i=cnt=0; i<usableSize; i++){
06353       if( hit[i]==0 ){
06354         cnt++;
06355       }else if( hit[i]>1 ){
06356         checkAppendMsg(pCheck, 0,
06357           "Multiple uses for byte %d of page %d", i, iPage);
06358         break;
06359       }
06360     }
06361     if( cnt!=data[hdr+7] ){
06362       checkAppendMsg(pCheck, 0, 
06363           "Fragmented space is %d byte reported as %d on page %d",
06364           cnt, data[hdr+7], iPage);
06365     }
06366   }
06367   sqliteFree(hit);
06368 
06369   releasePage(pPage);
06370   return depth+1;
06371 }
06372 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
06373 
06374 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
06375 /*
06376 ** This routine does a complete check of the given BTree file.  aRoot[] is
06377 ** an array of pages numbers were each page number is the root page of
06378 ** a table.  nRoot is the number of entries in aRoot.
06379 **
06380 ** If everything checks out, this routine returns NULL.  If something is
06381 ** amiss, an error message is written into memory obtained from malloc()
06382 ** and a pointer to that error message is returned.  The calling function
06383 ** is responsible for freeing the error message when it is done.
06384 */
06385 char *sqlite3BtreeIntegrityCheck(Btree *p, int *aRoot, int nRoot){
06386   int i;
06387   int nRef;
06388   IntegrityCk sCheck;
06389   BtShared *pBt = p->pBt;
06390 
06391   nRef = *sqlite3pager_stats(pBt->pPager);
06392   if( lockBtreeWithRetry(p)!=SQLITE_OK ){
06393     return sqliteStrDup("Unable to acquire a read lock on the database");
06394   }
06395   sCheck.pBt = pBt;
06396   sCheck.pPager = pBt->pPager;
06397   sCheck.nPage = sqlite3pager_pagecount(sCheck.pPager);
06398   if( sCheck.nPage==0 ){
06399     unlockBtreeIfUnused(pBt);
06400     return 0;
06401   }
06402   sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
06403   if( !sCheck.anRef ){
06404     unlockBtreeIfUnused(pBt);
06405     return sqlite3MPrintf("Unable to malloc %d bytes", 
06406         (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
06407   }
06408   for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
06409   i = PENDING_BYTE_PAGE(pBt);
06410   if( i<=sCheck.nPage ){
06411     sCheck.anRef[i] = 1;
06412   }
06413   sCheck.zErrMsg = 0;
06414 
06415   /* Check the integrity of the freelist
06416   */
06417   checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
06418             get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
06419 
06420   /* Check all the tables.
06421   */
06422   for(i=0; i<nRoot; i++){
06423     if( aRoot[i]==0 ) continue;
06424 #ifndef SQLITE_OMIT_AUTOVACUUM
06425     if( pBt->autoVacuum && aRoot[i]>1 ){
06426       checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
06427     }
06428 #endif
06429     checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
06430   }
06431 
06432   /* Make sure every page in the file is referenced
06433   */
06434   for(i=1; i<=sCheck.nPage; i++){
06435 #ifdef SQLITE_OMIT_AUTOVACUUM
06436     if( sCheck.anRef[i]==0 ){
06437       checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
06438     }
06439 #else
06440     /* If the database supports auto-vacuum, make sure no tables contain
06441     ** references to pointer-map pages.
06442     */
06443     if( sCheck.anRef[i]==0 && 
06444        (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
06445       checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
06446     }
06447     if( sCheck.anRef[i]!=0 && 
06448        (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
06449       checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
06450     }
06451 #endif
06452   }
06453 
06454   /* Make sure this analysis did not leave any unref() pages
06455   */
06456   unlockBtreeIfUnused(pBt);
06457   if( nRef != *sqlite3pager_stats(pBt->pPager) ){
06458     checkAppendMsg(&sCheck, 0, 
06459       "Outstanding page count goes from %d to %d during this analysis",
06460       nRef, *sqlite3pager_stats(pBt->pPager)
06461     );
06462   }
06463 
06464   /* Clean  up and report errors.
06465   */
06466   sqliteFree(sCheck.anRef);
06467   return sCheck.zErrMsg;
06468 }
06469 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
06470 
06471 /*
06472 ** Return the full pathname of the underlying database file.
06473 */
06474 const char *sqlite3BtreeGetFilename(Btree *p){
06475   assert( p->pBt->pPager!=0 );
06476   return sqlite3pager_filename(p->pBt->pPager);
06477 }
06478 
06479 /*
06480 ** Return the pathname of the directory that contains the database file.
06481 */
06482 const char *sqlite3BtreeGetDirname(Btree *p){
06483   assert( p->pBt->pPager!=0 );
06484   return sqlite3pager_dirname(p->pBt->pPager);
06485 }
06486 
06487 /*
06488 ** Return the pathname of the journal file for this database. The return
06489 ** value of this routine is the same regardless of whether the journal file
06490 ** has been created or not.
06491 */
06492 const char *sqlite3BtreeGetJournalname(Btree *p){
06493   assert( p->pBt->pPager!=0 );
06494   return sqlite3pager_journalname(p->pBt->pPager);
06495 }
06496 
06497 #ifndef SQLITE_OMIT_VACUUM
06498 /*
06499 ** Copy the complete content of pBtFrom into pBtTo.  A transaction
06500 ** must be active for both files.
06501 **
06502 ** The size of file pBtFrom may be reduced by this operation.
06503 ** If anything goes wrong, the transaction on pBtFrom is rolled back.
06504 */
06505 int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
06506   int rc = SQLITE_OK;
06507   Pgno i, nPage, nToPage, iSkip;
06508 
06509   BtShared *pBtTo = pTo->pBt;
06510   BtShared *pBtFrom = pFrom->pBt;
06511 
06512   if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
06513     return SQLITE_ERROR;
06514   }
06515   if( pBtTo->pCursor ) return SQLITE_BUSY;
06516   nToPage = sqlite3pager_pagecount(pBtTo->pPager);
06517   nPage = sqlite3pager_pagecount(pBtFrom->pPager);
06518   iSkip = PENDING_BYTE_PAGE(pBtTo);
06519   for(i=1; rc==SQLITE_OK && i<=nPage; i++){
06520     void *pPage;
06521     if( i==iSkip ) continue;
06522     rc = sqlite3pager_get(pBtFrom->pPager, i, &pPage);
06523     if( rc ) break;
06524     rc = sqlite3pager_overwrite(pBtTo->pPager, i, pPage);
06525     if( rc ) break;
06526     sqlite3pager_unref(pPage);
06527   }
06528   for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
06529     void *pPage;
06530     if( i==iSkip ) continue;
06531     rc = sqlite3pager_get(pBtTo->pPager, i, &pPage);
06532     if( rc ) break;
06533     rc = sqlite3pager_write(pPage);
06534     sqlite3pager_unref(pPage);
06535     sqlite3pager_dont_write(pBtTo->pPager, i);
06536   }
06537   if( !rc && nPage<nToPage ){
06538     rc = sqlite3pager_truncate(pBtTo->pPager, nPage);
06539   }
06540   if( rc ){
06541     sqlite3BtreeRollback(pTo);
06542   }
06543   return rc;  
06544 }
06545 #endif /* SQLITE_OMIT_VACUUM */
06546 
06547 /*
06548 ** Return non-zero if a transaction is active.
06549 */
06550 int sqlite3BtreeIsInTrans(Btree *p){
06551   return (p && (p->inTrans==TRANS_WRITE));
06552 }
06553 
06554 /*
06555 ** Return non-zero if a statement transaction is active.
06556 */
06557 int sqlite3BtreeIsInStmt(Btree *p){
06558   return (p->pBt && p->pBt->inStmt);
06559 }
06560 
06561 /*
06562 ** This call is a no-op if no write-transaction is currently active on pBt.
06563 **
06564 ** Otherwise, sync the database file for the btree pBt. zMaster points to
06565 ** the name of a master journal file that should be written into the
06566 ** individual journal file, or is NULL, indicating no master journal file 
06567 ** (single database transaction).
06568 **
06569 ** When this is called, the master journal should already have been
06570 ** created, populated with this journal pointer and synced to disk.
06571 **
06572 ** Once this is routine has returned, the only thing required to commit
06573 ** the write-transaction for this database file is to delete the journal.
06574 */
06575 int sqlite3BtreeSync(Btree *p, const char *zMaster){
06576   int rc = SQLITE_OK;
06577   if( p->inTrans==TRANS_WRITE ){
06578     BtShared *pBt = p->pBt;
06579     Pgno nTrunc = 0;
06580 #ifndef SQLITE_OMIT_AUTOVACUUM
06581     if( pBt->autoVacuum ){
06582       rc = autoVacuumCommit(pBt, &nTrunc); 
06583       if( rc!=SQLITE_OK ){
06584         return rc;
06585       }
06586     }
06587 #endif
06588     rc = sqlite3pager_sync(pBt->pPager, zMaster, nTrunc);
06589   }
06590   return rc;
06591 }
06592 
06593 /*
06594 ** This function returns a pointer to a blob of memory associated with
06595 ** a single shared-btree. The memory is used by client code for it's own
06596 ** purposes (for example, to store a high-level schema associated with 
06597 ** the shared-btree). The btree layer manages reference counting issues.
06598 **
06599 ** The first time this is called on a shared-btree, nBytes bytes of memory
06600 ** are allocated, zeroed, and returned to the caller. For each subsequent 
06601 ** call the nBytes parameter is ignored and a pointer to the same blob
06602 ** of memory returned. 
06603 **
06604 ** Just before the shared-btree is closed, the function passed as the 
06605 ** xFree argument when the memory allocation was made is invoked on the 
06606 ** blob of allocated memory. This function should not call sqliteFree()
06607 ** on the memory, the btree layer does that.
06608 */
06609 void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
06610   BtShared *pBt = p->pBt;
06611   if( !pBt->pSchema ){
06612     pBt->pSchema = sqliteMalloc(nBytes);
06613     pBt->xFreeSchema = xFree;
06614   }
06615   return pBt->pSchema;
06616 }
06617 
06618 /*
06619 ** Return true if another user of the same shared btree as the argument
06620 ** handle holds an exclusive lock on the sqlite_master table.
06621 */
06622 int sqlite3BtreeSchemaLocked(Btree *p){
06623   return (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
06624 }
06625 
06626 
06627 #ifndef SQLITE_OMIT_SHARED_CACHE
06628 /*
06629 ** Obtain a lock on the table whose root page is iTab.  The
06630 ** lock is a write lock if isWritelock is true or a read lock
06631 ** if it is false.
06632 */
06633 int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
06634   int rc = SQLITE_OK;
06635   u8 lockType = (isWriteLock?WRITE_LOCK:READ_LOCK);
06636   rc = queryTableLock(p, iTab, lockType);
06637   if( rc==SQLITE_OK ){
06638     rc = lockTable(p, iTab, lockType);
06639   }
06640   return rc;
06641 }
06642 #endif
06643 
06644 /*
06645 ** The following debugging interface has to be in this file (rather
06646 ** than in, for example, test1.c) so that it can get access to
06647 ** the definition of BtShared.
06648 */
06649 #if defined(SQLITE_DEBUG) && defined(TCLSH)
06650 #include <tcl.h>
06651 int sqlite3_shared_cache_report(
06652   void * clientData,
06653   Tcl_Interp *interp,
06654   int objc,
06655   Tcl_Obj *CONST objv[]
06656 ){
06657 #ifndef SQLITE_OMIT_SHARED_CACHE
06658   const ThreadData *pTd = sqlite3ThreadDataReadOnly();
06659   if( pTd->useSharedData ){
06660     BtShared *pBt;
06661     Tcl_Obj *pRet = Tcl_NewObj();
06662     for(pBt=pTd->pBtree; pBt; pBt=pBt->pNext){
06663       const char *zFile = sqlite3pager_filename(pBt->pPager);
06664       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewStringObj(zFile, -1));
06665       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(pBt->nRef));
06666     }
06667     Tcl_SetObjResult(interp, pRet);
06668   }
06669 #endif
06670   return TCL_OK;
06671 }
06672 #endif