--- libdb/fileops/fop_util.c.orig	Thu Nov 20 23:13:30 2003
+++ libdb/fileops/fop_util.c	Fri Mar 18 20:31:10 2005
@@ -40,7 +40,7 @@
 	u_int32_t __lockval;						\
 									\
 	if (LOCKING_ON((ENV))) {					\
-		__lockval = 0;						\
+		__lockval = 1;						\
 		__dbt.data = &__lockval;				\
 		__dbt.size = sizeof(__lockval);				\
 		if ((ret = (ENV)->lock_get((ENV), (ID),			\
--- libdb/dbinc/mp.h.orig	Thu Nov 20 23:13:17 2003
+++ libdb/dbinc/mp.h	Fri Mar 18 20:31:14 2005
@@ -149,6 +149,13 @@
 	 * region lock).
 	 */
 	DB_MPOOL_STAT stat;		/* Per-cache mpool statistics. */
+
+	/*
+	 * We track page puts so that we can decide when allocation is never
+	 * going to succeed.  We don't lock the field, all we care about is
+	 * if it changes.
+	 */
+	u_int32_t put_counter;		/* Count of page put calls. */
 };
 
 struct __db_mpool_hash {
--- libdb/mp/mp_fput.c.orig	Thu Nov 20 23:13:36 2003
+++ libdb/mp/mp_fput.c	Fri Mar 18 20:31:14 2005
@@ -19,6 +19,8 @@
 #include "dbinc/db_shash.h"
 #include "dbinc/mp.h"
 
+static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
+
 /*
  * __memp_fput --
  *	Mpool file put function.
@@ -198,5 +200,56 @@
 
 	MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
 
+	/*
+	 * On every buffer put we update the buffer generation number and check
+	 * for wraparound.
+	 */
+	if (++c_mp->lru_count == UINT32_T_MAX)
+		__memp_reset_lru(dbenv, dbmp->reginfo);
+
 	return (0);
+}
+
+/*
+ * __memp_reset_lru --
+ *	Reset the cache LRU counter.
+ */
+static void
+__memp_reset_lru(dbenv, memreg)
+	DB_ENV *dbenv;
+	REGINFO *memreg;
+{
+	BH *bhp;
+	DB_MPOOL_HASH *hp;
+	MPOOL *c_mp;
+	int bucket;
+
+	c_mp = memreg->primary;
+
+	/*
+	 * Update the counter so all future allocations will start at the
+	 * bottom.
+	 */
+	c_mp->lru_count -= MPOOL_BASE_DECREMENT;
+
+	/* Adjust the priority of every buffer in the system. */
+	for (hp = R_ADDR(memreg, c_mp->htab),
+	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
+		/*
+		 * Skip empty buckets.
+		 *
+		 * We can check for empty buckets before locking as we
+		 * only care if the pointer is zero or non-zero.
+		 */
+		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+			continue;
+
+		MUTEX_LOCK(dbenv, &hp->hash_mutex);
+		for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+			if (bhp->priority != UINT32_T_MAX &&
+			    bhp->priority > MPOOL_BASE_DECREMENT)
+				bhp->priority -= MPOOL_BASE_DECREMENT;
+		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
+	}
 }
--- libdb/mp/mp_alloc.c.orig	Thu Nov 20 23:13:36 2003
+++ libdb/mp/mp_alloc.c	Fri Mar 18 20:31:14 2005
@@ -25,7 +25,6 @@
 } HS;
 
 static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
-static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
 
 /*
  * __memp_alloc --
@@ -50,8 +49,9 @@
 	MPOOL *c_mp;
 	MPOOLFILE *bh_mfp;
 	size_t freed_space;
-	u_int32_t buckets, buffers, high_priority, max_na, priority;
-	int aggressive, ret;
+	u_int32_t buckets, buffers, high_priority, priority, put_counter;
+	u_int32_t total_buckets;
+	int aggressive, giveup, ret;
 	void *p;
 
 	dbenv = dbmp->dbenv;
@@ -59,18 +59,13 @@
 	dbht = R_ADDR(memreg, c_mp->htab);
 	hp_end = &dbht[c_mp->htab_buckets];
 
-	buckets = buffers = 0;
-	aggressive = 0;
+	buckets = buffers = put_counter = total_buckets = 0;
+	aggressive = giveup = 0;
+	hp_tmp = NULL;
 
 	c_mp->stat.st_alloc++;
 
 	/*
-	 * Get aggressive if we've tried to flush the number of pages as are
-	 * in the system without finding space.
-	 */
-	max_na = 5 * c_mp->htab_buckets;
-
-	/*
 	 * If we're allocating a buffer, and the one we're discarding is the
 	 * same size, we don't want to waste the time to re-integrate it into
 	 * the shared memory free list. If the DB_MPOOLFILE argument isn't
@@ -81,19 +76,10 @@
 		len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
 
 	R_LOCK(dbenv, memreg);
-
-	/*
-	 * On every buffer allocation we update the buffer generation number
-	 * and check for wraparound.
-	 */
-	if (++c_mp->lru_count == UINT32_T_MAX)
-		__memp_reset_lru(dbenv, memreg, c_mp);
-
 	/*
 	 * Anything newer than 1/10th of the buffer pool is ignored during
 	 * allocation (unless allocation starts failing).
 	 */
-	DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
 	high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
 
 	/*
@@ -120,10 +106,11 @@
 		 * We're not holding the region locked here, these statistics
 		 * can't be trusted.
 		 */
-		if (buckets != 0) {
-			if (buckets > c_mp->stat.st_alloc_max_buckets)
-				c_mp->stat.st_alloc_max_buckets = buckets;
-			c_mp->stat.st_alloc_buckets += buckets;
+		total_buckets += buckets;
+		if (total_buckets != 0) {
+			if (total_buckets > c_mp->stat.st_alloc_max_buckets)
+				c_mp->stat.st_alloc_max_buckets = total_buckets;
+			c_mp->stat.st_alloc_buckets += total_buckets;
 		}
 		if (buffers != 0) {
 			if (buffers > c_mp->stat.st_alloc_max_pages)
@@ -131,6 +118,12 @@
 			c_mp->stat.st_alloc_pages += buffers;
 		}
 		return (0);
+	} else if (giveup || c_mp->stat.st_pages == 0) {
+		R_UNLOCK(dbenv, memreg);
+
+		__db_err(dbenv,
+		    "unable to allocate space from the buffer cache");
+		return (ret);
 	}
 
 	/*
@@ -138,26 +131,24 @@
 	 * we need. Reset our free-space counter.
 	 */
 	freed_space = 0;
+	total_buckets += buckets;
+	buckets = 0;
 
 	/*
 	 * Walk the hash buckets and find the next two with potentially useful
 	 * buffers. Free the buffer with the lowest priority from the buckets'
 	 * chains.
 	 */
-	for (hp_tmp = NULL;;) {
+	for (;;) {
+		/* All pages have been freed, make one last try */
+		if (c_mp->stat.st_pages == 0)
+			goto alloc;
+
 		/* Check for wrap around. */
 		hp = &dbht[c_mp->last_checked++];
 		if (hp >= hp_end) {
 			c_mp->last_checked = 0;
-
-			/*
-			 * If we've gone through all of the hash buckets, try
-			 * an allocation. If the cache is small, the old page
-			 * size is small, and the new page size is large, we
-			 * might have freed enough memory (but not 3 times the
-			 * memory).
-			 */
-			goto alloc;
+			hp = &dbht[c_mp->last_checked++];
 		}
 
 		/*
@@ -172,39 +163,59 @@
 		/*
 		 * The failure mode is when there are too many buffers we can't
 		 * write or there's not enough memory in the system. We don't
-		 * have a metric for deciding if allocation has no possible way
-		 * to succeed, so we don't ever fail, we assume memory will be
-		 * available if we wait long enough.
+		 * have a way to know that allocation has no way to succeed.
+		 * We fail if there were no pages returned to the cache after
+		 * we've been trying for a relatively long time.
 		 *
-		 * Get aggressive if we've tried to flush 5 times the number of
-		 * hash buckets as are in the system -- it's possible we have
-		 * been repeatedly trying to flush the same buffers, although
-		 * it's unlikely. Aggressive means:
+		 * Get aggressive if we've tried to flush the number of hash
+		 * buckets as are in the system and have not found any more
+		 * space. Aggressive means:
 		 *
 		 * a: set a flag to attempt to flush high priority buffers as
 		 *    well as other buffers.
 		 * b: sync the mpool to force out queue extent pages. While we
 		 *    might not have enough space for what we want and flushing
 		 *    is expensive, why not?
-		 * c: sleep for a second -- hopefully someone else will run and
-		 *    free up some memory. Try to allocate memory too, in case
-		 *    the other thread returns its memory to the region.
-		 * d: look at a buffer in every hash bucket rather than choose
+		 * c: look at a buffer in every hash bucket rather than choose
 		 *    the more preferable of two.
+		 * d: start to think about giving up.
+		 *
+		 * If we get here twice, sleep for a second, hopefully someone
+		 * else will run and free up some memory.
+		 *
+		 * Always try to allocate memory too, in case some other thread
+		 * returns its memory to the region.
 		 *
 		 * !!!
 		 * This test ignores pathological cases like no buffers in the
 		 * system -- that shouldn't be possible.
 		 */
-		if ((++buckets % max_na) == 0) {
-			aggressive = 1;
-
+		if ((++buckets % c_mp->htab_buckets) == 0) {
+			if (freed_space > 0)
+				goto alloc;
 			R_UNLOCK(dbenv, memreg);
 
-			(void)__memp_sync_int(
-			    dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
-
-			(void)__os_sleep(dbenv, 1, 0);
+			switch (++aggressive) {
+			case 1:
+				break;
+			case 2:
+				put_counter = c_mp->put_counter;
+				/* FALLTHROUGH */
+			case 3:
+			case 4:
+			case 5:
+			case 6:
+				(void)__memp_sync_int(
+				    dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
+
+				(void)__os_sleep(dbenv, 1, 0);
+				break;
+			default:
+				aggressive = 1;
+				if (put_counter == c_mp->put_counter)
+					giveup = 1;
+				break;
+			}
 
 			R_LOCK(dbenv, memreg);
 			goto alloc;
@@ -277,7 +288,8 @@
 		 * thread may have acquired this buffer and incremented the ref
 		 * count after we wrote it, in which case we can't have it.
 		 *
-		 * If there's a write error, avoid selecting this buffer again
+		 * If there's a write error and we're having problems finding
+		 * something to allocate, avoid selecting this buffer again
 		 * by making it the bucket's least-desirable buffer.
 		 */
 		if (ret != 0 || bhp->ref != 0) {
@@ -301,6 +313,8 @@
 
 		freed_space += __db_shsizeof(bhp);
 		__memp_bhfree(dbmp, hp, bhp, 1);
+		if (aggressive > 1)
+			aggressive = 1;
 
 		/*
 		 * Unlock this hash bucket and re-acquire the region lock. If
@@ -360,54 +374,6 @@
 
 	/* Reset the hash bucket's priority. */
 	hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
-}
-
-/*
- * __memp_reset_lru --
- *	Reset the cache LRU counter.
- */
-static void
-__memp_reset_lru(dbenv, memreg, c_mp)
-	DB_ENV *dbenv;
-	REGINFO *memreg;
-	MPOOL *c_mp;
-{
-	BH *bhp;
-	DB_MPOOL_HASH *hp;
-	int bucket;
-
-	/*
-	 * Update the counter so all future allocations will start at the
-	 * bottom.
-	 */
-	c_mp->lru_count -= MPOOL_BASE_DECREMENT;
-
-	/* Release the region lock. */
-	R_UNLOCK(dbenv, memreg);
-
-	/* Adjust the priority of every buffer in the system. */
-	for (hp = R_ADDR(memreg, c_mp->htab),
-	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
-		/*
-		 * Skip empty buckets.
-		 *
-		 * We can check for empty buckets before locking as we
-		 * only care if the pointer is zero or non-zero.
-		 */
-		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
-			continue;
-
-		MUTEX_LOCK(dbenv, &hp->hash_mutex);
-		for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-			if (bhp->priority != UINT32_T_MAX &&
-			    bhp->priority > MPOOL_BASE_DECREMENT)
-				bhp->priority -= MPOOL_BASE_DECREMENT;
-		MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-	}
-
-	/* Reacquire the region lock. */
-	R_LOCK(dbenv, memreg);
 }
 
 #ifdef DIAGNOSTIC
--- libdb/dbreg/dbreg_rec.c.orig	Thu Nov 20 23:13:19 2003
+++ libdb/dbreg/dbreg_rec.c	Fri Mar 18 20:31:14 2005
@@ -174,19 +174,20 @@
 			 * Typically, closes should match an open which means
 			 * that if this is a close, there should be a valid
 			 * entry in the dbentry table when we get here,
-			 * however there is an exception. If this is an
+			 * however there are exceptions. 1. If this is an
 			 * OPENFILES pass, then we may have started from
 			 * a log file other than the first, and the
 			 * corresponding open appears in an earlier file.
-			 * We can ignore that case, but all others are errors.
+			 * 2. If we are undoing an open on an abort or
+			 * recovery, it's possible that we failed after
+			 * the log record, but before we actually entered
+			 * a handle here.
 			 */
 			dbe = &dblp->dbentry[argp->fileid];
 			if (dbe->dbp == NULL && !dbe->deleted) {
 				/* No valid entry here. */
-				if ((argp->opcode != LOG_CLOSE &&
-				    argp->opcode != LOG_RCLOSE) ||
-				    (op != DB_TXN_OPENFILES &&
-				    op !=DB_TXN_POPENFILES)) {
+				if (DB_REDO(op) ||
+				    argp->opcode == LOG_CHECKPOINT) {
 					__db_err(dbenv,
 					    "Improper file close at %lu/%lu",
 					    (u_long)lsnp->file,
--- libdb/env/env_recover.c.orig	Thu Nov 20 23:13:20 2003
+++ libdb/env/env_recover.c	Fri Mar 18 20:31:14 2005
@@ -232,12 +232,9 @@
 	 * we'll still need to do a vtruncate based on information we haven't
 	 * yet collected.
 	 */
-	if (ret == DB_NOTFOUND) {
+	if (ret == DB_NOTFOUND)
 		ret = 0;
-		if (max_lsn == NULL)
-			goto done;
-	}
-	if (ret != 0)
+	else if (ret != 0)
 		goto err;
 
 	hi_txn = txnid;
@@ -331,7 +328,7 @@
 
 	/* Find a low txnid. */
 	ret = 0;
-	do {
+	if (hi_txn != 0) do {
 		/* txnid is after rectype, which is a u_int32. */
 		memcpy(&txnid,
 		    (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
@@ -344,11 +341,8 @@
 	 * There are no transactions and we're not recovering to an LSN (see
 	 * above), so there is nothing to do.
 	 */
-	if (ret == DB_NOTFOUND) {
+	if (ret == DB_NOTFOUND)
 		ret = 0;
-		if (max_lsn == NULL)
-			goto done;
-	}
 
 	/* Reset to the first lsn. */
 	if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
@@ -367,6 +361,10 @@
 	    txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
 		goto err;
 
+	/* If there were no transactions, then we can bail out early. */
+	if (hi_txn == 0 && max_lsn == NULL)
+		goto done;
+
 	/*
 	 * Pass #2.
 	 *
@@ -483,6 +481,7 @@
 	if ((ret = __dbreg_close_files(dbenv)) != 0)
 		goto err;
 
+done:
 	if (max_lsn != NULL) {
 		region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
 
@@ -538,7 +537,8 @@
 		__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
 		__db_err(dbenv, "%s %lx %s [%lu][%lu]",
 		    "Maximum transaction ID",
-		    ((DB_TXNHEAD *)txninfo)->maxid,
+		    txninfo == NULL ? TXN_MINIMUM :
+		    ((DB_TXNHEAD *)txninfo)->maxid,
 		    "Recovery checkpoint",
 		    (u_long)region->last_ckp.file,
 		    (u_long)region->last_ckp.offset);
@@ -550,7 +550,6 @@
 		    (u_long)lsn.file, (u_long)lsn.offset, pass);
 	}
 
-done:
 err:	if (lockid != DB_LOCK_INVALIDID) {
 		if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
 			ret = t_ret;