/*
 *---------------------------------------------------------------------
 *
 * antifreeze.c
 *		Utility to scan a table and freeze tuples
 *
 * Copyright (C)  2016 2ndQuadrant Ltd, All rights reserved
 * Licensed only for use by 2ndQuadrant customers
 *---------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/multixact.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "catalog/pg_control.h"
#include "commands/vacuum.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/tqual.h"

#if PG_VERSION_NUM >= 90300
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#endif


/*
 * Running totals accumulated by pg_antifreeze() while it scans a relation.
 * The struct is zeroed at the start of a run and printed by
 * report_counters().
 */
typedef struct AntifreezeCounters
{
	uint64		pgs_needing_freeze;	/* pages ext_pg_needs_freeze() said yes to */
	uint64		multixacts_removed;	/* out-of-range multixact xmax values reset */
	uint64		dead_tuples;		/* tuples found HEAPTUPLE_DEAD before pruning */
	uint64		pages_pruned;		/* pages passed to heap_page_prune() */
	uint64		tuples_frozen;		/* tuples for which a freeze was prepared/done */
	uint64		pages_frozen;		/* pages with at least one frozen tuple */
	uint64		full_page_images;	/* calls to emit_fpi_wal_record() */
	uint64		needs_because_multi;	/* pages flagged due to out-of-range multixact */
	uint64		needs_because_other;	/* pages flagged by heap_tuple_needs_freeze() */
} AntifreezeCounters;


/* SQL-callable entry points exported by this module */
PG_FUNCTION_INFO_V1(pg_antifreeze);
PG_FUNCTION_INFO_V1(pg_update_datfrozenxid);
Datum		pg_antifreeze(PG_FUNCTION_ARGS);
Datum		pg_update_datfrozenxid(PG_FUNCTION_ARGS);

/* module load hook */
void		_PG_init(void);

/* file-local helpers */
static void update_relstats(Relation relation, TransactionId frozenxid,
				MultiXactId minmulti);
static bool ext_pg_needs_freeze(Buffer buf, TransactionId freeze_limit,
					MultiXactId multi_cutoff, AntifreezeCounters *counters);
static void report_relation_xid_limits(Relation relation);
static void ext_MultiXactShmemInit(void);

/* multixact range checking only exists for 9.3 and later */
#if PG_VERSION_NUM >= 90300
static bool multixact_is_within_range(MultiXactId multi,
						  MultiXactId oldestMulti,
						  MultiXactId nextMulti);
#endif
static void report_counters(AntifreezeCounters *counters, BlockNumber blkno);
static void emit_fpi_wal_record(Buffer buf, AntifreezeCounters *counters);

PG_MODULE_MAGIC;

#if PG_VERSION_NUM >= 90300
/*
 * Local declaration of the layout of the "Shared MultiXact State" shared
 * memory struct that ext_MultiXactShmemInit() looks up.  This module only
 * reads oldestMultiXactId and nextMXact (under MultiXactGenLock).
 *
 * NOTE(review): presumably this must match the server's own private
 * definition field-for-field for the server version being compiled
 * against (hence the version conditionals below) -- confirm whenever a
 * new server version is supported.
 */
typedef struct MultiXactStateData
{
	/* next-to-be-assigned MultiXactId */
	MultiXactId nextMXact;

	/* next-to-be-assigned offset */
	MultiXactOffset nextOffset;

	/* Have we completed multixact startup? */
	bool		finishedStartup;

	/*
	 * Oldest multixact that is still potentially referenced by a relation.
	 * Anything older than this should not be consulted.  These values are
	 * updated by vacuum.
	 */
	MultiXactId oldestMultiXactId;
	Oid			oldestMultiXactDB;

	/*
	 * Oldest multixact offset that is potentially referenced by a
	 * multixact referenced by a relation.  We don't always know this value,
	 * so there's a flag here to indicate whether or not we currently do.
	 */
	MultiXactOffset oldestOffset;
	bool		oldestOffsetKnown;

	/* This field is not present in 9.5+ */
#if !(PG_VERSION_NUM >= 90500)
	/*
	 * This is what the previous checkpoint stored as the truncate position.
	 * This value is the oldestMultiXactId that was valid when a checkpoint
	 * was last executed.
	 */
	MultiXactId lastCheckpointedOldest;
#endif

	/* support for anti-wraparound measures */
	MultiXactId multiVacLimit;
	MultiXactId multiWarnLimit;
	MultiXactId multiStopLimit;
	MultiXactId multiWrapLimit;

	/* support for members anti-wraparound measures */
	MultiXactOffset offsetStopLimit;

	/* This field is not present in 9.5+ */
#if !(PG_VERSION_NUM >= 90500)
	bool offsetStopLimitKnown;
#endif

	MultiXactId perBackendXactIds[1];	/* VARIABLE LENGTH ARRAY */
} MultiXactStateData;

/*
 * Last element of OldestMemberMXactID and OldestVisibleMXactId arrays.
 * Valid elements are (1..MaxOldestSlot); element 0 is never used.
 *
 * Used here only to compute the size passed to ShmemInitStruct().
 */
#define MaxOldestSlot	(MaxBackends + max_prepared_xacts)

/*
 * Pointers to the state data in shared memory.  Set by
 * ext_MultiXactShmemInit(), which is called from _PG_init().
 */
static MultiXactStateData *MultiXactState;
#endif /* VERSION_NUM >= 9.3 */


/* custom GUC vars */
/*
 * Backing variable for antifreeze.freeze_xid_trigger_age (registered in
 * _PG_init).  pg_antifreeze subtracts it from the oldest xmin to obtain the
 * "very old" xid that makes a page eligible for processing.
 */
static int af_freeze_xid_trigger_age;

/*
 * Phase pg_antifreeze is currently in; consulted by
 * freezeerrcontext_callback() to build the error context line.
 */
enum FreezeStage
{
	FREEZE_START,				/* setup, before the block scan */
	FREEZE_PAGE,				/* working on a block, offset not relevant */
	FREEZE_PAGE_NEEDED,			/* block needs freezing; per-offset work */
	FREEZE_PAGE_CHECKING,		/* reported by the callback; not assigned
								 * anywhere in this file */
	FREEZE_UPDATE_RELSTATS,		/* updating the relation's pg_class limits */
	FREEZE_VACUUM_FSM			/* vacuuming the free space map */
};

/*
 * Progress information handed to freezeerrcontext_callback() through the
 * error context callback argument.
 */
struct FreezeStatus
{
	enum FreezeStage stage;		/* current phase */
	BlockNumber cur_block;		/* block being processed */
	OffsetNumber cur_offset;	/* line pointer being processed, 0 if none */
};

/*
 * Module load hook.
 *
 * Registers the antifreeze.freeze_xid_trigger_age setting and then looks up
 * the multixact shared state used by the freezing code.
 */
void
_PG_init(void)
{
	/* GUC texts; static storage because the GUC machinery keeps pointers */
	static const char guc_name[] = "antifreeze.freeze_xid_trigger_age";
	static const char guc_short_desc[] =
		"Maximum tuple age to cause removal of Xids from a page.";
	static const char guc_long_desc[] =
		"Maximum age for any tuple in a page before tuples in that page are pruned or frozen.\n"
		"Live tuples in such pages are marked as frozen.\n"
		"Dead tuples no longer visible to any running transaction are pruned.";

	/* boot value and allowed range of the setting */
	const int	boot_age = 10000000;
	const int	lowest_age = 0;
	const int	highest_age = 2000000000;

	DefineCustomIntVariable(guc_name,
							guc_short_desc,
							guc_long_desc,
							&af_freeze_xid_trigger_age,
							boot_age,
							lowest_age,
							highest_age,
							PGC_USERSET,
							0,		/* no flags */
							NULL,	/* no check hook */
							NULL,	/* no assign hook */
							NULL);	/* no show hook */

	ext_MultiXactShmemInit();
}

static void
freezeerrcontext_callback(void *arg)
{
	struct FreezeStatus *status = (struct FreezeStatus *) arg;

	switch (status->stage)
	{
		case FREEZE_START:
			errcontext("pg_antifreeze start");
			break;

		case FREEZE_PAGE:
			errcontext("pg_antifreeze, freezing page %u", status->cur_block);
			break;

		case FREEZE_PAGE_NEEDED:
			errcontext("pg_antifreeze, freezing page %u, offset %u", status->cur_block, status->cur_offset);
			break;

		case FREEZE_PAGE_CHECKING:
			errcontext("pg_antifreeze, checking page %u, offset %u",
					   status->cur_block, status->cur_offset);
			break;

		case FREEZE_UPDATE_RELSTATS:
			errcontext("pg_antifreeze update relstats");
			break;

		case FREEZE_VACUUM_FSM:
			errcontext("pg_antifreeze vacuum fsm");
			break;
	}
}

Datum
pg_antifreeze(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	Relation	relation;
	BufferAccessStrategy strategy;
	BlockNumber blkno;
	BlockNumber lastblk;
	TransactionId frozenxid;
	TransactionId very_old_xid;
	MultiXactId cutoff_multi;
	bool		sharedRel;
#if PG_VERSION_NUM >= 90300
	xl_heap_freeze_tuple *frozen;
	MultiXactId	oldestMulti;
	MultiXactId	nextMulti;
#else
	OffsetNumber frozen[MaxHeapTuplesPerPage];
#endif
	ErrorContextCallback freezeerrcontext;
	struct FreezeStatus freeze_status;
	AntifreezeCounters counters;

	if (PG_NARGS() > 1 &&
		get_fn_expr_argtype(fcinfo->flinfo, 1) == INT8OID)
	{
		ereport(ERROR,
				(errmsg("outdated antifreeze extension"),
				 errhint("Please run ALTER EXTENSION antifreeze UPDATE.")));
	}

	if (RecoveryInProgress())
	        ereport(ERROR,
			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			 errmsg("recovery is in progress"),
			 errhint("antifreeze cannot be executed during recovery.")));
			 
	memset(&counters, 0, sizeof(AntifreezeCounters));

	freeze_status.stage = FREEZE_START;
	freeze_status.cur_block = 0;
	freeze_status.cur_offset = 0;

	freezeerrcontext.callback = freezeerrcontext_callback;
	freezeerrcontext.arg = (void *) (&freeze_status);
	freezeerrcontext.previous = error_context_stack;
	error_context_stack = &freezeerrcontext;

	/*
	 * Set us up in procarray just like vacuum.  Note we cheat on the first
	 * argument of PreventTransactionChain, but we have no choice.  If
	 * somebody runs us inside a function, we're screwed.
	 */
	PreventTransactionChain(true, "pg_antifreeze");
	if (GetTopTransactionIdIfAny() != InvalidTransactionId)
		elog(ERROR, "refusing to execute pg_antifreeze inside a write transaction");
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	if ((MyPgXact->vacuumFlags & PROC_IN_VACUUM) != 0)
		elog(ERROR, "refusing to execute pg_antifreeze twice in a transaction");
	MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
	LWLockRelease(ProcArrayLock);

	/* lock as per vacuum */
	relation = heap_open(relid, ShareUpdateExclusiveLock);

	sharedRel = relation->rd_rel->relisshared;
	strategy = GetAccessStrategy(BAS_BULKWRITE);
	lastblk = RelationGetNumberOfBlocks(relation);

#if PG_VERSION_NUM >= 90300
	frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
#endif

	/*
	 * Whenever any tuple in a page is older than freeze_xid_trigger_age, then the
	 * page is pruned and all freeze-able (older than frozenxid) tuples therein
	 * are frozen.
	 */
#if PG_VERSION_NUM >= 90400
	frozenxid = GetOldestXmin(relation, true);
#else
	frozenxid = GetOldestXmin(sharedRel, true);
#endif
	very_old_xid = frozenxid - af_freeze_xid_trigger_age;
	if (!TransactionIdIsNormal(very_old_xid))
		very_old_xid = FirstNormalTransactionId;

#if PG_VERSION_NUM >= 90300
	/*
	 * Use vacuum_set_xid_limits to determine multixact cutoff point.  We
	 * ignore all other output.  (In particular, we don't care about the
	 * multixact full-table scan limit, because we *always* do a full-table
	 * scan).
	 */
	{
		TransactionId dummy_xid;
		MultiXactId dummy_mxid;

		vacuum_set_xid_limits(0, 0, vacuum_multixact_freeze_min_age, 0,
							  sharedRel,
							  &dummy_xid, &dummy_xid, &dummy_xid,
							  &cutoff_multi,
							  &dummy_mxid);
	}
#endif

	error_context_stack = freezeerrcontext.previous;

#if PG_VERSION_NUM >= 90300
	elog(INFO, "processing relation %s.%s; limits: page prune xid: %u Xid %u multixact %u",
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation),
		 very_old_xid,
		 frozenxid, cutoff_multi);
#else
	elog(INFO, "processing relation %s.%s; limits: page prune xid: %u Xid %u",
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation),
		 very_old_xid, frozenxid);
#endif

	report_relation_xid_limits(relation);

	freezeerrcontext.callback = freezeerrcontext_callback;

	PG_TRY();
	{
#if PG_VERSION_NUM >= 90400
		HeapTupleData tup;
#endif

		/* Set up vacuum cost accounting */
		VacuumCostActive = (VacuumCostDelay > 0);
		VacuumCostBalance = 0;
		VacuumPageHit = 0;
		VacuumPageMiss = 0;
		VacuumPageDirty = 0;

#if PG_VERSION_NUM >= 90400
		/* avoid repetitious assignment */
		tup.t_tableOid = relid;
#endif

		/*
		 * Scan each block of the relation.  Note that we don't care if the
		 * relation grows while this is running: the last few pages can only
		 * have tuples that will certainly not need any freezing.
		 */
		for (blkno = 0; blkno < lastblk; blkno++)
		{
			Buffer		buf;
			Page		page;
			bool		need_fpi = false;

			freeze_status.stage = FREEZE_PAGE;

			CHECK_FOR_INTERRUPTS();

			vacuum_delay_point();

			freeze_status.cur_block = blkno;
			freeze_status.cur_offset = 0;

			if (blkno % 131072 == 0)	/* pages in a segment for BLCKSZ 8192 */
			{
				error_context_stack = freezeerrcontext.previous;
				report_counters(&counters, blkno);
				freezeerrcontext.callback = freezeerrcontext_callback;
			}

			buf = ReadBufferExtended(relation, MAIN_FORKNUM, blkno,
									 RBM_NORMAL, strategy);
			page = BufferGetPage(buf);

			if (PageIsNew(page) || PageIsEmpty(page))
				continue;

			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

			/*
			 * Does this page need freezing?  If so, do it.  Note that we
			 * intentionally use a very lax limit for Xids, so that we don't
			 * remove xids unless we absolutely need to.  By contrast, we use
			 * a tight limit for multixacts because that's what we want to
			 * reclaim as much as possible.  However, if we do decide to clean
			 * the page, we will switch to a very aggressive setting for Xids
			 * too.
			 */
			if (ext_pg_needs_freeze(buf, very_old_xid, cutoff_multi,
									&counters))
			{
				OffsetNumber offnum,
							maxoff;
				int			nfrozen = 0;
				Size		freespace;
				bool		mustprune = false;

				freeze_status.stage = FREEZE_PAGE_NEEDED;

				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
				LockBufferForCleanup(buf);

				counters.pgs_needing_freeze++;

#if PG_VERSION_NUM >= 90300
				/* update our multixact range endpoints */
				LWLockAcquire(MultiXactGenLock, LW_SHARED);
				oldestMulti = MultiXactState->oldestMultiXactId;
				nextMulti = MultiXactState->nextMXact;
				LWLockRelease(MultiXactGenLock);
#endif

				maxoff = PageGetMaxOffsetNumber(page);

				/*
				 * Determine whether there is any item that needs to be
				 * removed before freezing the page; if so, do it.
				 */
				for (offnum = FirstOffsetNumber;
					 offnum <= maxoff;
					 offnum = OffsetNumberNext(offnum))
				{
					HTSV_Result result;
					HeapTupleHeader tuple;
					ItemId		itemid;
					freeze_status.cur_offset = offnum;

					itemid = PageGetItemId(page, offnum);

					/* nothing for unused or redirect page items */
					if (!ItemIdIsUsed(itemid) ||
						ItemIdIsRedirected(itemid) ||
						ItemIdIsDead(itemid))
						continue;

					tuple = (HeapTupleHeader) PageGetItem(page, itemid);

#if PG_VERSION_NUM >= 90400
					tup.t_data = tuple;
					tup.t_len = ItemIdGetLength(itemid);
					ItemPointerSet(&(tup.t_self), blkno, offnum);
#endif

#if PG_VERSION_NUM >= 90300
					/*
					 * Special case for wrapped-around multixacts.
					 *
					 * 9.2 and earlier was able to insert multixacts in Xmax
					 * and then leave them there untouched for a long time,
					 * even after a wraparound cycle.  This didn't cause any
					 * problem; a reader would just see the bogosity, ignore it
					 * and move on.  For 9.3 which has made multixacts
					 * persistent, this is a problem: GetMultiXactIdMembers
					 * complains that it cannot resolve the value any more
					 * because "it is in the future".  So regular freezing
					 * fails with an error.  To fix, simply reset the Xmax
					 * value to Invalid (and remove the infomask bits).
					 *
					 * There isn't any good way to emit a XLOG record for this
					 * at this point, so we just remember that we need to do so
					 * at the end of the routine.  That way, we avoid redundant
					 * WAL records when more than one tuple in the page needs
					 * this treatment.
					 */
					if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
					{
						MultiXactId multi = HeapTupleHeaderGetRawXmax(tuple);

						if (!multixact_is_within_range(multi,
													   oldestMulti, nextMulti))
						{
							HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
							tuple->t_infomask |= HEAP_XMAX_INVALID;
							tuple->t_infomask &= ~HEAP_XMAX_IS_MULTI;

							counters.multixacts_removed++;
						}

						need_fpi = true;
					}
#endif

					/*
					 * XXX this dirties the page, but it's okay: by now we
					 * already know we want to freeze tuples in it and so will
					 * dirty it regardless.
					 */
#if PG_VERSION_NUM >= 90400
					result = HeapTupleSatisfiesVacuum(&tup, frozenxid, buf);
#else
					result = HeapTupleSatisfiesVacuum(tuple, frozenxid, buf);
#endif

					/*
					 * If we see that this tuple must be removed, remember
					 * that we need to prune the page later.  We mustn't
					 * exit the loop just yet, in case we need to fix other
					 * multixacts in the page.
					 */
					if (result == HEAPTUPLE_DEAD)
					{
						mustprune = true;
						counters.dead_tuples++;
					}
				}

				/* if prune is in order, do it now. */
				if (mustprune)
				{
					TransactionId lastRemovedXid;

					/*
					 * heap_page_prune does its own WAL logging; it needs
					 * lastRemovedXid but we don't need to do anything with
					 * the return value, since we are skipping much of what
					 * VACUUM does.
					 */
					(void) heap_page_prune(relation, buf, frozenxid, true,
										   &lastRemovedXid);
					counters.pages_pruned++;
				}

				/*
				 * Update latest tuple offset.  Some tuples could have been
				 * removed.
				 */
				maxoff = PageGetMaxOffsetNumber(page);

				/*
				 * Make really sure all the dead tuples have been removed by
				 * heap_page_prune.  Any leftovers could cause trouble later
				 * on, so abort the current run.
				 */
				for (offnum = FirstOffsetNumber;
					 offnum <= maxoff;
					 offnum = OffsetNumberNext(offnum))
				{
					HTSV_Result result;
					HeapTupleHeader tuple;
					ItemId		itemid = PageGetItemId(page, offnum);

					/* don't worry about unused or redirect page items */
					if (!ItemIdIsUsed(itemid) ||
						ItemIdIsRedirected(itemid) ||
						ItemIdIsDead(itemid))
						continue;

					freeze_status.cur_offset = offnum;

					tuple = (HeapTupleHeader) PageGetItem(page, itemid);

#if PG_VERSION_NUM >= 90400
					tup.t_data = tuple;
					tup.t_len = ItemIdGetLength(itemid);
					ItemPointerSet(&(tup.t_self), blkno, offnum);
#endif

					/* verify that all DEAD tuples are gone */
#if PG_VERSION_NUM >= 90400
					result = HeapTupleSatisfiesVacuum(&tup, frozenxid, buf);
#else
					result = HeapTupleSatisfiesVacuum(tuple, frozenxid, buf);
#endif
					if (result == HEAPTUPLE_DEAD)
						ereport(ERROR,
								(errmsg("heap_page_prune left dead tuples around")));
				}

				/*
				 * Scan the whole block and freeze everything that needs
				 * freezing. Here we use an aggressive freeze setting for Xids
				 * so that we remove as much as possible.
				 */
				nfrozen = 0;
				for (offnum = FirstOffsetNumber;
					 offnum <= maxoff;
					 offnum = OffsetNumberNext(offnum))
				{
					ItemId		itemid;
					HeapTupleHeader tuple;

					freeze_status.cur_offset = offnum;

					itemid = PageGetItemId(page, offnum);

					/* nothing for unused or redirect page items */
					if (!ItemIdIsUsed(itemid) ||
						ItemIdIsRedirected(itemid) ||
						ItemIdIsDead(itemid))
						continue;

					/* see whether this page needs freezing; set it up if so */
					tuple = (HeapTupleHeader) PageGetItem(page, itemid);

					/*
					 * Note significant version difference: in 9.2 and earlier,
					 * the freezing here is executed immediately; no need for a
					 * execute step below, which 9.3 requires.
					 */
#if PG_VERSION_NUM >= 90300
					if (heap_prepare_freeze_tuple(tuple, frozenxid, cutoff_multi,
												  &frozen[nfrozen]))
					{
						frozen[nfrozen++].offset = offnum;
						counters.tuples_frozen++;
					}
#else
					if (heap_freeze_tuple(tuple, frozenxid))
					{
						frozen[nfrozen++] = offnum;
						counters.tuples_frozen++;
					}
#endif
				}

				/*
				 * If we froze any tuples, mark the buffer dirty, and write a
				 * WAL record recording the changes.  We must log the changes
				 * to be crash-safe against future truncation of CLOG.
				 */
				if (nfrozen > 0)
				{
#if PG_VERSION_NUM >= 90300
					int			i;
#endif

					START_CRIT_SECTION();

					MarkBufferDirty(buf);

#if PG_VERSION_NUM >= 90300
					/* execute collected freezes */
					for (i = 0; i < nfrozen; i++)
					{
						ItemId		itemid;
						HeapTupleHeader htup;

						itemid = PageGetItemId(page, frozen[i].offset);
						htup = (HeapTupleHeader) PageGetItem(page, itemid);

						heap_execute_freeze_tuple(htup, &frozen[i]);
					}
#endif

					/* Now WAL-log freezing if necessary */
					if (RelationNeedsWAL(relation))
					{
						XLogRecPtr	recptr;

						recptr = log_heap_freeze(relation, buf, frozenxid,
												 frozen, nfrozen);
						PageSetLSN(page, recptr);
					}

					END_CRIT_SECTION();

					counters.pages_frozen++;
				}

				/* Record how much free space we have now */
				freespace = PageGetHeapFreeSpace(page);
				RecordPageWithFreeSpace(relation, blkno, freespace);
			}

			/*
			 * Emit an additional FPI record for this buffer, if needed.
			 * See above for an explanation.  Don't be tempted into skipping
			 * this if we already marked the page dirty by freezing: that's
			 * not enough because there would be no WAL log to remove the
			 * multixacts if the page happens not to require a FPI.
			 */
			if (need_fpi && RelationNeedsWAL(relation))
				emit_fpi_wal_record(buf, &counters);

			freeze_status.stage = FREEZE_PAGE;

			/* done with this page */
			UnlockReleaseBuffer(buf);
		}
	}
	PG_CATCH();
	{
		VacuumCostActive = false;
		PG_RE_THROW();
	}
	PG_END_TRY();

	freeze_status.stage = FREEZE_VACUUM_FSM;
	FreeSpaceMapVacuum(relation);

	report_counters(&counters, blkno);

	/*
	 * Update the pg_class freeze values.  Note we must use the very_old_xid
	 * limit for Xids, not the tighter frozenxid!  The reason for this is that
	 * we have skipped freezing any pages that have tuples that are newer than
	 * very_old_xid but older than frozenxid, if they don't have any multixact
	 * worth pruning.
	 */
	freeze_status.stage = FREEZE_UPDATE_RELSTATS;
	update_relstats(relation, very_old_xid, cutoff_multi);

	error_context_stack = freezeerrcontext.previous;

	elog(INFO, "relation %s.%s complete",
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation));

	heap_close(relation, ShareUpdateExclusiveLock);

	PG_RETURN_VOID();
}

/*
 * pg_update_datfrozenxid()
 *
 * SQL-callable wrapper that simply invokes vac_update_datfrozenxid() and
 * returns void.
 */
Datum
pg_update_datfrozenxid(PG_FUNCTION_ARGS)
{
	vac_update_datfrozenxid();

	PG_RETURN_VOID();
}


/*
 * Decide whether the page in 'buf' contains anything pg_antifreeze has to
 * act on.  This is a modified copy of lazy_check_needs_freeze that takes its
 * limits as arguments instead of using the global variables the original
 * relies on.
 *
 * Returns true at the first normal line pointer whose tuple either carries
 * (9.3+ only) a multixact xmax outside the currently valid multixact range,
 * or is reported by heap_tuple_needs_freeze() for the given limits.  The
 * matching "needs_because_*" counter is bumped before returning.  New or
 * empty pages, and pages with no such tuple, yield false.
 */
static bool
ext_pg_needs_freeze(Buffer buf,
					TransactionId freeze_limit,
					MultiXactId multi_cutoff,
					AntifreezeCounters *counters)
{
	Page		pg;
	OffsetNumber slot;
	OffsetNumber last_slot;
#if PG_VERSION_NUM >= 90300
	MultiXactId	mxact_oldest;
	MultiXactId	mxact_next;

	/* snapshot the valid multixact range */
	LWLockAcquire(MultiXactGenLock, LW_SHARED);
	mxact_oldest = MultiXactState->oldestMultiXactId;
	mxact_next = MultiXactState->nextMXact;
	LWLockRelease(MultiXactGenLock);
#endif

	pg = BufferGetPage(buf);

	/* PageIsNew probably shouldn't happen... */
	if (PageIsNew(pg) || PageIsEmpty(pg))
		return false;

	last_slot = PageGetMaxOffsetNumber(pg);
	slot = FirstOffsetNumber;
	while (slot <= last_slot)
	{
		ItemId		lp = PageGetItemId(pg, slot);

		if (ItemIdIsNormal(lp))
		{
			HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(pg, lp);

#if PG_VERSION_NUM >= 90300
			/*
			 * An out-of-range multixact must be caught before calling
			 * heap_tuple_needs_freeze, which would otherwise error out
			 * while trying to read it.
			 */
			if ((htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0 &&
				!multixact_is_within_range(HeapTupleHeaderGetRawXmax(htup),
										   mxact_oldest,
										   mxact_next))
			{
				counters->needs_because_multi++;
				return true;
			}

			if (heap_tuple_needs_freeze(htup, freeze_limit, multi_cutoff, buf))
#else
			if (heap_tuple_needs_freeze(htup, freeze_limit, buf))
#endif
			{
				counters->needs_because_other++;
				return true;
			}
		}

		slot = OffsetNumberNext(slot);
	}

	return false;
}

/*
 * Very much like vac_update_relstats, but restricted to the freeze limits:
 * relfrozenxid is updated on every version, relminmxid on 9.3 and later.
 * The pg_class row is overwritten in place, and only if something changed.
 */
static void
update_relstats(Relation relation, TransactionId frozenxid, MultiXactId minmulti)
{
	Oid			target_oid = RelationGetRelid(relation);
	Relation	pg_class_rel = heap_open(RelationRelationId, RowExclusiveLock);
	HeapTuple	class_tup = SearchSysCacheCopy1(RELOID,
												ObjectIdGetDatum(target_oid));
	Form_pg_class class_form;
	bool		changed = false;

	if (!HeapTupleIsValid(class_tup))
		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
			 target_oid);
	class_form = (Form_pg_class) GETSTRUCT(class_tup);

	/*
	 * relfrozenxid never moves backwards under normal circumstances: an
	 * older incoming value just means an earlier VACUUM used a tighter
	 * freeze_min_age, and that work must not be forgotten.  The exception
	 * is a stored value that lies "in the future" (same notion as in
	 * vac_update_datfrozenxid()); that has to be corrupt, so replace it
	 * with the cutoff used this time.
	 */
	if (class_form->relfrozenxid != frozenxid)
	{
		if (TransactionIdPrecedes(class_form->relfrozenxid, frozenxid) ||
			TransactionIdPrecedes(ReadNewTransactionId(),
								  class_form->relfrozenxid))
		{
			class_form->relfrozenxid = frozenxid;
			changed = true;
		}
	}

#if PG_VERSION_NUM >= 90300
	/*
	 * Same policy for relminmxid: advance it, or overwrite a value that is
	 * ahead of the next multixact to be assigned.
	 */
	if (class_form->relminmxid != minmulti)
	{
		if (MultiXactIdPrecedes(class_form->relminmxid, minmulti) ||
			MultiXactIdPrecedes(ReadNextMultiXactId(),
								class_form->relminmxid))
		{
			class_form->relminmxid = minmulti;
			changed = true;
		}
	}
#endif

	/* nothing to write back? */
	if (!changed)
	{
		heap_close(pg_class_rel, RowExclusiveLock);
		return;
	}

#if PG_VERSION_NUM >= 90300
	elog(INFO, "Updating relfrozenxid=%u, relminmxid=%u for relation %s.%s",
		 class_form->relfrozenxid, class_form->relminmxid,
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation));
#else
	elog(INFO,
		 "Updating relfrozenxid=%u for relation %s.%s",
		 class_form->relfrozenxid,
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation));
#endif

	heap_inplace_update(pg_class_rel, class_tup);

	heap_close(pg_class_rel, RowExclusiveLock);
}

/*
 * Report relfrozenxid and relminmxid for a relation.
 *
 * Emits one INFO line with the values currently stored in the relation's
 * pg_class row (relminmxid only on 9.3 and later).  Nothing is modified,
 * so pg_class is opened with AccessShareLock only, and the tuple copy
 * obtained from the syscache is freed before returning.
 */
static void
report_relation_xid_limits(Relation relation)
{
	Oid			relid = RelationGetRelid(relation);
	Relation	rd;
	HeapTuple	ctup;
	Form_pg_class pgcform;

	/* read-only access: no need for a lock that conflicts with writers */
	rd = heap_open(RelationRelationId, AccessShareLock);

	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(ctup))
		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
			 relid);
	pgcform = (Form_pg_class) GETSTRUCT(ctup);

#if PG_VERSION_NUM >= 90300
	elog(INFO, "relation %s.%s has relfrozenxid=%u, relminmxid=%u",
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation),
		 pgcform->relfrozenxid, pgcform->relminmxid);
#else
	elog(INFO, "relation %s.%s has relfrozenxid=%u",
		 get_namespace_name(relation->rd_rel->relnamespace),
		 RelationGetRelationName(relation),
		 pgcform->relfrozenxid);
#endif

	/* SearchSysCacheCopy1 handed us a palloc'd copy; release it */
	heap_freetuple(ctup);

	heap_close(rd, AccessShareLock);
}

/*
 * Look up the shared memory struct named "Shared MultiXact State" and point
 * MultiXactState at it.  Does nothing on versions before 9.3.
 *
 * The struct is expected to exist already (see the Assert below); the
 * "#if 0" sections are disabled creation/initialization code.
 *
 * NOTE(review): presumably this attaches to the struct the server itself
 * created, so the size computed here has to agree with the server's own
 * computation for the running version -- confirm when adding support for
 * a new server version.
 */
static void
ext_MultiXactShmemInit(void)
{
#if PG_VERSION_NUM >= 90300
	bool        found;

	elog(DEBUG2, "Shared Memory Init for MultiXact");

	/* disabled: SLRU setup is not performed by this module */
#if 0
	MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;

	SimpleLruInit(MultiXactOffsetCtl,
				  "MultiXactOffset Ctl", NUM_MXACTOFFSET_BUFFERS, 0,
				  MultiXactOffsetControlLock, "pg_multixact/offsets");
	SimpleLruInit(MultiXactMemberCtl,
				  "MultiXactMember Ctl", NUM_MXACTMEMBER_BUFFERS, 0,
				  MultiXactMemberControlLock, "pg_multixact/members");
#endif

	/*
	 * Size of the fixed part plus two per-slot MultiXactId arrays; the
	 * fixed part is computed differently for 9.5+ and for older versions.
	 */
#if PG_VERSION_NUM >= 90500
#define SHARED_MULTIXACT_STATE_SIZE \
	add_size(offsetof(MultiXactStateData, perBackendXactIds) + sizeof(MultiXactId), \
			mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
#else
#define SHARED_MULTIXACT_STATE_SIZE \
	add_size(sizeof(MultiXactStateData), \
			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
#endif
	/* Initialize our shared state struct */
	MultiXactState = ShmemInitStruct("Shared MultiXact State",
									 SHARED_MULTIXACT_STATE_SIZE,
									 &found);
	/* disabled: with this section compiled out only the Assert remains */
#if 0
	if (!IsUnderPostmaster)
	{
		Assert(!found);

		/* Make sure we zero out the per-backend state */
		MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
	}
	else
#endif
		Assert(found);

	/* disabled: the per-backend arrays are not used by this module */
#if 0
	/*
	 * Set up array pointers.  Note that perBackendXactIds[0] is wasted space
	 * since we only use indexes 1..MaxOldestSlot in each array.
	 */
	OldestMemberMXactId = MultiXactState->perBackendXactIds;
	OldestVisibleMXactId = OldestMemberMXactId + MaxOldestSlot;
#endif

#endif /* PG_VERSION_NUM >= 9.3 */
}

#if PG_VERSION_NUM >= 90300
/*
 * Is 'multi' inside the currently valid multixact range?
 *
 * The valid range is [oldestMulti, nextMulti): oldestMulti is the oldest
 * multixact that may still be referenced, so it is itself valid, while
 * nextMulti has not been assigned yet.  The range may wrap around the end
 * of the 32-bit counter, in which case oldestMulti is numerically greater
 * than nextMulti.
 *
 * InvalidMultiXactId is always accepted.  When the two endpoints are equal
 * the range is empty and every other value is rejected.
 */
static bool
multixact_is_within_range(MultiXactId multi,
						   MultiXactId oldestMulti,
						   MultiXactId nextMulti)
{
	/* it's OK if it's Invalid */
	if (multi == InvalidMultiXactId)
		return true;

	/*
	 * If there's no valid range, no value is in range (except
	 * InvalidMultiXactId, checked above)
	 */
	if (nextMulti == oldestMulti)
		return false;

	/*
	 * Otherwise check boundaries.  The lower bound is inclusive: treating
	 * oldestMulti as out of range would make the caller wipe a multixact
	 * that is still perfectly valid.
	 */
	if (oldestMulti < nextMulti)
		return multi >= oldestMulti && multi < nextMulti;
	else
		return multi < nextMulti || multi >= oldestMulti;
}
#endif

/*
 * Emit one INFO message with the current block number and all counters.
 *
 * The counters are uint64, so they are formatted with UINT64_FORMAT rather
 * than "%lu", which is wrong wherever long is narrower than 64 bits.
 */
static void
report_counters(AntifreezeCounters *counters, BlockNumber blkno)
{
	StringInfoData	str;

	initStringInfo(&str);

	appendStringInfo(&str, "processing page %u\n", blkno);
	appendStringInfo(&str,
					 "pgs_needing_freeze: " UINT64_FORMAT
					 "   pages_frozen: " UINT64_FORMAT
					 "    pages_pruned: " UINT64_FORMAT
					 "  full_page_images: " UINT64_FORMAT "\n",
					 counters->pgs_needing_freeze, counters->pages_frozen,
					 counters->pages_pruned, counters->full_page_images);
	appendStringInfo(&str,
					 "dead_tuples: " UINT64_FORMAT
					 "    tuples_frozen: " UINT64_FORMAT
					 "   multixacts_removed: " UINT64_FORMAT "\n",
					 counters->dead_tuples, counters->tuples_frozen,
					 counters->multixacts_removed);
	appendStringInfo(&str,
					 "needs_because_multi: " UINT64_FORMAT
					 "   needs_because_other: " UINT64_FORMAT,
					 counters->needs_because_multi,
					 counters->needs_because_other);

	elog(INFO, "%s", str.data);
	pfree(str.data);
}

/*
 * Emit a XLOG_FPI wal record for the given buffer; counters->full_page_images
 * is incremented.
 *
 * The buffer is marked dirty before the record is inserted and the page LSN
 * is set to the record's LSN, all inside a critical section, so that the
 * modified page can neither be skipped by the buffer manager nor reach disk
 * ahead of its WAL record.  The caller is expected to hold an exclusive
 * content lock on the buffer (pg_antifreeze calls this while still holding
 * the cleanup lock).
 *
 * Only 9.3 and up are supported.  Callers are not expected to need this on
 * 9.2 and earlier; reaching it there trips an assertion in assert-enabled
 * builds.
 */
static void
emit_fpi_wal_record(Buffer buf, AntifreezeCounters *counters)
{
#if PG_VERSION_NUM >= 90500
	RelFileNode	node;
	ForkNumber	fork;
	BlockNumber	block;

	BufferGetTag(buf, &node, &fork, &block);

	START_CRIT_SECTION();

	/* log_newpage sets the page LSN but leaves dirtying to the caller */
	MarkBufferDirty(buf);
	(void) log_newpage(&node, fork, block, BufferGetPage(buf), true);

	END_CRIT_SECTION();
#elif PG_VERSION_NUM >= 90300
	XLogRecData rdata[2];
	char		copied_buffer[BLCKSZ];
	Page		page = BufferGetPage(buf);
	PageHeader	pagehdr = (PageHeader) page;
	char	   *origdata = (char *) BufferGetBlock(buf);
	BkpBlock	bkpb;
	XLogRecPtr	recptr;

	/*
	 * This mimicks XLogCheckBuffer.  We cannot run the function
	 * itself, not only because it's static in xlog.c, but also
	 * because it checks the LSN in the page, which isn't useful
	 * here: we want to emit a FPI no matter what, because we've
	 * changed the Xmax of some tuples forcibly and not all of
	 * these changes have been wal-logged.
	 */
	BufferGetTag(buf, &bkpb.node, &bkpb.fork, &bkpb.block);
	{
		uint16	lower = pagehdr->pd_lower;
		uint16	upper = pagehdr->pd_upper;

		if (lower >= SizeOfPageHeaderData &&
			upper > lower &&
			upper <= BLCKSZ)
		{
			bkpb.hole_offset = lower;
			bkpb.hole_length = upper - lower;
		}
		else
		{
			/* page header looks odd: log the whole page, no hole */
			bkpb.hole_offset = 0;
			bkpb.hole_length = 0;
		}
	}

	/* copy the page image, squeezing out the hole between lower and upper */
	memcpy(copied_buffer, origdata, bkpb.hole_offset);
	memcpy(copied_buffer + bkpb.hole_offset,
			origdata + bkpb.hole_offset + bkpb.hole_length,
			BLCKSZ - bkpb.hole_offset - bkpb.hole_length);

	/* Header for the backup block */
	rdata[0].data = (char *) &bkpb;
	rdata[0].len = sizeof(BkpBlock);
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = &(rdata[1]);

	/* save copy of the buffer */
	rdata[1].data = copied_buffer;
	rdata[1].len = BLCKSZ - bkpb.hole_length;
	rdata[1].buffer = InvalidBuffer;
	rdata[1].next = NULL;

	START_CRIT_SECTION();

	/* dirty first, then log, then stamp the page with the record's LSN */
	MarkBufferDirty(buf);
	recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
	PageSetLSN(page, recptr);

	END_CRIT_SECTION();
#else
	/* 9.2 and down should not require FPIs */
	Assert(false);
#endif

	counters->full_page_images++;
}
