Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
authorMelanie Plageman <melanieplageman@gmail.com>
Thu, 9 Oct 2025 20:25:50 +0000 (16:25 -0400)
committerMelanie Plageman <melanieplageman@gmail.com>
Thu, 9 Oct 2025 20:29:01 +0000 (16:29 -0400)
Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record.

This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_xlog.c
src/backend/access/heap/visibilitymap.c
src/backend/access/rmgrdesc/heapdesc.c
src/include/access/visibilitymap.h

index ed0c0c2dc9f48177079d744c93f86407e3a4bffe..568696333c259b643141c0df1e7b86f0db68e390 100644 (file)
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
        starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
 
        if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+       {
            all_frozen_set = true;
+           /* Lock the vmbuffer before entering the critical section */
+           LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+       }
 
        /* NO EREPORT(ERROR) from here till changes are logged */
        START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
         * going to add further frozen rows to it.
         *
         * If we're only adding already frozen rows to a previously empty
-        * page, mark it as all-visible.
+        * page, mark it as all-frozen and update the visibility map. We're
+        * already holding a pin on the vmbuffer.
         */
        if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
        {
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                                vmbuffer, VISIBILITYMAP_VALID_BITS);
        }
        else if (all_frozen_set)
+       {
            PageSetAllVisible(page);
+           visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+                                    vmbuffer,
+                                    VISIBILITYMAP_ALL_VISIBLE |
+                                    VISIBILITYMAP_ALL_FROZEN,
+                                    relation->rd_locator);
+       }
 
        /*
         * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
            xlrec->flags = 0;
            if (all_visible_cleared)
                xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+           /*
+            * We don't have to worry about including a conflict xid in the
+            * WAL record, as HEAP_INSERT_FROZEN intentionally violates
+            * visibility rules.
+            */
            if (all_frozen_set)
                xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
 
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
            XLogBeginInsert();
            XLogRegisterData(xlrec, tupledata - scratch.data);
            XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+           if (all_frozen_set)
+               XLogRegisterBuffer(1, vmbuffer, 0);
 
            XLogRegisterBufData(0, tupledata, totaldatalen);
 
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
            recptr = XLogInsert(RM_HEAP2_ID, info);
 
            PageSetLSN(page, recptr);
+           if (all_frozen_set)
+           {
+               Assert(BufferIsDirty(vmbuffer));
+               PageSetLSN(BufferGetPage(vmbuffer), recptr);
+           }
        }
 
        END_CRIT_SECTION();
 
-       /*
-        * If we've frozen everything on the page, update the visibilitymap.
-        * We're already holding pin on the vmbuffer.
-        */
        if (all_frozen_set)
-       {
-           /*
-            * It's fine to use InvalidTransactionId here - this is only used
-            * when HEAP_INSERT_FROZEN is specified, which intentionally
-            * violates visibility rules.
-            */
-           visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-                             InvalidXLogRecPtr, vmbuffer,
-                             InvalidTransactionId,
-                             VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-       }
+           LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
        UnlockReleaseBuffer(buffer);
        ndone += nthispage;
index cf843277938de424f5a09135e9ae7ebbcb9cc39d..30e339d8fe2d65bc44fd99aec106f81c836a5ddc 100644 (file)
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
    int         i;
    bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
    XLogRedoAction action;
+   Buffer      vmbuffer = InvalidBuffer;
 
    /*
     * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
-       Buffer      vmbuffer = InvalidBuffer;
 
        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
+       vmbuffer = InvalidBuffer;
        FreeFakeRelcacheEntry(reln);
    }
 
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
 
+   buffer = InvalidBuffer;
+
+   /*
+    * Read and update the visibility map (VM) block.
+    *
+    * We must always redo VM changes, even if the corresponding heap page
+    * update was skipped due to the LSN interlock. Each VM block covers
+    * multiple heap pages, so later WAL records may update other bits in the
+    * same block. If this record includes an FPI (full-page image),
+    * subsequent WAL records may depend on it to guard against torn pages.
+    *
+    * Heap page changes are replayed first to preserve the invariant:
+    * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+    *
+    * Note that we released the heap page lock above. During normal
+    * operation, this would be unsafe — a concurrent modification could
+    * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
+    * invariant.
+    *
+    * During recovery, however, no concurrent writers exist. Therefore,
+    * updating the VM without holding the heap page lock is safe enough. This
+    * same approach is taken when replaying xl_heap_visible records (see
+    * heap_xlog_visible()).
+    */
+   if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+       XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+                                     &vmbuffer) == BLK_NEEDS_REDO)
+   {
+       Page        vmpage = BufferGetPage(vmbuffer);
+
+       /* initialize the page if it was read as zeros */
+       if (PageIsNew(vmpage))
+           PageInit(vmpage, BLCKSZ, 0);
+
+       visibilitymap_set_vmbits(blkno,
+                                vmbuffer,
+                                VISIBILITYMAP_ALL_VISIBLE |
+                                VISIBILITYMAP_ALL_FROZEN,
+                                rlocator);
+
+       PageSetLSN(vmpage, lsn);
+   }
+
+   if (BufferIsValid(vmbuffer))
+       UnlockReleaseBuffer(vmbuffer);
+
    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
index 0414ce1945c3fd46382b278d44509cb41fa590a2..2f5e61e239220d5f569a9460c7aeeda87c1ef146 100644 (file)
@@ -14,7 +14,8 @@
  *     visibilitymap_clear  - clear bits for one page in the visibility map
  *     visibilitymap_pin    - pin a map page for setting a bit
  *     visibilitymap_pin_ok - check whether correct map page is already pinned
- *     visibilitymap_set    - set a bit in a previously pinned page
+ *     visibilitymap_set    - set bit(s) in a previously pinned page and log
+ *     visibilitymap_set_vmbits - set bit(s) in a pinned page
  *     visibilitymap_get_status - get status of bits
  *     visibilitymap_count  - count number of bits set in visibility map
  *     visibilitymap_prepare_truncate -
@@ -322,6 +323,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
    return status;
 }
 
+/*
+ * Set VM (visibility map) flags in the VM block in vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * rlocator is used only for debugging messages.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+                        Buffer vmBuf, uint8 flags,
+                        const RelFileLocator rlocator)
+{
+   BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+   uint32      mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+   uint8       mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+   Page        page;
+   uint8      *map;
+   uint8       status;
+
+#ifdef TRACE_VISIBILITYMAP
+   elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+        flags,
+        relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
+        heapBlk);
+#endif
+
+   /* Call in same critical section where WAL is emitted. */
+   Assert(InRecovery || CritSectionCount > 0);
+
+   /* Flags should be valid. Also never clear bits with this function */
+   Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+   /* Must never set all_frozen bit without also setting all_visible bit */
+   Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+   /* Check that we have the right VM page pinned */
+   if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+       elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
+
+   Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
+
+   page = BufferGetPage(vmBuf);
+   map = (uint8 *) PageGetContents(page);
+
+   status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+   if (flags != status)
+   {
+       map[mapByte] |= (flags << mapOffset);
+       MarkBufferDirty(vmBuf);
+   }
+
+   return status;
+}
+
 /*
  * visibilitymap_get_status - get status of bits
  *
index 82b62c95de5744659bd087f59ebe869af20e9128..056beb24d4075d0bb7642569b529c71bee8c5a6f 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
 
 /*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
        appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
                         xlrec->flags);
 
+       if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+           appendStringInfo(buf, ", vm_flags: 0x%02X",
+                            VISIBILITYMAP_ALL_VISIBLE |
+                            VISIBILITYMAP_ALL_FROZEN);
+
        if (XLogRecHasBlockData(record, 0) && !isinit)
        {
            appendStringInfoString(buf, ", offsets:");
index be21c6dd1a306b9dbd53a2ffda35a9f3e7a3a403..c6fa37be9682cf4607a38d138da4484f7e63eaed 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/xlogdefs.h"
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/relfilelocator.h"
 #include "utils/relcache.h"
 
 /* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
                               Buffer vmBuf,
                               TransactionId cutoff_xid,
                               uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+                                     Buffer vmBuf, uint8 flags,
+                                     const RelFileLocator rlocator);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,