bloom: Optimize bitmap scan path with streaming read
author Michael Paquier <michael@paquier.xyz>
Tue, 10 Mar 2026 22:36:10 +0000 (07:36 +0900)
committer Michael Paquier <michael@paquier.xyz>
Tue, 10 Mar 2026 22:36:10 +0000 (07:36 +0900)
This commit replaces the per-page buffer read loop in blgetbitmap() with
a reading stream, to improve scan efficiency, particularly useful for
large bloom indexes.  Some benchmarking with a large number of rows has
shown a very nice improvement in terms of runtime and IO read reduction
with test cases up to 10M rows for a bloom index scan.

For the io_uring method, the author has reported a 3x improvement in
runtime while I was at close to a 7x improvement.  For the worker method with 3
workers, the author has reported better numbers than myself in runtime,
with the reduction in IO stats being appealing for all the cases
measured.

Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/CABPTF7VrqfbcDXqGrdLQ2xaQ=K0RzExNuw6U_GGqzSJu32wfdQ@mail.gmail.com

contrib/bloom/blscan.c

index 0535d45f2d825705dd0209a75057f9356c3360d8..1a0e42021ec1e6627e423e8d73a1b456cbfff3c3 100644 (file)
@@ -18,6 +18,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
+#include "storage/read_stream.h"
 
 /*
  * Begin scan of bloom index.
@@ -76,11 +77,13 @@ int64
 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 {
    int64       ntids = 0;
-   BlockNumber blkno = BLOOM_HEAD_BLKNO,
+   BlockNumber blkno,
                npages;
    int         i;
    BufferAccessStrategy bas;
    BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+   BlockRangeReadStreamPrivate p;
+   ReadStream *stream;
 
    if (so->sign == NULL)
    {
@@ -120,14 +123,29 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    if (scan->instrument)
        scan->instrument->nsearches++;
 
+   /* Scan all blocks except the metapage using streaming reads */
+   p.current_blocknum = BLOOM_HEAD_BLKNO;
+   p.last_exclusive = npages;
+
+   /*
+    * It is safe to use batchmode as block_range_read_stream_cb takes no
+    * locks.
+    */
+   stream = read_stream_begin_relation(READ_STREAM_FULL |
+                                       READ_STREAM_USE_BATCHING,
+                                       bas,
+                                       scan->indexRelation,
+                                       MAIN_FORKNUM,
+                                       block_range_read_stream_cb,
+                                       &p,
+                                       0);
+
    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
    {
        Buffer      buffer;
        Page        page;
 
-       buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
-                                   blkno, RBM_NORMAL, bas);
-
+       buffer = read_stream_next_buffer(stream, NULL);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buffer);
 
@@ -163,6 +181,9 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
        UnlockReleaseBuffer(buffer);
        CHECK_FOR_INTERRUPTS();
    }
+
+   Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+   read_stream_end(stream);
    FreeAccessStrategy(bas);
 
    return ntids;