/* * Copyright (c) 2002 Network Storage Solutions, Inc. * All rights reserved. * * Written by Chris M. Jepeway for Network Storage Solutions, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Network Storage Solutions, Inc. * 4. The name of Network Storage Solutions, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY NETWORK STORAGE SOLUTIONS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETWORK STORAGE SOLUTIONS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(_KERNEL_OPT) #include "opt_cluster.h" #endif static struct vm_map *cluster_map = NULL; /* Max # of buffers we cluster together -- the default */ /* setting is no more than MAXBSIZE worth of pages; its */ /* dependency on PAGE_SIZE means that MAX_CLUSTERS is */ /* not a constant */ #ifndef MAX_CLUSTERS #define MAX_CLUSTERS (MAXPHYS / PAGE_SIZE) #endif /* Max amount of VA we'll ever allow to be mapped by clusters */ #ifndef MAX_CLUSTEREDVA #define MAX_CLUSTEREDVA (32 * 1024 * 1024) #endif /* Make "constants" patchable */ static int max_clusters = -1; static int max_clusteredva = -1; #if defined(DIAGNOSTIC) || defined(CLUSTER_STATS) /* Counters for determining how well our maxes are set */ /* XXX - since interrupts cause these to be updated, we */ /* either need to synchronize access to them or we ass- */ /* ume they are updated atomically; take the latter, */ /* easier approach for now */ static unsigned long currentclusters; static unsigned long totalclusters; /* This might overflow, if we do > 135 */ /* (or so) clusters a second for a year; */ /* if we ever synchronize updates, make */ /* it a long long or a double */ static unsigned long totalclusteredbuffers; /* Total # of buffers */ /* put into clusters */ static unsigned long missedclusters;/* We hope this never overflows */ #define CLUSTER_STAT(x) x #else #define CLUSTER_STAT(x) #endif #define b_clusterflags b_flags /* hide our flags here */ #define B_CLUSTERFAILED B_XXX /* clustering failed */ #ifdef DIAGNOSTIC #define BUFQ_REMOVE(bs, bp) \ do { \ struct buf *fbp; \ fbp = BUFQ_GET(bs); \ KASSERT(fbp == bp); \ } while (0) #else #define BUFQ_REMOVE(bs, bp) (void) BUFQ_GET(bs) #endif void cluster_init(void) { paddr_t min, max; if (max_clusters < 0) max_clusters = MAX_CLUSTERS; if (max_clusteredva < 0) max_clusteredva = MAX_CLUSTEREDVA; min = max = 0; if (cluster_map == NULL) { cluster_map = uvm_km_suballoc(kernel_map, &min, &max, max_clusteredva, VM_MAP_INTRSAFE, 0, NULL); if (cluster_map == NULL) panic("cluster_init"); } CLUSTER_STAT(currentclusters = 0); CLUSTER_STAT(totalclusters = 0); CLUSTER_STAT(totalclusteredbuffers = 0); CLUSTER_STAT(missedclusters = 0); } /* * Initialize a buf for use in clustering */ void cluster_init_buf(struct buf *bp) { bp->b_clusterflags &= ~B_CLUSTERFAILED; } static void clusterdone(struct buf *); typedef struct cluster { struct bufq_state *bstate; int nbufs; struct buf *cluster[1]; } cluster_t; /* * Get the next cluster from the given buffer store */ struct buf * cluster(struct bufq_state *bstate, u_int32_t secsz) { cluster_t *cl; struct buf *cbp, *bp, *bufs[2 * max_clusters], **bpp; int i, n, off, s, nblks; daddr_t top, bot; int size; int topi, boti; long rw; paddr_t p; KASSERT(max_clusters > 0); KASSERT(max_clusteredva > 0); bp = BUFQ_PEEK(bstate); if (bp == NULL) return NULL; if (secsz == 0) /* Must be reading disklabel */ goto nocluster; s = splbio(); cbp = pool_get(&bufpool, PR_NOWAIT); splx(s); if (cbp == NULL) goto nocluster; n = 0; size = 0; rw = bp->b_flags & B_READ; bot = top = bp->b_rawblkno; boti = topi = max_clusters; while (bp) { /* * XXX - we could map a page that's not full at the * end of the cluster; instead, we only cluster * buffers that are multiples of the page size */ if ((bp->b_flags & B_READ) != rw || (vaddr_t) bp->b_data != trunc_page((vaddr_t) bp->b_data) || bp->b_bcount != trunc_page(bp->b_bcount) || size + bp->b_bcount > MAXBSIZE || n == max_clusters || bp->b_clusterflags & B_CLUSTERFAILED) break; nblks = howmany(bp->b_bcount, secsz); if (bp->b_rawblkno == top) { i = topi++; top += nblks; } else if (bp->b_rawblkno + nblks == bot) { i = --boti; bot -= nblks; } else break; n++; BUFQ_REMOVE(bstate, bp); bufs[i] = bp; size += bp->b_bcount; bp = BUFQ_PEEK(bstate); } switch (n) { case 0: BUFQ_REMOVE(bstate, bp); /* U-G-L-Y */ bufs[max_clusters] = bp; /* Note fall through */ case 1: goto dropcluster; } cl = malloc(sizeof(*cl) - sizeof(cl->cluster) + n * sizeof(cl->cluster[0]), M_DEVBUF, M_NOWAIT); if (cl == NULL) goto dropcluster; cbp->b_data = (caddr_t) uvm_km_kmemalloc(cluster_map, NULL, size, UVM_KMF_VALLOC | UVM_KMF_TRYLOCK); if (cbp->b_data == 0) goto freecluster; cbp->b_bufsize = 0; cl->nbufs = n; memcpy(cl->cluster, &bufs[boti], n * sizeof(bufs[0])); cl->bstate = bstate; bpp = cl->cluster; off = 0; bp = *bpp++; while (cbp->b_bufsize < size) { if (off >= bp->b_bcount) { bp = *bpp++; off = 0; } /* * Try to find page mapped by this offset. */ p = vtophys((vaddr_t) bp->b_data + off); if (p == 0) /* * XXX - For now, drop everything; * could instead continue w/ * those mappings we've been * able to establish */ goto dropmap; pmap_kenter_pa((vaddr_t) cbp->b_data + cbp->b_bufsize, p, VM_PROT_READ | (rw == B_READ) ? VM_PROT_WRITE : 0); off += PAGE_SIZE; cbp->b_bufsize += PAGE_SIZE; } pmap_update(pmap_kernel()); cbp->b_resid = cbp->b_bcount = cbp->b_bufsize; cbp->b_private = cl; cbp->b_vp = NULL; cbp->b_rawblkno = bot; cbp->b_blkno = cbp->b_lblkno = -1; LIST_INIT(&cbp->b_dep); cbp->b_flags = B_BUSY | B_CALL | rw; cbp->b_iodone = clusterdone; CLUSTER_STAT(currentclusters++); CLUSTER_STAT(totalclusters++); CLUSTER_STAT(totalclusteredbuffers += cl->nbufs); return cbp; dropmap: if (cbp->b_bufsize) pmap_kremove((vaddr_t) cbp->b_data, cbp->b_bufsize); uvm_km_free_wakeup(cluster_map, (vaddr_t) cbp->b_data, size); freecluster: free(cl, M_DEVBUF); CLUSTER_STAT(missedclusters++); dropcluster: cbp->b_flags = B_INVAL; # if DIAGNOSTIC cbp->b_private = (void *) 0xdeadbeef; cbp->b_iodone = NULL; # endif s = splbio(); pool_put(&bufpool, cbp); splx(s); for (i = boti; i < topi; i++) if (i != max_clusters) BUFQ_PUT(bstate, bufs[i]); bp = bufs[max_clusters]; return bp; nocluster: BUFQ_REMOVE(bstate, bp); return bp; } /* * Should be called at splbio() */ static void clusterdone(struct buf *cbp) { struct buf *bp; cluster_t *cl; int i; pmap_kremove((vaddr_t) cbp->b_data, cbp->b_bufsize); pmap_update(pmap_kernel()); uvm_km_free_wakeup(cluster_map, (vaddr_t) cbp->b_data, cbp->b_bufsize); cbp->b_bufsize = 0; cl = (cluster_t *) cbp->b_private; # if DIAGNOSTIC if (cl == (void *) 0xdeadbeef) /* WTF? */ panic("cluster is deadbeef"); # endif i = cl->nbufs; # ifdef CLUSTER_DEBUG printf("clusterdone: %p\t%d\n", cbp, i); # endif while (i--) { bp = cl->cluster[i]; /* * We should never find a buf in a cluster that * has failed a previous try at clustering */ KASSERT((bp->b_clusterflags & B_CLUSTERFAILED) == 0); if (cbp->b_flags & B_ERROR) { /* * Uncluster and re-try */ bp->b_clusterflags |= B_CLUSTERFAILED; BUFQ_PUT(cl->bstate, bp); } else { bp->b_resid = min(cbp->b_resid, bp->b_bcount); if (cbp->b_resid) { cbp->b_resid -= bp->b_bcount; if (cbp->b_resid < 0) cbp->b_resid = 0; } biodone(bp); } } cbp->b_flags = B_INVAL; # if DIAGNOSTIC cbp->b_private = (void *) 0xdeadbeef; cbp->b_iodone = NULL; # endif pool_put(&bufpool, cbp); free(cl, M_DEVBUF); CLUSTER_STAT(currentclusters--); }