diff mbox series

[10/12] fscache: Fix cookie key hashing

Message ID 162431201844.2908479.8293647220901514696.stgit@warthog.procyon.org.uk
State New
Headers show
Series fscache: Some prep work for fscache rewrite | expand

Commit Message

David Howells June 21, 2021, 9:46 p.m. UTC
The current hash algorithm used for hashing cookie keys is really bad,
producing almost no dispersion (after a test kernel build, ~30000 files
were split over just 18 out of the 32768 hash buckets).

Borrow the full_name_hash() hash function into fscache to do the hashing
for cookie keys and, in the future, volume keys.

I don't want to use full_name_hash() as-is because I want the hash value to
be consistent across arches and over time as the hash value produced may
get used on disk.

I can also optimise parts of it away as the key will always be a padded
array of aligned 32-bit words.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/fscache/cookie.c   |   14 +-------------
 fs/fscache/internal.h |    2 ++
 fs/fscache/main.c     |   39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 13 deletions(-)

Comments

Jeff Layton Aug. 24, 2021, 4:11 p.m. UTC | #1
On Mon, 2021-06-21 at 22:46 +0100, David Howells wrote:
> The current hash algorithm used for hashing cookie keys is really bad,

> producing almost no dispersion (after a test kernel build, ~30000 files

> were split over just 18 out of the 32768 hash buckets).

> 

> Borrow the full_name_hash() hash function into fscache to do the hashing

> for cookie keys and, in the future, volume keys.

> 

> I don't want to use full_name_hash() as-is because I want the hash value to

> be consistent across arches and over time as the hash value produced may

> get used on disk.

> 

> I can also optimise parts of it away as the key will always be a padded

> array of aligned 32-bit words.

> 

> Signed-off-by: David Howells <dhowells@redhat.com>

> ---

> 


What happens when this patch encounters a cache that was built before
it? Do you need to couple this with some sort of global cache
invalidation or rehashing event?

>  fs/fscache/cookie.c   |   14 +-------------

>  fs/fscache/internal.h |    2 ++

>  fs/fscache/main.c     |   39 +++++++++++++++++++++++++++++++++++++++

>  3 files changed, 42 insertions(+), 13 deletions(-)

> 

> diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c

> index ec9bce33085f..2558814193e9 100644

> --- a/fs/fscache/cookie.c

> +++ b/fs/fscache/cookie.c

> @@ -87,10 +87,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie)

>  static int fscache_set_key(struct fscache_cookie *cookie,

>  			   const void *index_key, size_t index_key_len)

>  {

> -	unsigned long long h;

>  	u32 *buf;

>  	int bufs;

> -	int i;

>  

>  	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));

>  

> @@ -104,17 +102,7 @@ static int fscache_set_key(struct fscache_cookie *cookie,

>  	}

>  

>  	memcpy(buf, index_key, index_key_len);

> -

> -	/* Calculate a hash and combine this with the length in the first word

> -	 * or first half word

> -	 */

> -	h = (unsigned long)cookie->parent;

> -	h += index_key_len + cookie->type;

> -

> -	for (i = 0; i < bufs; i++)

> -		h += buf[i];

> -

> -	cookie->key_hash = h ^ (h >> 32);

> +	cookie->key_hash = fscache_hash(0, buf, bufs);

>  	return 0;

>  }

>  

> diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h

> index 200082cafdda..a49136c63e4b 100644

> --- a/fs/fscache/internal.h

> +++ b/fs/fscache/internal.h

> @@ -74,6 +74,8 @@ extern struct workqueue_struct *fscache_object_wq;

>  extern struct workqueue_struct *fscache_op_wq;

>  DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);

>  

> +extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);

> +

>  static inline bool fscache_object_congested(void)

>  {

>  	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);

> diff --git a/fs/fscache/main.c b/fs/fscache/main.c

> index c1e6cc9091aa..4207f98e405f 100644

> --- a/fs/fscache/main.c

> +++ b/fs/fscache/main.c

> @@ -93,6 +93,45 @@ static struct ctl_table fscache_sysctls_root[] = {

>  };

>  #endif

>  

> +/*

> + * Mixing scores (in bits) for (7,20):

> + * Input delta: 1-bit      2-bit

> + * 1 round:     330.3     9201.6

> + * 2 rounds:   1246.4    25475.4

> + * 3 rounds:   1907.1    31295.1

> + * 4 rounds:   2042.3    31718.6

> + * Perfect:    2048      31744

> + *            (32*64)   (32*31/2 * 64)

> + */

> +#define HASH_MIX(x, y, a)	\

> +	(	x ^= (a),	\

> +	y ^= x,	x = rol32(x, 7),\

> +	x += y,	y = rol32(y,20),\

> +	y *= 9			)

> +

> +static inline unsigned int fold_hash(unsigned long x, unsigned long y)

> +{

> +	/* Use arch-optimized multiply if one exists */

> +	return __hash_32(y ^ __hash_32(x));

> +}

> +

> +/*

> + * Generate a hash.  This is derived from full_name_hash(), but we want to be

> + * sure it is arch independent and that it doesn't change as bits of the

> + * computed hash value might appear on disk.  The caller also guarantees that

> + * the hashed data will be a series of aligned 32-bit words.

> + */

> +unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)

> +{

> +	unsigned int a, x = 0, y = salt;

> +

> +	for (; n; n--) {

> +		a = *data++;

> +		HASH_MIX(x, y, a);

> +	}

> +	return fold_hash(x, y);

> +}

> +

>  /*

>   * initialise the fs caching module

>   */

> 

> 


-- 
Jeff Layton <jlayton@redhat.com>
David Howells Aug. 25, 2021, 2:04 p.m. UTC | #2
Jeff Layton <jlayton@redhat.com> wrote:

> What happens when this patch encounters a cache that was built before

> it? Do you need to couple this with some sort of global cache

> invalidation or rehashing event?


At the moment, nothing.  cachefiles generates a second hash value, but doing
so is a duplication of effort.

David
diff mbox series

Patch

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index ec9bce33085f..2558814193e9 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -87,10 +87,8 @@  void fscache_free_cookie(struct fscache_cookie *cookie)
 static int fscache_set_key(struct fscache_cookie *cookie,
 			   const void *index_key, size_t index_key_len)
 {
-	unsigned long long h;
 	u32 *buf;
 	int bufs;
-	int i;
 
 	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
 
@@ -104,17 +102,7 @@  static int fscache_set_key(struct fscache_cookie *cookie,
 	}
 
 	memcpy(buf, index_key, index_key_len);
-
-	/* Calculate a hash and combine this with the length in the first word
-	 * or first half word
-	 */
-	h = (unsigned long)cookie->parent;
-	h += index_key_len + cookie->type;
-
-	for (i = 0; i < bufs; i++)
-		h += buf[i];
-
-	cookie->key_hash = h ^ (h >> 32);
+	cookie->key_hash = fscache_hash(0, buf, bufs);
 	return 0;
 }
 
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 200082cafdda..a49136c63e4b 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -74,6 +74,8 @@  extern struct workqueue_struct *fscache_object_wq;
 extern struct workqueue_struct *fscache_op_wq;
 DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
 
+extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
+
 static inline bool fscache_object_congested(void)
 {
 	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index c1e6cc9091aa..4207f98e405f 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -93,6 +93,45 @@  static struct ctl_table fscache_sysctls_root[] = {
 };
 #endif
 
+/*
+ * Mixing scores (in bits) for (7,20):
+ * Input delta: 1-bit      2-bit
+ * 1 round:     330.3     9201.6
+ * 2 rounds:   1246.4    25475.4
+ * 3 rounds:   1907.1    31295.1
+ * 4 rounds:   2042.3    31718.6
+ * Perfect:    2048      31744
+ *            (32*64)   (32*31/2 * 64)
+ */
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol32(x, 7),\
+	x += y,	y = rol32(y,20),\
+	y *= 9			)
+
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
+{
+	/* Use arch-optimized multiply if one exists */
+	return __hash_32(y ^ __hash_32(x));
+}
+
+/*
+ * Generate a hash.  This is derived from full_name_hash(), but we want to be
+ * sure it is arch independent and that it doesn't change as bits of the
+ * computed hash value might appear on disk.  The caller also guarantees that
+ * the hashed data will be a series of aligned 32-bit words.
+ */
+unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
+{
+	unsigned int a, x = 0, y = salt;
+
+	for (; n; n--) {
+		a = *data++;
+		HASH_MIX(x, y, a);
+	}
+	return fold_hash(x, y);
+}
+
 /*
  * initialise the fs caching module
  */