From 182d919b84902eece162c63ed3d476c8016b4197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Feb 2015 23:47:31 +0000 Subject: [PATCH 001/839] FS-Cache: Count culled objects and objects rejected due to lack of space Count the number of objects that get culled by the cache backend and the number of objects that the cache backend declines to instantiate due to lack of space in the cache. These numbers are made available through /proc/fs/fscache/stats Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- .../filesystems/caching/backend-api.txt | 23 +++++++++ Documentation/filesystems/caching/fscache.txt | 4 ++ fs/cachefiles/internal.h | 1 - fs/cachefiles/namei.c | 33 ++++++++----- fs/fscache/internal.h | 5 ++ fs/fscache/object.c | 47 +++++++++++++++++++ fs/fscache/stats.c | 10 ++++ include/linux/fscache-cache.h | 12 +++++ 8 files changed, 122 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 277d1e810670..c0bd5677271b 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -676,6 +676,29 @@ FS-Cache provides some utilities that a cache backend may make use of: as possible. + (*) Indicate that a stale object was found and discarded: + + void fscache_object_retrying_stale(struct fscache_object *object); + + This is called to indicate that the lookup procedure found an object in + the cache that the netfs decided was stale. The object has been + discarded from the cache and the lookup will be performed again. + + + (*) Indicate that the caching backend killed an object: + + void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + + This is called to indicate that the cache backend preemptively killed an + object. The why parameter should be set to indicate the reason: + + FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding. + FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space + FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished. + FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space. + + (*) Get and release references on a retrieval record: void fscache_get_retrieval(struct fscache_retrieval *op); diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 770267af5b3e..66fa7fbccfa4 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -303,6 +303,10 @@ proc files. wrp=N Number of in-progress write_page() cache ops ucp=N Number of in-progress uncache_page() cache ops dsp=N Number of in-progress dissociate_pages() cache ops + CacheEv nsp=N Number of object lookups/creations rejected due to lack of space + stl=N Number of stale objects deleted + rtr=N Number of objects retired when relinquished + cul=N Number of objects culled (*) /proc/fs/fscache/histogram diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 8c52472d2efa..aecd0859eacb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -43,7 +43,6 @@ struct cachefiles_object { loff_t i_size; /* object size */ unsigned long flags; #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ -#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ atomic_t usage; /* object usage count */ uint8_t type; /* object type */ uint8_t new; /* T if object new */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7f8e83f9d74e..1d60195fc518 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -97,7 +97,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, * call vfs_unlink(), vfs_rmdir() or vfs_rename() */ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, - struct dentry *dentry) + struct dentry *dentry, + enum fscache_why_object_killed why) { struct cachefiles_object *object; struct rb_node *p; @@ -132,8 +133,9 @@ found_dentry: pr_err("\n"); pr_err("Error: Can't preemptively bury live object\n"); cachefiles_printk_object(object, NULL); - } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { - pr_err("Error: Object already preemptively buried\n"); + } else { + if (why != FSCACHE_OBJECT_IS_STALE) + fscache_object_mark_killed(&object->fscache, why); } write_unlock(&cache->active_lock); @@ -265,7 +267,8 @@ requeue: static int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *dir, struct dentry *rep, - bool preemptive) + bool preemptive, + enum fscache_why_object_killed why) { struct dentry *grave, *trap; struct path path, path_to_graveyard; @@ -289,7 +292,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, ret = vfs_unlink(dir->d_inode, rep, NULL); if (preemptive) - cachefiles_mark_object_buried(cache, rep); + cachefiles_mark_object_buried(cache, rep, why); } mutex_unlock(&dir->d_inode->i_mutex); @@ -394,7 +397,7 @@ try_again: "Rename failed with error %d", ret); if (preemptive) - cachefiles_mark_object_buried(cache, rep); + cachefiles_mark_object_buried(cache, rep, why); } unlock_rename(cache->graveyard, dir); @@ -422,7 +425,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) { /* object allocation for the same key preemptively deleted this * object's file so that it could create its own file */ _debug("object preemptively buried"); @@ -433,7 +436,8 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, * may have been renamed */ if (dir == object->dentry->d_parent) { ret = cachefiles_bury_object(cache, dir, - object->dentry, false); + object->dentry, false, + FSCACHE_OBJECT_WAS_RETIRED); } else { /* it got moved, presumably by cachefilesd culling it, * so it's no longer in the key path and we can ignore @@ -522,7 +526,7 @@ lookup_again: if (!next->d_inode) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) - goto create_error; + goto no_space_error; path.dentry = dir; ret = security_path_mkdir(&path, next, 0); @@ -551,7 +555,7 @@ lookup_again: if (!next->d_inode) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) - goto create_error; + goto no_space_error; path.dentry = dir; ret = security_path_mknod(&path, next, S_IFREG, 0); @@ -602,7 +606,8 @@ lookup_again: * mutex) */ object->dentry = NULL; - ret = cachefiles_bury_object(cache, dir, next, true); + ret = cachefiles_bury_object(cache, dir, next, true, + FSCACHE_OBJECT_IS_STALE); dput(next); next = NULL; @@ -610,6 +615,7 @@ lookup_again: goto delete_error; _debug("redo lookup"); + fscache_object_retrying_stale(&object->fscache); goto lookup_again; } } @@ -662,6 +668,8 @@ lookup_again: _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); return 0; +no_space_error: + fscache_object_mark_killed(&object->fscache, FSCACHE_OBJECT_NO_SPACE); create_error: _debug("create error %d", ret); if (ret == -EIO) @@ -927,7 +935,8 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, /* actually remove the victim (drops the dir mutex) */ _debug("bury"); - ret = cachefiles_bury_object(cache, dir, victim, false); + ret = cachefiles_bury_object(cache, dir, victim, false, + FSCACHE_OBJECT_WAS_CULLED); if (ret < 0) goto error; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 7872a62ef30c..3063a58b7d3d 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -271,6 +271,11 @@ extern atomic_t fscache_n_cop_write_page; extern atomic_t fscache_n_cop_uncache_page; extern atomic_t fscache_n_cop_dissociate_pages; +extern atomic_t fscache_n_cache_no_space_reject; +extern atomic_t fscache_n_cache_stale_objects; +extern atomic_t fscache_n_cache_retired_objects; +extern atomic_t fscache_n_cache_culled_objects; + static inline void fscache_stat(atomic_t *stat) { atomic_inc(stat); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index da032daf0e0d..12bb468bf0ae 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -1016,3 +1016,50 @@ static const struct fscache_state *fscache_update_object(struct fscache_object * _leave(""); return transit_to(WAIT_FOR_CMD); } + +/** + * fscache_object_retrying_stale - Note retrying stale object + * @object: The object that will be retried + * + * Note that an object lookup found an on-disk object that was adjudged to be + * stale and has been deleted. The lookup will be retried. + */ +void fscache_object_retrying_stale(struct fscache_object *object) +{ + fscache_stat(&fscache_n_cache_no_space_reject); +} +EXPORT_SYMBOL(fscache_object_retrying_stale); + +/** + * fscache_object_mark_killed - Note that an object was killed + * @object: The object that was culled + * @why: The reason the object was killed. + * + * Note that an object was killed. Returns true if the object was + * already marked killed, false if it wasn't. + */ +void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why) +{ + if (test_and_set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags)) { + pr_err("Error: Object already killed by cache [%s]\n", + object->cache->identifier); + return; + } + + switch (why) { + case FSCACHE_OBJECT_NO_SPACE: + fscache_stat(&fscache_n_cache_no_space_reject); + break; + case FSCACHE_OBJECT_IS_STALE: + fscache_stat(&fscache_n_cache_stale_objects); + break; + case FSCACHE_OBJECT_WAS_RETIRED: + fscache_stat(&fscache_n_cache_retired_objects); + break; + case FSCACHE_OBJECT_WAS_CULLED: + fscache_stat(&fscache_n_cache_culled_objects); + break; + } +} +EXPORT_SYMBOL(fscache_object_mark_killed); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 40d13c70ef51..3a722e8f2307 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -130,6 +130,11 @@ atomic_t fscache_n_cop_write_page; atomic_t fscache_n_cop_uncache_page; atomic_t fscache_n_cop_dissociate_pages; +atomic_t fscache_n_cache_no_space_reject; +atomic_t fscache_n_cache_stale_objects; +atomic_t fscache_n_cache_retired_objects; +atomic_t fscache_n_cache_culled_objects; + /* * display the general statistics */ @@ -271,6 +276,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_write_page), atomic_read(&fscache_n_cop_uncache_page), atomic_read(&fscache_n_cop_dissociate_pages)); + seq_printf(m, "CacheEv: nsp=%d stl=%d rtr=%d cul=%d\n", + atomic_read(&fscache_n_cache_no_space_reject), + atomic_read(&fscache_n_cache_stale_objects), + atomic_read(&fscache_n_cache_retired_objects), + atomic_read(&fscache_n_cache_culled_objects)); return 0; } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 771484993ca7..c9dafdaf3347 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -371,6 +371,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ +#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ @@ -551,4 +552,15 @@ extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, const void *data, uint16_t datalen); +extern void fscache_object_retrying_stale(struct fscache_object *object); + +enum fscache_why_object_killed { + FSCACHE_OBJECT_IS_STALE, + FSCACHE_OBJECT_NO_SPACE, + FSCACHE_OBJECT_WAS_RETIRED, + FSCACHE_OBJECT_WAS_CULLED, +}; +extern void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + #endif /* _LINUX_FSCACHE_CACHE_H */ From 3c3059841a9b99fa8dd8d878607deceec1e363e9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:39:28 +0000 Subject: [PATCH 002/839] FS-Cache: Move fscache_report_unexpected_submission() to make it more available Move fscache_report_unexpected_submission() up within operation.c so that it can be called from fscache_submit_exclusive_op() too. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 74 +++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e7b87a0e5185..68ead8482617 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -75,6 +75,43 @@ static void fscache_run_op(struct fscache_object *object, fscache_stat(&fscache_n_op_run); } +/* + * report an unexpected submission + */ +static void fscache_report_unexpected_submission(struct fscache_object *object, + struct fscache_operation *op, + const struct fscache_state *ostate) +{ + static bool once_only; + struct fscache_operation *p; + unsigned n; + + if (once_only) + return; + once_only = true; + + kdebug("unexpected submission OP%x [OBJ%x %s]", + op->debug_id, object->debug_id, object->state->name); + kdebug("objstate=%s [%s]", object->state->name, ostate->name); + kdebug("objflags=%lx", object->flags); + kdebug("objevent=%lx [%lx]", object->events, object->event_mask); + kdebug("ops=%u inp=%u exc=%u", + object->n_ops, object->n_in_progress, object->n_exclusive); + + if (!list_empty(&object->pending_ops)) { + n = 0; + list_for_each_entry(p, &object->pending_ops, pend_link) { + ASSERTCMP(p->object, ==, object); + kdebug("%p %p", op->processor, op->release); + n++; + } + + kdebug("n=%u", n); + } + + dump_stack(); +} + /* * submit an exclusive operation for an object * - other ops are excluded from running simultaneously with this one @@ -138,43 +175,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, return ret; } -/* - * report an unexpected submission - */ -static void fscache_report_unexpected_submission(struct fscache_object *object, - struct fscache_operation *op, - const struct fscache_state *ostate) -{ - static bool once_only; - struct fscache_operation *p; - unsigned n; - - if (once_only) - return; - once_only = true; - - kdebug("unexpected submission OP%x [OBJ%x %s]", - op->debug_id, object->debug_id, object->state->name); - kdebug("objstate=%s [%s]", object->state->name, ostate->name); - kdebug("objflags=%lx", object->flags); - kdebug("objevent=%lx [%lx]", object->events, object->event_mask); - kdebug("ops=%u inp=%u exc=%u", - object->n_ops, object->n_in_progress, object->n_exclusive); - - if (!list_empty(&object->pending_ops)) { - n = 0; - list_for_each_entry(p, &object->pending_ops, pend_link) { - ASSERTCMP(p->object, ==, object); - kdebug("%p %p", op->processor, op->release); - n++; - } - - kdebug("n=%u", n); - } - - dump_stack(); -} - /* * submit an operation for an object * - objects may be submitted only in the following states: From 30ceec6284129662efc3a1e7675b2bd857a046fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:27 +0000 Subject: [PATCH 003/839] FS-Cache: When submitting an op, cancel it if the target object is dying When submitting an operation, prefer to cancel the operation immediately rather than queuing it for later processing if the object is marked as dying (ie. the object state machine has reached the KILL_OBJECT state). Whilst we're at it, change the series of related test_bit() calls into a READ_ONCE() and bitwise-AND operators to reduce the number of load instructions (test_bit() has a volatile address). Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 47 ++++++++++++++++++++++------------- include/linux/fscache-cache.h | 9 +++++-- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 68ead8482617..dec6defe3be3 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -120,6 +120,8 @@ static void fscache_report_unexpected_submission(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { + const struct fscache_state *ostate; + unsigned long flags; int ret; _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); @@ -132,8 +134,19 @@ int fscache_submit_exclusive_op(struct fscache_object *object, ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); + ostate = object->state; + smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; - if (fscache_object_is_active(object)) { + flags = READ_ONCE(object->flags); + if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { + fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; + } else if (unlikely(fscache_cache_is_broken(object))) { + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -EIO; + } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ @@ -155,7 +168,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); ret = 0; - } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { + } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ @@ -164,11 +177,9 @@ int fscache_submit_exclusive_op(struct fscache_object *object, fscache_stat(&fscache_n_op_pend); ret = 0; } else { - /* If we're in any other state, there must have been an I/O - * error of some nature. - */ - ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); - ret = -EIO; + fscache_report_unexpected_submission(object, op, ostate); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; } spin_unlock(&object->lock); @@ -187,6 +198,7 @@ int fscache_submit_op(struct fscache_object *object, struct fscache_operation *op) { const struct fscache_state *ostate; + unsigned long flags; int ret; _enter("{OBJ%x OP%x},{%u}", @@ -204,7 +216,15 @@ int fscache_submit_op(struct fscache_object *object, smp_rmb(); op->state = FSCACHE_OP_ST_PENDING; - if (fscache_object_is_active(object)) { + flags = READ_ONCE(object->flags); + if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { + fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; + } else if (unlikely(fscache_cache_is_broken(object))) { + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -EIO; + } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; @@ -222,25 +242,18 @@ int fscache_submit_op(struct fscache_object *object, fscache_run_op(object, op); } ret = 0; - } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { + } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; - } else if (fscache_object_is_dying(object)) { - fscache_stat(&fscache_n_op_rejected); - op->state = FSCACHE_OP_ST_CANCELLED; - ret = -ENOBUFS; - } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + } else { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; - } else { - op->state = FSCACHE_OP_ST_CANCELLED; - ret = -ENOBUFS; } spin_unlock(&object->lock); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index c9dafdaf3347..2e83a141e465 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -411,17 +411,22 @@ static inline bool fscache_object_is_available(struct fscache_object *object) return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); } +static inline bool fscache_cache_is_broken(struct fscache_object *object) +{ + return test_bit(FSCACHE_IOERROR, &object->cache->flags); +} + static inline bool fscache_object_is_active(struct fscache_object *object) { return fscache_object_is_available(object) && fscache_object_is_live(object) && - !test_bit(FSCACHE_IOERROR, &object->cache->flags); + !fscache_cache_is_broken(object); } static inline bool fscache_object_is_dead(struct fscache_object *object) { return fscache_object_is_dying(object) && - test_bit(FSCACHE_IOERROR, &object->cache->flags); + fscache_cache_is_broken(object); } /** From 6515d1dbf424c5c3b94d44e9c7f581026e7fc0d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 11:53:57 +0000 Subject: [PATCH 004/839] FS-Cache: Handle a new operation submitted against a killed object Reject new operations that are being submitted against an object if that object has failed its lookup or creation states or has been killed by the cache backend for some other reason, such as having been culled. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/object.c | 2 ++ fs/fscache/operation.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 12bb468bf0ae..9b79fc9a1464 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -610,6 +610,8 @@ static const struct fscache_state *fscache_lookup_failure(struct fscache_object object->cache->ops->lookup_complete(object); fscache_stat_d(&fscache_n_cop_lookup_complete); + set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags); + cookie = object->cookie; set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index dec6defe3be3..18658fffbba1 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -176,6 +176,9 @@ int fscache_submit_exclusive_op(struct fscache_object *object, list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; + } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); op->state = FSCACHE_OP_ST_CANCELLED; @@ -249,6 +252,9 @@ int fscache_submit_op(struct fscache_object *object, list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; + } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); From f09b443d0e09f37121c55d7f83056f6ebff6ab4f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:28 +0000 Subject: [PATCH 005/839] FS-Cache: Synchronise object death state change vs operation submission When an object is being marked as no longer live, do this under the object spinlock to prevent a race with operation submission targeted on that object. The problem occurs due to the following pair of intertwined sequences when the cache tries to create an object that would take it over the hard available space limit: NETFS INTERFACE =============== (A) The netfs calls fscache_acquire_cookie(). object creation is deferred to the object state machine and the netfs is allowed to continue. OBJECT STATE MACHINE KTHREAD ============================ (1) The object is looked up on disk by fscache_look_up_object() calling cachefiles_walk_to_object(). The latter finds that the object is not yet represented on disk and calls fscache_object_lookup_negative(). (2) fscache_object_lookup_negative() sets FSCACHE_COOKIE_NO_DATA_YET and clears FSCACHE_COOKIE_LOOKING_UP, thus allowing the netfs to start queuing read operations. (B) The netfs calls fscache_read_or_alloc_pages(). This calls fscache_wait_for_deferred_lookup() which sees FSCACHE_COOKIE_LOOKING_UP become clear, allowing the read to begin. (C) A read operation is set up and passed to fscache_submit_op() to deal with. (3) cachefiles_walk_to_object() calls cachefiles_has_space(), which fails (or one of the file operations to create stuff fails). cachefiles returns an error to fscache. (4) fscache_look_up_object() transits to the LOOKUP_FAILURE state, (5) fscache_lookup_failure() sets FSCACHE_OBJECT_LOOKED_UP and FSCACHE_COOKIE_UNAVAILABLE and clears FSCACHE_COOKIE_LOOKING_UP then transits to the KILL_OBJECT state. (6) fscache_kill_object() clears FSCACHE_OBJECT_IS_LIVE in an attempt to reject any further requests from the netfs. (7) object->n_ops is examined and found to be 0. fscache_kill_object() transits to the DROP_OBJECT state. (D) fscache_submit_op() locks the object spinlock, sees if it can dispatch the op immediately by calling fscache_object_is_active() - which fails since FSCACHE_OBJECT_IS_AVAILABLE has not yet been set. (E) fscache_submit_op() then tests FSCACHE_OBJECT_LOOKED_UP - which is set. It then queues the object and increments object->n_ops. (8) fscache_drop_object() releases the object and eventually fscache_put_object() calls cachefiles_put_object() which suffers an assertion failure here: ASSERTCMP(object->fscache.n_ops, ==, 0); Locking the object spinlock in step (6) around the clearance of FSCACHE_OBJECT_IS_LIVE ensures that the the decision trees in fscache_submit_op() and fscache_submit_exclusive_op() don't see the IS_LIVE flag being cleared mid-decision: either the op is queued before step (7) - in which case fscache_kill_object() will see n_ops>0 and will deal with the op - or the op will be rejected. This, combined with rejecting op submission if the target object is dying, fix the problem. The problem shows up as the following oops: CacheFiles: Assertion failed CacheFiles: 1 == 0 is false ------------[ cut here ]------------ kernel BUG at ../fs/cachefiles/interface.c:339! ... RIP: 0010:[] [] cachefiles_put_object+0x2a4/0x301 [cachefiles] ... Call Trace: [] fscache_put_object+0x18/0x21 [fscache] [] fscache_object_work_func+0x3ba/0x3c9 [fscache] [] process_one_work+0x226/0x441 [] worker_thread+0x273/0x36b [] ? rescuer_thread+0x2e1/0x2e1 [] kthread+0x10e/0x116 [] ? kthread_create_on_node+0x1bb/0x1bb [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x1bb/0x1bb Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/object.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 9b79fc9a1464..40049f7505f0 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -327,6 +327,17 @@ void fscache_object_init(struct fscache_object *object, } EXPORT_SYMBOL(fscache_object_init); +/* + * Mark the object as no longer being live, making sure that we synchronise + * against op submission. + */ +static inline void fscache_mark_object_dead(struct fscache_object *object) +{ + spin_lock(&object->lock); + clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + spin_unlock(&object->lock); +} + /* * Abort object initialisation before we start it. */ @@ -631,7 +642,7 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob _enter("{OBJ%x,%d,%d},%d", object->debug_id, object->n_ops, object->n_children, event); - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); object->oob_event_mask = 0; if (list_empty(&object->dependents) && @@ -976,13 +987,13 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj return transit_to(UPDATE_OBJECT); nomem: - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); fscache_unuse_cookie(object); _leave(" [ENOMEM]"); return transit_to(KILL_OBJECT); submit_op_failed: - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); spin_unlock(&cookie->lock); fscache_unuse_cookie(object); kfree(op); From 87021526300f1a292dd966e141e183630ac95317 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:52:51 +0000 Subject: [PATCH 006/839] FS-Cache: fscache_object_is_dead() has wrong logic, kill it fscache_object_is_dead() returns true only if the object is marked dead and the cache got an I/O error. This should be a logical OR instead. Since two of the callers got split up into handling for separate subcases, expand the other callers and kill the function. This is probably the right thing to do anyway since one of the subcases isn't about the object at all, but rather about the cache. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/cookie.c | 3 ++- fs/fscache/page.c | 6 ++++-- include/linux/fscache-cache.h | 6 ------ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 89acec742e0b..8de22164f5fb 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -327,7 +327,8 @@ static int fscache_alloc_object(struct fscache_cache *cache, object_already_extant: ret = -ENOBUFS; - if (fscache_object_is_dead(object)) { + if (fscache_object_is_dying(object) || + fscache_cache_is_broken(object)) { spin_unlock(&cookie->lock); goto error; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index de33b3fccca6..d0805e31361c 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -377,11 +377,13 @@ check_if_dead: _leave(" = -ENOBUFS [cancelled]"); return -ENOBUFS; } - if (unlikely(fscache_object_is_dead(object))) { - pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->state); + if (unlikely(fscache_object_is_dying(object) || + fscache_cache_is_broken(object))) { + enum fscache_operation_state state = op->state; fscache_cancel_op(op, do_cancel); if (stat_object_dead) fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [obj dead %d]", state); return -ENOBUFS; } return 0; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 2e83a141e465..ca3d550da11e 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -423,12 +423,6 @@ static inline bool fscache_object_is_active(struct fscache_object *object) !fscache_cache_is_broken(object); } -static inline bool fscache_object_is_dead(struct fscache_object *object) -{ - return fscache_object_is_dying(object) && - fscache_cache_is_broken(object); -} - /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came From 418b7eb9e1011bc11220a03ad0045885d04698d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:28 +0000 Subject: [PATCH 007/839] FS-Cache: Permit fscache_cancel_op() to cancel in-progress operations too Currently, fscache_cancel_op() only cancels pending operations - attempts to cancel in-progress operations are ignored. This leads to a problem in fscache_wait_for_operation_activation() whereby the wait is terminated, but the object has been killed. The check at the end of the function now triggers because it's no longer contingent on the cache having produced an I/O error since the commit that fixed the logic error in fscache_object_is_dead(). The result of the check is that it tries to cancel the operation - but since the object may not be pending by this point, the cancellation request may be ignored - with the result that the the object is just put by the caller and fscache_put_operation has an assertion failure because the operation isn't in either the COMPLETE or the CANCELLED states. To fix this, we permit in-progress ops to be cancelled under some circumstances. The bug results in an oops that looks something like this: FS-Cache: fscache_wait_for_operation_activation() = -ENOBUFS [obj dead 3] FS-Cache: FS-Cache: Assertion failed FS-Cache: 3 == 5 is false ------------[ cut here ]------------ kernel BUG at ../fs/fscache/operation.c:432! ... RIP: 0010:[] fscache_put_operation+0xf2/0x2cd Call Trace: [] __fscache_read_or_alloc_pages+0x2ec/0x3b3 [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/internal.h | 3 ++- fs/fscache/operation.c | 20 +++++++++++++++++--- fs/fscache/page.c | 4 ++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 3063a58b7d3d..87c4544ec912 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -125,7 +125,8 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); extern int fscache_cancel_op(struct fscache_operation *, - void (*)(struct fscache_operation *)); + void (*)(struct fscache_operation *), + bool); extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 18658fffbba1..67594a8d791a 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -312,9 +312,11 @@ void fscache_start_operations(struct fscache_object *object) * cancel an operation that's pending on an object */ int fscache_cancel_op(struct fscache_operation *op, - void (*do_cancel)(struct fscache_operation *)) + void (*do_cancel)(struct fscache_operation *), + bool cancel_in_progress_op) { struct fscache_object *object = op->object; + bool put = false; int ret; _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); @@ -328,8 +330,19 @@ int fscache_cancel_op(struct fscache_operation *op, ret = -EBUSY; if (op->state == FSCACHE_OP_ST_PENDING) { ASSERT(!list_empty(&op->pend_link)); - fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); + put = true; + fscache_stat(&fscache_n_op_cancelled); + if (do_cancel) + do_cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + ret = 0; + } else if (op->state == FSCACHE_OP_ST_IN_PROGRESS && cancel_in_progress_op) { + fscache_stat(&fscache_n_op_cancelled); if (do_cancel) do_cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; @@ -337,10 +350,11 @@ int fscache_cancel_op(struct fscache_operation *op, object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); - fscache_put_operation(op); ret = 0; } + if (put) + fscache_put_operation(op); spin_unlock(&object->lock); _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index d0805e31361c..433cae927eca 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -359,7 +359,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, TASK_INTERRUPTIBLE) != 0) { - ret = fscache_cancel_op(op, do_cancel); + ret = fscache_cancel_op(op, do_cancel, false); if (ret == 0) return -ERESTARTSYS; @@ -380,7 +380,7 @@ check_if_dead: if (unlikely(fscache_object_is_dying(object) || fscache_cache_is_broken(object))) { enum fscache_operation_state state = op->state; - fscache_cancel_op(op, do_cancel); + fscache_cancel_op(op, do_cancel, true); if (stat_object_dead) fscache_stat(stat_object_dead); _leave(" = -ENOBUFS [obj dead %d]", state); From 1339ec98e32b4bc8efb6fbb71c006a465130aaba Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 13:26:39 +0000 Subject: [PATCH 008/839] FS-Cache: Out of line fscache_operation_init() Out of line fscache_operation_init() so that it can access internal FS-Cache features, such as stats, in a later commit. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 22 ++++++++++++++++++++++ include/linux/fscache-cache.h | 24 +++--------------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 67594a8d791a..61a6e78b85fa 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -20,6 +20,28 @@ atomic_t fscache_op_debug_id; EXPORT_SYMBOL(fscache_op_debug_id); +/** + * fscache_operation_init - Do basic initialisation of an operation + * @op: The operation to initialise + * @release: The release function to assign + * + * Do basic initialisation of an operation. The caller must still set flags, + * object and processor if needed. + */ +void fscache_operation_init(struct fscache_operation *op, + fscache_operation_processor_t processor, + fscache_operation_release_t release) +{ + INIT_WORK(&op->work, fscache_op_work_func); + atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; + op->debug_id = atomic_inc_return(&fscache_op_debug_id); + op->processor = processor; + op->release = release; + INIT_LIST_HEAD(&op->pend_link); +} +EXPORT_SYMBOL(fscache_operation_init); + /** * fscache_enqueue_operation - Enqueue an operation for processing * @op: The operation to enqueue diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ca3d550da11e..0e26d49972e3 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -119,27 +119,9 @@ extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); - -/** - * fscache_operation_init - Do basic initialisation of an operation - * @op: The operation to initialise - * @release: The release function to assign - * - * Do basic initialisation of an operation. The caller must still set flags, - * object and processor if needed. - */ -static inline void fscache_operation_init(struct fscache_operation *op, - fscache_operation_processor_t processor, - fscache_operation_release_t release) -{ - INIT_WORK(&op->work, fscache_op_work_func); - atomic_set(&op->usage, 1); - op->state = FSCACHE_OP_ST_INITIALISED; - op->debug_id = atomic_inc_return(&fscache_op_debug_id); - op->processor = processor; - op->release = release; - INIT_LIST_HEAD(&op->pend_link); -} +extern void fscache_operation_init(struct fscache_operation *, + fscache_operation_processor_t, + fscache_operation_release_t); /* * data read operation From 03cdd0e4b9a98ae995b81cd8f58e992ec3f44ae2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 13:21:15 +0000 Subject: [PATCH 009/839] FS-Cache: Count the number of initialised operations Count and display through /proc/fs/fscache/stats the number of initialised operations. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- Documentation/filesystems/caching/fscache.txt | 3 ++- fs/fscache/internal.h | 1 + fs/fscache/operation.c | 1 + fs/fscache/stats.c | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 66fa7fbccfa4..50f0a5757f48 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -284,8 +284,9 @@ proc files. enq=N Number of times async ops queued for processing can=N Number of async ops cancelled rej=N Number of async ops rejected due to object lookup/create failure + ini=N Number of async ops initialised dfr=N Number of async ops queued for deferred release - rel=N Number of async ops released + rel=N Number of async ops released (should equal ini=N when idle) gc=N Number of deferred-release async ops garbage collected CacheOp alo=N Number of in-progress alloc_object() cache ops luo=N Number of in-progress lookup_object() cache ops diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 87c4544ec912..a63225116db6 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -165,6 +165,7 @@ extern atomic_t fscache_n_op_pend; extern atomic_t fscache_n_op_run; extern atomic_t fscache_n_op_enqueue; extern atomic_t fscache_n_op_deferred_release; +extern atomic_t fscache_n_op_initialised; extern atomic_t fscache_n_op_release; extern atomic_t fscache_n_op_gc; extern atomic_t fscache_n_op_cancelled; diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 61a6e78b85fa..9761df4fc2ab 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -39,6 +39,7 @@ void fscache_operation_init(struct fscache_operation *op, op->processor = processor; op->release = release; INIT_LIST_HEAD(&op->pend_link); + fscache_stat(&fscache_n_op_initialised); } EXPORT_SYMBOL(fscache_operation_init); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 3a722e8f2307..7cfa0aacdf6d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -23,6 +23,7 @@ atomic_t fscache_n_op_run; atomic_t fscache_n_op_enqueue; atomic_t fscache_n_op_requeue; atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_initialised; atomic_t fscache_n_op_release; atomic_t fscache_n_op_gc; atomic_t fscache_n_op_cancelled; @@ -251,7 +252,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_op_enqueue), atomic_read(&fscache_n_op_cancelled), atomic_read(&fscache_n_op_rejected)); - seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", + seq_printf(m, "Ops : ini=%u dfr=%u rel=%u gc=%u\n", + atomic_read(&fscache_n_op_initialised), atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), atomic_read(&fscache_n_op_gc)); From 73c04a47bf79770fbe7f3cf515f5831fccab88ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 14:07:25 +0000 Subject: [PATCH 010/839] FS-Cache: Fix cancellation of in-progress operation Cancellation of an in-progress operation needs to update the relevant counters and start any operations that are pending waiting on this one. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 9761df4fc2ab..b6bf5f399d70 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -365,6 +365,13 @@ int fscache_cancel_op(struct fscache_operation *op, wake_up_bit(&op->flags, FSCACHE_OP_WAITING); ret = 0; } else if (op->state == FSCACHE_OP_ST_IN_PROGRESS && cancel_in_progress_op) { + ASSERTCMP(object->n_in_progress, >, 0); + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + fscache_stat(&fscache_n_op_cancelled); if (do_cancel) do_cancel(op); From a39caadf06879017cb9a8c5c5cb4fc4ccb213275 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 14:22:40 +0000 Subject: [PATCH 011/839] FS-Cache: Put an aborted initialised op so that it is accounted correctly Call fscache_put_operation() or a wrapper on any op that has gone through fscache_operation_init() so that the accounting shown in /proc is done correctly, specifically fscache_n_op_release. fscache_put_operation() therefore now allows an op in the INITIALISED state as well as in the CANCELLED and COMPLETE states. Note that this means that an operation can get put that doesn't have its ->object pointer filled in, so anything that depends on the object needs to be conditional in fscache_put_operation(). Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 52 ++++++++++++++++++++++-------------------- fs/fscache/page.c | 14 ++++++------ 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index b6bf5f399d70..c76c09730768 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -472,7 +472,8 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && + op->state != FSCACHE_OP_ST_COMPLETE, op->state, ==, FSCACHE_OP_ST_CANCELLED); op->state = FSCACHE_OP_ST_DEAD; @@ -484,35 +485,36 @@ void fscache_put_operation(struct fscache_operation *op) } object = op->object; + if (likely(object)) { + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) + atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) + fscache_unuse_cookie(object); - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); - if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) - fscache_unuse_cookie(object); + /* now... we may get called with the object spinlock held, so we + * complete the cleanup here only if we can immediately acquire the + * lock, and defer it otherwise */ + if (!spin_trylock(&object->lock)) { + _debug("defer put"); + fscache_stat(&fscache_n_op_deferred_release); - /* now... we may get called with the object spinlock held, so we - * complete the cleanup here only if we can immediately acquire the - * lock, and defer it otherwise */ - if (!spin_trylock(&object->lock)) { - _debug("defer put"); - fscache_stat(&fscache_n_op_deferred_release); + cache = object->cache; + spin_lock(&cache->op_gc_list_lock); + list_add_tail(&op->pend_link, &cache->op_gc_list); + spin_unlock(&cache->op_gc_list_lock); + schedule_work(&cache->op_gc); + _leave(" [defer]"); + return; + } - cache = object->cache; - spin_lock(&cache->op_gc_list_lock); - list_add_tail(&op->pend_link, &cache->op_gc_list); - spin_unlock(&cache->op_gc_list_lock); - schedule_work(&cache->op_gc); - _leave(" [defer]"); - return; + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); + + spin_unlock(&object->lock); } - ASSERTCMP(object->n_ops, >, 0); - object->n_ops--; - if (object->n_ops == 0) - fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); - - spin_unlock(&object->lock); - kfree(op); _leave(" [done]"); } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 433cae927eca..7db9752252ef 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -239,7 +239,7 @@ nobufs_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_operation(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_attr_changed_nobufs); @@ -505,7 +505,7 @@ nobufs_unlock: spin_unlock(&cookie->lock); if (wake_cookie) __fscache_wake_unused_cookie(cookie); - kfree(op); + fscache_put_retrieval(op); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); _leave(" = -ENOBUFS"); @@ -634,7 +634,7 @@ nobufs_unlock_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_retrieval(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: @@ -728,7 +728,7 @@ nobufs_unlock_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_retrieval(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: @@ -1018,7 +1018,7 @@ already_pending: spin_unlock(&object->lock); spin_unlock(&cookie->lock); radix_tree_preload_end(); - kfree(op); + fscache_put_operation(&op->op); fscache_stat(&fscache_n_stores_ok); _leave(" = 0"); return 0; @@ -1038,7 +1038,7 @@ nobufs_unlock_obj: nobufs: spin_unlock(&cookie->lock); radix_tree_preload_end(); - kfree(op); + fscache_put_operation(&op->op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_stores_nobufs); @@ -1046,7 +1046,7 @@ nobufs: return -ENOBUFS; nomem_free: - kfree(op); + fscache_put_operation(&op->op); nomem: fscache_stat(&fscache_n_stores_oom); _leave(" = -ENOMEM"); From d3b97ca4a99e4e6c78f5a21c968eadf5c8ba9971 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: [PATCH 012/839] FS-Cache: The operation cancellation method needs calling in more places Any time an incomplete operation is cancelled, the operation cancellation function needs to be called to clean up. This is currently being passed directly to some of the functions that might want to call it, but not all. Instead, pass the cancellation method pointer to the fscache_operation_init() and have that cache it in the operation struct. Further, plug in a dummy cancellation handler if the caller declines to set one as this allows us to call the function unconditionally (the extra overhead isn't worth bothering about as we don't expect to be calling this typically). The cancellation method must thence be called everywhere the CANCELLED state is set. Note that we call it *before* setting the CANCELLED state such that the method can use the old state value to guide its operation. fscache_do_cancel_retrieval() needs moving higher up in the sources so that the init function can use it now. Without this, the following oops may be seen: FS-Cache: Assertion failed FS-Cache: 3 == 0 is false ------------[ cut here ]------------ kernel BUG at ../fs/fscache/page.c:261! ... RIP: 0010:[] fscache_release_retrieval_op+0x77/0x100 [] fscache_put_operation+0x114/0x2da [] __fscache_read_or_alloc_pages+0x358/0x3b3 [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 The assertion is showing that the remaining number of pages (n_pages) is not 0 when the operation is being released. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/cookie.c | 5 ++-- fs/fscache/internal.h | 7 ++---- fs/fscache/object.c | 3 ++- fs/fscache/operation.c | 31 +++++++++++++++++------ fs/fscache/page.c | 46 +++++++++++++++++------------------ include/linux/fscache-cache.h | 5 ++++ 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 8de22164f5fb..d403c69bee08 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -672,7 +672,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) if (!op) return -ENOMEM; - fscache_operation_init(op, NULL, NULL); + fscache_operation_init(op, NULL, NULL, NULL); op->flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -696,8 +696,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) /* the work queue now carries its own ref on the object */ spin_unlock(&cookie->lock); - ret = fscache_wait_for_operation_activation(object, op, - NULL, NULL, NULL); + ret = fscache_wait_for_operation_activation(object, op, NULL, NULL); if (ret == 0) { /* ask the cache to honour the operation */ ret = object->cache->ops->check_consistency(op); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index a63225116db6..97ec45110957 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -124,9 +124,7 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *, - void (*)(struct fscache_operation *), - bool); +extern int fscache_cancel_op(struct fscache_operation *, bool); extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); @@ -139,8 +137,7 @@ extern int fscache_wait_for_deferred_lookup(struct fscache_cookie *); extern int fscache_wait_for_operation_activation(struct fscache_object *, struct fscache_operation *, atomic_t *, - atomic_t *, - void (*)(struct fscache_operation *)); + atomic_t *); extern void fscache_invalidate_writes(struct fscache_cookie *); /* diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 40049f7505f0..9e792e30f4db 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -961,7 +961,8 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj if (!op) goto nomem; - fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + fscache_operation_init(op, object->cache->ops->invalidate_object, + NULL, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c76c09730768..57d4abb68656 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -20,6 +20,10 @@ atomic_t fscache_op_debug_id; EXPORT_SYMBOL(fscache_op_debug_id); +static void fscache_operation_dummy_cancel(struct fscache_operation *op) +{ +} + /** * fscache_operation_init - Do basic initialisation of an operation * @op: The operation to initialise @@ -30,6 +34,7 @@ EXPORT_SYMBOL(fscache_op_debug_id); */ void fscache_operation_init(struct fscache_operation *op, fscache_operation_processor_t processor, + fscache_operation_cancel_t cancel, fscache_operation_release_t release) { INIT_WORK(&op->work, fscache_op_work_func); @@ -37,6 +42,7 @@ void fscache_operation_init(struct fscache_operation *op, op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; + op->cancel = cancel ?: fscache_operation_dummy_cancel; op->release = release; INIT_LIST_HEAD(&op->pend_link); fscache_stat(&fscache_n_op_initialised); @@ -164,9 +170,11 @@ int fscache_submit_exclusive_op(struct fscache_object *object, flags = READ_ONCE(object->flags); if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { fscache_stat(&fscache_n_op_rejected); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (unlikely(fscache_cache_is_broken(object))) { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -EIO; } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { @@ -200,10 +208,12 @@ int fscache_submit_exclusive_op(struct fscache_object *object, fscache_stat(&fscache_n_op_pend); ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -245,9 +255,11 @@ int fscache_submit_op(struct fscache_object *object, flags = READ_ONCE(object->flags); if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { fscache_stat(&fscache_n_op_rejected); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (unlikely(fscache_cache_is_broken(object))) { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -EIO; } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { @@ -276,11 +288,13 @@ int fscache_submit_op(struct fscache_object *object, fscache_stat(&fscache_n_op_pend); ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -335,7 +349,6 @@ void fscache_start_operations(struct fscache_object *object) * cancel an operation that's pending on an object */ int fscache_cancel_op(struct fscache_operation *op, - void (*do_cancel)(struct fscache_operation *), bool cancel_in_progress_op) { struct fscache_object *object = op->object; @@ -355,9 +368,9 @@ int fscache_cancel_op(struct fscache_operation *op, ASSERT(!list_empty(&op->pend_link)); list_del_init(&op->pend_link); put = true; + fscache_stat(&fscache_n_op_cancelled); - if (do_cancel) - do_cancel(op); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; @@ -373,8 +386,7 @@ int fscache_cancel_op(struct fscache_operation *op, fscache_start_operations(object); fscache_stat(&fscache_n_op_cancelled); - if (do_cancel) - do_cancel(op); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; @@ -408,6 +420,7 @@ void fscache_cancel_all_ops(struct fscache_object *object) list_del_init(&op->pend_link); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) @@ -440,8 +453,12 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled) spin_lock(&object->lock); - op->state = cancelled ? - FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; + if (!cancelled) { + op->state = FSCACHE_OP_ST_COMPLETE; + } else { + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + } if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 7db9752252ef..d1b23a67c031 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -213,7 +213,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return -ENOMEM; } - fscache_operation_init(op, fscache_attr_changed_op, NULL); + fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -248,6 +248,17 @@ nobufs: } EXPORT_SYMBOL(__fscache_attr_changed); +/* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + atomic_set(&op->n_pages, 0); +} + /* * release a retrieval op reference */ @@ -285,7 +296,9 @@ static struct fscache_retrieval *fscache_alloc_retrieval( return NULL; } - fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); + fscache_operation_init(&op->op, NULL, + fscache_do_cancel_retrieval, + fscache_release_retrieval_op); op->op.flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING) | (1UL << FSCACHE_OP_UNUSE_COOKIE); @@ -329,25 +342,13 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) return 0; } -/* - * Handle cancellation of a pending retrieval op - */ -static void fscache_do_cancel_retrieval(struct fscache_operation *_op) -{ - struct fscache_retrieval *op = - container_of(_op, struct fscache_retrieval, op); - - atomic_set(&op->n_pages, 0); -} - /* * wait for an object to become active (or dead) */ int fscache_wait_for_operation_activation(struct fscache_object *object, struct fscache_operation *op, atomic_t *stat_op_waits, - atomic_t *stat_object_dead, - void (*do_cancel)(struct fscache_operation *)) + atomic_t *stat_object_dead) { int ret; @@ -359,7 +360,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, TASK_INTERRUPTIBLE) != 0) { - ret = fscache_cancel_op(op, do_cancel, false); + ret = fscache_cancel_op(op, false); if (ret == 0) return -ERESTARTSYS; @@ -380,7 +381,7 @@ check_if_dead: if (unlikely(fscache_object_is_dying(object) || fscache_cache_is_broken(object))) { enum fscache_operation_state state = op->state; - fscache_cancel_op(op, do_cancel, true); + fscache_cancel_op(op, true); if (stat_object_dead) fscache_stat(stat_object_dead); _leave(" = -ENOBUFS [obj dead %d]", state); @@ -464,8 +465,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_retrievals_object_dead)); if (ret < 0) goto error; @@ -595,8 +595,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_retrievals_object_dead)); if (ret < 0) goto error; @@ -702,8 +701,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_alloc_op_waits), - __fscache_stat(&fscache_n_allocs_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_allocs_object_dead)); if (ret < 0) goto error; @@ -946,7 +944,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (!op) goto nomem; - fscache_operation_init(&op->op, fscache_write_op, + fscache_operation_init(&op->op, fscache_write_op, NULL, fscache_release_write_op); op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING) | diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 0e26d49972e3..69c6eb10d858 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ extern wait_queue_head_t fscache_cache_cleared_wq; */ typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); enum fscache_operation_state { FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ @@ -109,6 +110,9 @@ struct fscache_operation { * the op in a non-pool thread */ fscache_operation_processor_t processor; + /* Operation cancellation cleanup (optional) */ + fscache_operation_cancel_t cancel; + /* operation releaser */ fscache_operation_release_t release; }; @@ -121,6 +125,7 @@ extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); extern void fscache_operation_init(struct fscache_operation *, fscache_operation_processor_t, + fscache_operation_cancel_t, fscache_operation_release_t); /* From 4a47132ff472a0c2c5441baeb50cf97f2580bc43 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: [PATCH 013/839] FS-Cache: Retain the netfs context in the retrieval op earlier Now that the retrieval operation may be disposed of by fscache_put_operation() before we actually set the context, the retrieval-specific cleanup operation can produce a NULL-pointer dereference when it tries to unconditionally clean up the netfs context. Given that it is expected that we'll get at least as far as the place where we currently set the context pointer and it is unlikely we'll go through the error handling paths prior to that point, retain the context right from the point that the retrieval op is allocated. Concomitant to this, we need to retain the cookie pointer in the retrieval op also so that we can call the netfs to release its context in the release method. In addition, we might now get into fscache_release_retrieval_op() with the op only initialised. To this end, set the operation to DEAD only after the release method has been called and skip the n_pages test upon cleanup if the op is still in the INITIALISED state. Without these changes, the following oops might be seen: BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8 ... RIP: 0010:[] fscache_release_retrieval_op+0xae/0x100 ... Call Trace: [] fscache_put_operation+0x117/0x2e0 [] __fscache_read_or_alloc_pages+0x351/0x3ac [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- fs/fscache/operation.c | 2 +- fs/fscache/page.c | 20 ++++++++++---------- include/linux/fscache-cache.h | 1 + 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 57d4abb68656..de67745e1cd7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -492,7 +492,6 @@ void fscache_put_operation(struct fscache_operation *op) ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && op->state != FSCACHE_OP_ST_COMPLETE, op->state, ==, FSCACHE_OP_ST_CANCELLED); - op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -500,6 +499,7 @@ void fscache_put_operation(struct fscache_operation *op) op->release(op); op->release = NULL; } + op->state = FSCACHE_OP_ST_DEAD; object = op->object; if (likely(object)) { diff --git a/fs/fscache/page.c b/fs/fscache/page.c index d1b23a67c031..483bbc613bf0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -269,11 +269,12 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); - ASSERTCMP(atomic_read(&op->n_pages), ==, 0); + ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED, + atomic_read(&op->n_pages), ==, 0); fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) - fscache_put_context(op->op.object->cookie, op->context); + fscache_put_context(op->cookie, op->context); _leave(""); } @@ -302,11 +303,18 @@ static struct fscache_retrieval *fscache_alloc_retrieval( op->op.flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING) | (1UL << FSCACHE_OP_UNUSE_COOKIE); + op->cookie = cookie; op->mapping = mapping; op->end_io_func = end_io_func; op->context = context; op->start_time = jiffies; INIT_LIST_HEAD(&op->to_do); + + /* Pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure. + */ + if (context) + fscache_get_context(op->cookie, context); return op; } @@ -456,10 +464,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_retrieval_ops); - /* pin the netfs read context in case we need to do the actual netfs - * read because we've encountered a cache read failure */ - fscache_get_context(object->cookie, op->context); - /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ ret = fscache_wait_for_operation_activation( @@ -586,10 +590,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, fscache_stat(&fscache_n_retrieval_ops); - /* pin the netfs read context in case we need to do the actual netfs - * read because we've encountered a cache read failure */ - fscache_get_context(object->cookie, op->context); - /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ ret = fscache_wait_for_operation_activation( diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 69c6eb10d858..604e1526cd00 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -133,6 +133,7 @@ extern void fscache_operation_init(struct fscache_operation *, */ struct fscache_retrieval { struct fscache_operation op; + struct fscache_cookie *cookie; /* The netfs cookie */ struct address_space *mapping; /* netfs pages */ fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ void *context; /* netfs read context (pinned) */ From ee4eb20dbce91751c248399e8a0cf9b8813adf5a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 17 Apr 2015 23:35:04 +0200 Subject: [PATCH 014/839] Makefile: Fix detection of clang when cross-compiling When the host's C compiler is clang, and when attempting to cross-compile Linux e.g. to MIPS with mipsel-linux-gcc, the Makefile would incorrectly detect the use of clang, which resulted in clang-specific flags being passed to mipsel-linux-gcc. This can be verified under Debian by installing the "clang" package, and then using it as the default compiler with: sudo update-alternatives --config cc This patch moves the detection of clang after the $(CC) variable is initialized to the name of the cross-compiler, so that the check applies to the cross-compiler and not the host's C compiler. v2: Move the detection of clang after the inclusion of the arch/*/Makefile (as they might set $(CROSS_COMPILE)) Signed-off-by: Paul Cercueil Signed-off-by: Michal Marek --- Makefile | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 7ff1239f9cd2..2d225aa4d7e3 100644 --- a/Makefile +++ b/Makefile @@ -336,15 +336,6 @@ endif export KBUILD_MODULES KBUILD_BUILTIN export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD -ifneq ($(CC),) -ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) -COMPILER := clang -else -COMPILER := gcc -endif -export COMPILER -endif - # We need some generic definitions (do not try to remake the file). scripts/Kbuild.include: ; include scripts/Kbuild.include @@ -671,6 +662,13 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) +COMPILER := clang +else +COMPILER := gcc +endif +export COMPILER + ifeq ($(COMPILER),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) From 0ce8cf2f31d661a27b7656e791acacb86eb58739 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:13 -0600 Subject: [PATCH 015/839] Documentation: dt: add common bindings for hwspinlock This patch adds the generic common bindings used to represent a hwlock device and use/request locks in a device-tree build. Each hwspinlock provider should have the '#hwlock-cells' property, which represents the number of cells to be used for representing a specific hwspinlock. Client users shall use the property 'hwlocks' for requesting specific lock(s). Note that the document is named hwlock.txt deliberately to keep it a bit more generic. Cc: Rob Herring Signed-off-by: Suman Anna Reviewed-by: Bjorn Andersson Acked-by: Mark Rutland Signed-off-by: Ohad Ben-Cohen --- .../devicetree/bindings/hwlock/hwlock.txt | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 Documentation/devicetree/bindings/hwlock/hwlock.txt diff --git a/Documentation/devicetree/bindings/hwlock/hwlock.txt b/Documentation/devicetree/bindings/hwlock/hwlock.txt new file mode 100644 index 000000000000..085d1f5c916a --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/hwlock.txt @@ -0,0 +1,59 @@ +Generic hwlock bindings +======================= + +Generic bindings that are common to all the hwlock platform specific driver +implementations. + +Please also look through the individual platform specific hwlock binding +documentations for identifying any additional properties specific to that +platform. + +hwlock providers: +================= + +Required properties: +- #hwlock-cells: Specifies the number of cells needed to represent a + specific lock. + +hwlock users: +============= + +Consumers that require specific hwlock(s) should specify them using the +property "hwlocks", and an optional "hwlock-names" property. + +Required properties: +- hwlocks: List of phandle to a hwlock provider node and an + associated hwlock args specifier as indicated by + #hwlock-cells. The list can have just a single hwlock + or multiple hwlocks, with each hwlock represented by + a phandle and a corresponding args specifier. + +Optional properties: +- hwlock-names: List of hwlock name strings defined in the same order + as the hwlocks, with one name per hwlock. Consumers can + use the hwlock-names to match and get a specific hwlock. + + +1. Example of a node using a single specific hwlock: + +The following example has a node requesting a hwlock in the bank defined by +the node hwlock1. hwlock1 is a hwlock provider with an argument specifier +of length 1. + + node { + ... + hwlocks = <&hwlock1 2>; + ... + }; + +2. Example of a node using multiple specific hwlocks: + +The following example has a node requesting two hwlocks, a hwlock within +the hwlock device node 'hwlock1' with #hwlock-cells value of 1, and another +hwlock within the hwlock device node 'hwlock2' with #hwlock-cells value of 2. + + node { + ... + hwlocks = <&hwlock1 2>, <&hwlock2 0 3>; + ... + }; From fb7737e949e31d8a71acee6bbb670f32dbd2a2c0 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:14 -0600 Subject: [PATCH 016/839] hwspinlock/core: add device tree support This patch adds a new OF-friendly API of_hwspin_lock_get_id() for hwspinlock clients to use/request locks from a hwspinlock device instantiated through a device-tree blob. This new API can be used by hwspinlock clients to get the id for a specific lock using the phandle + args specifier, so that it can be requested using the available hwspin_lock_request_specific() API. Signed-off-by: Suman Anna Reviewed-by: Bjorn Andersson [small comment clarification] Signed-off-by: Ohad Ben-Cohen --- Documentation/hwspinlock.txt | 10 ++++ drivers/hwspinlock/hwspinlock_core.c | 79 ++++++++++++++++++++++++++++ include/linux/hwspinlock.h | 7 +++ 3 files changed, 96 insertions(+) diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt index 62f7d4ea6e26..61c1ee98e59f 100644 --- a/Documentation/hwspinlock.txt +++ b/Documentation/hwspinlock.txt @@ -48,6 +48,16 @@ independent, drivers. ids for predefined purposes. Should be called from a process context (might sleep). + int of_hwspin_lock_get_id(struct device_node *np, int index); + - retrieve the global lock id for an OF phandle-based specific lock. + This function provides a means for DT users of a hwspinlock module + to get the global lock id of a specific hwspinlock, so that it can + be requested using the normal hwspin_lock_request_specific() API. + The function returns a lock id number on success, -EPROBE_DEFER if + the hwspinlock device is not yet registered with the core, or other + error values. + Should be called from a process context (might sleep). + int hwspin_lock_free(struct hwspinlock *hwlock); - free a previously-assigned hwspinlock; returns 0 on success, or an appropriate error code on failure (e.g. -EINVAL if the hwspinlock diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index 461a0d739d75..52f708bcf77f 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "hwspinlock_internal.h" @@ -257,6 +258,84 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) } EXPORT_SYMBOL_GPL(__hwspin_unlock); +/** + * of_hwspin_lock_simple_xlate - translate hwlock_spec to return a lock id + * @bank: the hwspinlock device bank + * @hwlock_spec: hwlock specifier as found in the device tree + * + * This is a simple translation function, suitable for hwspinlock platform + * drivers that only has a lock specifier length of 1. + * + * Returns a relative index of the lock within a specified bank on success, + * or -EINVAL on invalid specifier cell count. + */ +static inline int +of_hwspin_lock_simple_xlate(const struct of_phandle_args *hwlock_spec) +{ + if (WARN_ON(hwlock_spec->args_count != 1)) + return -EINVAL; + + return hwlock_spec->args[0]; +} + +/** + * of_hwspin_lock_get_id() - get lock id for an OF phandle-based specific lock + * @np: device node from which to request the specific hwlock + * @index: index of the hwlock in the list of values + * + * This function provides a means for DT users of the hwspinlock module to + * get the global lock id of a specific hwspinlock using the phandle of the + * hwspinlock device, so that it can be requested using the normal + * hwspin_lock_request_specific() API. + * + * Returns the global lock id number on success, -EPROBE_DEFER if the hwspinlock + * device is not yet registered, -EINVAL on invalid args specifier value or an + * appropriate error as returned from the OF parsing of the DT client node. + */ +int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + struct of_phandle_args args; + struct hwspinlock *hwlock; + struct radix_tree_iter iter; + void **slot; + int id; + int ret; + + ret = of_parse_phandle_with_args(np, "hwlocks", "#hwlock-cells", index, + &args); + if (ret) + return ret; + + /* Find the hwspinlock device: we need its base_id */ + ret = -EPROBE_DEFER; + rcu_read_lock(); + radix_tree_for_each_slot(slot, &hwspinlock_tree, &iter, 0) { + hwlock = radix_tree_deref_slot(slot); + if (unlikely(!hwlock)) + continue; + + if (hwlock->bank->dev->of_node == args.np) { + ret = 0; + break; + } + } + rcu_read_unlock(); + if (ret < 0) + goto out; + + id = of_hwspin_lock_simple_xlate(&args); + if (id < 0 || id >= hwlock->bank->num_locks) { + ret = -EINVAL; + goto out; + } + id += hwlock->bank->base_id; + +out: + of_node_put(args.np); + return ret ? ret : id; +} +EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id); + static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id) { struct hwspinlock *tmp; diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index 3343298e40e8..859d673d98c8 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -26,6 +26,7 @@ #define HWLOCK_IRQ 0x02 /* Disable interrupts, don't save state */ struct device; +struct device_node; struct hwspinlock; struct hwspinlock_device; struct hwspinlock_ops; @@ -66,6 +67,7 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank); struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); +int of_hwspin_lock_get_id(struct device_node *np, int index); int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); @@ -120,6 +122,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + return 0; +} + static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) { return 0; From 67140ed1c9d518565e6e2b86ba761652eb9bf3c4 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:15 -0600 Subject: [PATCH 017/839] Documentation: dt: add the omap hwspinlock bindings document HwSpinlock IP is present only on OMAP4 and other newer SoCs, which are all device-tree boot only. This patch adds the DT bindings information for OMAP hwspinlock module. Cc: Rob Herring Signed-off-by: Suman Anna Acked-by: Mark Rutland Signed-off-by: Ohad Ben-Cohen --- .../bindings/hwlock/omap-hwspinlock.txt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt diff --git a/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt new file mode 100644 index 000000000000..2c9804f4f4ac --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt @@ -0,0 +1,26 @@ +OMAP4+ HwSpinlock Driver +======================== + +Required properties: +- compatible: Should be "ti,omap4-hwspinlock" for + OMAP44xx, OMAP54xx, AM33xx, AM43xx, DRA7xx SoCs +- reg: Contains the hwspinlock module register address space + (base address and length) +- ti,hwmods: Name of the hwmod associated with the hwspinlock device +- #hwlock-cells: Should be 1. The OMAP hwspinlock users will use a + 0-indexed relative hwlock number as the argument + specifier value for requesting a specific hwspinlock + within a hwspinlock bank. + +Please look at the generic hwlock binding for usage information for consumers, +"Documentation/devicetree/bindings/hwlock/hwlock.txt" + +Example: + +/* OMAP4 */ +hwspinlock: spinlock@4a0f6000 { + compatible = "ti,omap4-hwspinlock"; + reg = <0x4a0f6000 0x1000>; + ti,hwmods = "spinlock"; + #hwlock-cells = <1>; +}; From 65bd4341d61678494ea14994d0d7df73644ca014 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:16 -0600 Subject: [PATCH 018/839] hwspinlock/omap: add support for dt nodes HwSpinlock IP is present only on OMAP4 and other newer SoCs, which are all device-tree boot only. This patch adds the base support for parsing the DT nodes, and removes the code dealing with the traditional platform device instantiation. Signed-off-by: Suman Anna [tony@atomide.com: ack for legacy file removal] Acked-by: Tony Lindgren [comment on the imperfect always-zero base_id] Signed-off-by: Ohad Ben-Cohen --- MAINTAINERS | 1 - arch/arm/mach-omap2/Makefile | 3 -- arch/arm/mach-omap2/hwspinlock.c | 60 ---------------------------- drivers/hwspinlock/omap_hwspinlock.c | 18 +++++++-- 4 files changed, 14 insertions(+), 68 deletions(-) delete mode 100644 arch/arm/mach-omap2/hwspinlock.c diff --git a/MAINTAINERS b/MAINTAINERS index 2e5bbc0d68b2..5fe8941ba12d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7095,7 +7095,6 @@ M: Ohad Ben-Cohen L: linux-omap@vger.kernel.org S: Maintained F: drivers/hwspinlock/omap_hwspinlock.c -F: arch/arm/mach-omap2/hwspinlock.c OMAP MMC SUPPORT M: Jarkko Lavinen diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index ec002bd4af77..9a2c88e56bae 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -277,8 +277,5 @@ obj-y += $(nand-m) $(nand-y) smsc911x-$(CONFIG_SMSC911X) := gpmc-smsc911x.o obj-y += $(smsc911x-m) $(smsc911x-y) -ifneq ($(CONFIG_HWSPINLOCK_OMAP),) -obj-y += hwspinlock.o -endif obj-y += common-board-devices.o twl-common.o dss-common.o diff --git a/arch/arm/mach-omap2/hwspinlock.c b/arch/arm/mach-omap2/hwspinlock.c deleted file mode 100644 index ef175acaeaa2..000000000000 --- a/arch/arm/mach-omap2/hwspinlock.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * OMAP hardware spinlock device initialization - * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com - * - * Contact: Simon Que - * Hari Kanigeri - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -#include -#include -#include -#include - -#include "soc.h" -#include "omap_hwmod.h" -#include "omap_device.h" - -static struct hwspinlock_pdata omap_hwspinlock_pdata __initdata = { - .base_id = 0, -}; - -static int __init hwspinlocks_init(void) -{ - int retval = 0; - struct omap_hwmod *oh; - struct platform_device *pdev; - const char *oh_name = "spinlock"; - const char *dev_name = "omap_hwspinlock"; - - /* - * Hwmod lookup will fail in case our platform doesn't support the - * hardware spinlock module, so it is safe to run this initcall - * on all omaps - */ - oh = omap_hwmod_lookup(oh_name); - if (oh == NULL) - return -EINVAL; - - pdev = omap_device_build(dev_name, 0, oh, &omap_hwspinlock_pdata, - sizeof(struct hwspinlock_pdata)); - if (IS_ERR(pdev)) { - pr_err("Can't build omap_device for %s:%s\n", dev_name, - oh_name); - retval = PTR_ERR(pdev); - } - - return retval; -} -/* early board code might need to reserve specific hwspinlock instances */ -omap_postcore_initcall(hwspinlocks_init); diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index 47a275c6ece1..ad2f8cac8487 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -1,7 +1,7 @@ /* * OMAP hardware spinlock driver * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2010-2015 Texas Instruments Incorporated - http://www.ti.com * * Contact: Simon Que * Hari Kanigeri @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hwspinlock_internal.h" @@ -80,14 +81,16 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = { static int omap_hwspinlock_probe(struct platform_device *pdev) { - struct hwspinlock_pdata *pdata = pdev->dev.platform_data; + struct device_node *node = pdev->dev.of_node; struct hwspinlock_device *bank; struct hwspinlock *hwlock; struct resource *res; void __iomem *io_base; int num_locks, i, ret; + /* Only a single hwspinlock block device is supported */ + int base_id = 0; - if (!pdata) + if (!node) return -ENODEV; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -141,7 +144,7 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, - pdata->base_id, num_locks); + base_id, num_locks); if (ret) goto reg_fail; @@ -174,11 +177,18 @@ static int omap_hwspinlock_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id omap_hwspinlock_of_match[] = { + { .compatible = "ti,omap4-hwspinlock", }, + { /* end */ }, +}; +MODULE_DEVICE_TABLE(of, omap_hwspinlock_of_match); + static struct platform_driver omap_hwspinlock_driver = { .probe = omap_hwspinlock_probe, .remove = omap_hwspinlock_remove, .driver = { .name = "omap_hwspinlock", + .of_match_table = of_match_ptr(omap_hwspinlock_of_match), }, }; From 5b5711f0f1aff1b25ee48da279de4f0c202f7a68 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 24 Mar 2015 10:11:04 -0700 Subject: [PATCH 019/839] DT: hwspinlock: Add binding documentation for Qualcomm hwmutex Add binding documentation for the Qualcomm Hardware Mutex. Signed-off-by: Bjorn Andersson Acked-by: Kumar Gala Reviewed-by: Andy Gross Reviewed-by: Jeffrey Hugo Signed-off-by: Ohad Ben-Cohen --- .../bindings/hwlock/qcom-hwspinlock.txt | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt diff --git a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt new file mode 100644 index 000000000000..4563f524556b --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt @@ -0,0 +1,39 @@ +Qualcomm Hardware Mutex Block: + +The hardware block provides mutexes utilized between different processors on +the SoC as part of the communication protocol used by these processors. + +- compatible: + Usage: required + Value type: + Definition: must be one of: + "qcom,sfpb-mutex", + "qcom,tcsr-mutex" + +- syscon: + Usage: required + Value type: + Definition: one cell containing: + syscon phandle + offset of the hwmutex block within the syscon + stride of the hwmutex registers + +- #hwlock-cells: + Usage: required + Value type: + Definition: must be 1, the specified cell represent the lock id + (hwlock standard property, see hwlock.txt) + +Example: + + tcsr_mutex_block: syscon@fd484000 { + compatible = "syscon"; + reg = <0xfd484000 0x2000>; + }; + + hwlock@fd484000 { + compatible = "qcom,tcsr-mutex"; + syscon = <&tcsr_mutex_block 0 0x80>; + + #hwlock-cells = <1>; + }; From 19a0f61224d2d91860fa8291ab63cb104ee86bdd Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 24 Mar 2015 10:11:05 -0700 Subject: [PATCH 020/839] hwspinlock: qcom: Add support for Qualcomm HW Mutex block Add driver for Qualcomm Hardware Mutex block found in many Qualcomm SoCs. Based on initial effort by Kumar Gala Signed-off-by: Bjorn Andersson Reviewed-by: Andy Gross Reviewed-by: Jeffrey Hugo Signed-off-by: Ohad Ben-Cohen --- drivers/hwspinlock/Kconfig | 12 ++ drivers/hwspinlock/Makefile | 1 + drivers/hwspinlock/qcom_hwspinlock.c | 181 +++++++++++++++++++++++++++ 3 files changed, 194 insertions(+) create mode 100644 drivers/hwspinlock/qcom_hwspinlock.c diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig index 3612cb5b30b2..b5b4f52cef73 100644 --- a/drivers/hwspinlock/Kconfig +++ b/drivers/hwspinlock/Kconfig @@ -18,6 +18,18 @@ config HWSPINLOCK_OMAP If unsure, say N. +config HWSPINLOCK_QCOM + tristate "Qualcomm Hardware Spinlock device" + depends on ARCH_QCOM + select HWSPINLOCK + select MFD_SYSCON + help + Say y here to support the Qualcomm Hardware Mutex functionality, which + provides a synchronisation mechanism for the various processors on + the SoC. + + If unsure, say N. + config HSEM_U8500 tristate "STE Hardware Semaphore functionality" depends on ARCH_U8500 diff --git a/drivers/hwspinlock/Makefile b/drivers/hwspinlock/Makefile index 93eb64b66486..68f95d9a4855 100644 --- a/drivers/hwspinlock/Makefile +++ b/drivers/hwspinlock/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_HWSPINLOCK) += hwspinlock_core.o obj-$(CONFIG_HWSPINLOCK_OMAP) += omap_hwspinlock.o +obj-$(CONFIG_HWSPINLOCK_QCOM) += qcom_hwspinlock.o obj-$(CONFIG_HSEM_U8500) += u8500_hsem.o diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c new file mode 100644 index 000000000000..93b62e055ff6 --- /dev/null +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, Sony Mobile Communications AB + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hwspinlock_internal.h" + +#define QCOM_MUTEX_APPS_PROC_ID 1 +#define QCOM_MUTEX_NUM_LOCKS 32 + +static int qcom_hwspinlock_trylock(struct hwspinlock *lock) +{ + struct regmap_field *field = lock->priv; + u32 lock_owner; + int ret; + + ret = regmap_field_write(field, QCOM_MUTEX_APPS_PROC_ID); + if (ret) + return ret; + + ret = regmap_field_read(field, &lock_owner); + if (ret) + return ret; + + return lock_owner == QCOM_MUTEX_APPS_PROC_ID; +} + +static void qcom_hwspinlock_unlock(struct hwspinlock *lock) +{ + struct regmap_field *field = lock->priv; + u32 lock_owner; + int ret; + + ret = regmap_field_read(field, &lock_owner); + if (ret) { + pr_err("%s: unable to query spinlock owner\n", __func__); + return; + } + + if (lock_owner != QCOM_MUTEX_APPS_PROC_ID) { + pr_err("%s: spinlock not owned by us (actual owner is %d)\n", + __func__, lock_owner); + } + + ret = regmap_field_write(field, 0); + if (ret) + pr_err("%s: failed to unlock spinlock\n", __func__); +} + +static const struct hwspinlock_ops qcom_hwspinlock_ops = { + .trylock = qcom_hwspinlock_trylock, + .unlock = qcom_hwspinlock_unlock, +}; + +static const struct of_device_id qcom_hwspinlock_of_match[] = { + { .compatible = "qcom,sfpb-mutex" }, + { .compatible = "qcom,tcsr-mutex" }, + { } +}; +MODULE_DEVICE_TABLE(of, qcom_hwspinlock_of_match); + +static int qcom_hwspinlock_probe(struct platform_device *pdev) +{ + struct hwspinlock_device *bank; + struct device_node *syscon; + struct reg_field field; + struct regmap *regmap; + size_t array_size; + u32 stride; + u32 base; + int ret; + int i; + + syscon = of_parse_phandle(pdev->dev.of_node, "syscon", 0); + if (!syscon) { + dev_err(&pdev->dev, "no syscon property\n"); + return -ENODEV; + } + + regmap = syscon_node_to_regmap(syscon); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 1, &base); + if (ret < 0) { + dev_err(&pdev->dev, "no offset in syscon\n"); + return -EINVAL; + } + + ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 2, &stride); + if (ret < 0) { + dev_err(&pdev->dev, "no stride syscon\n"); + return -EINVAL; + } + + array_size = QCOM_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock); + bank = devm_kzalloc(&pdev->dev, sizeof(*bank) + array_size, GFP_KERNEL); + if (!bank) + return -ENOMEM; + + platform_set_drvdata(pdev, bank); + + for (i = 0; i < QCOM_MUTEX_NUM_LOCKS; i++) { + field.reg = base + i * stride; + field.lsb = 0; + field.msb = 32; + + bank->lock[i].priv = devm_regmap_field_alloc(&pdev->dev, + regmap, field); + } + + pm_runtime_enable(&pdev->dev); + + ret = hwspin_lock_register(bank, &pdev->dev, &qcom_hwspinlock_ops, + 0, QCOM_MUTEX_NUM_LOCKS); + if (ret) + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static int qcom_hwspinlock_remove(struct platform_device *pdev) +{ + struct hwspinlock_device *bank = platform_get_drvdata(pdev); + int ret; + + ret = hwspin_lock_unregister(bank); + if (ret) { + dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static struct platform_driver qcom_hwspinlock_driver = { + .probe = qcom_hwspinlock_probe, + .remove = qcom_hwspinlock_remove, + .driver = { + .name = "qcom_hwspinlock", + .of_match_table = qcom_hwspinlock_of_match, + }, +}; + +static int __init qcom_hwspinlock_init(void) +{ + return platform_driver_register(&qcom_hwspinlock_driver); +} +/* board init code might need to reserve hwspinlocks for predefined purposes */ +postcore_initcall(qcom_hwspinlock_init); + +static void __exit qcom_hwspinlock_exit(void) +{ + platform_driver_unregister(&qcom_hwspinlock_driver); +} +module_exit(qcom_hwspinlock_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Hardware spinlock driver for Qualcomm SoCs"); From 9aa1cfca255bf3ba83f44b3452d34cc34607b94a Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 27 Feb 2015 17:18:21 -0600 Subject: [PATCH 021/839] remoteproc/ste: add blank lines after declarations Fix couple of checkpatch warnings of the type, "WARNING: Missing a blank line after declarations" Signed-off-by: Suman Anna Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/ste_modem_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/ste_modem_rproc.c b/drivers/remoteproc/ste_modem_rproc.c index dd193f35a1ff..53dc17bdd54e 100644 --- a/drivers/remoteproc/ste_modem_rproc.c +++ b/drivers/remoteproc/ste_modem_rproc.c @@ -67,8 +67,7 @@ static int sproc_load_segments(struct rproc *rproc, const struct firmware *fw) static const struct ste_toc_entry *sproc_find_rsc_entry(const void *data) { int i; - const struct ste_toc *toc; - toc = data; + const struct ste_toc *toc = data; /* Search the table for the resource table */ for (i = 0; i < SPROC_MAX_TOC_ENTRIES && @@ -230,6 +229,7 @@ static int sproc_start(struct rproc *rproc) static int sproc_stop(struct rproc *rproc) { struct sproc *sproc = rproc->priv; + sproc_dbg(sproc, "stop ste-modem\n"); return sproc->mdev->ops.power(sproc->mdev, false); From e17aee37a68b3884114f515df3ee506870f5eea1 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 27 Feb 2015 17:18:22 -0600 Subject: [PATCH 022/839] remoteproc/davinci: fix quoted split string checkpatch warning Fix the following checkpatch warning, WARNING: quoted string split across lines + "\n\t\tName of DSP firmware file in /lib/firmware" + " (if not specified defaults to 'rproc-dsp-fw')"); Signed-off-by: Suman Anna [remove leading whitespace as well] Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/da8xx_remoteproc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/remoteproc/da8xx_remoteproc.c b/drivers/remoteproc/da8xx_remoteproc.c index f8d6a0661c14..009e56f67de2 100644 --- a/drivers/remoteproc/da8xx_remoteproc.c +++ b/drivers/remoteproc/da8xx_remoteproc.c @@ -26,8 +26,7 @@ static char *da8xx_fw_name; module_param(da8xx_fw_name, charp, S_IRUGO); MODULE_PARM_DESC(da8xx_fw_name, - "\n\t\tName of DSP firmware file in /lib/firmware" - " (if not specified defaults to 'rproc-dsp-fw')"); + "Name of DSP firmware file in /lib/firmware (if not specified defaults to 'rproc-dsp-fw')"); /* * OMAP-L138 Technical References: From 172e6ab1caffcd2dd2910b44d88d096f2c6985fa Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 27 Feb 2015 17:18:23 -0600 Subject: [PATCH 023/839] remoteproc: fix various checkpatch warnings Fix all the checkpatch warnings in the core remoteproc code. The fixes cover the following warnings: 1. WARNING: void function return statements are not generally useful 2. WARNING: Possible unnecessary 'out of memory' message 3. WARNING: line over 80 characters 4. WARNING: braces {} are not necessary for single statement blocks 5. WARNING: Unnecessary space before function pointer arguments Signed-off-by: Suman Anna Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_core.c | 27 ++++++++---------------- drivers/remoteproc/remoteproc_internal.h | 2 +- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 11cdb119e4f3..e991512e04ed 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -132,8 +132,6 @@ static void rproc_disable_iommu(struct rproc *rproc) iommu_detach_device(domain, dev); iommu_domain_free(domain); - - return; } /* @@ -411,10 +409,8 @@ static int rproc_handle_trace(struct rproc *rproc, struct fw_rsc_trace *rsc, } trace = kzalloc(sizeof(*trace), GFP_KERNEL); - if (!trace) { - dev_err(dev, "kzalloc trace failed\n"); + if (!trace) return -ENOMEM; - } /* set the trace buffer dma properties */ trace->len = rsc->len; @@ -489,10 +485,8 @@ static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc, } mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); - if (!mapping) { - dev_err(dev, "kzalloc mapping failed\n"); + if (!mapping) return -ENOMEM; - } ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags); if (ret) { @@ -565,10 +559,8 @@ static int rproc_handle_carveout(struct rproc *rproc, rsc->da, rsc->pa, rsc->len, rsc->flags); carveout = kzalloc(sizeof(*carveout), GFP_KERNEL); - if (!carveout) { - dev_err(dev, "kzalloc carveout failed\n"); + if (!carveout) return -ENOMEM; - } va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL); if (!va) { @@ -768,7 +760,8 @@ static void rproc_resource_cleanup(struct rproc *rproc) /* clean up carveout allocations */ list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { - dma_free_coherent(dev->parent, entry->len, entry->va, entry->dma); + dma_free_coherent(dev->parent, entry->len, entry->va, + entry->dma); list_del(&entry->node); kfree(entry); } @@ -808,9 +801,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) /* look for the resource table */ table = rproc_find_rsc_table(rproc, fw, &tablesz); - if (!table) { + if (!table) goto clean_up; - } /* Verify that resource table in loaded fw is unchanged */ if (rproc->table_csum != crc32(0, table, tablesz)) { @@ -911,7 +903,8 @@ static void rproc_fw_config_virtio(const struct firmware *fw, void *context) /* count the number of notify-ids */ rproc->max_notifyid = -1; - ret = rproc_handle_resources(rproc, tablesz, rproc_count_vrings_handler); + ret = rproc_handle_resources(rproc, tablesz, + rproc_count_vrings_handler); if (ret) goto out; @@ -1268,10 +1261,8 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, name_len = strlen(name) + strlen(template) - 2 + 1; rproc = kzalloc(sizeof(struct rproc) + len + name_len, GFP_KERNEL); - if (!rproc) { - dev_err(dev, "%s: kzalloc failed\n", __func__); + if (!rproc) return NULL; - } if (!firmware) { p = (char *)rproc + sizeof(struct rproc) + len; diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index 70701a50ddfa..8041b95cb058 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -35,7 +35,7 @@ struct rproc; * @get_boot_addr: get boot address to entry point specified in firmware */ struct rproc_fw_ops { - struct resource_table *(*find_rsc_table) (struct rproc *rproc, + struct resource_table *(*find_rsc_table)(struct rproc *rproc, const struct firmware *fw, int *tablesz); struct resource_table *(*find_loaded_rsc_table)(struct rproc *rproc, From ab160dbbc4ba71a4e339382d37b31ea44fd43e86 Mon Sep 17 00:00:00 2001 From: Sylvain BERTRAND Date: Thu, 7 May 2015 00:36:04 +0000 Subject: [PATCH 024/839] scripts: fix link-vmlinux.sh bash-ism While building linux with dash shell: LINK vmlinux trap: SIGHUP: bad trap /src/linux-4.0/Makefile:933: recipe for target 'vmlinux' failed make[1]: *** [vmlinux] Error 1 See the following document for behavior of posix shell trap instruction: http://pubs.opengroup.org/onlinepubs/000095399/utilities/trap.html Cc: Ingo Molnar Signed-off-by: Sylvain BERTRAND Signed-off-by: Michal Marek --- scripts/link-vmlinux.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 86a4fe75f453..c9a11d15d228 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -111,7 +111,6 @@ sortextable() } # Delete output files in case of error -trap cleanup SIGHUP SIGINT SIGQUIT SIGTERM ERR cleanup() { rm -f .old_version @@ -124,6 +123,20 @@ cleanup() rm -f vmlinux.o } +on_exit() +{ + if [ $? -ne 0 ]; then + cleanup + fi +} +trap on_exit EXIT + +on_signals() +{ + exit 1 +} +trap on_signals HUP INT QUIT TERM + # # # Use "make V=1" to debug this script @@ -231,7 +244,6 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then if ! cmp -s System.map .tmp_System.map; then echo >&2 Inconsistent kallsyms data echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround - cleanup exit 1 fi fi From d4bd441532b81fe2be1706e7f9dbbe8b5a364bcf Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Mon, 11 May 2015 10:15:48 +0200 Subject: [PATCH 025/839] x86/kaslr: Fix typo in the KASLR_FLAG documentation Documentation/x86/boot.txt labels the bit in boot_params.hdr.loadflags as ALSR_FLAG while it should be KASLR_FLAG. Signed-off-by: Miroslav Benes Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Kosina Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429011324-7170-1-git-send-email-mbenes@suse.cz Link: http://lkml.kernel.org/r/1431332153-18566-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/boot.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 88b85899d309..69e139791868 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -406,7 +406,7 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. - Bit 1 (kernel internal): ALSR_FLAG + Bit 1 (kernel internal): KASLR_FLAG - Used internally by the compressed kernel to communicate KASLR status to kernel proper. If 1, KASLR enabled. From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: [PATCH 026/839] mnt: Refactor the logic for mounting sysfs and proc in a user namespace Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 +++++++- fs/proc/root.c | 5 +---- fs/sysfs/mount.c | 5 +---- include/linux/fs.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1b9e11167bae..8e7edaf60fe1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,6 +2332,8 @@ unlock: return err; } +static bool fs_fully_visible(struct file_system_type *fs_type); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2363,6 +2365,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3164,7 +3170,7 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt; diff --git a/fs/proc/root.c b/fs/proc/root.c index b7fa4bfe896a..64e1ab64bde6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - /* Does the mounter have privilege over the pid namespace? */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; void __init proc_root_init(void) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8a49486bf30c..1c6ac6fcee9f 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, bool new_sb; if (!(flags & MS_KERNMOUNT)) { - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } @@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..2d24eeb8e59c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); From cc8475305203ddfd117b81e2e732194b67d8f310 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Thu, 21 May 2015 19:17:44 +0800 Subject: [PATCH 027/839] scripts: link-vmlinux: Don't pass page offset to kallsyms if XIP Kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Kernel is executed in place from ROM, the symbol addresses can be lower than the page offset. Tested-by: Chanwoo Choi Signed-off-by: Maxime Coquelin Tested-by: Andreas Färber Signed-off-by: Michal Marek --- scripts/link-vmlinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index c9a11d15d228..1a10d8ac8162 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,7 +82,7 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then + if [ -n "${CONFIG_ARM}" ] && [ -z "${CONFIG_XIP_KERNEL}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" fi From 5b83b2234be6733cfe22036c38031b2c890b3db8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 21 May 2015 15:51:52 -0700 Subject: [PATCH 028/839] mmc: omap_hsmmc: Change wake-up interrupt to use generic wakeirq We can now use generic wakeirq handling and remove the custom handling for the wake-up interrupts. Acked-by: Ulf Hansson Signed-off-by: Tony Lindgren --- drivers/mmc/host/omap_hsmmc.c | 49 +++++------------------------------ 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9df2b6801f76..b2b411da297b 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -43,6 +43,7 @@ #include #include #include +#include #include /* OMAP HSMMC Host Controller Registers */ @@ -218,7 +219,6 @@ struct omap_hsmmc_host { unsigned int flags; #define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ #define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ -#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_hsmmc_platform_data *pdata; @@ -1117,22 +1117,6 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) -{ - struct omap_hsmmc_host *host = dev_id; - - /* cirq is level triggered, disable to avoid infinite loop */ - spin_lock(&host->irq_lock); - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } - spin_unlock(&host->irq_lock); - pm_request_resume(host->dev); /* no use counter */ - - return IRQ_HANDLED; -} - static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1665,7 +1649,6 @@ static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) { - struct mmc_host *mmc = host->mmc; int ret; /* @@ -1677,11 +1660,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) if (!host->dev->of_node || !host->wake_irq) return -ENODEV; - /* Prevent auto-enabling of IRQ */ - irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); - ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - mmc_hostname(mmc), host); + ret = dev_pm_set_dedicated_wake_irq(host->dev, host->wake_irq); if (ret) { dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); goto err; @@ -1718,7 +1697,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) return 0; err_free_irq: - devm_free_irq(host->dev, host->wake_irq, host); + dev_pm_clear_wake_irq(host->dev); err: dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); host->wake_irq = 0; @@ -2007,6 +1986,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_ops.multi_io_quirk = omap_hsmmc_multi_io_quirk; } + device_init_wakeup(&pdev->dev, true); pm_runtime_enable(host->dev); pm_runtime_get_sync(host->dev); pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY); @@ -2147,6 +2127,7 @@ err_slot_name: if (host->use_reg) omap_hsmmc_reg_put(host); err_irq: + device_init_wakeup(&pdev->dev, false); if (host->tx_chan) dma_release_channel(host->tx_chan); if (host->rx_chan) @@ -2178,6 +2159,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); + device_init_wakeup(&pdev->dev, false); if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2204,11 +2186,6 @@ static int omap_hsmmc_suspend(struct device *dev) OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } - /* do not wake up due to sdio irq */ - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - disable_irq(host->wake_irq); - if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2233,11 +2210,6 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_conf_bus_power(host); omap_hsmmc_protect_card(host); - - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - enable_irq(host->wake_irq); - pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2277,10 +2249,6 @@ static int omap_hsmmc_runtime_suspend(struct device *dev) } pinctrl_pm_select_idle_state(dev); - - WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); - enable_irq(host->wake_irq); - host->flags |= HSMMC_WAKE_IRQ_ENABLED; } else { pinctrl_pm_select_idle_state(dev); } @@ -2302,11 +2270,6 @@ static int omap_hsmmc_runtime_resume(struct device *dev) spin_lock_irqsave(&host->irq_lock, flags); if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { - /* sdio irq flag can't change while in runtime suspend */ - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } pinctrl_pm_select_default_state(host->dev); From e911503085ae8a6bb826588076f66df05890253c Mon Sep 17 00:00:00 2001 From: Martin Walch Date: Fri, 22 May 2015 13:41:52 +0200 Subject: [PATCH 029/839] Kconfig: Remove bad inference rules expr_eliminate_dups2() expr_eliminate_dups2() in scripts/kconfig/expr.c applies two invalid inference rules: (FOO || BAR) && (!FOO && !BAR) -> n (FOO && BAR) || (!FOO || !BAR) -> y They would be correct in propositional logic, but this is a three-valued logic, and here it is wrong in that it changes semantics. It becomes immediately visible when assigning the value 1 to both, FOO and BAR: (FOO || BAR) && (!FOO && !BAR) -> min(max(1, 1), min(2-1, 2-1)) = min(1, 1) = 1 while n evaluates to 0 and (FOO && BAR) || (!FOO || !BAR) -> max(min(1, 1), max(2-1, 2-1)) = max(1, 1) = 1 with y evaluating to 2. Fix it by removing expr_eliminate_dups2() and the functions that have no use anywhere else: expr_extract_eq_and(), expr_extract_eq_or(), and expr_extract_eq() from scripts/kconfig/expr.c Currently the bug is not triggered in mainline, so this patch does not modify the configuration space there. To observe the bug consider this example: config MODULES def_bool y option modules config FOO def_tristate m config BAR def_tristate m config TEST1 def_tristate y depends on (FOO || BAR) && (!FOO && !BAR) if TEST1 = n comment "TEST1 broken" endif config TEST2 def_tristate y depends on (FOO && BAR) || (!FOO || !BAR) if TEST2 = y comment "TEST2 broken" endif config TEST3 def_tristate y depends on m && !m if TEST3 = n comment "TEST3 broken" endif TEST1, TEST2 and TEST3 should all evaluate to m, but without the patch, none of them does. It is probably not obvious that TEST3 is the same bug, but it becomes clear when considering what happens internally to the expression m && !m": First it expands to (m && MODULES) && !(m && MODULES), then it is transformed into (m && MODULES) && (!m || !MODULES), and finally due to the bug it is replaced with n. As a side effect, this patch reduces code size in expr.c by roughly 10% and slightly improves startup time for all configuration frontends. Signed-off-by: Martin Walch Signed-off-by: Michal Marek --- scripts/kconfig/expr.c | 111 ----------------------------------------- 1 file changed, 111 deletions(-) diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index fb0a2a286dca..0d7364ce9132 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -13,9 +13,6 @@ static int expr_eq(struct expr *e1, struct expr *e2); static struct expr *expr_eliminate_yn(struct expr *e); -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2); -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2); -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2); struct expr *expr_alloc_symbol(struct symbol *sym) { @@ -559,62 +556,6 @@ static void expr_eliminate_dups1(enum expr_type type, struct expr **ep1, struct #undef e2 } -static void expr_eliminate_dups2(enum expr_type type, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - struct expr *tmp, *tmp1, *tmp2; - - if (e1->type == type) { - expr_eliminate_dups2(type, &e1->left.expr, &e2); - expr_eliminate_dups2(type, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_eliminate_dups2(type, &e1, &e2->left.expr); - expr_eliminate_dups2(type, &e1, &e2->right.expr); - } - if (e1 == e2) - return; - - switch (e1->type) { - case E_OR: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO || BAR) && (!FOO && !BAR) -> n - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_and(&tmp1, &tmp2); - if (expr_is_yes(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_no); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - case E_AND: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO && BAR) || (!FOO || !BAR) -> y - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_or(&tmp1, &tmp2); - if (expr_is_no(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_yes); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - default: - ; - } -#undef e1 -#undef e2 -} - struct expr *expr_eliminate_dups(struct expr *e) { int oldcount; @@ -627,7 +568,6 @@ struct expr *expr_eliminate_dups(struct expr *e) switch (e->type) { case E_OR: case E_AND: expr_eliminate_dups1(e->type, &e, &e); - expr_eliminate_dups2(e->type, &e, &e); default: ; } @@ -829,57 +769,6 @@ bool expr_depends_symbol(struct expr *dep, struct symbol *sym) return false; } -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_AND, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_OR, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - if (e1->type == type) { - expr_extract_eq(type, ep, &e1->left.expr, &e2); - expr_extract_eq(type, ep, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_extract_eq(type, ep, ep1, &e2->left.expr); - expr_extract_eq(type, ep, ep1, &e2->right.expr); - return; - } - if (expr_eq(e1, e2)) { - *ep = *ep ? expr_alloc_two(type, *ep, e1) : e1; - expr_free(e2); - if (type == E_AND) { - e1 = expr_alloc_symbol(&symbol_yes); - e2 = expr_alloc_symbol(&symbol_yes); - } else if (type == E_OR) { - e1 = expr_alloc_symbol(&symbol_no); - e2 = expr_alloc_symbol(&symbol_no); - } - } -#undef e1 -#undef e2 -} - struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symbol *sym) { struct expr *e1, *e2; From 092373c2b8cb67b757ec59f3e6251b45ef333b38 Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Mon, 6 Apr 2015 06:27:45 -0300 Subject: [PATCH 030/839] scripts/kconfig/Makefile: Fix spelling of Qt Signed-off-by: Diego Viola Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index d9b1fef0c67e..3b48f7afb894 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -124,7 +124,7 @@ help: @echo ' config - Update current config utilising a line-oriented program' @echo ' nconfig - Update current config utilising a ncurses menu based program' @echo ' menuconfig - Update current config utilising a menu based program' - @echo ' xconfig - Update current config utilising a QT based front-end' + @echo ' xconfig - Update current config utilising a Qt based front-end' @echo ' gconfig - Update current config utilising a GTK based front-end' @echo ' oldconfig - Update current config utilising a provided .config as base' @echo ' localmodconfig - Update current config disabling modules not loaded' @@ -158,7 +158,7 @@ HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \ # mconf: Used for the menuconfig target # Utilizes the lxdialog package # qconf: Used for the xconfig target -# Based on QT which needs to be installed to compile it +# Based on Qt which needs to be installed to compile it # gconf: Used for the gconfig target # Based on GTK which needs to be installed to compile it # object files used by all kconfig flavours @@ -217,11 +217,11 @@ ifeq ($(MAKECMDGOALS),xconfig) $(obj)/.tmp_qtcheck: $(src)/Makefile -include $(obj)/.tmp_qtcheck -# QT needs some extra effort... +# Qt needs some extra effort... $(obj)/.tmp_qtcheck: @set -e; $(kecho) " CHECK qt"; dir=""; pkg=""; \ if ! pkg-config --exists QtCore 2> /dev/null; then \ - echo "* Unable to find the QT4 tool qmake. Trying to use QT3"; \ + echo "* Unable to find the Qt4 tool qmake. Trying to use Qt3"; \ pkg-config --exists qt 2> /dev/null && pkg=qt; \ pkg-config --exists qt-mt 2> /dev/null && pkg=qt-mt; \ if [ -n "$$pkg" ]; then \ @@ -235,8 +235,8 @@ $(obj)/.tmp_qtcheck: done; \ if [ -z "$$dir" ]; then \ echo >&2 "*"; \ - echo >&2 "* Unable to find any QT installation. Please make sure that"; \ - echo >&2 "* the QT4 or QT3 development package is correctly installed and"; \ + echo >&2 "* Unable to find any Qt installation. Please make sure that"; \ + echo >&2 "* the Qt4 or Qt3 development package is correctly installed and"; \ echo >&2 "* either qmake can be found or install pkg-config or set"; \ echo >&2 "* the QTDIR environment variable to the correct location."; \ echo >&2 "*"; \ From f75b6fae1a1d0a79dcbb9cbaed1d06bf3fe57a3c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 19 Apr 2015 20:27:19 +0300 Subject: [PATCH 031/839] target: Merge sbc_verify_dif_read|write Instead of providing DIF verify routines for read/write that are almost identical and conditionally copy protection information, just let the caller do the right thing. Have a single sbc_dif_verify that handles an sgl (that does NOT copy any data) and a protection information copy routine used by rd_mcp and fileio backend. In the WRITE case, call sbc_dif_verify with cmd->t_prot_sg and then do the copy from it to local sgl (assuming the verify succeeded of course). In the READ case, call sbc_dif_verify with the local sgl and if it succeeds, copy it to t_prot_sg (or not if we are stripping it). (Fix apply breakage from commit c836777 - nab) Tested-by: Akinobu Mita Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.c | 10 +-- drivers/target/target_core_rd.c | 20 ++++-- drivers/target/target_core_sbc.c | 94 ++------------------------ drivers/target/target_core_transport.c | 17 ++--- include/target/target_core_backend.h | 8 +-- 5 files changed, 37 insertions(+), 112 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 3f27bfd816d8..deec7560f835 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -643,13 +643,14 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { u32 sectors = cmd->data_length / dev->dev_attrib.block_size; - rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, - 0, fd_prot.prot_sg, 0); + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, + 0, fd_prot.prot_sg, 0); if (rc) { kfree(fd_prot.prot_sg); kfree(fd_prot.prot_buf); return rc; } + sbc_dif_copy_prot(cmd, sectors, true, fd_prot.prot_sg, 0); kfree(fd_prot.prot_sg); kfree(fd_prot.prot_buf); } @@ -663,13 +664,14 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - rc = sbc_dif_verify_write(cmd, cmd->t_task_lba, sectors, - 0, fd_prot.prot_sg, 0); + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, + 0, cmd->t_prot_sg, 0); if (rc) { kfree(fd_prot.prot_sg); kfree(fd_prot.prot_buf); return rc; } + sbc_dif_copy_prot(cmd, sectors, false, fd_prot.prot_sg, 0); } ret = fd_do_rw(cmd, sgl, sgl_nents, 1); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index d16489b6a1a4..55315fd0f5d3 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -403,10 +403,7 @@ static struct rd_dev_sg_table *rd_get_prot_table(struct rd_dev *rd_dev, u32 page return NULL; } -typedef sense_reason_t (*dif_verify)(struct se_cmd *, sector_t, unsigned int, - unsigned int, struct scatterlist *, int); - -static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify) +static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) { struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); @@ -466,7 +463,16 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify) #endif /* !CONFIG_ARCH_HAS_SG_CHAIN */ - rc = dif_verify(cmd, cmd->t_task_lba, sectors, 0, prot_sg, prot_offset); + if (is_read) + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + prot_sg, prot_offset); + else + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + cmd->t_prot_sg, 0); + + if (!rc) + sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset); + if (need_to_release) kfree(prot_sg); @@ -512,7 +518,7 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type && data_direction == DMA_TO_DEVICE) { - rc = rd_do_prot_rw(cmd, sbc_dif_verify_write); + rc = rd_do_prot_rw(cmd, false); if (rc) return rc; } @@ -580,7 +586,7 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type && data_direction == DMA_FROM_DEVICE) { - rc = rd_do_prot_rw(cmd, sbc_dif_verify_read); + rc = rd_do_prot_rw(cmd, true); if (rc) return rc; } diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 733824e3825f..ca95fab3521a 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1266,9 +1266,8 @@ check_ref: return 0; } -static void -sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, - struct scatterlist *sg, int sg_off) +void sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, + struct scatterlist *sg, int sg_off) { struct se_device *dev = cmd->se_dev; struct scatterlist *psg; @@ -1309,68 +1308,11 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, kunmap_atomic(paddr); } } +EXPORT_SYMBOL(sbc_dif_copy_prot); sense_reason_t -sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) -{ - struct se_device *dev = cmd->se_dev; - struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = cmd->t_prot_sg; - sector_t sector = start; - void *daddr, *paddr; - int i, j, offset = 0; - sense_reason_t rc; - - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { - - if (offset >= psg->length) { - kunmap_atomic(paddr); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; - } - - sdt = paddr + offset; - - pr_debug("DIF WRITE sector: %llu guard_tag: 0x%04x" - " app_tag: 0x%04x ref_tag: %u\n", - (unsigned long long)sector, sdt->guard_tag, - sdt->app_tag, be32_to_cpu(sdt->ref_tag)); - - rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector, - ei_lba); - if (rc) { - kunmap_atomic(paddr); - kunmap_atomic(daddr); - cmd->bad_sector = sector; - return rc; - } - - sector++; - ei_lba++; - offset += sizeof(struct se_dif_v1_tuple); - } - - kunmap_atomic(paddr); - kunmap_atomic(daddr); - } - if (!sg) - return 0; - - sbc_dif_copy_prot(cmd, sectors, false, sg, sg_off); - - return 0; -} -EXPORT_SYMBOL(sbc_dif_verify_write); - -static sense_reason_t -__sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) +sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, + unsigned int ei_lba, struct scatterlist *sg, int sg_off) { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; @@ -1426,28 +1368,4 @@ __sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, return 0; } - -sense_reason_t -sbc_dif_read_strip(struct se_cmd *cmd) -{ - struct se_device *dev = cmd->se_dev; - u32 sectors = cmd->prot_length / dev->prot_length; - - return __sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, 0, - cmd->t_prot_sg, 0); -} - -sense_reason_t -sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) -{ - sense_reason_t rc; - - rc = __sbc_dif_verify_read(cmd, start, sectors, ei_lba, sg, sg_off); - if (rc) - return rc; - - sbc_dif_copy_prot(cmd, sectors, true, sg, sg_off); - return 0; -} -EXPORT_SYMBOL(sbc_dif_verify_read); +EXPORT_SYMBOL(sbc_dif_verify); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 675f2d9d1f14..105f4fd0fad4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1766,8 +1766,8 @@ static int target_write_prot_action(struct se_cmd *cmd) break; sectors = cmd->data_length >> ilog2(cmd->se_dev->dev_attrib.block_size); - cmd->pi_err = sbc_dif_verify_write(cmd, cmd->t_task_lba, - sectors, 0, NULL, 0); + cmd->pi_err = sbc_dif_verify(cmd, cmd->t_task_lba, + sectors, 0, cmd->t_prot_sg, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); @@ -1991,16 +1991,17 @@ static void transport_handle_queue_full( static bool target_read_prot_action(struct se_cmd *cmd) { - sense_reason_t rc; - switch (cmd->prot_op) { case TARGET_PROT_DIN_STRIP: if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) { - rc = sbc_dif_read_strip(cmd); - if (rc) { - cmd->pi_err = rc; + u32 sectors = cmd->data_length >> + ilog2(cmd->se_dev->dev_attrib.block_size); + + cmd->pi_err = sbc_dif_verify(cmd, cmd->t_task_lba, + sectors, 0, cmd->t_prot_sg, + 0); + if (cmd->pi_err) return true; - } } break; case TARGET_PROT_DIN_INSERT: diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 5f1225706993..47fafec556f8 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -84,12 +84,10 @@ sense_reason_t sbc_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb), void *priv); void sbc_dif_generate(struct se_cmd *); -sense_reason_t sbc_dif_verify_write(struct se_cmd *, sector_t, unsigned int, +sense_reason_t sbc_dif_verify(struct se_cmd *, sector_t, unsigned int, unsigned int, struct scatterlist *, int); -sense_reason_t sbc_dif_verify_read(struct se_cmd *, sector_t, unsigned int, - unsigned int, struct scatterlist *, int); -sense_reason_t sbc_dif_read_strip(struct se_cmd *); - +void sbc_dif_copy_prot(struct se_cmd *, unsigned int, bool, + struct scatterlist *, int); void transport_set_vpd_proto_id(struct t10_vpd *, unsigned char *); int transport_set_vpd_assoc(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident_type(struct t10_vpd *, unsigned char *); From 8287fa5fb43c3fa05748560b8e13b91c4e88f586 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 19 Apr 2015 20:27:20 +0300 Subject: [PATCH 032/839] target/file: Remove fd_prot bounce buffer The reason this bounce buffer exists is to allow code reuse between rd_mcp and fileio in DIF mode. But the fact is, that this bounce is really not needed at all, we can simply call sbc_dif_verify on cmd->t_prot_sg and use it for file IO. This also removes fd_do_prot_rw as fd_do_rw was generalised to receive file pointer, block size (8 bytes for DIF data) and total data length. (Fix apply breakage from commit c836777 - nab) Tested-by: Akinobu Mita Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.c | 140 ++++++++---------------------- drivers/target/target_core_file.h | 6 -- 2 files changed, 34 insertions(+), 112 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index deec7560f835..fe6c19c1e001 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -258,75 +258,15 @@ static void fd_free_device(struct se_device *dev) kfree(fd_dev); } -static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot, - int is_write) +static int fd_do_rw(struct se_cmd *cmd, struct file *fd, + u32 block_size, struct scatterlist *sgl, + u32 sgl_nents, u32 data_length, int is_write) { - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *dev = FD_DEV(se_dev); - struct file *prot_fd = dev->fd_prot_file; - loff_t pos = (cmd->t_task_lba * se_dev->prot_length); - unsigned char *buf; - u32 prot_size; - int rc, ret = 1; - - prot_size = (cmd->data_length / se_dev->dev_attrib.block_size) * - se_dev->prot_length; - - if (!is_write) { - fd_prot->prot_buf = kzalloc(prot_size, GFP_KERNEL); - if (!fd_prot->prot_buf) { - pr_err("Unable to allocate fd_prot->prot_buf\n"); - return -ENOMEM; - } - buf = fd_prot->prot_buf; - - fd_prot->prot_sg_nents = 1; - fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist), - GFP_KERNEL); - if (!fd_prot->prot_sg) { - pr_err("Unable to allocate fd_prot->prot_sg\n"); - kfree(fd_prot->prot_buf); - return -ENOMEM; - } - sg_init_table(fd_prot->prot_sg, fd_prot->prot_sg_nents); - sg_set_buf(fd_prot->prot_sg, buf, prot_size); - } - - if (is_write) { - rc = kernel_write(prot_fd, fd_prot->prot_buf, prot_size, pos); - if (rc < 0 || prot_size != rc) { - pr_err("kernel_write() for fd_do_prot_rw failed:" - " %d\n", rc); - ret = -EINVAL; - } - } else { - rc = kernel_read(prot_fd, pos, fd_prot->prot_buf, prot_size); - if (rc < 0) { - pr_err("kernel_read() for fd_do_prot_rw failed:" - " %d\n", rc); - ret = -EINVAL; - } - } - - if (is_write || ret < 0) { - kfree(fd_prot->prot_sg); - kfree(fd_prot->prot_buf); - } - - return ret; -} - -static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, - u32 sgl_nents, int is_write) -{ - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *dev = FD_DEV(se_dev); - struct file *fd = dev->fd_file; struct scatterlist *sg; struct iov_iter iter; struct bio_vec *bvec; ssize_t len = 0; - loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size); + loff_t pos = (cmd->t_task_lba * block_size); int ret = 0, i; bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL); @@ -352,7 +292,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, kfree(bvec); if (is_write) { - if (ret < 0 || ret != cmd->data_length) { + if (ret < 0 || ret != data_length) { pr_err("%s() write returned %d\n", __func__, ret); return (ret < 0 ? ret : -EINVAL); } @@ -363,10 +303,10 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, * block_device. */ if (S_ISBLK(file_inode(fd)->i_mode)) { - if (ret < 0 || ret != cmd->data_length) { + if (ret < 0 || ret != data_length) { pr_err("%s() returned %d, expecting %u for " "S_ISBLK\n", __func__, ret, - cmd->data_length); + data_length); return (ret < 0 ? ret : -EINVAL); } } else { @@ -612,7 +552,9 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { struct se_device *dev = cmd->se_dev; - struct fd_prot fd_prot; + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *file = fd_dev->fd_file; + struct file *pfile = fd_dev->fd_prot_file; sense_reason_t rc; int ret = 0; /* @@ -630,51 +572,39 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * physical memory addresses to struct iovec virtual memory. */ if (data_direction == DMA_FROM_DEVICE) { - memset(&fd_prot, 0, sizeof(struct fd_prot)); - if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { - ret = fd_do_prot_rw(cmd, &fd_prot, false); + ret = fd_do_rw(cmd, pfile, dev->prot_length, + cmd->t_prot_sg, cmd->t_prot_nents, + cmd->prot_length, 0); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - ret = fd_do_rw(cmd, sgl, sgl_nents, 0); + ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size, + sgl, sgl_nents, cmd->data_length, 0); if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { - u32 sectors = cmd->data_length / dev->dev_attrib.block_size; - - rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, - 0, fd_prot.prot_sg, 0); - if (rc) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); - return rc; - } - sbc_dif_copy_prot(cmd, sectors, true, fd_prot.prot_sg, 0); - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); - } - } else { - memset(&fd_prot, 0, sizeof(struct fd_prot)); - - if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { - u32 sectors = cmd->data_length / dev->dev_attrib.block_size; - - ret = fd_do_prot_rw(cmd, &fd_prot, false); - if (ret < 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + u32 sectors = cmd->data_length >> + ilog2(dev->dev_attrib.block_size); rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, cmd->t_prot_sg, 0); - if (rc) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); + if (rc) + return rc; + } + } else { + if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { + u32 sectors = cmd->data_length >> + ilog2(dev->dev_attrib.block_size); + + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, + 0, cmd->t_prot_sg, 0); + if (rc) return rc; - } - sbc_dif_copy_prot(cmd, sectors, false, fd_prot.prot_sg, 0); } - ret = fd_do_rw(cmd, sgl, sgl_nents, 1); + ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size, + sgl, sgl_nents, cmd->data_length, 1); /* * Perform implicit vfs_fsync_range() for fd_do_writev() ops * for SCSI WRITEs with Forced Unit Access (FUA) set. @@ -683,7 +613,6 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (ret > 0 && dev->dev_attrib.emulate_fua_write > 0 && (cmd->se_cmd_flags & SCF_FUA)) { - struct fd_dev *fd_dev = FD_DEV(dev); loff_t start = cmd->t_task_lba * dev->dev_attrib.block_size; loff_t end; @@ -697,17 +626,16 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { - ret = fd_do_prot_rw(cmd, &fd_prot, true); + ret = fd_do_rw(cmd, pfile, dev->prot_length, + cmd->t_prot_sg, cmd->t_prot_nents, + cmd->prot_length, 1); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } } - if (ret < 0) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); + if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } if (ret) target_complete_cmd(cmd, SAM_STAT_GOOD); diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index 182cbb295039..068966fce308 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -21,12 +21,6 @@ #define FDBD_HAS_BUFFERED_IO_WCE 0x04 #define FDBD_FORMAT_UNIT_SIZE 2048 -struct fd_prot { - unsigned char *prot_buf; - struct scatterlist *prot_sg; - u32 prot_sg_nents; -}; - struct fd_dev { struct se_device dev; From 414e4627277e38e3a557c53c9def7fe9f1ec9b2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 19 Apr 2015 20:27:21 +0300 Subject: [PATCH 033/839] target/sbc: Fix sbc_dif_verify inconsistent map/unmap Fix map/unmap consistency and get rid of a redundant local variable psg. Reported-by: Akinobu Mita Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index ca95fab3521a..3df2cd538478 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1312,30 +1312,30 @@ EXPORT_SYMBOL(sbc_dif_copy_prot); sense_reason_t sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) + unsigned int ei_lba, struct scatterlist *psg, int psg_off) { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = sg; + struct scatterlist *dsg; sector_t sector = start; void *daddr, *paddr; - int i, j, offset = sg_off; + int i, j; sense_reason_t rc; for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - paddr = kmap_atomic(sg_page(psg)) + sg->offset; + paddr = kmap_atomic(sg_page(psg)) + psg->offset; for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { - if (offset >= psg->length) { - kunmap_atomic(paddr); + if (psg_off >= psg->length) { + kunmap_atomic(paddr - psg->offset); psg = sg_next(psg); paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; + psg_off = 0; } - sdt = paddr + offset; + sdt = paddr + psg_off; pr_debug("DIF READ sector: %llu guard_tag: 0x%04x" " app_tag: 0x%04x ref_tag: %u\n", @@ -1344,26 +1344,26 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, if (sdt->app_tag == cpu_to_be16(0xffff)) { sector++; - offset += sizeof(struct se_dif_v1_tuple); + psg_off += sizeof(struct se_dif_v1_tuple); continue; } rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector, ei_lba); if (rc) { - kunmap_atomic(paddr); - kunmap_atomic(daddr); + kunmap_atomic(paddr - psg->offset); + kunmap_atomic(daddr - dsg->offset); cmd->bad_sector = sector; return rc; } sector++; ei_lba++; - offset += sizeof(struct se_dif_v1_tuple); + psg_off += sizeof(struct se_dif_v1_tuple); } - kunmap_atomic(paddr); - kunmap_atomic(daddr); + kunmap_atomic(paddr - psg->offset); + kunmap_atomic(daddr - dsg->offset); } return 0; From c04a6091c951c88636a569cdd29feb65fb5003af Mon Sep 17 00:00:00 2001 From: Christophe Vu-Brugier Date: Sun, 19 Apr 2015 22:18:33 +0200 Subject: [PATCH 034/839] iscsi-target: remove support for obsolete markers Support for markers is currently broken because of a bug in iscsi_enforce_integrity_rules(): the "IFMarkInt_Reject" and "OFMarkInt_Reject" variables are always equal to 1 in iscsi_enforce_integrity_rules(). Moreover, fixed interval markers keys (IFMarker, OFMarker, IFMarkInt and OFMarkInt) are obsolete according to iSCSI RFC 7143: >From http://tools.ietf.org/html/rfc7143#section-13.25: 13.25. Obsoleted Keys This document obsoletes the following keys defined in [RFC3720]: IFMarker, OFMarker, OFMarkInt, and IFMarkInt. However, iSCSI implementations compliant to this document may still receive these obsoleted keys -- i.e., in a responder role -- in a text negotiation. When an IFMarker or OFMarker key is received, a compliant iSCSI implementation SHOULD respond with the constant "Reject" value. The implementation MAY alternatively respond with a "No" value. However, the implementation MUST NOT respond with a "NotUnderstood" value for either of these keys. When an IFMarkInt or OFMarkInt key is received, a compliant iSCSI implementation MUST respond with the constant "Reject" value. The implementation MUST NOT respond with a "NotUnderstood" value for either of these keys. This patch disables markers by turning the corresponding parameters to read-only. The default value of IFMarker and OFMarker remains "No" but the user cannot change it to "Yes" anymore. The new value of IFMarkInt and OFMarkInt is "Reject". (Drop left-over iscsi_get_value_from_number_range + make configfs parameters attrs R/W nops - nab) Signed-off-by: Christophe Vu-Brugier Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 20 +- drivers/target/iscsi/iscsi_target_erl0.c | 53 ---- drivers/target/iscsi/iscsi_target_erl0.h | 1 - drivers/target/iscsi/iscsi_target_login.c | 58 +--- drivers/target/iscsi/iscsi_target_login.h | 1 - .../target/iscsi/iscsi_target_parameters.c | 267 +----------------- .../target/iscsi/iscsi_target_parameters.h | 11 +- drivers/target/iscsi/iscsi_target_util.c | 48 ---- drivers/target/iscsi/iscsi_target_util.h | 1 - include/target/iscsi/iscsi_target_core.h | 10 - 10 files changed, 16 insertions(+), 454 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 74e6114ff18f..21620c751071 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2736,11 +2736,7 @@ static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) cmd->iov_data_count = iov_count; cmd->tx_size = tx_size; - /* sendpage is preferred but can't insert markers */ - if (!conn->conn_ops->IFMarker) - ret = iscsit_fe_sendpage_sg(cmd, conn); - else - ret = iscsit_send_tx_data(cmd, conn, 0); + ret = iscsit_fe_sendpage_sg(cmd, conn); iscsit_unmap_iovec(cmd); @@ -4072,17 +4068,9 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) " opcode while ERL=0, closing iSCSI connection.\n"); return -1; } - if (!conn->conn_ops->OFMarker) { - pr_err("Unable to recover from unknown" - " opcode while OFMarker=No, closing iSCSI" - " connection.\n"); - return -1; - } - if (iscsit_recover_from_unknown_opcode(conn) < 0) { - pr_err("Unable to recover from unknown" - " opcode, closing iSCSI connection.\n"); - return -1; - } + pr_err("Unable to recover from unknown opcode while OFMarker=No," + " closing iSCSI connection.\n"); + ret = -1; break; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 959a14c9dd5d..210f6e4830e3 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -956,56 +956,3 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) iscsit_handle_connection_cleanup(conn); } - -/* - * This is the simple function that makes the magic of - * sync and steering happen in the follow paradoxical order: - * - * 0) Receive conn->of_marker (bytes left until next OFMarker) - * bytes into an offload buffer. When we pass the exact number - * of bytes in conn->of_marker, iscsit_dump_data_payload() and hence - * rx_data() will automatically receive the identical u32 marker - * values and store it in conn->of_marker_offset; - * 1) Now conn->of_marker_offset will contain the offset to the start - * of the next iSCSI PDU. Dump these remaining bytes into another - * offload buffer. - * 2) We are done! - * Next byte in the TCP stream will contain the next iSCSI PDU! - * Cool Huh?! - */ -int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn) -{ - /* - * Make sure the remaining bytes to next maker is a sane value. - */ - if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) { - pr_err("Remaining bytes to OFMarker: %u exceeds" - " OFMarkInt bytes: %u.\n", conn->of_marker, - conn->conn_ops->OFMarkInt * 4); - return -1; - } - - pr_debug("Advancing %u bytes in TCP stream to get to the" - " next OFMarker.\n", conn->of_marker); - - if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0) - return -1; - - /* - * Make sure the offset marker we retrived is a valid value. - */ - if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) + - conn->conn_ops->MaxRecvDataSegmentLength)) { - pr_err("OfMarker offset value: %u exceeds limit.\n", - conn->of_marker_offset); - return -1; - } - - pr_debug("Discarding %u bytes of TCP stream to get to the" - " next iSCSI Opcode.\n", conn->of_marker_offset); - - if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0) - return -1; - - return 0; -} diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index 21acc9a06376..a9e2f9497fb2 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -10,6 +10,5 @@ extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); -extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 70d799dfab03..3d0fe4ff5590 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -410,8 +410,6 @@ static int iscsi_login_zero_tsih_s2( if (iscsi_change_param_sprintf(conn, "ErrorRecoveryLevel=%d", na->default_erl)) return -1; - if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0) - return -1; /* * Set RDMAExtensions=Yes by default for iSER enabled network portals */ @@ -477,59 +475,6 @@ check_prot: return 0; } -/* - * Remove PSTATE_NEGOTIATE for the four FIM related keys. - * The Initiator node will be able to enable FIM by proposing them itself. - */ -int iscsi_login_disable_FIM_keys( - struct iscsi_param_list *param_list, - struct iscsi_conn *conn) -{ - struct iscsi_param *param; - - param = iscsi_find_param_from_key("OFMarker", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " OFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("OFMarkInt", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("IFMarker", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("IFMarkInt", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - return 0; -} - static int iscsi_login_non_zero_tsih_s1( struct iscsi_conn *conn, unsigned char *buf) @@ -616,7 +561,7 @@ static int iscsi_login_non_zero_tsih_s2( if (iscsi_change_param_sprintf(conn, "TargetPortalGroupTag=%hu", sess->tpg->tpgt)) return -1; - return iscsi_login_disable_FIM_keys(conn->param_list, conn); + return 0; } int iscsi_login_post_auth_non_zero_tsih( @@ -765,7 +710,6 @@ int iscsi_post_login_handler( conn->conn_state = TARG_CONN_STATE_LOGGED_IN; iscsi_set_connection_parameters(conn->conn_ops, conn->param_list); - iscsit_set_sync_and_steering_values(conn); /* * SCSI Initiator -> SCSI Target Port Mapping */ diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 29d098324b7f..1c7358081533 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -16,6 +16,5 @@ extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); -extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); #endif /*** ISCSI_TARGET_LOGIN_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index d4f9e9645697..be336df0a887 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -34,13 +34,6 @@ int iscsi_login_rx_data( iov.iov_len = length; iov.iov_base = buf; - /* - * Initial Marker-less Interval. - * Add the values regardless of IFMarker/OFMarker, considering - * it may not be negoitated yet. - */ - conn->of_marker += length; - rx_got = rx_data(conn, &iov, 1, length); if (rx_got != length) { pr_err("rx_data returned %d, expecting %d.\n", @@ -72,13 +65,6 @@ int iscsi_login_tx_data( iov_cnt++; } - /* - * Initial Marker-less Interval. - * Add the values regardless of IFMarker/OFMarker, considering - * it may not be negoitated yet. - */ - conn->if_marker += length; - tx_sent = tx_data(conn, &iov[0], iov_cnt, length); if (tx_sent != length) { pr_err("tx_data returned %d, expecting %d.\n", @@ -97,12 +83,6 @@ void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops) "CRC32C" : "None"); pr_debug("MaxRecvDataSegmentLength: %u\n", conn_ops->MaxRecvDataSegmentLength); - pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No"); - pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No"); - if (conn_ops->OFMarker) - pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt); - if (conn_ops->IFMarker) - pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt); } void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops) @@ -194,10 +174,6 @@ static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *para case TYPERANGE_DIGEST: param->type = TYPE_VALUE_LIST | TYPE_STRING; break; - case TYPERANGE_MARKINT: - param->type = TYPE_NUMBER_RANGE; - param->type_range |= TYPERANGE_1_TO_65535; - break; case TYPERANGE_ISCSINAME: case TYPERANGE_SESSIONTYPE: case TYPERANGE_TARGETADDRESS: @@ -422,13 +398,13 @@ int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr) param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT, PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, - TYPERANGE_MARKINT, USE_INITIAL_ONLY); + TYPERANGE_UTF8, USE_INITIAL_ONLY); if (!param) goto out; param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT, PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, - TYPERANGE_MARKINT, USE_INITIAL_ONLY); + TYPERANGE_UTF8, USE_INITIAL_ONLY); if (!param) goto out; /* @@ -524,9 +500,9 @@ int iscsi_set_keys_to_negotiate( } else if (!strcmp(param->name, OFMARKER)) { SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, IFMARKINT)) { - SET_PSTATE_NEGOTIATE(param); + SET_PSTATE_REJECT(param); } else if (!strcmp(param->name, OFMARKINT)) { - SET_PSTATE_NEGOTIATE(param); + SET_PSTATE_REJECT(param); } else if (!strcmp(param->name, RDMAEXTENSIONS)) { if (iser) SET_PSTATE_NEGOTIATE(param); @@ -906,91 +882,6 @@ static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_pt return 0; } -static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value) -{ - char *left_val_ptr = NULL, *right_val_ptr = NULL; - char *tilde_ptr = NULL; - u32 left_val, right_val, local_left_val; - - if (strcmp(param->name, IFMARKINT) && - strcmp(param->name, OFMARKINT)) { - pr_err("Only parameters \"%s\" or \"%s\" may contain a" - " numerical range value.\n", IFMARKINT, OFMARKINT); - return -1; - } - - if (IS_PSTATE_PROPOSER(param)) - return 0; - - tilde_ptr = strchr(value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range indicator" - " \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = value; - right_val_ptr = value + strlen(left_val_ptr) + 1; - - if (iscsi_check_numerical_value(param, left_val_ptr) < 0) - return -1; - if (iscsi_check_numerical_value(param, right_val_ptr) < 0) - return -1; - - left_val = simple_strtoul(left_val_ptr, NULL, 0); - right_val = simple_strtoul(right_val_ptr, NULL, 0); - *tilde_ptr = '~'; - - if (right_val < left_val) { - pr_err("Numerical range for parameter \"%s\" contains" - " a right value which is less than the left.\n", - param->name); - return -1; - } - - /* - * For now, enforce reasonable defaults for [I,O]FMarkInt. - */ - tilde_ptr = strchr(param->value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range indicator" - " \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = param->value; - right_val_ptr = param->value + strlen(left_val_ptr) + 1; - - local_left_val = simple_strtoul(left_val_ptr, NULL, 0); - *tilde_ptr = '~'; - - if (param->set_param) { - if ((left_val < local_left_val) || - (right_val < local_left_val)) { - pr_err("Passed value range \"%u~%u\" is below" - " minimum left value \"%u\" for key \"%s\"," - " rejecting.\n", left_val, right_val, - local_left_val, param->name); - return -1; - } - } else { - if ((left_val < local_left_val) && - (right_val < local_left_val)) { - pr_err("Received value range \"%u~%u\" is" - " below minimum left value \"%u\" for key" - " \"%s\", rejecting.\n", left_val, right_val, - local_left_val, param->name); - SET_PSTATE_REJECT(param); - if (iscsi_update_param_value(param, REJECT) < 0) - return -1; - } - } - - return 0; -} - static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value) { if (IS_PSTATE_PROPOSER(param)) @@ -1027,33 +918,6 @@ static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *val return 0; } -/* - * This function is used to pick a value range number, currently just - * returns the lesser of both right values. - */ -static char *iscsi_get_value_from_number_range( - struct iscsi_param *param, - char *value) -{ - char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL; - u32 acceptor_right_value, proposer_right_value; - - tilde_ptr1 = strchr(value, '~'); - if (!tilde_ptr1) - return NULL; - *tilde_ptr1++ = '\0'; - proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0); - - tilde_ptr2 = strchr(param->value, '~'); - if (!tilde_ptr2) - return NULL; - *tilde_ptr2++ = '\0'; - acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0); - - return (acceptor_right_value >= proposer_right_value) ? - tilde_ptr1 : tilde_ptr2; -} - static char *iscsi_check_valuelist_for_support( struct iscsi_param *param, char *value) @@ -1203,14 +1067,6 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, pr_debug("Updated %s to target MXDSL value: %s\n", param->name, param->value); } - - } else if (IS_TYPE_NUMBER_RANGE(param)) { - negoitated_value = iscsi_get_value_from_number_range( - param, value); - if (!negoitated_value) - return -1; - if (iscsi_update_param_value(param, negoitated_value) < 0) - return -1; } else if (IS_TYPE_VALUE_LIST(param)) { negoitated_value = iscsi_check_valuelist_for_support( param, value); @@ -1239,47 +1095,7 @@ static int iscsi_check_proposer_state(struct iscsi_param *param, char *value) return -1; } - if (IS_TYPE_NUMBER_RANGE(param)) { - u32 left_val = 0, right_val = 0, recieved_value = 0; - char *left_val_ptr = NULL, *right_val_ptr = NULL; - char *tilde_ptr = NULL; - - if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) { - if (iscsi_update_param_value(param, value) < 0) - return -1; - return 0; - } - - tilde_ptr = strchr(value, '~'); - if (tilde_ptr) { - pr_err("Illegal \"~\" in response for \"%s\".\n", - param->name); - return -1; - } - tilde_ptr = strchr(param->value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range" - " indicator \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = param->value; - right_val_ptr = param->value + strlen(left_val_ptr) + 1; - left_val = simple_strtoul(left_val_ptr, NULL, 0); - right_val = simple_strtoul(right_val_ptr, NULL, 0); - recieved_value = simple_strtoul(value, NULL, 0); - - *tilde_ptr = '~'; - - if ((recieved_value < left_val) || - (recieved_value > right_val)) { - pr_err("Illegal response \"%s=%u\", value must" - " be between %u and %u.\n", param->name, - recieved_value, left_val, right_val); - return -1; - } - } else if (IS_TYPE_VALUE_LIST(param)) { + if (IS_TYPE_VALUE_LIST(param)) { char *comma_ptr = NULL, *tmp_ptr = NULL; comma_ptr = strchr(value, ','); @@ -1361,9 +1177,6 @@ static int iscsi_check_value(struct iscsi_param *param, char *value) } else if (IS_TYPE_NUMBER(param)) { if (iscsi_check_numerical_value(param, value) < 0) return -1; - } else if (IS_TYPE_NUMBER_RANGE(param)) { - if (iscsi_check_numerical_range_value(param, value) < 0) - return -1; } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) { if (iscsi_check_string_or_list_value(param, value) < 0) return -1; @@ -1483,8 +1296,6 @@ static int iscsi_enforce_integrity_rules( char *tmpptr; u8 DataSequenceInOrder = 0; u8 ErrorRecoveryLevel = 0, SessionType = 0; - u8 IFMarker = 0, OFMarker = 0; - u8 IFMarkInt_Reject = 1, OFMarkInt_Reject = 1; u32 FirstBurstLength = 0, MaxBurstLength = 0; struct iscsi_param *param = NULL; @@ -1503,28 +1314,12 @@ static int iscsi_enforce_integrity_rules( if (!strcmp(param->name, MAXBURSTLENGTH)) MaxBurstLength = simple_strtoul(param->value, &tmpptr, 0); - if (!strcmp(param->name, IFMARKER)) - if (!strcmp(param->value, YES)) - IFMarker = 1; - if (!strcmp(param->name, OFMARKER)) - if (!strcmp(param->value, YES)) - OFMarker = 1; - if (!strcmp(param->name, IFMARKINT)) - if (!strcmp(param->value, REJECT)) - IFMarkInt_Reject = 1; - if (!strcmp(param->name, OFMARKINT)) - if (!strcmp(param->value, REJECT)) - OFMarkInt_Reject = 1; } list_for_each_entry(param, ¶m_list->param_list, p_list) { if (!(param->phase & phase)) continue; - if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) && - (strcmp(param->name, IFMARKER) && - strcmp(param->name, OFMARKER) && - strcmp(param->name, IFMARKINT) && - strcmp(param->name, OFMARKINT)))) + if (!SessionType && !IS_PSTATE_ACCEPTOR(param)) continue; if (!strcmp(param->name, MAXOUTSTANDINGR2T) && DataSequenceInOrder && (ErrorRecoveryLevel > 0)) { @@ -1556,38 +1351,6 @@ static int iscsi_enforce_integrity_rules( param->name, param->value); } } - if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) { - if (iscsi_update_param_value(param, NO) < 0) - return -1; - IFMarker = 0; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) { - if (iscsi_update_param_value(param, NO) < 0) - return -1; - OFMarker = 0; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, IFMARKINT) && !IFMarker) { - if (!strcmp(param->value, REJECT)) - continue; - param->state &= ~PSTATE_NEGOTIATE; - if (iscsi_update_param_value(param, IRRELEVANT) < 0) - return -1; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, OFMARKINT) && !OFMarker) { - if (!strcmp(param->value, REJECT)) - continue; - param->state &= ~PSTATE_NEGOTIATE; - if (iscsi_update_param_value(param, IRRELEVANT) < 0) - return -1; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } } return 0; @@ -1824,24 +1587,6 @@ void iscsi_set_connection_parameters( */ pr_debug("MaxRecvDataSegmentLength: %u\n", ops->MaxRecvDataSegmentLength); - } else if (!strcmp(param->name, OFMARKER)) { - ops->OFMarker = !strcmp(param->value, YES); - pr_debug("OFMarker: %s\n", - param->value); - } else if (!strcmp(param->name, IFMARKER)) { - ops->IFMarker = !strcmp(param->value, YES); - pr_debug("IFMarker: %s\n", - param->value); - } else if (!strcmp(param->name, OFMARKINT)) { - ops->OFMarkInt = - simple_strtoul(param->value, &tmpptr, 0); - pr_debug("OFMarkInt: %s\n", - param->value); - } else if (!strcmp(param->name, IFMARKINT)) { - ops->IFMarkInt = - simple_strtoul(param->value, &tmpptr, 0); - pr_debug("IFMarkInt: %s\n", - param->value); } else if (!strcmp(param->name, INITIATORRECVDATASEGMENTLENGTH)) { ops->InitiatorRecvDataSegmentLength = simple_strtoul(param->value, &tmpptr, 0); diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index a47046a752aa..a0751e3f0813 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -138,8 +138,8 @@ extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, #define INITIAL_SESSIONTYPE NORMAL #define INITIAL_IFMARKER NO #define INITIAL_OFMARKER NO -#define INITIAL_IFMARKINT "2048~65535" -#define INITIAL_OFMARKINT "2048~65535" +#define INITIAL_IFMARKINT REJECT +#define INITIAL_OFMARKINT REJECT /* * Initial values for iSER parameters following RFC-5046 Section 6 @@ -239,10 +239,9 @@ extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, #define TYPERANGE_AUTH 0x0200 #define TYPERANGE_DIGEST 0x0400 #define TYPERANGE_ISCSINAME 0x0800 -#define TYPERANGE_MARKINT 0x1000 -#define TYPERANGE_SESSIONTYPE 0x2000 -#define TYPERANGE_TARGETADDRESS 0x4000 -#define TYPERANGE_UTF8 0x8000 +#define TYPERANGE_SESSIONTYPE 0x1000 +#define TYPERANGE_TARGETADDRESS 0x2000 +#define TYPERANGE_UTF8 0x4000 #define IS_TYPERANGE_0_TO_2(p) ((p)->type_range & TYPERANGE_0_TO_2) #define IS_TYPERANGE_0_TO_3600(p) ((p)->type_range & TYPERANGE_0_TO_3600) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b18edda3e8af..37d23e5e08dc 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -809,54 +809,6 @@ void iscsit_inc_session_usage_count(struct iscsi_session *sess) spin_unlock_bh(&sess->session_usage_lock); } -/* - * Setup conn->if_marker and conn->of_marker values based upon - * the initial marker-less interval. (see iSCSI v19 A.2) - */ -int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn) -{ - int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0; - /* - * IFMarkInt and OFMarkInt are negotiated as 32-bit words. - */ - u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4); - u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4); - - if (conn->conn_ops->OFMarker) { - /* - * Account for the first Login Command received not - * via iscsi_recv_msg(). - */ - conn->of_marker += ISCSI_HDR_LEN; - if (conn->of_marker <= OFMarkInt) { - conn->of_marker = (OFMarkInt - conn->of_marker); - } else { - login_ofmarker_count = (conn->of_marker / OFMarkInt); - next_marker = (OFMarkInt * (login_ofmarker_count + 1)) + - (login_ofmarker_count * MARKER_SIZE); - conn->of_marker = (next_marker - conn->of_marker); - } - conn->of_marker_offset = 0; - pr_debug("Setting OFMarker value to %u based on Initial" - " Markerless Interval.\n", conn->of_marker); - } - - if (conn->conn_ops->IFMarker) { - if (conn->if_marker <= IFMarkInt) { - conn->if_marker = (IFMarkInt - conn->if_marker); - } else { - login_ifmarker_count = (conn->if_marker / IFMarkInt); - next_marker = (IFMarkInt * (login_ifmarker_count + 1)) + - (login_ifmarker_count * MARKER_SIZE); - conn->if_marker = (next_marker - conn->if_marker); - } - pr_debug("Setting IFMarker value to %u based on Initial" - " Markerless Interval.\n", conn->if_marker); - } - - return 0; -} - struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid) { struct iscsi_conn *conn; diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 1ab754a671ff..995f1cb29d0e 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -34,7 +34,6 @@ extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern int iscsit_check_session_usage_count(struct iscsi_session *); extern void iscsit_dec_session_usage_count(struct iscsi_session *); extern void iscsit_inc_session_usage_count(struct iscsi_session *); -extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *); extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16); extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16); extern void iscsit_check_conn_usage_count(struct iscsi_conn *); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 54e7af301888..39f3d181d39d 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -248,10 +248,6 @@ struct iscsi_conn_ops { u8 DataDigest; /* [0,1] == [None,CRC32C] */ u32 MaxRecvDataSegmentLength; /* [512..2**24-1] */ u32 MaxXmitDataSegmentLength; /* [512..2**24-1] */ - u8 OFMarker; /* [0,1] == [No,Yes] */ - u8 IFMarker; /* [0,1] == [No,Yes] */ - u32 OFMarkInt; /* [1..65535] */ - u32 IFMarkInt; /* [1..65535] */ /* * iSER specific connection parameters */ @@ -532,12 +528,6 @@ struct iscsi_conn { u32 exp_statsn; /* Per connection status sequence number */ u32 stat_sn; - /* IFMarkInt's Current Value */ - u32 if_marker; - /* OFMarkInt's Current Value */ - u32 of_marker; - /* Used for calculating OFMarker offset to next PDU */ - u32 of_marker_offset; #define IPV6_ADDRESS_SPACE 48 unsigned char login_ip[IPV6_ADDRESS_SPACE]; unsigned char local_ip[IPV6_ADDRESS_SPACE]; From 814e5b45182f4aaf6c0b0deac7104bc2cba5109e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Apr 2015 15:00:30 +0200 Subject: [PATCH 035/839] target: fix DPO and FUA bit checks Drivers may override the WCE flag, in which case the DPOFUA flag in MODE SENSE might differ from the check used to reject invalid FUA bits in sbc_check_dpofua. Also now that we reject invalid FUA bits early there is no need to duplicate the same buggy check down in the fileio code. As the DPOFUA flag controls th support for FUA bits on read and write commands as well as DPO key off all the checks off a single helper, and deprecate the emulate_dpo and emulate_fua_read attributs. This fixes various failures in the libiscsi testsuite. Personally I'd prefer to also remove the emulate_fua_write attribute as there is no good reason to disable it, but I'll leave that for a separate discussion. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 30 ++++++-------------------- drivers/target/target_core_file.c | 4 +--- drivers/target/target_core_internal.h | 2 ++ drivers/target/target_core_sbc.c | 5 +++-- drivers/target/target_core_spc.c | 12 +++++++---- drivers/target/target_core_transport.c | 19 ++++++++++++++++ include/target/target_core_base.h | 6 ------ 7 files changed, 40 insertions(+), 38 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ce5f768181ff..9ee194d1cd4b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -760,16 +760,8 @@ EXPORT_SYMBOL(se_dev_set_emulate_model_alias); int se_dev_set_emulate_dpo(struct se_device *dev, int flag) { - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - pr_err("dpo_emulated not supported\n"); - return -EINVAL; - } - + printk_once(KERN_WARNING + "ignoring deprecated emulate_dpo attribute\n"); return 0; } EXPORT_SYMBOL(se_dev_set_emulate_dpo); @@ -799,16 +791,8 @@ EXPORT_SYMBOL(se_dev_set_emulate_fua_write); int se_dev_set_emulate_fua_read(struct se_device *dev, int flag) { - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - pr_err("ua read emulated not supported\n"); - return -EINVAL; - } - + printk_once(KERN_WARNING + "ignoring deprecated emulate_fua_read attribute\n"); return 0; } EXPORT_SYMBOL(se_dev_set_emulate_fua_read); @@ -1513,9 +1497,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.da_dev = dev; dev->dev_attrib.emulate_model_alias = DA_EMULATE_MODEL_ALIAS; - dev->dev_attrib.emulate_dpo = DA_EMULATE_DPO; - dev->dev_attrib.emulate_fua_write = DA_EMULATE_FUA_WRITE; - dev->dev_attrib.emulate_fua_read = DA_EMULATE_FUA_READ; + dev->dev_attrib.emulate_dpo = 1; + dev->dev_attrib.emulate_fua_write = 1; + dev->dev_attrib.emulate_fua_read = 1; dev->dev_attrib.emulate_write_cache = DA_EMULATE_WRITE_CACHE; dev->dev_attrib.emulate_ua_intlck_ctrl = DA_EMULATE_UA_INTLLCK_CTRL; dev->dev_attrib.emulate_tas = DA_EMULATE_TAS; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index fe6c19c1e001..e865885352da 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -610,9 +610,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * for SCSI WRITEs with Forced Unit Access (FUA) set. * Allow this to happen independent of WCE=0 setting. */ - if (ret > 0 && - dev->dev_attrib.emulate_fua_write > 0 && - (cmd->se_cmd_flags & SCF_FUA)) { + if (ret > 0 && (cmd->se_cmd_flags & SCF_FUA)) { loff_t start = cmd->t_task_lba * dev->dev_attrib.block_size; loff_t end; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 68bd7f5d9f73..75338c348be4 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -81,6 +81,8 @@ int transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); +bool target_check_wce(struct se_device *dev); +bool target_check_fua(struct se_device *dev); /* target_core_stat.c */ void target_stat_setup_dev_default_groups(struct se_device *); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 3df2cd538478..d441975604db 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -738,14 +738,15 @@ static int sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb) { if (cdb[1] & 0x10) { - if (!dev->dev_attrib.emulate_dpo) { + /* see explanation in spc_emulate_modesense */ + if (!target_check_fua(dev)) { pr_err("Got CDB: 0x%02x with DPO bit set, but device" " does not advertise support for DPO\n", cdb[0]); return -EINVAL; } } if (cdb[1] & 0x8) { - if (!dev->dev_attrib.emulate_fua_write || !se_dev_check_wce(dev)) { + if (!target_check_fua(dev)) { pr_err("Got CDB: 0x%02x with FUA bit set, but device" " does not advertise support for FUA write\n", cdb[0]); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 7912aa124385..988c158cf65d 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -481,7 +481,7 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf) buf[5] = 0x07; /* If WriteCache emulation is enabled, set V_SUP */ - if (se_dev_check_wce(dev)) + if (target_check_wce(dev)) buf[6] = 0x01; /* If an LBA map is present set R_SUP */ spin_lock(&cmd->se_dev->t10_alua.lba_map_lock); @@ -888,7 +888,7 @@ static int spc_modesense_caching(struct se_cmd *cmd, u8 pc, u8 *p) if (pc == 1) goto out; - if (se_dev_check_wce(dev)) + if (target_check_wce(dev)) p[2] = 0x04; /* Write Cache Enable */ p[12] = 0x20; /* Disabled Read Ahead */ @@ -1000,8 +1000,12 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY))) spc_modesense_write_protect(&buf[length], type); - if ((se_dev_check_wce(dev)) && - (dev->dev_attrib.emulate_fua_write > 0)) + /* + * SBC only allows us to enable FUA and DPO together. Fortunately + * DPO is explicitly specified as a hint, so a noop is a perfectly + * valid implementation. + */ + if (target_check_fua(dev)) spc_modesense_dpofua(&buf[length], type); ++length; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 105f4fd0fad4..231812d61357 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3075,3 +3075,22 @@ int transport_generic_handle_tmr( return 0; } EXPORT_SYMBOL(transport_generic_handle_tmr); + +bool +target_check_wce(struct se_device *dev) +{ + bool wce = false; + + if (dev->transport->get_write_cache) + wce = dev->transport->get_write_cache(dev); + else if (dev->dev_attrib.emulate_write_cache > 0) + wce = true; + + return wce; +} + +bool +target_check_fua(struct se_device *dev) +{ + return target_check_wce(dev) && dev->dev_attrib.emulate_fua_write > 0; +} diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 480e9f82dfea..7f4c7de3a4ce 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -79,12 +79,6 @@ #define DA_MAX_WRITE_SAME_LEN 0 /* Use a model alias based on the configfs backend device name */ #define DA_EMULATE_MODEL_ALIAS 0 -/* Emulation for Direct Page Out */ -#define DA_EMULATE_DPO 0 -/* Emulation for Forced Unit Access WRITEs */ -#define DA_EMULATE_FUA_WRITE 1 -/* Emulation for Forced Unit Access READs */ -#define DA_EMULATE_FUA_READ 0 /* Emulation for WriteCache and SYNCHRONIZE_CACHE */ #define DA_EMULATE_WRITE_CACHE 0 /* Emulation for UNIT ATTENTION Interlock Control */ From 2bc396a2529ae8a2287f17a49d893ce790e19110 Mon Sep 17 00:00:00 2001 From: Ilias Tsitsimpis Date: Thu, 23 Apr 2015 21:30:05 +0300 Subject: [PATCH 036/839] driver/user: Don't warn for DMA_NONE data direction Some SCSI commands (for example the TEST UNIT READY command) do not carry data and so data_direction is DMA_NONE. Patch TCMU to not print a warning message about unknown data direction, when it is DMA_NONE. Signed-off-by: Ilias Tsitsimpis Signed-off-by: Vangelis Koukis Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 07d2996d8c1f..50387c1d5e38 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -518,8 +518,9 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * } else if (se_cmd->data_direction == DMA_TO_DEVICE) { UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); - } else { - pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction); + } else if (se_cmd->data_direction != DMA_NONE) { + pr_warn("TCMU: data direction was %d!\n", + se_cmd->data_direction); } target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); From 9736f4ad7c6dd79eea36c7fb537a27bbd802582a Mon Sep 17 00:00:00 2001 From: Ilias Tsitsimpis Date: Thu, 23 Apr 2015 21:30:06 +0300 Subject: [PATCH 037/839] target/loop: Enable VARLEN CDB support Field shost->max_cmd_len is used to inform Linux / the SCSI midlayer of the maximum CDB size an LLD is capable of handling. Set this field to SCSI_MAX_VARLEN_CDB_SIZE for target, to enable support for variable-sized CDBs (0x7E). Also remove the definition of TL_SCSI_MAX_CMD_LEN since it is now redundant. Signed-off-by: Ilias Tsitsimpis Signed-off-by: Vangelis Koukis Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 2 +- drivers/target/loopback/tcm_loop.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 51f0c895c6a5..b863ce894977 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -409,7 +409,7 @@ static int tcm_loop_driver_probe(struct device *dev) sh->max_id = 2; sh->max_lun = 0; sh->max_channel = 0; - sh->max_cmd_len = TL_SCSI_MAX_CMD_LEN; + sh->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE; host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION | diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 1e72ff77cac9..433e595e5daa 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -2,11 +2,6 @@ #define TL_WWN_ADDR_LEN 256 #define TL_TPGS_PER_HBA 32 -/* - * Used in tcm_loop_driver_probe() for struct Scsi_Host->max_cmd_len - */ -#define TL_SCSI_MAX_CMD_LEN 32 - struct tcm_loop_cmd { /* State of Linux/SCSI CDB+Data descriptor */ u32 sc_cmd_state; From b32bd0a8ee6a19f9b70aa4fcb9bfc802b105ce05 Mon Sep 17 00:00:00 2001 From: Ilias Tsitsimpis Date: Thu, 23 Apr 2015 21:30:07 +0300 Subject: [PATCH 038/839] target/transport: Always initialize bidi fields in se_cmd Fields t_bidi_data_sg and t_bidi_data_nents are set only in the presence of BIDI commands. This means that the underlying code (for example TCMU) cannot inspect them when the SCSI command is not a BIDI one. Ensure the code always initializes these fields with the given values, even when the SCSI command is not a BIDI one. Set t_bidi_data_sg to sgl_bidi (which should be NULL for non-BIDI commands) and t_bidi_data_nents to sgl_bidi_count (which should be 0 for non-BIDI commands). This allows the underlying code to use these fields unconditionally. Signed-off-by: Ilias Tsitsimpis Signed-off-by: Vangelis Koukis Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 231812d61357..078d32102cce 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1353,11 +1353,9 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, cmd->t_data_sg = sgl; cmd->t_data_nents = sgl_count; + cmd->t_bidi_data_sg = sgl_bidi; + cmd->t_bidi_data_nents = sgl_bidi_count; - if (sgl_bidi && sgl_bidi_count) { - cmd->t_bidi_data_sg = sgl_bidi; - cmd->t_bidi_data_nents = sgl_bidi_count; - } cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; return 0; } From f97ec7db1606875666366bfcba8476f8c917db96 Mon Sep 17 00:00:00 2001 From: Ilias Tsitsimpis Date: Thu, 23 Apr 2015 21:47:00 +0300 Subject: [PATCH 039/839] target/user: Refactor data area allocation code Introduce alloc_and_scatter_data_area()/gather_and_free_data_area() functions that allocate/deallocate space from the data area and copy data to/from a given scatter-gather list. These functions are needed so the next patch, introducing support for bidirectional commands in TCMU, can use the same code path both for t_data_sg and for t_bidi_data_sg. Signed-off-by: Ilias Tsitsimpis Signed-off-by: Vangelis Koukis Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 189 +++++++++++++++++------------- 1 file changed, 106 insertions(+), 83 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 50387c1d5e38..2b0d26e268be 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2013 Shaohua Li * Copyright (C) 2014 Red Hat, Inc. + * Copyright (C) 2015 Arrikto, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -224,9 +225,104 @@ static inline size_t head_to_end(size_t head, size_t size) #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) +static void alloc_and_scatter_data_area(struct tcmu_dev *udev, + struct scatterlist *data_sg, unsigned int data_nents, + struct iovec **iov, int *iov_cnt, bool copy_data) +{ + int i; + void *from, *to; + size_t copy_bytes; + struct scatterlist *sg; + + for_each_sg(data_sg, sg, data_nents, i) { + copy_bytes = min_t(size_t, sg->length, + head_to_end(udev->data_head, udev->data_size)); + from = kmap_atomic(sg_page(sg)) + sg->offset; + to = (void *) udev->mb_addr + udev->data_off + udev->data_head; + + if (copy_data) { + memcpy(to, from, copy_bytes); + tcmu_flush_dcache_range(to, copy_bytes); + } + + /* Even iov_base is relative to mb_addr */ + (*iov)->iov_len = copy_bytes; + (*iov)->iov_base = (void __user *) udev->data_off + + udev->data_head; + (*iov_cnt)++; + (*iov)++; + + UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); + + /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ + if (sg->length != copy_bytes) { + from += copy_bytes; + copy_bytes = sg->length - copy_bytes; + + (*iov)->iov_len = copy_bytes; + (*iov)->iov_base = (void __user *) udev->data_off + + udev->data_head; + + if (copy_data) { + to = (void *) udev->mb_addr + + udev->data_off + udev->data_head; + memcpy(to, from, copy_bytes); + tcmu_flush_dcache_range(to, copy_bytes); + } + + (*iov_cnt)++; + (*iov)++; + + UPDATE_HEAD(udev->data_head, + copy_bytes, udev->data_size); + } + + kunmap_atomic(from); + } +} + +static void gather_and_free_data_area(struct tcmu_dev *udev, + struct scatterlist *data_sg, unsigned int data_nents) +{ + int i; + void *from, *to; + size_t copy_bytes; + struct scatterlist *sg; + + /* It'd be easier to look at entry's iovec again, but UAM */ + for_each_sg(data_sg, sg, data_nents, i) { + copy_bytes = min_t(size_t, sg->length, + head_to_end(udev->data_tail, udev->data_size)); + + to = kmap_atomic(sg_page(sg)) + sg->offset; + WARN_ON(sg->length + sg->offset > PAGE_SIZE); + from = (void *) udev->mb_addr + + udev->data_off + udev->data_tail; + tcmu_flush_dcache_range(from, copy_bytes); + memcpy(to, from, copy_bytes); + + UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); + + /* Uh oh, wrapped the data buffer for this sg's data */ + if (sg->length != copy_bytes) { + from = (void *) udev->mb_addr + + udev->data_off + udev->data_tail; + WARN_ON(udev->data_tail); + to += copy_bytes; + copy_bytes = sg->length - copy_bytes; + tcmu_flush_dcache_range(from, copy_bytes); + memcpy(to, from, copy_bytes); + + UPDATE_HEAD(udev->data_tail, + copy_bytes, udev->data_size); + } + kunmap_atomic(to); + } +} + /* - * We can't queue a command until we have space available on the cmd ring *and* space - * space avail on the data ring. + * We can't queue a command until we have space available on the cmd ring *and* + * space available on the data ring. * * Called with ring lock held. */ @@ -274,12 +370,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) size_t base_command_size, command_size; struct tcmu_mailbox *mb; struct tcmu_cmd_entry *entry; - int i; - struct scatterlist *sg; struct iovec *iov; - int iov_cnt = 0; + int iov_cnt; uint32_t cmd_head; uint64_t cdb_off; + bool copy_to_data_area; if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) return -EINVAL; @@ -360,49 +455,10 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * Fix up iovecs, and handle if allocation in data ring wrapped. */ iov = &entry->req.iov[0]; - for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { - size_t copy_bytes = min((size_t)sg->length, - head_to_end(udev->data_head, udev->data_size)); - void *from = kmap_atomic(sg_page(sg)) + sg->offset; - void *to = (void *) mb + udev->data_off + udev->data_head; - - if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) { - memcpy(to, from, copy_bytes); - tcmu_flush_dcache_range(to, copy_bytes); - } - - /* Even iov_base is relative to mb_addr */ - iov->iov_len = copy_bytes; - iov->iov_base = (void __user *) udev->data_off + - udev->data_head; - iov_cnt++; - iov++; - - UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); - - /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ - if (sg->length != copy_bytes) { - from += copy_bytes; - copy_bytes = sg->length - copy_bytes; - - iov->iov_len = copy_bytes; - iov->iov_base = (void __user *) udev->data_off + - udev->data_head; - - if (se_cmd->data_direction == DMA_TO_DEVICE) { - to = (void *) mb + udev->data_off + udev->data_head; - memcpy(to, from, copy_bytes); - tcmu_flush_dcache_range(to, copy_bytes); - } - - iov_cnt++; - iov++; - - UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); - } - - kunmap_atomic(from); - } + iov_cnt = 0; + copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE); + alloc_and_scatter_data_area(udev, se_cmd->t_data_sg, + se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area); entry->req.iov_cnt = iov_cnt; entry->req.iov_bidi_cnt = 0; entry->req.iov_dif_cnt = 0; @@ -481,41 +537,8 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - struct scatterlist *sg; - int i; - - /* It'd be easier to look at entry's iovec again, but UAM */ - for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { - size_t copy_bytes; - void *to; - void *from; - - copy_bytes = min((size_t)sg->length, - head_to_end(udev->data_tail, udev->data_size)); - - to = kmap_atomic(sg_page(sg)) + sg->offset; - WARN_ON(sg->length + sg->offset > PAGE_SIZE); - from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; - tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to, from, copy_bytes); - - UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); - - /* Uh oh, wrapped the data buffer for this sg's data */ - if (sg->length != copy_bytes) { - from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; - WARN_ON(udev->data_tail); - to += copy_bytes; - copy_bytes = sg->length - copy_bytes; - tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to, from, copy_bytes); - - UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); - } - - kunmap_atomic(to); - } - + gather_and_free_data_area(udev, + se_cmd->t_data_sg, se_cmd->t_data_nents); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); } else if (se_cmd->data_direction != DMA_NONE) { From e4648b014e03baee45d5f5146c1219b19e4e5f2f Mon Sep 17 00:00:00 2001 From: Ilias Tsitsimpis Date: Thu, 23 Apr 2015 21:30:09 +0300 Subject: [PATCH 040/839] target/user: Add support for bidirectional commands Enable TCMU to handle bidirectional SCSI commands. In such cases, entries in iov[] cover both the Data-In and the Data-Out buffers. The first iov_cnt entries correspond to the Data-Out buffer, while the remaining iov_bidi_cnt entries correspond to the Data-In buffer. Signed-off-by: Ilias Tsitsimpis Signed-off-by: Vangelis Koukis Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- Documentation/target/tcmu-design.txt | 2 +- drivers/target/target_core_user.c | 29 +++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index 263b907517ac..bef81e42788f 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -152,7 +152,7 @@ overall shared memory region, not the entry. The data in/out buffers are accessible via tht req.iov[] array. iov_cnt contains the number of entries in iov[] needed to describe either the Data-In or Data-Out buffers. For bidirectional commands, iov_cnt specifies how many iovec -entries cover the Data-Out area, and iov_bidi_count specifies how many +entries cover the Data-Out area, and iov_bidi_cnt specifies how many iovec entries immediately after that in iov[] cover the Data-In area. Just like other fields, iov.iov_base is an offset from the start of the region. diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2b0d26e268be..60330e00f59d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -167,6 +167,11 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->tcmu_dev = udev; tcmu_cmd->data_length = se_cmd->data_length; + if (se_cmd->se_cmd_flags & SCF_BIDI) { + BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); + tcmu_cmd->data_length += se_cmd->t_bidi_data_sg->length; + } + tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); idr_preload(GFP_KERNEL); @@ -387,7 +392,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * b/c size == offsetof one-past-element. */ base_command_size = max(offsetof(struct tcmu_cmd_entry, - req.iov[se_cmd->t_data_nents + 2]), + req.iov[se_cmd->t_bidi_data_nents + + se_cmd->t_data_nents + 2]), sizeof(struct tcmu_cmd_entry)); command_size = base_command_size + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); @@ -456,13 +462,19 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) */ iov = &entry->req.iov[0]; iov_cnt = 0; - copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE); + copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE + || se_cmd->se_cmd_flags & SCF_BIDI); alloc_and_scatter_data_area(udev, se_cmd->t_data_sg, se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area); entry->req.iov_cnt = iov_cnt; - entry->req.iov_bidi_cnt = 0; entry->req.iov_dif_cnt = 0; + /* Handle BIDI commands */ + iov_cnt = 0; + alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false); + entry->req.iov_bidi_cnt = iov_cnt; + /* All offsets relative to mb_addr, not start of entry! */ cdb_off = CMDR_OFF + cmd_head + base_command_size; memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb)); @@ -535,8 +547,15 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->scsi_sense_length); UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); - } - else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + } else if (se_cmd->se_cmd_flags & SCF_BIDI) { + /* Discard data_out buffer */ + UPDATE_HEAD(udev->data_tail, + (size_t)se_cmd->t_data_sg->length, udev->data_size); + + /* Get Data-In buffer */ + gather_and_free_data_area(udev, + se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { gather_and_free_data_area(udev, se_cmd->t_data_sg, se_cmd->t_data_nents); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { From afc16604c06414223478df3e42301ab630b9960a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 27 Apr 2015 13:52:36 +0200 Subject: [PATCH 041/839] target: Remove first argument of target_{get,put}_sess_cmd() The first argument of these two functions is always identical to se_cmd->se_sess. Hence remove the first argument. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: Cc: Felipe Balbi Cc: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 6 +++--- drivers/infiniband/ulp/srpt/ib_srpt.c | 10 +++++----- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- drivers/target/iscsi/iscsi_target.c | 15 +++++++-------- drivers/target/iscsi/iscsi_target_configfs.c | 2 +- drivers/target/iscsi/iscsi_target_util.c | 4 ++-- drivers/target/target_core_tmr.c | 2 +- drivers/target/target_core_transport.c | 20 ++++++++++---------- drivers/vhost/scsi.c | 2 +- include/target/target_core_fabric.h | 4 ++-- 10 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3f40319a55da..9e3662cce612 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1332,7 +1332,7 @@ sequence_cmd: if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); else if (dump_payload && imm_data) - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1757,7 +1757,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { struct se_cmd *se_cmd = &cmd->se_cmd; - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } } @@ -1930,7 +1930,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, spin_unlock_bh(&cmd->istate_lock); if (ret) { - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); } else { diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9b84b4c0a000..6fbc7bc824d2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1334,7 +1334,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) BUG_ON(ch->sess == NULL); - target_put_sess_cmd(ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); goto out; } @@ -1365,11 +1365,11 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; default: WARN(1, "Unexpected command state (%d)", state); @@ -1679,7 +1679,7 @@ static int srpt_check_stop_free(struct se_cmd *cmd) struct srpt_send_ioctx *ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); - return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + return target_put_sess_cmd(&ioctx->cmd); } /** @@ -3074,7 +3074,7 @@ static void srpt_queue_response(struct se_cmd *cmd) ioctx->tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); } } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 5c9e680aa375..8a6c87547303 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -429,7 +429,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_14; } - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } /* tcm_qla2xxx_release_cmd - Callback from TCM Core to release underlying diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 21620c751071..866c167c0986 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -715,7 +715,7 @@ static int iscsit_add_reject_from_cmd( */ if (cmd->se_cmd.se_tfo != NULL) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } return -1; } @@ -1001,7 +1001,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -1067,7 +1067,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } } @@ -1083,7 +1083,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->sense_reason) return 0; - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1114,7 +1114,6 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { - struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. @@ -1141,7 +1140,7 @@ after_immediate_data: rc = iscsit_dump_data_payload(cmd->conn, cmd->first_burst_len, 1); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); @@ -1810,7 +1809,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; switch (function) { @@ -1952,7 +1951,7 @@ attach: */ if (sess_ref) { pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 469fce44ebad..568e478615a5 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1967,7 +1967,7 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) static int lio_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void lio_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 37d23e5e08dc..c1582e88191e 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -746,7 +746,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; case ISCSI_OP_REJECT: @@ -762,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; } diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 315ec3458eeb..b2e169fba3c6 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -153,7 +153,7 @@ void core_tmr_abort_task( cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 078d32102cce..9dc1bd5f0e6b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1417,7 +1417,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess * for fabrics using TARGET_SCF_ACK_KREF that expect a second * kref_put() to happen during fabric packet acknowledgement. */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) return ret; /* @@ -1431,7 +1431,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess rc = transport_lookup_cmd_lun(se_cmd, unpacked_lun); if (rc) { transport_send_check_condition_and_sense(se_cmd, rc, 0); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return 0; } @@ -1582,7 +1582,7 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, se_cmd->se_tmr_req->ref_task_tag = tag; /* See target_submit_cmd for commentary */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) { core_tmr_release_req(se_cmd->se_tmr_req); return ret; @@ -2226,7 +2226,7 @@ static int transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - return target_put_sess_cmd(cmd->se_sess, cmd); + return target_put_sess_cmd(cmd); } /** @@ -2470,13 +2470,12 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) EXPORT_SYMBOL(transport_generic_free_cmd); /* target_get_sess_cmd - Add command to active ->sess_cmd_list - * @se_sess: session to reference * @se_cmd: command descriptor to add * @ack_kref: Signal that fabric will perform an ack target_put_sess_cmd() */ -int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, - bool ack_kref) +int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) { + struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; int ret = 0; @@ -2498,7 +2497,7 @@ out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); if (ret && ack_kref) - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return ret; } @@ -2527,11 +2526,12 @@ static void target_release_cmd_kref(struct kref *kref) } /* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_sess: session to reference * @se_cmd: command descriptor to drop */ -int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) +int target_put_sess_cmd(struct se_cmd *se_cmd) { + struct se_session *se_sess = se_cmd->se_sess; + if (!se_sess) { se_cmd->se_tfo->release_cmd(se_cmd); return 1; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ea32b386797f..636435b41293 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -607,7 +607,7 @@ static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd) static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 0f4dc3768587..24c8d9d0d946 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -155,8 +155,8 @@ bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); int transport_send_check_condition_and_sense(struct se_cmd *, sense_reason_t, int); -int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); -int target_put_sess_cmd(struct se_session *, struct se_cmd *); +int target_get_sess_cmd(struct se_cmd *, bool); +int target_put_sess_cmd(struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); void target_wait_for_sess_cmds(struct se_session *); From df75b956274b03e09426c1169f913d884d768ee7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:09 +0200 Subject: [PATCH 042/839] tcm_loop: remove struct tcm_loop_nacl Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 15 ++------------- drivers/target/loopback/tcm_loop.h | 4 ---- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index b863ce894977..e11bcf3e0889 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -706,25 +706,14 @@ static int tcm_loop_check_prot_fabric_only(struct se_portal_group *se_tpg) static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl( struct se_portal_group *se_tpg) { - struct tcm_loop_nacl *tl_nacl; - - tl_nacl = kzalloc(sizeof(struct tcm_loop_nacl), GFP_KERNEL); - if (!tl_nacl) { - pr_err("Unable to allocate struct tcm_loop_nacl\n"); - return NULL; - } - - return &tl_nacl->se_node_acl; + return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); } static void tcm_loop_tpg_release_fabric_acl( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl) { - struct tcm_loop_nacl *tl_nacl = container_of(se_nacl, - struct tcm_loop_nacl, se_node_acl); - - kfree(tl_nacl); + kfree(se_nacl); } static u32 tcm_loop_get_inst_index(struct se_portal_group *se_tpg) diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 433e595e5daa..4346462094a1 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -28,10 +28,6 @@ struct tcm_loop_nexus { struct se_session *se_sess; }; -struct tcm_loop_nacl { - struct se_node_acl se_node_acl; -}; - #define TCM_TRANSPORT_ONLINE 0 #define TCM_TRANSPORT_OFFLINE 1 From a3b679e7e4369dfb5be735598888fcdef9d35196 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:10 +0200 Subject: [PATCH 043/839] tcm_usb: remove struct usbg_nacl Except for the embedded struct se_node_acl none of the fields were ever used. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 24 +++------------------- drivers/usb/gadget/legacy/tcm_usb_gadget.h | 9 -------- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 8b80addc4ce6..aba80e245e44 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1380,22 +1380,14 @@ static char *usbg_parse_pr_out_transport_id( static struct se_node_acl *usbg_alloc_fabric_acl(struct se_portal_group *se_tpg) { - struct usbg_nacl *nacl; - - nacl = kzalloc(sizeof(struct usbg_nacl), GFP_KERNEL); - if (!nacl) - return NULL; - - return &nacl->se_node_acl; + return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); } static void usbg_release_fabric_acl( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl) { - struct usbg_nacl *nacl = container_of(se_nacl, - struct usbg_nacl, se_node_acl); - kfree(nacl); + kfree(se_nacl); } static u32 usbg_tpg_get_inst_index(struct se_portal_group *se_tpg) @@ -1495,8 +1487,6 @@ static struct se_node_acl *usbg_make_nodeacl( const char *name) { struct se_node_acl *se_nacl, *se_nacl_new; - struct usbg_nacl *nacl; - u64 wwpn = 0; u32 nexus_depth; const char *wnn_name; @@ -1518,21 +1508,13 @@ static struct se_node_acl *usbg_make_nodeacl( usbg_release_fabric_acl(se_tpg, se_nacl_new); return se_nacl; } - /* - * Locate our struct usbg_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct usbg_nacl, se_node_acl); - nacl->iport_wwpn = wwpn; - snprintf(nacl->iport_name, sizeof(nacl->iport_name), "%s", name); return se_nacl; } static void usbg_drop_nodeacl(struct se_node_acl *se_acl) { - struct usbg_nacl *nacl = container_of(se_acl, - struct usbg_nacl, se_node_acl); core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); + kfree(se_acl); } struct usbg_tpg *the_only_tpg_I_currently_have; diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.h b/drivers/usb/gadget/legacy/tcm_usb_gadget.h index 8289219925b8..b254aec521da 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.h +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.h @@ -25,15 +25,6 @@ enum { #define USB_G_ALT_INT_BBB 0 #define USB_G_ALT_INT_UAS 1 -struct usbg_nacl { - /* Binary World Wide unique Port Name for SAS Initiator port */ - u64 iport_wwpn; - /* ASCII formatted WWPN for Sas Initiator port */ - char iport_name[USBG_NAMELEN]; - /* Returned by usbg_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct tcm_usbg_nexus { struct se_session *tvn_se_sess; }; From 16233799e383fc76f20d8142ef55da2308d36c65 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:11 +0200 Subject: [PATCH 044/839] sbp_target: remove struct sbp_nacl Except for the embedded struct se_node_acl none of the fields were ever used. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/sbp/sbp_target.c | 24 +++--------------------- drivers/target/sbp/sbp_target.h | 9 --------- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 18b0f9703ff2..61da83be8966 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1714,24 +1714,14 @@ static u32 sbp_get_default_depth(struct se_portal_group *se_tpg) static struct se_node_acl *sbp_alloc_fabric_acl(struct se_portal_group *se_tpg) { - struct sbp_nacl *nacl; - - nacl = kzalloc(sizeof(struct sbp_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct sbp_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; + return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); } static void sbp_release_fabric_acl( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl) { - struct sbp_nacl *nacl = - container_of(se_nacl, struct sbp_nacl, se_node_acl); - kfree(nacl); + kfree(se_nacl); } static u32 sbp_tpg_get_inst_index(struct se_portal_group *se_tpg) @@ -2106,7 +2096,6 @@ static struct se_node_acl *sbp_make_nodeacl( const char *name) { struct se_node_acl *se_nacl, *se_nacl_new; - struct sbp_nacl *nacl; u64 guid = 0; u32 nexus_depth = 1; @@ -2128,20 +2117,13 @@ static struct se_node_acl *sbp_make_nodeacl( return se_nacl; } - nacl = container_of(se_nacl, struct sbp_nacl, se_node_acl); - nacl->guid = guid; - sbp_format_wwn(nacl->iport_name, SBP_NAMELEN, guid); - return se_nacl; } static void sbp_drop_nodeacl(struct se_node_acl *se_acl) { - struct sbp_nacl *nacl = - container_of(se_acl, struct sbp_nacl, se_node_acl); - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); + kfree(se_acl); } static int sbp_post_link_lun( diff --git a/drivers/target/sbp/sbp_target.h b/drivers/target/sbp/sbp_target.h index 6d0d74a2c545..e1b0b84f7379 100644 --- a/drivers/target/sbp/sbp_target.h +++ b/drivers/target/sbp/sbp_target.h @@ -151,15 +151,6 @@ struct sbp_session { u64 reconnect_expires; }; -struct sbp_nacl { - /* Initiator EUI-64 */ - u64 guid; - /* ASCII formatted GUID for SBP Initiator port */ - char iport_name[SBP_NAMELEN]; - /* Returned by sbp_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct sbp_tpg { /* Target portal group tag for TCM */ u16 tport_tpgt; From 55570113a955091af5fafa94cd50914d994e09ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:12 +0200 Subject: [PATCH 045/839] vhost/scsi: remove struct vhost_scsi_nacl Except for the embedded struct se_node_acl none of the fields were ever used. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/vhost/scsi.c | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 636435b41293..d21f2ba6c899 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -115,15 +115,6 @@ struct vhost_scsi_nexus { struct se_session *tvn_se_sess; }; -struct vhost_scsi_nacl { - /* Binary World Wide unique Port Name for Vhost Initiator port */ - u64 iport_wwpn; - /* ASCII formatted WWPN for Sas Initiator port */ - char iport_name[VHOST_SCSI_NAMELEN]; - /* Returned by vhost_scsi_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct vhost_scsi_tpg { /* Vhost port target portal group tag for TCM */ u16 tport_tpgt; @@ -442,24 +433,14 @@ static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg) static struct se_node_acl * vhost_scsi_alloc_fabric_acl(struct se_portal_group *se_tpg) { - struct vhost_scsi_nacl *nacl; - - nacl = kzalloc(sizeof(struct vhost_scsi_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct vhost_scsi_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; + return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); } static void vhost_scsi_release_fabric_acl(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl) { - struct vhost_scsi_nacl *nacl = container_of(se_nacl, - struct vhost_scsi_nacl, se_node_acl); - kfree(nacl); + kfree(se_nacl); } static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg) @@ -1828,8 +1809,6 @@ vhost_scsi_make_nodeacl(struct se_portal_group *se_tpg, const char *name) { struct se_node_acl *se_nacl, *se_nacl_new; - struct vhost_scsi_nacl *nacl; - u64 wwpn = 0; u32 nexus_depth; /* vhost_scsi_parse_wwn(name, &wwpn, 1) < 0) @@ -1849,21 +1828,14 @@ vhost_scsi_make_nodeacl(struct se_portal_group *se_tpg, vhost_scsi_release_fabric_acl(se_tpg, se_nacl_new); return se_nacl; } - /* - * Locate our struct vhost_scsi_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct vhost_scsi_nacl, se_node_acl); - nacl->iport_wwpn = wwpn; return se_nacl; } static void vhost_scsi_drop_nodeacl(struct se_node_acl *se_acl) { - struct vhost_scsi_nacl *nacl = container_of(se_acl, - struct vhost_scsi_nacl, se_node_acl); core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); + kfree(se_acl); } static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus, From e1750d20e69477c0d7b26d6e0ba24c3c9ec47530 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:13 +0200 Subject: [PATCH 046/839] target: make the tpg_get_default_depth method optional All fabric drivers except for iSCSI always return 1, so implement that as default behavior. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 8 -------- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 ------ drivers/scsi/qla2xxx/tcm_qla2xxx.c | 7 ------- drivers/target/loopback/tcm_loop.c | 6 ------ drivers/target/sbp/sbp_target.c | 6 ------ drivers/target/target_core_configfs.c | 4 ---- drivers/target/target_core_tpg.c | 5 ++++- drivers/target/tcm_fc/tfc_conf.c | 6 ------ drivers/usb/gadget/legacy/tcm_usb_gadget.c | 6 ------ drivers/vhost/scsi.c | 6 ------ drivers/xen/xen-scsiback.c | 6 ------ 11 files changed, 4 insertions(+), 62 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 2ba71cea0172..3dab9b28f4f0 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -377,7 +377,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" - buf += " .tpg_get_default_depth = " + fabric_mod_name + "_get_default_depth,\n" buf += " .tpg_get_pr_transport_id = " + fabric_mod_name + "_get_pr_transport_id,\n" buf += " .tpg_get_pr_transport_id_len = " + fabric_mod_name + "_get_pr_transport_id_len,\n" buf += " .tpg_parse_pr_out_transport_id = " + fabric_mod_name + "_parse_pr_out_transport_id,\n" @@ -590,13 +589,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "u16 " + fabric_mod_name + "_get_tag(struct se_portal_group *);\n" - if re.search('get_default_depth', fo): - buf += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " return 1;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *);\n" - if re.search('get_pr_transport_id\)\(', fo): buf += "u32 " + fabric_mod_name + "_get_pr_transport_id(\n" buf += " struct se_portal_group *se_tpg,\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6fbc7bc824d2..8198d4cb0694 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3411,11 +3411,6 @@ static u16 srpt_get_tag(struct se_portal_group *tpg) return 1; } -static u32 srpt_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, @@ -3920,7 +3915,6 @@ static const struct target_core_fabric_ops srpt_template = { .get_fabric_proto_ident = srpt_get_fabric_proto_ident, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, - .tpg_get_default_depth = srpt_get_default_depth, .tpg_get_pr_transport_id = srpt_get_pr_transport_id, .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 8a6c87547303..5d69abcf9ed0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -223,11 +223,6 @@ static u16 tcm_qla2xxx_get_tag(struct se_portal_group *se_tpg) return tpg->lport_tpgt; } -static u32 tcm_qla2xxx_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 tcm_qla2xxx_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -1991,7 +1986,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_default_depth = tcm_qla2xxx_get_default_depth, .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, @@ -2049,7 +2043,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_default_depth = tcm_qla2xxx_get_default_depth, .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index e11bcf3e0889..12b85b03e9ae 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -566,11 +566,6 @@ static u16 tcm_loop_get_tag(struct se_portal_group *se_tpg) return tl_tpg->tl_tpgt; } -static u32 tcm_loop_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 tcm_loop_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -1378,7 +1373,6 @@ static const struct target_core_fabric_ops loop_ops = { .get_fabric_proto_ident = tcm_loop_get_fabric_proto_ident, .tpg_get_wwn = tcm_loop_get_endpoint_wwn, .tpg_get_tag = tcm_loop_get_tag, - .tpg_get_default_depth = tcm_loop_get_default_depth, .tpg_get_pr_transport_id = tcm_loop_get_pr_transport_id, .tpg_get_pr_transport_id_len = tcm_loop_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = tcm_loop_parse_pr_out_transport_id, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 61da83be8966..0d70d4f58e43 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1707,11 +1707,6 @@ static u16 sbp_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 sbp_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static struct se_node_acl *sbp_alloc_fabric_acl(struct se_portal_group *se_tpg) { return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); @@ -2490,7 +2485,6 @@ static const struct target_core_fabric_ops sbp_ops = { .get_fabric_proto_ident = sbp_get_fabric_proto_ident, .tpg_get_wwn = sbp_get_fabric_wwn, .tpg_get_tag = sbp_get_tag, - .tpg_get_default_depth = sbp_get_default_depth, .tpg_get_pr_transport_id = sbp_get_pr_transport_id, .tpg_get_pr_transport_id_len = sbp_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = sbp_parse_pr_out_transport_id, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index e7b0430a0575..c87ca0c50545 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -330,10 +330,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->tpg_get_tag()\n"); return -EINVAL; } - if (!tfo->tpg_get_default_depth) { - pr_err("Missing tfo->tpg_get_default_depth()\n"); - return -EINVAL; - } if (!tfo->tpg_get_pr_transport_id) { pr_err("Missing tfo->tpg_get_pr_transport_id()\n"); return -EINVAL; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 47f064415bf6..5a9deb2b24af 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -282,7 +282,10 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( spin_lock_init(&acl->device_list_lock); spin_lock_init(&acl->nacl_sess_lock); atomic_set(&acl->acl_pr_ref_count, 0); - acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); + if (tpg->se_tpg_tfo->tpg_get_default_depth) + acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); + else + acl->queue_depth = 1; snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); acl->se_tpg = tpg; acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 65dce1345966..46b4387460e5 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -486,11 +486,6 @@ static u16 ft_get_tag(struct se_portal_group *se_tpg) return tpg->index; } -static u32 ft_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static int ft_check_false(struct se_portal_group *se_tpg) { return 0; @@ -514,7 +509,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .get_fabric_proto_ident = fc_get_fabric_proto_ident, .tpg_get_wwn = ft_get_fabric_wwn, .tpg_get_tag = ft_get_tag, - .tpg_get_default_depth = ft_get_default_depth, .tpg_get_pr_transport_id = fc_get_pr_transport_id, .tpg_get_pr_transport_id_len = fc_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = fc_parse_pr_out_transport_id, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index aba80e245e44..b7a78f72fb0e 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1307,11 +1307,6 @@ static u16 usbg_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 usbg_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 usbg_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -1852,7 +1847,6 @@ static const struct target_core_fabric_ops usbg_ops = { .get_fabric_proto_ident = usbg_get_fabric_proto_ident, .tpg_get_wwn = usbg_get_fabric_wwn, .tpg_get_tag = usbg_get_tag, - .tpg_get_default_depth = usbg_get_default_depth, .tpg_get_pr_transport_id = usbg_get_pr_transport_id, .tpg_get_pr_transport_id_len = usbg_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = usbg_parse_pr_out_transport_id, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d21f2ba6c899..fb8a93d5bfba 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -326,11 +326,6 @@ static u16 vhost_scsi_get_tpgt(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 vhost_scsi_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 vhost_scsi_get_pr_transport_id(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -2300,7 +2295,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .get_fabric_proto_ident = vhost_scsi_get_fabric_proto_ident, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, - .tpg_get_default_depth = vhost_scsi_get_default_depth, .tpg_get_pr_transport_id = vhost_scsi_get_pr_transport_id, .tpg_get_pr_transport_id_len = vhost_scsi_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = vhost_scsi_parse_pr_out_transport_id, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index b7f51504f85a..c0d4aee0eeb5 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1292,11 +1292,6 @@ static u16 scsiback_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - static u32 scsiback_get_pr_transport_id(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -1950,7 +1945,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .get_fabric_proto_ident = scsiback_get_fabric_proto_ident, .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, - .tpg_get_default_depth = scsiback_get_default_depth, .tpg_get_pr_transport_id = scsiback_get_pr_transport_id, .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len, .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id, From c7d6a803926bae9bbf4510a18fc8dd8957cc0e01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:14 +0200 Subject: [PATCH 047/839] target: refactor init/drop_nodeacl methods By always allocating and adding, respectively removing and freeing the se_node_acl structure in core code we can remove tons of repeated code in the init_nodeacl and drop_nodeacl routines. Additionally this now respects the get_default_queue_depth method in this code path as well. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 77 -------------------- Documentation/target/tcm_mod_builder.txt | 4 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 58 ++++----------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 59 ++------------- drivers/target/iscsi/iscsi_target_configfs.c | 49 +++---------- drivers/target/sbp/sbp_target.c | 36 +-------- drivers/target/target_core_fabric_configfs.c | 22 +++--- drivers/target/target_core_internal.h | 3 + drivers/target/target_core_tpg.c | 46 +++--------- drivers/target/tcm_fc/tfc_conf.c | 48 ++---------- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 42 ++--------- drivers/vhost/scsi.c | 39 ---------- drivers/xen/xen-scsiback.c | 6 -- include/target/target_core_fabric.h | 9 +-- 14 files changed, 77 insertions(+), 421 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 3dab9b28f4f0..3e54809b3aa1 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -50,15 +50,6 @@ def tcm_mod_build_FC_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* Binary World Wide unique Port Name for FC Initiator Nport */\n" - buf += " u64 nport_wwpn;\n" - buf += " /* ASCII formatted WWPN for FC Initiator Nport */\n" - buf += " char nport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n" - buf += "\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* FC lport target portal group tag for TCM */\n" buf += " u16 lport_tpgt;\n" @@ -105,14 +96,6 @@ def tcm_mod_build_SAS_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* Binary World Wide unique Port Name for SAS Initiator port */\n" - buf += " u64 iport_wwpn;\n" - buf += " /* ASCII formatted WWPN for Sas Initiator port */\n" - buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* SAS port target portal group tag for TCM */\n" buf += " u16 tport_tpgt;\n" @@ -158,12 +141,6 @@ def tcm_mod_build_iSCSI_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* ASCII formatted InitiatorName */\n" - buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* iSCSI target portal group tag for TCM */\n" buf += " u16 tport_tpgt;\n" @@ -239,54 +216,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops;\n\n" - buf += "static struct se_node_acl *" + fabric_mod_name + "_make_nodeacl(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct config_group *group,\n" - buf += " const char *name)\n" - buf += "{\n" - buf += " struct se_node_acl *se_nacl, *se_nacl_new;\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl;\n" - - if proto_ident == "FC" or proto_ident == "SAS": - buf += " u64 wwpn = 0;\n" - - buf += " u32 nexus_depth;\n\n" - buf += " /* " + fabric_mod_name + "_parse_wwn(name, &wwpn, 1) < 0)\n" - buf += " return ERR_PTR(-EINVAL); */\n" - buf += " se_nacl_new = " + fabric_mod_name + "_alloc_fabric_acl(se_tpg);\n" - buf += " if (!se_nacl_new)\n" - buf += " return ERR_PTR(-ENOMEM);\n" - buf += "//#warning FIXME: Hardcoded nexus depth in " + fabric_mod_name + "_make_nodeacl()\n" - buf += " nexus_depth = 1;\n" - buf += " /*\n" - buf += " * se_nacl_new may be released by core_tpg_add_initiator_node_acl()\n" - buf += " * when converting a NodeACL from demo mode -> explict\n" - buf += " */\n" - buf += " se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,\n" - buf += " name, nexus_depth);\n" - buf += " if (IS_ERR(se_nacl)) {\n" - buf += " " + fabric_mod_name + "_release_fabric_acl(se_tpg, se_nacl_new);\n" - buf += " return se_nacl;\n" - buf += " }\n" - buf += " /*\n" - buf += " * Locate our struct " + fabric_mod_name + "_nacl and set the FC Nport WWPN\n" - buf += " */\n" - buf += " nacl = container_of(se_nacl, struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - - if proto_ident == "FC" or proto_ident == "SAS": - buf += " nacl->" + fabric_mod_init_port + "_wwpn = wwpn;\n" - - buf += " /* " + fabric_mod_name + "_format_wwn(&nacl->" + fabric_mod_init_port + "_name[0], " + fabric_mod_name.upper() + "_NAMELEN, wwpn); */\n\n" - buf += " return se_nacl;\n" - buf += "}\n\n" - buf += "static void " + fabric_mod_name + "_drop_nodeacl(struct se_node_acl *se_acl)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_acl,\n" - buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - buf += " core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);\n" - buf += " kfree(nacl);\n" - buf += "}\n\n" - buf += "static struct se_portal_group *" + fabric_mod_name + "_make_tpg(\n" buf += " struct se_wwn *wwn,\n" buf += " struct config_group *group,\n" @@ -408,12 +337,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .fabric_drop_wwn = " + fabric_mod_name + "_drop_" + fabric_mod_port + ",\n" buf += " .fabric_make_tpg = " + fabric_mod_name + "_make_tpg,\n" buf += " .fabric_drop_tpg = " + fabric_mod_name + "_drop_tpg,\n" - buf += " .fabric_post_link = NULL,\n" - buf += " .fabric_pre_unlink = NULL,\n" - buf += " .fabric_make_np = NULL,\n" - buf += " .fabric_drop_np = NULL,\n" - buf += " .fabric_make_nodeacl = " + fabric_mod_name + "_make_nodeacl,\n" - buf += " .fabric_drop_nodeacl = " + fabric_mod_name + "_drop_nodeacl,\n" buf += "\n" buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs;\n" buf += "};\n\n" diff --git a/Documentation/target/tcm_mod_builder.txt b/Documentation/target/tcm_mod_builder.txt index 84533d8e747f..ae22f7005540 100644 --- a/Documentation/target/tcm_mod_builder.txt +++ b/Documentation/target/tcm_mod_builder.txt @@ -13,8 +13,8 @@ fabric skeleton, by simply using: This script will create a new drivers/target/$TCM_NEW_MOD/, and will do the following *) Generate new API callers for drivers/target/target_core_fabric_configs.c logic - ->make_nodeacl(), ->drop_nodeacl(), ->make_tpg(), ->drop_tpg() - ->make_wwn(), ->drop_wwn(). These are created into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c + ->make_tpg(), ->drop_tpg(), ->make_wwn(), ->drop_wwn(). These are created + into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c *) Generate basic infrastructure for loading/unloading LKMs and TCM/ConfigFS fabric module using a skeleton struct target_core_fabric_ops API template. *) Based on user defined T10 Proto_Ident for the new fabric module being built, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8198d4cb0694..c1f3aa52f42b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3592,40 +3592,19 @@ out: * configfs callback function invoked for * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, - struct config_group *group, - const char *name) +static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); - struct se_node_acl *se_nacl, *se_nacl_new; - struct srpt_node_acl *nacl; - int ret = 0; - u32 nexus_depth = 1; + struct srpt_port *sport = + container_of(se_nacl->se_tpg, struct srpt_port, port_tpg_1); + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); u8 i_port_id[16]; if (srpt_parse_i_port_id(i_port_id, name) < 0) { pr_err("invalid initiator port ID %s\n", name); - ret = -EINVAL; - goto err; + return -EINVAL; } - se_nacl_new = srpt_alloc_fabric_acl(tpg); - if (!se_nacl_new) { - ret = -ENOMEM; - goto err; - } - /* - * nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a node ACL from demo mode to explict - */ - se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name, - nexus_depth); - if (IS_ERR(se_nacl)) { - ret = PTR_ERR(se_nacl); - goto err; - } - /* Locate our struct srpt_node_acl and set sdev and i_port_id. */ - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); memcpy(&nacl->i_port_id[0], &i_port_id[0], 16); nacl->sport = sport; @@ -3633,29 +3612,22 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, list_add_tail(&nacl->list, &sport->port_acl_list); spin_unlock_irq(&sport->port_acl_lock); - return se_nacl; -err: - return ERR_PTR(ret); + return 0; } /* * configfs callback function invoked for * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static void srpt_drop_nodeacl(struct se_node_acl *se_nacl) +static void srpt_cleanup_nodeacl(struct se_node_acl *se_nacl) { - struct srpt_node_acl *nacl; - struct srpt_device *sdev; - struct srpt_port *sport; + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); + struct srpt_port *sport = nacl->sport; - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - sport = nacl->sport; - sdev = sport->sdev; spin_lock_irq(&sport->port_acl_lock); list_del(&nacl->list); spin_unlock_irq(&sport->port_acl_lock); - core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1); - srpt_release_fabric_acl(NULL, se_nacl); } static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size( @@ -3948,12 +3920,8 @@ static const struct target_core_fabric_ops srpt_template = { .fabric_drop_wwn = srpt_drop_tport, .fabric_make_tpg = srpt_make_tpg, .fabric_drop_tpg = srpt_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = srpt_make_nodeacl, - .fabric_drop_nodeacl = srpt_drop_nodeacl, + .fabric_init_nodeacl = srpt_init_nodeacl, + .fabric_cleanup_nodeacl = srpt_cleanup_nodeacl, .tfc_wwn_attrs = srpt_wwn_attrs, .tfc_tpg_base_attrs = srpt_tpg_attrs, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 5d69abcf9ed0..a7ab689f5524 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -847,53 +847,20 @@ static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) target_sess_cmd_list_set_waiting(sess->se_sess); } -static struct se_node_acl *tcm_qla2xxx_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int tcm_qla2xxx_init_nodeacl(struct se_node_acl *se_nacl, + const char *name) { - struct se_node_acl *se_nacl, *se_nacl_new; - struct tcm_qla2xxx_nacl *nacl; + struct tcm_qla2xxx_nacl *nacl = + container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); u64 wwnn; - u32 qla2xxx_nexus_depth; if (tcm_qla2xxx_parse_wwn(name, &wwnn, 1) < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; - se_nacl_new = tcm_qla2xxx_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); -/* #warning FIXME: Hardcoded qla2xxx_nexus depth in tcm_qla2xxx_make_nodeacl */ - qla2xxx_nexus_depth = 1; - - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, qla2xxx_nexus_depth); - if (IS_ERR(se_nacl)) { - tcm_qla2xxx_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - /* - * Locate our struct tcm_qla2xxx_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); nacl->nport_wwnn = wwnn; tcm_qla2xxx_format_wwn(&nacl->nport_name[0], TCM_QLA2XXX_NAMELEN, wwnn); - return se_nacl; -} - -static void tcm_qla2xxx_drop_nodeacl(struct se_node_acl *se_acl) -{ - struct se_portal_group *se_tpg = se_acl->se_tpg; - struct tcm_qla2xxx_nacl *nacl = container_of(se_acl, - struct tcm_qla2xxx_nacl, se_node_acl); - - core_tpg_del_initiator_node_acl(se_tpg, se_acl, 1); - kfree(nacl); + return 0; } /* Start items for tcm_qla2xxx_tpg_attrib_cit */ @@ -2024,12 +1991,7 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .fabric_drop_wwn = tcm_qla2xxx_drop_lport, .fabric_make_tpg = tcm_qla2xxx_make_tpg, .fabric_drop_tpg = tcm_qla2xxx_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = tcm_qla2xxx_make_nodeacl, - .fabric_drop_nodeacl = tcm_qla2xxx_drop_nodeacl, + .fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl, .tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs, .tfc_tpg_base_attrs = tcm_qla2xxx_tpg_attrs, @@ -2079,12 +2041,7 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .fabric_drop_wwn = tcm_qla2xxx_npiv_drop_lport, .fabric_make_tpg = tcm_qla2xxx_npiv_make_tpg, .fabric_drop_tpg = tcm_qla2xxx_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = tcm_qla2xxx_make_nodeacl, - .fabric_drop_nodeacl = tcm_qla2xxx_drop_nodeacl, + .fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl, .tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs, .tfc_tpg_base_attrs = tcm_qla2xxx_npiv_tpg_attrs, diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 568e478615a5..7d9ff8cbbb5d 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -874,43 +874,19 @@ static struct se_node_acl *lio_tpg_alloc_fabric_acl( return &acl->se_node_acl; } -static struct se_node_acl *lio_target_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int lio_target_init_nodeacl(struct se_node_acl *se_nacl, + const char *name) { - struct config_group *stats_cg; - struct iscsi_node_acl *acl; - struct se_node_acl *se_nacl_new, *se_nacl; - struct iscsi_portal_group *tpg = container_of(se_tpg, - struct iscsi_portal_group, tpg_se_tpg); - u32 cmdsn_depth; - - se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - cmdsn_depth = tpg->tpg_attrib.default_cmdsn_depth; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NdoeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, cmdsn_depth); - if (IS_ERR(se_nacl)) - return se_nacl; - - acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - stats_cg = &se_nacl->acl_fabric_stat_group; + struct iscsi_node_acl *acl = + container_of(se_nacl, struct iscsi_node_acl, se_node_acl); + struct config_group *stats_cg = &se_nacl->acl_fabric_stat_group; stats_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, GFP_KERNEL); if (!stats_cg->default_groups) { pr_err("Unable to allocate memory for" " stats_cg->default_groups\n"); - core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); - kfree(acl); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } stats_cg->default_groups[0] = &acl->node_stat_grps.iscsi_sess_stats_group; @@ -918,13 +894,11 @@ static struct se_node_acl *lio_target_make_nodeacl( config_group_init_type_name(&acl->node_stat_grps.iscsi_sess_stats_group, "iscsi_sess_stats", &iscsi_stat_sess_cit); - return se_nacl; + return 0; } -static void lio_target_drop_nodeacl( - struct se_node_acl *se_nacl) +static void lio_target_cleanup_nodeacl( struct se_node_acl *se_nacl) { - struct se_portal_group *se_tpg = se_nacl->se_tpg; struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); struct config_item *df_item; @@ -938,9 +912,6 @@ static void lio_target_drop_nodeacl( config_item_put(df_item); } kfree(stats_cg->default_groups); - - core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); - kfree(acl); } /* End items for lio_target_acl_cit */ @@ -2020,8 +1991,8 @@ const struct target_core_fabric_ops iscsi_ops = { .fabric_drop_tpg = lio_target_tiqn_deltpg, .fabric_make_np = lio_target_call_addnptotpg, .fabric_drop_np = lio_target_call_delnpfromtpg, - .fabric_make_nodeacl = lio_target_make_nodeacl, - .fabric_drop_nodeacl = lio_target_drop_nodeacl, + .fabric_init_nodeacl = lio_target_init_nodeacl, + .fabric_cleanup_nodeacl = lio_target_cleanup_nodeacl, .tfc_discovery_attrs = lio_target_discovery_auth_attrs, .tfc_wwn_attrs = lio_target_wwn_attrs, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 0d70d4f58e43..3a47fe46fbca 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2085,40 +2085,13 @@ static ssize_t sbp_format_wwn(char *buf, size_t len, u64 wwn) return snprintf(buf, len, "%016llx", wwn); } -static struct se_node_acl *sbp_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int sbp_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct se_node_acl *se_nacl, *se_nacl_new; u64 guid = 0; - u32 nexus_depth = 1; if (sbp_parse_wwn(name, &guid) < 0) - return ERR_PTR(-EINVAL); - - se_nacl_new = sbp_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - sbp_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - - return se_nacl; -} - -static void sbp_drop_nodeacl(struct se_node_acl *se_acl) -{ - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(se_acl); + return -EINVAL; + return 0; } static int sbp_post_link_lun( @@ -2518,8 +2491,7 @@ static const struct target_core_fabric_ops sbp_ops = { .fabric_pre_unlink = sbp_pre_unlink_lun, .fabric_make_np = NULL, .fabric_drop_np = NULL, - .fabric_make_nodeacl = sbp_make_nodeacl, - .fabric_drop_nodeacl = sbp_drop_nodeacl, + .fabric_init_nodeacl = sbp_init_nodeacl, .tfc_wwn_attrs = sbp_wwn_attrs, .tfc_tpg_base_attrs = sbp_tpg_base_attrs, diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 1f7886bb16bf..f4d9467c3e14 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -458,10 +458,11 @@ static void target_fabric_nacl_base_release(struct config_item *item) { struct se_node_acl *se_nacl = container_of(to_config_group(item), struct se_node_acl, acl_group); - struct se_portal_group *se_tpg = se_nacl->se_tpg; - struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; + struct target_fabric_configfs *tf = se_nacl->se_tpg->se_tpg_wwn->wwn_tf; - tf->tf_ops.fabric_drop_nodeacl(se_nacl); + if (tf->tf_ops.fabric_cleanup_nodeacl) + tf->tf_ops.fabric_cleanup_nodeacl(se_nacl); + core_tpg_del_initiator_node_acl(se_nacl); } static struct configfs_item_operations target_fabric_nacl_base_item_ops = { @@ -501,15 +502,18 @@ static struct config_group *target_fabric_make_nodeacl( struct se_node_acl *se_nacl; struct config_group *nacl_cg; - if (!tf->tf_ops.fabric_make_nodeacl) { - pr_err("tf->tf_ops.fabric_make_nodeacl is NULL\n"); - return ERR_PTR(-ENOSYS); - } - - se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name); + se_nacl = core_tpg_add_initiator_node_acl(se_tpg, name); if (IS_ERR(se_nacl)) return ERR_CAST(se_nacl); + if (tf->tf_ops.fabric_init_nodeacl) { + int ret = tf->tf_ops.fabric_init_nodeacl(se_nacl, name); + if (ret) { + core_tpg_del_initiator_node_acl(se_nacl); + return ERR_PTR(ret); + } + } + nacl_cg = &se_nacl->acl_group; nacl_cg->default_groups = se_nacl->acl_default_groups; nacl_cg->default_groups[0] = &se_nacl->acl_attrib_group; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 75338c348be4..058ca71cda81 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -59,6 +59,9 @@ struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32); int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, u32, struct se_device *); void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *); +struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *tpg, + const char *initiatorname); +void core_tpg_del_initiator_node_acl(struct se_node_acl *acl); /* target_core_transport.c */ extern struct kmem_cache *se_tmr_req_cache; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 5a9deb2b24af..78dd53ada04a 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -353,17 +353,11 @@ void core_tpg_clear_object_luns(struct se_portal_group *tpg) } EXPORT_SYMBOL(core_tpg_clear_object_luns); -/* core_tpg_add_initiator_node_acl(): - * - * - */ struct se_node_acl *core_tpg_add_initiator_node_acl( struct se_portal_group *tpg, - struct se_node_acl *se_nacl, - const char *initiatorname, - u32 queue_depth) + const char *initiatorname) { - struct se_node_acl *acl = NULL; + struct se_node_acl *acl; spin_lock_irq(&tpg->acl_node_lock); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); @@ -374,14 +368,6 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); spin_unlock_irq(&tpg->acl_node_lock); - /* - * Release the locally allocated struct se_node_acl - * because * core_tpg_add_initiator_node_acl() returned - * a pointer to an existing demo mode node ACL. - */ - if (se_nacl) - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, - se_nacl); goto done; } @@ -394,16 +380,11 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( } spin_unlock_irq(&tpg->acl_node_lock); - if (!se_nacl) { + acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); + if (!acl) { pr_err("struct se_node_acl pointer is NULL\n"); return ERR_PTR(-EINVAL); } - /* - * For v4.x logic the se_node_acl_s is hanging off a fabric - * dependent structure allocated via - * struct target_core_fabric_ops->fabric_make_nodeacl() - */ - acl = se_nacl; INIT_LIST_HEAD(&acl->acl_list); INIT_LIST_HEAD(&acl->acl_sess_list); @@ -412,7 +393,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( spin_lock_init(&acl->device_list_lock); spin_lock_init(&acl->nacl_sess_lock); atomic_set(&acl->acl_pr_ref_count, 0); - acl->queue_depth = queue_depth; + if (tpg->se_tpg_tfo->tpg_get_default_depth) + acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); + else + acl->queue_depth = 1; snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); acl->se_tpg = tpg; acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); @@ -443,17 +427,10 @@ done: return acl; } -EXPORT_SYMBOL(core_tpg_add_initiator_node_acl); -/* core_tpg_del_initiator_node_acl(): - * - * - */ -int core_tpg_del_initiator_node_acl( - struct se_portal_group *tpg, - struct se_node_acl *acl, - int force) +void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) { + struct se_portal_group *tpg = acl->se_tpg; LIST_HEAD(sess_list); struct se_session *sess, *sess_tmp; unsigned long flags; @@ -505,9 +482,8 @@ int core_tpg_del_initiator_node_acl( tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname); - return 0; + tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); } -EXPORT_SYMBOL(core_tpg_del_initiator_node_acl); /* core_tpg_set_initiator_node_queue_depth(): * diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 46b4387460e5..e179fdf76ddb 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -198,48 +198,17 @@ static struct configfs_attribute *ft_nacl_base_attrs[] = { * Add ACL for an initiator. The ACL is named arbitrarily. * The port_name and/or node_name are attributes. */ -static struct se_node_acl *ft_add_acl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int ft_init_nodeacl(struct se_node_acl *nacl, const char *name) { - struct ft_node_acl *acl; - struct ft_tpg *tpg; + struct ft_node_acl *acl = + container_of(nacl, struct ft_node_acl, se_node_acl); u64 wwpn; - u32 q_depth; - - pr_debug("add acl %s\n", name); - tpg = container_of(se_tpg, struct ft_tpg, se_tpg); if (ft_parse_wwn(name, &wwpn, 1) < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; - acl = kzalloc(sizeof(struct ft_node_acl), GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); acl->node_auth.port_name = wwpn; - - q_depth = 32; /* XXX bogus default - get from tpg? */ - return core_tpg_add_initiator_node_acl(&tpg->se_tpg, - &acl->se_node_acl, name, q_depth); -} - -static void ft_del_acl(struct se_node_acl *se_acl) -{ - struct se_portal_group *se_tpg = se_acl->se_tpg; - struct ft_tpg *tpg; - struct ft_node_acl *acl = container_of(se_acl, - struct ft_node_acl, se_node_acl); - - pr_debug("del acl %s\n", - config_item_name(&se_acl->acl_group.cg_item)); - - tpg = container_of(se_tpg, struct ft_tpg, se_tpg); - pr_debug("del acl %p se_acl %p tpg %p se_tpg %p\n", - acl, se_acl, tpg, &tpg->se_tpg); - - core_tpg_del_initiator_node_acl(&tpg->se_tpg, se_acl, 1); - kfree(acl); + return 0; } struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) @@ -542,12 +511,7 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .fabric_drop_wwn = &ft_del_wwn, .fabric_make_tpg = &ft_add_tpg, .fabric_drop_tpg = &ft_del_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = &ft_add_acl, - .fabric_drop_nodeacl = &ft_del_acl, + .fabric_init_nodeacl = &ft_init_nodeacl, .tfc_wwn_attrs = ft_wwn_attrs, .tfc_tpg_nacl_base_attrs = ft_nacl_base_attrs, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index b7a78f72fb0e..fccb0ccb355a 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1476,40 +1476,11 @@ static const char *usbg_check_wwn(const char *name) return n; } -static struct se_node_acl *usbg_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int usbg_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct se_node_acl *se_nacl, *se_nacl_new; - u32 nexus_depth; - const char *wnn_name; - - wnn_name = usbg_check_wwn(name); - if (!wnn_name) - return ERR_PTR(-EINVAL); - se_nacl_new = usbg_alloc_fabric_acl(se_tpg); - if (!(se_nacl_new)) - return ERR_PTR(-ENOMEM); - - nexus_depth = 1; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - usbg_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - return se_nacl; -} - -static void usbg_drop_nodeacl(struct se_node_acl *se_acl) -{ - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(se_acl); + if (!usbg_check_wwn(name)) + return -EINVAL; + return 0; } struct usbg_tpg *the_only_tpg_I_currently_have; @@ -1879,10 +1850,7 @@ static const struct target_core_fabric_ops usbg_ops = { .fabric_drop_tpg = usbg_drop_tpg, .fabric_post_link = usbg_port_link, .fabric_pre_unlink = usbg_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = usbg_make_nodeacl, - .fabric_drop_nodeacl = usbg_drop_nodeacl, + .fabric_init_nodeacl = usbg_init_nodeacl, .tfc_wwn_attrs = usbg_wwn_attrs, .tfc_tpg_base_attrs = usbg_base_attrs, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index fb8a93d5bfba..8faa7f4abf23 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1798,41 +1798,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, mutex_unlock(&vhost_scsi_mutex); } -static struct se_node_acl * -vhost_scsi_make_nodeacl(struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) -{ - struct se_node_acl *se_nacl, *se_nacl_new; - u32 nexus_depth; - - /* vhost_scsi_parse_wwn(name, &wwpn, 1) < 0) - return ERR_PTR(-EINVAL); */ - se_nacl_new = vhost_scsi_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - nexus_depth = 1; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - vhost_scsi_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - - return se_nacl; -} - -static void vhost_scsi_drop_nodeacl(struct se_node_acl *se_acl) -{ - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(se_acl); -} - static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus, struct se_session *se_sess) { @@ -2330,10 +2295,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .fabric_drop_tpg = vhost_scsi_drop_tpg, .fabric_post_link = vhost_scsi_port_link, .fabric_pre_unlink = vhost_scsi_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = vhost_scsi_make_nodeacl, - .fabric_drop_nodeacl = vhost_scsi_drop_nodeacl, .tfc_wwn_attrs = vhost_scsi_wwn_attrs, .tfc_tpg_base_attrs = vhost_scsi_tpg_attrs, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index c0d4aee0eeb5..8a130ab71733 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1980,12 +1980,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .fabric_drop_tpg = scsiback_drop_tpg, .fabric_post_link = scsiback_port_link, .fabric_pre_unlink = scsiback_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, -#if 0 - .fabric_make_nodeacl = scsiback_make_nodeacl, - .fabric_drop_nodeacl = scsiback_drop_nodeacl, -#endif .tfc_wwn_attrs = scsiback_wwn_attrs, .tfc_tpg_base_attrs = scsiback_tpg_attrs, diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 24c8d9d0d946..8b570c49f4d1 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -88,9 +88,8 @@ struct target_core_fabric_ops { struct se_tpg_np *(*fabric_make_np)(struct se_portal_group *, struct config_group *, const char *); void (*fabric_drop_np)(struct se_tpg_np *); - struct se_node_acl *(*fabric_make_nodeacl)(struct se_portal_group *, - struct config_group *, const char *); - void (*fabric_drop_nodeacl)(struct se_node_acl *); + int (*fabric_init_nodeacl)(struct se_node_acl *, const char *); + void (*fabric_cleanup_nodeacl)(struct se_node_acl *); struct configfs_attribute **tfc_discovery_attrs; struct configfs_attribute **tfc_wwn_attrs; @@ -174,10 +173,6 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, unsigned char *); void core_tpg_clear_object_luns(struct se_portal_group *); -struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *, - struct se_node_acl *, const char *, u32); -int core_tpg_del_initiator_node_acl(struct se_portal_group *, - struct se_node_acl *, int); int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *, unsigned char *, u32, int); int core_tpg_set_initiator_node_tag(struct se_portal_group *, From e413f4727037e826c55a639c713a221006b6d61d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:15 +0200 Subject: [PATCH 048/839] target: refactor node ACL allocation Split out two common helpers to share code for allocating and initializing node ACLs. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tpg.c | 135 +++++++++++++------------------ 1 file changed, 57 insertions(+), 78 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 78dd53ada04a..0cd86ff9a792 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -254,24 +254,12 @@ static int core_create_device_list_for_node(struct se_node_acl *nacl) return 0; } -/* core_tpg_check_initiator_node_acl() - * - * - */ -struct se_node_acl *core_tpg_check_initiator_node_acl( - struct se_portal_group *tpg, - unsigned char *initiatorname) +static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, + const unsigned char *initiatorname) { struct se_node_acl *acl; - acl = core_tpg_get_initiator_node_acl(tpg, initiatorname); - if (acl) - return acl; - - if (!tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) - return NULL; - - acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); + acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); if (!acl) return NULL; @@ -289,20 +277,60 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); acl->se_tpg = tpg; acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - acl->dynamic_node_acl = 1; tpg->se_tpg_tfo->set_default_node_attributes(acl); - if (core_create_device_list_for_node(acl) < 0) { - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); - return NULL; - } + if (core_create_device_list_for_node(acl) < 0) + goto out_free_acl; + if (core_set_queue_depth_for_node(tpg, acl) < 0) + goto out_free_device_list; - if (core_set_queue_depth_for_node(tpg, acl) < 0) { - core_free_device_list_for_node(acl, tpg); - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + return acl; + +out_free_device_list: + core_free_device_list_for_node(acl, tpg); +out_free_acl: + tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + return NULL; +} + +static void target_add_node_acl(struct se_node_acl *acl) +{ + struct se_portal_group *tpg = acl->se_tpg; + + spin_lock_irq(&tpg->acl_node_lock); + list_add_tail(&acl->acl_list, &tpg->acl_node_list); + tpg->num_node_acls++; + spin_unlock_irq(&tpg->acl_node_lock); + + pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s" + " Initiator Node: %s\n", + tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->tpg_get_tag(tpg), + acl->dynamic_node_acl ? "DYNAMIC" : "", + acl->queue_depth, + tpg->se_tpg_tfo->get_fabric_name(), + acl->initiatorname); +} + +struct se_node_acl *core_tpg_check_initiator_node_acl( + struct se_portal_group *tpg, + unsigned char *initiatorname) +{ + struct se_node_acl *acl; + + acl = core_tpg_get_initiator_node_acl(tpg, initiatorname); + if (acl) + return acl; + + if (!tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) return NULL; - } + + acl = target_alloc_node_acl(tpg, initiatorname); + if (!acl) + return NULL; + acl->dynamic_node_acl = 1; + /* * Here we only create demo-mode MappedLUNs from the active * TPG LUNs if the fabric is not explicitly asking for @@ -312,16 +340,7 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) != 1)) core_tpg_add_node_to_devs(acl, tpg); - spin_lock_irq(&tpg->acl_node_lock); - list_add_tail(&acl->acl_list, &tpg->acl_node_list); - tpg->num_node_acls++; - spin_unlock_irq(&tpg->acl_node_lock); - - pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s" - " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), initiatorname); - + target_add_node_acl(acl); return acl; } EXPORT_SYMBOL(core_tpg_check_initiator_node_acl); @@ -368,7 +387,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); spin_unlock_irq(&tpg->acl_node_lock); - goto done; + return acl; } pr_err("ACL entry for %s Initiator" @@ -380,51 +399,11 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( } spin_unlock_irq(&tpg->acl_node_lock); - acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); - if (!acl) { - pr_err("struct se_node_acl pointer is NULL\n"); - return ERR_PTR(-EINVAL); - } - - INIT_LIST_HEAD(&acl->acl_list); - INIT_LIST_HEAD(&acl->acl_sess_list); - kref_init(&acl->acl_kref); - init_completion(&acl->acl_free_comp); - spin_lock_init(&acl->device_list_lock); - spin_lock_init(&acl->nacl_sess_lock); - atomic_set(&acl->acl_pr_ref_count, 0); - if (tpg->se_tpg_tfo->tpg_get_default_depth) - acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); - else - acl->queue_depth = 1; - snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); - acl->se_tpg = tpg; - acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - - tpg->se_tpg_tfo->set_default_node_attributes(acl); - - if (core_create_device_list_for_node(acl) < 0) { - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + acl = target_alloc_node_acl(tpg, initiatorname); + if (!acl) return ERR_PTR(-ENOMEM); - } - - if (core_set_queue_depth_for_node(tpg, acl) < 0) { - core_free_device_list_for_node(acl, tpg); - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); - return ERR_PTR(-EINVAL); - } - - spin_lock_irq(&tpg->acl_node_lock); - list_add_tail(&acl->acl_list, &tpg->acl_node_list); - tpg->num_node_acls++; - spin_unlock_irq(&tpg->acl_node_lock); - -done: - pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s" - " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), initiatorname); + target_add_node_acl(acl); return acl; } From 144bc4c2a42a0f42a32c106d53f5bf2724fbf098 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2015 19:51:16 +0200 Subject: [PATCH 049/839] target: move node ACL allocation to core code Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 27 ------------------ drivers/infiniband/ulp/srpt/ib_srpt.c | 25 +---------------- drivers/infiniband/ulp/srpt/ib_srpt.h | 4 +-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 29 ++------------------ drivers/scsi/qla2xxx/tcm_qla2xxx.h | 4 +-- drivers/target/iscsi/iscsi_target_configfs.c | 26 +----------------- drivers/target/loopback/tcm_loop.c | 15 ---------- drivers/target/sbp/sbp_target.c | 14 ---------- drivers/target/target_core_configfs.c | 8 ------ drivers/target/target_core_tpg.c | 9 +++--- drivers/target/target_core_transport.c | 2 +- drivers/target/tcm_fc/tcm_fc.h | 2 +- drivers/target/tcm_fc/tfc_conf.c | 26 +----------------- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 14 ---------- drivers/vhost/scsi.c | 15 ---------- drivers/xen/xen-scsiback.c | 15 ---------- include/target/iscsi/iscsi_target_core.h | 2 +- include/target/target_core_fabric.h | 5 +--- 18 files changed, 18 insertions(+), 224 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 3e54809b3aa1..07e7ef3d7429 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -313,8 +313,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .tpg_check_demo_mode_cache = " + fabric_mod_name + "_check_true,\n" buf += " .tpg_check_demo_mode_write_protect = " + fabric_mod_name + "_check_true,\n" buf += " .tpg_check_prod_mode_write_protect = " + fabric_mod_name + "_check_false,\n" - buf += " .tpg_alloc_fabric_acl = " + fabric_mod_name + "_alloc_fabric_acl,\n" - buf += " .tpg_release_fabric_acl = " + fabric_mod_name + "_release_fabric_acl,\n" buf += " .tpg_get_inst_index = " + fabric_mod_name + "_tpg_get_inst_index,\n" buf += " .release_cmd = " + fabric_mod_name + "_release_cmd,\n" buf += " .shutdown_session = " + fabric_mod_name + "_shutdown_session,\n" @@ -624,31 +622,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): bufi += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(struct se_portal_group *,\n" bufi += " const char *, u32 *, char **);\n" - if re.search('alloc_fabric_acl\)\(', fo): - buf += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl;\n\n" - buf += " nacl = kzalloc(sizeof(struct " + fabric_mod_name + "_nacl), GFP_KERNEL);\n" - buf += " if (!nacl) {\n" - buf += " printk(KERN_ERR \"Unable to allocate struct " + fabric_mod_name + "_nacl\\n\");\n" - buf += " return NULL;\n" - buf += " }\n\n" - buf += " return &nacl->se_node_acl;\n" - buf += "}\n\n" - bufi += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *);\n" - - if re.search('release_fabric_acl\)\(', fo): - buf += "void " + fabric_mod_name + "_release_fabric_acl(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_nacl,\n" - buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - buf += " kfree(nacl);\n" - buf += "}\n\n" - bufi += "void " + fabric_mod_name + "_release_fabric_acl(struct se_portal_group *,\n" - bufi += " struct se_node_acl *);\n" - if re.search('tpg_get_inst_index\)\(', fo): buf += "u32 " + fabric_mod_name + "_tpg_get_inst_index(struct se_portal_group *se_tpg)\n" buf += "{\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index c1f3aa52f42b..38e51f4a17de 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3447,28 +3447,6 @@ static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, return (char *)tr_id->i_port_id; } -static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct srpt_node_acl *nacl; - - nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct srpt_node_acl\n"); - return NULL; - } - - return &nacl->nacl; -} - -static void srpt_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct srpt_node_acl *nacl; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - kfree(nacl); -} - static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -3883,6 +3861,7 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", + .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, .get_fabric_proto_ident = srpt_get_fabric_proto_ident, .tpg_get_wwn = srpt_get_fabric_wwn, @@ -3894,8 +3873,6 @@ static const struct target_core_fabric_ops srpt_template = { .tpg_check_demo_mode_cache = srpt_check_true, .tpg_check_demo_mode_write_protect = srpt_check_true, .tpg_check_prod_mode_write_protect = srpt_check_false, - .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl, - .tpg_release_fabric_acl = srpt_release_fabric_acl, .tpg_get_inst_index = srpt_tpg_get_inst_index, .release_cmd = srpt_release_cmd, .check_stop_free = srpt_check_stop_free, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 3dae156905de..355f6f5ce8b2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -410,15 +410,15 @@ struct srpt_device { /** * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). + * @nacl: Target core node ACL information. * @i_port_id: 128-bit SRP initiator port ID. * @sport: port information. - * @nacl: Target core node ACL information. * @list: Element of the per-HCA ACL list. */ struct srpt_node_acl { + struct se_node_acl nacl; u8 i_port_id[16]; struct srpt_port *sport; - struct se_node_acl nacl; struct list_head list; }; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index a7ab689f5524..cb376e5198d0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -338,29 +338,6 @@ static int tcm_qla2xxx_check_prot_fabric_only(struct se_portal_group *se_tpg) return tpg->tpg_attrib.fabric_prot_type; } -static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( - struct se_portal_group *se_tpg) -{ - struct tcm_qla2xxx_nacl *nacl; - - nacl = kzalloc(sizeof(struct tcm_qla2xxx_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct tcm_qla2xxx_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; -} - -static void tcm_qla2xxx_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct tcm_qla2xxx_nacl *nacl = container_of(se_nacl, - struct tcm_qla2xxx_nacl, se_node_acl); - kfree(nacl); -} - static u32 tcm_qla2xxx_tpg_get_inst_index(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -1949,6 +1926,7 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = { static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .module = THIS_MODULE, .name = "qla2xxx", + .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_get_fabric_name, .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, @@ -1964,8 +1942,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { tcm_qla2xxx_check_prod_write_protect, .tpg_check_prot_fabric_only = tcm_qla2xxx_check_prot_fabric_only, .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, - .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, @@ -2001,6 +1977,7 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .module = THIS_MODULE, .name = "qla2xxx_npiv", + .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, @@ -2014,8 +1991,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_prod_write_protect, .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, - .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 23295115c9fc..3d805a07061c 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -13,6 +13,8 @@ #include "qla_target.h" struct tcm_qla2xxx_nacl { + struct se_node_acl se_node_acl; + /* From libfc struct fc_rport->port_id */ u32 nport_id; /* Binary World Wide unique Node Name for remote FC Initiator Nport */ @@ -23,8 +25,6 @@ struct tcm_qla2xxx_nacl { struct qla_tgt_sess *qla_tgt_sess; /* Pointer to TCM FC nexus */ struct se_session *nport_nexus; - /* Returned by tcm_qla2xxx_make_nodeacl() */ - struct se_node_acl se_node_acl; }; struct tcm_qla2xxx_tpg_attrib { diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 7d9ff8cbbb5d..643024c1ee8d 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -860,20 +860,6 @@ static struct configfs_attribute *lio_target_initiator_attrs[] = { NULL, }; -static struct se_node_acl *lio_tpg_alloc_fabric_acl( - struct se_portal_group *se_tpg) -{ - struct iscsi_node_acl *acl; - - acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL); - if (!acl) { - pr_err("Unable to allocate memory for struct iscsi_node_acl\n"); - return NULL; - } - - return &acl->se_node_acl; -} - static int lio_target_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { @@ -1868,15 +1854,6 @@ static int lio_tpg_check_prot_fabric_only( return tpg->tpg_attrib.fabric_prot_type; } -static void lio_tpg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_acl) -{ - struct iscsi_node_acl *acl = container_of(se_acl, - struct iscsi_node_acl, se_node_acl); - kfree(acl); -} - /* * Called with spin_lock_bh(struct se_portal_group->session_lock) held.. * @@ -1952,6 +1929,7 @@ static void lio_release_cmd(struct se_cmd *se_cmd) const struct target_core_fabric_ops iscsi_ops = { .module = THIS_MODULE, .name = "iscsi", + .node_acl_size = sizeof(struct iscsi_node_acl), .get_fabric_name = iscsi_get_fabric_name, .get_fabric_proto_ident = iscsi_get_fabric_proto_ident, .tpg_get_wwn = lio_tpg_get_endpoint_wwn, @@ -1967,8 +1945,6 @@ const struct target_core_fabric_ops iscsi_ops = { .tpg_check_prod_mode_write_protect = lio_tpg_check_prod_mode_write_protect, .tpg_check_prot_fabric_only = &lio_tpg_check_prot_fabric_only, - .tpg_alloc_fabric_acl = lio_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = lio_tpg_release_fabric_acl, .tpg_get_inst_index = lio_tpg_get_inst_index, .check_stop_free = lio_check_stop_free, .release_cmd = lio_release_cmd, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 12b85b03e9ae..5a71c9f2ee66 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -698,19 +698,6 @@ static int tcm_loop_check_prot_fabric_only(struct se_portal_group *se_tpg) return tl_tpg->tl_fabric_prot_type; } -static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl( - struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void tcm_loop_tpg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 tcm_loop_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1383,8 +1370,6 @@ static const struct target_core_fabric_ops loop_ops = { .tpg_check_prod_mode_write_protect = tcm_loop_check_prod_mode_write_protect, .tpg_check_prot_fabric_only = tcm_loop_check_prot_fabric_only, - .tpg_alloc_fabric_acl = tcm_loop_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_loop_tpg_release_fabric_acl, .tpg_get_inst_index = tcm_loop_get_inst_index, .check_stop_free = tcm_loop_check_stop_free, .release_cmd = tcm_loop_release_cmd, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 3a47fe46fbca..5df2a61714fb 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1707,18 +1707,6 @@ static u16 sbp_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static struct se_node_acl *sbp_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void sbp_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 sbp_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -2465,8 +2453,6 @@ static const struct target_core_fabric_ops sbp_ops = { .tpg_check_demo_mode_cache = sbp_check_true, .tpg_check_demo_mode_write_protect = sbp_check_false, .tpg_check_prod_mode_write_protect = sbp_check_false, - .tpg_alloc_fabric_acl = sbp_alloc_fabric_acl, - .tpg_release_fabric_acl = sbp_release_fabric_acl, .tpg_get_inst_index = sbp_tpg_get_inst_index, .release_cmd = sbp_release_cmd, .shutdown_session = sbp_shutdown_session, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index c87ca0c50545..2b10499264d3 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -354,14 +354,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->tpg_check_prod_mode_write_protect()\n"); return -EINVAL; } - if (!tfo->tpg_alloc_fabric_acl) { - pr_err("Missing tfo->tpg_alloc_fabric_acl()\n"); - return -EINVAL; - } - if (!tfo->tpg_release_fabric_acl) { - pr_err("Missing tfo->tpg_release_fabric_acl()\n"); - return -EINVAL; - } if (!tfo->tpg_get_inst_index) { pr_err("Missing tfo->tpg_get_inst_index()\n"); return -EINVAL; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 0cd86ff9a792..42f3bd9561c8 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -259,7 +259,8 @@ static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, { struct se_node_acl *acl; - acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); + acl = kzalloc(max(sizeof(*acl), tpg->se_tpg_tfo->node_acl_size), + GFP_KERNEL); if (!acl) return NULL; @@ -290,7 +291,7 @@ static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, out_free_device_list: core_free_device_list_for_node(acl, tpg); out_free_acl: - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + kfree(acl); return NULL; } @@ -461,7 +462,7 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname); - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + kfree(acl); } /* core_tpg_set_initiator_node_queue_depth(): @@ -725,7 +726,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); - se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl); + kfree(nacl); spin_lock_irq(&se_tpg->acl_node_lock); } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9dc1bd5f0e6b..0fc4f5cb4bea 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -533,7 +533,7 @@ void transport_deregister_session(struct se_session *se_sess) spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); core_tpg_wait_for_nacl_pr_ref(se_nacl); core_free_device_list_for_node(se_nacl, se_tpg); - se_tfo->tpg_release_fabric_acl(se_tpg, se_nacl); + kfree(se_nacl); comp_nacl = false; spin_lock_irqsave(&se_tpg->acl_node_lock, flags); diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index 881deb3d499a..4ceaeb9a4b93 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -80,8 +80,8 @@ struct ft_node_auth { * Node ACL for FC remote port session. */ struct ft_node_acl { - struct ft_node_auth node_auth; struct se_node_acl se_node_acl; + struct ft_node_auth node_auth; }; struct ft_lun { diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index e179fdf76ddb..8da159352315 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -236,29 +236,6 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) return found; } -static struct se_node_acl *ft_tpg_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct ft_node_acl *acl; - - acl = kzalloc(sizeof(*acl), GFP_KERNEL); - if (!acl) { - pr_err("Unable to allocate struct ft_node_acl\n"); - return NULL; - } - pr_debug("acl %p\n", acl); - return &acl->se_node_acl; -} - -static void ft_tpg_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_acl) -{ - struct ft_node_acl *acl = container_of(se_acl, - struct ft_node_acl, se_node_acl); - - pr_debug("acl %p\n", acl); - kfree(acl); -} - /* * local_port port_group (tpg) ops. */ @@ -474,6 +451,7 @@ static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg) static const struct target_core_fabric_ops ft_fabric_ops = { .module = THIS_MODULE, .name = "fc", + .node_acl_size = sizeof(struct ft_node_acl), .get_fabric_name = ft_get_fabric_name, .get_fabric_proto_ident = fc_get_fabric_proto_ident, .tpg_get_wwn = ft_get_fabric_wwn, @@ -485,8 +463,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .tpg_check_demo_mode_cache = ft_check_false, .tpg_check_demo_mode_write_protect = ft_check_false, .tpg_check_prod_mode_write_protect = ft_check_false, - .tpg_alloc_fabric_acl = ft_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = ft_tpg_release_fabric_acl, .tpg_get_inst_index = ft_tpg_get_inst_index, .check_stop_free = ft_check_stop_free, .release_cmd = ft_release_cmd, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index fccb0ccb355a..82f4da8bfe41 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1373,18 +1373,6 @@ static char *usbg_parse_pr_out_transport_id( return tid; } -static struct se_node_acl *usbg_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void usbg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 usbg_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1825,8 +1813,6 @@ static const struct target_core_fabric_ops usbg_ops = { .tpg_check_demo_mode_cache = usbg_check_false, .tpg_check_demo_mode_write_protect = usbg_check_false, .tpg_check_prod_mode_write_protect = usbg_check_false, - .tpg_alloc_fabric_acl = usbg_alloc_fabric_acl, - .tpg_release_fabric_acl = usbg_release_fabric_acl, .tpg_get_inst_index = usbg_tpg_get_inst_index, .release_cmd = usbg_release_cmd, .shutdown_session = usbg_shutdown_session, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8faa7f4abf23..b93c03935964 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -425,19 +425,6 @@ static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg) return tpg->tv_fabric_prot_type; } -static struct se_node_acl * -vhost_scsi_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void -vhost_scsi_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -2268,8 +2255,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .tpg_check_demo_mode_write_protect = vhost_scsi_check_false, .tpg_check_prod_mode_write_protect = vhost_scsi_check_false, .tpg_check_prot_fabric_only = vhost_scsi_check_prot_fabric_only, - .tpg_alloc_fabric_acl = vhost_scsi_alloc_fabric_acl, - .tpg_release_fabric_acl = vhost_scsi_release_fabric_acl, .tpg_get_inst_index = vhost_scsi_tpg_get_inst_index, .release_cmd = vhost_scsi_release_cmd, .check_stop_free = vhost_scsi_check_stop_free, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 8a130ab71733..3756953b385b 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1449,19 +1449,6 @@ static void scsiback_drop_tport(struct se_wwn *wwn) kfree(tport); } -static struct se_node_acl * -scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void -scsiback_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1952,8 +1939,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .tpg_check_demo_mode_cache = scsiback_check_true, .tpg_check_demo_mode_write_protect = scsiback_check_false, .tpg_check_prod_mode_write_protect = scsiback_check_false, - .tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl, - .tpg_release_fabric_acl = scsiback_release_fabric_acl, .tpg_get_inst_index = scsiback_tpg_get_inst_index, .check_stop_free = scsiback_check_stop_free, .release_cmd = scsiback_release_cmd, diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 39f3d181d39d..f7b16ca67504 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -745,10 +745,10 @@ struct iscsi_node_stat_grps { }; struct iscsi_node_acl { + struct se_node_acl se_node_acl; struct iscsi_node_attrib node_attrib; struct iscsi_node_auth node_auth; struct iscsi_node_stat_grps node_stat_grps; - struct se_node_acl se_node_acl; }; struct iscsi_tpg_attrib { diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 8b570c49f4d1..e5414744bf2d 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,6 +4,7 @@ struct target_core_fabric_ops { struct module *module; const char *name; + size_t node_acl_size; char *(*get_fabric_name)(void); u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); @@ -36,10 +37,6 @@ struct target_core_fabric_ops { * WRITE_STRIP and READ_INSERT operations. */ int (*tpg_check_prot_fabric_only)(struct se_portal_group *); - struct se_node_acl *(*tpg_alloc_fabric_acl)( - struct se_portal_group *); - void (*tpg_release_fabric_acl)(struct se_portal_group *, - struct se_node_acl *); u32 (*tpg_get_inst_index)(struct se_portal_group *); /* * Optional to release struct se_cmd and fabric dependent allocated From 57636388af32c2c7db3919e8d53f4979b7c7c307 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:48 +0900 Subject: [PATCH 050/839] target: Fix inconsistent address passed to kunmap_atomic() in sbc_dif_copy_prot() In sbc_dif_copy_prot(), the addresses passed to kunmap_atomic() are inconsistent with the addresses which are mapped by kmap_atomic(). That could be problematic if an SG element has its length larger than PAGE_SIZE as kunmap_atomic() will attempt to unmap different page. Signed-off-by: Akinobu Mita Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Cc: linux-scsi@vger.kernel.org Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index d441975604db..d615a11dc9c9 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1300,13 +1300,14 @@ void sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, copied += len; psg_len -= len; + kunmap_atomic(addr - sg->offset - offset); + if (offset >= sg->length) { sg = sg_next(sg); offset = 0; } - kunmap_atomic(addr); } - kunmap_atomic(paddr); + kunmap_atomic(paddr - psg->offset); } } EXPORT_SYMBOL(sbc_dif_copy_prot); From 5835812f13e56483f2c4907664bb43b0a78546b6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:49 +0900 Subject: [PATCH 051/839] target: ensure se_cmd->t_prot_sg is allocated when required Even if the device backend is initialized with protection info is enabled, some requests don't have the protection info attached for WRITE SAME command issued by block device helpers, WRITE command with WRPROTECT=0 by SG_IO ioctl, etc. So when TCM loopback fabric module is used, se_cmd->t_prot_sg is NULL for these requests and performing WRITE_INSERT of PI using software emulation by sbc_dif_generate() causes kernel crash. To fix this, introduce SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC for se_cmd_flags, which is used to determine that se_cmd->t_prot_sg needs to be allocated or use pre-allocated protection information by scsi mid-layer. Signed-off-by: Akinobu Mita Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Cc: linux-scsi@vger.kernel.org Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 27 ++++++++++++++------------ include/target/target_core_base.h | 1 + 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0fc4f5cb4bea..85b021e749e6 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1448,6 +1448,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess if (sgl_prot_count) { se_cmd->t_prot_sg = sgl_prot; se_cmd->t_prot_nents = sgl_prot_count; + se_cmd->se_cmd_flags |= SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC; } /* @@ -2178,6 +2179,12 @@ static inline void transport_reset_sgl_orig(struct se_cmd *cmd) static inline void transport_free_pages(struct se_cmd *cmd) { + if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { + transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); + cmd->t_prot_sg = NULL; + cmd->t_prot_nents = 0; + } + if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { /* * Release special case READ buffer payload required for @@ -2201,10 +2208,6 @@ static inline void transport_free_pages(struct se_cmd *cmd) transport_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents); cmd->t_bidi_data_sg = NULL; cmd->t_bidi_data_nents = 0; - - transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); - cmd->t_prot_sg = NULL; - cmd->t_prot_nents = 0; } /** @@ -2343,6 +2346,14 @@ transport_generic_new_cmd(struct se_cmd *cmd) int ret = 0; bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); + if (cmd->prot_op != TARGET_PROT_NORMAL && + !(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { + ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents, + cmd->prot_length, true); + if (ret < 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + /* * Determine is the TCM fabric module has already allocated physical * memory, and is directly calling transport_generic_map_mem_to_cmd() @@ -2368,14 +2379,6 @@ transport_generic_new_cmd(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - if (cmd->prot_op != TARGET_PROT_NORMAL) { - ret = target_alloc_sgl(&cmd->t_prot_sg, - &cmd->t_prot_nents, - cmd->prot_length, true); - if (ret < 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } - ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, cmd->data_length, zero_flag); if (ret < 0) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7f4c7de3a4ce..9f8f5a1b527d 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -161,6 +161,7 @@ enum se_cmd_flags_table { SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ From 10081fb532a2a2216b7d8e4ad585c985075b6f60 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:50 +0900 Subject: [PATCH 052/839] lib: introduce crc_t10dif_update() This introduces crc_t10dif_update() which enables to calculate CRC for a block which straddles multiple SG elements by calling multiple times. This also converts crc_t10dif() to use crc_t10dif_update() as they are almost same. (remove extra function call in crc_t10dif() and crc_t10dif_update - Tim + Herbert) Signed-off-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Tim Chen Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Signed-off-by: Nicholas Bellinger --- include/linux/crc-t10dif.h | 1 + lib/crc-t10dif.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index cf53d0773ce3..d81961e9e37d 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -9,5 +9,6 @@ extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); extern __u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); #endif diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index dfe6ec17c0a5..1ad33e555805 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -19,7 +19,7 @@ static struct crypto_shash *crct10dif_tfm; static struct static_key crct10dif_fallback __read_mostly; -__u16 crc_t10dif(const unsigned char *buffer, size_t len) +__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) { struct { struct shash_desc shash; @@ -28,17 +28,23 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len) int err; if (static_key_false(&crct10dif_fallback)) - return crc_t10dif_generic(0, buffer, len); + return crc_t10dif_generic(crc, buffer, len); desc.shash.tfm = crct10dif_tfm; desc.shash.flags = 0; - *(__u16 *)desc.ctx = 0; + *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); BUG_ON(err); return *(__u16 *)desc.ctx; } +EXPORT_SYMBOL(crc_t10dif_update); + +__u16 crc_t10dif(const unsigned char *buffer, size_t len) +{ + return crc_t10dif_update(0, buffer, len); +} EXPORT_SYMBOL(crc_t10dif); static int __init crc_t10dif_mod_init(void) From 18213afbd8cee9f8a3ac5294dedf543a06559c8b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:51 +0900 Subject: [PATCH 053/839] target: handle odd SG mapping for data transfer memory sbc_dif_generate() and sbc_dif_verify() currently assume that each SG element for data transfer memory doesn't straddle the block size boundary. However, when using SG_IO ioctl, we can choose the data transfer memory which doesn't satisfy that alignment requirement. In order to handle such cases correctly, this change inverts the outer loop to iterate data transfer memory and the inner loop to iterate protection information and enables to calculate CRC for a block which straddles multiple SG elements. Signed-off-by: Akinobu Mita Cc: Tim Chen Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Cc: linux-scsi@vger.kernel.org Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 124 +++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index d615a11dc9c9..31b2ae356120 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1183,27 +1183,50 @@ sbc_dif_generate(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = cmd->t_prot_sg; + struct scatterlist *dsg = cmd->t_data_sg, *psg; sector_t sector = cmd->t_task_lba; void *daddr, *paddr; int i, j, offset = 0; + unsigned int block_size = dev->dev_attrib.block_size; - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) { paddr = kmap_atomic(sg_page(psg)) + psg->offset; + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { + for (j = 0; j < psg->length; + j += sizeof(struct se_dif_v1_tuple)) { + __u16 crc; + unsigned int avail; - if (offset >= psg->length) { - kunmap_atomic(paddr); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; + if (offset >= dsg->length) { + offset -= dsg->length; + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; } - sdt = paddr + offset; - sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j, - dev->dev_attrib.block_size)); + sdt = paddr + j; + avail = min(block_size, dsg->length - offset); + crc = crc_t10dif(daddr + offset, avail); + if (avail < block_size) { + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + offset = block_size - avail; + crc = crc_t10dif_update(crc, daddr, offset); + } else { + offset += block_size; + } + + sdt->guard_tag = cpu_to_be16(crc); if (cmd->prot_type == TARGET_DIF_TYPE1_PROT) sdt->ref_tag = cpu_to_be32(sector & 0xffffffff); sdt->app_tag = 0; @@ -1216,26 +1239,23 @@ sbc_dif_generate(struct se_cmd *cmd) be32_to_cpu(sdt->ref_tag)); sector++; - offset += sizeof(struct se_dif_v1_tuple); } - kunmap_atomic(paddr); - kunmap_atomic(daddr); + kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); } } static sense_reason_t sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt, - const void *p, sector_t sector, unsigned int ei_lba) + __u16 crc, sector_t sector, unsigned int ei_lba) { - struct se_device *dev = cmd->se_dev; - int block_size = dev->dev_attrib.block_size; __be16 csum; if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD)) goto check_ref; - csum = cpu_to_be16(crc_t10dif(p, block_size)); + csum = cpu_to_be16(crc); if (sdt->guard_tag != csum) { pr_err("DIFv1 checksum failed on sector %llu guard tag 0x%04x" @@ -1318,26 +1338,36 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg; + struct scatterlist *dsg = cmd->t_data_sg; sector_t sector = start; void *daddr, *paddr; - int i, j; + int i; sense_reason_t rc; + int dsg_off = 0; + unsigned int block_size = dev->dev_attrib.block_size; - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + for (; psg && sector < start + sectors; psg = sg_next(psg)) { paddr = kmap_atomic(sg_page(psg)) + psg->offset; + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { + for (i = psg_off; i < psg->length && + sector < start + sectors; + i += sizeof(struct se_dif_v1_tuple)) { + __u16 crc; + unsigned int avail; - if (psg_off >= psg->length) { - kunmap_atomic(paddr - psg->offset); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - psg_off = 0; + if (dsg_off >= dsg->length) { + dsg_off -= dsg->length; + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return 0; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; } - sdt = paddr + psg_off; + sdt = paddr + i; pr_debug("DIF READ sector: %llu guard_tag: 0x%04x" " app_tag: 0x%04x ref_tag: %u\n", @@ -1345,27 +1375,41 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, sdt->app_tag, be32_to_cpu(sdt->ref_tag)); if (sdt->app_tag == cpu_to_be16(0xffff)) { - sector++; - psg_off += sizeof(struct se_dif_v1_tuple); - continue; + dsg_off += block_size; + goto next; } - rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector, - ei_lba); - if (rc) { - kunmap_atomic(paddr - psg->offset); + avail = min(block_size, dsg->length - dsg_off); + crc = crc_t10dif(daddr + dsg_off, avail); + if (avail < block_size) { kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return 0; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + dsg_off = block_size - avail; + crc = crc_t10dif_update(crc, daddr, dsg_off); + } else { + dsg_off += block_size; + } + + rc = sbc_dif_v1_verify(cmd, sdt, crc, sector, ei_lba); + if (rc) { + kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); cmd->bad_sector = sector; return rc; } - +next: sector++; ei_lba++; - psg_off += sizeof(struct se_dif_v1_tuple); } - kunmap_atomic(paddr - psg->offset); + psg_off = 0; kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); } return 0; From 2b6eb609b8fab75707ac3557d195a5a99037e39b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:53 +0200 Subject: [PATCH 054/839] target/iscsi: stop using se_tpg_fabric_ptr Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_configfs.c | 43 +++++++------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 643024c1ee8d..03b2e15c1b6c 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1789,69 +1789,58 @@ static void lio_aborted_task(struct se_cmd *se_cmd) cmd->conn->conn_transport->iscsit_aborted_task(cmd->conn, cmd); } +static inline struct iscsi_portal_group *iscsi_tpg(struct se_portal_group *se_tpg) +{ + return container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); +} + static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return &tpg->tpg_tiqn->tiqn[0]; + return iscsi_tpg(se_tpg)->tpg_tiqn->tiqn; } static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpgt; + return iscsi_tpg(se_tpg)->tpgt; } static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.default_cmdsn_depth; + return iscsi_tpg(se_tpg)->tpg_attrib.default_cmdsn_depth; } static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.generate_node_acls; + return iscsi_tpg(se_tpg)->tpg_attrib.generate_node_acls; } static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.cache_dynamic_acls; + return iscsi_tpg(se_tpg)->tpg_attrib.cache_dynamic_acls; } static int lio_tpg_check_demo_mode_write_protect( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.demo_mode_write_protect; + return iscsi_tpg(se_tpg)->tpg_attrib.demo_mode_write_protect; } static int lio_tpg_check_prod_mode_write_protect( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.prod_mode_write_protect; + return iscsi_tpg(se_tpg)->tpg_attrib.prod_mode_write_protect; } static int lio_tpg_check_prot_fabric_only( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; /* * Only report fabric_prot_type if t10_pi has also been enabled * for incoming ib_isert sessions. */ - if (!tpg->tpg_attrib.t10_pi) + if (!iscsi_tpg(se_tpg)->tpg_attrib.t10_pi) return 0; - - return tpg->tpg_attrib.fabric_prot_type; + return iscsi_tpg(se_tpg)->tpg_attrib.fabric_prot_type; } /* @@ -1896,9 +1885,7 @@ static void lio_tpg_close_session(struct se_session *se_sess) static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_tiqn->tiqn_index; + return iscsi_tpg(se_tpg)->tpg_tiqn->tiqn_index; } static void lio_set_default_node_attributes(struct se_node_acl *se_acl) From 1667a459c382d4a90d05b0e8feab2d0b4ff27667 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:54 +0200 Subject: [PATCH 055/839] tcm_loop: stop using se_tpg_fabric_ptr Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 5a71c9f2ee66..9436bdfef091 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -520,10 +520,14 @@ static char *tcm_loop_get_fabric_name(void) return "loopback"; } +static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg) +{ + return container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg); +} + static u8 tcm_loop_get_fabric_proto_ident(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; + struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; /* * tl_proto_id is set at tcm_loop_configfs.c:tcm_loop_make_scsi_hba() * time based on the protocol dependent prefix of the passed configfs group. @@ -549,21 +553,19 @@ static u8 tcm_loop_get_fabric_proto_ident(struct se_portal_group *se_tpg) static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; /* * Return the passed NAA identifier for the SAS Target Port */ - return &tl_tpg->tl_hba->tl_wwn_address[0]; + return &tl_tpg(se_tpg)->tl_hba->tl_wwn_address[0]; } static u16 tcm_loop_get_tag(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; /* * This Tag is used when forming SCSI Name identifier in EVPD=1 0x83 * to represent the SCSI Target Port. */ - return tl_tpg->tl_tpgt; + return tl_tpg(se_tpg)->tl_tpgt; } static u32 tcm_loop_get_pr_transport_id( @@ -573,8 +575,7 @@ static u32 tcm_loop_get_pr_transport_id( int *format_code, unsigned char *buf) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; + struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; switch (tl_hba->tl_proto_id) { case SCSI_PROTOCOL_SAS: @@ -602,8 +603,7 @@ static u32 tcm_loop_get_pr_transport_id_len( struct t10_pr_registration *pr_reg, int *format_code) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; + struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; switch (tl_hba->tl_proto_id) { case SCSI_PROTOCOL_SAS: @@ -635,8 +635,7 @@ static char *tcm_loop_parse_pr_out_transport_id( u32 *out_tid_len, char **port_nexus_ptr) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; + struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; switch (tl_hba->tl_proto_id) { case SCSI_PROTOCOL_SAS: From 3868e4365f1735698df373733c9d99d7b9a688e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:55 +0200 Subject: [PATCH 056/839] tcm_fc: stop using se_tpg_fabric_ptr Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_conf.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 8da159352315..4bdf7a25ae67 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -409,6 +409,11 @@ static struct configfs_attribute *ft_wwn_attrs[] = { NULL, }; +static inline struct ft_tpg *ft_tpg(struct se_portal_group *se_tpg) +{ + return container_of(se_tpg, struct ft_tpg, se_tpg); +} + static char *ft_get_fabric_name(void) { return "fc"; @@ -416,20 +421,16 @@ static char *ft_get_fabric_name(void) static char *ft_get_fabric_wwn(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->lport_wwn->name; + return ft_tpg(se_tpg)->lport_wwn->name; } static u16 ft_get_tag(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - /* * This tag is used when forming SCSI Name identifier in EVPD=1 0x83 * to represent the SCSI Target Port. */ - return tpg->index; + return ft_tpg(se_tpg)->index; } static int ft_check_false(struct se_portal_group *se_tpg) @@ -443,9 +444,7 @@ static void ft_set_default_node_attr(struct se_node_acl *se_nacl) static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->index; + return ft_tpg(se_tpg)->index; } static const struct target_core_fabric_ops ft_fabric_ops = { From e4aae5af810eaa61c2cd7ba79d95ebfe0d88fe9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:56 +0200 Subject: [PATCH 057/839] target: change core_tpg_register prototype Remove the unneeded fabric_ptr argument, and change the type argument to pass in a SPC protocol identifier. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 3 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 8 ++--- drivers/target/iscsi/iscsi_target_configfs.c | 2 +- drivers/target/iscsi/iscsi_target_tpg.c | 3 +- drivers/target/loopback/tcm_loop.c | 4 +-- drivers/target/sbp/sbp_target.c | 3 +- drivers/target/target_core_tpg.c | 31 ++++++++------------ drivers/target/tcm_fc/tfc_conf.c | 2 +- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 4 +-- drivers/vhost/scsi.c | 4 +-- drivers/xen/xen-scsiback.c | 4 +-- include/target/target_core_base.h | 16 ++++------ include/target/target_core_fabric.h | 2 +- 14 files changed, 38 insertions(+), 50 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 07e7ef3d7429..b04846e985d4 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -238,8 +238,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n" buf += " tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n" buf += " ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n" - buf += " &tpg->se_tpg, tpg,\n" - buf += " TRANSPORT_TPG_TYPE_NORMAL);\n" + buf += " &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" buf += " if (ret < 0) {\n" buf += " kfree(tpg);\n" buf += " return NULL;\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 38e51f4a17de..8eed6089c5d7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3791,7 +3791,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, /* Initialize sport->port_wwn and sport->port_tpg_1 */ res = core_tpg_register(&srpt_template, &sport->port_wwn, - &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL); + &sport->port_tpg_1, SCSI_PROTOCOL_SRP); if (res) return ERR_PTR(res); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index cb376e5198d0..30cbfa6dc3e6 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1113,8 +1113,8 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&tcm_qla2xxx_ops, wwn, &tpg->se_tpg, + SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; @@ -1233,8 +1233,8 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn, &tpg->se_tpg, + SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 03b2e15c1b6c..0d5d88817a47 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1421,7 +1421,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( return NULL; ret = core_tpg_register(&iscsi_ops, wwn, &tpg->tpg_se_tpg, - tpg, TRANSPORT_TPG_TYPE_NORMAL); + SCSI_PROTOCOL_ISCSI); if (ret < 0) return NULL; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 5e3295fe404d..dcb7ede1d4aa 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -68,8 +68,7 @@ int iscsit_load_discovery_tpg(void) return -1; } - ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg, - tpg, TRANSPORT_TPG_TYPE_DISCOVERY); + ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg, -1); if (ret < 0) { kfree(tpg); return -1; diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 9436bdfef091..3f264d436737 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -1204,8 +1204,8 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( /* * Register the tl_tpg as a emulated SAS TCM Target Endpoint */ - ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, tl_tpg, - TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, + tl_hba->tl_proto_id); if (ret < 0) return ERR_PTR(-ENOMEM); diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 5df2a61714fb..40b9f516cf9b 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2152,8 +2152,7 @@ static struct se_portal_group *sbp_make_tpg( goto out_free_tpg; } - ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, tpg, - TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, SCSI_PROTOCOL_SBP); if (ret < 0) goto out_unreg_mgt_agt; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 42f3bd9561c8..c0c1f67facb5 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -634,8 +634,7 @@ int core_tpg_register( const struct target_core_fabric_ops *tfo, struct se_wwn *se_wwn, struct se_portal_group *se_tpg, - void *tpg_fabric_ptr, - int se_tpg_type) + int proto_id) { struct se_lun *lun; u32 i; @@ -661,8 +660,7 @@ int core_tpg_register( init_completion(&lun->lun_ref_comp); } - se_tpg->se_tpg_type = se_tpg_type; - se_tpg->se_tpg_fabric_ptr = tpg_fabric_ptr; + se_tpg->proto_id = proto_id; se_tpg->se_tpg_tfo = tfo; se_tpg->se_tpg_wwn = se_wwn; atomic_set(&se_tpg->tpg_pr_ref_count, 0); @@ -673,7 +671,7 @@ int core_tpg_register( spin_lock_init(&se_tpg->session_lock); spin_lock_init(&se_tpg->tpg_lun_lock); - if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) { + if (se_tpg->proto_id >= 0) { if (core_tpg_setup_virtual_lun0(se_tpg) < 0) { array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG); @@ -685,11 +683,10 @@ int core_tpg_register( list_add_tail(&se_tpg->se_tpg_node, &tpg_list); spin_unlock_bh(&tpg_lock); - pr_debug("TARGET_CORE[%s]: Allocated %s struct se_portal_group for" - " endpoint: %s, Portal Tag: %u\n", tfo->get_fabric_name(), - (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ? - "Normal" : "Discovery", (tfo->tpg_get_wwn(se_tpg) == NULL) ? - "None" : tfo->tpg_get_wwn(se_tpg), tfo->tpg_get_tag(se_tpg)); + pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, " + "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), + tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL, + se_tpg->proto_id, tfo->tpg_get_tag(se_tpg)); return 0; } @@ -697,14 +694,13 @@ EXPORT_SYMBOL(core_tpg_register); int core_tpg_deregister(struct se_portal_group *se_tpg) { + const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo; struct se_node_acl *nacl, *nacl_tmp; - pr_debug("TARGET_CORE[%s]: Deallocating %s struct se_portal_group" - " for endpoint: %s Portal Tag %u\n", - (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ? - "Normal" : "Discovery", se_tpg->se_tpg_tfo->get_fabric_name(), - se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg), - se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); + pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, " + "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), + tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL, + se_tpg->proto_id, tfo->tpg_get_tag(se_tpg)); spin_lock_bh(&tpg_lock); list_del(&se_tpg->se_tpg_node); @@ -732,10 +728,9 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) } spin_unlock_irq(&se_tpg->acl_node_lock); - if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) + if (se_tpg->proto_id >= 0) core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); - se_tpg->se_tpg_fabric_ptr = NULL; array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG); return 0; } diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 4bdf7a25ae67..8309c3d91387 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -284,7 +284,7 @@ static struct se_portal_group *ft_add_tpg( } ret = core_tpg_register(&ft_fabric_ops, wwn, &tpg->se_tpg, - tpg, TRANSPORT_TPG_TYPE_NORMAL); + SCSI_PROTOCOL_FCP); if (ret < 0) { destroy_workqueue(wq); kfree(tpg); diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 82f4da8bfe41..08b4f48aa49e 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1508,8 +1508,8 @@ static struct se_portal_group *usbg_make_tpg( tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, tpg, - TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, + tport->tport_proto_id); if (ret < 0) { destroy_workqueue(tpg->workqueue); kfree(tpg); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index b93c03935964..8295e7be0fcb 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2119,8 +2119,8 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&vhost_scsi_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&vhost_scsi_ops, wwn, &tpg->se_tpg, + tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 3756953b385b..223d493878eb 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1883,8 +1883,8 @@ scsiback_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&scsiback_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(&scsiback_ops, wwn, &tpg->se_tpg, + tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 9f8f5a1b527d..c462fb0a47f4 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -125,12 +125,6 @@ enum transport_lun_status_table { TRANSPORT_LUN_STATUS_ACTIVE = 1, }; -/* struct se_portal_group->se_tpg_type */ -enum transport_tpg_type_table { - TRANSPORT_TPG_TYPE_NORMAL = 0, - TRANSPORT_TPG_TYPE_DISCOVERY = 1, -}; - /* Special transport agnostic struct se_cmd->t_states */ enum transport_state_table { TRANSPORT_NO_STATE = 0, @@ -864,8 +858,12 @@ struct se_tpg_np { }; struct se_portal_group { - /* Type of target portal group, see transport_tpg_type_table */ - enum transport_tpg_type_table se_tpg_type; + /* + * PROTOCOL IDENTIFIER value per SPC4, 7.5.1. + * + * Negative values can be used by fabric drivers for internal use TPGs. + */ + int proto_id; /* Number of ACLed Initiator Nodes for this TPG */ u32 num_node_acls; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ @@ -875,8 +873,6 @@ struct se_portal_group { /* Spinlock for adding/removing sessions */ spinlock_t session_lock; spinlock_t tpg_lun_lock; - /* Pointer to $FABRIC_MOD portal group */ - void *se_tpg_fabric_ptr; struct list_head se_tpg_node; /* linked list for initiator ACL list */ struct list_head acl_node_list; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index e5414744bf2d..e0adc141de07 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -175,7 +175,7 @@ int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *, int core_tpg_set_initiator_node_tag(struct se_portal_group *, struct se_node_acl *, const char *); int core_tpg_register(const struct target_core_fabric_ops *, - struct se_wwn *, struct se_portal_group *, void *, int); + struct se_wwn *, struct se_portal_group *, int); int core_tpg_deregister(struct se_portal_group *); /* SAS helpers */ From 2aeeafae6bb9f04dbe17b521bcd8f0d03516c393 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:57 +0200 Subject: [PATCH 058/839] target: remove the get_fabric_proto_ident method Now that we store the protocol identifier in the tpg structure we don't need this method. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 31 -------------------- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 ---- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 19 ------------ drivers/target/iscsi/iscsi_target_configfs.c | 1 - drivers/target/loopback/tcm_loop.c | 27 ----------------- drivers/target/sbp/sbp_target.c | 13 -------- drivers/target/target_core_configfs.c | 4 --- drivers/target/target_core_fabric_lib.c | 27 ----------------- drivers/target/target_core_pr.c | 12 ++++---- drivers/target/target_core_spc.c | 12 +++----- drivers/target/tcm_fc/tfc_conf.c | 1 - drivers/usb/gadget/legacy/tcm_usb_gadget.c | 18 ------------ drivers/vhost/scsi.c | 23 --------------- drivers/xen/xen-scsiback.c | 23 --------------- include/target/target_core_fabric.h | 4 --- 15 files changed, 9 insertions(+), 212 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index b04846e985d4..29176c29537c 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -300,9 +300,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n" buf += " .module = THIS_MODULE,\n" buf += " .name = " + fabric_mod_name + ",\n" - buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n" buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n" - buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" buf += " .tpg_get_pr_transport_id = " + fabric_mod_name + "_get_pr_transport_id,\n" @@ -461,35 +459,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): bufi += "char *" + fabric_mod_name + "_get_fabric_name(void);\n" continue - if re.search('get_fabric_proto_ident', fo): - buf += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " u8 proto_id;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " proto_id = fc_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " proto_id = sas_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " proto_id = iscsi_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - - buf += " }\n\n" - buf += " return proto_id;\n" - buf += "}\n\n" - bufi += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *);\n" - if re.search('get_wwn', fo): buf += "char *" + fabric_mod_name + "_get_fabric_wwn(struct se_portal_group *se_tpg)\n" buf += "{\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8eed6089c5d7..9213c2de28fc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3394,11 +3394,6 @@ static char *srpt_get_fabric_name(void) return "srpt"; } -static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - return SCSI_TRANSPORTID_PROTOCOLID_SRP; -} - static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); @@ -3863,7 +3858,6 @@ static const struct target_core_fabric_ops srpt_template = { .name = "srpt", .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, - .get_fabric_proto_ident = srpt_get_fabric_proto_ident, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, .tpg_get_pr_transport_id = srpt_get_pr_transport_id, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 30cbfa6dc3e6..4566c4649751 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -190,23 +190,6 @@ static char *tcm_qla2xxx_npiv_get_fabric_name(void) return "qla2xxx_npiv"; } -static u8 tcm_qla2xxx_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - u8 proto_id; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - proto_id = fc_get_fabric_proto_ident(se_tpg); - break; - } - - return proto_id; -} - static char *tcm_qla2xxx_get_fabric_wwn(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -1928,7 +1911,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .name = "qla2xxx", .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_get_fabric_name, - .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, @@ -1979,7 +1961,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .name = "qla2xxx_npiv", .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, - .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 0d5d88817a47..9dec9f39139f 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1918,7 +1918,6 @@ const struct target_core_fabric_ops iscsi_ops = { .name = "iscsi", .node_acl_size = sizeof(struct iscsi_node_acl), .get_fabric_name = iscsi_get_fabric_name, - .get_fabric_proto_ident = iscsi_get_fabric_proto_ident, .tpg_get_wwn = lio_tpg_get_endpoint_wwn, .tpg_get_tag = lio_tpg_get_tag, .tpg_get_default_depth = lio_tpg_get_default_depth, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 3f264d436737..0eed0209a7f4 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -525,32 +525,6 @@ static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg) return container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg); } -static u8 tcm_loop_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; - /* - * tl_proto_id is set at tcm_loop_configfs.c:tcm_loop_make_scsi_hba() - * time based on the protocol dependent prefix of the passed configfs group. - * - * Based upon tl_proto_id, TCM_Loop emulates the requested fabric - * ProtocolID using target_core_fabric_lib.c symbols. - */ - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg) { /* @@ -1356,7 +1330,6 @@ static const struct target_core_fabric_ops loop_ops = { .module = THIS_MODULE, .name = "loopback", .get_fabric_name = tcm_loop_get_fabric_name, - .get_fabric_proto_ident = tcm_loop_get_fabric_proto_ident, .tpg_get_wwn = tcm_loop_get_endpoint_wwn, .tpg_get_tag = tcm_loop_get_tag, .tpg_get_pr_transport_id = tcm_loop_get_pr_transport_id, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 40b9f516cf9b..8acb37fd9ebc 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1832,18 +1832,6 @@ static int sbp_check_stop_free(struct se_cmd *se_cmd) return 1; } -/* - * Handlers for Serial Bus Protocol 2/3 (SBP-2 / SBP-3) - */ -static u8 sbp_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - /* - * Return a IEEE 1394 SCSI Protocol identifier for loopback operations - * This is defined in section 7.5.1 Table 362 in spc4r17 - */ - return SCSI_PROTOCOL_SBP; -} - static u32 sbp_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -2442,7 +2430,6 @@ static const struct target_core_fabric_ops sbp_ops = { .module = THIS_MODULE, .name = "sbp", .get_fabric_name = sbp_get_fabric_name, - .get_fabric_proto_ident = sbp_get_fabric_proto_ident, .tpg_get_wwn = sbp_get_fabric_wwn, .tpg_get_tag = sbp_get_tag, .tpg_get_pr_transport_id = sbp_get_pr_transport_id, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 2b10499264d3..21c9f7d79d5e 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -318,10 +318,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->get_fabric_name()\n"); return -EINVAL; } - if (!tfo->get_fabric_proto_ident) { - pr_err("Missing tfo->get_fabric_proto_ident()\n"); - return -EINVAL; - } if (!tfo->tpg_get_wwn) { pr_err("Missing tfo->tpg_get_wwn()\n"); return -EINVAL; diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 35bfe77160d8..6fed14adbe61 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -42,16 +42,6 @@ /* * Handlers for Serial Attached SCSI (SAS) */ -u8 sas_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - /* - * Return a SAS Serial SCSI Protocol identifier for loopback operations - * This is defined in section 7.5.1 Table 362 in spc4r17 - */ - return 0x6; -} -EXPORT_SYMBOL(sas_get_fabric_proto_ident); - u32 sas_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -128,12 +118,6 @@ EXPORT_SYMBOL(sas_parse_pr_out_transport_id); /* * Handlers for Fibre Channel Protocol (FCP) */ -u8 fc_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - return 0x0; /* 0 = fcp-2 per SPC4 section 7.5.1 */ -} -EXPORT_SYMBOL(fc_get_fabric_proto_ident); - u32 fc_get_pr_transport_id_len( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, @@ -208,17 +192,6 @@ EXPORT_SYMBOL(fc_parse_pr_out_transport_id); /* * Handlers for Internet Small Computer Systems Interface (iSCSI) */ - -u8 iscsi_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - /* - * This value is defined for "Internet SCSI (iSCSI)" - * in spc4r17 section 7.5.1 Table 362 - */ - return 0x5; -} -EXPORT_SYMBOL(iscsi_get_fabric_proto_ident); - u32 iscsi_get_pr_transport_id( struct se_portal_group *se_tpg, struct se_node_acl *se_nacl, diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index a15411c79ae9..23c065f83a56 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1447,7 +1447,7 @@ core_scsi3_decode_spec_i_port( struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; const struct target_core_fabric_ops *tmp_tf_ops; unsigned char *buf; - unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident; + unsigned char *ptr, *i_str = NULL, proto_ident; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; @@ -1536,15 +1536,13 @@ core_scsi3_decode_spec_i_port( tmp_tf_ops = tmp_tpg->se_tpg_tfo; if (!tmp_tf_ops) continue; - if (!tmp_tf_ops->get_fabric_proto_ident || - !tmp_tf_ops->tpg_parse_pr_out_transport_id) + if (!tmp_tf_ops->tpg_parse_pr_out_transport_id) continue; /* * Look for the matching proto_ident provided by * the received TransportID */ - tmp_proto_ident = tmp_tf_ops->get_fabric_proto_ident(tmp_tpg); - if (tmp_proto_ident != proto_ident) + if (tmp_tpg->proto_id != proto_ident) continue; dest_rtpi = tmp_port->sep_rtpi; @@ -3230,11 +3228,11 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, pr_debug("SPC-3 PR REGISTER_AND_MOVE: Extracted Protocol Identifier:" " 0x%02x\n", proto_ident); - if (proto_ident != dest_tf_ops->get_fabric_proto_ident(dest_se_tpg)) { + if (proto_ident != dest_se_tpg->proto_id) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Received" " proto_ident: 0x%02x does not match ident: 0x%02x" " from fabric: %s\n", proto_ident, - dest_tf_ops->get_fabric_proto_ident(dest_se_tpg), + dest_se_tpg->proto_id, dest_tf_ops->get_fabric_name()); ret = TCM_INVALID_PARAMETER_LIST; goto out; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 988c158cf65d..78c0b40fa5c0 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -286,8 +286,7 @@ check_t10_vend_desc: * Get the PROTOCOL IDENTIFIER as defined by spc4r17 * section 7.5.1 Table 362 */ - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x1; /* CODE SET == Binary */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -322,8 +321,7 @@ check_t10_vend_desc: tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id; spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x1; /* CODE SET == Binary */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -371,8 +369,7 @@ check_lu_gp: * section 7.5.1 Table 362 */ check_scsi_name: - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x3; /* CODE SET == UTF-8 */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -412,8 +409,7 @@ check_scsi_name: /* * Target device designator */ - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x3; /* CODE SET == UTF-8 */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target device: 10b */ diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 8309c3d91387..fabc7bacf693 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -452,7 +452,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .name = "fc", .node_acl_size = sizeof(struct ft_node_acl), .get_fabric_name = ft_get_fabric_name, - .get_fabric_proto_ident = fc_get_fabric_proto_ident, .tpg_get_wwn = ft_get_fabric_wwn, .tpg_get_tag = ft_get_tag, .tpg_get_pr_transport_id = fc_get_pr_transport_id, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 08b4f48aa49e..d6b03178262f 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1274,23 +1274,6 @@ static char *usbg_get_fabric_name(void) return "usb_gadget"; } -static u8 usbg_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - u8 proto_id; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - proto_id = sas_get_fabric_proto_ident(se_tpg); - break; - } - - return proto_id; -} - static char *usbg_get_fabric_wwn(struct se_portal_group *se_tpg) { struct usbg_tpg *tpg = container_of(se_tpg, @@ -1803,7 +1786,6 @@ static const struct target_core_fabric_ops usbg_ops = { .module = THIS_MODULE, .name = "usb_gadget", .get_fabric_name = usbg_get_fabric_name, - .get_fabric_proto_ident = usbg_get_fabric_proto_ident, .tpg_get_wwn = usbg_get_fabric_wwn, .tpg_get_tag = usbg_get_tag, .tpg_get_pr_transport_id = usbg_get_pr_transport_id, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8295e7be0fcb..022860ccd008 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -288,28 +288,6 @@ static char *vhost_scsi_get_fabric_name(void) return "vhost"; } -static u8 vhost_scsi_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *vhost_scsi_get_fabric_wwn(struct se_portal_group *se_tpg) { struct vhost_scsi_tpg *tpg = container_of(se_tpg, @@ -2244,7 +2222,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .module = THIS_MODULE, .name = "vhost", .get_fabric_name = vhost_scsi_get_fabric_name, - .get_fabric_proto_ident = vhost_scsi_get_fabric_proto_ident, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, .tpg_get_pr_transport_id = vhost_scsi_get_pr_transport_id, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 223d493878eb..8bf9448bd7fd 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1254,28 +1254,6 @@ static char *scsiback_dump_proto_id(struct scsiback_tport *tport) return "Unknown"; } -static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg) { struct scsiback_tpg *tpg = container_of(se_tpg, @@ -1929,7 +1907,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .module = THIS_MODULE, .name = "xen-pvscsi", .get_fabric_name = scsiback_get_fabric_name, - .get_fabric_proto_ident = scsiback_get_fabric_proto_ident, .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, .tpg_get_pr_transport_id = scsiback_get_pr_transport_id, diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index e0adc141de07..495606382546 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -6,7 +6,6 @@ struct target_core_fabric_ops { const char *name; size_t node_acl_size; char *(*get_fabric_name)(void); - u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); u32 (*tpg_get_default_depth)(struct se_portal_group *); @@ -179,7 +178,6 @@ int core_tpg_register(const struct target_core_fabric_ops *, int core_tpg_deregister(struct se_portal_group *); /* SAS helpers */ -u8 sas_get_fabric_proto_ident(struct se_portal_group *); u32 sas_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, struct t10_pr_registration *, int *, unsigned char *); u32 sas_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, @@ -188,7 +186,6 @@ char *sas_parse_pr_out_transport_id(struct se_portal_group *, const char *, u32 *, char **); /* FC helpers */ -u8 fc_get_fabric_proto_ident(struct se_portal_group *); u32 fc_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, struct t10_pr_registration *, int *, unsigned char *); u32 fc_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, @@ -197,7 +194,6 @@ char *fc_parse_pr_out_transport_id(struct se_portal_group *, const char *, u32 *, char **); /* iSCSI helpers */ -u8 iscsi_get_fabric_proto_ident(struct se_portal_group *); u32 iscsi_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, struct t10_pr_registration *, int *, unsigned char *); u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, From 2650d71e244fb3637b5f58a0080682a8bf9c7091 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 17:47:58 +0200 Subject: [PATCH 059/839] target: move transport ID handling to the core Now that struct se_portal_group contains a protocol identifier field we can take all the code to format an parse protocol identifiers in CDBs into common code instead of leaving this to low-level drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 121 --------- drivers/infiniband/ulp/srpt/ib_srpt.c | 39 --- drivers/infiniband/ulp/srpt/ib_srpt.h | 18 -- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 73 ----- drivers/scsi/qla2xxx/tcm_qla2xxx.h | 2 - drivers/target/iscsi/iscsi_target_configfs.c | 3 - drivers/target/loopback/tcm_loop.c | 92 ------- drivers/target/sbp/sbp_target.c | 70 ----- drivers/target/target_core_configfs.c | 8 - drivers/target/target_core_fabric_lib.c | 263 ++++++++++--------- drivers/target/target_core_internal.h | 9 + drivers/target/target_core_pr.c | 49 ++-- drivers/target/tcm_fc/tfc_conf.c | 3 - drivers/usb/gadget/legacy/tcm_usb_gadget.c | 75 +----- drivers/usb/gadget/legacy/tcm_usb_gadget.h | 2 - drivers/vhost/scsi.c | 94 ------- drivers/xen/xen-scsiback.c | 94 ------- include/target/target_core_fabric.h | 33 --- 18 files changed, 173 insertions(+), 875 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 29176c29537c..0dbd70cccde1 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -60,8 +60,6 @@ def tcm_mod_build_FC_include(fabric_mod_dir_var, fabric_mod_name): buf += "};\n" buf += "\n" buf += "struct " + fabric_mod_name + "_lport {\n" - buf += " /* SCSI protocol the lport is providing */\n" - buf += " u8 lport_proto_id;\n" buf += " /* Binary World Wide unique Port Name for FC Target Lport */\n" buf += " u64 lport_wwpn;\n" buf += " /* ASCII formatted WWPN for FC Target Lport */\n" @@ -105,8 +103,6 @@ def tcm_mod_build_SAS_include(fabric_mod_dir_var, fabric_mod_name): buf += " struct se_portal_group se_tpg;\n" buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tport {\n" - buf += " /* SCSI protocol the tport is providing */\n" - buf += " u8 tport_proto_id;\n" buf += " /* Binary World Wide unique Port Name for SAS Target port */\n" buf += " u64 tport_wwpn;\n" buf += " /* ASCII formatted WWPN for SAS Target port */\n" @@ -150,8 +146,6 @@ def tcm_mod_build_iSCSI_include(fabric_mod_dir_var, fabric_mod_name): buf += " struct se_portal_group se_tpg;\n" buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tport {\n" - buf += " /* SCSI protocol the tport is providing */\n" - buf += " u8 tport_proto_id;\n" buf += " /* ASCII formatted TargetName for IQN */\n" buf += " char tport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" buf += " /* Returned by " + fabric_mod_name + "_make_tport() */\n" @@ -303,9 +297,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" - buf += " .tpg_get_pr_transport_id = " + fabric_mod_name + "_get_pr_transport_id,\n" - buf += " .tpg_get_pr_transport_id_len = " + fabric_mod_name + "_get_pr_transport_id_len,\n" - buf += " .tpg_parse_pr_out_transport_id = " + fabric_mod_name + "_parse_pr_out_transport_id,\n" buf += " .tpg_check_demo_mode = " + fabric_mod_name + "_check_false,\n" buf += " .tpg_check_demo_mode_cache = " + fabric_mod_name + "_check_true,\n" buf += " .tpg_check_demo_mode_write_protect = " + fabric_mod_name + "_check_true,\n" @@ -478,118 +469,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "u16 " + fabric_mod_name + "_get_tag(struct se_portal_group *);\n" - if re.search('get_pr_transport_id\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_pr_transport_id(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl,\n" - buf += " struct t10_pr_registration *pr_reg,\n" - buf += " int *format_code,\n" - buf += " unsigned char *buf)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " int ret = 0;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " ret = fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " ret = sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " ret = iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - - buf += " }\n\n" - buf += " return ret;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id(struct se_portal_group *,\n" - bufi += " struct se_node_acl *, struct t10_pr_registration *,\n" - bufi += " int *, unsigned char *);\n" - - if re.search('get_pr_transport_id_len\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl,\n" - buf += " struct t10_pr_registration *pr_reg,\n" - buf += " int *format_code)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " int ret = 0;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " ret = fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " ret = sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " ret = iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - - - buf += " }\n\n" - buf += " return ret;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(struct se_portal_group *,\n" - bufi += " struct se_node_acl *, struct t10_pr_registration *,\n" - bufi += " int *);\n" - - if re.search('parse_pr_out_transport_id\)\(', fo): - buf += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " const char *buf,\n" - buf += " u32 *out_tid_len,\n" - buf += " char **port_nexus_ptr)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " char *tid = NULL;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " tid = fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " tid = sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " tid = iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - - buf += " }\n\n" - buf += " return tid;\n" - buf += "}\n\n" - bufi += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(struct se_portal_group *,\n" - bufi += " const char *, u32 *, char **);\n" - if re.search('tpg_get_inst_index\)\(', fo): buf += "u32 " + fabric_mod_name + "_tpg_get_inst_index(struct se_portal_group *se_tpg)\n" buf += "{\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9213c2de28fc..98e00360f97e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3406,42 +3406,6 @@ static u16 srpt_get_tag(struct se_portal_group *tpg) return 1; } -static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, unsigned char *buf) -{ - struct srpt_node_acl *nacl; - struct spc_rdma_transport_id *tr_id; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - tr_id = (void *)buf; - tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP; - memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id)); - return sizeof(*tr_id); -} - -static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - return sizeof(struct spc_rdma_transport_id); -} - -static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct spc_rdma_transport_id *tr_id; - - *port_nexus_ptr = NULL; - *out_tid_len = sizeof(struct spc_rdma_transport_id); - tr_id = (void *)buf; - return (char *)tr_id->i_port_id; -} - static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -3860,9 +3824,6 @@ static const struct target_core_fabric_ops srpt_template = { .get_fabric_name = srpt_get_fabric_name, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, - .tpg_get_pr_transport_id = srpt_get_pr_transport_id, - .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, .tpg_check_demo_mode = srpt_check_false, .tpg_check_demo_mode_cache = srpt_check_true, .tpg_check_demo_mode_write_protect = srpt_check_true, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 355f6f5ce8b2..33b2c88b73ba 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -422,22 +422,4 @@ struct srpt_node_acl { struct list_head list; }; -/* - * SRP-releated SCSI persistent reservation definitions. - * - * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction). - * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using - * SCSI over an RDMA interface). - */ - -enum { - SCSI_TRANSPORTID_PROTOCOLID_SRP = 4, -}; - -struct spc_rdma_transport_id { - uint8_t protocol_identifier; - uint8_t reserved[7]; - uint8_t i_port_id[16]; -}; - #endif /* IB_SRPT_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 4566c4649751..bd0f9eb67901 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -206,73 +206,6 @@ static u16 tcm_qla2xxx_get_tag(struct se_portal_group *se_tpg) return tpg->lport_tpgt; } -static u32 tcm_qla2xxx_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - int ret = 0; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - ret = fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - break; - } - - return ret; -} - -static u32 tcm_qla2xxx_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - int ret = 0; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - ret = fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - break; - } - - return ret; -} - -static char *tcm_qla2xxx_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - char *tid = NULL; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - tid = fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - break; - } - - return tid; -} - static int tcm_qla2xxx_check_demo_mode(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -1913,9 +1846,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .get_fabric_name = tcm_qla2xxx_get_fabric_name, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, .tpg_check_demo_mode_cache = tcm_qla2xxx_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = @@ -1963,9 +1893,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, .tpg_check_demo_mode_cache = tcm_qla2xxx_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_demo_mode, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 3d805a07061c..3bbf4cb6fd97 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -57,8 +57,6 @@ struct tcm_qla2xxx_fc_loopid { }; struct tcm_qla2xxx_lport { - /* SCSI protocol the lport is providing */ - u8 lport_proto_id; /* Binary World Wide unique Port Name for FC Target Lport */ u64 lport_wwpn; /* Binary World Wide unique Port Name for FC NPIV Target Lport */ diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 9dec9f39139f..bd8af8764f4b 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1921,9 +1921,6 @@ const struct target_core_fabric_ops iscsi_ops = { .tpg_get_wwn = lio_tpg_get_endpoint_wwn, .tpg_get_tag = lio_tpg_get_tag, .tpg_get_default_depth = lio_tpg_get_default_depth, - .tpg_get_pr_transport_id = iscsi_get_pr_transport_id, - .tpg_get_pr_transport_id_len = iscsi_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = iscsi_parse_pr_out_transport_id, .tpg_check_demo_mode = lio_tpg_check_demo_mode, .tpg_check_demo_mode_cache = lio_tpg_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 0eed0209a7f4..b788406977f6 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -542,95 +542,6 @@ static u16 tcm_loop_get_tag(struct se_portal_group *se_tpg) return tl_tpg(se_tpg)->tl_tpgt; } -static u32 tcm_loop_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 tcm_loop_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -static char *tcm_loop_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct tcm_loop_hba *tl_hba = tl_tpg(se_tpg)->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - /* * Returning (1) here allows for target_core_mod struct se_node_acl to be generated * based upon the incoming fabric dependent SCSI Initiator Port @@ -1332,9 +1243,6 @@ static const struct target_core_fabric_ops loop_ops = { .get_fabric_name = tcm_loop_get_fabric_name, .tpg_get_wwn = tcm_loop_get_endpoint_wwn, .tpg_get_tag = tcm_loop_get_tag, - .tpg_get_pr_transport_id = tcm_loop_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_loop_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_loop_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_loop_check_demo_mode, .tpg_check_demo_mode_cache = tcm_loop_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 8acb37fd9ebc..89f172dc8678 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1832,73 +1832,6 @@ static int sbp_check_stop_free(struct se_cmd *se_cmd) return 1; } -static u32 sbp_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - int ret; - - /* - * Set PROTOCOL IDENTIFIER to 3h for SBP - */ - buf[0] = SCSI_PROTOCOL_SBP; - /* - * From spc4r17, 7.5.4.4 TransportID for initiator ports using SCSI - * over IEEE 1394 - */ - ret = hex2bin(&buf[8], se_nacl->initiatorname, 8); - if (ret < 0) - pr_debug("sbp transport_id: invalid hex string\n"); - - /* - * The IEEE 1394 Transport ID is a hardcoded 24-byte length - */ - return 24; -} - -static u32 sbp_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * From spc4r17, 7.5.4.4 TransportID for initiator ports using SCSI - * over IEEE 1394 - * - * The SBP Transport ID is a hardcoded 24-byte length - */ - return 24; -} - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -static char *sbp_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - /* - * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.4 TransportID - * for initiator ports using SCSI over SBP Serial SCSI Protocol - * - * The TransportID for a IEEE 1394 Initiator Port is of fixed size of - * 24 bytes, and IEEE 1394 does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; - - return (char *)&buf[8]; -} - static int sbp_count_se_tpg_luns(struct se_portal_group *tpg) { int i, count = 0; @@ -2432,9 +2365,6 @@ static const struct target_core_fabric_ops sbp_ops = { .get_fabric_name = sbp_get_fabric_name, .tpg_get_wwn = sbp_get_fabric_wwn, .tpg_get_tag = sbp_get_tag, - .tpg_get_pr_transport_id = sbp_get_pr_transport_id, - .tpg_get_pr_transport_id_len = sbp_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = sbp_parse_pr_out_transport_id, .tpg_check_demo_mode = sbp_check_true, .tpg_check_demo_mode_cache = sbp_check_true, .tpg_check_demo_mode_write_protect = sbp_check_false, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 21c9f7d79d5e..5e2649fc6919 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -326,14 +326,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->tpg_get_tag()\n"); return -EINVAL; } - if (!tfo->tpg_get_pr_transport_id) { - pr_err("Missing tfo->tpg_get_pr_transport_id()\n"); - return -EINVAL; - } - if (!tfo->tpg_get_pr_transport_id_len) { - pr_err("Missing tfo->tpg_get_pr_transport_id_len()\n"); - return -EINVAL; - } if (!tfo->tpg_check_demo_mode) { pr_err("Missing tfo->tpg_check_demo_mode()\n"); return -EINVAL; diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 6fed14adbe61..89720b2f37c2 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -24,6 +24,11 @@ * ******************************************************************************/ +/* + * See SPC4, section 7.5 "Protocol specific parameters" for details + * on the formats implemented in this file. + */ + #include #include #include @@ -39,103 +44,26 @@ #include "target_core_internal.h" #include "target_core_pr.h" -/* - * Handlers for Serial Attached SCSI (SAS) - */ -u32 sas_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, + +static int sas_get_pr_transport_id( + struct se_node_acl *nacl, int *format_code, unsigned char *buf) { - unsigned char *ptr; int ret; - /* - * Set PROTOCOL IDENTIFIER to 6h for SAS - */ - buf[0] = 0x06; - /* - * From spc4r17, 7.5.4.7 TransportID for initiator ports using SCSI - * over SAS Serial SCSI Protocol - */ - ptr = &se_nacl->initiatorname[4]; /* Skip over 'naa. prefix */ + /* Skip over 'naa. prefix */ + ret = hex2bin(&buf[4], &nacl->initiatorname[4], 8); + if (ret) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } - ret = hex2bin(&buf[4], ptr, 8); - if (ret < 0) - pr_debug("sas transport_id: invalid hex string\n"); - - /* - * The SAS Transport ID is a hardcoded 24-byte length - */ return 24; } -EXPORT_SYMBOL(sas_get_pr_transport_id); -u32 sas_get_pr_transport_id_len( - struct se_portal_group *se_tpg, +static int fc_get_pr_transport_id( struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * From spc4r17, 7.5.4.7 TransportID for initiator ports using SCSI - * over SAS Serial SCSI Protocol - * - * The SAS Transport ID is a hardcoded 24-byte length - */ - return 24; -} -EXPORT_SYMBOL(sas_get_pr_transport_id_len); - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -char *sas_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - /* - * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID - * for initiator ports using SCSI over SAS Serial SCSI Protocol - * - * The TransportID for a SAS Initiator Port is of fixed size of - * 24 bytes, and SAS does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; - - return (char *)&buf[4]; -} -EXPORT_SYMBOL(sas_parse_pr_out_transport_id); - -/* - * Handlers for Fibre Channel Protocol (FCP) - */ -u32 fc_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * The FC Transport ID is a hardcoded 24-byte length - */ - return 24; -} -EXPORT_SYMBOL(fc_get_pr_transport_id_len); - -u32 fc_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf) { @@ -144,24 +72,20 @@ u32 fc_get_pr_transport_id( u32 off = 8; /* - * PROTOCOL IDENTIFIER is 0h for FCP-2 - * - * From spc4r17, 7.5.4.2 TransportID for initiator ports using - * SCSI over Fibre Channel - * * We convert the ASCII formatted N Port name into a binary * encoded TransportID. */ ptr = &se_nacl->initiatorname[0]; - for (i = 0; i < 24; ) { if (!strncmp(&ptr[i], ":", 1)) { i++; continue; } ret = hex2bin(&buf[off++], &ptr[i], 1); - if (ret < 0) - pr_debug("fc transport_id: invalid hex string\n"); + if (ret < 0) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } i += 2; } /* @@ -169,31 +93,52 @@ u32 fc_get_pr_transport_id( */ return 24; } -EXPORT_SYMBOL(fc_get_pr_transport_id); -char *fc_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) +static int sbp_get_pr_transport_id( + struct se_node_acl *nacl, + int *format_code, + unsigned char *buf) { - /* - * The TransportID for a FC N Port is of fixed size of - * 24 bytes, and FC does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; + int ret; - return (char *)&buf[8]; + ret = hex2bin(&buf[8], nacl->initiatorname, 8); + if (ret) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } + + return 24; } -EXPORT_SYMBOL(fc_parse_pr_out_transport_id); -/* - * Handlers for Internet Small Computer Systems Interface (iSCSI) - */ -u32 iscsi_get_pr_transport_id( - struct se_portal_group *se_tpg, +static int srp_get_pr_transport_id( + struct se_node_acl *nacl, + int *format_code, + unsigned char *buf) +{ + const char *p; + unsigned len, count, leading_zero_bytes; + int rc; + + p = nacl->initiatorname; + if (strncasecmp(p, "0x", 2) == 0) + p += 2; + len = strlen(p); + if (len % 2) + return -EINVAL; + + count = min(len / 2, 16U); + leading_zero_bytes = 16 - count; + memset(buf + 8, 0, leading_zero_bytes); + rc = hex2bin(buf + 8 + leading_zero_bytes, p, count); + if (rc < 0) { + pr_debug("hex2bin failed for %s: %d\n", __func__, rc); + return rc; + } + + return 24; +} + +static int iscsi_get_pr_transport_id( struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, int *format_code, @@ -203,10 +148,6 @@ u32 iscsi_get_pr_transport_id( u16 len = 0; spin_lock_irq(&se_nacl->nacl_sess_lock); - /* - * Set PROTOCOL IDENTIFIER to 5h for iSCSI - */ - buf[0] = 0x05; /* * From spc4r17 Section 7.5.4.6: TransportID for initiator * ports using SCSI over iSCSI. @@ -286,10 +227,8 @@ u32 iscsi_get_pr_transport_id( return len; } -EXPORT_SYMBOL(iscsi_get_pr_transport_id); -u32 iscsi_get_pr_transport_id_len( - struct se_portal_group *se_tpg, +static int iscsi_get_pr_transport_id_len( struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, int *format_code) @@ -332,9 +271,8 @@ u32 iscsi_get_pr_transport_id_len( return len; } -EXPORT_SYMBOL(iscsi_get_pr_transport_id_len); -char *iscsi_parse_pr_out_transport_id( +static char *iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, const char *buf, u32 *out_tid_len, @@ -421,4 +359,79 @@ char *iscsi_parse_pr_out_transport_id( return (char *)&buf[4]; } -EXPORT_SYMBOL(iscsi_parse_pr_out_transport_id); + +int target_get_pr_transport_id_len(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code) +{ + switch (nacl->se_tpg->proto_id) { + case SCSI_PROTOCOL_FCP: + case SCSI_PROTOCOL_SBP: + case SCSI_PROTOCOL_SRP: + case SCSI_PROTOCOL_SAS: + break; + case SCSI_PROTOCOL_ISCSI: + return iscsi_get_pr_transport_id_len(nacl, pr_reg, format_code); + default: + pr_err("Unknown proto_id: 0x%02x\n", nacl->se_tpg->proto_id); + return -EINVAL; + } + + /* + * Most transports use a fixed length 24 byte identifier. + */ + *format_code = 0; + return 24; +} + +int target_get_pr_transport_id(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code, + unsigned char *buf) +{ + switch (nacl->se_tpg->proto_id) { + case SCSI_PROTOCOL_SAS: + return sas_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_SBP: + return sbp_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_SRP: + return srp_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_FCP: + return fc_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_ISCSI: + return iscsi_get_pr_transport_id(nacl, pr_reg, format_code, + buf); + default: + pr_err("Unknown proto_id: 0x%02x\n", nacl->se_tpg->proto_id); + return -EINVAL; + } +} + +const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, + const char *buf, u32 *out_tid_len, char **port_nexus_ptr) +{ + u32 offset; + + switch (tpg->proto_id) { + case SCSI_PROTOCOL_SAS: + /* + * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID + * for initiator ports using SCSI over SAS Serial SCSI Protocol. + */ + offset = 4; + break; + case SCSI_PROTOCOL_SBP: + case SCSI_PROTOCOL_SRP: + case SCSI_PROTOCOL_FCP: + offset = 8; + break; + case SCSI_PROTOCOL_ISCSI: + return iscsi_parse_pr_out_transport_id(tpg, buf, out_tid_len, + port_nexus_ptr); + default: + pr_err("Unknown proto_id: 0x%02x\n", tpg->proto_id); + return NULL; + } + + *port_nexus_ptr = NULL; + *out_tid_len = 24; + return buf + offset; +} diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 058ca71cda81..d0344ad9b0d8 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -38,6 +38,15 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name); int target_configure_device(struct se_device *dev); void target_free_device(struct se_device *); +/* target_core_fabric_lib.c */ +int target_get_pr_transport_id_len(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code); +int target_get_pr_transport_id(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code, + unsigned char *buf); +const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, + const char *buf, u32 *out_tid_len, char **port_nexus_ptr); + /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); int core_delete_hba(struct se_hba *); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 23c065f83a56..d396b3b87025 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1445,9 +1445,8 @@ core_scsi3_decode_spec_i_port( struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; LIST_HEAD(tid_dest_list); struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; - const struct target_core_fabric_ops *tmp_tf_ops; - unsigned char *buf; - unsigned char *ptr, *i_str = NULL, proto_ident; + unsigned char *buf, *ptr, proto_ident; + const unsigned char *i_str; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; @@ -1533,11 +1532,7 @@ core_scsi3_decode_spec_i_port( tmp_tpg = tmp_port->sep_tpg; if (!tmp_tpg) continue; - tmp_tf_ops = tmp_tpg->se_tpg_tfo; - if (!tmp_tf_ops) - continue; - if (!tmp_tf_ops->tpg_parse_pr_out_transport_id) - continue; + /* * Look for the matching proto_ident provided by * the received TransportID @@ -1546,9 +1541,8 @@ core_scsi3_decode_spec_i_port( continue; dest_rtpi = tmp_port->sep_rtpi; - i_str = tmp_tf_ops->tpg_parse_pr_out_transport_id( - tmp_tpg, (const char *)ptr, &tid_len, - &iport_ptr); + i_str = target_parse_pr_out_transport_id(tmp_tpg, + (const char *)ptr, &tid_len, &iport_ptr); if (!i_str) continue; @@ -3105,7 +3099,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - unsigned char *initiator_str; + const unsigned char *initiator_str; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; u32 tid_len, tmp_tid_len; int new_reg = 0, type, scope, matching_iname; @@ -3237,14 +3231,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, ret = TCM_INVALID_PARAMETER_LIST; goto out; } - if (dest_tf_ops->tpg_parse_pr_out_transport_id == NULL) { - pr_err("SPC-3 PR REGISTER_AND_MOVE: Fabric does not" - " containg a valid tpg_parse_pr_out_transport_id" - " function pointer\n"); - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto out; - } - initiator_str = dest_tf_ops->tpg_parse_pr_out_transport_id(dest_se_tpg, + initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, (const char *)&buf[24], &tmp_tid_len, &iport_ptr); if (!initiator_str) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" @@ -3881,9 +3868,10 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) struct t10_pr_registration *pr_reg, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len; + u32 add_desc_len = 0, add_len = 0; u32 off = 8; /* off into first Full Status descriptor */ int format_code = 0, pr_res_type = 0, pr_res_scope = 0; + int exp_desc_len, desc_len; bool all_reg = false; if (cmd->data_length < 8) { @@ -3928,10 +3916,10 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * Determine expected length of $FABRIC_MOD specific * TransportID full status descriptor.. */ - exp_desc_len = se_tpg->se_tpg_tfo->tpg_get_pr_transport_id_len( - se_tpg, se_nacl, pr_reg, &format_code); - - if ((exp_desc_len + add_len) > cmd->data_length) { + exp_desc_len = target_get_pr_transport_id_len(se_nacl, pr_reg, + &format_code); + if (exp_desc_len < 0 || + exp_desc_len + add_len > cmd->data_length) { pr_warn("SPC-3 PRIN READ_FULL_STATUS ran" " out of buffer: %d\n", cmd->data_length); spin_lock(&pr_tmpl->registration_lock); @@ -3995,14 +3983,19 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) } else off += 2; /* Skip over RELATIVE TARGET PORT IDENTIFIER */ + buf[off+4] = se_tpg->proto_id; + /* - * Now, have the $FABRIC_MOD fill in the protocol identifier + * Now, have the $FABRIC_MOD fill in the transport ID. */ - desc_len = se_tpg->se_tpg_tfo->tpg_get_pr_transport_id(se_tpg, - se_nacl, pr_reg, &format_code, &buf[off+4]); + desc_len = target_get_pr_transport_id(se_nacl, pr_reg, + &format_code, &buf[off+4]); spin_lock(&pr_tmpl->registration_lock); atomic_dec_mb(&pr_reg->pr_res_holders); + + if (desc_len < 0) + break; /* * Set the ADDITIONAL DESCRIPTOR LENGTH */ diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index fabc7bacf693..6ad7404b7dd1 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -454,9 +454,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .get_fabric_name = ft_get_fabric_name, .tpg_get_wwn = ft_get_fabric_wwn, .tpg_get_tag = ft_get_tag, - .tpg_get_pr_transport_id = fc_get_pr_transport_id, - .tpg_get_pr_transport_id_len = fc_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = fc_parse_pr_out_transport_id, .tpg_check_demo_mode = ft_check_false, .tpg_check_demo_mode_cache = ft_check_false, .tpg_check_demo_mode_write_protect = ft_check_false, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index d6b03178262f..77cdbb56e1d5 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1290,72 +1290,6 @@ static u16 usbg_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 usbg_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - int ret = 0; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - ret = sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - break; - } - - return ret; -} - -static u32 usbg_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - int ret = 0; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - ret = sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - break; - } - - return ret; -} - -static char *usbg_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - char *tid = NULL; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - tid = sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - } - - return tid; -} - static u32 usbg_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1491,8 +1425,12 @@ static struct se_portal_group *usbg_make_tpg( tpg->tport = tport; tpg->tport_tpgt = tpgt; + /* + * SPC doesn't assign a protocol identifier for USB-SCSI, so we + * pretend to be SAS.. + */ ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, - tport->tport_proto_id); + SCSI_PROTOCOL_SAS); if (ret < 0) { destroy_workqueue(tpg->workqueue); kfree(tpg); @@ -1788,9 +1726,6 @@ static const struct target_core_fabric_ops usbg_ops = { .get_fabric_name = usbg_get_fabric_name, .tpg_get_wwn = usbg_get_fabric_wwn, .tpg_get_tag = usbg_get_tag, - .tpg_get_pr_transport_id = usbg_get_pr_transport_id, - .tpg_get_pr_transport_id_len = usbg_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = usbg_parse_pr_out_transport_id, .tpg_check_demo_mode = usbg_check_true, .tpg_check_demo_mode_cache = usbg_check_false, .tpg_check_demo_mode_write_protect = usbg_check_false, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.h b/drivers/usb/gadget/legacy/tcm_usb_gadget.h index b254aec521da..4c1c22a964b4 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.h +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.h @@ -44,8 +44,6 @@ struct usbg_tpg { }; struct usbg_tport { - /* SCSI protocol the tport is providing */ - u8 tport_proto_id; /* Binary World Wide unique Port Name for SAS Target port */ u64 tport_wwpn; /* ASCII formatted WWPN for SAS Target port */ diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 022860ccd008..db9f4b474214 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -304,97 +304,6 @@ static u16 vhost_scsi_get_tpgt(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 -vhost_scsi_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 -vhost_scsi_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -static char * -vhost_scsi_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg) { struct vhost_scsi_tpg *tpg = container_of(se_tpg, @@ -2224,9 +2133,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .get_fabric_name = vhost_scsi_get_fabric_name, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, - .tpg_get_pr_transport_id = vhost_scsi_get_pr_transport_id, - .tpg_get_pr_transport_id_len = vhost_scsi_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = vhost_scsi_parse_pr_out_transport_id, .tpg_check_demo_mode = vhost_scsi_check_true, .tpg_check_demo_mode_cache = vhost_scsi_check_true, .tpg_check_demo_mode_write_protect = vhost_scsi_check_false, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 8bf9448bd7fd..10c71a5616fa 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1270,97 +1270,6 @@ static u16 scsiback_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 -scsiback_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 -scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -static char * -scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - static struct se_wwn * scsiback_make_tport(struct target_fabric_configfs *tf, struct config_group *group, @@ -1909,9 +1818,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .get_fabric_name = scsiback_get_fabric_name, .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, - .tpg_get_pr_transport_id = scsiback_get_pr_transport_id, - .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id, .tpg_check_demo_mode = scsiback_check_true, .tpg_check_demo_mode_cache = scsiback_check_true, .tpg_check_demo_mode_write_protect = scsiback_check_false, diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 495606382546..a420f434c6c5 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -9,15 +9,6 @@ struct target_core_fabric_ops { char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); u32 (*tpg_get_default_depth)(struct se_portal_group *); - u32 (*tpg_get_pr_transport_id)(struct se_portal_group *, - struct se_node_acl *, - struct t10_pr_registration *, int *, - unsigned char *); - u32 (*tpg_get_pr_transport_id_len)(struct se_portal_group *, - struct se_node_acl *, - struct t10_pr_registration *, int *); - char *(*tpg_parse_pr_out_transport_id)(struct se_portal_group *, - const char *, u32 *, char **); int (*tpg_check_demo_mode)(struct se_portal_group *); int (*tpg_check_demo_mode_cache)(struct se_portal_group *); int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *); @@ -177,30 +168,6 @@ int core_tpg_register(const struct target_core_fabric_ops *, struct se_wwn *, struct se_portal_group *, int); int core_tpg_deregister(struct se_portal_group *); -/* SAS helpers */ -u32 sas_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 sas_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *sas_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - -/* FC helpers */ -u32 fc_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 fc_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *fc_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - -/* iSCSI helpers */ -u32 iscsi_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - /* * The LIO target core uses DMA_TO_DEVICE to mean that data is going * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean From 649ee05499d1257a3af0e10d961a1c52d9ef95b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Apr 2015 13:26:44 +0200 Subject: [PATCH 060/839] target: Move task tag into struct se_cmd + support 64-bit tags Simplify target core and target drivers by storing the task tag a.k.a. command identifier inside struct se_cmd. For several transports (e.g. SRP) tags are 64 bits wide. Hence add support for 64-bit tags. (Fix core_tmr_abort_task conversion spec warnings - nab) (Fix up usb-gadget to use 16-bit tags - HCH + bart) Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Andy Grover Cc: Sagi Grimberg Cc: Cc: Felipe Balbi Cc: Michael S. Tsirkin Cc: Juergen Gross Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 8 --- drivers/infiniband/ulp/srpt/ib_srpt.c | 27 ++++------ drivers/infiniband/ulp/srpt/ib_srpt.h | 1 - drivers/scsi/qla2xxx/qla_target.c | 52 +++++++++----------- drivers/scsi/qla2xxx/qla_target.h | 1 - drivers/scsi/qla2xxx/tcm_qla2xxx.c | 15 ------ drivers/target/iscsi/iscsi_target.c | 2 + drivers/target/iscsi/iscsi_target_configfs.c | 9 ---- drivers/target/loopback/tcm_loop.c | 10 +--- drivers/target/sbp/sbp_target.c | 12 +---- drivers/target/target_core_configfs.c | 4 -- drivers/target/target_core_tmr.c | 21 ++++---- drivers/target/target_core_transport.c | 41 ++++++++------- drivers/target/target_core_xcopy.c | 7 +-- drivers/target/tcm_fc/tcm_fc.h | 1 - drivers/target/tcm_fc/tfc_cmd.c | 10 +--- drivers/target/tcm_fc/tfc_conf.c | 1 - drivers/usb/gadget/legacy/tcm_usb_gadget.c | 15 +----- drivers/vhost/scsi.c | 7 +-- drivers/xen/xen-scsiback.c | 10 +--- include/target/target_core_base.h | 3 +- include/target/target_core_fabric.h | 1 - 22 files changed, 76 insertions(+), 182 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 0dbd70cccde1..71e1f5863310 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -310,7 +310,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .write_pending = " + fabric_mod_name + "_write_pending,\n" buf += " .write_pending_status = " + fabric_mod_name + "_write_pending_status,\n" buf += " .set_default_node_attributes = " + fabric_mod_name + "_set_default_node_attrs,\n" - buf += " .get_task_tag = " + fabric_mod_name + "_get_task_tag,\n" buf += " .get_cmd_state = " + fabric_mod_name + "_get_cmd_state,\n" buf += " .queue_data_in = " + fabric_mod_name + "_queue_data_in,\n" buf += " .queue_status = " + fabric_mod_name + "_queue_status,\n" @@ -525,13 +524,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "void " + fabric_mod_name + "_set_default_node_attrs(struct se_node_acl *);\n" - if re.search('get_task_tag\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *se_cmd)\n" - buf += "{\n" - buf += " return 0;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *);\n" - if re.search('get_cmd_state\)\(', fo): buf += "int " + fabric_mod_name + "_get_cmd_state(struct se_cmd *se_cmd)\n" buf += "{\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 98e00360f97e..56df5cd918c5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1339,7 +1339,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) } pr_debug("Aborting cmd with state %d and tag %lld\n", state, - ioctx->tag); + ioctx->cmd.tag); switch (state) { case SRPT_STATE_NEW: @@ -1701,7 +1701,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, srp_cmd = recv_ioctx->ioctx.buf; cmd = &send_ioctx->cmd; - send_ioctx->tag = srp_cmd->tag; + cmd->tag = srp_cmd->tag; switch (srp_cmd->task_attr) { case SRP_CMD_SIMPLE_Q: @@ -1772,7 +1772,7 @@ static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) for (i = 0; i < ch->rq_size; ++i) { target = ch->ioctx_ring[i]; if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->tag == tag && + target->cmd.tag == tag && srpt_get_cmd_state(target) != SRPT_STATE_DONE) { ret = 0; /* now let the target core abort &target->cmd; */ @@ -1831,7 +1831,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); - send_ioctx->tag = srp_tsk->tag; + send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); if (tcm_tmr < 0) { send_ioctx->cmd.se_tmr_req->response = @@ -2979,7 +2979,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) case CH_DRAINING: case CH_RELEASING: pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->tag); + ioctx->cmd.tag); srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); ret = -EINVAL; goto out; @@ -3054,24 +3054,24 @@ static void srpt_queue_response(struct se_cmd *cmd) ret = srpt_xfer_data(ch, ioctx); if (ret) { pr_err("xfer_data failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); return; } } if (state != SRPT_STATE_MGMT) - resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag, + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->cmd.tag, cmd->scsi_status); else { srp_tm_status = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, - ioctx->tag); + ioctx->cmd.tag); } ret = srpt_post_send(ch, ioctx, resp_len); if (ret) { pr_err("sending cmd response failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); target_put_sess_cmd(&ioctx->cmd); @@ -3479,14 +3479,6 @@ static void srpt_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 srpt_get_task_tag(struct se_cmd *se_cmd) -{ - struct srpt_send_ioctx *ioctx; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); - return ioctx->tag; -} - /* Note: only used from inside debug printk's by the TCM core. */ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) { @@ -3838,7 +3830,6 @@ static const struct target_core_fabric_ops srpt_template = { .write_pending = srpt_write_pending, .write_pending_status = srpt_write_pending_status, .set_default_node_attributes = srpt_set_default_node_attrs, - .get_task_tag = srpt_get_task_tag, .get_cmd_state = srpt_get_tcm_cmd_state, .queue_data_in = srpt_queue_data_in, .queue_status = srpt_queue_status, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 33b2c88b73ba..6fec740742bd 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -238,7 +238,6 @@ struct srpt_send_ioctx { bool rdma_aborted; struct se_cmd cmd; struct completion tx_done; - u64 tag; int sg_cnt; int mapped_sg_count; u16 n_rdma_ius; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fe8a8d157e22..e7515069e1ce 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1191,7 +1191,7 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - if (cmd->tag == abts->exchange_addr_to_abort) { + if (se_cmd->tag == abts->exchange_addr_to_abort) { lun = cmd->unpacked_lun; found_lun = true; break; @@ -1728,9 +1728,8 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, if (unlikely(cmd->aborted)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, - "qla_target(%d): terminating exchange " - "for aborted cmd=%p (se_cmd=%p, tag=%d)", vha->vp_idx, cmd, - se_cmd, cmd->tag); + "qla_target(%d): terminating exchange for aborted cmd=%p (se_cmd=%p, tag=%lld)", + vha->vp_idx, cmd, se_cmd, se_cmd->tag); cmd->state = QLA_TGT_STATE_ABORTED; cmd->cmd_flags |= BIT_6; @@ -1765,18 +1764,17 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, if (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { prm->residual = se_cmd->residual_count; ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x305c, - "Residual underflow: %d (tag %d, " - "op %x, bufflen %d, rq_result %x)\n", prm->residual, - cmd->tag, se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->bufflen, prm->rq_result); + "Residual underflow: %d (tag %lld, op %x, bufflen %d, rq_result %x)\n", + prm->residual, se_cmd->tag, + se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, + cmd->bufflen, prm->rq_result); prm->rq_result |= SS_RESIDUAL_UNDER; } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { prm->residual = se_cmd->residual_count; ql_dbg(ql_dbg_io, vha, 0x305d, - "Residual overflow: %d (tag %d, " - "op %x, bufflen %d, rq_result %x)\n", prm->residual, - cmd->tag, se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->bufflen, prm->rq_result); + "Residual overflow: %d (tag %lld, op %x, bufflen %d, rq_result %x)\n", + prm->residual, se_cmd->tag, se_cmd->t_task_cdb ? + se_cmd->t_task_cdb[0] : 0, cmd->bufflen, prm->rq_result); prm->rq_result |= SS_RESIDUAL_OVER; } @@ -1849,7 +1847,7 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) == 50) { *xmit_type &= ~QLA_TGT_XMIT_STATUS; ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf015, - "Dropping cmd %p (tag %d) status", cmd, cmd->tag); + "Dropping cmd %p (tag %d) status", cmd, se_cmd->tag); } #endif /* @@ -1873,7 +1871,7 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf016, "Cutting cmd %p (tag %d) buffer" " tail to len %d, sg_cnt %d (cmd->bufflen %d," - " cmd->sg_cnt %d)", cmd, cmd->tag, tot_len, leave, + " cmd->sg_cnt %d)", cmd, se_cmd->tag, tot_len, leave, cmd->bufflen, cmd->sg_cnt); cmd->bufflen = tot_len; @@ -1885,13 +1883,13 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf017, "Cutting cmd %p (tag %d) buffer head " - "to offset %d (cmd->bufflen %d)", cmd, cmd->tag, offset, + "to offset %d (cmd->bufflen %d)", cmd, se_cmd->tag, offset, cmd->bufflen); if (offset == 0) *xmit_type &= ~QLA_TGT_XMIT_DATA; else if (qlt_set_data_offset(cmd, offset)) { ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf018, - "qlt_set_data_offset() failed (tag %d)", cmd->tag); + "qlt_set_data_offset() failed (tag %d)", se_cmd->tag); } } } @@ -3194,7 +3192,7 @@ skip_term: return; } else if (cmd->state == QLA_TGT_STATE_ABORTED) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, - "Aborted command %p (tag %d) finished\n", cmd, cmd->tag); + "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, "qla_target(%d): A command in state (%d) should " @@ -3266,7 +3264,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) goto out_term; cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; - cmd->tag = atio->u.isp24.exchange_addr; + cmd->se_cmd.tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( (struct scsi_lun *)&atio->u.isp24.fcp_cmnd.lun); @@ -3891,9 +3889,8 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, resp = 1; } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf064, - "qla_target(%d): SRR for in data for cmd " - "without them (tag %d, SCSI status %d), " - "reject", vha->vp_idx, cmd->tag, + "qla_target(%d): SRR for in data for cmd without them (tag %lld, SCSI status %d), reject", + vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); goto out_reject; } @@ -3927,10 +3924,8 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, } } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf066, - "qla_target(%d): SRR for out data for cmd " - "without them (tag %d, SCSI status %d), " - "reject", vha->vp_idx, cmd->tag, - cmd->se_cmd.scsi_status); + "qla_target(%d): SRR for out data for cmd without them (tag %lld, SCSI status %d), reject", + vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); goto out_reject; } break; @@ -4051,10 +4046,9 @@ restart: cmd->sg = se_cmd->t_data_sg; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02c, - "SRR cmd %p (se_cmd %p, tag %d, op %x), " - "sg_cnt=%d, offset=%d", cmd, &cmd->se_cmd, cmd->tag, - se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->sg_cnt, cmd->offset); + "SRR cmd %p (se_cmd %p, tag %lld, op %x), sg_cnt=%d, offset=%d", + cmd, &cmd->se_cmd, se_cmd->tag, se_cmd->t_task_cdb ? + se_cmd->t_task_cdb[0] : 0, cmd->sg_cnt, cmd->offset); qlt_handle_srr(vha, sctio, imm); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 332086776dfe..985d76dd706b 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -924,7 +924,6 @@ struct qla_tgt_cmd { int sg_cnt; /* SG segments count */ int bufflen; /* cmd buffer length */ int offset; - uint32_t tag; uint32_t unpacked_lun; enum dma_data_direction dma_data_direction; uint32_t reset_count; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index bd0f9eb67901..8e331220adda 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -421,19 +421,6 @@ static void tcm_qla2xxx_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 tcm_qla2xxx_get_task_tag(struct se_cmd *se_cmd) -{ - struct qla_tgt_cmd *cmd; - - /* check for task mgmt cmd */ - if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - return 0xffffffff; - - cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - - return cmd->tag; -} - static int tcm_qla2xxx_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1865,7 +1852,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .write_pending = tcm_qla2xxx_write_pending, .write_pending_status = tcm_qla2xxx_write_pending_status, .set_default_node_attributes = tcm_qla2xxx_set_default_node_attrs, - .get_task_tag = tcm_qla2xxx_get_task_tag, .get_cmd_state = tcm_qla2xxx_get_cmd_state, .queue_data_in = tcm_qla2xxx_queue_data_in, .queue_status = tcm_qla2xxx_queue_status, @@ -1910,7 +1896,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .write_pending = tcm_qla2xxx_write_pending, .write_pending_status = tcm_qla2xxx_write_pending_status, .set_default_node_attributes = tcm_qla2xxx_set_default_node_attrs, - .get_task_tag = tcm_qla2xxx_get_task_tag, .get_cmd_state = tcm_qla2xxx_get_cmd_state, .queue_data_in = tcm_qla2xxx_queue_data_in, .queue_status = tcm_qla2xxx_queue_status, diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 866c167c0986..3c4431f71158 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1008,6 +1008,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmd->sense_reason) goto attach_cmd; + /* only used for printks or comparing with ->ref_task_tag */ + cmd->se_cmd.tag = (__force u32)cmd->init_task_tag; cmd->sense_reason = target_setup_cmd_from_cdb(&cmd->se_cmd, hdr->cdb); if (cmd->sense_reason) { if (cmd->sense_reason == TCM_OUT_OF_RESOURCES) { diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bd8af8764f4b..aa4fc4d8e176 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1692,14 +1692,6 @@ static char *iscsi_get_fabric_name(void) return "iSCSI"; } -static u32 iscsi_get_task_tag(struct se_cmd *se_cmd) -{ - struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); - - /* only used for printks or comparism with ->ref_task_tag */ - return (__force u32)cmd->init_task_tag; -} - static int iscsi_get_cmd_state(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); @@ -1938,7 +1930,6 @@ const struct target_core_fabric_ops iscsi_ops = { .write_pending = lio_write_pending, .write_pending_status = lio_write_pending_status, .set_default_node_attributes = lio_set_default_node_attributes, - .get_task_tag = iscsi_get_task_tag, .get_cmd_state = iscsi_get_cmd_state, .queue_data_in = lio_queue_data_in, .queue_status = lio_queue_status, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index b788406977f6..07d3b52c88eb 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -165,6 +165,7 @@ static void tcm_loop_submission_work(struct work_struct *work) transfer_length = scsi_bufflen(sc); } + se_cmd->tag = tl_cmd->sc_cmd_tag; rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd, &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun, transfer_length, TCM_SIMPLE_TAG, @@ -597,14 +598,6 @@ static void tcm_loop_set_default_node_attributes(struct se_node_acl *se_acl) return; } -static u32 tcm_loop_get_task_tag(struct se_cmd *se_cmd) -{ - struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, - struct tcm_loop_cmd, tl_se_cmd); - - return tl_cmd->sc_cmd_tag; -} - static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd) { struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, @@ -1259,7 +1252,6 @@ static const struct target_core_fabric_ops loop_ops = { .write_pending = tcm_loop_write_pending, .write_pending_status = tcm_loop_write_pending_status, .set_default_node_attributes = tcm_loop_set_default_node_attributes, - .get_task_tag = tcm_loop_get_task_tag, .get_cmd_state = tcm_loop_get_cmd_state, .queue_data_in = tcm_loop_queue_data_in, .queue_status = tcm_loop_queue_status, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 89f172dc8678..2916a4023e71 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1234,6 +1234,8 @@ static void sbp_handle_command(struct sbp_target_request *req) pr_debug("sbp_handle_command ORB:0x%llx unpacked_lun:%d data_len:%d data_dir:%d\n", req->orb_pointer, unpacked_lun, data_length, data_dir); + /* only used for printk until we do TMRs */ + req->se_cmd.tag = req->orb_pointer; if (target_submit_cmd(&req->se_cmd, sess->se_sess, req->cmd_buf, req->sense_buf, unpacked_lun, data_length, TCM_SIMPLE_TAG, data_dir, 0)) @@ -1768,15 +1770,6 @@ static void sbp_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 sbp_get_task_tag(struct se_cmd *se_cmd) -{ - struct sbp_target_request *req = container_of(se_cmd, - struct sbp_target_request, se_cmd); - - /* only used for printk until we do TMRs */ - return (u32)req->orb_pointer; -} - static int sbp_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -2377,7 +2370,6 @@ static const struct target_core_fabric_ops sbp_ops = { .write_pending = sbp_write_pending, .write_pending_status = sbp_write_pending_status, .set_default_node_attributes = sbp_set_default_node_attrs, - .get_task_tag = sbp_get_task_tag, .get_cmd_state = sbp_get_cmd_state, .queue_data_in = sbp_queue_data_in, .queue_status = sbp_queue_status, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 5e2649fc6919..fc598c084523 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -374,10 +374,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->set_default_node_attributes()\n"); return -EINVAL; } - if (!tfo->get_task_tag) { - pr_err("Missing tfo->get_task_tag()\n"); - return -EINVAL; - } if (!tfo->get_cmd_state) { pr_err("Missing tfo->get_cmd_state()\n"); return -EINVAL; diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index b2e169fba3c6..393aca8bb3eb 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -117,7 +117,7 @@ void core_tmr_abort_task( { struct se_cmd *se_cmd; unsigned long flags; - int ref_tag; + u64 ref_tag; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { @@ -129,16 +129,17 @@ void core_tmr_abort_task( if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) continue; - ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd); + ref_tag = se_cmd->tag; if (tmr->ref_task_tag != ref_tag) continue; - printk("ABORT_TASK: Found referenced %s task_tag: %u\n", + printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); spin_lock(&se_cmd->t_state_lock); if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %u already complete, skipping\n", ref_tag); + printk("ABORT_TASK: ref_tag: %llu already complete," + " skipping\n", ref_tag); spin_unlock(&se_cmd->t_state_lock); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); goto out; @@ -157,14 +158,14 @@ void core_tmr_abort_task( transport_cmd_finish_abort(se_cmd, true); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" - " ref_tag: %d\n", ref_tag); + " ref_tag: %llu\n", ref_tag); tmr->response = TMR_FUNCTION_COMPLETE; return; } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); out: - printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %d\n", + printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; } @@ -289,16 +290,16 @@ static void core_tmr_drain_state_list( list_del(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" - " ITT/CmdSN: 0x%08x/0x%08x, i_state: %d, t_state: %d" + " ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d" "cdb: 0x%02x\n", (preempt_and_abort_list) ? "Preempt" : "", cmd, - cmd->se_tfo->get_task_tag(cmd), 0, + cmd->tag, 0, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state, cmd->t_task_cdb[0]); - pr_debug("LUN_RESET: ITT[0x%08x] - pr_res_key: 0x%016Lx" + pr_debug("LUN_RESET: ITT[0x%08llx] - pr_res_key: 0x%016Lx" " -- CMD_T_ACTIVE: %d" " CMD_T_STOP: %d CMD_T_SENT: %d\n", - cmd->se_tfo->get_task_tag(cmd), cmd->pr_res_key, + cmd->tag, cmd->pr_res_key, (cmd->transport_state & CMD_T_ACTIVE) != 0, (cmd->transport_state & CMD_T_STOP) != 0, (cmd->transport_state & CMD_T_SENT) != 0); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 85b021e749e6..0d8662bb470e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -600,9 +600,8 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, * this command for frontend exceptions. */ if (cmd->transport_state & CMD_T_STOP) { - pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", - __func__, __LINE__, - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -1155,6 +1154,8 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) /* * Used by fabric modules containing a local struct se_cmd within their * fabric dependent per I/O descriptor. + * + * Preserves the value of @cmd->tag. */ void transport_init_se_cmd( struct se_cmd *cmd, @@ -1380,6 +1381,8 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, * @sgl_prot: struct scatterlist memory protection information * @sgl_prot_count: scatterlist count for protection information * + * Task tags are supported if the caller has set @se_cmd->tag. + * * Returns non zero to signal active I/O shutdown failure. All other * setup exceptions will be returned as a SCSI CHECK_CONDITION response, * but still return zero here. @@ -1512,6 +1515,8 @@ EXPORT_SYMBOL(target_submit_cmd_map_sgls); * @data_dir: DMA data direction * @flags: flags for command submission from target_sc_flags_tables * + * Task tags are supported if the caller has set @se_cmd->tag. + * * Returns non zero to signal active I/O shutdown failure. All other * setup exceptions will be returned as a SCSI CHECK_CONDITION response, * but still return zero here. @@ -1639,9 +1644,8 @@ void transport_generic_request_failure(struct se_cmd *cmd, { int ret = 0; - pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08x" - " CDB: 0x%02x\n", cmd, cmd->se_tfo->get_task_tag(cmd), - cmd->t_task_cdb[0]); + pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx" + " CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]); pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n", cmd->se_tfo->get_cmd_state(cmd), cmd->t_state, sense_reason); @@ -1849,9 +1853,8 @@ void target_execute_cmd(struct se_cmd *cmd) */ spin_lock_irq(&cmd->t_state_lock); if (cmd->transport_state & CMD_T_STOP) { - pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", - __func__, __LINE__, - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); spin_unlock_irq(&cmd->t_state_lock); complete_all(&cmd->t_transport_stop_comp); @@ -2658,10 +2661,8 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) cmd->transport_state |= CMD_T_STOP; - pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08x" - " i_state: %d, t_state: %d, CMD_T_STOP\n", - cmd, cmd->se_tfo->get_task_tag(cmd), - cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); + pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n", + cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2670,9 +2671,8 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) spin_lock_irqsave(&cmd->t_state_lock, flags); cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); - pr_debug("wait_for_tasks: Stopped wait_for_completion(" - "&cmd->t_transport_stop_comp) for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n", + cmd->tag); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2974,8 +2974,8 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) return 1; - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08x\n", - cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08llx\n", + cmd->t_task_cdb[0], cmd->tag); cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; cmd->scsi_status = SAM_STAT_TASK_ABORTED; @@ -3014,9 +3014,8 @@ void transport_send_task_abort(struct se_cmd *cmd) transport_lun_remove_cmd(cmd); - pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x," - " ITT: 0x%08x\n", cmd->t_task_cdb[0], - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n", + cmd->t_task_cdb[0], cmd->tag); trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 8fd680ac941b..5545619d3045 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -359,11 +359,6 @@ static char *xcopy_pt_get_fabric_name(void) return "xcopy-pt"; } -static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd) -{ - return 0; -} - static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -424,7 +419,6 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd) static const struct target_core_fabric_ops xcopy_pt_tfo = { .get_fabric_name = xcopy_pt_get_fabric_name, - .get_task_tag = xcopy_pt_get_tag, .get_cmd_state = xcopy_pt_get_cmd_state, .release_cmd = xcopy_pt_release_cmd, .check_stop_free = xcopy_pt_check_stop_free, @@ -575,6 +569,7 @@ static int target_xcopy_setup_pt_cmd( xpt_cmd->xcopy_op = xop; target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port); + cmd->tag = 0; sense_rc = target_setup_cmd_from_cdb(cmd, cdb); if (sense_rc) { ret = -EINVAL; diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index 4ceaeb9a4b93..39909dadef3e 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -157,7 +157,6 @@ int ft_queue_status(struct se_cmd *); int ft_queue_data_in(struct se_cmd *); int ft_write_pending(struct se_cmd *); int ft_write_pending_status(struct se_cmd *); -u32 ft_get_task_tag(struct se_cmd *); int ft_get_cmd_state(struct se_cmd *); void ft_queue_tm_resp(struct se_cmd *); void ft_aborted_task(struct se_cmd *); diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index edcafa4490c0..a8fe6ed5262f 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -247,15 +247,6 @@ int ft_write_pending(struct se_cmd *se_cmd) return 0; } -u32 ft_get_task_tag(struct se_cmd *se_cmd) -{ - struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); - - if (cmd->aborted) - return ~0; - return fc_seq_exch(cmd->seq)->rxid; -} - int ft_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -568,6 +559,7 @@ static void ft_send_work(struct work_struct *work) } fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd); + cmd->se_cmd.tag = fc_seq_exch(cmd->seq)->rxid; /* * Use a single se_cmd->cmd_kref as we expect to release se_cmd * directly from ft_check_stop_free callback in response path. diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 6ad7404b7dd1..f0821967f0a1 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -468,7 +468,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .write_pending = ft_write_pending, .write_pending_status = ft_write_pending_status, .set_default_node_attributes = ft_set_default_node_attr, - .get_task_tag = ft_get_task_tag, .get_cmd_state = ft_get_cmd_state, .queue_data_in = ft_queue_data_in, .queue_status = ft_queue_status, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 77cdbb56e1d5..a000d89dc78a 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1112,6 +1112,7 @@ static int usbg_submit_command(struct f_uas *fu, memcpy(cmd->cmd_buf, cmd_iu->cdb, cmd_len); cmd->tag = be16_to_cpup(&cmd_iu->tag); + cmd->se_cmd.tag = cmd->tag; if (fu->flags & USBG_USE_STREAMS) { if (cmd->tag > UASP_SS_EP_COMP_NUM_STREAMS) goto err; @@ -1245,6 +1246,7 @@ static int bot_submit_command(struct f_uas *fu, cmd->unpacked_lun = cbw->Lun; cmd->is_read = cbw->Flags & US_BULK_FLAG_IN ? 1 : 0; cmd->data_len = le32_to_cpu(cbw->DataTransferLength); + cmd->se_cmd.tag = le32_to_cpu(cmd->bot_tag); INIT_WORK(&cmd->work, bot_cmd_work); ret = queue_work(tpg->workqueue, &cmd->work); @@ -1340,18 +1342,6 @@ static void usbg_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 usbg_get_task_tag(struct se_cmd *se_cmd) -{ - struct usbg_cmd *cmd = container_of(se_cmd, struct usbg_cmd, - se_cmd); - struct f_uas *fu = cmd->fu; - - if (fu->flags & USBG_IS_BOT) - return le32_to_cpu(cmd->bot_tag); - else - return cmd->tag; -} - static int usbg_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1739,7 +1729,6 @@ static const struct target_core_fabric_ops usbg_ops = { .write_pending = usbg_send_write_request, .write_pending_status = usbg_write_pending_status, .set_default_node_attributes = usbg_set_default_node_attrs, - .get_task_tag = usbg_get_task_tag, .get_cmd_state = usbg_get_cmd_state, .queue_data_in = usbg_send_read_response, .queue_status = usbg_send_status_response, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index db9f4b474214..4a003948b07f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -369,11 +369,6 @@ static void vhost_scsi_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 vhost_scsi_get_task_tag(struct se_cmd *se_cmd) -{ - return 0; -} - static int vhost_scsi_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -818,6 +813,7 @@ static void vhost_scsi_submission_work(struct work_struct *work) } tv_nexus = cmd->tvc_nexus; + se_cmd->tag = 0; rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, @@ -2148,7 +2144,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .write_pending = vhost_scsi_write_pending, .write_pending_status = vhost_scsi_write_pending_status, .set_default_node_attributes = vhost_scsi_set_default_node_attrs, - .get_task_tag = vhost_scsi_get_task_tag, .get_cmd_state = vhost_scsi_get_cmd_state, .queue_data_in = vhost_scsi_queue_data_in, .queue_status = vhost_scsi_queue_status, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 10c71a5616fa..ea929baf7ad0 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -400,6 +400,7 @@ static void scsiback_cmd_exec(struct vscsibk_pend *pending_req) memset(se_cmd, 0, sizeof(*se_cmd)); scsiback_get(pending_req->info); + se_cmd->tag = pending_req->rqid; rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd, pending_req->sense_buffer, pending_req->v2p->lun, pending_req->data_len, 0, @@ -1394,14 +1395,6 @@ static void scsiback_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 scsiback_get_task_tag(struct se_cmd *se_cmd) -{ - struct vscsibk_pend *pending_req = container_of(se_cmd, - struct vscsibk_pend, se_cmd); - - return pending_req->rqid; -} - static int scsiback_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1833,7 +1826,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .write_pending = scsiback_write_pending, .write_pending_status = scsiback_write_pending_status, .set_default_node_attributes = scsiback_set_default_node_attrs, - .get_task_tag = scsiback_get_task_tag, .get_cmd_state = scsiback_get_cmd_state, .queue_data_in = scsiback_queue_data_in, .queue_status = scsiback_queue_status, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c462fb0a47f4..042a73464966 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -420,7 +420,7 @@ struct se_tmr_req { u8 response; int call_transport; /* Reference to ITT that Task Mgmt should be performed */ - u32 ref_task_tag; + u64 ref_task_tag; void *fabric_tmr_ptr; struct se_cmd *task_cmd; struct se_device *tmr_dev; @@ -473,6 +473,7 @@ struct se_cmd { u8 scsi_asc; u8 scsi_ascq; u16 scsi_sense_length; + u64 tag; /* SAM command identifier aka task tag */ /* Delay for ALUA Active/NonOptimized state access in milliseconds */ int alua_nonop_delay; /* See include/linux/dma-mapping.h */ diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index a420f434c6c5..f64d493f888b 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -53,7 +53,6 @@ struct target_core_fabric_ops { int (*write_pending)(struct se_cmd *); int (*write_pending_status)(struct se_cmd *); void (*set_default_node_attributes)(struct se_node_acl *); - u32 (*get_task_tag)(struct se_cmd *); int (*get_cmd_state)(struct se_cmd *); int (*queue_data_in)(struct se_cmd *); int (*queue_status)(struct se_cmd *); From 2fe6e721b575e308d16e37a0beff107cafd5cf9b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Apr 2015 13:15:29 +0200 Subject: [PATCH 061/839] ib_srpt: Remove set-but-not-used variables Detected these variables by building with W=1. Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/srpt/ib_srpt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 56df5cd918c5..f21a5f6bf858 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1387,7 +1387,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) { struct srpt_send_ioctx *ioctx; enum srpt_command_state state; - struct se_cmd *cmd; u32 index; atomic_inc(&ch->sq_wr_avail); @@ -1395,7 +1394,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) index = idx_from_wr_id(wr_id); ioctx = ch->ioctx_ring[index]; state = srpt_get_cmd_state(ioctx); - cmd = &ioctx->cmd; WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && state != SRPT_STATE_MGMT_RSP_SENT @@ -1472,10 +1470,8 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, enum srpt_opcode opcode) { - struct se_cmd *cmd; enum srpt_command_state state; - cmd = &ioctx->cmd; state = srpt_get_cmd_state(ioctx); switch (opcode) { case SRPT_RDMA_READ_LAST: @@ -2176,12 +2172,9 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) */ static void __srpt_close_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; enum rdma_ch_state prev_state; unsigned long flags; - sdev = ch->sport->sdev; - spin_lock_irqsave(&ch->spinlock, flags); prev_state = ch->state; switch (prev_state) { From 45fb94c2925fc7d9b170f2d148f91556428eaa1d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Apr 2015 13:00:58 +0200 Subject: [PATCH 062/839] target: Remove set-but-not-used-variables Detected these variables by building with W=1. Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index fc598c084523..86caeb26f996 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1006,8 +1006,8 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( u64 sa_res_key = 0; u32 mapped_lun = 0, target_lun = 0; int ret = -1, res_holder = 0, all_tg_pt = 0, arg, token; - u16 port_rpti = 0, tpgt = 0; - u8 type = 0, scope; + u16 tpgt = 0; + u8 type = 0; if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; @@ -1087,7 +1087,6 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_res_scope: match_int(args, &arg); - scope = (u8)arg; break; case Opt_res_all_tg_pt: match_int(args, &arg); @@ -1127,7 +1126,6 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_port_rtpi: match_int(args, &arg); - port_rpti = (u16)arg; break; case Opt_target_lun: match_int(args, &arg); From ef0caf8dd149992796ee453b65dd0d77ff848f57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:53 +0200 Subject: [PATCH 063/839] target: don't copy fabric ops Now that we don't need to set up ->tf_subsys we don't need to copy around the ops vector anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 2 +- drivers/target/target_core_fabric_configfs.c | 38 ++++++++++---------- include/target/target_core_configfs.h | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 86caeb26f996..43c9ed1d1b92 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -444,7 +444,7 @@ int target_register_template(const struct target_core_fabric_ops *fo) tf->tf_module = fo->module; snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); - tf->tf_ops = *fo; + tf->tf_ops = fo; target_fabric_setup_cits(tf); mutex_lock(&g_tf_lock); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index f4d9467c3e14..93564c0b7576 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -61,7 +61,7 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) { \ struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \ struct config_item_type *cit = &tfc->tfc_##_name##_cit; \ - struct configfs_attribute **attrs = tf->tf_ops.tfc_##_name##_attrs; \ + struct configfs_attribute **attrs = tf->tf_ops->tfc_##_name##_attrs; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ @@ -460,8 +460,8 @@ static void target_fabric_nacl_base_release(struct config_item *item) struct se_node_acl, acl_group); struct target_fabric_configfs *tf = se_nacl->se_tpg->se_tpg_wwn->wwn_tf; - if (tf->tf_ops.fabric_cleanup_nodeacl) - tf->tf_ops.fabric_cleanup_nodeacl(se_nacl); + if (tf->tf_ops->fabric_cleanup_nodeacl) + tf->tf_ops->fabric_cleanup_nodeacl(se_nacl); core_tpg_del_initiator_node_acl(se_nacl); } @@ -506,8 +506,8 @@ static struct config_group *target_fabric_make_nodeacl( if (IS_ERR(se_nacl)) return ERR_CAST(se_nacl); - if (tf->tf_ops.fabric_init_nodeacl) { - int ret = tf->tf_ops.fabric_init_nodeacl(se_nacl, name); + if (tf->tf_ops->fabric_init_nodeacl) { + int ret = tf->tf_ops->fabric_init_nodeacl(se_nacl, name); if (ret) { core_tpg_del_initiator_node_acl(se_nacl); return ERR_PTR(ret); @@ -579,7 +579,7 @@ static void target_fabric_np_base_release(struct config_item *item) struct se_portal_group *se_tpg = se_tpg_np->tpg_np_parent; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; - tf->tf_ops.fabric_drop_np(se_tpg_np); + tf->tf_ops->fabric_drop_np(se_tpg_np); } static struct configfs_item_operations target_fabric_np_base_item_ops = { @@ -603,12 +603,12 @@ static struct config_group *target_fabric_make_np( struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct se_tpg_np *se_tpg_np; - if (!tf->tf_ops.fabric_make_np) { + if (!tf->tf_ops->fabric_make_np) { pr_err("tf->tf_ops.fabric_make_np is NULL\n"); return ERR_PTR(-ENOSYS); } - se_tpg_np = tf->tf_ops.fabric_make_np(se_tpg, group, name); + se_tpg_np = tf->tf_ops->fabric_make_np(se_tpg, group, name); if (!se_tpg_np || IS_ERR(se_tpg_np)) return ERR_PTR(-EINVAL); @@ -808,13 +808,13 @@ static int target_fabric_port_link( goto out; } - if (tf->tf_ops.fabric_post_link) { + if (tf->tf_ops->fabric_post_link) { /* * Call the optional fabric_post_link() to allow a * fabric module to setup any additional state once * core_dev_add_lun() has been called.. */ - tf->tf_ops.fabric_post_link(se_tpg, lun); + tf->tf_ops->fabric_post_link(se_tpg, lun); } return 0; @@ -831,13 +831,13 @@ static int target_fabric_port_unlink( struct se_portal_group *se_tpg = lun->lun_sep->sep_tpg; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; - if (tf->tf_ops.fabric_pre_unlink) { + if (tf->tf_ops->fabric_pre_unlink) { /* * Call the optional fabric_pre_unlink() to allow a * fabric module to release any additional stat before * core_dev_del_lun() is called. */ - tf->tf_ops.fabric_pre_unlink(se_tpg, lun); + tf->tf_ops->fabric_pre_unlink(se_tpg, lun); } core_dev_del_lun(se_tpg, lun); @@ -1027,7 +1027,7 @@ static void target_fabric_tpg_release(struct config_item *item) struct se_wwn *wwn = se_tpg->se_tpg_wwn; struct target_fabric_configfs *tf = wwn->wwn_tf; - tf->tf_ops.fabric_drop_tpg(se_tpg); + tf->tf_ops->fabric_drop_tpg(se_tpg); } static struct configfs_item_operations target_fabric_tpg_base_item_ops = { @@ -1050,12 +1050,12 @@ static struct config_group *target_fabric_make_tpg( struct target_fabric_configfs *tf = wwn->wwn_tf; struct se_portal_group *se_tpg; - if (!tf->tf_ops.fabric_make_tpg) { - pr_err("tf->tf_ops.fabric_make_tpg is NULL\n"); + if (!tf->tf_ops->fabric_make_tpg) { + pr_err("tf->tf_ops->fabric_make_tpg is NULL\n"); return ERR_PTR(-ENOSYS); } - se_tpg = tf->tf_ops.fabric_make_tpg(wwn, group, name); + se_tpg = tf->tf_ops->fabric_make_tpg(wwn, group, name); if (!se_tpg || IS_ERR(se_tpg)) return ERR_PTR(-EINVAL); /* @@ -1116,7 +1116,7 @@ static void target_fabric_release_wwn(struct config_item *item) struct se_wwn, wwn_group); struct target_fabric_configfs *tf = wwn->wwn_tf; - tf->tf_ops.fabric_drop_wwn(wwn); + tf->tf_ops->fabric_drop_wwn(wwn); } static struct configfs_item_operations target_fabric_tpg_item_ops = { @@ -1152,12 +1152,12 @@ static struct config_group *target_fabric_make_wwn( struct target_fabric_configfs, tf_group); struct se_wwn *wwn; - if (!tf->tf_ops.fabric_make_wwn) { + if (!tf->tf_ops->fabric_make_wwn) { pr_err("tf->tf_ops.fabric_make_wwn is NULL\n"); return ERR_PTR(-ENOSYS); } - wwn = tf->tf_ops.fabric_make_wwn(tf, group, name); + wwn = tf->tf_ops->fabric_make_wwn(tf, group, name); if (!wwn || IS_ERR(wwn)) return ERR_PTR(-EINVAL); diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index b99c01170392..10402e5526e7 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -42,7 +42,7 @@ struct target_fabric_configfs { struct config_item_type *tf_fabric_cit; /* Pointer to fabric's struct module */ struct module *tf_module; - struct target_core_fabric_ops tf_ops; + const struct target_core_fabric_ops *tf_ops; struct target_fabric_configfs_template tf_cit_tmpl; }; From 0dc2e8d1435318dc448ac390d6d31e6cd2516684 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:54 +0200 Subject: [PATCH 064/839] target: put struct target_fabric_configfs on a diet Remove all fields that are either unused or can be replaced by trivially following pointers. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 24 ++++---------------- drivers/target/target_core_fabric_configfs.c | 4 ++-- include/target/target_core_configfs.h | 7 ------ 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 43c9ed1d1b92..f63e4dda8a90 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -116,7 +116,7 @@ static struct target_fabric_configfs *target_core_get_fabric( mutex_lock(&g_tf_lock); list_for_each_entry(tf, &g_tf_list, tf_list) { - if (!strcmp(tf->tf_name, name)) { + if (!strcmp(tf->tf_ops->name, name)) { atomic_inc(&tf->tf_access_cnt); mutex_unlock(&g_tf_lock); return tf; @@ -193,7 +193,7 @@ static struct config_group *target_core_register_fabric( return ERR_PTR(-EINVAL); } pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:" - " %s\n", tf->tf_name); + " %s\n", tf->tf_ops->name); /* * On a successful target_core_get_fabric() look, the returned * struct target_fabric_configfs *tf will contain a usage reference. @@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); - tf->tf_fabric = &tf->tf_group.cg_item; - pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric" - " for %s\n", name); - return &tf->tf_group; } @@ -236,13 +232,9 @@ static void target_core_deregister_fabric( " tf list\n", config_item_name(item)); pr_debug("Target_Core_ConfigFS: DEREGISTER -> located fabric:" - " %s\n", tf->tf_name); + " %s\n", tf->tf_ops->name); atomic_dec(&tf->tf_access_cnt); - pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing" - " tf->tf_fabric for %s\n", tf->tf_name); - tf->tf_fabric = NULL; - pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing ci" " %s\n", config_item_name(item)); @@ -436,14 +428,6 @@ int target_register_template(const struct target_core_fabric_ops *fo) INIT_LIST_HEAD(&tf->tf_list); atomic_set(&tf->tf_access_cnt, 0); - - /* - * Setup the default generic struct config_item_type's (cits) in - * struct target_fabric_configfs->tf_cit_tmpl - */ - tf->tf_module = fo->module; - snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); - tf->tf_ops = fo; target_fabric_setup_cits(tf); @@ -461,7 +445,7 @@ void target_unregister_template(const struct target_core_fabric_ops *fo) mutex_lock(&g_tf_lock); list_for_each_entry(t, &g_tf_list, tf_list) { - if (!strcmp(t->tf_name, fo->name)) { + if (!strcmp(t->tf_ops->name, fo->name)) { BUG_ON(atomic_read(&t->tf_access_cnt)); list_del(&t->tf_list); kfree(t); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 93564c0b7576..d2ce61a07afd 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -52,7 +52,7 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = _attrs; \ - cit->ct_owner = tf->tf_module; \ + cit->ct_owner = tf->tf_ops->module; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } @@ -66,7 +66,7 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = attrs; \ - cit->ct_owner = tf->tf_module; \ + cit->ct_owner = tf->tf_ops->module; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 10402e5526e7..3f11d2ead63d 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -30,18 +30,11 @@ struct target_fabric_configfs_template { }; struct target_fabric_configfs { - char tf_name[TARGET_FABRIC_NAME_SIZE]; atomic_t tf_access_cnt; struct list_head tf_list; struct config_group tf_group; struct config_group tf_disc_group; struct config_group *tf_default_groups[2]; - /* Pointer to fabric's config_item */ - struct config_item *tf_fabric; - /* Passed from fabric modules */ - struct config_item_type *tf_fabric_cit; - /* Pointer to fabric's struct module */ - struct module *tf_module; const struct target_core_fabric_ops *tf_ops; struct target_fabric_configfs_template tf_cit_tmpl; }; From 968ebe752035d14a4c2bb69f8ed0ddf7292dd2f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:55 +0200 Subject: [PATCH 065/839] target: remove struct target_fabric_configfs_template It's only embedded into struct target_fabric_configfs these days, so we might as well remove this layer of abstraction. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 7 ++- drivers/target/target_core_fabric_configfs.c | 46 +++++++++---------- include/target/target_core_configfs.h | 48 +++++++++----------- 3 files changed, 46 insertions(+), 55 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index f63e4dda8a90..798386a1b6e7 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -199,16 +199,15 @@ static struct config_group *target_core_register_fabric( * struct target_fabric_configfs *tf will contain a usage reference. */ pr_debug("Target_Core_ConfigFS: REGISTER tfc_wwn_cit -> %p\n", - &tf->tf_cit_tmpl.tfc_wwn_cit); + &tf->tf_wwn_cit); tf->tf_group.default_groups = tf->tf_default_groups; tf->tf_group.default_groups[0] = &tf->tf_disc_group; tf->tf_group.default_groups[1] = NULL; - config_group_init_type_name(&tf->tf_group, name, - &tf->tf_cit_tmpl.tfc_wwn_cit); + config_group_init_type_name(&tf->tf_group, name, &tf->tf_wwn_cit); config_group_init_type_name(&tf->tf_disc_group, "discovery_auth", - &tf->tf_cit_tmpl.tfc_discovery_cit); + &tf->tf_discovery_cit); pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index d2ce61a07afd..6cb4828308e9 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -46,8 +46,7 @@ #define TF_CIT_SETUP(_name, _item_ops, _group_ops, _attrs) \ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \ { \ - struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \ - struct config_item_type *cit = &tfc->tfc_##_name##_cit; \ + struct config_item_type *cit = &tf->tf_##_name##_cit; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ @@ -59,8 +58,7 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) #define TF_CIT_SETUP_DRV(_name, _item_ops, _group_ops) \ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \ { \ - struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \ - struct config_item_type *cit = &tfc->tfc_##_name##_cit; \ + struct config_item_type *cit = &tf->tf_##_name##_cit; \ struct configfs_attribute **attrs = tf->tf_ops->tfc_##_name##_attrs; \ \ cit->ct_item_ops = _item_ops; \ @@ -399,9 +397,9 @@ static struct config_group *target_fabric_make_mappedlun( } config_group_init_type_name(&lacl->se_lun_group, name, - &tf->tf_cit_tmpl.tfc_tpg_mappedlun_cit); + &tf->tf_tpg_mappedlun_cit); config_group_init_type_name(&lacl->ml_stat_grps.stat_group, - "statistics", &tf->tf_cit_tmpl.tfc_tpg_mappedlun_stat_cit); + "statistics", &tf->tf_tpg_mappedlun_stat_cit); lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group; lacl_cg->default_groups[1] = NULL; @@ -523,16 +521,15 @@ static struct config_group *target_fabric_make_nodeacl( nacl_cg->default_groups[4] = NULL; config_group_init_type_name(&se_nacl->acl_group, name, - &tf->tf_cit_tmpl.tfc_tpg_nacl_base_cit); + &tf->tf_tpg_nacl_base_cit); config_group_init_type_name(&se_nacl->acl_attrib_group, "attrib", - &tf->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit); + &tf->tf_tpg_nacl_attrib_cit); config_group_init_type_name(&se_nacl->acl_auth_group, "auth", - &tf->tf_cit_tmpl.tfc_tpg_nacl_auth_cit); + &tf->tf_tpg_nacl_auth_cit); config_group_init_type_name(&se_nacl->acl_param_group, "param", - &tf->tf_cit_tmpl.tfc_tpg_nacl_param_cit); + &tf->tf_tpg_nacl_param_cit); config_group_init_type_name(&se_nacl->acl_fabric_stat_group, - "fabric_statistics", - &tf->tf_cit_tmpl.tfc_tpg_nacl_stat_cit); + "fabric_statistics", &tf->tf_tpg_nacl_stat_cit); return &se_nacl->acl_group; } @@ -614,7 +611,7 @@ static struct config_group *target_fabric_make_np( se_tpg_np->tpg_np_parent = se_tpg; config_group_init_type_name(&se_tpg_np->tpg_np_group, name, - &tf->tf_cit_tmpl.tfc_tpg_np_base_cit); + &tf->tf_tpg_np_base_cit); return &se_tpg_np->tpg_np_group; } @@ -918,9 +915,9 @@ static struct config_group *target_fabric_make_lun( } config_group_init_type_name(&lun->lun_group, name, - &tf->tf_cit_tmpl.tfc_tpg_port_cit); + &tf->tf_tpg_port_cit); config_group_init_type_name(&lun->port_stat_grps.stat_group, - "statistics", &tf->tf_cit_tmpl.tfc_tpg_port_stat_cit); + "statistics", &tf->tf_tpg_port_stat_cit); lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group; lun_cg->default_groups[1] = NULL; @@ -1071,19 +1068,19 @@ static struct config_group *target_fabric_make_tpg( se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, - &tf->tf_cit_tmpl.tfc_tpg_base_cit); + &tf->tf_tpg_base_cit); config_group_init_type_name(&se_tpg->tpg_lun_group, "lun", - &tf->tf_cit_tmpl.tfc_tpg_lun_cit); + &tf->tf_tpg_lun_cit); config_group_init_type_name(&se_tpg->tpg_np_group, "np", - &tf->tf_cit_tmpl.tfc_tpg_np_cit); + &tf->tf_tpg_np_cit); config_group_init_type_name(&se_tpg->tpg_acl_group, "acls", - &tf->tf_cit_tmpl.tfc_tpg_nacl_cit); + &tf->tf_tpg_nacl_cit); config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", - &tf->tf_cit_tmpl.tfc_tpg_attrib_cit); + &tf->tf_tpg_attrib_cit); config_group_init_type_name(&se_tpg->tpg_auth_group, "auth", - &tf->tf_cit_tmpl.tfc_tpg_auth_cit); + &tf->tf_tpg_auth_cit); config_group_init_type_name(&se_tpg->tpg_param_group, "param", - &tf->tf_cit_tmpl.tfc_tpg_param_cit); + &tf->tf_tpg_param_cit); return &se_tpg->tpg_group; } @@ -1169,10 +1166,9 @@ static struct config_group *target_fabric_make_wwn( wwn->wwn_group.default_groups[0] = &wwn->fabric_stat_group; wwn->wwn_group.default_groups[1] = NULL; - config_group_init_type_name(&wwn->wwn_group, name, - &tf->tf_cit_tmpl.tfc_tpg_cit); + config_group_init_type_name(&wwn->wwn_group, name, &tf->tf_tpg_cit); config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics", - &tf->tf_cit_tmpl.tfc_wwn_fabric_stats_cit); + &tf->tf_wwn_fabric_stats_cit); return &wwn->wwn_group; } diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 3f11d2ead63d..abd063b8b301 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -5,30 +5,6 @@ #define TARGET_CORE_NAME_MAX_LEN 64 #define TARGET_FABRIC_NAME_SIZE 32 -struct target_fabric_configfs_template { - struct config_item_type tfc_discovery_cit; - struct config_item_type tfc_wwn_cit; - struct config_item_type tfc_wwn_fabric_stats_cit; - struct config_item_type tfc_tpg_cit; - struct config_item_type tfc_tpg_base_cit; - struct config_item_type tfc_tpg_lun_cit; - struct config_item_type tfc_tpg_port_cit; - struct config_item_type tfc_tpg_port_stat_cit; - struct config_item_type tfc_tpg_np_cit; - struct config_item_type tfc_tpg_np_base_cit; - struct config_item_type tfc_tpg_attrib_cit; - struct config_item_type tfc_tpg_auth_cit; - struct config_item_type tfc_tpg_param_cit; - struct config_item_type tfc_tpg_nacl_cit; - struct config_item_type tfc_tpg_nacl_base_cit; - struct config_item_type tfc_tpg_nacl_attrib_cit; - struct config_item_type tfc_tpg_nacl_auth_cit; - struct config_item_type tfc_tpg_nacl_param_cit; - struct config_item_type tfc_tpg_nacl_stat_cit; - struct config_item_type tfc_tpg_mappedlun_cit; - struct config_item_type tfc_tpg_mappedlun_stat_cit; -}; - struct target_fabric_configfs { atomic_t tf_access_cnt; struct list_head tf_list; @@ -36,6 +12,26 @@ struct target_fabric_configfs { struct config_group tf_disc_group; struct config_group *tf_default_groups[2]; const struct target_core_fabric_ops *tf_ops; - struct target_fabric_configfs_template tf_cit_tmpl; -}; + struct config_item_type tf_discovery_cit; + struct config_item_type tf_wwn_cit; + struct config_item_type tf_wwn_fabric_stats_cit; + struct config_item_type tf_tpg_cit; + struct config_item_type tf_tpg_base_cit; + struct config_item_type tf_tpg_lun_cit; + struct config_item_type tf_tpg_port_cit; + struct config_item_type tf_tpg_port_stat_cit; + struct config_item_type tf_tpg_np_cit; + struct config_item_type tf_tpg_np_base_cit; + struct config_item_type tf_tpg_attrib_cit; + struct config_item_type tf_tpg_auth_cit; + struct config_item_type tf_tpg_param_cit; + struct config_item_type tf_tpg_nacl_cit; + struct config_item_type tf_tpg_nacl_base_cit; + struct config_item_type tf_tpg_nacl_attrib_cit; + struct config_item_type tf_tpg_nacl_auth_cit; + struct config_item_type tf_tpg_nacl_param_cit; + struct config_item_type tf_tpg_nacl_stat_cit; + struct config_item_type tf_tpg_mappedlun_cit; + struct config_item_type tf_tpg_mappedlun_stat_cit; +}; From 7ad34a93675e9c5a65711f83c518871317adc2ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:56 +0200 Subject: [PATCH 066/839] target: target_core_configfs.h is not needed in fabric drivers Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcm_mod_builder.py | 2 -- drivers/infiniband/ulp/srpt/ib_srpt.c | 1 - drivers/scsi/qla2xxx/tcm_qla2xxx.c | 1 - drivers/target/iscsi/iscsi_target.c | 1 - drivers/target/iscsi/iscsi_target_configfs.c | 1 - drivers/target/iscsi/iscsi_target_tpg.c | 1 - drivers/target/iscsi/iscsi_target_util.c | 1 - drivers/target/loopback/tcm_loop.c | 1 - drivers/target/sbp/sbp_target.c | 1 - drivers/target/target_core_alua.c | 1 - drivers/target/target_core_fabric_lib.c | 1 - drivers/target/target_core_hba.c | 1 - drivers/target/target_core_pr.c | 1 - drivers/target/target_core_stat.c | 1 - drivers/target/target_core_tmr.c | 1 - drivers/target/target_core_transport.c | 1 - drivers/target/target_core_ua.c | 1 - drivers/target/target_core_xcopy.c | 1 - drivers/target/tcm_fc/tfc_cmd.c | 1 - drivers/target/tcm_fc/tfc_conf.c | 1 - drivers/target/tcm_fc/tfc_io.c | 1 - drivers/target/tcm_fc/tfc_sess.c | 1 - drivers/usb/gadget/legacy/tcm_usb_gadget.c | 1 - drivers/vhost/scsi.c | 1 - drivers/xen/xen-scsiback.c | 1 - 25 files changed, 26 deletions(-) diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 71e1f5863310..c70b193ec110 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -203,7 +203,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include \n" buf += "#include \n" buf += "#include \n" - buf += "#include \n" buf += "#include \n\n" buf += "#include \"" + fabric_mod_name + "_base.h\"\n" buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n" @@ -417,7 +416,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include \n\n" buf += "#include \n" buf += "#include \n" - buf += "#include \n\n" buf += "#include \"" + fabric_mod_name + "_base.h\"\n" buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n" diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f21a5f6bf858..dbad5c67a294 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -46,7 +46,6 @@ #include #include #include -#include #include "ib_srpt.h" /* Name of this kernel module. */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 8e331220adda..b505e5e7c987 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include "qla_def.h" diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3c4431f71158..1817922fe0f1 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include "iscsi_target_parameters.h" diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index aa4fc4d8e176..523ae556e22c 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index dcb7ede1d4aa..d4ac31fc2acd 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -18,7 +18,6 @@ #include #include -#include #include #include "iscsi_target_erl0.h" diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index c1582e88191e..a2bff0702eb2 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 07d3b52c88eb..b0c17614493e 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "tcm_loop.h" diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 2916a4023e71..5d7755edc668 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 4f8d4d459aa4..b590f153a7b6 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "target_core_internal.h" #include "target_core_alua.h" diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 89720b2f37c2..92346f23ab45 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -39,7 +39,6 @@ #include #include -#include #include "target_core_internal.h" #include "target_core_pr.h" diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index ff95f95dcd13..e6e496ff9546 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "target_core_internal.h" diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d396b3b87025..a4b9a0dd50cd 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "target_core_internal.h" #include "target_core_pr.h" diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 03538994d2f7..64efee2fba52 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "target_core_internal.h" diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 393aca8bb3eb..60ef1043242e 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -33,7 +33,6 @@ #include #include #include -#include #include "target_core_internal.h" #include "target_core_alua.h" diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0d8662bb470e..fc460f1b0b75 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -44,7 +44,6 @@ #include #include #include -#include #include "target_core_internal.h" #include "target_core_alua.h" diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index 1738b1646988..a0bf0d1a3df7 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -30,7 +30,6 @@ #include #include -#include #include "target_core_internal.h" #include "target_core_alua.h" diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 5545619d3045..3556a9dcee46 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -32,7 +32,6 @@ #include #include #include -#include #include "target_core_internal.h" #include "target_core_pr.h" diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index a8fe6ed5262f..d62282fb2b5b 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -40,7 +40,6 @@ #include #include -#include #include #include "tcm_fc.h" diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index f0821967f0a1..ee5653139417 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include "tcm_fc.h" diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index 583e755d8091..401ce7f27863 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -48,7 +48,6 @@ #include #include -#include #include #include "tcm_fc.h" diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index ccee7e332a4d..96a2273667ce 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -40,7 +40,6 @@ #include #include -#include #include #include "tcm_fc.h" diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index a000d89dc78a..16b9a16678fe 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 4a003948b07f..dcd228b59ded 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index ea929baf7ad0..edd0379c06b9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -56,7 +56,6 @@ #include #include -#include #include #include From afe92323f9df4a2c4a093cd3f2a3f82800498596 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:54:42 +0200 Subject: [PATCH 067/839] target: remove ->put_session method The only instance of ->put_session is in qla2xxx, and was added by commit aaf68b ("tcm_qla2xxx: Convert to TFO->put_session() usage") with the following description: This patch converts tcm_qla2xxx code to use an internal kref_put() for se_session->sess_kref in order to ensure that qla_hw_data->hardware_lock can be held while calling qlt_unreg_sess() for the final put. But these day we're already holding the hardware lock over qlt_unreg_sess in the ->close_session callback, so we're fine without this method. (Re-add missing tcm_qla2xxx_release_session + drop put_session usage - nab) Signed-off-by: Christoph Hellwig Acked-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 13 ------------- drivers/target/target_core_transport.c | 6 ------ drivers/xen/xen-scsiback.c | 1 - include/target/target_core_fabric.h | 1 - 4 files changed, 21 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index b505e5e7c987..cb53144c72d0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -700,17 +700,6 @@ static void tcm_qla2xxx_release_session(struct kref *kref) qlt_unreg_sess(se_sess->fabric_sess_ptr); } -static void tcm_qla2xxx_put_session(struct se_session *se_sess) -{ - struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; - struct qla_hw_data *ha = sess->vha->hw; - unsigned long flags; - - spin_lock_irqsave(&ha->hardware_lock, flags); - kref_put(&se_sess->sess_kref, tcm_qla2xxx_release_session); - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} - static void tcm_qla2xxx_put_sess(struct qla_tgt_sess *sess) { if (!sess) @@ -1843,7 +1832,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, @@ -1887,7 +1875,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index fc460f1b0b75..3c2809cd8b5b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -409,12 +409,6 @@ EXPORT_SYMBOL(target_get_session); void target_put_session(struct se_session *se_sess) { - struct se_portal_group *tpg = se_sess->se_tpg; - - if (tpg->se_tpg_tfo->put_session != NULL) { - tpg->se_tpg_tfo->put_session(se_sess); - return; - } kref_put(&se_sess->sess_kref, target_release_session); } EXPORT_SYMBOL(target_put_session); diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index edd0379c06b9..8b7dd47abd8d 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1817,7 +1817,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .tpg_get_inst_index = scsiback_tpg_get_inst_index, .check_stop_free = scsiback_check_stop_free, .release_cmd = scsiback_release_cmd, - .put_session = NULL, .shutdown_session = scsiback_shutdown_session, .close_session = scsiback_close_session, .sess_get_index = scsiback_sess_get_index, diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index f64d493f888b..55654c90350d 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -37,7 +37,6 @@ struct target_core_fabric_ops { */ int (*check_stop_free)(struct se_cmd *); void (*release_cmd)(struct se_cmd *); - void (*put_session)(struct se_session *); /* * Called with spin_lock_bh(struct se_portal_group->session_lock held. */ From 3a40dede1f0af77fd4d77017a4ee07703fad87ef Mon Sep 17 00:00:00 2001 From: Christophe Vu-Brugier Date: Mon, 4 May 2015 11:33:33 +0200 Subject: [PATCH 068/839] target: fix a log message in se_dev_set_emulate_rest_reord() Signed-off-by: Christophe Vu-Brugier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 9ee194d1cd4b..371a9770f563 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1071,7 +1071,7 @@ EXPORT_SYMBOL(se_dev_set_is_nonrot); int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag) { if (flag != 0) { - printk(KERN_ERR "dev[%p]: SE Device emulatation of restricted" + printk(KERN_ERR "dev[%p]: SE Device emulation of restricted" " reordering not implemented\n", dev); return -ENOSYS; } From d2c27f0d0b7eb18f40b9b1a0d793c8ba074e5c23 Mon Sep 17 00:00:00 2001 From: Christophe Vu-Brugier Date: Mon, 4 May 2015 11:33:34 +0200 Subject: [PATCH 069/839] iscsi-target: fix variable name typo in iscsi_check_acceptor_state() Signed-off-by: Christophe Vu-Brugier Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_parameters.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index be336df0a887..e8a52f7d6204 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -967,7 +967,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, struct iscsi_conn *conn) { u8 acceptor_boolean_value = 0, proposer_boolean_value = 0; - char *negoitated_value = NULL; + char *negotiated_value = NULL; if (IS_PSTATE_ACCEPTOR(param)) { pr_err("Received key \"%s\" twice, protocol error.\n", @@ -1068,15 +1068,15 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, param->name, param->value); } } else if (IS_TYPE_VALUE_LIST(param)) { - negoitated_value = iscsi_check_valuelist_for_support( + negotiated_value = iscsi_check_valuelist_for_support( param, value); - if (!negoitated_value) { + if (!negotiated_value) { pr_err("Proposer's value list \"%s\" contains" " no valid values from Acceptor's value list" " \"%s\".\n", value, param->value); return -1; } - if (iscsi_update_param_value(param, negoitated_value) < 0) + if (iscsi_update_param_value(param, negotiated_value) < 0) return -1; } else if (IS_PHASE_DECLARATIVE(param)) { if (iscsi_update_param_value(param, value) < 0) From 29a05deebf6c2e3010934fb78ee65cab3d329470 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 22 Mar 2015 20:42:19 -0700 Subject: [PATCH 070/839] target: Convert se_node_acl->device_list[] to RCU hlist This patch converts se_node_acl->device_list[] table for mappedluns to modern RCU hlist_head usage in order to support an arbitrary number of node_acl lun mappings. It converts transport_lookup_*_lun() fast-path code to use RCU read path primitives when looking up se_dev_entry. It adds a new hlist_head at se_node_acl->lun_entry_hlist for this purpose. For transport_lookup_cmd_lun() code, it works with existing per-cpu se_lun->lun_ref when associating se_cmd with se_lun + se_device. Also, go ahead and update core_create_device_list_for_node() + core_free_device_list_for_node() to use ->lun_entry_hlist. It also converts se_dev_entry->pr_ref_count access to use modern struct kref counting, and updates core_disable_device_list_for_node() to kref_put() and block on se_deve->pr_comp waiting for outstanding PR special-case PR references to drop, then invoke kfree_rcu() to wait for the RCU grace period to complete before releasing memory. So now that se_node_acl->lun_entry_hlist fast path access uses RCU protected pointers, go ahead and convert remaining non-fast path RCU updater code using ->lun_entry_lock to struct mutex to allow callers to block while walking se_node_acl->lun_entry_hlist. Finally drop the left-over core_clear_initiator_node_from_tpg() that originally cleared lun_access during se_node_acl shutdown, as post RCU conversion it now becomes duplicated logic. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Paul E. McKenney Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 3 +- drivers/target/target_core_device.c | 354 ++++++++++--------- drivers/target/target_core_fabric_configfs.c | 41 +-- drivers/target/target_core_internal.h | 8 +- drivers/target/target_core_pr.c | 72 ++-- drivers/target/target_core_pscsi.c | 7 +- drivers/target/target_core_spc.c | 18 +- drivers/target/target_core_stat.c | 197 ++++++----- drivers/target/target_core_tpg.c | 75 +--- drivers/target/target_core_ua.c | 51 ++- include/target/target_core_backend.h | 2 +- include/target/target_core_base.h | 27 +- 12 files changed, 413 insertions(+), 442 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index b590f153a7b6..53cdfabac861 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1001,7 +1001,8 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) spin_lock_bh(&port->sep_alua_lock); list_for_each_entry(se_deve, &port->sep_alua_list, alua_port_list) { - lacl = se_deve->se_lun_acl; + lacl = rcu_dereference_check(se_deve->se_lun_acl, + lockdep_is_held(&port->sep_alua_lock)); /* * se_deve->se_lun_acl pointer may be NULL for a * entry created without explicit Node+MappedLUN ACLs diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 371a9770f563..e44e6bd66659 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -60,18 +60,17 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) { struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; + struct se_node_acl *nacl = se_sess->se_node_acl; struct se_device *dev; - unsigned long flags; + struct se_dev_entry *deve; if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) return TCM_NON_EXISTENT_LUN; - spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags); - se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun]; - if (se_cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - struct se_dev_entry *deve = se_cmd->se_deve; - - deve->total_cmds++; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (deve) { + atomic_long_inc(&deve->total_cmds); if ((se_cmd->data_direction == DMA_TO_DEVICE) && (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY)) { @@ -79,17 +78,19 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) " Access for 0x%08x\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); return TCM_WRITE_PROTECTED; } if (se_cmd->data_direction == DMA_TO_DEVICE) - deve->write_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, + &deve->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - deve->read_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, + &deve->read_bytes); - se_lun = deve->se_lun; - se_cmd->se_lun = deve->se_lun; + se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -97,7 +98,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; } - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); if (!se_lun) { /* @@ -147,24 +148,23 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) struct se_dev_entry *deve; struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; + struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; unsigned long flags; if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) return -ENODEV; - spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags); - se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun]; - deve = se_cmd->se_deve; - - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - se_tmr->tmr_lun = deve->se_lun; - se_cmd->se_lun = deve->se_lun; - se_lun = deve->se_lun; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (deve) { + se_tmr->tmr_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; } - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); if (!se_lun) { pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" @@ -186,9 +186,27 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) } EXPORT_SYMBOL(transport_lookup_tmr_lun); +bool target_lun_is_rdonly(struct se_cmd *cmd) +{ + struct se_session *se_sess = cmd->se_sess; + struct se_dev_entry *deve; + bool ret; + + if (cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) + return true; + + rcu_read_lock(); + deve = target_nacl_find_deve(se_sess->se_node_acl, cmd->orig_fe_lun); + ret = (deve && deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(target_lun_is_rdonly); + /* * This function is called from core_scsi3_emulate_pro_register_and_move() - * and core_scsi3_decode_spec_i_port(), and will increment &deve->pr_ref_count + * and core_scsi3_decode_spec_i_port(), and will increment &deve->pr_kref * when a matching rtpi is found. */ struct se_dev_entry *core_get_se_deve_from_rtpi( @@ -197,81 +215,43 @@ struct se_dev_entry *core_get_se_deve_from_rtpi( { struct se_dev_entry *deve; struct se_lun *lun; - struct se_port *port; struct se_portal_group *tpg = nacl->se_tpg; - u32 i; - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - lun = deve->se_lun; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + lun = rcu_dereference(deve->se_lun); if (!lun) { pr_err("%s device entries device pointer is" " NULL, but Initiator has access.\n", tpg->se_tpg_tfo->get_fabric_name()); continue; } - port = lun->lun_sep; - if (!port) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - if (port->sep_rtpi != rtpi) + if (lun->lun_rtpi != rtpi) continue; - atomic_inc_mb(&deve->pr_ref_count); - spin_unlock_irq(&nacl->device_list_lock); + kref_get(&deve->pr_kref); + rcu_read_unlock(); return deve; } - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return NULL; } -int core_free_device_list_for_node( +void core_free_device_list_for_node( struct se_node_acl *nacl, struct se_portal_group *tpg) { struct se_dev_entry *deve; - struct se_lun *lun; - u32 i; - if (!nacl->device_list) - return 0; - - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - if (!deve->se_lun) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - lun = deve->se_lun; - - spin_unlock_irq(&nacl->device_list_lock); - core_disable_device_list_for_node(lun, NULL, deve->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); - spin_lock_irq(&nacl->device_list_lock); + mutex_lock(&nacl->lun_entry_mutex); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_lun *lun = rcu_dereference_check(deve->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); + core_disable_device_list_for_node(lun, deve, nacl, tpg); } - spin_unlock_irq(&nacl->device_list_lock); - - array_free(nacl->device_list, TRANSPORT_MAX_LUNS_PER_TPG); - nacl->device_list = NULL; - - return 0; + mutex_unlock(&nacl->lun_entry_mutex); } void core_update_device_list_access( @@ -281,16 +261,40 @@ void core_update_device_list_access( { struct se_dev_entry *deve; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[mapped_lun]; - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + mutex_lock(&nacl->lun_entry_mutex); + deve = target_nacl_find_deve(nacl, mapped_lun); + if (deve) { + if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { + deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; + deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; + } else { + deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; + deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + } } - spin_unlock_irq(&nacl->device_list_lock); + mutex_unlock(&nacl->lun_entry_mutex); +} + +/* + * Called with rcu_read_lock or nacl->device_list_lock held. + */ +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u32 mapped_lun) +{ + struct se_dev_entry *deve; + + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) + if (deve->mapped_lun == mapped_lun) + return deve; + + return NULL; +} +EXPORT_SYMBOL(target_nacl_find_deve); + +void target_pr_kref_release(struct kref *kref) +{ + struct se_dev_entry *deve = container_of(kref, struct se_dev_entry, + pr_kref); + complete(&deve->pr_comp); } /* core_enable_device_list_for_node(): @@ -306,85 +310,87 @@ int core_enable_device_list_for_node( struct se_portal_group *tpg) { struct se_port *port = lun->lun_sep; - struct se_dev_entry *deve; + struct se_dev_entry *orig, *new; - spin_lock_irq(&nacl->device_list_lock); + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + pr_err("Unable to allocate se_dev_entry memory\n"); + return -ENOMEM; + } - deve = nacl->device_list[mapped_lun]; + atomic_set(&new->ua_count, 0); + spin_lock_init(&new->ua_lock); + INIT_LIST_HEAD(&new->alua_port_list); + INIT_LIST_HEAD(&new->ua_list); - /* - * Check if the call is handling demo mode -> explicit LUN ACL - * transition. This transition must be for the same struct se_lun - * + mapped_lun that was setup in demo mode.. - */ - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - if (deve->se_lun_acl != NULL) { - pr_err("struct se_dev_entry->se_lun_acl" - " already set for demo mode -> explicit" - " LUN ACL transition\n"); - spin_unlock_irq(&nacl->device_list_lock); + new->mapped_lun = mapped_lun; + kref_init(&new->pr_kref); + init_completion(&new->pr_comp); + + if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) + new->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; + else + new->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + + new->creation_time = get_jiffies_64(); + new->attach_count++; + + mutex_lock(&nacl->lun_entry_mutex); + orig = target_nacl_find_deve(nacl, mapped_lun); + if (orig && orig->se_lun) { + struct se_lun *orig_lun = rcu_dereference_check(orig->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); + + if (orig_lun != lun) { + pr_err("Existing orig->se_lun doesn't match new lun" + " for dynamic -> explicit NodeACL conversion:" + " %s\n", nacl->initiatorname); + mutex_unlock(&nacl->lun_entry_mutex); + kfree(new); return -EINVAL; } - if (deve->se_lun != lun) { - pr_err("struct se_dev_entry->se_lun does" - " match passed struct se_lun for demo mode" - " -> explicit LUN ACL transition\n"); - spin_unlock_irq(&nacl->device_list_lock); - return -EINVAL; - } - deve->se_lun_acl = lun_acl; + BUG_ON(orig->se_lun_acl != NULL); - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; - } + rcu_assign_pointer(new->se_lun, lun); + rcu_assign_pointer(new->se_lun_acl, lun_acl); + hlist_del_rcu(&orig->link); + hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); + mutex_unlock(&nacl->lun_entry_mutex); - spin_unlock_irq(&nacl->device_list_lock); + spin_lock_bh(&port->sep_alua_lock); + list_del(&orig->alua_port_list); + list_add_tail(&new->alua_port_list, &port->sep_alua_list); + spin_unlock_bh(&port->sep_alua_lock); + + kref_put(&orig->pr_kref, target_pr_kref_release); + wait_for_completion(&orig->pr_comp); + + kfree_rcu(orig, rcu_head); return 0; } - deve->se_lun = lun; - deve->se_lun_acl = lun_acl; - deve->mapped_lun = mapped_lun; - deve->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS; - - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; - } - - deve->creation_time = get_jiffies_64(); - deve->attach_count++; - spin_unlock_irq(&nacl->device_list_lock); + rcu_assign_pointer(new->se_lun, lun); + rcu_assign_pointer(new->se_lun_acl, lun_acl); + hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); + mutex_unlock(&nacl->lun_entry_mutex); spin_lock_bh(&port->sep_alua_lock); - list_add_tail(&deve->alua_port_list, &port->sep_alua_list); + list_add_tail(&new->alua_port_list, &port->sep_alua_list); spin_unlock_bh(&port->sep_alua_lock); return 0; } -/* core_disable_device_list_for_node(): - * - * +/* + * Called with se_node_acl->lun_entry_mutex held. */ -int core_disable_device_list_for_node( +void core_disable_device_list_for_node( struct se_lun *lun, - struct se_lun_acl *lun_acl, - u32 mapped_lun, - u32 lun_access, + struct se_dev_entry *orig, struct se_node_acl *nacl, struct se_portal_group *tpg) { struct se_port *port = lun->lun_sep; - struct se_dev_entry *deve = nacl->device_list[mapped_lun]; - /* * If the MappedLUN entry is being disabled, the entry in * port->sep_alua_list must be removed now before clearing the @@ -399,29 +405,29 @@ int core_disable_device_list_for_node( * MappedLUN *deve will be released below.. */ spin_lock_bh(&port->sep_alua_lock); - list_del(&deve->alua_port_list); + list_del(&orig->alua_port_list); spin_unlock_bh(&port->sep_alua_lock); - /* - * Wait for any in process SPEC_I_PT=1 or REGISTER_AND_MOVE - * PR operation to complete. - */ - while (atomic_read(&deve->pr_ref_count) != 0) - cpu_relax(); - - spin_lock_irq(&nacl->device_list_lock); /* * Disable struct se_dev_entry LUN ACL mapping */ - core_scsi3_ua_release_all(deve); - deve->se_lun = NULL; - deve->se_lun_acl = NULL; - deve->lun_flags = 0; - deve->creation_time = 0; - deve->attach_count--; - spin_unlock_irq(&nacl->device_list_lock); + core_scsi3_ua_release_all(orig); + + hlist_del_rcu(&orig->link); + rcu_assign_pointer(orig->se_lun, NULL); + rcu_assign_pointer(orig->se_lun_acl, NULL); + orig->lun_flags = 0; + orig->creation_time = 0; + orig->attach_count--; + /* + * Before firing off RCU callback, wait for any in process SPEC_I_PT=1 + * or REGISTER_AND_MOVE PR operation to complete. + */ + kref_put(&orig->pr_kref, target_pr_kref_release); + wait_for_completion(&orig->pr_comp); + + kfree_rcu(orig, rcu_head); core_scsi3_free_pr_reg_from_nacl(lun->lun_se_dev, nacl); - return 0; } /* core_clear_lun_from_tpg(): @@ -432,26 +438,22 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) { struct se_node_acl *nacl; struct se_dev_entry *deve; - u32 i; spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) { spin_unlock_irq(&tpg->acl_node_lock); - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - if (lun != deve->se_lun) + mutex_lock(&nacl->lun_entry_mutex); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_lun *tmp_lun = rcu_dereference_check(deve->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); + + if (lun != tmp_lun) continue; - spin_unlock_irq(&nacl->device_list_lock); - core_disable_device_list_for_node(lun, NULL, - deve->mapped_lun, TRANSPORT_LUNFLAGS_NO_ACCESS, - nacl, tpg); - - spin_lock_irq(&nacl->device_list_lock); + core_disable_device_list_for_node(lun, deve, nacl, tpg); } - spin_unlock_irq(&nacl->device_list_lock); + mutex_unlock(&nacl->lun_entry_mutex); spin_lock_irq(&tpg->acl_node_lock); } @@ -583,7 +585,9 @@ int core_dev_export( if (IS_ERR(port)) return PTR_ERR(port); + lun->lun_index = dev->dev_index; lun->lun_se_dev = dev; + lun->lun_rtpi = port->sep_rtpi; spin_lock(&hba->device_lock); dev->export_count++; @@ -1369,16 +1373,13 @@ int core_dev_add_initiator_node_lun_acl( return 0; } -/* core_dev_del_initiator_node_lun_acl(): - * - * - */ int core_dev_del_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_lun *lun, struct se_lun_acl *lacl) { struct se_node_acl *nacl; + struct se_dev_entry *deve; nacl = lacl->se_lun_nacl; if (!nacl) @@ -1389,8 +1390,11 @@ int core_dev_del_initiator_node_lun_acl( atomic_dec_mb(&lun->lun_acl_count); spin_unlock(&lun->lun_acl_lock); - core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); + mutex_lock(&nacl->lun_entry_mutex); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (deve) + core_disable_device_list_for_node(lun, deve, nacl, tpg); + mutex_unlock(&nacl->lun_entry_mutex); lacl->se_lun = NULL; diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 6cb4828308e9..0939a5492c16 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -123,16 +123,16 @@ static int target_fabric_mappedlun_link( * which be will write protected (READ-ONLY) when * tpg_1/attrib/demo_mode_write_protect=1 */ - spin_lock_irq(&lacl->se_lun_nacl->device_list_lock); - deve = lacl->se_lun_nacl->device_list[lacl->mapped_lun]; - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) + rcu_read_lock(); + deve = target_nacl_find_deve(lacl->se_lun_nacl, lacl->mapped_lun); + if (deve) lun_access = deve->lun_flags; else lun_access = (se_tpg->se_tpg_tfo->tpg_check_prod_mode_write_protect( se_tpg)) ? TRANSPORT_LUNFLAGS_READ_ONLY : TRANSPORT_LUNFLAGS_READ_WRITE; - spin_unlock_irq(&lacl->se_lun_nacl->device_list_lock); + rcu_read_unlock(); /* * Determine the actual mapped LUN value user wants.. * @@ -149,23 +149,13 @@ static int target_fabric_mappedlun_unlink( struct config_item *lun_acl_ci, struct config_item *lun_ci) { - struct se_lun *lun; struct se_lun_acl *lacl = container_of(to_config_group(lun_acl_ci), struct se_lun_acl, se_lun_group); - struct se_node_acl *nacl = lacl->se_lun_nacl; - struct se_dev_entry *deve = nacl->device_list[lacl->mapped_lun]; - struct se_portal_group *se_tpg; - /* - * Determine if the underlying MappedLUN has already been released.. - */ - if (!deve->se_lun) - return 0; + struct se_lun *lun = container_of(to_config_group(lun_ci), + struct se_lun, lun_group); + struct se_portal_group *se_tpg = lun->lun_sep->sep_tpg; - lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - se_tpg = lun->lun_sep->sep_tpg; - - core_dev_del_initiator_node_lun_acl(se_tpg, lun, lacl); - return 0; + return core_dev_del_initiator_node_lun_acl(se_tpg, lun, lacl); } CONFIGFS_EATTR_STRUCT(target_fabric_mappedlun, se_lun_acl); @@ -181,14 +171,15 @@ static ssize_t target_fabric_mappedlun_show_write_protect( { struct se_node_acl *se_nacl = lacl->se_lun_nacl; struct se_dev_entry *deve; - ssize_t len; + ssize_t len = 0; - spin_lock_irq(&se_nacl->device_list_lock); - deve = se_nacl->device_list[lacl->mapped_lun]; - len = sprintf(page, "%d\n", - (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ? - 1 : 0); - spin_unlock_irq(&se_nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(se_nacl, lacl->mapped_lun); + if (deve) { + len = sprintf(page, "%d\n", + (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ? 1 : 0); + } + rcu_read_unlock(); return len; } diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index d0344ad9b0d8..a04a6e396ae3 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -9,13 +9,15 @@ extern struct mutex g_device_mutex; extern struct list_head g_device_list; struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16); -int core_free_device_list_for_node(struct se_node_acl *, +void target_pr_kref_release(struct kref *); +void core_free_device_list_for_node(struct se_node_acl *, struct se_portal_group *); void core_update_device_list_access(u32, u32, struct se_node_acl *); +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u32); int core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *, u32, u32, struct se_node_acl *, struct se_portal_group *); -int core_disable_device_list_for_node(struct se_lun *, struct se_lun_acl *, - u32, u32, struct se_node_acl *, struct se_portal_group *); +void core_disable_device_list_for_node(struct se_lun *, struct se_dev_entry *, + struct se_node_acl *, struct se_portal_group *); void core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *); int core_dev_export(struct se_device *, struct se_portal_group *, struct se_lun *); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index a4b9a0dd50cd..909573a682d5 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -48,7 +48,7 @@ struct pr_transport_id_holder { struct t10_pr_registration *dest_pr_reg; struct se_portal_group *dest_tpg; struct se_node_acl *dest_node_acl; - struct se_dev_entry *dest_se_deve; + struct se_dev_entry __rcu *dest_se_deve; struct list_head dest_list; }; @@ -232,7 +232,7 @@ target_scsi2_reservation_release(struct se_cmd *cmd) tpg = sess->se_tpg; pr_debug("SCSI-2 Released reservation for %s LUN: %u ->" " MAPPED LUN: %u for %s\n", tpg->se_tpg_tfo->get_fabric_name(), - cmd->se_lun->unpacked_lun, cmd->se_deve->mapped_lun, + cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); out_unlock: @@ -281,7 +281,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) dev->dev_reserved_node_acl->initiatorname); pr_err("Current attempt - LUN: %u -> MAPPED LUN: %u" " from %s \n", cmd->se_lun->unpacked_lun, - cmd->se_deve->mapped_lun, + cmd->orig_fe_lun, sess->se_node_acl->initiatorname); ret = TCM_RESERVATION_CONFLICT; goto out_unlock; @@ -295,7 +295,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) } pr_debug("SCSI-2 Reserved %s LUN: %u -> MAPPED LUN: %u" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), - cmd->se_lun->unpacked_lun, cmd->se_deve->mapped_lun, + cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); out_unlock: @@ -320,6 +320,7 @@ static int core_scsi3_pr_seq_non_holder( unsigned char *cdb = cmd->t_task_cdb; struct se_dev_entry *se_deve; struct se_session *se_sess = cmd->se_sess; + struct se_node_acl *nacl = se_sess->se_node_acl; int other_cdb = 0, ignore_reg; int registered_nexus = 0, ret = 1; /* Conflict by default */ int all_reg = 0, reg_only = 0; /* ALL_REG, REG_ONLY */ @@ -327,7 +328,8 @@ static int core_scsi3_pr_seq_non_holder( int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ - se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; + rcu_read_lock(); + se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); /* * Determine if the registration should be ignored due to * non-matching ISIDs in target_scsi3_pr_reservation_check(). @@ -368,8 +370,10 @@ static int core_scsi3_pr_seq_non_holder( registered_nexus = 1; break; default: + rcu_read_unlock(); return -EINVAL; } + rcu_read_unlock(); /* * Referenced from spc4r17 table 45 for *NON* PR holder access */ @@ -735,7 +739,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (strcmp(nacl->initiatorname, nacl_tmp->initiatorname)) continue; - atomic_inc_mb(&deve_tmp->pr_ref_count); + kref_get(&deve_tmp->pr_kref); spin_unlock_bh(&port->sep_alua_lock); /* * Grab a configfs group dependency that is released @@ -748,7 +752,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( pr_err("core_scsi3_lunacl_depend" "_item() failed\n"); atomic_dec_mb(&port->sep_tg_pt_ref_cnt); - atomic_dec_mb(&deve_tmp->pr_ref_count); + kref_put(&deve_tmp->pr_kref, target_pr_kref_release); goto out; } /* @@ -763,7 +767,6 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( sa_res_key, all_tg_pt, aptpl); if (!pr_reg_atp) { atomic_dec_mb(&port->sep_tg_pt_ref_cnt); - atomic_dec_mb(&deve_tmp->pr_ref_count); core_scsi3_lunacl_undepend_item(deve_tmp); goto out; } @@ -896,7 +899,7 @@ static int __core_scsi3_check_aptpl_registration( struct se_lun *lun, u32 target_lun, struct se_node_acl *nacl, - struct se_dev_entry *deve) + u32 mapped_lun) { struct t10_pr_registration *pr_reg, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; @@ -924,13 +927,12 @@ static int __core_scsi3_check_aptpl_registration( pr_reg_aptpl_list) { if (!strcmp(pr_reg->pr_iport, i_port) && - (pr_reg->pr_res_mapped_lun == deve->mapped_lun) && + (pr_reg->pr_res_mapped_lun == mapped_lun) && !(strcmp(pr_reg->pr_tport, t_port)) && (pr_reg->pr_reg_tpgt == tpgt) && (pr_reg->pr_aptpl_target_lun == target_lun)) { pr_reg->pr_reg_nacl = nacl; - pr_reg->pr_reg_deve = deve; pr_reg->pr_reg_tg_pt_lun = lun; list_del(&pr_reg->pr_reg_aptpl_list); @@ -968,13 +970,12 @@ int core_scsi3_check_aptpl_registration( struct se_node_acl *nacl, u32 mapped_lun) { - struct se_dev_entry *deve = nacl->device_list[mapped_lun]; - if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; return __core_scsi3_check_aptpl_registration(dev, tpg, lun, - lun->unpacked_lun, nacl, deve); + lun->unpacked_lun, nacl, + mapped_lun); } static void __core_scsi3_dump_registration( @@ -1408,27 +1409,29 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) { - struct se_lun_acl *lun_acl = se_deve->se_lun_acl; + struct se_lun_acl *lun_acl; struct se_node_acl *nacl; struct se_portal_group *tpg; /* * For nacl->dynamic_node_acl=1 */ + lun_acl = se_deve->se_lun_acl; if (!lun_acl) { - atomic_dec_mb(&se_deve->pr_ref_count); + kref_put(&se_deve->pr_kref, target_pr_kref_release); return; } nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; target_undepend_item(&lun_acl->se_lun_group.cg_item); - atomic_dec_mb(&se_deve->pr_ref_count); + kref_put(&se_deve->pr_kref, target_pr_kref_release); } static sense_reason_t core_scsi3_decode_spec_i_port( struct se_cmd *cmd, struct se_portal_group *tpg, + struct se_dev_entry *local_se_deve, unsigned char *l_isid, u64 sa_res_key, int all_tg_pt, @@ -1439,7 +1442,7 @@ core_scsi3_decode_spec_i_port( struct se_portal_group *dest_tpg = NULL, *tmp_tpg; struct se_session *se_sess = cmd->se_sess; struct se_node_acl *dest_node_acl = NULL; - struct se_dev_entry *dest_se_deve = NULL, *local_se_deve; + struct se_dev_entry __rcu *dest_se_deve = NULL; struct t10_pr_registration *dest_pr_reg, *local_pr_reg, *pr_reg_e; struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; LIST_HEAD(tid_dest_list); @@ -1452,7 +1455,6 @@ core_scsi3_decode_spec_i_port( int dest_local_nexus; u32 dest_rtpi = 0; - local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* * Allocate a struct pr_transport_id_holder and setup the * local_node_acl and local_se_deve pointers and add to @@ -1467,7 +1469,6 @@ core_scsi3_decode_spec_i_port( INIT_LIST_HEAD(&tidh_new->dest_list); tidh_new->dest_tpg = tpg; tidh_new->dest_node_acl = se_sess->se_node_acl; - tidh_new->dest_se_deve = local_se_deve; local_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, se_sess->se_node_acl, local_se_deve, l_isid, @@ -1476,6 +1477,7 @@ core_scsi3_decode_spec_i_port( kfree(tidh_new); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } + rcu_assign_pointer(tidh_new->dest_se_deve, local_se_deve); tidh_new->dest_pr_reg = local_pr_reg; /* * The local I_T nexus does not hold any configfs dependances, @@ -1635,7 +1637,7 @@ core_scsi3_decode_spec_i_port( if (core_scsi3_lunacl_depend_item(dest_se_deve)) { pr_err("core_scsi3_lunacl_depend_item()" " failed\n"); - atomic_dec_mb(&dest_se_deve->pr_ref_count); + kref_put(&dest_se_deve->pr_kref, target_pr_kref_release); core_scsi3_nodeacl_undepend_item(dest_node_acl); core_scsi3_tpg_undepend_item(dest_tpg); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1990,6 +1992,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, bool aptpl, bool all_tg_pt, bool spec_i_pt, enum register_type register_type) { struct se_session *se_sess = cmd->se_sess; + struct se_node_acl *nacl = se_sess->se_node_acl; struct se_device *dev = cmd->se_dev; struct se_dev_entry *se_deve; struct se_lun *se_lun = cmd->se_lun; @@ -2005,7 +2008,14 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } se_tpg = se_sess->se_tpg; - se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; + + rcu_read_lock(); + se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!se_deve) { + pr_err("Unable to locate se_deve for PRO-REGISTER\n"); + rcu_read_unlock(); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } if (se_tpg->se_tpg_tfo->sess_get_initiator_sid) { memset(&isid_buf[0], 0, PR_REG_ISID_LEN); @@ -2021,14 +2031,16 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, if (res_key) { pr_warn("SPC-3 PR: Reservation Key non-zero" " for SA REGISTER, returning CONFLICT\n"); + rcu_read_unlock(); return TCM_RESERVATION_CONFLICT; } /* * Do nothing but return GOOD status. */ - if (!sa_res_key) + if (!sa_res_key) { + rcu_read_unlock(); return 0; - + } if (!spec_i_pt) { /* * Perform the Service Action REGISTER on the Initiator @@ -2041,6 +2053,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, register_type, 0)) { pr_err("Unable to allocate" " struct t10_pr_registration\n"); + rcu_read_unlock(); return TCM_INVALID_PARAMETER_LIST; } } else { @@ -2052,14 +2065,17 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, * logic from of core_scsi3_alloc_registration() for * each TransportID provided SCSI Initiator Port/Device */ - ret = core_scsi3_decode_spec_i_port(cmd, se_tpg, + ret = core_scsi3_decode_spec_i_port(cmd, se_tpg, se_deve, isid_ptr, sa_res_key, all_tg_pt, aptpl); - if (ret != 0) + if (ret != 0) { + rcu_read_unlock(); return ret; + } } - + rcu_read_unlock(); return core_scsi3_update_and_write_aptpl(dev, aptpl); } + rcu_read_unlock(); /* ok, existing registration */ @@ -3321,7 +3337,7 @@ after_iport_check: if (core_scsi3_lunacl_depend_item(dest_se_deve)) { pr_err("core_scsi3_lunacl_depend_item() failed\n"); - atomic_dec_mb(&dest_se_deve->pr_ref_count); + kref_put(&dest_se_deve->pr_kref, target_pr_kref_release); dest_se_deve = NULL; ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto out; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index ecc5eaef13d6..21db991b4465 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -47,6 +47,7 @@ #include #include "target_core_alua.h" +#include "target_core_internal.h" #include "target_core_pscsi.h" #define ISPRINT(a) ((a >= ' ') && (a <= '~')) @@ -637,12 +638,14 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg, * Hack to make sure that Write-Protect modepage is set if R/O mode is * forced. */ - if (!cmd->se_deve || !cmd->data_length) + if (!cmd->data_length) goto after_mode_sense; if (((cdb[0] == MODE_SENSE) || (cdb[0] == MODE_SENSE_10)) && (status_byte(result) << 1) == SAM_STAT_GOOD) { - if (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) { + bool read_only = target_lun_is_rdonly(cmd); + + if (read_only) { unsigned char *buf; buf = transport_kmap_data_sg(cmd); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 78c0b40fa5c0..9f995b87b8d1 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -981,6 +981,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) int length = 0; int ret; int i; + bool read_only = target_lun_is_rdonly(cmd);; memset(buf, 0, SE_MODE_PAGE_BUF); @@ -991,9 +992,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) length = ten ? 3 : 2; /* DEVICE-SPECIFIC PARAMETER */ - if ((cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) || - (cmd->se_deve && - (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY))) + if ((cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) || read_only) spc_modesense_write_protect(&buf[length], type); /* @@ -1211,8 +1210,9 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) { struct se_dev_entry *deve; struct se_session *sess = cmd->se_sess; + struct se_node_acl *nacl; unsigned char *buf; - u32 lun_count = 0, offset = 8, i; + u32 lun_count = 0, offset = 8; if (cmd->data_length < 16) { pr_warn("REPORT LUNS allocation length %u too small\n", @@ -1234,12 +1234,10 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) lun_count = 1; goto done; } + nacl = sess->se_node_acl; - spin_lock_irq(&sess->se_node_acl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = sess->se_node_acl->device_list[i]; - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { /* * We determine the correct LUN LIST LENGTH even once we * have reached the initial allocation length. @@ -1252,7 +1250,7 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) int_to_scsilun(deve->mapped_lun, (struct scsi_lun *)&buf[offset]); offset += 8; } - spin_unlock_irq(&sess->se_node_acl->device_list_lock); + rcu_read_unlock(); /* * See SPC3 r07, page 159. diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 64efee2fba52..ea1287940a7c 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -1084,17 +1084,17 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_inst( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiInstIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(inst); @@ -1109,16 +1109,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_dev( struct se_lun *lun; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } - lun = deve->se_lun; + lun = rcu_dereference(deve->se_lun); /* scsiDeviceIndex */ - ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev); @@ -1133,16 +1133,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_port( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiAuthIntrTgtPortIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(port); @@ -1156,15 +1156,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(indx); @@ -1178,15 +1178,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_dev_or_port( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrDevOrPort */ ret = snprintf(page, PAGE_SIZE, "%u\n", 1); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev_or_port); @@ -1200,15 +1200,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_intr_name( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrName */ ret = snprintf(page, PAGE_SIZE, "%s\n", nacl->initiatorname); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(intr_name); @@ -1222,15 +1222,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_map_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrLunMapIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(map_indx); @@ -1244,15 +1244,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_att_count( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrAttachedTimes */ ret = snprintf(page, PAGE_SIZE, "%u\n", deve->attach_count); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(att_count); @@ -1266,15 +1266,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_num_cmds( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrOutCommands */ - ret = snprintf(page, PAGE_SIZE, "%u\n", deve->total_cmds); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&deve->total_cmds)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(num_cmds); @@ -1288,15 +1289,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_read_mbytes( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrReadMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->read_bytes >> 20)); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&deve->read_bytes) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(read_mbytes); @@ -1310,15 +1312,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_write_mbytes( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrWrittenMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->write_bytes >> 20)); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&deve->write_bytes) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(write_mbytes); @@ -1332,15 +1335,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_hs_num_cmds( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrHSOutCommands */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(hs_num_cmds); @@ -1354,16 +1357,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_creation_time( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrLastCreation */ ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(((u32)deve->creation_time - INITIAL_JIFFIES) * 100 / HZ)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(creation_time); @@ -1377,15 +1380,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_row_status( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrRowStatus */ ret = snprintf(page, PAGE_SIZE, "Ready\n"); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(row_status); @@ -1450,17 +1453,17 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_inst( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiInstIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(inst); @@ -1475,16 +1478,16 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_dev( struct se_lun *lun; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } - lun = deve->se_lun; + lun = rcu_dereference(deve->se_lun); /* scsiDeviceIndex */ - ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(dev); @@ -1499,16 +1502,16 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_port( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiPortIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port); @@ -1548,15 +1551,15 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_port_auth_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAttIntrPortAuthIntrIdx */ ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port_auth_indx); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0c1f67facb5..0519923ef930 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -47,42 +47,6 @@ extern struct se_device *g_lun0_dev; static DEFINE_SPINLOCK(tpg_lock); static LIST_HEAD(tpg_list); -/* core_clear_initiator_node_from_tpg(): - * - * - */ -static void core_clear_initiator_node_from_tpg( - struct se_node_acl *nacl, - struct se_portal_group *tpg) -{ - int i; - struct se_dev_entry *deve; - struct se_lun *lun; - - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - if (!deve->se_lun) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - - lun = deve->se_lun; - spin_unlock_irq(&nacl->device_list_lock); - core_disable_device_list_for_node(lun, NULL, deve->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); - - spin_lock_irq(&nacl->device_list_lock); - } - spin_unlock_irq(&nacl->device_list_lock); -} - /* __core_tpg_get_initiator_node_acl(): * * spin_lock_bh(&tpg->acl_node_lock); must be held when calling @@ -225,35 +189,6 @@ static void *array_zalloc(int n, size_t size, gfp_t flags) return a; } -/* core_create_device_list_for_node(): - * - * - */ -static int core_create_device_list_for_node(struct se_node_acl *nacl) -{ - struct se_dev_entry *deve; - int i; - - nacl->device_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG, - sizeof(struct se_dev_entry), GFP_KERNEL); - if (!nacl->device_list) { - pr_err("Unable to allocate memory for" - " struct se_node_acl->device_list\n"); - return -ENOMEM; - } - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - atomic_set(&deve->ua_count, 0); - atomic_set(&deve->pr_ref_count, 0); - spin_lock_init(&deve->ua_lock); - INIT_LIST_HEAD(&deve->alua_port_list); - INIT_LIST_HEAD(&deve->ua_list); - } - - return 0; -} - static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, const unsigned char *initiatorname) { @@ -266,10 +201,11 @@ static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, INIT_LIST_HEAD(&acl->acl_list); INIT_LIST_HEAD(&acl->acl_sess_list); + INIT_HLIST_HEAD(&acl->lun_entry_hlist); kref_init(&acl->acl_kref); init_completion(&acl->acl_free_comp); - spin_lock_init(&acl->device_list_lock); spin_lock_init(&acl->nacl_sess_lock); + mutex_init(&acl->lun_entry_mutex); atomic_set(&acl->acl_pr_ref_count, 0); if (tpg->se_tpg_tfo->tpg_get_default_depth) acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); @@ -281,15 +217,11 @@ static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, tpg->se_tpg_tfo->set_default_node_attributes(acl); - if (core_create_device_list_for_node(acl) < 0) - goto out_free_acl; if (core_set_queue_depth_for_node(tpg, acl) < 0) - goto out_free_device_list; + goto out_free_acl; return acl; -out_free_device_list: - core_free_device_list_for_node(acl, tpg); out_free_acl: kfree(acl); return NULL; @@ -454,7 +386,6 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) wait_for_completion(&acl->acl_free_comp); core_tpg_wait_for_nacl_pr_ref(acl); - core_clear_initiator_node_from_tpg(acl, tpg); core_free_device_list_for_node(acl, tpg); pr_debug("%s_TPG[%hu] - Deleted ACL with TCQ Depth: %d for %s" diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index a0bf0d1a3df7..6c9616d212b1 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -50,9 +50,17 @@ target_scsi3_ua_check(struct se_cmd *cmd) if (!nacl) return 0; - deve = nacl->device_list[cmd->orig_fe_lun]; - if (!atomic_read(&deve->ua_count)) + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); return 0; + } + if (!atomic_read(&deve->ua_count)) { + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); /* * From sam4r14, section 5.14 Unit attention condition: * @@ -103,9 +111,12 @@ int core_scsi3_ua_allocate( ua->ua_asc = asc; ua->ua_ascq = ascq; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[unpacked_lun]; - + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (!deve) { + rcu_read_unlock(); + return -EINVAL; + } spin_lock(&deve->ua_lock); list_for_each_entry_safe(ua_p, ua_tmp, &deve->ua_list, ua_nacl_list) { /* @@ -113,7 +124,7 @@ int core_scsi3_ua_allocate( */ if ((ua_p->ua_asc == asc) && (ua_p->ua_ascq == ascq)) { spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); kmem_cache_free(se_ua_cache, ua); return 0; } @@ -158,14 +169,13 @@ int core_scsi3_ua_allocate( list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); atomic_inc_mb(&deve->ua_count); + rcu_read_unlock(); return 0; } list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); pr_debug("[%s]: Allocated UNIT ATTENTION, mapped LUN: %u, ASC:" " 0x%02x, ASCQ: 0x%02x\n", @@ -173,6 +183,7 @@ int core_scsi3_ua_allocate( asc, ascq); atomic_inc_mb(&deve->ua_count); + rcu_read_unlock(); return 0; } @@ -210,10 +221,14 @@ void core_scsi3_ua_for_check_condition( if (!nacl) return; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[cmd->orig_fe_lun]; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); + return; + } if (!atomic_read(&deve->ua_count)) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return; } /* @@ -249,7 +264,7 @@ void core_scsi3_ua_for_check_condition( atomic_dec_mb(&deve->ua_count); } spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); pr_debug("[%s]: %s UNIT ATTENTION condition with" " INTLCK_CTRL: %d, mapped LUN: %u, got CDB: 0x%02x" @@ -278,10 +293,14 @@ int core_scsi3_ua_clear_for_request_sense( if (!nacl) return -EINVAL; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[cmd->orig_fe_lun]; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); + return -EINVAL; + } if (!atomic_read(&deve->ua_count)) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return -EPERM; } /* @@ -307,7 +326,7 @@ int core_scsi3_ua_clear_for_request_sense( atomic_dec_mb(&deve->ua_count); } spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); pr_debug("[%s]: Released UNIT ATTENTION condition, mapped" " LUN: %u, got REQUEST_SENSE reported ASC: 0x%02x," diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 47fafec556f8..80d9e486e33e 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -101,7 +101,7 @@ int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, struct scatterlist *, u32, struct scatterlist *, u32); -void array_free(void *array, int n); +bool target_lun_is_rdonly(struct se_cmd *); /* From target_core_configfs.c to setup default backend config_item_types */ void target_core_setup_sub_cits(struct se_subsystem_api *); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 042a73464966..b518523cba7b 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -160,10 +160,8 @@ enum se_cmd_flags_table { /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ enum transport_lunflags_table { - TRANSPORT_LUNFLAGS_NO_ACCESS = 0x00, - TRANSPORT_LUNFLAGS_INITIATOR_ACCESS = 0x01, - TRANSPORT_LUNFLAGS_READ_ONLY = 0x02, - TRANSPORT_LUNFLAGS_READ_WRITE = 0x04, + TRANSPORT_LUNFLAGS_READ_ONLY = 0x01, + TRANSPORT_LUNFLAGS_READ_WRITE = 0x02, }; /* @@ -584,10 +582,10 @@ struct se_node_acl { char acl_tag[MAX_ACL_TAG_SIZE]; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ atomic_t acl_pr_ref_count; - struct se_dev_entry **device_list; + struct hlist_head lun_entry_hlist; struct se_session *nacl_sess; struct se_portal_group *se_tpg; - spinlock_t device_list_lock; + struct mutex lun_entry_mutex; spinlock_t nacl_sess_lock; struct config_group acl_group; struct config_group acl_attrib_group; @@ -644,20 +642,23 @@ struct se_dev_entry { /* See transport_lunflags_table */ u32 lun_flags; u32 mapped_lun; - u32 total_cmds; u64 pr_res_key; u64 creation_time; u32 attach_count; - u64 read_bytes; - u64 write_bytes; + atomic_long_t total_cmds; + atomic_long_t read_bytes; + atomic_long_t write_bytes; atomic_t ua_count; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ - atomic_t pr_ref_count; - struct se_lun_acl *se_lun_acl; + struct kref pr_kref; + struct completion pr_comp; + struct se_lun_acl __rcu *se_lun_acl; spinlock_t ua_lock; - struct se_lun *se_lun; + struct se_lun __rcu *se_lun; struct list_head alua_port_list; struct list_head ua_list; + struct hlist_node link; + struct rcu_head rcu_head; }; struct se_dev_attrib { @@ -703,6 +704,7 @@ struct se_port_stat_grps { }; struct se_lun { + u16 lun_rtpi; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; /* See transport_lun_status_table */ @@ -710,6 +712,7 @@ struct se_lun { u32 lun_access; u32 lun_flags; u32 unpacked_lun; + u32 lun_index; atomic_t lun_acl_count; spinlock_t lun_acl_lock; spinlock_t lun_sep_lock; From 80bfdfa92481d431b199eff72788588d13a3988f Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 25 Mar 2015 01:02:57 -0700 Subject: [PATCH 071/839] target/pr: Use atomic bitop for se_dev_entry->deve_flags reservation check This patch converts the core_scsi3_pr_seq_non_holder() check for non reservation holding registrations to use an atomic bitop in ->deve_flags to determine if a registration is currently active. It also includes associated a set_bit() in __core_scsi3_add_registration() and clear_bit() in __core_scsi3_free_registration(), if se_dev_entry still exists, and has not already been released via se_dev_entry shutdown path in core_disable_device_list_for_node(). Also, clear_bit in core_disable_device_list_for_node as well to ensure the read-critical path in core_scsi3_pr_seq_non_holder() picks up the new state, preceeding the final kfree_rcu() call. Reported-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 1 + drivers/target/target_core_pr.c | 58 +++++++++++++++++++---------- include/target/target_core_base.h | 3 +- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index e44e6bd66659..97792cc81fe4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -413,6 +413,7 @@ void core_disable_device_list_for_node( core_scsi3_ua_release_all(orig); hlist_del_rcu(&orig->link); + clear_bit(DEF_PR_REG_ACTIVE, &orig->deve_flags); rcu_assign_pointer(orig->se_lun, NULL); rcu_assign_pointer(orig->se_lun_acl, NULL); orig->lun_flags = 0; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 909573a682d5..66e686bdf894 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -327,9 +327,13 @@ static int core_scsi3_pr_seq_non_holder( int we = 0; /* Write Exclusive */ int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ + bool registered = false; rcu_read_lock(); se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (se_deve) + registered = test_bit(DEF_PR_REG_ACTIVE, &se_deve->deve_flags); + rcu_read_unlock(); /* * Determine if the registration should be ignored due to * non-matching ISIDs in target_scsi3_pr_reservation_check(). @@ -346,7 +350,7 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for the persistent reservation * holder. */ - if ((se_deve->def_pr_registered) && !(ignore_reg)) + if ((registered) && !(ignore_reg)) registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_REGONLY: @@ -356,7 +360,7 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for registered I_T Nexuses. */ reg_only = 1; - if ((se_deve->def_pr_registered) && !(ignore_reg)) + if ((registered) && !(ignore_reg)) registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_ALLREG: @@ -366,14 +370,12 @@ static int core_scsi3_pr_seq_non_holder( * Each registered I_T Nexus is a reservation holder. */ all_reg = 1; - if ((se_deve->def_pr_registered) && !(ignore_reg)) + if ((registered) && !(ignore_reg)) registered_nexus = 1; break; default: - rcu_read_unlock(); return -EINVAL; } - rcu_read_unlock(); /* * Referenced from spc4r17 table 45 for *NON* PR holder access */ @@ -1009,10 +1011,6 @@ static void __core_scsi3_dump_registration( pr_reg->pr_reg_aptpl); } -/* - * this function can be called with struct se_device->dev_reservation_lock - * when register_move = 1 - */ static void __core_scsi3_add_registration( struct se_device *dev, struct se_node_acl *nacl, @@ -1023,6 +1021,7 @@ static void __core_scsi3_add_registration( const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; struct t10_reservation *pr_tmpl = &dev->t10_pr; + struct se_dev_entry *deve; /* * Increment PRgeneration counter for struct se_device upon a successful @@ -1039,10 +1038,16 @@ static void __core_scsi3_add_registration( spin_lock(&pr_tmpl->registration_lock); list_add_tail(&pr_reg->pr_reg_list, &pr_tmpl->registration_list); - pr_reg->pr_reg_deve->def_pr_registered = 1; __core_scsi3_dump_registration(tfo, dev, nacl, pr_reg, register_type); spin_unlock(&pr_tmpl->registration_lock); + + rcu_read_lock(); + deve = pr_reg->pr_reg_deve; + if (deve) + set_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + /* * Skip extra processing for ALL_TG_PT=0 or REGISTER_AND_MOVE. */ @@ -1054,6 +1059,8 @@ static void __core_scsi3_add_registration( */ list_for_each_entry_safe(pr_reg_tmp, pr_reg_tmp_safe, &pr_reg->pr_reg_atp_list, pr_reg_atp_mem_list) { + struct se_node_acl *nacl_tmp = pr_reg_tmp->pr_reg_nacl; + list_del(&pr_reg_tmp->pr_reg_atp_mem_list); pr_reg_tmp->pr_res_generation = core_scsi3_pr_generation(dev); @@ -1061,12 +1068,17 @@ static void __core_scsi3_add_registration( spin_lock(&pr_tmpl->registration_lock); list_add_tail(&pr_reg_tmp->pr_reg_list, &pr_tmpl->registration_list); - pr_reg_tmp->pr_reg_deve->def_pr_registered = 1; - __core_scsi3_dump_registration(tfo, dev, - pr_reg_tmp->pr_reg_nacl, pr_reg_tmp, - register_type); + __core_scsi3_dump_registration(tfo, dev, nacl_tmp, pr_reg_tmp, + register_type); spin_unlock(&pr_tmpl->registration_lock); + + rcu_read_lock(); + deve = pr_reg_tmp->pr_reg_deve; + if (deve) + set_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + /* * Drop configfs group dependency reference from * __core_scsi3_alloc_registration() @@ -1242,13 +1254,13 @@ static void __core_scsi3_free_registration( const struct target_core_fabric_ops *tfo = pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo; struct t10_reservation *pr_tmpl = &dev->t10_pr; + struct se_node_acl *nacl = pr_reg->pr_reg_nacl; + struct se_dev_entry *deve; char i_buf[PR_REG_ISID_ID_LEN]; memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); - pr_reg->pr_reg_deve->def_pr_registered = 0; - pr_reg->pr_reg_deve->pr_res_key = 0; if (!list_empty(&pr_reg->pr_reg_list)) list_del(&pr_reg->pr_reg_list); /* @@ -1257,6 +1269,8 @@ static void __core_scsi3_free_registration( */ if (dec_holders) core_scsi3_put_pr_reg(pr_reg); + + spin_unlock(&pr_tmpl->registration_lock); /* * Wait until all reference from any other I_T nexuses for this * *pr_reg have been released. Because list_del() is called above, @@ -1264,13 +1278,18 @@ static void __core_scsi3_free_registration( * count back to zero, and we release *pr_reg. */ while (atomic_read(&pr_reg->pr_res_holders) != 0) { - spin_unlock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] waiting for pr_res_holders\n", tfo->get_fabric_name()); cpu_relax(); - spin_lock(&pr_tmpl->registration_lock); } + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, pr_reg->pr_res_mapped_lun); + if (deve) + clear_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + + spin_lock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator" " Node: %s%s\n", tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, @@ -3421,13 +3440,14 @@ after_iport_check: dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl, iport_ptr); if (!dest_pr_reg) { + spin_unlock(&dev->dev_reservation_lock); if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, dest_se_deve, iport_ptr, sa_res_key, 0, aptpl, 2, 1)) { - spin_unlock(&dev->dev_reservation_lock); ret = TCM_INVALID_PARAMETER_LIST; goto out; } + spin_lock(&dev->dev_reservation_lock); dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl, iport_ptr); new_reg = 1; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index b518523cba7b..508528a5e927 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -638,7 +638,6 @@ struct se_lun_acl { }; struct se_dev_entry { - bool def_pr_registered; /* See transport_lunflags_table */ u32 lun_flags; u32 mapped_lun; @@ -655,6 +654,8 @@ struct se_dev_entry { struct se_lun_acl __rcu *se_lun_acl; spinlock_t ua_lock; struct se_lun __rcu *se_lun; +#define DEF_PR_REG_ACTIVE 1 + unsigned long deve_flags; struct list_head alua_port_list; struct list_head ua_list; struct hlist_node link; From 79dc9c9e865a7f8c14737453f112ced25deafdef Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 27 Mar 2015 04:51:03 +0000 Subject: [PATCH 072/839] target/pr: Change alloc_registration to avoid pr_reg_tg_pt_lun This patch changes __core_scsi3_do_alloc_registration() code to drop pr_reg->pr_reg_tg_pt_lun pointer usage in favor of a new pr_reg RPTI + existing pr_reg->pr_aptpl_target_lun used by APTPL metadata logic. It also includes changes to REGISTER, REGISTER_AND_MOVE and APTPL feature bit codepaths to use rcu_dereference_check() with the expected non-zero se_dev_entry->pr_kref reference held. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 6 +- drivers/target/target_core_pr.c | 131 +++++++++++++------------- include/target/target_core_base.h | 4 +- 3 files changed, 72 insertions(+), 69 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 798386a1b6e7..4a31d4765390 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -817,7 +817,6 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( struct se_device *dev, char *page) { struct se_node_acl *se_nacl; - struct se_lun *lun; struct se_portal_group *se_tpg; struct t10_pr_registration *pr_reg; const struct target_core_fabric_ops *tfo; @@ -832,7 +831,6 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( se_nacl = pr_reg->pr_reg_nacl; se_tpg = se_nacl->se_tpg; - lun = pr_reg->pr_reg_tg_pt_lun; tfo = se_tpg->se_tpg_tfo; len += sprintf(page+len, "SPC-3 Reservation: %s" @@ -840,9 +838,9 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( tfo->tpg_get_wwn(se_tpg)); len += sprintf(page+len, "SPC-3 Reservation: Relative Port" " Identifier Tag: %hu %s Portal Group Tag: %hu" - " %s Logical Unit: %u\n", lun->lun_sep->sep_rtpi, + " %s Logical Unit: %u\n", pr_reg->tg_pt_sep_rtpi, tfo->get_fabric_name(), tfo->tpg_get_tag(se_tpg), - tfo->get_fabric_name(), lun->unpacked_lun); + tfo->get_fabric_name(), pr_reg->pr_aptpl_target_lun); out_unlock: spin_unlock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 66e686bdf894..48ed5d2bf5fe 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -44,11 +44,10 @@ * Used for Specify Initiator Ports Capable Bit (SPEC_I_PT) */ struct pr_transport_id_holder { - int dest_local_nexus; struct t10_pr_registration *dest_pr_reg; struct se_portal_group *dest_tpg; struct se_node_acl *dest_node_acl; - struct se_dev_entry __rcu *dest_se_deve; + struct se_dev_entry *dest_se_deve; struct list_head dest_list; }; @@ -625,7 +624,9 @@ static u32 core_scsi3_pr_generation(struct se_device *dev) static struct t10_pr_registration *__core_scsi3_do_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u32 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -647,12 +648,12 @@ static struct t10_pr_registration *__core_scsi3_do_alloc_registration( atomic_set(&pr_reg->pr_res_holders, 0); pr_reg->pr_reg_nacl = nacl; pr_reg->pr_reg_deve = deve; - pr_reg->pr_res_mapped_lun = deve->mapped_lun; - pr_reg->pr_aptpl_target_lun = deve->se_lun->unpacked_lun; + pr_reg->pr_res_mapped_lun = mapped_lun; + pr_reg->pr_aptpl_target_lun = lun->unpacked_lun; + pr_reg->tg_pt_sep_rtpi = lun->lun_sep->sep_rtpi; pr_reg->pr_res_key = sa_res_key; pr_reg->pr_reg_all_tg_pt = all_tg_pt; pr_reg->pr_reg_aptpl = aptpl; - pr_reg->pr_reg_tg_pt_lun = deve->se_lun; /* * If an ISID value for this SCSI Initiator Port exists, * save it to the registration now. @@ -676,7 +677,9 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *); static struct t10_pr_registration *__core_scsi3_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u32 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -684,6 +687,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( { struct se_dev_entry *deve_tmp; struct se_node_acl *nacl_tmp; + struct se_lun_acl *lacl_tmp; + struct se_lun *lun_tmp; struct se_port *port, *port_tmp; const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; struct t10_pr_registration *pr_reg, *pr_reg_atp, *pr_reg_tmp, *pr_reg_tmp_safe; @@ -692,8 +697,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * Create a registration for the I_T Nexus upon which the * PROUT REGISTER was received. */ - pr_reg = __core_scsi3_do_alloc_registration(dev, nacl, deve, isid, - sa_res_key, all_tg_pt, aptpl); + pr_reg = __core_scsi3_do_alloc_registration(dev, nacl, lun, deve, mapped_lun, + isid, sa_res_key, all_tg_pt, + aptpl); if (!pr_reg) return NULL; /* @@ -721,7 +727,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (!deve_tmp->se_lun_acl) continue; - nacl_tmp = deve_tmp->se_lun_acl->se_lun_nacl; + lacl_tmp = rcu_dereference_check(deve_tmp->se_lun_acl, + lockdep_is_held(&port->sep_alua_lock)); + nacl_tmp = lacl_tmp->se_lun_nacl; /* * Skip the matching struct se_node_acl that is allocated * above.. @@ -764,8 +772,12 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * the original *pr_reg is processed in * __core_scsi3_add_registration() */ + lun_tmp = rcu_dereference_check(deve_tmp->se_lun, + atomic_read(&deve_tmp->pr_kref.refcount) != 0); + pr_reg_atp = __core_scsi3_do_alloc_registration(dev, - nacl_tmp, deve_tmp, NULL, + nacl_tmp, lun_tmp, deve_tmp, + deve_tmp->mapped_lun, NULL, sa_res_key, all_tg_pt, aptpl); if (!pr_reg_atp) { atomic_dec_mb(&port->sep_tg_pt_ref_cnt); @@ -835,7 +847,6 @@ int core_scsi3_alloc_aptpl_registration( pr_reg->pr_res_key = sa_res_key; pr_reg->pr_reg_all_tg_pt = all_tg_pt; pr_reg->pr_reg_aptpl = 1; - pr_reg->pr_reg_tg_pt_lun = NULL; pr_reg->pr_res_scope = 0; /* Always LUN_SCOPE */ pr_reg->pr_res_type = type; /* @@ -935,7 +946,7 @@ static int __core_scsi3_check_aptpl_registration( (pr_reg->pr_aptpl_target_lun == target_lun)) { pr_reg->pr_reg_nacl = nacl; - pr_reg->pr_reg_tg_pt_lun = lun; + pr_reg->tg_pt_sep_rtpi = lun->lun_sep->sep_rtpi; list_del(&pr_reg->pr_reg_aptpl_list); spin_unlock(&pr_tmpl->aptpl_reg_lock); @@ -1090,7 +1101,9 @@ static void __core_scsi3_add_registration( static int core_scsi3_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u32 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -1100,8 +1113,9 @@ static int core_scsi3_alloc_registration( { struct t10_pr_registration *pr_reg; - pr_reg = __core_scsi3_alloc_registration(dev, nacl, deve, isid, - sa_res_key, all_tg_pt, aptpl); + pr_reg = __core_scsi3_alloc_registration(dev, nacl, lun, deve, mapped_lun, + isid, sa_res_key, all_tg_pt, + aptpl); if (!pr_reg) return -EPERM; @@ -1411,12 +1425,14 @@ static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl) static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) { - struct se_lun_acl *lun_acl = se_deve->se_lun_acl; + struct se_lun_acl *lun_acl; struct se_node_acl *nacl; struct se_portal_group *tpg; /* * For nacl->dynamic_node_acl=1 */ + lun_acl = rcu_dereference_check(se_deve->se_lun_acl, + atomic_read(&se_deve->pr_kref.refcount) != 0); if (!lun_acl) return 0; @@ -1434,7 +1450,8 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) /* * For nacl->dynamic_node_acl=1 */ - lun_acl = se_deve->se_lun_acl; + lun_acl = rcu_dereference_check(se_deve->se_lun_acl, + atomic_read(&se_deve->pr_kref.refcount) != 0); if (!lun_acl) { kref_put(&se_deve->pr_kref, target_pr_kref_release); return; @@ -1450,7 +1467,6 @@ static sense_reason_t core_scsi3_decode_spec_i_port( struct se_cmd *cmd, struct se_portal_group *tpg, - struct se_dev_entry *local_se_deve, unsigned char *l_isid, u64 sa_res_key, int all_tg_pt, @@ -1461,7 +1477,7 @@ core_scsi3_decode_spec_i_port( struct se_portal_group *dest_tpg = NULL, *tmp_tpg; struct se_session *se_sess = cmd->se_sess; struct se_node_acl *dest_node_acl = NULL; - struct se_dev_entry __rcu *dest_se_deve = NULL; + struct se_dev_entry *dest_se_deve = NULL; struct t10_pr_registration *dest_pr_reg, *local_pr_reg, *pr_reg_e; struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; LIST_HEAD(tid_dest_list); @@ -1471,14 +1487,12 @@ core_scsi3_decode_spec_i_port( char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; - int dest_local_nexus; u32 dest_rtpi = 0; /* * Allocate a struct pr_transport_id_holder and setup the - * local_node_acl and local_se_deve pointers and add to - * struct list_head tid_dest_list for add registration - * processing in the loop of tid_dest_list below. + * local_node_acl pointer and add to struct list_head tid_dest_list + * for add registration processing in the loop of tid_dest_list below. */ tidh_new = kzalloc(sizeof(struct pr_transport_id_holder), GFP_KERNEL); if (!tidh_new) { @@ -1490,20 +1504,20 @@ core_scsi3_decode_spec_i_port( tidh_new->dest_node_acl = se_sess->se_node_acl; local_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, - se_sess->se_node_acl, local_se_deve, l_isid, + se_sess->se_node_acl, cmd->se_lun, + NULL, cmd->orig_fe_lun, l_isid, sa_res_key, all_tg_pt, aptpl); if (!local_pr_reg) { kfree(tidh_new); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - rcu_assign_pointer(tidh_new->dest_se_deve, local_se_deve); tidh_new->dest_pr_reg = local_pr_reg; /* * The local I_T nexus does not hold any configfs dependances, - * so we set tid_h->dest_local_nexus=1 to prevent the + * so we set tidh_new->dest_se_deve to NULL to prevent the * configfs_undepend_item() calls in the tid_dest_list loops below. */ - tidh_new->dest_local_nexus = 1; + tidh_new->dest_se_deve = NULL; list_add_tail(&tidh_new->dest_list, &tid_dest_list); if (cmd->data_length < 28) { @@ -1544,6 +1558,8 @@ core_scsi3_decode_spec_i_port( ptr = &buf[28]; while (tpdl > 0) { + struct se_lun *dest_lun; + proto_ident = (ptr[0] & 0x0f); dest_tpg = NULL; @@ -1720,9 +1736,13 @@ core_scsi3_decode_spec_i_port( * and then call __core_scsi3_add_registration() in the * 2nd loop which will never fail. */ + dest_lun = rcu_dereference_check(dest_se_deve->se_lun, + atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + dest_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, - dest_node_acl, dest_se_deve, iport_ptr, - sa_res_key, all_tg_pt, aptpl); + dest_node_acl, dest_lun, dest_se_deve, + dest_se_deve->mapped_lun, iport_ptr, + sa_res_key, all_tg_pt, aptpl); if (!dest_pr_reg) { core_scsi3_lunacl_undepend_item(dest_se_deve); core_scsi3_nodeacl_undepend_item(dest_node_acl); @@ -1760,7 +1780,6 @@ core_scsi3_decode_spec_i_port( dest_node_acl = tidh->dest_node_acl; dest_se_deve = tidh->dest_se_deve; dest_pr_reg = tidh->dest_pr_reg; - dest_local_nexus = tidh->dest_local_nexus; list_del(&tidh->dest_list); kfree(tidh); @@ -1774,9 +1793,10 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully" " registered Transport ID for Node: %s%s Mapped LUN:" " %u\n", dest_tpg->se_tpg_tfo->get_fabric_name(), - dest_node_acl->initiatorname, i_buf, dest_se_deve->mapped_lun); + dest_node_acl->initiatorname, i_buf, (dest_se_deve) ? + dest_se_deve->mapped_lun : 0); - if (dest_local_nexus) + if (!dest_se_deve) continue; core_scsi3_lunacl_undepend_item(dest_se_deve); @@ -1797,7 +1817,6 @@ out: dest_node_acl = tidh->dest_node_acl; dest_se_deve = tidh->dest_se_deve; dest_pr_reg = tidh->dest_pr_reg; - dest_local_nexus = tidh->dest_local_nexus; list_del(&tidh->dest_list); kfree(tidh); @@ -1815,7 +1834,7 @@ out: kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); - if (dest_local_nexus) + if (!dest_se_deve) continue; core_scsi3_lunacl_undepend_item(dest_se_deve); @@ -1830,7 +1849,6 @@ static int core_scsi3_update_aptpl_buf( unsigned char *buf, u32 pr_aptpl_buf_len) { - struct se_lun *lun; struct se_portal_group *tpg; struct t10_pr_registration *pr_reg; unsigned char tmp[512], isid_buf[32]; @@ -1849,7 +1867,6 @@ static int core_scsi3_update_aptpl_buf( tmp[0] = '\0'; isid_buf[0] = '\0'; tpg = pr_reg->pr_reg_nacl->se_tpg; - lun = pr_reg->pr_reg_tg_pt_lun; /* * Write out any ISID value to APTPL metadata that was included * in the original registration. @@ -1901,7 +1918,8 @@ static int core_scsi3_update_aptpl_buf( " %d\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_wwn(tpg), tpg->se_tpg_tfo->tpg_get_tag(tpg), - lun->lun_sep->sep_rtpi, lun->unpacked_lun, reg_count); + pr_reg->tg_pt_sep_rtpi, pr_reg->pr_aptpl_target_lun, + reg_count); if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { pr_err("Unable to update renaming APTPL metadata," @@ -2011,9 +2029,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, bool aptpl, bool all_tg_pt, bool spec_i_pt, enum register_type register_type) { struct se_session *se_sess = cmd->se_sess; - struct se_node_acl *nacl = se_sess->se_node_acl; struct se_device *dev = cmd->se_dev; - struct se_dev_entry *se_deve; struct se_lun *se_lun = cmd->se_lun; struct se_portal_group *se_tpg; struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_reg_tmp; @@ -2028,14 +2044,6 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, } se_tpg = se_sess->se_tpg; - rcu_read_lock(); - se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); - if (!se_deve) { - pr_err("Unable to locate se_deve for PRO-REGISTER\n"); - rcu_read_unlock(); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } - if (se_tpg->se_tpg_tfo->sess_get_initiator_sid) { memset(&isid_buf[0], 0, PR_REG_ISID_LEN); se_tpg->se_tpg_tfo->sess_get_initiator_sid(se_sess, &isid_buf[0], @@ -2050,16 +2058,14 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, if (res_key) { pr_warn("SPC-3 PR: Reservation Key non-zero" " for SA REGISTER, returning CONFLICT\n"); - rcu_read_unlock(); return TCM_RESERVATION_CONFLICT; } /* * Do nothing but return GOOD status. */ - if (!sa_res_key) { - rcu_read_unlock(); + if (!sa_res_key) return 0; - } + if (!spec_i_pt) { /* * Perform the Service Action REGISTER on the Initiator @@ -2067,12 +2073,12 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, * Logical Unit of the SCSI device server. */ if (core_scsi3_alloc_registration(cmd->se_dev, - se_sess->se_node_acl, se_deve, isid_ptr, + se_sess->se_node_acl, cmd->se_lun, + NULL, cmd->orig_fe_lun, isid_ptr, sa_res_key, all_tg_pt, aptpl, register_type, 0)) { pr_err("Unable to allocate" " struct t10_pr_registration\n"); - rcu_read_unlock(); return TCM_INVALID_PARAMETER_LIST; } } else { @@ -2084,17 +2090,13 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, * logic from of core_scsi3_alloc_registration() for * each TransportID provided SCSI Initiator Port/Device */ - ret = core_scsi3_decode_spec_i_port(cmd, se_tpg, se_deve, + ret = core_scsi3_decode_spec_i_port(cmd, se_tpg, isid_ptr, sa_res_key, all_tg_pt, aptpl); - if (ret != 0) { - rcu_read_unlock(); + if (ret != 0) return ret; - } } - rcu_read_unlock(); return core_scsi3_update_and_write_aptpl(dev, aptpl); } - rcu_read_unlock(); /* ok, existing registration */ @@ -3440,10 +3442,13 @@ after_iport_check: dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl, iport_ptr); if (!dest_pr_reg) { + struct se_lun *dest_lun = rcu_dereference_check(dest_se_deve->se_lun, + atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + spin_unlock(&dev->dev_reservation_lock); - if (core_scsi3_alloc_registration(cmd->se_dev, - dest_node_acl, dest_se_deve, iport_ptr, - sa_res_key, 0, aptpl, 2, 1)) { + if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, + dest_lun, dest_se_deve, dest_se_deve->mapped_lun, + iport_ptr, sa_res_key, 0, aptpl, 2, 1)) { ret = TCM_INVALID_PARAMETER_LIST; goto out; } @@ -4011,10 +4016,10 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * IDENTIFIER field are not defined by this standard. */ if (!pr_reg->pr_reg_all_tg_pt) { - struct se_port *port = pr_reg->pr_reg_tg_pt_lun->lun_sep; + u16 sep_rtpi = pr_reg->tg_pt_sep_rtpi; - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((sep_rtpi >> 8) & 0xff); + buf[off++] = (sep_rtpi & 0xff); } else off += 2; /* Skip over RELATIVE TARGET PORT IDENTIFIER */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 508528a5e927..cf3c6addf05a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -372,13 +372,14 @@ struct t10_pr_registration { bool isid_present_at_reg; u32 pr_res_mapped_lun; u32 pr_aptpl_target_lun; + u16 tg_pt_sep_rtpi; u32 pr_res_generation; u64 pr_reg_bin_isid; u64 pr_res_key; atomic_t pr_res_holders; struct se_node_acl *pr_reg_nacl; + /* Used by ALL_TG_PT=1 registration with deve->pr_ref taken */ struct se_dev_entry *pr_reg_deve; - struct se_lun *pr_reg_tg_pt_lun; struct list_head pr_reg_list; struct list_head pr_reg_abort_list; struct list_head pr_reg_aptpl_list; @@ -498,7 +499,6 @@ struct se_cmd { struct list_head se_delayed_node; struct list_head se_qf_node; struct se_device *se_dev; - struct se_dev_entry *se_deve; struct se_lun *se_lun; /* Only used for internal passthrough and legacy TCM fabric modules */ struct se_session *se_sess; From 9fcb57f39c0cde70bcccdc1d998d3060297e911c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Mar 2015 09:39:03 +0200 Subject: [PATCH 073/839] target/pr: cleanup core_scsi3_pr_seq_non_holder Clean up the mess of registered variables, and pass the isid mismatch flag explicitly instead of overloading the registration type. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pr.c | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 48ed5d2bf5fe..1e89679ad606 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -312,34 +312,31 @@ out: * This function is called by those initiator ports who are *NOT* * the active PR reservation holder when a reservation is present. */ -static int core_scsi3_pr_seq_non_holder( - struct se_cmd *cmd, - u32 pr_reg_type) +static int core_scsi3_pr_seq_non_holder(struct se_cmd *cmd, u32 pr_reg_type, + bool isid_mismatch) { unsigned char *cdb = cmd->t_task_cdb; - struct se_dev_entry *se_deve; struct se_session *se_sess = cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; - int other_cdb = 0, ignore_reg; + int other_cdb = 0; int registered_nexus = 0, ret = 1; /* Conflict by default */ int all_reg = 0, reg_only = 0; /* ALL_REG, REG_ONLY */ int we = 0; /* Write Exclusive */ int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ - bool registered = false; - rcu_read_lock(); - se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); - if (se_deve) - registered = test_bit(DEF_PR_REG_ACTIVE, &se_deve->deve_flags); - rcu_read_unlock(); - /* - * Determine if the registration should be ignored due to - * non-matching ISIDs in target_scsi3_pr_reservation_check(). - */ - ignore_reg = (pr_reg_type & 0x80000000); - if (ignore_reg) - pr_reg_type &= ~0x80000000; + if (isid_mismatch) { + registered_nexus = 0; + } else { + struct se_dev_entry *se_deve; + + rcu_read_lock(); + se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (se_deve) + registered_nexus = test_bit(DEF_PR_REG_ACTIVE, + &se_deve->deve_flags); + rcu_read_unlock(); + } switch (pr_reg_type) { case PR_TYPE_WRITE_EXCLUSIVE: @@ -349,8 +346,6 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for the persistent reservation * holder. */ - if ((registered) && !(ignore_reg)) - registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_REGONLY: we = 1; @@ -359,8 +354,6 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for registered I_T Nexuses. */ reg_only = 1; - if ((registered) && !(ignore_reg)) - registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_ALLREG: we = 1; @@ -369,8 +362,6 @@ static int core_scsi3_pr_seq_non_holder( * Each registered I_T Nexus is a reservation holder. */ all_reg = 1; - if ((registered) && !(ignore_reg)) - registered_nexus = 1; break; default: return -EINVAL; @@ -576,6 +567,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; struct se_session *sess = cmd->se_sess; u32 pr_reg_type; + bool isid_mismatch = false; if (!dev->dev_pr_res_holder) return 0; @@ -588,7 +580,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) if (dev->dev_pr_res_holder->isid_present_at_reg) { if (dev->dev_pr_res_holder->pr_reg_bin_isid != sess->sess_bin_isid) { - pr_reg_type |= 0x80000000; + isid_mismatch = true; goto check_nonholder; } } @@ -596,7 +588,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) return 0; check_nonholder: - if (core_scsi3_pr_seq_non_holder(cmd, pr_reg_type)) + if (core_scsi3_pr_seq_non_holder(cmd, pr_reg_type, isid_mismatch)) return TCM_RESERVATION_CONFLICT; return 0; } From 6bb826121be244a5a3c8bd8b7d45c47df18810b7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 10 May 2015 19:31:10 -0700 Subject: [PATCH 074/839] target: Convert se_portal_group->tpg_lun_list[] to RCU hlist This patch converts the fixed size se_portal_group->tpg_lun_list[] to use modern RCU with hlist_head in order to support an arbitary number of se_lun ports per target endpoint. It includes dropping core_tpg_alloc_lun() from core_dev_add_lun(), and calling it directly from target_fabric_make_lun() to allocate a new se_lun. And add a new target_fabric_port_release() configfs item callback to invoke kfree_rcu() to release memory during se_lun->lun_group shutdown. Also now that se_node_acl->lun_entry_hlist is using RCU, convert existing tpg_lun_lock to struct mutex so core_tpg_add_node_to_devs() can perform RCU updater logic without releasing ->tpg_lun_mutex. Also, drop core_tpg_clear_object_luns() and it's single consumer in iscsi-target, which is duplicating TPG LUN shutdown logic and is current code results in a NOP. Finally, sbp-target and xen-scsiback fabric driver conversions are included, which are required due to the non-standard way they use ->tpg_lun_hlist. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Paul E. McKenney Cc: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_tpg.c | 2 - drivers/target/sbp/sbp_target.c | 97 ++++++-------- drivers/target/sbp/sbp_target.h | 2 +- drivers/target/target_core_device.c | 92 +------------ drivers/target/target_core_fabric_configfs.c | 34 +++-- drivers/target/target_core_internal.h | 6 +- drivers/target/target_core_tpg.c | 132 +++++-------------- drivers/xen/xen-scsiback.c | 27 ++-- include/target/target_core_base.h | 6 +- include/target/target_core_fabric.h | 1 - 10 files changed, 122 insertions(+), 277 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index d4ac31fc2acd..de8829102ab4 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -278,8 +278,6 @@ int iscsit_tpg_del_portal_group( return -EPERM; } - core_tpg_clear_object_luns(&tpg->tpg_se_tpg); - if (tpg->param_list) { iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 5d7755edc668..47fb12fbaf47 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -108,13 +108,13 @@ static struct sbp_session *sbp_session_find_by_guid( } static struct sbp_login_descriptor *sbp_login_find_by_lun( - struct sbp_session *session, struct se_lun *lun) + struct sbp_session *session, u32 unpacked_lun) { struct sbp_login_descriptor *login, *found = NULL; spin_lock_bh(&session->lock); list_for_each_entry(login, &session->login_list, link) { - if (login->lun == lun) + if (login->login_lun == unpacked_lun) found = login; } spin_unlock_bh(&session->lock); @@ -124,7 +124,7 @@ static struct sbp_login_descriptor *sbp_login_find_by_lun( static int sbp_login_count_all_by_lun( struct sbp_tpg *tpg, - struct se_lun *lun, + u32 unpacked_lun, int exclusive) { struct se_session *se_sess; @@ -138,7 +138,7 @@ static int sbp_login_count_all_by_lun( spin_lock_bh(&sess->lock); list_for_each_entry(login, &sess->login_list, link) { - if (login->lun != lun) + if (login->login_lun != unpacked_lun) continue; if (!exclusive || login->exclusive) @@ -174,23 +174,23 @@ static struct sbp_login_descriptor *sbp_login_find_by_id( return found; } -static struct se_lun *sbp_get_lun_from_tpg(struct sbp_tpg *tpg, int lun) +static u32 sbp_get_lun_from_tpg(struct sbp_tpg *tpg, u32 login_lun, int *err) { struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_lun *se_lun; - if (lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return ERR_PTR(-EINVAL); + rcu_read_lock(); + hlist_for_each_entry_rcu(se_lun, &se_tpg->tpg_lun_hlist, link) { + if (se_lun->unpacked_lun == login_lun) { + rcu_read_unlock(); + *err = 0; + return login_lun; + } + } + rcu_read_unlock(); - spin_lock(&se_tpg->tpg_lun_lock); - se_lun = se_tpg->tpg_lun_list[lun]; - - if (se_lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) - se_lun = ERR_PTR(-ENODEV); - - spin_unlock(&se_tpg->tpg_lun_lock); - - return se_lun; + *err = -ENODEV; + return login_lun; } static struct sbp_session *sbp_session_create( @@ -294,17 +294,16 @@ static void sbp_management_request_login( { struct sbp_tport *tport = agent->tport; struct sbp_tpg *tpg = tport->tpg; - struct se_lun *se_lun; - int ret; - u64 guid; struct sbp_session *sess; struct sbp_login_descriptor *login; struct sbp_login_response_block *response; - int login_response_len; + u64 guid; + u32 unpacked_lun; + int login_response_len, ret; - se_lun = sbp_get_lun_from_tpg(tpg, - LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc))); - if (IS_ERR(se_lun)) { + unpacked_lun = sbp_get_lun_from_tpg(tpg, + LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc)), &ret); + if (ret) { pr_notice("login to unknown LUN: %d\n", LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc))); @@ -325,11 +324,11 @@ static void sbp_management_request_login( } pr_notice("mgt_agent LOGIN to LUN %d from %016llx\n", - se_lun->unpacked_lun, guid); + unpacked_lun, guid); sess = sbp_session_find_by_guid(tpg, guid); if (sess) { - login = sbp_login_find_by_lun(sess, se_lun); + login = sbp_login_find_by_lun(sess, unpacked_lun); if (login) { pr_notice("initiator already logged-in\n"); @@ -357,7 +356,7 @@ static void sbp_management_request_login( * reject with access_denied if any logins present */ if (LOGIN_ORB_EXCLUSIVE(be32_to_cpu(req->orb.misc)) && - sbp_login_count_all_by_lun(tpg, se_lun, 0)) { + sbp_login_count_all_by_lun(tpg, unpacked_lun, 0)) { pr_warn("refusing exclusive login with other active logins\n"); req->status.status = cpu_to_be32( @@ -370,7 +369,7 @@ static void sbp_management_request_login( * check exclusive bit in any existing login descriptor * reject with access_denied if any exclusive logins present */ - if (sbp_login_count_all_by_lun(tpg, se_lun, 1)) { + if (sbp_login_count_all_by_lun(tpg, unpacked_lun, 1)) { pr_warn("refusing login while another exclusive login present\n"); req->status.status = cpu_to_be32( @@ -383,7 +382,7 @@ static void sbp_management_request_login( * check we haven't exceeded the number of allowed logins * reject with resources_unavailable if we have */ - if (sbp_login_count_all_by_lun(tpg, se_lun, 0) >= + if (sbp_login_count_all_by_lun(tpg, unpacked_lun, 0) >= tport->max_logins_per_lun) { pr_warn("max number of logins reached\n"); @@ -439,7 +438,7 @@ static void sbp_management_request_login( } login->sess = sess; - login->lun = se_lun; + login->login_lun = unpacked_lun; login->status_fifo_addr = sbp2_pointer_to_addr(&req->orb.status_fifo); login->exclusive = LOGIN_ORB_EXCLUSIVE(be32_to_cpu(req->orb.misc)); login->login_id = atomic_inc_return(&login_id); @@ -601,7 +600,7 @@ static void sbp_management_request_logout( } pr_info("mgt_agent LOGOUT from LUN %d session %d\n", - login->lun->unpacked_lun, login->login_id); + login->login_lun, login->login_id); if (req->node_addr != login->sess->node_id) { pr_warn("logout from different node ID\n"); @@ -1227,7 +1226,7 @@ static void sbp_handle_command(struct sbp_target_request *req) goto err; } - unpacked_lun = req->login->lun->unpacked_lun; + unpacked_lun = req->login->login_lun; sbp_calc_data_length_direction(req, &data_length, &data_dir); pr_debug("sbp_handle_command ORB:0x%llx unpacked_lun:%d data_len:%d data_dir:%d\n", @@ -1826,25 +1825,21 @@ static int sbp_check_stop_free(struct se_cmd *se_cmd) static int sbp_count_se_tpg_luns(struct se_portal_group *tpg) { - int i, count = 0; - - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - struct se_lun *se_lun = tpg->tpg_lun_list[i]; - - if (se_lun->lun_status == TRANSPORT_LUN_STATUS_FREE) - continue; + struct se_lun *lun; + int count = 0; + rcu_read_lock(); + hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) count++; - } - spin_unlock(&tpg->tpg_lun_lock); + rcu_read_unlock(); return count; } static int sbp_update_unit_directory(struct sbp_tport *tport) { - int num_luns, num_entries, idx = 0, mgt_agt_addr, ret, i; + struct se_lun *lun; + int num_luns, num_entries, idx = 0, mgt_agt_addr, ret; u32 *data; if (tport->unit_directory.data) { @@ -1906,28 +1901,20 @@ static int sbp_update_unit_directory(struct sbp_tport *tport) /* unit unique ID (leaf is just after LUNs) */ data[idx++] = 0x8d000000 | (num_luns + 1); - spin_lock(&tport->tpg->se_tpg.tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - struct se_lun *se_lun = tport->tpg->se_tpg.tpg_lun_list[i]; + rcu_read_lock(); + hlist_for_each_entry_rcu(lun, &tport->tpg->se_tpg.tpg_lun_hlist, link) { struct se_device *dev; int type; - if (se_lun->lun_status == TRANSPORT_LUN_STATUS_FREE) - continue; - - spin_unlock(&tport->tpg->se_tpg.tpg_lun_lock); - - dev = se_lun->lun_se_dev; + dev = lun->lun_se_dev; type = dev->transport->get_device_type(dev); /* logical_unit_number */ data[idx++] = 0x14000000 | ((type << 16) & 0x1f0000) | - (se_lun->unpacked_lun & 0xffff); - - spin_lock(&tport->tpg->se_tpg.tpg_lun_lock); + (lun->unpacked_lun & 0xffff); } - spin_unlock(&tport->tpg->se_tpg.tpg_lun_lock); + rcu_read_unlock(); /* unit unique ID leaf */ data[idx++] = 2 << 16; diff --git a/drivers/target/sbp/sbp_target.h b/drivers/target/sbp/sbp_target.h index e1b0b84f7379..73bcb1208832 100644 --- a/drivers/target/sbp/sbp_target.h +++ b/drivers/target/sbp/sbp_target.h @@ -125,7 +125,7 @@ struct sbp_login_descriptor { struct sbp_session *sess; struct list_head link; - struct se_lun *lun; + u32 login_lun; u64 status_fifo_addr; int exclusive; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 97792cc81fe4..431e5fc8daec 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1172,22 +1172,17 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size) } EXPORT_SYMBOL(se_dev_set_block_size); -struct se_lun *core_dev_add_lun( +int core_dev_add_lun( struct se_portal_group *tpg, struct se_device *dev, - u32 unpacked_lun) + struct se_lun *lun) { - struct se_lun *lun; int rc; - lun = core_tpg_alloc_lun(tpg, unpacked_lun); - if (IS_ERR(lun)) - return lun; - rc = core_tpg_add_lun(tpg, lun, TRANSPORT_LUNFLAGS_READ_WRITE, dev); if (rc < 0) - return ERR_PTR(rc); + return rc; pr_debug("%s_TPG[%u]_LUN[%u] - Activated %s Logical Unit from" " CORE HBA: %u\n", tpg->se_tpg_tfo->get_fabric_name(), @@ -1212,7 +1207,7 @@ struct se_lun *core_dev_add_lun( spin_unlock_irq(&tpg->acl_node_lock); } - return lun; + return 0; } /* core_dev_del_lun(): @@ -1231,68 +1226,6 @@ void core_dev_del_lun( core_tpg_remove_lun(tpg, lun); } -struct se_lun *core_get_lun_from_tpg(struct se_portal_group *tpg, u32 unpacked_lun) -{ - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS" - "_PER_TPG-1: %u for Target Portal Group: %hu\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - lun = tpg->tpg_lun_list[unpacked_lun]; - - if (lun->lun_status != TRANSPORT_LUN_STATUS_FREE) { - pr_err("%s Logical Unit Number: %u is not free on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - spin_unlock(&tpg->tpg_lun_lock); - - return lun; -} - -/* core_dev_get_lun(): - * - * - */ -static struct se_lun *core_dev_get_lun(struct se_portal_group *tpg, u32 unpacked_lun) -{ - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER" - "_TPG-1: %u for Target Portal Group: %hu\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - lun = tpg->tpg_lun_list[unpacked_lun]; - - if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) { - pr_err("%s Logical Unit Number: %u is not active on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - spin_unlock(&tpg->tpg_lun_lock); - - return lun; -} - struct se_lun_acl *core_dev_init_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_node_acl *nacl, @@ -1326,22 +1259,11 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( int core_dev_add_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_lun_acl *lacl, - u32 unpacked_lun, + struct se_lun *lun, u32 lun_access) { - struct se_lun *lun; - struct se_node_acl *nacl; + struct se_node_acl *nacl = lacl->se_lun_nacl; - lun = core_dev_get_lun(tpg, unpacked_lun); - if (!lun) { - pr_err("%s Logical Unit Number: %u is not active on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - return -EINVAL; - } - - nacl = lacl->se_lun_nacl; if (!nacl) return -EINVAL; @@ -1362,7 +1284,7 @@ int core_dev_add_initiator_node_lun_acl( pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for " " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), unpacked_lun, lacl->mapped_lun, + tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO", lacl->initiatorname); /* diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 0939a5492c16..9be8030e016f 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -81,7 +81,7 @@ static int target_fabric_mappedlun_link( struct se_lun_acl, se_lun_group); struct se_portal_group *se_tpg; struct config_item *nacl_ci, *tpg_ci, *tpg_ci_s, *wwn_ci, *wwn_ci_s; - int ret = 0, lun_access; + int lun_access; if (lun->lun_link_magic != SE_LUN_LINK_MAGIC) { pr_err("Bad lun->lun_link_magic, not a valid lun_ci pointer:" @@ -137,12 +137,9 @@ static int target_fabric_mappedlun_link( * Determine the actual mapped LUN value user wants.. * * This value is what the SCSI Initiator actually sees the - * iscsi/$IQN/$TPGT/lun/lun_* as on their SCSI Initiator Ports. + * $FABRIC/$WWPN/$TPGT/lun/lun_* as on their SCSI Initiator Ports. */ - ret = core_dev_add_initiator_node_lun_acl(se_tpg, lacl, - lun->unpacked_lun, lun_access); - - return (ret < 0) ? -EINVAL : 0; + return core_dev_add_initiator_node_lun_acl(se_tpg, lacl, lun, lun_access); } static int target_fabric_mappedlun_unlink( @@ -761,7 +758,6 @@ static int target_fabric_port_link( struct config_item *tpg_ci; struct se_lun *lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - struct se_lun *lun_p; struct se_portal_group *se_tpg; struct se_device *dev = container_of(to_config_group(se_dev_ci), struct se_device, dev_group); @@ -789,10 +785,9 @@ static int target_fabric_port_link( return -EEXIST; } - lun_p = core_dev_add_lun(se_tpg, dev, lun->unpacked_lun); - if (IS_ERR(lun_p)) { - pr_err("core_dev_add_lun() failed\n"); - ret = PTR_ERR(lun_p); + ret = core_dev_add_lun(se_tpg, dev, lun); + if (ret) { + pr_err("core_dev_add_lun() failed: %d\n", ret); goto out; } @@ -832,9 +827,18 @@ static int target_fabric_port_unlink( return 0; } +static void target_fabric_port_release(struct config_item *item) +{ + struct se_lun *lun = container_of(to_config_group(item), + struct se_lun, lun_group); + + kfree_rcu(lun, rcu_head); +} + static struct configfs_item_operations target_fabric_port_item_ops = { .show_attribute = target_fabric_port_attr_show, .store_attribute = target_fabric_port_attr_store, + .release = target_fabric_port_release, .allow_link = target_fabric_port_link, .drop_link = target_fabric_port_unlink, }; @@ -893,15 +897,16 @@ static struct config_group *target_fabric_make_lun( if (unpacked_lun > UINT_MAX) return ERR_PTR(-EINVAL); - lun = core_get_lun_from_tpg(se_tpg, unpacked_lun); - if (!lun) - return ERR_PTR(-EINVAL); + lun = core_tpg_alloc_lun(se_tpg, unpacked_lun); + if (IS_ERR(lun)) + return ERR_CAST(lun); lun_cg = &lun->lun_group; lun_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, GFP_KERNEL); if (!lun_cg->default_groups) { pr_err("Unable to allocate lun_cg->default_groups\n"); + kfree(lun); return ERR_PTR(-ENOMEM); } @@ -918,6 +923,7 @@ static struct config_group *target_fabric_make_lun( if (!port_stat_grp->default_groups) { pr_err("Unable to allocate port_stat_grp->default_groups\n"); kfree(lun_cg->default_groups); + kfree(lun); return ERR_PTR(-ENOMEM); } target_stat_setup_port_default_groups(lun); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index a04a6e396ae3..2c160ceaf03f 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -23,13 +23,13 @@ int core_dev_export(struct se_device *, struct se_portal_group *, struct se_lun *); void core_dev_unexport(struct se_device *, struct se_portal_group *, struct se_lun *); -struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32); +int core_dev_add_lun(struct se_portal_group *, struct se_device *, + struct se_lun *lun); void core_dev_del_lun(struct se_portal_group *, struct se_lun *); -struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32); struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *, struct se_node_acl *, u32, int *); int core_dev_add_initiator_node_lun_acl(struct se_portal_group *, - struct se_lun_acl *, u32, u32); + struct se_lun_acl *, struct se_lun *lun, u32); int core_dev_del_initiator_node_lun_acl(struct se_portal_group *, struct se_lun *, struct se_lun_acl *); void core_dev_free_initiator_node_lun_acl(struct se_portal_group *, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 0519923ef930..13d34e22a3c4 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -91,19 +91,15 @@ void core_tpg_add_node_to_devs( struct se_node_acl *acl, struct se_portal_group *tpg) { - int i = 0; u32 lun_access = 0; struct se_lun *lun; struct se_device *dev; - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = tpg->tpg_lun_list[i]; + mutex_lock(&tpg->tpg_lun_mutex); + hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) { if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) continue; - spin_unlock(&tpg->tpg_lun_lock); - dev = lun->lun_se_dev; /* * By default in LIO-Target $FABRIC_MOD, @@ -130,7 +126,7 @@ void core_tpg_add_node_to_devs( "READ-WRITE" : "READ-ONLY"); core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun, - lun_access, acl, tpg); + lun_access, acl, tpg); /* * Check to see if there are any existing persistent reservation * APTPL pre-registrations that need to be enabled for this dynamic @@ -138,9 +134,8 @@ void core_tpg_add_node_to_devs( */ core_scsi3_check_aptpl_registration(dev, tpg, lun, acl, lun->unpacked_lun); - spin_lock(&tpg->tpg_lun_lock); } - spin_unlock(&tpg->tpg_lun_lock); + mutex_unlock(&tpg->tpg_lun_mutex); } /* core_set_queue_depth_for_node(): @@ -161,34 +156,6 @@ static int core_set_queue_depth_for_node( return 0; } -void array_free(void *array, int n) -{ - void **a = array; - int i; - - for (i = 0; i < n; i++) - kfree(a[i]); - kfree(a); -} - -static void *array_zalloc(int n, size_t size, gfp_t flags) -{ - void **a; - int i; - - a = kzalloc(n * sizeof(void*), flags); - if (!a) - return NULL; - for (i = 0; i < n; i++) { - a[i] = kzalloc(size, flags); - if (!a[i]) { - array_free(a, n); - return NULL; - } - } - return a; -} - static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, const unsigned char *initiatorname) { @@ -284,27 +251,6 @@ void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *nacl) cpu_relax(); } -void core_tpg_clear_object_luns(struct se_portal_group *tpg) -{ - int i; - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = tpg->tpg_lun_list[i]; - - if ((lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) || - (lun->lun_se_dev == NULL)) - continue; - - spin_unlock(&tpg->tpg_lun_lock); - core_dev_del_lun(tpg, lun); - spin_lock(&tpg->tpg_lun_lock); - } - spin_unlock(&tpg->tpg_lun_lock); -} -EXPORT_SYMBOL(core_tpg_clear_object_luns); - struct se_node_acl *core_tpg_add_initiator_node_acl( struct se_portal_group *tpg, const char *initiatorname) @@ -567,30 +513,7 @@ int core_tpg_register( struct se_portal_group *se_tpg, int proto_id) { - struct se_lun *lun; - u32 i; - - se_tpg->tpg_lun_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG, - sizeof(struct se_lun), GFP_KERNEL); - if (!se_tpg->tpg_lun_list) { - pr_err("Unable to allocate struct se_portal_group->" - "tpg_lun_list\n"); - return -ENOMEM; - } - - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = se_tpg->tpg_lun_list[i]; - lun->unpacked_lun = i; - lun->lun_link_magic = SE_LUN_LINK_MAGIC; - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; - atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_acl_list); - spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_sep_lock); - init_completion(&lun->lun_ref_comp); - } - + INIT_HLIST_HEAD(&se_tpg->tpg_lun_hlist); se_tpg->proto_id = proto_id; se_tpg->se_tpg_tfo = tfo; se_tpg->se_tpg_wwn = se_wwn; @@ -600,14 +523,11 @@ int core_tpg_register( INIT_LIST_HEAD(&se_tpg->tpg_sess_list); spin_lock_init(&se_tpg->acl_node_lock); spin_lock_init(&se_tpg->session_lock); - spin_lock_init(&se_tpg->tpg_lun_lock); + mutex_init(&se_tpg->tpg_lun_mutex); if (se_tpg->proto_id >= 0) { - if (core_tpg_setup_virtual_lun0(se_tpg) < 0) { - array_free(se_tpg->tpg_lun_list, - TRANSPORT_MAX_LUNS_PER_TPG); + if (core_tpg_setup_virtual_lun0(se_tpg) < 0) return -ENOMEM; - } } spin_lock_bh(&tpg_lock); @@ -662,7 +582,6 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) if (se_tpg->proto_id >= 0) core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); - array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG); return 0; } EXPORT_SYMBOL(core_tpg_deregister); @@ -682,17 +601,20 @@ struct se_lun *core_tpg_alloc_lun( return ERR_PTR(-EOVERFLOW); } - spin_lock(&tpg->tpg_lun_lock); - lun = tpg->tpg_lun_list[unpacked_lun]; - if (lun->lun_status == TRANSPORT_LUN_STATUS_ACTIVE) { - pr_err("TPG Logical Unit Number: %u is already active" - " on %s Target Portal Group: %u, ignoring request.\n", - unpacked_lun, tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return ERR_PTR(-EINVAL); + lun = kzalloc(sizeof(*lun), GFP_KERNEL); + if (!lun) { + pr_err("Unable to allocate se_lun memory\n"); + return ERR_PTR(-ENOMEM); } - spin_unlock(&tpg->tpg_lun_lock); + lun->unpacked_lun = unpacked_lun; + lun->lun_link_magic = SE_LUN_LINK_MAGIC; + lun->lun_status = TRANSPORT_LUN_STATUS_FREE; + atomic_set(&lun->lun_acl_count, 0); + init_completion(&lun->lun_shutdown_comp); + INIT_LIST_HEAD(&lun->lun_acl_list); + spin_lock_init(&lun->lun_acl_lock); + spin_lock_init(&lun->lun_sep_lock); + init_completion(&lun->lun_ref_comp); return lun; } @@ -716,10 +638,12 @@ int core_tpg_add_lun( return ret; } - spin_lock(&tpg->tpg_lun_lock); + mutex_lock(&tpg->tpg_lun_mutex); lun->lun_access = lun_access; lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE; - spin_unlock(&tpg->tpg_lun_lock); + if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist); + mutex_unlock(&tpg->tpg_lun_mutex); return 0; } @@ -728,14 +652,18 @@ void core_tpg_remove_lun( struct se_portal_group *tpg, struct se_lun *lun) { + struct se_device *dev = lun->lun_se_dev; + core_clear_lun_from_tpg(lun, tpg); transport_clear_lun_ref(lun); core_dev_unexport(lun->lun_se_dev, tpg, lun); - spin_lock(&tpg->tpg_lun_lock); + mutex_lock(&tpg->tpg_lun_mutex); lun->lun_status = TRANSPORT_LUN_STATUS_FREE; - spin_unlock(&tpg->tpg_lun_lock); + if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + hlist_del_rcu(&lun->link); + mutex_unlock(&tpg->tpg_lun_mutex); percpu_ref_exit(&lun->lun_ref); } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 8b7dd47abd8d..555033bd9239 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -866,7 +866,8 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, struct list_head *head = &(info->v2p_entry_lists); unsigned long flags; char *lunp; - unsigned int lun; + unsigned int unpacked_lun; + struct se_lun *se_lun; struct scsiback_tpg *tpg_entry, *tpg = NULL; char *error = "doesn't exist"; @@ -877,7 +878,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, } *lunp = 0; lunp++; - if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) { + if (kstrtouint(lunp, 10, &unpacked_lun) || unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) { pr_err("lun number not valid: %s\n", lunp); return -EINVAL; } @@ -886,15 +887,17 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) { if (!strcmp(phy, tpg_entry->tport->tport_name) || !strcmp(phy, tpg_entry->param_alias)) { - spin_lock(&tpg_entry->se_tpg.tpg_lun_lock); - if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status == - TRANSPORT_LUN_STATUS_ACTIVE) { - if (!tpg_entry->tpg_nexus) - error = "nexus undefined"; - else - tpg = tpg_entry; + mutex_lock(&tpg_entry->se_tpg.tpg_lun_mutex); + hlist_for_each_entry(se_lun, &tpg_entry->se_tpg.tpg_lun_hlist, link) { + if (se_lun->unpacked_lun == unpacked_lun) { + if (!tpg_entry->tpg_nexus) + error = "nexus undefined"; + else + tpg = tpg_entry; + break; + } } - spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock); + mutex_unlock(&tpg_entry->se_tpg.tpg_lun_mutex); break; } } @@ -906,7 +909,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("%s:%d %s\n", phy, lun, error); + pr_err("%s:%d %s\n", phy, unpacked_lun, error); return -ENODEV; } @@ -934,7 +937,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, kref_init(&new->kref); new->v = *v; new->tpg = tpg; - new->lun = lun; + new->lun = unpacked_lun; list_add_tail(&new->l, head); out: diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index cf3c6addf05a..c15fa1a3b945 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -725,6 +725,8 @@ struct se_lun { struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; struct percpu_ref lun_ref; + struct hlist_node link; + struct rcu_head rcu_head; }; struct se_dev_stat_grps { @@ -877,11 +879,11 @@ struct se_portal_group { spinlock_t acl_node_lock; /* Spinlock for adding/removing sessions */ spinlock_t session_lock; - spinlock_t tpg_lun_lock; + struct mutex tpg_lun_mutex; struct list_head se_tpg_node; /* linked list for initiator ACL list */ struct list_head acl_node_list; - struct se_lun **tpg_lun_list; + struct hlist_head tpg_lun_hlist; struct se_lun tpg_virt_lun0; /* List of TCM sessions associated wth this TPG */ struct list_head tpg_sess_list; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 55654c90350d..b1e00a7d66de 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -157,7 +157,6 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, unsigned char *); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, unsigned char *); -void core_tpg_clear_object_luns(struct se_portal_group *); int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *, unsigned char *, u32, int); int core_tpg_set_initiator_node_tag(struct se_portal_group *, From 403edd78a2851ef95b24c0bf5151a4ab640898d7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 8 Mar 2015 22:33:47 +0000 Subject: [PATCH 075/839] target: Convert se_tpg->acl_node_lock to ->acl_node_mutex This patch converts se_tpg->acl_node_lock to struct mutex, so that ->acl_node_acl walkers in core_clear_lun_from_tpg() can block when calling core_disable_device_list_for_node(). It also updates core_dev_add_lun() to hold ->acl_node_mutex when calling core_tpg_add_node_to_devs() to build ->lun_entry_hlist for dynamically generated se_node_acl. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 14 +++---- drivers/target/target_core_pr.c | 8 ++-- drivers/target/target_core_tpg.c | 51 +++++++++++++------------- drivers/target/target_core_transport.c | 20 +++++----- drivers/target/tcm_fc/tfc_conf.c | 4 +- include/target/target_core_base.h | 2 +- 6 files changed, 48 insertions(+), 51 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 431e5fc8daec..1593d4965848 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -440,9 +440,8 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) struct se_node_acl *nacl; struct se_dev_entry *deve; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) { - spin_unlock_irq(&tpg->acl_node_lock); mutex_lock(&nacl->lun_entry_mutex); hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { @@ -455,10 +454,8 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) core_disable_device_list_for_node(lun, deve, nacl, tpg); } mutex_unlock(&nacl->lun_entry_mutex); - - spin_lock_irq(&tpg->acl_node_lock); } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); } static struct se_port *core_alloc_port(struct se_device *dev) @@ -1194,17 +1191,16 @@ int core_dev_add_lun( */ if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) { struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + + mutex_lock(&tpg->acl_node_mutex); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (acl->dynamic_node_acl && (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { - spin_unlock_irq(&tpg->acl_node_lock); core_tpg_add_node_to_devs(acl, tpg); - spin_lock_irq(&tpg->acl_node_lock); } } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); } return 0; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 1e89679ad606..b983f8a54766 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1589,12 +1589,12 @@ core_scsi3_decode_spec_i_port( * from the decoded fabric module specific TransportID * at *i_str. */ - spin_lock_irq(&tmp_tpg->acl_node_lock); + mutex_lock(&tmp_tpg->acl_node_mutex); dest_node_acl = __core_tpg_get_initiator_node_acl( tmp_tpg, i_str); if (dest_node_acl) atomic_inc_mb(&dest_node_acl->acl_pr_ref_count); - spin_unlock_irq(&tmp_tpg->acl_node_lock); + mutex_unlock(&tmp_tpg->acl_node_mutex); if (!dest_node_acl) { core_scsi3_tpg_undepend_item(tmp_tpg); @@ -3308,12 +3308,12 @@ after_iport_check: /* * Locate the destination struct se_node_acl from the received Transport ID */ - spin_lock_irq(&dest_se_tpg->acl_node_lock); + mutex_lock(&dest_se_tpg->acl_node_mutex); dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg, initiator_str); if (dest_node_acl) atomic_inc_mb(&dest_node_acl->acl_pr_ref_count); - spin_unlock_irq(&dest_se_tpg->acl_node_lock); + mutex_unlock(&dest_se_tpg->acl_node_mutex); if (!dest_node_acl) { pr_err("Unable to locate %s dest_node_acl for" diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 13d34e22a3c4..229e8278f4fe 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -49,7 +49,7 @@ static LIST_HEAD(tpg_list); /* __core_tpg_get_initiator_node_acl(): * - * spin_lock_bh(&tpg->acl_node_lock); must be held when calling + * mutex_lock(&tpg->acl_node_mutex); must be held when calling */ struct se_node_acl *__core_tpg_get_initiator_node_acl( struct se_portal_group *tpg, @@ -75,9 +75,9 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( { struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return acl; } @@ -198,10 +198,10 @@ static void target_add_node_acl(struct se_node_acl *acl) { struct se_portal_group *tpg = acl->se_tpg; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); list_add_tail(&acl->acl_list, &tpg->acl_node_list); tpg->num_node_acls++; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s" " Initiator Node: %s\n", @@ -257,7 +257,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( { struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (acl) { if (acl->dynamic_node_acl) { @@ -265,7 +265,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( pr_debug("%s_TPG[%u] - Replacing dynamic ACL" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return acl; } @@ -273,10 +273,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( " Node %s already exists for TPG %u, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return ERR_PTR(-EEXIST); } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); acl = target_alloc_node_acl(tpg, initiatorname); if (!acl) @@ -294,13 +294,13 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) unsigned long flags; int rc; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; } list_del(&acl->acl_list); tpg->num_node_acls--; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); spin_lock_irqsave(&acl->nacl_sess_lock, flags); acl->acl_stop = 1; @@ -357,21 +357,21 @@ int core_tpg_set_initiator_node_queue_depth( unsigned long flags; int dynamic_acl = 0; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (!acl) { pr_err("Access Control List entry for %s Initiator" " Node %s does not exists for TPG %hu, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -ENODEV; } if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); spin_lock_irqsave(&tpg->session_lock, flags); list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) { @@ -387,10 +387,10 @@ int core_tpg_set_initiator_node_queue_depth( tpg->se_tpg_tfo->get_fabric_name(), initiatorname); spin_unlock_irqrestore(&tpg->session_lock, flags); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -EEXIST; } /* @@ -425,10 +425,10 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -EINVAL; } spin_unlock_irqrestore(&tpg->session_lock, flags); @@ -444,10 +444,10 @@ int core_tpg_set_initiator_node_queue_depth( initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return 0; } @@ -521,9 +521,9 @@ int core_tpg_register( INIT_LIST_HEAD(&se_tpg->acl_node_list); INIT_LIST_HEAD(&se_tpg->se_tpg_node); INIT_LIST_HEAD(&se_tpg->tpg_sess_list); - spin_lock_init(&se_tpg->acl_node_lock); spin_lock_init(&se_tpg->session_lock); mutex_init(&se_tpg->tpg_lun_mutex); + mutex_init(&se_tpg->acl_node_mutex); if (se_tpg->proto_id >= 0) { if (core_tpg_setup_virtual_lun0(se_tpg) < 0) @@ -559,25 +559,26 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0) cpu_relax(); + /* * Release any remaining demo-mode generated se_node_acl that have * not been released because of TFO->tpg_check_demo_mode_cache() == 1 * in transport_deregister_session(). */ - spin_lock_irq(&se_tpg->acl_node_lock); + mutex_lock(&se_tpg->acl_node_mutex); list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list, acl_list) { list_del(&nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_irq(&se_tpg->acl_node_lock); + mutex_unlock(&se_tpg->acl_node_mutex); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); kfree(nacl); - spin_lock_irq(&se_tpg->acl_node_lock); + mutex_lock(&se_tpg->acl_node_mutex); } - spin_unlock_irq(&se_tpg->acl_node_lock); + mutex_unlock(&se_tpg->acl_node_mutex); if (se_tpg->proto_id >= 0) core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3c2809cd8b5b..965a308e10a5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -498,7 +498,7 @@ void transport_deregister_session(struct se_session *se_sess) const struct target_core_fabric_ops *se_tfo; struct se_node_acl *se_nacl; unsigned long flags; - bool comp_nacl = true; + bool comp_nacl = true, drop_nacl = false; if (!se_tpg) { transport_free_session(se_sess); @@ -518,22 +518,22 @@ void transport_deregister_session(struct se_session *se_sess) */ se_nacl = se_sess->se_node_acl; - spin_lock_irqsave(&se_tpg->acl_node_lock, flags); + mutex_lock(&se_tpg->acl_node_mutex); if (se_nacl && se_nacl->dynamic_node_acl) { if (!se_tfo->tpg_check_demo_mode_cache(se_tpg)) { list_del(&se_nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); - core_tpg_wait_for_nacl_pr_ref(se_nacl); - core_free_device_list_for_node(se_nacl, se_tpg); - kfree(se_nacl); - - comp_nacl = false; - spin_lock_irqsave(&se_tpg->acl_node_lock, flags); + drop_nacl = true; } } - spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); + mutex_unlock(&se_tpg->acl_node_mutex); + if (drop_nacl) { + core_tpg_wait_for_nacl_pr_ref(se_nacl); + core_free_device_list_for_node(se_nacl, se_tpg); + kfree(se_nacl); + comp_nacl = false; + } pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", se_tpg->se_tpg_tfo->get_fabric_name()); /* diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index ee5653139417..8e1a54f2642c 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -217,7 +217,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_node_acl *se_acl; - spin_lock_irq(&se_tpg->acl_node_lock); + mutex_lock(&se_tpg->acl_node_mutex); list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) { acl = container_of(se_acl, struct ft_node_acl, se_node_acl); pr_debug("acl %p port_name %llx\n", @@ -231,7 +231,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) break; } } - spin_unlock_irq(&se_tpg->acl_node_lock); + mutex_unlock(&se_tpg->acl_node_mutex); return found; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c15fa1a3b945..2b6adace28b4 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -876,7 +876,7 @@ struct se_portal_group { /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ atomic_t tpg_pr_ref_count; /* Spinlock for adding/removing ACLed Nodes */ - spinlock_t acl_node_lock; + struct mutex acl_node_mutex; /* Spinlock for adding/removing sessions */ spinlock_t session_lock; struct mutex tpg_lun_mutex; From 22793de590ca23a1e49b0ce5353911121fcc32be Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 6 Mar 2015 10:23:25 +0000 Subject: [PATCH 076/839] target: Convert core_tpg_deregister to use list splice This patch converts core_tpg_deregister() to perform a list splice for any remaining dynamically generated se_node_acls attached to se_tpg, before calling kfree(nacl) to free memory. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tpg.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 229e8278f4fe..e7745d21075b 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -547,6 +547,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) { const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo; struct se_node_acl *nacl, *nacl_tmp; + LIST_HEAD(node_list); pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, " "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), @@ -560,25 +561,22 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0) cpu_relax(); + mutex_lock(&se_tpg->acl_node_mutex); + list_splice_init(&se_tpg->acl_node_list, &node_list); + mutex_unlock(&se_tpg->acl_node_mutex); /* * Release any remaining demo-mode generated se_node_acl that have * not been released because of TFO->tpg_check_demo_mode_cache() == 1 * in transport_deregister_session(). */ - mutex_lock(&se_tpg->acl_node_mutex); - list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list, - acl_list) { + list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) { list_del(&nacl->acl_list); se_tpg->num_node_acls--; - mutex_unlock(&se_tpg->acl_node_mutex); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); kfree(nacl); - - mutex_lock(&se_tpg->acl_node_mutex); } - mutex_unlock(&se_tpg->acl_node_mutex); if (se_tpg->proto_id >= 0) core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); From 84786546b6ff8d50c3e4c1ea877a872cf55d485a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 8 Mar 2015 08:04:44 +0000 Subject: [PATCH 077/839] target: Drop unused se_lun->lun_acl_list Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 15 --------------- drivers/target/target_core_tpg.c | 4 ---- include/target/target_core_base.h | 3 --- 3 files changed, 22 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 1593d4965848..ba8c65a670d8 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1243,7 +1243,6 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( return NULL; } - INIT_LIST_HEAD(&lacl->lacl_list); lacl->mapped_lun = mapped_lun; lacl->se_lun_nacl = nacl; snprintf(lacl->initiatorname, TRANSPORT_IQN_LEN, "%s", @@ -1273,11 +1272,6 @@ int core_dev_add_initiator_node_lun_acl( lun_access, nacl, tpg) < 0) return -EINVAL; - spin_lock(&lun->lun_acl_lock); - list_add_tail(&lacl->lacl_list, &lun->lun_acl_list); - atomic_inc_mb(&lun->lun_acl_count); - spin_unlock(&lun->lun_acl_lock); - pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for " " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, @@ -1304,19 +1298,12 @@ int core_dev_del_initiator_node_lun_acl( if (!nacl) return -EINVAL; - spin_lock(&lun->lun_acl_lock); - list_del(&lacl->lacl_list); - atomic_dec_mb(&lun->lun_acl_count); - spin_unlock(&lun->lun_acl_lock); - mutex_lock(&nacl->lun_entry_mutex); deve = target_nacl_find_deve(nacl, lacl->mapped_lun); if (deve) core_disable_device_list_for_node(lun, deve, nacl, tpg); mutex_unlock(&nacl->lun_entry_mutex); - lacl->se_lun = NULL; - pr_debug("%s_TPG[%hu]_LUN[%u] - Removed ACL for" " InitiatorNode: %s Mapped LUN: %u\n", tpg->se_tpg_tfo->get_fabric_name(), @@ -1446,8 +1433,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; xcopy_lun->lun_se_dev = dev; init_completion(&xcopy_lun->lun_shutdown_comp); - INIT_LIST_HEAD(&xcopy_lun->lun_acl_list); - spin_lock_init(&xcopy_lun->lun_acl_lock); spin_lock_init(&xcopy_lun->lun_sep_lock); init_completion(&xcopy_lun->lun_ref_comp); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index e7745d21075b..73c25bda5a25 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -495,8 +495,6 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) lun->lun_status = TRANSPORT_LUN_STATUS_FREE; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_acl_list); - spin_lock_init(&lun->lun_acl_lock); spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); @@ -610,8 +608,6 @@ struct se_lun *core_tpg_alloc_lun( lun->lun_status = TRANSPORT_LUN_STATUS_FREE; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_acl_list); - spin_lock_init(&lun->lun_acl_lock); spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 2b6adace28b4..7908d6d04f91 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -632,7 +632,6 @@ struct se_lun_acl { u32 mapped_lun; struct se_node_acl *se_lun_nacl; struct se_lun *se_lun; - struct list_head lacl_list; struct config_group se_lun_group; struct se_ml_stat_grps ml_stat_grps; }; @@ -715,10 +714,8 @@ struct se_lun { u32 unpacked_lun; u32 lun_index; atomic_t lun_acl_count; - spinlock_t lun_acl_lock; spinlock_t lun_sep_lock; struct completion lun_shutdown_comp; - struct list_head lun_acl_list; struct se_device *lun_se_dev; struct se_port *lun_sep; struct config_group lun_group; From df9766ca9da5d9c1f2bb4eea9c9f16a2e7e8e1a5 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 22 May 2015 02:05:19 +0000 Subject: [PATCH 078/839] target: Only reset specific dynamic entries during lun_group creation This patch changes core_tpg_add_node_to_devs() to avoid unnecessarly resetting every se_dev_entry in se_node_acl->tpg_lun_hlist when the operation is driven by an explicit configfs se_lun->lun_group creation via core_dev_add_lun() to only update a single se_lun. Otherwise for the second core_tpg_check_initiator_node_acl() case, go ahead and continue to scan the full set of currently active se_lun in se_portal_group->tpg_lun_hlist. Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 2 +- drivers/target/target_core_internal.h | 3 ++- drivers/target/target_core_tpg.c | 7 +++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ba8c65a670d8..47a73609e277 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1197,7 +1197,7 @@ int core_dev_add_lun( if (acl->dynamic_node_acl && (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { - core_tpg_add_node_to_devs(acl, tpg); + core_tpg_add_node_to_devs(acl, tpg, lun); } } mutex_unlock(&tpg->acl_node_mutex); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 2c160ceaf03f..ce80ca76f68b 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -64,7 +64,8 @@ extern struct se_device *g_lun0_dev; struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, const char *); -void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *); +void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *, + struct se_lun *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32); int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 73c25bda5a25..f66c208386f8 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -89,7 +89,8 @@ EXPORT_SYMBOL(core_tpg_get_initiator_node_acl); */ void core_tpg_add_node_to_devs( struct se_node_acl *acl, - struct se_portal_group *tpg) + struct se_portal_group *tpg, + struct se_lun *lun_orig) { u32 lun_access = 0; struct se_lun *lun; @@ -99,6 +100,8 @@ void core_tpg_add_node_to_devs( hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) { if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) continue; + if (lun_orig && lun != lun_orig) + continue; dev = lun->lun_se_dev; /* @@ -238,7 +241,7 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( */ if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only == NULL) || (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) != 1)) - core_tpg_add_node_to_devs(acl, tpg); + core_tpg_add_node_to_devs(acl, tpg, NULL); target_add_node_acl(acl); return acl; From 4624773765699ac3f4e0b918306b638cba385713 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 24 May 2015 00:48:54 -0700 Subject: [PATCH 079/839] target: Drop left-over se_lun->lun_status Now that se_portal_group->tpg_lun_hlist is a RCU protected hlist, go ahead and drop the left-over lun->lun_status usage. Reported-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tpg.c | 6 ------ include/target/target_core_base.h | 8 -------- 2 files changed, 14 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index f66c208386f8..91f8ddb6d783 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -98,8 +98,6 @@ void core_tpg_add_node_to_devs( mutex_lock(&tpg->tpg_lun_mutex); hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) { - if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) - continue; if (lun_orig && lun != lun_orig) continue; @@ -495,7 +493,6 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) int ret; lun->unpacked_lun = 0; - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); spin_lock_init(&lun->lun_sep_lock); @@ -608,7 +605,6 @@ struct se_lun *core_tpg_alloc_lun( } lun->unpacked_lun = unpacked_lun; lun->lun_link_magic = SE_LUN_LINK_MAGIC; - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); spin_lock_init(&lun->lun_sep_lock); @@ -638,7 +634,6 @@ int core_tpg_add_lun( mutex_lock(&tpg->tpg_lun_mutex); lun->lun_access = lun_access; - lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE; if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist); mutex_unlock(&tpg->tpg_lun_mutex); @@ -658,7 +653,6 @@ void core_tpg_remove_lun( core_dev_unexport(lun->lun_se_dev, tpg, lun); mutex_lock(&tpg->tpg_lun_mutex); - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_del_rcu(&lun->link); mutex_unlock(&tpg->tpg_lun_mutex); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7908d6d04f91..78ed2a83838c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -119,12 +119,6 @@ enum hba_flags_table { HBA_FLAGS_PSCSI_MODE = 0x02, }; -/* struct se_lun->lun_status */ -enum transport_lun_status_table { - TRANSPORT_LUN_STATUS_FREE = 0, - TRANSPORT_LUN_STATUS_ACTIVE = 1, -}; - /* Special transport agnostic struct se_cmd->t_states */ enum transport_state_table { TRANSPORT_NO_STATE = 0, @@ -707,8 +701,6 @@ struct se_lun { u16 lun_rtpi; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; - /* See transport_lun_status_table */ - enum transport_lun_status_table lun_status; u32 lun_access; u32 lun_flags; u32 unpacked_lun; From 0a06d4309dc168dfa70cec3cf0cd9eb7fc15a2fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 May 2015 18:14:56 +0200 Subject: [PATCH 080/839] target: simplify backend driver registration Rewrite the backend driver registration based on what we did to the fabric drivers: introduce a read-only struct target_bakckend_ops that the driver registers, which is then instanciate as a struct target_backend by the core. This allows the ops vector to be smaller and allows us to mark it const. At the same time the registration function can set up the configfs attributes, avoiding the need to add additional boilerplate code for that to the drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 68 ++++++++++---------- drivers/target/target_core_device.c | 6 +- drivers/target/target_core_file.c | 16 ++--- drivers/target/target_core_hba.c | 92 +++++++++++++++------------ drivers/target/target_core_iblock.c | 18 ++---- drivers/target/target_core_internal.h | 16 +++++ drivers/target/target_core_pscsi.c | 14 ++-- drivers/target/target_core_rd.c | 22 ++----- drivers/target/target_core_user.c | 11 ++-- include/target/target_core_backend.h | 22 ++----- include/target/target_core_base.h | 4 +- 11 files changed, 133 insertions(+), 156 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 4a31d4765390..57c099dd9da5 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -51,15 +51,26 @@ #include "target_core_xcopy.h" #define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs) \ -static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \ +static void target_core_setup_##_name##_cit(struct target_backend *tb) \ { \ - struct target_backend_cits *tbc = &sa->tb_cits; \ - struct config_item_type *cit = &tbc->tb_##_name##_cit; \ + struct config_item_type *cit = &tb->tb_##_name##_cit; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = _attrs; \ - cit->ct_owner = sa->owner; \ + cit->ct_owner = tb->ops->owner; \ + pr_debug("Setup generic %s\n", __stringify(_name)); \ +} + +#define TB_CIT_SETUP_DRV(_name, _item_ops, _group_ops) \ +static void target_core_setup_##_name##_cit(struct target_backend *tb) \ +{ \ + struct config_item_type *cit = &tb->tb_##_name##_cit; \ + \ + cit->ct_item_ops = _item_ops; \ + cit->ct_group_ops = _group_ops; \ + cit->ct_attrs = tb->ops->tb_##_name##_attrs; \ + cit->ct_owner = tb->ops->owner; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } @@ -469,7 +480,7 @@ static struct configfs_item_operations target_core_dev_attrib_ops = { .store_attribute = target_core_dev_attrib_attr_store, }; -TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL); +TB_CIT_SETUP_DRV(dev_attrib, &target_core_dev_attrib_ops, NULL); /* End functions for struct config_item_type tb_dev_attrib_cit */ @@ -1174,13 +1185,13 @@ TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs); static ssize_t target_core_show_dev_info(void *p, char *page) { struct se_device *dev = p; - struct se_subsystem_api *t = dev->transport; int bl = 0; ssize_t read_bytes = 0; transport_dump_dev_state(dev, page, &bl); read_bytes += bl; - read_bytes += t->show_configfs_dev_params(dev, page+read_bytes); + read_bytes += dev->transport->show_configfs_dev_params(dev, + page+read_bytes); return read_bytes; } @@ -1198,9 +1209,8 @@ static ssize_t target_core_store_dev_control( size_t count) { struct se_device *dev = p; - struct se_subsystem_api *t = dev->transport; - return t->set_configfs_dev_params(dev, page, count); + return dev->transport->set_configfs_dev_params(dev, page, count); } static struct target_core_configfs_attribute target_core_attr_dev_control = { @@ -2477,9 +2487,9 @@ static struct config_group *target_core_make_subdev( const char *name) { struct t10_alua_tg_pt_gp *tg_pt_gp; - struct se_subsystem_api *t; struct config_item *hba_ci = &group->cg_item; struct se_hba *hba = item_to_hba(hba_ci); + struct target_backend *tb = hba->backend; struct se_device *dev; struct config_group *dev_cg = NULL, *tg_pt_gp_cg = NULL; struct config_group *dev_stat_grp = NULL; @@ -2488,10 +2498,6 @@ static struct config_group *target_core_make_subdev( ret = mutex_lock_interruptible(&hba->hba_access_mutex); if (ret) return ERR_PTR(ret); - /* - * Locate the struct se_subsystem_api from parent's struct se_hba. - */ - t = hba->transport; dev = target_alloc_device(hba, name); if (!dev) @@ -2504,17 +2510,17 @@ static struct config_group *target_core_make_subdev( if (!dev_cg->default_groups) goto out_free_device; - config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit); + config_group_init_type_name(dev_cg, name, &tb->tb_dev_cit); config_group_init_type_name(&dev->dev_attrib.da_group, "attrib", - &t->tb_cits.tb_dev_attrib_cit); + &tb->tb_dev_attrib_cit); config_group_init_type_name(&dev->dev_pr_group, "pr", - &t->tb_cits.tb_dev_pr_cit); + &tb->tb_dev_pr_cit); config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn", - &t->tb_cits.tb_dev_wwn_cit); + &tb->tb_dev_wwn_cit); config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group, - "alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit); + "alua", &tb->tb_dev_alua_tg_pt_gps_cit); config_group_init_type_name(&dev->dev_stat_grps.stat_group, - "statistics", &t->tb_cits.tb_dev_stat_cit); + "statistics", &tb->tb_dev_stat_cit); dev_cg->default_groups[0] = &dev->dev_attrib.da_group; dev_cg->default_groups[1] = &dev->dev_pr_group; @@ -2644,7 +2650,7 @@ static ssize_t target_core_hba_show_attr_hba_info( char *page) { return sprintf(page, "HBA Index: %d plugin: %s version: %s\n", - hba->hba_id, hba->transport->name, + hba->hba_id, hba->backend->ops->name, TARGET_CORE_CONFIGFS_VERSION); } @@ -2664,11 +2670,10 @@ static ssize_t target_core_hba_show_attr_hba_mode(struct se_hba *hba, static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, const char *page, size_t count) { - struct se_subsystem_api *transport = hba->transport; unsigned long mode_flag; int ret; - if (transport->pmode_enable_hba == NULL) + if (hba->backend->ops->pmode_enable_hba == NULL) return -EINVAL; ret = kstrtoul(page, 0, &mode_flag); @@ -2682,7 +2687,7 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, return -EINVAL; } - ret = transport->pmode_enable_hba(hba, mode_flag); + ret = hba->backend->ops->pmode_enable_hba(hba, mode_flag); if (ret < 0) return -EINVAL; if (ret > 0) @@ -2808,16 +2813,15 @@ static struct config_item_type target_core_cit = { /* Stop functions for struct config_item_type target_core_hba_cit */ -void target_core_setup_sub_cits(struct se_subsystem_api *sa) +void target_setup_backend_cits(struct target_backend *tb) { - target_core_setup_dev_cit(sa); - target_core_setup_dev_attrib_cit(sa); - target_core_setup_dev_pr_cit(sa); - target_core_setup_dev_wwn_cit(sa); - target_core_setup_dev_alua_tg_pt_gps_cit(sa); - target_core_setup_dev_stat_cit(sa); + target_core_setup_dev_cit(tb); + target_core_setup_dev_attrib_cit(tb); + target_core_setup_dev_pr_cit(tb); + target_core_setup_dev_wwn_cit(tb); + target_core_setup_dev_alua_tg_pt_gps_cit(tb); + target_core_setup_dev_stat_cit(tb); } -EXPORT_SYMBOL(target_core_setup_sub_cits); static int __init target_core_init_configfs(void) { diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 47a73609e277..af906f1caaa7 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1367,13 +1367,13 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) struct se_device *dev; struct se_lun *xcopy_lun; - dev = hba->transport->alloc_device(hba, name); + dev = hba->backend->ops->alloc_device(hba, name); if (!dev) return NULL; dev->dev_link_magic = SE_DEV_LINK_MAGIC; dev->se_hba = hba; - dev->transport = hba->transport; + dev->transport = hba->backend->ops; dev->prot_length = sizeof(struct se_dif_v1_tuple); INIT_LIST_HEAD(&dev->dev_list); @@ -1571,7 +1571,7 @@ int core_dev_setup_virtual_lun0(void) goto out_free_hba; } - hba->transport->set_configfs_dev_params(dev, buf, sizeof(buf)); + hba->backend->ops->set_configfs_dev_params(dev, buf, sizeof(buf)); ret = target_configure_device(dev); if (ret) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index e865885352da..7c5b1ef57d34 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -46,10 +46,6 @@ static inline struct fd_dev *FD_DEV(struct se_device *dev) return container_of(dev, struct fd_dev, dev); } -/* fd_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int fd_attach_hba(struct se_hba *hba, u32 host_id) { struct fd_host *fd_host; @@ -881,7 +877,7 @@ static struct configfs_attribute *fileio_backend_dev_attrs[] = { NULL, }; -static struct se_subsystem_api fileio_template = { +static const struct target_backend_ops fileio_ops = { .name = "fileio", .inquiry_prod = "FILEIO", .inquiry_rev = FD_VERSION, @@ -899,21 +895,17 @@ static struct se_subsystem_api fileio_template = { .init_prot = fd_init_prot, .format_prot = fd_format_prot, .free_prot = fd_free_prot, + .tb_dev_attrib_attrs = fileio_backend_dev_attrs, }; static int __init fileio_module_init(void) { - struct target_backend_cits *tbc = &fileio_template.tb_cits; - - target_core_setup_sub_cits(&fileio_template); - tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs; - - return transport_subsystem_register(&fileio_template); + return transport_backend_register(&fileio_ops); } static void __exit fileio_module_exit(void) { - transport_subsystem_release(&fileio_template); + target_backend_unregister(&fileio_ops); } MODULE_DESCRIPTION("TCM FILEIO subsystem plugin"); diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index e6e496ff9546..62ea4e8e70a8 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -39,63 +39,75 @@ #include "target_core_internal.h" -static LIST_HEAD(subsystem_list); -static DEFINE_MUTEX(subsystem_mutex); +static LIST_HEAD(backend_list); +static DEFINE_MUTEX(backend_mutex); static u32 hba_id_counter; static DEFINE_SPINLOCK(hba_lock); static LIST_HEAD(hba_list); -int transport_subsystem_register(struct se_subsystem_api *sub_api) + +int transport_backend_register(const struct target_backend_ops *ops) { - struct se_subsystem_api *s; + struct target_backend *tb, *old; - INIT_LIST_HEAD(&sub_api->sub_api_list); + tb = kzalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->ops = ops; - mutex_lock(&subsystem_mutex); - list_for_each_entry(s, &subsystem_list, sub_api_list) { - if (!strcmp(s->name, sub_api->name)) { - pr_err("%p is already registered with" - " duplicate name %s, unable to process" - " request\n", s, s->name); - mutex_unlock(&subsystem_mutex); + mutex_lock(&backend_mutex); + list_for_each_entry(old, &backend_list, list) { + if (!strcmp(old->ops->name, ops->name)) { + pr_err("backend %s already registered.\n", ops->name); + mutex_unlock(&backend_mutex); + kfree(tb); return -EEXIST; } } - list_add_tail(&sub_api->sub_api_list, &subsystem_list); - mutex_unlock(&subsystem_mutex); + target_setup_backend_cits(tb); + list_add_tail(&tb->list, &backend_list); + mutex_unlock(&backend_mutex); - pr_debug("TCM: Registered subsystem plugin: %s struct module:" - " %p\n", sub_api->name, sub_api->owner); + pr_debug("TCM: Registered subsystem plugin: %s struct module: %p\n", + ops->name, ops->owner); return 0; } -EXPORT_SYMBOL(transport_subsystem_register); +EXPORT_SYMBOL(transport_backend_register); -void transport_subsystem_release(struct se_subsystem_api *sub_api) +void target_backend_unregister(const struct target_backend_ops *ops) { - mutex_lock(&subsystem_mutex); - list_del(&sub_api->sub_api_list); - mutex_unlock(&subsystem_mutex); + struct target_backend *tb; + + mutex_lock(&backend_mutex); + list_for_each_entry(tb, &backend_list, list) { + if (tb->ops == ops) { + list_del(&tb->list); + kfree(tb); + break; + } + } + mutex_unlock(&backend_mutex); } -EXPORT_SYMBOL(transport_subsystem_release); +EXPORT_SYMBOL(target_backend_unregister); -static struct se_subsystem_api *core_get_backend(const char *sub_name) +static struct target_backend *core_get_backend(const char *name) { - struct se_subsystem_api *s; + struct target_backend *tb; - mutex_lock(&subsystem_mutex); - list_for_each_entry(s, &subsystem_list, sub_api_list) { - if (!strcmp(s->name, sub_name)) + mutex_lock(&backend_mutex); + list_for_each_entry(tb, &backend_list, list) { + if (!strcmp(tb->ops->name, name)) goto found; } - mutex_unlock(&subsystem_mutex); + mutex_unlock(&backend_mutex); return NULL; found: - if (s->owner && !try_module_get(s->owner)) - s = NULL; - mutex_unlock(&subsystem_mutex); - return s; + if (tb->ops->owner && !try_module_get(tb->ops->owner)) + tb = NULL; + mutex_unlock(&backend_mutex); + return tb; } struct se_hba * @@ -116,13 +128,13 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags) hba->hba_index = scsi_get_new_index(SCSI_INST_INDEX); hba->hba_flags |= hba_flags; - hba->transport = core_get_backend(plugin_name); - if (!hba->transport) { + hba->backend = core_get_backend(plugin_name); + if (!hba->backend) { ret = -EINVAL; goto out_free_hba; } - ret = hba->transport->attach_hba(hba, plugin_dep_id); + ret = hba->backend->ops->attach_hba(hba, plugin_dep_id); if (ret < 0) goto out_module_put; @@ -137,8 +149,8 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags) return hba; out_module_put: - module_put(hba->transport->owner); - hba->transport = NULL; + module_put(hba->backend->ops->owner); + hba->backend = NULL; out_free_hba: kfree(hba); return ERR_PTR(ret); @@ -149,7 +161,7 @@ core_delete_hba(struct se_hba *hba) { WARN_ON(hba->dev_count); - hba->transport->detach_hba(hba); + hba->backend->ops->detach_hba(hba); spin_lock(&hba_lock); list_del(&hba->hba_node); @@ -158,9 +170,9 @@ core_delete_hba(struct se_hba *hba) pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target" " Core\n", hba->hba_id); - module_put(hba->transport->owner); + module_put(hba->backend->ops->owner); - hba->transport = NULL; + hba->backend = NULL; kfree(hba); return 0; } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 8c965683789f..79f651fb98fb 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -54,12 +54,6 @@ static inline struct iblock_dev *IBLOCK_DEV(struct se_device *dev) } -static struct se_subsystem_api iblock_template; - -/* iblock_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int iblock_attach_hba(struct se_hba *hba, u32 host_id) { pr_debug("CORE_HBA[%d] - TCM iBlock HBA Driver %s on" @@ -899,7 +893,7 @@ static struct configfs_attribute *iblock_backend_dev_attrs[] = { NULL, }; -static struct se_subsystem_api iblock_template = { +static const struct target_backend_ops iblock_ops = { .name = "iblock", .inquiry_prod = "IBLOCK", .inquiry_rev = IBLOCK_VERSION, @@ -919,21 +913,17 @@ static struct se_subsystem_api iblock_template = { .get_io_min = iblock_get_io_min, .get_io_opt = iblock_get_io_opt, .get_write_cache = iblock_get_write_cache, + .tb_dev_attrib_attrs = iblock_backend_dev_attrs, }; static int __init iblock_module_init(void) { - struct target_backend_cits *tbc = &iblock_template.tb_cits; - - target_core_setup_sub_cits(&iblock_template); - tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs; - - return transport_subsystem_register(&iblock_template); + return transport_backend_register(&iblock_ops); } static void __exit iblock_module_exit(void) { - transport_subsystem_release(&iblock_template); + target_backend_unregister(&iblock_ops); } MODULE_DESCRIPTION("TCM IBLOCK subsystem plugin"); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index ce80ca76f68b..01181aed67dc 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -1,6 +1,19 @@ #ifndef TARGET_CORE_INTERNAL_H #define TARGET_CORE_INTERNAL_H +struct target_backend { + struct list_head list; + + const struct target_backend_ops *ops; + + struct config_item_type tb_dev_cit; + struct config_item_type tb_dev_attrib_cit; + struct config_item_type tb_dev_pr_cit; + struct config_item_type tb_dev_wwn_cit; + struct config_item_type tb_dev_alua_tg_pt_gps_cit; + struct config_item_type tb_dev_stat_cit; +}; + /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; @@ -40,6 +53,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name); int target_configure_device(struct se_device *dev); void target_free_device(struct se_device *); +/* target_core_configfs.c */ +void target_setup_backend_cits(struct target_backend *); + /* target_core_fabric_lib.c */ int target_get_pr_transport_id_len(struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, int *format_code); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 21db991b4465..2e3a0b004f9a 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -57,8 +57,6 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev) return container_of(dev, struct pscsi_dev_virt, dev); } -static struct se_subsystem_api pscsi_template; - static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); static void pscsi_req_done(struct request *, int); @@ -1141,7 +1139,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = { NULL, }; -static struct se_subsystem_api pscsi_template = { +static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, @@ -1157,21 +1155,17 @@ static struct se_subsystem_api pscsi_template = { .show_configfs_dev_params = pscsi_show_configfs_dev_params, .get_device_type = pscsi_get_device_type, .get_blocks = pscsi_get_blocks, + .tb_dev_attrib_attrs = pscsi_backend_dev_attrs, }; static int __init pscsi_module_init(void) { - struct target_backend_cits *tbc = &pscsi_template.tb_cits; - - target_core_setup_sub_cits(&pscsi_template); - tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs; - - return transport_subsystem_register(&pscsi_template); + return transport_backend_register(&pscsi_ops); } static void __exit pscsi_module_exit(void) { - transport_subsystem_release(&pscsi_template); + target_backend_unregister(&pscsi_ops); } MODULE_DESCRIPTION("TCM PSCSI subsystem plugin"); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 55315fd0f5d3..cf443a8a3cbe 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -43,10 +43,6 @@ static inline struct rd_dev *RD_DEV(struct se_device *dev) return container_of(dev, struct rd_dev, dev); } -/* rd_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int rd_attach_hba(struct se_hba *hba, u32 host_id) { struct rd_host *rd_host; @@ -735,7 +731,7 @@ static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = { NULL, }; -static struct se_subsystem_api rd_mcp_template = { +static const struct target_backend_ops rd_mcp_ops = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", .inquiry_rev = RD_MCP_VERSION, @@ -751,25 +747,15 @@ static struct se_subsystem_api rd_mcp_template = { .get_blocks = rd_get_blocks, .init_prot = rd_init_prot, .free_prot = rd_free_prot, + .tb_dev_attrib_attrs = rd_mcp_backend_dev_attrs, }; int __init rd_module_init(void) { - struct target_backend_cits *tbc = &rd_mcp_template.tb_cits; - int ret; - - target_core_setup_sub_cits(&rd_mcp_template); - tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs; - - ret = transport_subsystem_register(&rd_mcp_template); - if (ret < 0) { - return ret; - } - - return 0; + return transport_backend_register(&rd_mcp_ops); } void rd_module_exit(void) { - transport_subsystem_release(&rd_mcp_template); + target_backend_unregister(&rd_mcp_ops); } diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 60330e00f59d..fb67e313dc4f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1115,7 +1115,7 @@ static struct configfs_attribute *tcmu_backend_dev_attrs[] = { NULL, }; -static struct se_subsystem_api tcmu_template = { +static const struct target_backend_ops tcmu_ops = { .name = "user", .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, @@ -1131,11 +1131,11 @@ static struct se_subsystem_api tcmu_template = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, + .tb_dev_attrib_attrs = tcmu_backend_dev_attrs, }; static int __init tcmu_module_init(void) { - struct target_backend_cits *tbc = &tcmu_template.tb_cits; int ret; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1158,10 +1158,7 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } - target_core_setup_sub_cits(&tcmu_template); - tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs; - - ret = transport_subsystem_register(&tcmu_template); + ret = transport_backend_register(&tcmu_ops); if (ret) goto out_unreg_genl; @@ -1179,7 +1176,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { - transport_subsystem_release(&tcmu_template); + target_backend_unregister(&tcmu_ops); genl_unregister_family(&tcmu_genl_family); root_device_unregister(tcmu_root_device); kmem_cache_destroy(tcmu_cmd_cache); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 80d9e486e33e..514b52019380 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -3,18 +3,7 @@ #define TRANSPORT_FLAG_PASSTHROUGH 1 -struct target_backend_cits { - struct config_item_type tb_dev_cit; - struct config_item_type tb_dev_attrib_cit; - struct config_item_type tb_dev_pr_cit; - struct config_item_type tb_dev_wwn_cit; - struct config_item_type tb_dev_alua_tg_pt_gps_cit; - struct config_item_type tb_dev_stat_cit; -}; - -struct se_subsystem_api { - struct list_head sub_api_list; - +struct target_backend_ops { char name[16]; char inquiry_prod[16]; char inquiry_rev[4]; @@ -52,7 +41,7 @@ struct se_subsystem_api { int (*format_prot)(struct se_device *); void (*free_prot)(struct se_device *); - struct target_backend_cits tb_cits; + struct configfs_attribute **tb_dev_attrib_attrs; }; struct sbc_ops { @@ -64,8 +53,8 @@ struct sbc_ops { sense_reason_t (*execute_unmap)(struct se_cmd *cmd); }; -int transport_subsystem_register(struct se_subsystem_api *); -void transport_subsystem_release(struct se_subsystem_api *); +int transport_backend_register(const struct target_backend_ops *); +void target_backend_unregister(const struct target_backend_ops *); void target_complete_cmd(struct se_cmd *, u8); void target_complete_cmd_with_length(struct se_cmd *, u8, int); @@ -103,9 +92,6 @@ sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, bool target_lun_is_rdonly(struct se_cmd *); -/* From target_core_configfs.c to setup default backend config_item_types */ -void target_core_setup_sub_cits(struct se_subsystem_api *); - /* attribute helpers from target_core_device.c for backend drivers */ bool se_dev_check_wce(struct se_device *); int se_dev_set_max_unmap_lba_count(struct se_device *, u32); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 78ed2a83838c..03e2ee8f8337 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -797,7 +797,7 @@ struct se_device { #define SE_UDEV_PATH_LEN 512 /* must be less than PAGE_SIZE */ unsigned char udev_path[SE_UDEV_PATH_LEN]; /* Pointer to template of function pointers for transport */ - struct se_subsystem_api *transport; + const struct target_backend_ops *transport; /* Linked list for struct se_hba struct se_device list */ struct list_head dev_list; struct se_lun xcopy_lun; @@ -819,7 +819,7 @@ struct se_hba { spinlock_t device_lock; struct config_group hba_group; struct mutex hba_access_mutex; - struct se_subsystem_api *transport; + struct target_backend *backend; }; struct scsi_port_stats { From 5873c4d157400ade4052e9d7b6259fa592e1ddbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 May 2015 18:14:57 +0200 Subject: [PATCH 081/839] target: consolidate backend attribute implementations Provide a common sets of dev_attrib attributes for all devices using the generic SPC/SBC parsers, and a second one with the minimal required read-only attributes for passthrough devices. The later is only used by pscsi for now, but will be wired up for the full-passthrough TCMU use case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 169 ++++++++++++++++++ drivers/target/target_core_file.c | 38 +--- drivers/target/target_core_iblock.c | 38 +--- drivers/target/target_core_pscsi.c | 23 +-- drivers/target/target_core_rd.c | 38 +--- drivers/target/target_core_user.c | 23 +-- include/target/target_core_backend.h | 3 + include/target/target_core_backend_configfs.h | 118 ------------ 8 files changed, 177 insertions(+), 273 deletions(-) delete mode 100644 include/target/target_core_backend_configfs.h diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 57c099dd9da5..dbf91f02ee5c 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -471,10 +471,179 @@ EXPORT_SYMBOL(target_unregister_template); //############################################################################*/ /* Start functions for struct config_item_type tb_dev_attrib_cit */ +#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name) \ +static ssize_t _backend##_dev_show_attr_##_name( \ + struct se_dev_attrib *da, \ + char *page) \ +{ \ + return snprintf(page, PAGE_SIZE, "%u\n", \ + (u32)da->da_dev->dev_attrib._name); \ +} + +#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name) \ +static ssize_t _backend##_dev_store_attr_##_name( \ + struct se_dev_attrib *da, \ + const char *page, \ + size_t count) \ +{ \ + unsigned long val; \ + int ret; \ + \ + ret = kstrtoul(page, 0, &val); \ + if (ret < 0) { \ + pr_err("kstrtoul() failed with ret: %d\n", ret); \ + return -EINVAL; \ + } \ + ret = se_dev_set_##_name(da->da_dev, (u32)val); \ + \ + return (!ret) ? count : -EINVAL; \ +} + +#define DEF_TB_DEV_ATTRIB(_backend, _name) \ +DEF_TB_DEV_ATTRIB_SHOW(_backend, _name); \ +DEF_TB_DEV_ATTRIB_STORE(_backend, _name); + +#define DEF_TB_DEV_ATTRIB_RO(_backend, name) \ +DEF_TB_DEV_ATTRIB_SHOW(_backend, name); + +CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); +#define TB_DEV_ATTR(_backend, _name, _mode) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + _backend##_dev_show_attr_##_name, \ + _backend##_dev_store_attr_##_name); + +#define TB_DEV_ATTR_RO(_backend, _name) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + _backend##_dev_show_attr_##_name); + +DEF_TB_DEV_ATTRIB(target_core, emulate_model_alias); +DEF_TB_DEV_ATTRIB(target_core, emulate_dpo); +DEF_TB_DEV_ATTRIB(target_core, emulate_fua_write); +DEF_TB_DEV_ATTRIB(target_core, emulate_fua_read); +DEF_TB_DEV_ATTRIB(target_core, emulate_write_cache); +DEF_TB_DEV_ATTRIB(target_core, emulate_ua_intlck_ctrl); +DEF_TB_DEV_ATTRIB(target_core, emulate_tas); +DEF_TB_DEV_ATTRIB(target_core, emulate_tpu); +DEF_TB_DEV_ATTRIB(target_core, emulate_tpws); +DEF_TB_DEV_ATTRIB(target_core, emulate_caw); +DEF_TB_DEV_ATTRIB(target_core, emulate_3pc); +DEF_TB_DEV_ATTRIB(target_core, pi_prot_type); +DEF_TB_DEV_ATTRIB_RO(target_core, hw_pi_prot_type); +DEF_TB_DEV_ATTRIB(target_core, pi_prot_format); +DEF_TB_DEV_ATTRIB(target_core, enforce_pr_isids); +DEF_TB_DEV_ATTRIB(target_core, is_nonrot); +DEF_TB_DEV_ATTRIB(target_core, emulate_rest_reord); +DEF_TB_DEV_ATTRIB(target_core, force_pr_aptpl); +DEF_TB_DEV_ATTRIB_RO(target_core, hw_block_size); +DEF_TB_DEV_ATTRIB(target_core, block_size); +DEF_TB_DEV_ATTRIB_RO(target_core, hw_max_sectors); +DEF_TB_DEV_ATTRIB(target_core, optimal_sectors); +DEF_TB_DEV_ATTRIB_RO(target_core, hw_queue_depth); +DEF_TB_DEV_ATTRIB(target_core, queue_depth); +DEF_TB_DEV_ATTRIB(target_core, max_unmap_lba_count); +DEF_TB_DEV_ATTRIB(target_core, max_unmap_block_desc_count); +DEF_TB_DEV_ATTRIB(target_core, unmap_granularity); +DEF_TB_DEV_ATTRIB(target_core, unmap_granularity_alignment); +DEF_TB_DEV_ATTRIB(target_core, max_write_same_len); + +TB_DEV_ATTR(target_core, emulate_model_alias, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_dpo, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_fua_write, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_fua_read, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_write_cache, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tas, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tpu, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tpws, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_caw, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_3pc, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, pi_prot_type, S_IRUGO | S_IWUSR); +TB_DEV_ATTR_RO(target_core, hw_pi_prot_type); +TB_DEV_ATTR(target_core, pi_prot_format, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, enforce_pr_isids, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, is_nonrot, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_rest_reord, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, force_pr_aptpl, S_IRUGO | S_IWUSR) +TB_DEV_ATTR_RO(target_core, hw_block_size); +TB_DEV_ATTR(target_core, block_size, S_IRUGO | S_IWUSR) +TB_DEV_ATTR_RO(target_core, hw_max_sectors); +TB_DEV_ATTR(target_core, optimal_sectors, S_IRUGO | S_IWUSR); +TB_DEV_ATTR_RO(target_core, hw_queue_depth); +TB_DEV_ATTR(target_core, queue_depth, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_unmap_lba_count, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, unmap_granularity, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, unmap_granularity_alignment, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_write_same_len, S_IRUGO | S_IWUSR); CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib); CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group); +/* + * dev_attrib attributes for devices using the target core SBC/SPC + * interpreter. Any backend using spc_parse_cdb should be using + * these. + */ +struct configfs_attribute *sbc_attrib_attrs[] = { + &target_core_dev_attrib_emulate_model_alias.attr, + &target_core_dev_attrib_emulate_dpo.attr, + &target_core_dev_attrib_emulate_fua_write.attr, + &target_core_dev_attrib_emulate_fua_read.attr, + &target_core_dev_attrib_emulate_write_cache.attr, + &target_core_dev_attrib_emulate_ua_intlck_ctrl.attr, + &target_core_dev_attrib_emulate_tas.attr, + &target_core_dev_attrib_emulate_tpu.attr, + &target_core_dev_attrib_emulate_tpws.attr, + &target_core_dev_attrib_emulate_caw.attr, + &target_core_dev_attrib_emulate_3pc.attr, + &target_core_dev_attrib_pi_prot_type.attr, + &target_core_dev_attrib_hw_pi_prot_type.attr, + &target_core_dev_attrib_pi_prot_format.attr, + &target_core_dev_attrib_enforce_pr_isids.attr, + &target_core_dev_attrib_is_nonrot.attr, + &target_core_dev_attrib_emulate_rest_reord.attr, + &target_core_dev_attrib_force_pr_aptpl.attr, + &target_core_dev_attrib_hw_block_size.attr, + &target_core_dev_attrib_block_size.attr, + &target_core_dev_attrib_hw_max_sectors.attr, + &target_core_dev_attrib_optimal_sectors.attr, + &target_core_dev_attrib_hw_queue_depth.attr, + &target_core_dev_attrib_queue_depth.attr, + &target_core_dev_attrib_max_unmap_lba_count.attr, + &target_core_dev_attrib_max_unmap_block_desc_count.attr, + &target_core_dev_attrib_unmap_granularity.attr, + &target_core_dev_attrib_unmap_granularity_alignment.attr, + &target_core_dev_attrib_max_write_same_len.attr, + NULL, +}; +EXPORT_SYMBOL(sbc_attrib_attrs); + +DEF_TB_DEV_ATTRIB_RO(target_pt, hw_pi_prot_type); +DEF_TB_DEV_ATTRIB_RO(target_pt, hw_block_size); +DEF_TB_DEV_ATTRIB_RO(target_pt, hw_max_sectors); +DEF_TB_DEV_ATTRIB_RO(target_pt, hw_queue_depth); + +TB_DEV_ATTR_RO(target_pt, hw_pi_prot_type); +TB_DEV_ATTR_RO(target_pt, hw_block_size); +TB_DEV_ATTR_RO(target_pt, hw_max_sectors); +TB_DEV_ATTR_RO(target_pt, hw_queue_depth); + +/* + * Minimal dev_attrib attributes for devices passing through CDBs. + * In this case we only provide a few read-only attributes for + * backwards compatibility. + */ +struct configfs_attribute *passthrough_attrib_attrs[] = { + &target_pt_dev_attrib_hw_pi_prot_type.attr, + &target_pt_dev_attrib_hw_block_size.attr, + &target_pt_dev_attrib_hw_max_sectors.attr, + &target_pt_dev_attrib_hw_queue_depth.attr, + NULL, +}; +EXPORT_SYMBOL(passthrough_attrib_attrs); + static struct configfs_item_operations target_core_dev_attrib_ops = { .show_attribute = target_core_dev_attrib_attr_show, .store_attribute = target_core_dev_attrib_attr_store, diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 7c5b1ef57d34..948b61f59a55 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -37,7 +37,6 @@ #include #include -#include #include "target_core_file.h" @@ -842,41 +841,6 @@ fd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &fd_sbc_ops); } -DEF_TB_DEFAULT_ATTRIBS(fileio); - -static struct configfs_attribute *fileio_backend_dev_attrs[] = { - &fileio_dev_attrib_emulate_model_alias.attr, - &fileio_dev_attrib_emulate_dpo.attr, - &fileio_dev_attrib_emulate_fua_write.attr, - &fileio_dev_attrib_emulate_fua_read.attr, - &fileio_dev_attrib_emulate_write_cache.attr, - &fileio_dev_attrib_emulate_ua_intlck_ctrl.attr, - &fileio_dev_attrib_emulate_tas.attr, - &fileio_dev_attrib_emulate_tpu.attr, - &fileio_dev_attrib_emulate_tpws.attr, - &fileio_dev_attrib_emulate_caw.attr, - &fileio_dev_attrib_emulate_3pc.attr, - &fileio_dev_attrib_pi_prot_type.attr, - &fileio_dev_attrib_hw_pi_prot_type.attr, - &fileio_dev_attrib_pi_prot_format.attr, - &fileio_dev_attrib_enforce_pr_isids.attr, - &fileio_dev_attrib_is_nonrot.attr, - &fileio_dev_attrib_emulate_rest_reord.attr, - &fileio_dev_attrib_force_pr_aptpl.attr, - &fileio_dev_attrib_hw_block_size.attr, - &fileio_dev_attrib_block_size.attr, - &fileio_dev_attrib_hw_max_sectors.attr, - &fileio_dev_attrib_optimal_sectors.attr, - &fileio_dev_attrib_hw_queue_depth.attr, - &fileio_dev_attrib_queue_depth.attr, - &fileio_dev_attrib_max_unmap_lba_count.attr, - &fileio_dev_attrib_max_unmap_block_desc_count.attr, - &fileio_dev_attrib_unmap_granularity.attr, - &fileio_dev_attrib_unmap_granularity_alignment.attr, - &fileio_dev_attrib_max_write_same_len.attr, - NULL, -}; - static const struct target_backend_ops fileio_ops = { .name = "fileio", .inquiry_prod = "FILEIO", @@ -895,7 +859,7 @@ static const struct target_backend_ops fileio_ops = { .init_prot = fd_init_prot, .format_prot = fd_format_prot, .free_prot = fd_free_prot, - .tb_dev_attrib_attrs = fileio_backend_dev_attrs, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; static int __init fileio_module_init(void) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 79f651fb98fb..6d4738252564 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -41,7 +41,6 @@ #include #include -#include #include "target_core_iblock.h" @@ -858,41 +857,6 @@ static bool iblock_get_write_cache(struct se_device *dev) return q->flush_flags & REQ_FLUSH; } -DEF_TB_DEFAULT_ATTRIBS(iblock); - -static struct configfs_attribute *iblock_backend_dev_attrs[] = { - &iblock_dev_attrib_emulate_model_alias.attr, - &iblock_dev_attrib_emulate_dpo.attr, - &iblock_dev_attrib_emulate_fua_write.attr, - &iblock_dev_attrib_emulate_fua_read.attr, - &iblock_dev_attrib_emulate_write_cache.attr, - &iblock_dev_attrib_emulate_ua_intlck_ctrl.attr, - &iblock_dev_attrib_emulate_tas.attr, - &iblock_dev_attrib_emulate_tpu.attr, - &iblock_dev_attrib_emulate_tpws.attr, - &iblock_dev_attrib_emulate_caw.attr, - &iblock_dev_attrib_emulate_3pc.attr, - &iblock_dev_attrib_pi_prot_type.attr, - &iblock_dev_attrib_hw_pi_prot_type.attr, - &iblock_dev_attrib_pi_prot_format.attr, - &iblock_dev_attrib_enforce_pr_isids.attr, - &iblock_dev_attrib_is_nonrot.attr, - &iblock_dev_attrib_emulate_rest_reord.attr, - &iblock_dev_attrib_force_pr_aptpl.attr, - &iblock_dev_attrib_hw_block_size.attr, - &iblock_dev_attrib_block_size.attr, - &iblock_dev_attrib_hw_max_sectors.attr, - &iblock_dev_attrib_optimal_sectors.attr, - &iblock_dev_attrib_hw_queue_depth.attr, - &iblock_dev_attrib_queue_depth.attr, - &iblock_dev_attrib_max_unmap_lba_count.attr, - &iblock_dev_attrib_max_unmap_block_desc_count.attr, - &iblock_dev_attrib_unmap_granularity.attr, - &iblock_dev_attrib_unmap_granularity_alignment.attr, - &iblock_dev_attrib_max_write_same_len.attr, - NULL, -}; - static const struct target_backend_ops iblock_ops = { .name = "iblock", .inquiry_prod = "IBLOCK", @@ -913,7 +877,7 @@ static const struct target_backend_ops iblock_ops = { .get_io_min = iblock_get_io_min, .get_io_opt = iblock_get_io_opt, .get_write_cache = iblock_get_write_cache, - .tb_dev_attrib_attrs = iblock_backend_dev_attrs, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; static int __init iblock_module_init(void) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 2e3a0b004f9a..afb87a8d5668 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -44,7 +44,6 @@ #include #include -#include #include "target_core_alua.h" #include "target_core_internal.h" @@ -1119,26 +1118,6 @@ static void pscsi_req_done(struct request *req, int uptodate) kfree(pt); } -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type); -TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size); -TB_DEV_ATTR_RO(pscsi, hw_block_size); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors); -TB_DEV_ATTR_RO(pscsi, hw_max_sectors); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth); -TB_DEV_ATTR_RO(pscsi, hw_queue_depth); - -static struct configfs_attribute *pscsi_backend_dev_attrs[] = { - &pscsi_dev_attrib_hw_pi_prot_type.attr, - &pscsi_dev_attrib_hw_block_size.attr, - &pscsi_dev_attrib_hw_max_sectors.attr, - &pscsi_dev_attrib_hw_queue_depth.attr, - NULL, -}; - static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, @@ -1155,7 +1134,7 @@ static const struct target_backend_ops pscsi_ops = { .show_configfs_dev_params = pscsi_show_configfs_dev_params, .get_device_type = pscsi_get_device_type, .get_blocks = pscsi_get_blocks, - .tb_dev_attrib_attrs = pscsi_backend_dev_attrs, + .tb_dev_attrib_attrs = passthrough_attrib_attrs, }; static int __init pscsi_module_init(void) diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index cf443a8a3cbe..55dd73e7f213 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -34,7 +34,6 @@ #include #include -#include #include "target_core_rd.h" @@ -696,41 +695,6 @@ rd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &rd_sbc_ops); } -DEF_TB_DEFAULT_ATTRIBS(rd_mcp); - -static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = { - &rd_mcp_dev_attrib_emulate_model_alias.attr, - &rd_mcp_dev_attrib_emulate_dpo.attr, - &rd_mcp_dev_attrib_emulate_fua_write.attr, - &rd_mcp_dev_attrib_emulate_fua_read.attr, - &rd_mcp_dev_attrib_emulate_write_cache.attr, - &rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr, - &rd_mcp_dev_attrib_emulate_tas.attr, - &rd_mcp_dev_attrib_emulate_tpu.attr, - &rd_mcp_dev_attrib_emulate_tpws.attr, - &rd_mcp_dev_attrib_emulate_caw.attr, - &rd_mcp_dev_attrib_emulate_3pc.attr, - &rd_mcp_dev_attrib_pi_prot_type.attr, - &rd_mcp_dev_attrib_hw_pi_prot_type.attr, - &rd_mcp_dev_attrib_pi_prot_format.attr, - &rd_mcp_dev_attrib_enforce_pr_isids.attr, - &rd_mcp_dev_attrib_is_nonrot.attr, - &rd_mcp_dev_attrib_emulate_rest_reord.attr, - &rd_mcp_dev_attrib_force_pr_aptpl.attr, - &rd_mcp_dev_attrib_hw_block_size.attr, - &rd_mcp_dev_attrib_block_size.attr, - &rd_mcp_dev_attrib_hw_max_sectors.attr, - &rd_mcp_dev_attrib_optimal_sectors.attr, - &rd_mcp_dev_attrib_hw_queue_depth.attr, - &rd_mcp_dev_attrib_queue_depth.attr, - &rd_mcp_dev_attrib_max_unmap_lba_count.attr, - &rd_mcp_dev_attrib_max_unmap_block_desc_count.attr, - &rd_mcp_dev_attrib_unmap_granularity.attr, - &rd_mcp_dev_attrib_unmap_granularity_alignment.attr, - &rd_mcp_dev_attrib_max_write_same_len.attr, - NULL, -}; - static const struct target_backend_ops rd_mcp_ops = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", @@ -747,7 +711,7 @@ static const struct target_backend_ops rd_mcp_ops = { .get_blocks = rd_get_blocks, .init_prot = rd_init_prot, .free_prot = rd_free_prot, - .tb_dev_attrib_attrs = rd_mcp_backend_dev_attrs, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; int __init rd_module_init(void) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index fb67e313dc4f..aebaad55e23f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -29,7 +29,6 @@ #include #include #include -#include #include @@ -1095,26 +1094,6 @@ tcmu_parse_cdb(struct se_cmd *cmd) return passthrough_parse_cdb(cmd, tcmu_pass_op); } -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); -TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size); -TB_DEV_ATTR_RO(tcmu, hw_block_size); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors); -TB_DEV_ATTR_RO(tcmu, hw_max_sectors); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth); -TB_DEV_ATTR_RO(tcmu, hw_queue_depth); - -static struct configfs_attribute *tcmu_backend_dev_attrs[] = { - &tcmu_dev_attrib_hw_pi_prot_type.attr, - &tcmu_dev_attrib_hw_block_size.attr, - &tcmu_dev_attrib_hw_max_sectors.attr, - &tcmu_dev_attrib_hw_queue_depth.attr, - NULL, -}; - static const struct target_backend_ops tcmu_ops = { .name = "user", .inquiry_prod = "USER", @@ -1131,7 +1110,7 @@ static const struct target_backend_ops tcmu_ops = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, - .tb_dev_attrib_attrs = tcmu_backend_dev_attrs, + .tb_dev_attrib_attrs = passthrough_attrib_attrs, }; static int __init tcmu_module_init(void) diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 514b52019380..2d5610516494 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -82,6 +82,9 @@ int transport_set_vpd_assoc(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident_type(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); +extern struct configfs_attribute *sbc_attrib_attrs[]; +extern struct configfs_attribute *passthrough_attrib_attrs[]; + /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); void transport_kunmap_data_sg(struct se_cmd *); diff --git a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h deleted file mode 100644 index 186f7a923570..000000000000 --- a/include/target/target_core_backend_configfs.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef TARGET_CORE_BACKEND_CONFIGFS_H -#define TARGET_CORE_BACKEND_CONFIGFS_H - -#include - -#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name) \ -static ssize_t _backend##_dev_show_attr_##_name( \ - struct se_dev_attrib *da, \ - char *page) \ -{ \ - return snprintf(page, PAGE_SIZE, "%u\n", \ - (u32)da->da_dev->dev_attrib._name); \ -} - -#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name) \ -static ssize_t _backend##_dev_store_attr_##_name( \ - struct se_dev_attrib *da, \ - const char *page, \ - size_t count) \ -{ \ - unsigned long val; \ - int ret; \ - \ - ret = kstrtoul(page, 0, &val); \ - if (ret < 0) { \ - pr_err("kstrtoul() failed with ret: %d\n", ret); \ - return -EINVAL; \ - } \ - ret = se_dev_set_##_name(da->da_dev, (u32)val); \ - \ - return (!ret) ? count : -EINVAL; \ -} - -#define DEF_TB_DEV_ATTRIB(_backend, _name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, _name); \ -DEF_TB_DEV_ATTRIB_STORE(_backend, _name); - -#define DEF_TB_DEV_ATTRIB_RO(_backend, name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, name); - -CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); -#define TB_DEV_ATTR(_backend, _name, _mode) \ -static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ - __CONFIGFS_EATTR(_name, _mode, \ - _backend##_dev_show_attr_##_name, \ - _backend##_dev_store_attr_##_name); - -#define TB_DEV_ATTR_RO(_backend, _name) \ -static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ - __CONFIGFS_EATTR_RO(_name, \ - _backend##_dev_show_attr_##_name); - -/* - * Default list of target backend device attributes as defined by - * struct se_dev_attrib - */ - -#define DEF_TB_DEFAULT_ATTRIBS(_backend) \ - DEF_TB_DEV_ATTRIB(_backend, emulate_model_alias); \ - TB_DEV_ATTR(_backend, emulate_model_alias, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_dpo); \ - TB_DEV_ATTR(_backend, emulate_dpo, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_fua_write); \ - TB_DEV_ATTR(_backend, emulate_fua_write, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_fua_read); \ - TB_DEV_ATTR(_backend, emulate_fua_read, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_write_cache); \ - TB_DEV_ATTR(_backend, emulate_write_cache, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_ua_intlck_ctrl); \ - TB_DEV_ATTR(_backend, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tas); \ - TB_DEV_ATTR(_backend, emulate_tas, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tpu); \ - TB_DEV_ATTR(_backend, emulate_tpu, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tpws); \ - TB_DEV_ATTR(_backend, emulate_tpws, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_caw); \ - TB_DEV_ATTR(_backend, emulate_caw, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_3pc); \ - TB_DEV_ATTR(_backend, emulate_3pc, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, pi_prot_type); \ - TB_DEV_ATTR(_backend, pi_prot_type, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_pi_prot_type); \ - TB_DEV_ATTR_RO(_backend, hw_pi_prot_type); \ - DEF_TB_DEV_ATTRIB(_backend, pi_prot_format); \ - TB_DEV_ATTR(_backend, pi_prot_format, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, enforce_pr_isids); \ - TB_DEV_ATTR(_backend, enforce_pr_isids, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, is_nonrot); \ - TB_DEV_ATTR(_backend, is_nonrot, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_rest_reord); \ - TB_DEV_ATTR(_backend, emulate_rest_reord, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, force_pr_aptpl); \ - TB_DEV_ATTR(_backend, force_pr_aptpl, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_block_size); \ - TB_DEV_ATTR_RO(_backend, hw_block_size); \ - DEF_TB_DEV_ATTRIB(_backend, block_size); \ - TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors); \ - TB_DEV_ATTR_RO(_backend, hw_max_sectors); \ - DEF_TB_DEV_ATTRIB(_backend, optimal_sectors); \ - TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth); \ - TB_DEV_ATTR_RO(_backend, hw_queue_depth); \ - DEF_TB_DEV_ATTRIB(_backend, queue_depth); \ - TB_DEV_ATTR(_backend, queue_depth, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_unmap_lba_count); \ - TB_DEV_ATTR(_backend, max_unmap_lba_count, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_unmap_block_desc_count); \ - TB_DEV_ATTR(_backend, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, unmap_granularity); \ - TB_DEV_ATTR(_backend, unmap_granularity, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, unmap_granularity_alignment); \ - TB_DEV_ATTR(_backend, unmap_granularity_alignment, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_write_same_len); \ - TB_DEV_ATTR(_backend, max_write_same_len, S_IRUGO | S_IWUSR); - -#endif /* TARGET_CORE_BACKEND_CONFIGFS_H */ From 3effdb9094fd06b9c61ecef08d610ae90805fd98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 May 2015 18:14:58 +0200 Subject: [PATCH 082/839] target: simplify backend attribute implementation Consolidate the implementation of the backend attributes in a single file and single function per attribute show/store function instead of splitting it into multiple functions in multiple files. Also use the proper strto* helpers for exposed data types, add macros to implement the store methods for the most common data types and share the show methods between the two different attribute implementations. (Fix bogus store_pi_prot_format flag=0 return value - nab) Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 550 +++++++++++++++++++++++--- drivers/target/target_core_device.c | 504 ----------------------- include/target/target_core_backend.h | 29 -- 3 files changed, 485 insertions(+), 598 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index dbf91f02ee5c..4313eea060ed 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -471,82 +471,507 @@ EXPORT_SYMBOL(target_unregister_template); //############################################################################*/ /* Start functions for struct config_item_type tb_dev_attrib_cit */ -#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name) \ -static ssize_t _backend##_dev_show_attr_##_name( \ - struct se_dev_attrib *da, \ - char *page) \ +#define DEF_TB_DEV_ATTRIB_SHOW(_name) \ +static ssize_t show_##_name(struct se_dev_attrib *da, char *page) \ { \ - return snprintf(page, PAGE_SIZE, "%u\n", \ - (u32)da->da_dev->dev_attrib._name); \ + return snprintf(page, PAGE_SIZE, "%u\n", da->_name); \ } -#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name) \ -static ssize_t _backend##_dev_store_attr_##_name( \ - struct se_dev_attrib *da, \ - const char *page, \ - size_t count) \ +DEF_TB_DEV_ATTRIB_SHOW(emulate_model_alias); +DEF_TB_DEV_ATTRIB_SHOW(emulate_dpo); +DEF_TB_DEV_ATTRIB_SHOW(emulate_fua_write); +DEF_TB_DEV_ATTRIB_SHOW(emulate_fua_read); +DEF_TB_DEV_ATTRIB_SHOW(emulate_write_cache); +DEF_TB_DEV_ATTRIB_SHOW(emulate_ua_intlck_ctrl); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tas); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tpu); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tpws); +DEF_TB_DEV_ATTRIB_SHOW(emulate_caw); +DEF_TB_DEV_ATTRIB_SHOW(emulate_3pc); +DEF_TB_DEV_ATTRIB_SHOW(pi_prot_type); +DEF_TB_DEV_ATTRIB_SHOW(hw_pi_prot_type); +DEF_TB_DEV_ATTRIB_SHOW(pi_prot_format); +DEF_TB_DEV_ATTRIB_SHOW(enforce_pr_isids); +DEF_TB_DEV_ATTRIB_SHOW(is_nonrot); +DEF_TB_DEV_ATTRIB_SHOW(emulate_rest_reord); +DEF_TB_DEV_ATTRIB_SHOW(force_pr_aptpl); +DEF_TB_DEV_ATTRIB_SHOW(hw_block_size); +DEF_TB_DEV_ATTRIB_SHOW(block_size); +DEF_TB_DEV_ATTRIB_SHOW(hw_max_sectors); +DEF_TB_DEV_ATTRIB_SHOW(optimal_sectors); +DEF_TB_DEV_ATTRIB_SHOW(hw_queue_depth); +DEF_TB_DEV_ATTRIB_SHOW(queue_depth); +DEF_TB_DEV_ATTRIB_SHOW(max_unmap_lba_count); +DEF_TB_DEV_ATTRIB_SHOW(max_unmap_block_desc_count); +DEF_TB_DEV_ATTRIB_SHOW(unmap_granularity); +DEF_TB_DEV_ATTRIB_SHOW(unmap_granularity_alignment); +DEF_TB_DEV_ATTRIB_SHOW(max_write_same_len); + +#define DEF_TB_DEV_ATTRIB_STORE_U32(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ { \ - unsigned long val; \ + u32 val; \ int ret; \ \ - ret = kstrtoul(page, 0, &val); \ - if (ret < 0) { \ - pr_err("kstrtoul() failed with ret: %d\n", ret); \ - return -EINVAL; \ - } \ - ret = se_dev_set_##_name(da->da_dev, (u32)val); \ - \ - return (!ret) ? count : -EINVAL; \ + ret = kstrtou32(page, 0, &val); \ + if (ret < 0) \ + return ret; \ + da->_name = val; \ + return count; \ } -#define DEF_TB_DEV_ATTRIB(_backend, _name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, _name); \ -DEF_TB_DEV_ATTRIB_STORE(_backend, _name); +DEF_TB_DEV_ATTRIB_STORE_U32(max_unmap_lba_count); +DEF_TB_DEV_ATTRIB_STORE_U32(max_unmap_block_desc_count); +DEF_TB_DEV_ATTRIB_STORE_U32(unmap_granularity); +DEF_TB_DEV_ATTRIB_STORE_U32(unmap_granularity_alignment); +DEF_TB_DEV_ATTRIB_STORE_U32(max_write_same_len); -#define DEF_TB_DEV_ATTRIB_RO(_backend, name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, name); +#define DEF_TB_DEV_ATTRIB_STORE_BOOL(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ +{ \ + bool flag; \ + int ret; \ + \ + ret = strtobool(page, &flag); \ + if (ret < 0) \ + return ret; \ + da->_name = flag; \ + return count; \ +} + +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_fua_write); +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_caw); +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_3pc); +DEF_TB_DEV_ATTRIB_STORE_BOOL(enforce_pr_isids); +DEF_TB_DEV_ATTRIB_STORE_BOOL(is_nonrot); + +#define DEF_TB_DEV_ATTRIB_STORE_STUB(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ +{ \ + printk_once(KERN_WARNING \ + "ignoring deprecated ##_name## attribute\n"); \ + return count; \ +} + +DEF_TB_DEV_ATTRIB_STORE_STUB(emulate_dpo); +DEF_TB_DEV_ATTRIB_STORE_STUB(emulate_fua_read); + +static void dev_set_t10_wwn_model_alias(struct se_device *dev) +{ + const char *configname; + + configname = config_item_name(&dev->dev_group.cg_item); + if (strlen(configname) >= 16) { + pr_warn("dev[%p]: Backstore name '%s' is too long for " + "INQUIRY_MODEL, truncating to 16 bytes\n", dev, + configname); + } + snprintf(&dev->t10_wwn.model[0], 16, "%s", configname); +} + +static ssize_t store_emulate_model_alias(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + if (dev->export_count) { + pr_err("dev[%p]: Unable to change model alias" + " while export_count is %d\n", + dev, dev->export_count); + return -EINVAL; + } + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag) { + dev_set_t10_wwn_model_alias(dev); + } else { + strncpy(&dev->t10_wwn.model[0], + dev->transport->inquiry_prod, 16); + } + da->emulate_model_alias = flag; + return count; +} + +static ssize_t store_emulate_write_cache(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag && da->da_dev->transport->get_write_cache) { + pr_err("emulate_write_cache not supported for this device\n"); + return -EINVAL; + } + + da->emulate_write_cache = flag; + pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_ua_intlck_ctrl(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (val != 0 && val != 1 && val != 2) { + pr_err("Illegal value %d\n", val); + return -EINVAL; + } + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device" + " UA_INTRLCK_CTRL while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + da->emulate_ua_intlck_ctrl = val; + pr_debug("dev[%p]: SE Device UA_INTRLCK_CTRL flag: %d\n", + da->da_dev, val); + return count; +} + +static ssize_t store_emulate_tas(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device TAS while" + " export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + da->emulate_tas = flag; + pr_debug("dev[%p]: SE Device TASK_ABORTED status bit: %s\n", + da->da_dev, flag ? "Enabled" : "Disabled"); + + return count; +} + +static ssize_t store_emulate_tpu(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + /* + * We expect this value to be non-zero when generic Block Layer + * Discard supported is detected iblock_create_virtdevice(). + */ + if (flag && !da->max_unmap_block_desc_count) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + da->emulate_tpu = flag; + pr_debug("dev[%p]: SE Device Thin Provisioning UNMAP bit: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_tpws(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + /* + * We expect this value to be non-zero when generic Block Layer + * Discard supported is detected iblock_create_virtdevice(). + */ + if (flag && !da->max_unmap_block_desc_count) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + da->emulate_tpws = flag; + pr_debug("dev[%p]: SE Device Thin Provisioning WRITE_SAME: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_pi_prot_type(struct se_dev_attrib *da, + const char *page, size_t count) +{ + int old_prot = da->pi_prot_type, ret; + struct se_device *dev = da->da_dev; + u32 flag; + + ret = kstrtou32(page, 0, &flag); + if (ret < 0) + return ret; + + if (flag != 0 && flag != 1 && flag != 2 && flag != 3) { + pr_err("Illegal value %d for pi_prot_type\n", flag); + return -EINVAL; + } + if (flag == 2) { + pr_err("DIF TYPE2 protection currently not supported\n"); + return -ENOSYS; + } + if (da->hw_pi_prot_type) { + pr_warn("DIF protection enabled on underlying hardware," + " ignoring\n"); + return count; + } + if (!dev->transport->init_prot || !dev->transport->free_prot) { + /* 0 is only allowed value for non-supporting backends */ + if (flag == 0) + return 0; + + pr_err("DIF protection not supported by backend: %s\n", + dev->transport->name); + return -ENOSYS; + } + if (!(dev->dev_flags & DF_CONFIGURED)) { + pr_err("DIF protection requires device to be configured\n"); + return -ENODEV; + } + if (dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device PROT type while" + " export_count is %d\n", dev, dev->export_count); + return -EINVAL; + } + + da->pi_prot_type = flag; + + if (flag && !old_prot) { + ret = dev->transport->init_prot(dev); + if (ret) { + da->pi_prot_type = old_prot; + return ret; + } + + } else if (!flag && old_prot) { + dev->transport->free_prot(dev); + } + + pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag); + return count; +} + +static ssize_t store_pi_prot_format(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (!flag) + return count; + + if (!dev->transport->format_prot) { + pr_err("DIF protection format not supported by backend %s\n", + dev->transport->name); + return -ENOSYS; + } + if (!(dev->dev_flags & DF_CONFIGURED)) { + pr_err("DIF protection format requires device to be configured\n"); + return -ENODEV; + } + if (dev->export_count) { + pr_err("dev[%p]: Unable to format SE Device PROT type while" + " export_count is %d\n", dev, dev->export_count); + return -EINVAL; + } + + ret = dev->transport->format_prot(dev); + if (ret) + return ret; + + pr_debug("dev[%p]: SE Device Protection Format complete\n", dev); + return count; +} + +static ssize_t store_force_pr_aptpl(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to set force_pr_aptpl while" + " export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + + da->force_pr_aptpl = flag; + pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_rest_reord(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag != 0) { + printk(KERN_ERR "dev[%p]: SE Device emulation of restricted" + " reordering not implemented\n", da->da_dev); + return -ENOSYS; + } + da->emulate_rest_reord = flag; + pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", + da->da_dev, flag); + return count; +} + +/* + * Note, this can only be called on unexported SE Device Object. + */ +static ssize_t store_queue_depth(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device TCQ while" + " export_count is %d\n", + dev, dev->export_count); + return -EINVAL; + } + if (!val) { + pr_err("dev[%p]: Illegal ZERO value for queue_depth\n", dev); + return -EINVAL; + } + + if (val > dev->dev_attrib.queue_depth) { + if (val > dev->dev_attrib.hw_queue_depth) { + pr_err("dev[%p]: Passed queue_depth:" + " %u exceeds TCM/SE_Device MAX" + " TCQ: %u\n", dev, val, + dev->dev_attrib.hw_queue_depth); + return -EINVAL; + } + } + da->queue_depth = dev->queue_depth = val; + pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n", dev, val); + return count; +} + +static ssize_t store_optimal_sectors(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device" + " optimal_sectors while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + if (val > da->hw_max_sectors) { + pr_err("dev[%p]: Passed optimal_sectors %u cannot be" + " greater than hw_max_sectors: %u\n", + da->da_dev, val, da->hw_max_sectors); + return -EINVAL; + } + + da->optimal_sectors = val; + pr_debug("dev[%p]: SE Device optimal_sectors changed to %u\n", + da->da_dev, val); + return count; +} + +static ssize_t store_block_size(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device block_size" + " while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + + if (val != 512 && val != 1024 && val != 2048 && val != 4096) { + pr_err("dev[%p]: Illegal value for block_device: %u" + " for SE device, must be 512, 1024, 2048 or 4096\n", + da->da_dev, val); + return -EINVAL; + } + + da->block_size = val; + if (da->max_bytes_per_io) + da->hw_max_sectors = da->max_bytes_per_io / val; + + pr_debug("dev[%p]: SE Device block_size changed to %u\n", + da->da_dev, val); + return count; +} CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); #define TB_DEV_ATTR(_backend, _name, _mode) \ static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ - __CONFIGFS_EATTR(_name, _mode, \ - _backend##_dev_show_attr_##_name, \ - _backend##_dev_store_attr_##_name); + __CONFIGFS_EATTR(_name, _mode, \ + show_##_name, \ + store_##_name); -#define TB_DEV_ATTR_RO(_backend, _name) \ +#define TB_DEV_ATTR_RO(_backend, _name) \ static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ __CONFIGFS_EATTR_RO(_name, \ - _backend##_dev_show_attr_##_name); - -DEF_TB_DEV_ATTRIB(target_core, emulate_model_alias); -DEF_TB_DEV_ATTRIB(target_core, emulate_dpo); -DEF_TB_DEV_ATTRIB(target_core, emulate_fua_write); -DEF_TB_DEV_ATTRIB(target_core, emulate_fua_read); -DEF_TB_DEV_ATTRIB(target_core, emulate_write_cache); -DEF_TB_DEV_ATTRIB(target_core, emulate_ua_intlck_ctrl); -DEF_TB_DEV_ATTRIB(target_core, emulate_tas); -DEF_TB_DEV_ATTRIB(target_core, emulate_tpu); -DEF_TB_DEV_ATTRIB(target_core, emulate_tpws); -DEF_TB_DEV_ATTRIB(target_core, emulate_caw); -DEF_TB_DEV_ATTRIB(target_core, emulate_3pc); -DEF_TB_DEV_ATTRIB(target_core, pi_prot_type); -DEF_TB_DEV_ATTRIB_RO(target_core, hw_pi_prot_type); -DEF_TB_DEV_ATTRIB(target_core, pi_prot_format); -DEF_TB_DEV_ATTRIB(target_core, enforce_pr_isids); -DEF_TB_DEV_ATTRIB(target_core, is_nonrot); -DEF_TB_DEV_ATTRIB(target_core, emulate_rest_reord); -DEF_TB_DEV_ATTRIB(target_core, force_pr_aptpl); -DEF_TB_DEV_ATTRIB_RO(target_core, hw_block_size); -DEF_TB_DEV_ATTRIB(target_core, block_size); -DEF_TB_DEV_ATTRIB_RO(target_core, hw_max_sectors); -DEF_TB_DEV_ATTRIB(target_core, optimal_sectors); -DEF_TB_DEV_ATTRIB_RO(target_core, hw_queue_depth); -DEF_TB_DEV_ATTRIB(target_core, queue_depth); -DEF_TB_DEV_ATTRIB(target_core, max_unmap_lba_count); -DEF_TB_DEV_ATTRIB(target_core, max_unmap_block_desc_count); -DEF_TB_DEV_ATTRIB(target_core, unmap_granularity); -DEF_TB_DEV_ATTRIB(target_core, unmap_granularity_alignment); -DEF_TB_DEV_ATTRIB(target_core, max_write_same_len); + show_##_name); TB_DEV_ATTR(target_core, emulate_model_alias, S_IRUGO | S_IWUSR); TB_DEV_ATTR(target_core, emulate_dpo, S_IRUGO | S_IWUSR); @@ -620,11 +1045,6 @@ struct configfs_attribute *sbc_attrib_attrs[] = { }; EXPORT_SYMBOL(sbc_attrib_attrs); -DEF_TB_DEV_ATTRIB_RO(target_pt, hw_pi_prot_type); -DEF_TB_DEV_ATTRIB_RO(target_pt, hw_block_size); -DEF_TB_DEV_ATTRIB_RO(target_pt, hw_max_sectors); -DEF_TB_DEV_ATTRIB_RO(target_pt, hw_queue_depth); - TB_DEV_ATTR_RO(target_pt, hw_pi_prot_type); TB_DEV_ATTR_RO(target_pt, hw_block_size); TB_DEV_ATTR_RO(target_pt, hw_max_sectors); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index af906f1caaa7..22f1e180d8ee 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -665,510 +665,6 @@ bool se_dev_check_wce(struct se_device *dev) return wce; } -int se_dev_set_max_unmap_lba_count( - struct se_device *dev, - u32 max_unmap_lba_count) -{ - dev->dev_attrib.max_unmap_lba_count = max_unmap_lba_count; - pr_debug("dev[%p]: Set max_unmap_lba_count: %u\n", - dev, dev->dev_attrib.max_unmap_lba_count); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count); - -int se_dev_set_max_unmap_block_desc_count( - struct se_device *dev, - u32 max_unmap_block_desc_count) -{ - dev->dev_attrib.max_unmap_block_desc_count = - max_unmap_block_desc_count; - pr_debug("dev[%p]: Set max_unmap_block_desc_count: %u\n", - dev, dev->dev_attrib.max_unmap_block_desc_count); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count); - -int se_dev_set_unmap_granularity( - struct se_device *dev, - u32 unmap_granularity) -{ - dev->dev_attrib.unmap_granularity = unmap_granularity; - pr_debug("dev[%p]: Set unmap_granularity: %u\n", - dev, dev->dev_attrib.unmap_granularity); - return 0; -} -EXPORT_SYMBOL(se_dev_set_unmap_granularity); - -int se_dev_set_unmap_granularity_alignment( - struct se_device *dev, - u32 unmap_granularity_alignment) -{ - dev->dev_attrib.unmap_granularity_alignment = unmap_granularity_alignment; - pr_debug("dev[%p]: Set unmap_granularity_alignment: %u\n", - dev, dev->dev_attrib.unmap_granularity_alignment); - return 0; -} -EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment); - -int se_dev_set_max_write_same_len( - struct se_device *dev, - u32 max_write_same_len) -{ - dev->dev_attrib.max_write_same_len = max_write_same_len; - pr_debug("dev[%p]: Set max_write_same_len: %u\n", - dev, dev->dev_attrib.max_write_same_len); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_write_same_len); - -static void dev_set_t10_wwn_model_alias(struct se_device *dev) -{ - const char *configname; - - configname = config_item_name(&dev->dev_group.cg_item); - if (strlen(configname) >= 16) { - pr_warn("dev[%p]: Backstore name '%s' is too long for " - "INQUIRY_MODEL, truncating to 16 bytes\n", dev, - configname); - } - snprintf(&dev->t10_wwn.model[0], 16, "%s", configname); -} - -int se_dev_set_emulate_model_alias(struct se_device *dev, int flag) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change model alias" - " while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - dev_set_t10_wwn_model_alias(dev); - } else { - strncpy(&dev->t10_wwn.model[0], - dev->transport->inquiry_prod, 16); - } - dev->dev_attrib.emulate_model_alias = flag; - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_model_alias); - -int se_dev_set_emulate_dpo(struct se_device *dev, int flag) -{ - printk_once(KERN_WARNING - "ignoring deprecated emulate_dpo attribute\n"); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_dpo); - -int se_dev_set_emulate_fua_write(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - if (flag && - dev->transport->get_write_cache) { - pr_warn("emulate_fua_write not supported for this device, ignoring\n"); - return 0; - } - if (dev->export_count) { - pr_err("emulate_fua_write cannot be changed with active" - " exports: %d\n", dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_fua_write = flag; - pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n", - dev, dev->dev_attrib.emulate_fua_write); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_fua_write); - -int se_dev_set_emulate_fua_read(struct se_device *dev, int flag) -{ - printk_once(KERN_WARNING - "ignoring deprecated emulate_fua_read attribute\n"); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_fua_read); - -int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - if (flag && - dev->transport->get_write_cache) { - pr_err("emulate_write_cache not supported for this device\n"); - return -EINVAL; - } - if (dev->export_count) { - pr_err("emulate_write_cache cannot be changed with active" - " exports: %d\n", dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_write_cache = flag; - pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n", - dev, dev->dev_attrib.emulate_write_cache); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_write_cache); - -int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1) && (flag != 2)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device" - " UA_INTRLCK_CTRL while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_ua_intlck_ctrl = flag; - pr_debug("dev[%p]: SE Device UA_INTRLCK_CTRL flag: %d\n", - dev, dev->dev_attrib.emulate_ua_intlck_ctrl); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl); - -int se_dev_set_emulate_tas(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device TAS while" - " export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_tas = flag; - pr_debug("dev[%p]: SE Device TASK_ABORTED status bit: %s\n", - dev, (dev->dev_attrib.emulate_tas) ? "Enabled" : "Disabled"); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tas); - -int se_dev_set_emulate_tpu(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - /* - * We expect this value to be non-zero when generic Block Layer - * Discard supported is detected iblock_create_virtdevice(). - */ - if (flag && !dev->dev_attrib.max_unmap_block_desc_count) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } - - dev->dev_attrib.emulate_tpu = flag; - pr_debug("dev[%p]: SE Device Thin Provisioning UNMAP bit: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tpu); - -int se_dev_set_emulate_tpws(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - /* - * We expect this value to be non-zero when generic Block Layer - * Discard supported is detected iblock_create_virtdevice(). - */ - if (flag && !dev->dev_attrib.max_unmap_block_desc_count) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } - - dev->dev_attrib.emulate_tpws = flag; - pr_debug("dev[%p]: SE Device Thin Provisioning WRITE_SAME: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tpws); - -int se_dev_set_emulate_caw(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.emulate_caw = flag; - pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n", - dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_caw); - -int se_dev_set_emulate_3pc(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.emulate_3pc = flag; - pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n", - dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_3pc); - -int se_dev_set_pi_prot_type(struct se_device *dev, int flag) -{ - int rc, old_prot = dev->dev_attrib.pi_prot_type; - - if (flag != 0 && flag != 1 && flag != 2 && flag != 3) { - pr_err("Illegal value %d for pi_prot_type\n", flag); - return -EINVAL; - } - if (flag == 2) { - pr_err("DIF TYPE2 protection currently not supported\n"); - return -ENOSYS; - } - if (dev->dev_attrib.hw_pi_prot_type) { - pr_warn("DIF protection enabled on underlying hardware," - " ignoring\n"); - return 0; - } - if (!dev->transport->init_prot || !dev->transport->free_prot) { - /* 0 is only allowed value for non-supporting backends */ - if (flag == 0) - return 0; - - pr_err("DIF protection not supported by backend: %s\n", - dev->transport->name); - return -ENOSYS; - } - if (!(dev->dev_flags & DF_CONFIGURED)) { - pr_err("DIF protection requires device to be configured\n"); - return -ENODEV; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device PROT type while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - dev->dev_attrib.pi_prot_type = flag; - - if (flag && !old_prot) { - rc = dev->transport->init_prot(dev); - if (rc) { - dev->dev_attrib.pi_prot_type = old_prot; - return rc; - } - - } else if (!flag && old_prot) { - dev->transport->free_prot(dev); - } - pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_pi_prot_type); - -int se_dev_set_pi_prot_format(struct se_device *dev, int flag) -{ - int rc; - - if (!flag) - return 0; - - if (flag != 1) { - pr_err("Illegal value %d for pi_prot_format\n", flag); - return -EINVAL; - } - if (!dev->transport->format_prot) { - pr_err("DIF protection format not supported by backend %s\n", - dev->transport->name); - return -ENOSYS; - } - if (!(dev->dev_flags & DF_CONFIGURED)) { - pr_err("DIF protection format requires device to be configured\n"); - return -ENODEV; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to format SE Device PROT type while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - rc = dev->transport->format_prot(dev); - if (rc) - return rc; - - pr_debug("dev[%p]: SE Device Protection Format complete\n", dev); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_pi_prot_format); - -int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.enforce_pr_isids = flag; - pr_debug("dev[%p]: SE Device enforce_pr_isids bit: %s\n", dev, - (dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled"); - return 0; -} -EXPORT_SYMBOL(se_dev_set_enforce_pr_isids); - -int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - printk(KERN_ERR "Illegal value %d\n", flag); - return -EINVAL; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to set force_pr_aptpl while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - dev->dev_attrib.force_pr_aptpl = flag; - pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_force_pr_aptpl); - -int se_dev_set_is_nonrot(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - printk(KERN_ERR "Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.is_nonrot = flag; - pr_debug("dev[%p]: SE Device is_nonrot bit: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_is_nonrot); - -int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag) -{ - if (flag != 0) { - printk(KERN_ERR "dev[%p]: SE Device emulation of restricted" - " reordering not implemented\n", dev); - return -ENOSYS; - } - dev->dev_attrib.emulate_rest_reord = flag; - pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_rest_reord); - -/* - * Note, this can only be called on unexported SE Device Object. - */ -int se_dev_set_queue_depth(struct se_device *dev, u32 queue_depth) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device TCQ while" - " export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - if (!queue_depth) { - pr_err("dev[%p]: Illegal ZERO value for queue" - "_depth\n", dev); - return -EINVAL; - } - - if (queue_depth > dev->dev_attrib.queue_depth) { - if (queue_depth > dev->dev_attrib.hw_queue_depth) { - pr_err("dev[%p]: Passed queue_depth:" - " %u exceeds TCM/SE_Device MAX" - " TCQ: %u\n", dev, queue_depth, - dev->dev_attrib.hw_queue_depth); - return -EINVAL; - } - } - dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth; - pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n", - dev, queue_depth); - return 0; -} -EXPORT_SYMBOL(se_dev_set_queue_depth); - -int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device" - " optimal_sectors while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - if (optimal_sectors > dev->dev_attrib.hw_max_sectors) { - pr_err("dev[%p]: Passed optimal_sectors %u cannot be" - " greater than hw_max_sectors: %u\n", dev, - optimal_sectors, dev->dev_attrib.hw_max_sectors); - return -EINVAL; - } - - dev->dev_attrib.optimal_sectors = optimal_sectors; - pr_debug("dev[%p]: SE Device optimal_sectors changed to %u\n", - dev, optimal_sectors); - return 0; -} -EXPORT_SYMBOL(se_dev_set_optimal_sectors); - -int se_dev_set_block_size(struct se_device *dev, u32 block_size) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device block_size" - " while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - - if ((block_size != 512) && - (block_size != 1024) && - (block_size != 2048) && - (block_size != 4096)) { - pr_err("dev[%p]: Illegal value for block_device: %u" - " for SE device, must be 512, 1024, 2048 or 4096\n", - dev, block_size); - return -EINVAL; - } - - dev->dev_attrib.block_size = block_size; - pr_debug("dev[%p]: SE Device block_size changed to %u\n", - dev, block_size); - - if (dev->dev_attrib.max_bytes_per_io) - dev->dev_attrib.hw_max_sectors = - dev->dev_attrib.max_bytes_per_io / block_size; - - return 0; -} -EXPORT_SYMBOL(se_dev_set_block_size); - int core_dev_add_lun( struct se_portal_group *tpg, struct se_device *dev, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 2d5610516494..566c7d27f6d0 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -94,35 +94,6 @@ sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, struct scatterlist *, u32, struct scatterlist *, u32); bool target_lun_is_rdonly(struct se_cmd *); - -/* attribute helpers from target_core_device.c for backend drivers */ -bool se_dev_check_wce(struct se_device *); -int se_dev_set_max_unmap_lba_count(struct se_device *, u32); -int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32); -int se_dev_set_unmap_granularity(struct se_device *, u32); -int se_dev_set_unmap_granularity_alignment(struct se_device *, u32); -int se_dev_set_max_write_same_len(struct se_device *, u32); -int se_dev_set_emulate_model_alias(struct se_device *, int); -int se_dev_set_emulate_dpo(struct se_device *, int); -int se_dev_set_emulate_fua_write(struct se_device *, int); -int se_dev_set_emulate_fua_read(struct se_device *, int); -int se_dev_set_emulate_write_cache(struct se_device *, int); -int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); -int se_dev_set_emulate_tas(struct se_device *, int); -int se_dev_set_emulate_tpu(struct se_device *, int); -int se_dev_set_emulate_tpws(struct se_device *, int); -int se_dev_set_emulate_caw(struct se_device *, int); -int se_dev_set_emulate_3pc(struct se_device *, int); -int se_dev_set_pi_prot_type(struct se_device *, int); -int se_dev_set_pi_prot_format(struct se_device *, int); -int se_dev_set_enforce_pr_isids(struct se_device *, int); -int se_dev_set_force_pr_aptpl(struct se_device *, int); -int se_dev_set_is_nonrot(struct se_device *, int); -int se_dev_set_emulate_rest_reord(struct se_device *dev, int); -int se_dev_set_queue_depth(struct se_device *, u32); -int se_dev_set_max_sectors(struct se_device *, u32); -int se_dev_set_optimal_sectors(struct se_device *, u32); -int se_dev_set_block_size(struct se_device *, u32); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); From b3eeea6619a0ed4f37138661c49339b21361d397 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 19 May 2015 16:16:01 +0200 Subject: [PATCH 083/839] target: Simplify LUN shutdown code Instead of starting a thread from transport_clear_lun_ref() that waits for LUN shutdown, wait in that function for LUN shutdown to finish. Additionally, change the return type of transport_clear_lun_ref() from int to void. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 1 - drivers/target/target_core_internal.h | 2 +- drivers/target/target_core_tpg.c | 2 -- drivers/target/target_core_transport.c | 23 +---------------------- include/target/target_core_base.h | 1 - 5 files changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 22f1e180d8ee..1e4485b1849d 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -928,7 +928,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; xcopy_lun->lun_se_dev = dev; - init_completion(&xcopy_lun->lun_shutdown_comp); spin_lock_init(&xcopy_lun->lun_sep_lock); init_completion(&xcopy_lun->lun_ref_comp); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 01181aed67dc..23f992ed7cae 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -108,7 +108,7 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags); -int transport_clear_lun_ref(struct se_lun *); +void transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 91f8ddb6d783..fd531fcf3191 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -494,7 +494,6 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) lun->unpacked_lun = 0; atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_shutdown_comp); spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); @@ -606,7 +605,6 @@ struct se_lun *core_tpg_alloc_lun( lun->unpacked_lun = unpacked_lun; lun->lun_link_magic = SE_LUN_LINK_MAGIC; atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_shutdown_comp); spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 965a308e10a5..7c0518a28186 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2596,31 +2596,10 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); -static int transport_clear_lun_ref_thread(void *p) +void transport_clear_lun_ref(struct se_lun *lun) { - struct se_lun *lun = p; - percpu_ref_kill(&lun->lun_ref); - wait_for_completion(&lun->lun_ref_comp); - complete(&lun->lun_shutdown_comp); - - return 0; -} - -int transport_clear_lun_ref(struct se_lun *lun) -{ - struct task_struct *kt; - - kt = kthread_run(transport_clear_lun_ref_thread, lun, - "tcm_cl_%u", lun->unpacked_lun); - if (IS_ERR(kt)) { - pr_err("Unable to start clear_lun thread\n"); - return PTR_ERR(kt); - } - wait_for_completion(&lun->lun_shutdown_comp); - - return 0; } /** diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 03e2ee8f8337..ee7abdd6b7a2 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -707,7 +707,6 @@ struct se_lun { u32 lun_index; atomic_t lun_acl_count; spinlock_t lun_sep_lock; - struct completion lun_shutdown_comp; struct se_device *lun_se_dev; struct se_port *lun_sep; struct config_group lun_group; From adf653f92f38e80a78bb77e912d49bcc8055330f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 May 2015 21:33:08 -0700 Subject: [PATCH 084/839] target: Subsume se_port + t10_alua_tg_pt_gp_member into se_lun This patch eliminates all se_port + t10_alua_tg_pt_gp_member usage, and converts current users to direct se_lun pointer dereference. This includes the removal of core_export_port(), core_release_port() core_dev_export() and core_dev_unexport(). Along with conversion of special case se_lun pointer dereference within PR ALL_TG_PT=1 and ALUA access state transition UNIT_ATTENTION handling. Also, update core_enable_device_list_for_node() to reference the new per se_lun->lun_deve_list when creating a new entry, or replacing an existing one via RCU. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 400 +++++++------------ drivers/target/target_core_alua.h | 14 +- drivers/target/target_core_configfs.c | 11 +- drivers/target/target_core_device.c | 179 ++------- drivers/target/target_core_fabric_configfs.c | 32 +- drivers/target/target_core_internal.h | 12 +- drivers/target/target_core_pr.c | 56 ++- drivers/target/target_core_spc.c | 48 +-- drivers/target/target_core_stat.c | 299 +++++--------- drivers/target/target_core_tpg.c | 95 +++-- drivers/target/target_core_transport.c | 34 +- drivers/target/target_core_xcopy.c | 17 +- include/target/target_core_base.h | 74 ++-- 13 files changed, 461 insertions(+), 810 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 53cdfabac861..1109c2833fe6 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -43,11 +43,13 @@ static sense_reason_t core_alua_check_transition(int state, int valid, int *primary); static int core_alua_set_tg_pt_secondary_state( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, int explicit, int offline); + struct se_lun *lun, int explicit, int offline); static char *core_alua_dump_state(int state); +static void __target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp); + static u16 alua_lu_gps_counter; static u32 alua_lu_gps_count; @@ -145,9 +147,8 @@ sense_reason_t target_emulate_report_target_port_groups(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_port *port; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; + struct se_lun *lun; unsigned char *buf; u32 rd_len = 0, off; int ext_hdr = (cmd->t_task_cdb[1] & 0x20); @@ -222,9 +223,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) rd_len += 8; spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(tg_pt_gp_mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = tg_pt_gp_mem->tg_pt; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { /* * Start Target Port descriptor format * @@ -234,8 +234,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set RELATIVE TARGET PORT IDENTIFIER */ - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((lun->lun_rtpi >> 8) & 0xff); + buf[off++] = (lun->lun_rtpi & 0xff); rd_len += 4; } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); @@ -259,15 +259,11 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) * this CDB was received upon to determine this value individually * for ALUA target port group. */ - port = cmd->se_lun->lun_sep; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (tg_pt_gp_mem) { - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; - if (tg_pt_gp) - buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - } + spin_lock(&cmd->se_lun->lun_tg_pt_gp_lock); + tg_pt_gp = cmd->se_lun->lun_tg_pt_gp; + if (tg_pt_gp) + buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; + spin_unlock(&cmd->se_lun->lun_tg_pt_gp_lock); } transport_kunmap_data_sg(cmd); @@ -284,10 +280,9 @@ sense_reason_t target_emulate_set_target_port_groups(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_port *port, *l_port = cmd->se_lun->lun_sep; + struct se_lun *l_lun = cmd->se_lun; struct se_node_acl *nacl = cmd->se_sess->se_node_acl; struct t10_alua_tg_pt_gp *tg_pt_gp = NULL, *l_tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, *l_tg_pt_gp_mem; unsigned char *buf; unsigned char *ptr; sense_reason_t rc = TCM_NO_SENSE; @@ -295,9 +290,6 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) int alua_access_state, primary = 0, valid_states; u16 tg_pt_id, rtpi; - if (!l_port) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (cmd->data_length < 4) { pr_warn("SET TARGET PORT GROUPS parameter list length %u too" " small\n", cmd->data_length); @@ -312,29 +304,24 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * Determine if explicit ALUA via SET_TARGET_PORT_GROUPS is allowed * for the local tg_pt_gp. */ - l_tg_pt_gp_mem = l_port->sep_alua_tg_pt_gp_mem; - if (!l_tg_pt_gp_mem) { - pr_err("Unable to access l_port->sep_alua_tg_pt_gp_mem\n"); - rc = TCM_UNSUPPORTED_SCSI_OPCODE; - goto out; - } - spin_lock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - l_tg_pt_gp = l_tg_pt_gp_mem->tg_pt_gp; + spin_lock(&l_lun->lun_tg_pt_gp_lock); + l_tg_pt_gp = l_lun->lun_tg_pt_gp; if (!l_tg_pt_gp) { - spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - pr_err("Unable to access *l_tg_pt_gp_mem->tg_pt_gp\n"); + spin_unlock(&l_lun->lun_tg_pt_gp_lock); + pr_err("Unable to access l_lun->tg_pt_gp\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } - spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) { + spin_unlock(&l_lun->lun_tg_pt_gp_lock); pr_debug("Unable to process SET_TARGET_PORT_GROUPS" " while TPGS_EXPLICIT_ALUA is disabled\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; + spin_unlock(&l_lun->lun_tg_pt_gp_lock); ptr = &buf[4]; /* Skip over RESERVED area in header */ @@ -396,7 +383,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) spin_unlock(&dev->t10_alua.tg_pt_gps_lock); if (!core_alua_do_port_transition(tg_pt_gp, - dev, l_port, nacl, + dev, l_lun, nacl, alua_access_state, 1)) found = true; @@ -406,6 +393,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) } spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } else { + struct se_lun *lun; + /* * Extract the RELATIVE TARGET PORT IDENTIFIER to identify * the Target Port in question for the the incoming @@ -417,17 +406,16 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * for the struct se_device storage object. */ spin_lock(&dev->se_port_lock); - list_for_each_entry(port, &dev->dev_sep_list, - sep_list) { - if (port->sep_rtpi != rtpi) + list_for_each_entry(lun, &dev->dev_sep_list, + lun_dev_link) { + if (lun->lun_rtpi != rtpi) continue; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - + // XXX: racy unlock spin_unlock(&dev->se_port_lock); if (!core_alua_set_tg_pt_secondary_state( - tg_pt_gp_mem, port, 1, 1)) + lun, 1, 1)) found = true; spin_lock(&dev->se_port_lock); @@ -696,9 +684,7 @@ target_alua_state_check(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; unsigned char *cdb = cmd->t_task_cdb; struct se_lun *lun = cmd->se_lun; - struct se_port *port = lun->lun_sep; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; int out_alua_state, nonop_delay_msecs; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) @@ -706,33 +692,27 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; - if (!port) - return 0; /* * First, check for a struct se_port specific secondary ALUA target port * access state: OFFLINE */ - if (atomic_read(&port->sep_tg_pt_secondary_offline)) { + if (atomic_read(&lun->lun_tg_pt_secondary_offline)) { pr_debug("ALUA: Got secondary offline status for local" " target port\n"); set_ascq(cmd, ASCQ_04H_ALUA_OFFLINE); return TCM_CHECK_CONDITION_NOT_READY; } - /* - * Second, obtain the struct t10_alua_tg_pt_gp_member pointer to the - * ALUA target port group, to obtain current ALUA access state. - * Otherwise look for the underlying struct se_device association with - * a ALUA logical unit group. - */ - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) + + if (!lun->lun_tg_pt_gp) return 0; - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + + // XXX: keeps using tg_pt_gp witout reference after unlock + spin_unlock(&lun->lun_tg_pt_gp_lock); /* * Process ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED in a separate conditional * statement so the compiler knows explicitly to check this case first. @@ -764,7 +744,7 @@ target_alua_state_check(struct se_cmd *cmd) break; /* * OFFLINE is a secondary ALUA target port group access state, that is - * handled above with struct se_port->sep_tg_pt_secondary_offline=1 + * handled above with struct se_lun->lun_tg_pt_secondary_offline=1 */ case ALUA_ACCESS_STATE_OFFLINE: default: @@ -906,10 +886,6 @@ int core_alua_check_nonop_delay( } EXPORT_SYMBOL(core_alua_check_nonop_delay); -/* - * Called with tg_pt_gp->tg_pt_gp_md_mutex or tg_pt_gp_mem->sep_tg_pt_md_mutex - * - */ static int core_alua_write_tpg_metadata( const char *path, unsigned char *md_buf, @@ -971,16 +947,14 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; struct se_dev_entry *se_deve; + struct se_lun *lun; struct se_lun_acl *lacl; - struct se_port *port; - struct t10_alua_tg_pt_gp_member *mem; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = mem->tg_pt; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { /* * After an implicit target port asymmetric access state * change, a device server shall establish a unit attention @@ -995,14 +969,13 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) * every I_T nexus other than the I_T nexus on which the SET * TARGET PORT GROUPS command */ - atomic_inc_mb(&mem->tg_pt_gp_mem_ref_cnt); + atomic_inc_mb(&lun->lun_active); spin_unlock(&tg_pt_gp->tg_pt_gp_lock); - spin_lock_bh(&port->sep_alua_lock); - list_for_each_entry(se_deve, &port->sep_alua_list, - alua_port_list) { + spin_lock_bh(&lun->lun_deve_lock); + list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) { lacl = rcu_dereference_check(se_deve->se_lun_acl, - lockdep_is_held(&port->sep_alua_lock)); + lockdep_is_held(&lun->lun_deve_lock)); /* * se_deve->se_lun_acl pointer may be NULL for a * entry created without explicit Node+MappedLUN ACLs @@ -1014,18 +987,18 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && (tg_pt_gp->tg_pt_gp_alua_nacl != NULL) && (tg_pt_gp->tg_pt_gp_alua_nacl == lacl->se_lun_nacl) && - (tg_pt_gp->tg_pt_gp_alua_port != NULL) && - (tg_pt_gp->tg_pt_gp_alua_port == port)) + (tg_pt_gp->tg_pt_gp_alua_lun != NULL) && + (tg_pt_gp->tg_pt_gp_alua_lun == lun)) continue; core_scsi3_ua_allocate(lacl->se_lun_nacl, se_deve->mapped_lun, 0x2A, ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED); } - spin_unlock_bh(&port->sep_alua_lock); + spin_unlock_bh(&lun->lun_deve_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - atomic_dec_mb(&mem->tg_pt_gp_mem_ref_cnt); + atomic_dec_mb(&lun->lun_active); } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); /* @@ -1143,7 +1116,7 @@ static int core_alua_do_transition_tg_pt( int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *l_tg_pt_gp, struct se_device *l_dev, - struct se_port *l_port, + struct se_lun *l_lun, struct se_node_acl *l_nacl, int new_state, int explicit) @@ -1173,7 +1146,7 @@ int core_alua_do_port_transition( * core_alua_do_transition_tg_pt() will always return * success. */ - l_tg_pt_gp->tg_pt_gp_alua_port = l_port; + l_tg_pt_gp->tg_pt_gp_alua_lun = l_lun; l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl; rc = core_alua_do_transition_tg_pt(l_tg_pt_gp, new_state, explicit); @@ -1212,10 +1185,10 @@ int core_alua_do_port_transition( continue; if (l_tg_pt_gp == tg_pt_gp) { - tg_pt_gp->tg_pt_gp_alua_port = l_port; + tg_pt_gp->tg_pt_gp_alua_lun = l_lun; tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl; } else { - tg_pt_gp->tg_pt_gp_alua_port = NULL; + tg_pt_gp->tg_pt_gp_alua_lun = NULL; tg_pt_gp->tg_pt_gp_alua_nacl = NULL; } atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt); @@ -1252,22 +1225,20 @@ int core_alua_do_port_transition( return rc; } -/* - * Called with tg_pt_gp_mem->sep_tg_pt_md_mutex held - */ -static int core_alua_update_tpg_secondary_metadata( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port) +static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun) { + struct se_portal_group *se_tpg = lun->lun_tpg; unsigned char *md_buf; - struct se_portal_group *se_tpg = port->sep_tpg; char path[ALUA_METADATA_PATH_LEN], wwn[ALUA_SECONDARY_METADATA_WWN_LEN]; int len, rc; + mutex_lock(&lun->lun_tg_pt_md_mutex); + md_buf = kzalloc(ALUA_MD_BUF_LEN, GFP_KERNEL); if (!md_buf) { pr_err("Unable to allocate buf for ALUA metadata\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out_unlock; } memset(path, 0, ALUA_METADATA_PATH_LEN); @@ -1282,32 +1253,33 @@ static int core_alua_update_tpg_secondary_metadata( len = snprintf(md_buf, ALUA_MD_BUF_LEN, "alua_tg_pt_offline=%d\n" "alua_tg_pt_status=0x%02x\n", - atomic_read(&port->sep_tg_pt_secondary_offline), - port->sep_tg_pt_secondary_stat); + atomic_read(&lun->lun_tg_pt_secondary_offline), + lun->lun_tg_pt_secondary_stat); snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%u", se_tpg->se_tpg_tfo->get_fabric_name(), wwn, - port->sep_lun->unpacked_lun); + lun->unpacked_lun); rc = core_alua_write_tpg_metadata(path, md_buf, len); kfree(md_buf); +out_unlock: + mutex_unlock(&lun->lun_tg_pt_md_mutex); return rc; } static int core_alua_set_tg_pt_secondary_state( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, + struct se_lun *lun, int explicit, int offline) { struct t10_alua_tg_pt_gp *tg_pt_gp; int trans_delay_msecs; - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (!tg_pt_gp) { - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); pr_err("Unable to complete secondary state" " transition\n"); return -EINVAL; @@ -1315,14 +1287,14 @@ static int core_alua_set_tg_pt_secondary_state( trans_delay_msecs = tg_pt_gp->tg_pt_gp_trans_delay_msecs; /* * Set the secondary ALUA target port access state to OFFLINE - * or release the previously secondary state for struct se_port + * or release the previously secondary state for struct se_lun */ if (offline) - atomic_set(&port->sep_tg_pt_secondary_offline, 1); + atomic_set(&lun->lun_tg_pt_secondary_offline, 1); else - atomic_set(&port->sep_tg_pt_secondary_offline, 0); + atomic_set(&lun->lun_tg_pt_secondary_offline, 0); - port->sep_tg_pt_secondary_stat = (explicit) ? + lun->lun_tg_pt_secondary_stat = (explicit) ? ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; @@ -1331,7 +1303,7 @@ static int core_alua_set_tg_pt_secondary_state( "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, (offline) ? "OFFLINE" : "ONLINE"); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); /* * Do the optional transition delay after we set the secondary * ALUA access state. @@ -1342,11 +1314,8 @@ static int core_alua_set_tg_pt_secondary_state( * See if we need to update the ALUA fabric port metadata for * secondary state and status */ - if (port->sep_tg_pt_secondary_write_md) { - mutex_lock(&port->sep_tg_pt_md_mutex); - core_alua_update_tpg_secondary_metadata(tg_pt_gp_mem, port); - mutex_unlock(&port->sep_tg_pt_md_mutex); - } + if (lun->lun_tg_pt_secondary_write_md) + core_alua_update_tpg_secondary_metadata(lun); return 0; } @@ -1700,7 +1669,7 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, return NULL; } INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list); - INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_mem_list); + INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list); mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); @@ -1794,32 +1763,11 @@ again: return 0; } -struct t10_alua_tg_pt_gp_member *core_alua_allocate_tg_pt_gp_mem( - struct se_port *port) -{ - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; - - tg_pt_gp_mem = kmem_cache_zalloc(t10_alua_tg_pt_gp_mem_cache, - GFP_KERNEL); - if (!tg_pt_gp_mem) { - pr_err("Unable to allocate struct t10_alua_tg_pt_gp_member\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&tg_pt_gp_mem->tg_pt_gp_mem_list); - spin_lock_init(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - atomic_set(&tg_pt_gp_mem->tg_pt_gp_mem_ref_cnt, 0); - - tg_pt_gp_mem->tg_pt = port; - port->sep_alua_tg_pt_gp_mem = tg_pt_gp_mem; - - return tg_pt_gp_mem; -} - void core_alua_free_tg_pt_gp( struct t10_alua_tg_pt_gp *tg_pt_gp) { struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, *tg_pt_gp_mem_tmp; + struct se_lun *lun, *next; /* * Once we have reached this point, config_item_put() has already @@ -1850,30 +1798,24 @@ void core_alua_free_tg_pt_gp( * struct se_port. */ spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry_safe(tg_pt_gp_mem, tg_pt_gp_mem_tmp, - &tg_pt_gp->tg_pt_gp_mem_list, tg_pt_gp_mem_list) { - if (tg_pt_gp_mem->tg_pt_gp_assoc) { - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp->tg_pt_gp_members--; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; - } + list_for_each_entry_safe(lun, next, + &tg_pt_gp->tg_pt_gp_lun_list, lun_tg_pt_gp_link) { + list_del_init(&lun->lun_tg_pt_gp_link); + tg_pt_gp->tg_pt_gp_members--; + spin_unlock(&tg_pt_gp->tg_pt_gp_lock); /* - * tg_pt_gp_mem is associated with a single - * se_port->sep_alua_tg_pt_gp_mem, and is released via - * core_alua_free_tg_pt_gp_mem(). - * * If the passed tg_pt_gp does NOT match the default_tg_pt_gp, * assume we want to re-associate a given tg_pt_gp_mem with * default_tg_pt_gp. */ - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_lock(&lun->lun_tg_pt_gp_lock); if (tg_pt_gp != dev->t10_alua.default_tg_pt_gp) { - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, + __target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); } else - tg_pt_gp_mem->tg_pt_gp = NULL; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + lun->lun_tg_pt_gp = NULL; + spin_unlock(&lun->lun_tg_pt_gp_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); } @@ -1882,35 +1824,6 @@ void core_alua_free_tg_pt_gp( kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp); } -void core_alua_free_tg_pt_gp_mem(struct se_port *port) -{ - struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; - - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return; - - while (atomic_read(&tg_pt_gp_mem->tg_pt_gp_mem_ref_cnt)) - cpu_relax(); - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; - if (tg_pt_gp) { - spin_lock(&tg_pt_gp->tg_pt_gp_lock); - if (tg_pt_gp_mem->tg_pt_gp_assoc) { - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp->tg_pt_gp_members--; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; - } - spin_unlock(&tg_pt_gp->tg_pt_gp_lock); - tg_pt_gp_mem->tg_pt_gp = NULL; - } - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - - kmem_cache_free(t10_alua_tg_pt_gp_mem_cache, tg_pt_gp_mem); -} - static struct t10_alua_tg_pt_gp *core_alua_get_tg_pt_gp_by_name( struct se_device *dev, const char *name) { @@ -1944,50 +1857,58 @@ static void core_alua_put_tg_pt_gp_from_name( spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } -/* - * Called with struct t10_alua_tg_pt_gp_member->tg_pt_gp_mem_lock held - */ -void __core_alua_attach_tg_pt_gp_mem( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct t10_alua_tg_pt_gp *tg_pt_gp) +static void __target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) { + assert_spin_locked(&lun->lun_tg_pt_gp_lock); + spin_lock(&tg_pt_gp->tg_pt_gp_lock); - tg_pt_gp_mem->tg_pt_gp = tg_pt_gp; - tg_pt_gp_mem->tg_pt_gp_assoc = 1; - list_add_tail(&tg_pt_gp_mem->tg_pt_gp_mem_list, - &tg_pt_gp->tg_pt_gp_mem_list); + lun->lun_tg_pt_gp = tg_pt_gp; + list_add_tail(&lun->lun_tg_pt_gp_link, &tg_pt_gp->tg_pt_gp_lun_list); tg_pt_gp->tg_pt_gp_members++; spin_unlock(&tg_pt_gp->tg_pt_gp_lock); } -/* - * Called with struct t10_alua_tg_pt_gp_member->tg_pt_gp_mem_lock held - */ -static void __core_alua_drop_tg_pt_gp_mem( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct t10_alua_tg_pt_gp *tg_pt_gp) +void target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) { - spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp_mem->tg_pt_gp = NULL; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; - tg_pt_gp->tg_pt_gp_members--; - spin_unlock(&tg_pt_gp->tg_pt_gp_lock); + spin_lock(&lun->lun_tg_pt_gp_lock); + __target_attach_tg_pt_gp(lun, tg_pt_gp); + spin_unlock(&lun->lun_tg_pt_gp_lock); } -ssize_t core_alua_show_tg_pt_gp_info(struct se_port *port, char *page) +static void __target_detach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) +{ + assert_spin_locked(&lun->lun_tg_pt_gp_lock); + + spin_lock(&tg_pt_gp->tg_pt_gp_lock); + list_del_init(&lun->lun_tg_pt_gp_link); + tg_pt_gp->tg_pt_gp_members--; + spin_unlock(&tg_pt_gp->tg_pt_gp_lock); + + lun->lun_tg_pt_gp = NULL; +} + +void target_detach_tg_pt_gp(struct se_lun *lun) +{ + struct t10_alua_tg_pt_gp *tg_pt_gp; + + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; + if (tg_pt_gp) + __target_detach_tg_pt_gp(lun, tg_pt_gp); + spin_unlock(&lun->lun_tg_pt_gp_lock); +} + +ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page) { struct config_item *tg_pt_ci; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; ssize_t len = 0; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return len; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) { tg_pt_ci = &tg_pt_gp->tg_pt_gp_group.cg_item; len += sprintf(page, "TG Port Alias: %s\nTG Port Group ID:" @@ -1999,34 +1920,29 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_port *port, char *page) &tg_pt_gp->tg_pt_gp_alua_access_state)), core_alua_dump_status( tg_pt_gp->tg_pt_gp_alua_access_status), - (atomic_read(&port->sep_tg_pt_secondary_offline)) ? + atomic_read(&lun->lun_tg_pt_secondary_offline) ? "Offline" : "None", - core_alua_dump_status(port->sep_tg_pt_secondary_stat)); + core_alua_dump_status(lun->lun_tg_pt_secondary_stat)); } - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); return len; } ssize_t core_alua_store_tg_pt_gp_info( - struct se_port *port, + struct se_lun *lun, const char *page, size_t count) { - struct se_portal_group *tpg; - struct se_lun *lun; - struct se_device *dev = port->sep_lun->lun_se_dev; + struct se_portal_group *tpg = lun->lun_tpg; + struct se_device *dev = lun->lun_se_dev; struct t10_alua_tg_pt_gp *tg_pt_gp = NULL, *tg_pt_gp_new = NULL; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - tpg = port->sep_tpg; - lun = port->sep_lun; - - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return 0; + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + return -ENODEV; if (count > TG_PT_GROUP_NAME_BUF) { pr_err("ALUA Target Port Group alias too large!\n"); @@ -2050,8 +1966,8 @@ ssize_t core_alua_store_tg_pt_gp_info( return -ENODEV; } - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) { /* * Clearing an existing tg_pt_gp association, and replacing @@ -2069,24 +1985,19 @@ ssize_t core_alua_store_tg_pt_gp_info( &tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id); - __core_alua_drop_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp); - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, + __target_detach_tg_pt_gp(lun, tg_pt_gp); + __target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); return count; } - /* - * Removing existing association of tg_pt_gp_mem with tg_pt_gp - */ - __core_alua_drop_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp); + __target_detach_tg_pt_gp(lun, tg_pt_gp); move = 1; } - /* - * Associate tg_pt_gp_mem with tg_pt_gp_new. - */ - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp_new); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + + __target_attach_tg_pt_gp(lun, tg_pt_gp_new); + spin_unlock(&lun->lun_tg_pt_gp_lock); pr_debug("Target_Core_ConfigFS: %s %s/tpgt_%hu/%s to ALUA" " Target Port Group: alua/%s, ID: %hu\n", (move) ? "Moving" : "Adding", tpg->se_tpg_tfo->tpg_get_wwn(tpg), @@ -2269,11 +2180,8 @@ ssize_t core_alua_store_preferred_bit( ssize_t core_alua_show_offline_bit(struct se_lun *lun, char *page) { - if (!lun->lun_sep) - return -ENODEV; - return sprintf(page, "%d\n", - atomic_read(&lun->lun_sep->sep_tg_pt_secondary_offline)); + atomic_read(&lun->lun_tg_pt_secondary_offline)); } ssize_t core_alua_store_offline_bit( @@ -2281,11 +2189,12 @@ ssize_t core_alua_store_offline_bit( const char *page, size_t count) { - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; + struct se_device *dev = lun->lun_se_dev; unsigned long tmp; int ret; - if (!lun->lun_sep) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; ret = kstrtoul(page, 0, &tmp); @@ -2298,14 +2207,8 @@ ssize_t core_alua_store_offline_bit( tmp); return -EINVAL; } - tg_pt_gp_mem = lun->lun_sep->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) { - pr_err("Unable to locate *tg_pt_gp_mem\n"); - return -EINVAL; - } - ret = core_alua_set_tg_pt_secondary_state(tg_pt_gp_mem, - lun->lun_sep, 0, (int)tmp); + ret = core_alua_set_tg_pt_secondary_state(lun, 0, (int)tmp); if (ret < 0) return -EINVAL; @@ -2316,7 +2219,7 @@ ssize_t core_alua_show_secondary_status( struct se_lun *lun, char *page) { - return sprintf(page, "%d\n", lun->lun_sep->sep_tg_pt_secondary_stat); + return sprintf(page, "%d\n", lun->lun_tg_pt_secondary_stat); } ssize_t core_alua_store_secondary_status( @@ -2339,7 +2242,7 @@ ssize_t core_alua_store_secondary_status( tmp); return -EINVAL; } - lun->lun_sep->sep_tg_pt_secondary_stat = (int)tmp; + lun->lun_tg_pt_secondary_stat = (int)tmp; return count; } @@ -2348,8 +2251,7 @@ ssize_t core_alua_show_secondary_write_metadata( struct se_lun *lun, char *page) { - return sprintf(page, "%d\n", - lun->lun_sep->sep_tg_pt_secondary_write_md); + return sprintf(page, "%d\n", lun->lun_tg_pt_secondary_write_md); } ssize_t core_alua_store_secondary_write_metadata( @@ -2370,7 +2272,7 @@ ssize_t core_alua_store_secondary_write_metadata( " %lu\n", tmp); return -EINVAL; } - lun->lun_sep->sep_tg_pt_secondary_write_md = (int)tmp; + lun->lun_tg_pt_secondary_write_md = (int)tmp; return count; } diff --git a/drivers/target/target_core_alua.h b/drivers/target/target_core_alua.h index 0a7d65e80404..9b250f9b33bf 100644 --- a/drivers/target/target_core_alua.h +++ b/drivers/target/target_core_alua.h @@ -85,7 +85,6 @@ extern struct kmem_cache *t10_alua_lu_gp_cache; extern struct kmem_cache *t10_alua_lu_gp_mem_cache; extern struct kmem_cache *t10_alua_tg_pt_gp_cache; -extern struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; extern struct kmem_cache *t10_alua_lba_map_cache; extern struct kmem_cache *t10_alua_lba_map_mem_cache; @@ -94,7 +93,7 @@ extern sense_reason_t target_emulate_set_target_port_groups(struct se_cmd *); extern sense_reason_t target_emulate_report_referrals(struct se_cmd *); extern int core_alua_check_nonop_delay(struct se_cmd *); extern int core_alua_do_port_transition(struct t10_alua_tg_pt_gp *, - struct se_device *, struct se_port *, + struct se_device *, struct se_lun *, struct se_node_acl *, int, int); extern char *core_alua_dump_status(int); extern struct t10_alua_lba_map *core_alua_allocate_lba_map( @@ -117,14 +116,11 @@ extern void core_alua_drop_lu_gp_dev(struct se_device *); extern struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp( struct se_device *, const char *, int); extern int core_alua_set_tg_pt_gp_id(struct t10_alua_tg_pt_gp *, u16); -extern struct t10_alua_tg_pt_gp_member *core_alua_allocate_tg_pt_gp_mem( - struct se_port *); extern void core_alua_free_tg_pt_gp(struct t10_alua_tg_pt_gp *); -extern void core_alua_free_tg_pt_gp_mem(struct se_port *); -extern void __core_alua_attach_tg_pt_gp_mem(struct t10_alua_tg_pt_gp_member *, - struct t10_alua_tg_pt_gp *); -extern ssize_t core_alua_show_tg_pt_gp_info(struct se_port *, char *); -extern ssize_t core_alua_store_tg_pt_gp_info(struct se_port *, const char *, +extern void target_detach_tg_pt_gp(struct se_lun *); +extern void target_attach_tg_pt_gp(struct se_lun *, struct t10_alua_tg_pt_gp *); +extern ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *, char *); +extern ssize_t core_alua_store_tg_pt_gp_info(struct se_lun *, const char *, size_t); extern ssize_t core_alua_show_access_type(struct t10_alua_tg_pt_gp *, char *); extern ssize_t core_alua_store_access_type(struct t10_alua_tg_pt_gp *, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 4313eea060ed..f97b969e6714 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -2889,21 +2889,16 @@ static ssize_t target_core_alua_tg_pt_gp_show_attr_members( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - struct se_port *port; - struct se_portal_group *tpg; struct se_lun *lun; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; ssize_t len = 0, cur_len; unsigned char buf[TG_PT_GROUP_NAME_BUF]; memset(buf, 0, TG_PT_GROUP_NAME_BUF); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(tg_pt_gp_mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = tg_pt_gp_mem->tg_pt; - tpg = port->sep_tpg; - lun = port->sep_lun; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { + struct se_portal_group *tpg = lun->lun_tpg; cur_len = snprintf(buf, TG_PT_GROUP_NAME_BUF, "%s/%s/tpgt_%hu" "/%s\n", tpg->se_tpg_tfo->get_fabric_name(), diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 1e4485b1849d..8485e9a789fc 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -120,8 +120,8 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) (se_cmd->data_direction != DMA_NONE)) return TCM_WRITE_PROTECTED; - se_lun = &se_sess->se_tpg->tpg_virt_lun0; - se_cmd->se_lun = &se_sess->se_tpg->tpg_virt_lun0; + se_lun = se_sess->se_tpg->tpg_virt_lun0; + se_cmd->se_lun = se_sess->se_tpg->tpg_virt_lun0; se_cmd->orig_fe_lun = 0; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -309,7 +309,6 @@ int core_enable_device_list_for_node( struct se_node_acl *nacl, struct se_portal_group *tpg) { - struct se_port *port = lun->lun_sep; struct se_dev_entry *orig, *new; new = kzalloc(sizeof(*new), GFP_KERNEL); @@ -320,8 +319,8 @@ int core_enable_device_list_for_node( atomic_set(&new->ua_count, 0); spin_lock_init(&new->ua_lock); - INIT_LIST_HEAD(&new->alua_port_list); INIT_LIST_HEAD(&new->ua_list); + INIT_LIST_HEAD(&new->lun_link); new->mapped_lun = mapped_lun; kref_init(&new->pr_kref); @@ -357,10 +356,10 @@ int core_enable_device_list_for_node( hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); mutex_unlock(&nacl->lun_entry_mutex); - spin_lock_bh(&port->sep_alua_lock); - list_del(&orig->alua_port_list); - list_add_tail(&new->alua_port_list, &port->sep_alua_list); - spin_unlock_bh(&port->sep_alua_lock); + spin_lock_bh(&lun->lun_deve_lock); + list_del(&orig->lun_link); + list_add_tail(&new->lun_link, &lun->lun_deve_list); + spin_unlock_bh(&lun->lun_deve_lock); kref_put(&orig->pr_kref, target_pr_kref_release); wait_for_completion(&orig->pr_comp); @@ -374,9 +373,9 @@ int core_enable_device_list_for_node( hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); mutex_unlock(&nacl->lun_entry_mutex); - spin_lock_bh(&port->sep_alua_lock); - list_add_tail(&new->alua_port_list, &port->sep_alua_list); - spin_unlock_bh(&port->sep_alua_lock); + spin_lock_bh(&lun->lun_deve_lock); + list_add_tail(&new->lun_link, &lun->lun_deve_list); + spin_unlock_bh(&lun->lun_deve_lock); return 0; } @@ -390,23 +389,22 @@ void core_disable_device_list_for_node( struct se_node_acl *nacl, struct se_portal_group *tpg) { - struct se_port *port = lun->lun_sep; /* * If the MappedLUN entry is being disabled, the entry in - * port->sep_alua_list must be removed now before clearing the + * lun->lun_deve_list must be removed now before clearing the * struct se_dev_entry pointers below as logic in * core_alua_do_transition_tg_pt() depends on these being present. * * deve->se_lun_acl will be NULL for demo-mode created LUNs * that have not been explicitly converted to MappedLUNs -> - * struct se_lun_acl, but we remove deve->alua_port_list from - * port->sep_alua_list. This also means that active UAs and + * struct se_lun_acl, but we remove deve->lun_link from + * lun->lun_deve_list. This also means that active UAs and * NodeACL context specific PR metadata for demo-mode * MappedLUN *deve will be released below.. */ - spin_lock_bh(&port->sep_alua_lock); - list_del(&orig->alua_port_list); - spin_unlock_bh(&port->sep_alua_lock); + spin_lock_bh(&lun->lun_deve_lock); + list_del(&orig->lun_link); + spin_unlock_bh(&lun->lun_deve_lock); /* * Disable struct se_dev_entry LUN ACL mapping */ @@ -458,27 +456,16 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) mutex_unlock(&tpg->acl_node_mutex); } -static struct se_port *core_alloc_port(struct se_device *dev) +int core_alloc_rtpi(struct se_lun *lun, struct se_device *dev) { - struct se_port *port, *port_tmp; - - port = kzalloc(sizeof(struct se_port), GFP_KERNEL); - if (!port) { - pr_err("Unable to allocate struct se_port\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&port->sep_alua_list); - INIT_LIST_HEAD(&port->sep_list); - atomic_set(&port->sep_tg_pt_secondary_offline, 0); - spin_lock_init(&port->sep_alua_lock); - mutex_init(&port->sep_tg_pt_md_mutex); + struct se_lun *tmp; spin_lock(&dev->se_port_lock); - if (dev->dev_port_count == 0x0000ffff) { + if (dev->export_count == 0x0000ffff) { pr_warn("Reached dev->dev_port_count ==" " 0x0000ffff\n"); spin_unlock(&dev->se_port_lock); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } again: /* @@ -493,135 +480,23 @@ again: * 2h Relative port 2, historically known as port B * 3h to FFFFh Relative port 3 through 65 535 */ - port->sep_rtpi = dev->dev_rpti_counter++; - if (!port->sep_rtpi) + lun->lun_rtpi = dev->dev_rpti_counter++; + if (!lun->lun_rtpi) goto again; - list_for_each_entry(port_tmp, &dev->dev_sep_list, sep_list) { + list_for_each_entry(tmp, &dev->dev_sep_list, lun_dev_link) { /* * Make sure RELATIVE TARGET PORT IDENTIFIER is unique * for 16-bit wrap.. */ - if (port->sep_rtpi == port_tmp->sep_rtpi) + if (lun->lun_rtpi == tmp->lun_rtpi) goto again; } spin_unlock(&dev->se_port_lock); - return port; -} - -static void core_export_port( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_port *port, - struct se_lun *lun) -{ - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem = NULL; - - spin_lock(&dev->se_port_lock); - spin_lock(&lun->lun_sep_lock); - port->sep_tpg = tpg; - port->sep_lun = lun; - lun->lun_sep = port; - spin_unlock(&lun->lun_sep_lock); - - list_add_tail(&port->sep_list, &dev->dev_sep_list); - spin_unlock(&dev->se_port_lock); - - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && - !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { - tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port); - if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) { - pr_err("Unable to allocate t10_alua_tg_pt" - "_gp_member_t\n"); - return; - } - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, - dev->t10_alua.default_tg_pt_gp); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - pr_debug("%s/%s: Adding to default ALUA Target Port" - " Group: alua/default_tg_pt_gp\n", - dev->transport->name, tpg->se_tpg_tfo->get_fabric_name()); - } - - dev->dev_port_count++; - port->sep_index = port->sep_rtpi; /* RELATIVE TARGET PORT IDENTIFIER */ -} - -/* - * Called with struct se_device->se_port_lock spinlock held. - */ -static void core_release_port(struct se_device *dev, struct se_port *port) - __releases(&dev->se_port_lock) __acquires(&dev->se_port_lock) -{ - /* - * Wait for any port reference for PR ALL_TG_PT=1 operation - * to complete in __core_scsi3_alloc_registration() - */ - spin_unlock(&dev->se_port_lock); - if (atomic_read(&port->sep_tg_pt_ref_cnt)) - cpu_relax(); - spin_lock(&dev->se_port_lock); - - core_alua_free_tg_pt_gp_mem(port); - - list_del(&port->sep_list); - dev->dev_port_count--; - kfree(port); -} - -int core_dev_export( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_lun *lun) -{ - struct se_hba *hba = dev->se_hba; - struct se_port *port; - - port = core_alloc_port(dev); - if (IS_ERR(port)) - return PTR_ERR(port); - - lun->lun_index = dev->dev_index; - lun->lun_se_dev = dev; - lun->lun_rtpi = port->sep_rtpi; - - spin_lock(&hba->device_lock); - dev->export_count++; - spin_unlock(&hba->device_lock); - - core_export_port(dev, tpg, port, lun); return 0; } -void core_dev_unexport( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_lun *lun) -{ - struct se_hba *hba = dev->se_hba; - struct se_port *port = lun->lun_sep; - - spin_lock(&lun->lun_sep_lock); - if (lun->lun_se_dev == NULL) { - spin_unlock(&lun->lun_sep_lock); - return; - } - spin_unlock(&lun->lun_sep_lock); - - spin_lock(&dev->se_port_lock); - core_release_port(dev, port); - spin_unlock(&dev->se_port_lock); - - spin_lock(&hba->device_lock); - dev->export_count--; - spin_unlock(&hba->device_lock); - - lun->lun_sep = NULL; - lun->lun_se_dev = NULL; -} - static void se_release_vpd_for_dev(struct se_device *dev) { struct t10_vpd *vpd, *vpd_tmp; @@ -783,10 +658,10 @@ int core_dev_add_initiator_node_lun_acl( } int core_dev_del_initiator_node_lun_acl( - struct se_portal_group *tpg, struct se_lun *lun, struct se_lun_acl *lacl) { + struct se_portal_group *tpg = lun->lun_tpg; struct se_node_acl *nacl; struct se_dev_entry *deve; @@ -930,6 +805,10 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun->lun_se_dev = dev; spin_lock_init(&xcopy_lun->lun_sep_lock); init_completion(&xcopy_lun->lun_ref_comp); + INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); + INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); + mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); + xcopy_lun->lun_tpg = &xcopy_pt_tpg; return dev; } diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 9be8030e016f..0ee182fce1a6 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -91,12 +91,11 @@ static int target_fabric_mappedlun_link( /* * Ensure that the source port exists */ - if (!lun->lun_sep || !lun->lun_sep->sep_tpg) { - pr_err("Source se_lun->lun_sep or lun->lun_sep->sep" - "_tpg does not exist\n"); + if (!lun->lun_se_dev) { + pr_err("Source se_lun->lun_se_dev does not exist\n"); return -EINVAL; } - se_tpg = lun->lun_sep->sep_tpg; + se_tpg = lun->lun_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; tpg_ci = &nacl_ci->ci_group->cg_item; @@ -150,9 +149,8 @@ static int target_fabric_mappedlun_unlink( struct se_lun_acl, se_lun_group); struct se_lun *lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - struct se_portal_group *se_tpg = lun->lun_sep->sep_tpg; - return core_dev_del_initiator_node_lun_acl(se_tpg, lun, lacl); + return core_dev_del_initiator_node_lun_acl(lun, lacl); } CONFIGFS_EATTR_STRUCT(target_fabric_mappedlun, se_lun_acl); @@ -643,10 +641,10 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_gp( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; - return core_alua_show_tg_pt_gp_info(lun->lun_sep, page); + return core_alua_show_tg_pt_gp_info(lun, page); } static ssize_t target_fabric_port_store_attr_alua_tg_pt_gp( @@ -654,10 +652,10 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_gp( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; - return core_alua_store_tg_pt_gp_info(lun->lun_sep, page, count); + return core_alua_store_tg_pt_gp_info(lun, page, count); } TCM_PORT_ATTR(alua_tg_pt_gp, S_IRUGO | S_IWUSR); @@ -669,7 +667,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_offline( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_offline_bit(lun, page); @@ -680,7 +678,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_offline( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_offline_bit(lun, page, count); @@ -695,7 +693,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_status( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_secondary_status(lun, page); @@ -706,7 +704,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_status( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_secondary_status(lun, page, count); @@ -721,7 +719,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_write_md( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_secondary_write_metadata(lun, page); @@ -732,7 +730,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_write_md( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_secondary_write_metadata(lun, page, count); @@ -811,7 +809,7 @@ static int target_fabric_port_unlink( { struct se_lun *lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - struct se_portal_group *se_tpg = lun->lun_sep->sep_tpg; + struct se_portal_group *se_tpg = lun->lun_tpg; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; if (tf->tf_ops->fabric_pre_unlink) { diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 23f992ed7cae..8d8737a13e6f 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -21,6 +21,7 @@ extern struct t10_alua_lu_gp *default_lu_gp; extern struct mutex g_device_mutex; extern struct list_head g_device_list; +int core_alloc_rtpi(struct se_lun *lun, struct se_device *dev); struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16); void target_pr_kref_release(struct kref *); void core_free_device_list_for_node(struct se_node_acl *, @@ -32,10 +33,6 @@ int core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *, void core_disable_device_list_for_node(struct se_lun *, struct se_dev_entry *, struct se_node_acl *, struct se_portal_group *); void core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *); -int core_dev_export(struct se_device *, struct se_portal_group *, - struct se_lun *); -void core_dev_unexport(struct se_device *, struct se_portal_group *, - struct se_lun *); int core_dev_add_lun(struct se_portal_group *, struct se_device *, struct se_lun *lun); void core_dev_del_lun(struct se_portal_group *, struct se_lun *); @@ -43,8 +40,8 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group * struct se_node_acl *, u32, int *); int core_dev_add_initiator_node_lun_acl(struct se_portal_group *, struct se_lun_acl *, struct se_lun *lun, u32); -int core_dev_del_initiator_node_lun_acl(struct se_portal_group *, - struct se_lun *, struct se_lun_acl *); +int core_dev_del_initiator_node_lun_acl(struct se_lun *, + struct se_lun_acl *); void core_dev_free_initiator_node_lun_acl(struct se_portal_group *, struct se_lun_acl *lacl); int core_dev_setup_virtual_lun0(void); @@ -120,4 +117,7 @@ void target_stat_setup_dev_default_groups(struct se_device *); void target_stat_setup_port_default_groups(struct se_lun *); void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *); +/* target_core_xcopy.c */ +extern struct se_portal_group xcopy_pt_tpg; + #endif /* TARGET_CORE_INTERNAL_H */ diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b983f8a54766..60624bb6c598 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -642,7 +642,7 @@ static struct t10_pr_registration *__core_scsi3_do_alloc_registration( pr_reg->pr_reg_deve = deve; pr_reg->pr_res_mapped_lun = mapped_lun; pr_reg->pr_aptpl_target_lun = lun->unpacked_lun; - pr_reg->tg_pt_sep_rtpi = lun->lun_sep->sep_rtpi; + pr_reg->tg_pt_sep_rtpi = lun->lun_rtpi; pr_reg->pr_res_key = sa_res_key; pr_reg->pr_reg_all_tg_pt = all_tg_pt; pr_reg->pr_reg_aptpl = aptpl; @@ -680,8 +680,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( struct se_dev_entry *deve_tmp; struct se_node_acl *nacl_tmp; struct se_lun_acl *lacl_tmp; - struct se_lun *lun_tmp; - struct se_port *port, *port_tmp; + struct se_lun *lun_tmp, *next, *dest_lun; const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; struct t10_pr_registration *pr_reg, *pr_reg_atp, *pr_reg_tmp, *pr_reg_tmp_safe; int ret; @@ -704,13 +703,12 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * for ALL_TG_PT=1 */ spin_lock(&dev->se_port_lock); - list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) { - atomic_inc_mb(&port->sep_tg_pt_ref_cnt); + list_for_each_entry_safe(lun_tmp, next, &dev->dev_sep_list, lun_dev_link) { + atomic_inc_mb(&lun_tmp->lun_active); spin_unlock(&dev->se_port_lock); - spin_lock_bh(&port->sep_alua_lock); - list_for_each_entry(deve_tmp, &port->sep_alua_list, - alua_port_list) { + spin_lock_bh(&lun_tmp->lun_deve_lock); + list_for_each_entry(deve_tmp, &lun_tmp->lun_deve_list, lun_link) { /* * This pointer will be NULL for demo mode MappedLUNs * that have not been make explicit via a ConfigFS @@ -720,7 +718,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( continue; lacl_tmp = rcu_dereference_check(deve_tmp->se_lun_acl, - lockdep_is_held(&port->sep_alua_lock)); + lockdep_is_held(&lun_tmp->lun_deve_lock)); nacl_tmp = lacl_tmp->se_lun_nacl; /* * Skip the matching struct se_node_acl that is allocated @@ -742,7 +740,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( continue; kref_get(&deve_tmp->pr_kref); - spin_unlock_bh(&port->sep_alua_lock); + spin_unlock_bh(&lun_tmp->lun_deve_lock); /* * Grab a configfs group dependency that is released * for the exception path at label out: below, or upon @@ -753,7 +751,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (ret < 0) { pr_err("core_scsi3_lunacl_depend" "_item() failed\n"); - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); + atomic_dec_mb(&lun->lun_active); kref_put(&deve_tmp->pr_kref, target_pr_kref_release); goto out; } @@ -764,27 +762,27 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * the original *pr_reg is processed in * __core_scsi3_add_registration() */ - lun_tmp = rcu_dereference_check(deve_tmp->se_lun, + dest_lun = rcu_dereference_check(deve_tmp->se_lun, atomic_read(&deve_tmp->pr_kref.refcount) != 0); pr_reg_atp = __core_scsi3_do_alloc_registration(dev, - nacl_tmp, lun_tmp, deve_tmp, + nacl_tmp, dest_lun, deve_tmp, deve_tmp->mapped_lun, NULL, sa_res_key, all_tg_pt, aptpl); if (!pr_reg_atp) { - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); + atomic_dec_mb(&lun_tmp->lun_active); core_scsi3_lunacl_undepend_item(deve_tmp); goto out; } list_add_tail(&pr_reg_atp->pr_reg_atp_mem_list, &pr_reg->pr_reg_atp_list); - spin_lock_bh(&port->sep_alua_lock); + spin_lock_bh(&lun_tmp->lun_deve_lock); } - spin_unlock_bh(&port->sep_alua_lock); + spin_unlock_bh(&lun_tmp->lun_deve_lock); spin_lock(&dev->se_port_lock); - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); + atomic_dec_mb(&lun_tmp->lun_active); } spin_unlock(&dev->se_port_lock); @@ -938,7 +936,7 @@ static int __core_scsi3_check_aptpl_registration( (pr_reg->pr_aptpl_target_lun == target_lun)) { pr_reg->pr_reg_nacl = nacl; - pr_reg->tg_pt_sep_rtpi = lun->lun_sep->sep_rtpi; + pr_reg->tg_pt_sep_rtpi = lun->lun_rtpi; list_del(&pr_reg->pr_reg_aptpl_list); spin_unlock(&pr_tmpl->aptpl_reg_lock); @@ -1465,7 +1463,6 @@ core_scsi3_decode_spec_i_port( int aptpl) { struct se_device *dev = cmd->se_dev; - struct se_port *tmp_port; struct se_portal_group *dest_tpg = NULL, *tmp_tpg; struct se_session *se_sess = cmd->se_sess; struct se_node_acl *dest_node_acl = NULL; @@ -1550,16 +1547,14 @@ core_scsi3_decode_spec_i_port( ptr = &buf[28]; while (tpdl > 0) { - struct se_lun *dest_lun; + struct se_lun *dest_lun, *tmp_lun; proto_ident = (ptr[0] & 0x0f); dest_tpg = NULL; spin_lock(&dev->se_port_lock); - list_for_each_entry(tmp_port, &dev->dev_sep_list, sep_list) { - tmp_tpg = tmp_port->sep_tpg; - if (!tmp_tpg) - continue; + list_for_each_entry(tmp_lun, &dev->dev_sep_list, lun_dev_link) { + tmp_tpg = tmp_lun->lun_tpg; /* * Look for the matching proto_ident provided by @@ -1567,7 +1562,7 @@ core_scsi3_decode_spec_i_port( */ if (tmp_tpg->proto_id != proto_ident) continue; - dest_rtpi = tmp_port->sep_rtpi; + dest_rtpi = tmp_lun->lun_rtpi; i_str = target_parse_pr_out_transport_id(tmp_tpg, (const char *)ptr, &tid_len, &iport_ptr); @@ -3119,9 +3114,8 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct se_session *se_sess = cmd->se_sess; struct se_device *dev = cmd->se_dev; struct se_dev_entry *dest_se_deve = NULL; - struct se_lun *se_lun = cmd->se_lun; + struct se_lun *se_lun = cmd->se_lun, *tmp_lun; struct se_node_acl *pr_res_nacl, *pr_reg_nacl, *dest_node_acl = NULL; - struct se_port *se_port; struct se_portal_group *se_tpg, *dest_se_tpg = NULL; const struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops; struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg; @@ -3206,12 +3200,10 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, } spin_lock(&dev->se_port_lock); - list_for_each_entry(se_port, &dev->dev_sep_list, sep_list) { - if (se_port->sep_rtpi != rtpi) - continue; - dest_se_tpg = se_port->sep_tpg; - if (!dest_se_tpg) + list_for_each_entry(tmp_lun, &dev->dev_sep_list, lun_dev_link) { + if (tmp_lun->lun_rtpi != rtpi) continue; + dest_se_tpg = tmp_lun->lun_tpg; dest_tf_ops = dest_se_tpg->se_tpg_tfo; if (!dest_tf_ops) continue; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 9f995b87b8d1..34d8292a4432 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -37,10 +37,9 @@ #include "target_core_ua.h" #include "target_core_xcopy.h" -static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) +static void spc_fill_alua_data(struct se_lun *lun, unsigned char *buf) { struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; /* * Set SCCS for MAINTENANCE_IN + REPORT_TARGET_PORT_GROUPS. @@ -53,17 +52,11 @@ static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) * * See spc4r17 section 6.4.2 Table 135 */ - if (!port) - return; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) buf[5] |= tg_pt_gp->tg_pt_gp_alua_access_type; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); } sense_reason_t @@ -94,7 +87,7 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) /* * Enable SCCS and TPGS fields for Emulated ALUA */ - spc_fill_alua_data(lun->lun_sep, buf); + spc_fill_alua_data(lun, buf); /* * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY @@ -181,11 +174,9 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; struct se_lun *lun = cmd->se_lun; - struct se_port *port = NULL; struct se_portal_group *tpg = NULL; struct t10_alua_lu_gp_member *lu_gp_mem; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; unsigned char *prod = &dev->t10_wwn.model[0]; u32 prod_len; u32 unit_serial_len, off = 0; @@ -267,18 +258,15 @@ check_t10_vend_desc: /* Header size for Designation descriptor */ len += (id_len + 4); off += (id_len + 4); - /* - * struct se_port is only set for INQUIRY VPD=1 through $FABRIC_MOD - */ - port = lun->lun_sep; - if (port) { + + if (1) { struct t10_alua_lu_gp *lu_gp; u32 padding, scsi_name_len, scsi_target_len; u16 lu_gp_id = 0; u16 tg_pt_gp_id = 0; u16 tpgt; - tpg = port->sep_tpg; + tpg = lun->lun_tpg; /* * Relative target port identifer, see spc4r17 * section 7.7.3.7 @@ -298,8 +286,8 @@ check_t10_vend_desc: /* Skip over Obsolete field in RTPI payload * in Table 472 */ off += 2; - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((lun->lun_rtpi >> 8) & 0xff); + buf[off++] = (lun->lun_rtpi & 0xff); len += 8; /* Header size + Designation descriptor */ /* * Target port group identifier, see spc4r17 @@ -308,18 +296,14 @@ check_t10_vend_desc: * Get the PROTOCOL IDENTIFIER as defined by spc4r17 * section 7.5.1 Table 362 */ - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - goto check_lu_gp; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (!tg_pt_gp) { - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); goto check_lu_gp; } tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); buf[off] = tpg->proto_id << 4; buf[off++] |= 0x1; /* CODE SET == Binary */ @@ -694,7 +678,7 @@ static sense_reason_t spc_emulate_inquiry(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_portal_group *tpg = cmd->se_lun->lun_sep->sep_tpg; + struct se_portal_group *tpg = cmd->se_lun->lun_tpg; unsigned char *rbuf; unsigned char *cdb = cmd->t_task_cdb; unsigned char *buf; @@ -708,7 +692,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - if (dev == tpg->tpg_virt_lun0.lun_se_dev) + if (dev == tpg->tpg_virt_lun0->lun_se_dev) buf[0] = 0x3f; /* Not connected */ else buf[0] = dev->transport->get_device_type(dev); diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index ea1287940a7c..8e080efb0188 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -106,7 +106,7 @@ static ssize_t target_stat_scsi_dev_show_attr_ports( struct se_device *dev = container_of(sgrps, struct se_device, dev_stat_grps); - return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_port_count); + return snprintf(page, PAGE_SIZE, "%u\n", dev->export_count); } DEV_STAT_SCSI_DEV_ATTR_RO(ports); @@ -542,19 +542,13 @@ static ssize_t target_stat_scsi_port_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_device *dev = lun->lun_se_dev; - struct se_hba *hba; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -564,17 +558,13 @@ static ssize_t target_stat_scsi_port_show_attr_dev( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_device *dev = lun->lun_se_dev; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -584,16 +574,13 @@ static ssize_t target_stat_scsi_port_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -603,20 +590,13 @@ static ssize_t target_stat_scsi_port_show_attr_role( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - ssize_t ret; - - if (!dev) - return -ENODEV; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -626,17 +606,15 @@ static ssize_t target_stat_scsi_port_show_attr_busy_count( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + dev = lun->lun_se_dev; + if (dev) { + /* FIXME: scsiPortBusyStatuses */ + ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - /* FIXME: scsiPortBusyStatuses */ - ret = snprintf(page, PAGE_SIZE, "%u\n", 0); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -685,19 +663,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_hba *hba; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -707,17 +679,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_dev( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -727,16 +695,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -746,20 +711,16 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_name( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; + struct se_portal_group *tpg = lun->lun_tpg; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - - ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", - tpg->se_tpg_tfo->get_fabric_name(), sep->sep_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", + tpg->se_tpg_tfo->get_fabric_name(), + lun->lun_rtpi); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -769,21 +730,16 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_port_index( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; + struct se_portal_group *tpg = lun->lun_tpg; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - - ret = snprintf(page, PAGE_SIZE, "%s%s%d\n", - tpg->se_tpg_tfo->tpg_get_wwn(tpg), "+t+", - tpg->se_tpg_tfo->tpg_get_tag(tpg)); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%s%s%d\n", + tpg->se_tpg_tfo->tpg_get_wwn(tpg), "+t+", + tpg->se_tpg_tfo->tpg_get_tag(tpg)); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -793,17 +749,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_in_cmds( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_stats.cmd_pdus); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -813,18 +765,14 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_write_mbytes( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(sep->sep_stats.rx_data_octets >> 20)); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(lun->lun_stats.rx_data_octets >> 20)); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -834,18 +782,14 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_read_mbytes( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(sep->sep_stats.tx_data_octets >> 20)); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(lun->lun_stats.tx_data_octets >> 20)); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -855,18 +799,15 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_hs_in_cmds( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + dev = lun->lun_se_dev; + if (dev) { + /* FIXME: scsiTgtPortHsInCommands */ + ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - - /* FIXME: scsiTgtPortHsInCommands */ - ret = snprintf(page, PAGE_SIZE, "%u\n", 0); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -921,20 +862,13 @@ static ssize_t target_stat_scsi_transport_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_hba *hba; - ssize_t ret; + struct se_device *dev; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -944,20 +878,17 @@ static ssize_t target_stat_scsi_transport_show_attr_device( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + dev = lun->lun_se_dev; + if (dev) { + /* scsiTransportType */ + ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", + tpg->se_tpg_tfo->get_fabric_name()); } - tpg = sep->sep_tpg; - /* scsiTransportType */ - ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", - tpg->se_tpg_tfo->get_fabric_name()); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -967,19 +898,15 @@ static ssize_t target_stat_scsi_transport_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - ret = snprintf(page, PAGE_SIZE, "%u\n", - tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); + dev = lun->lun_se_dev; + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); spin_unlock(&lun->lun_sep_lock); return ret; } @@ -990,24 +917,20 @@ static ssize_t target_stat_scsi_transport_show_attr_dev_name( { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_portal_group *tpg; + struct se_portal_group *tpg = lun->lun_tpg; struct t10_wwn *wwn; - ssize_t ret; + ssize_t ret = -ENODEV; spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + dev = lun->lun_se_dev; + if (dev) { + wwn = &dev->t10_wwn; + /* scsiTransportDevName */ + ret = snprintf(page, PAGE_SIZE, "%s+%s\n", + tpg->se_tpg_tfo->tpg_get_wwn(tpg), + (strlen(wwn->unit_serial)) ? wwn->unit_serial : + wwn->vendor); } - tpg = sep->sep_tpg; - wwn = &dev->t10_wwn; - /* scsiTransportDevName */ - ret = snprintf(page, PAGE_SIZE, "%s+%s\n", - tpg->se_tpg_tfo->tpg_get_wwn(tpg), - (strlen(wwn->unit_serial)) ? wwn->unit_serial : - wwn->vendor); spin_unlock(&lun->lun_sep_lock); return ret; } diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index fd531fcf3191..499b1399035a 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -40,6 +40,7 @@ #include #include "target_core_internal.h" +#include "target_core_alua.h" #include "target_core_pr.h" extern struct se_device *g_lun0_dev; @@ -484,32 +485,14 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) complete(&lun->lun_ref_comp); } -static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) -{ - /* Set in core_dev_setup_virtual_lun0() */ - struct se_device *dev = g_lun0_dev; - struct se_lun *lun = &se_tpg->tpg_virt_lun0; - u32 lun_access = TRANSPORT_LUNFLAGS_READ_ONLY; - int ret; - - lun->unpacked_lun = 0; - atomic_set(&lun->lun_acl_count, 0); - spin_lock_init(&lun->lun_sep_lock); - init_completion(&lun->lun_ref_comp); - - ret = core_tpg_add_lun(se_tpg, lun, lun_access, dev); - if (ret < 0) - return ret; - - return 0; -} - int core_tpg_register( const struct target_core_fabric_ops *tfo, struct se_wwn *se_wwn, struct se_portal_group *se_tpg, int proto_id) { + int ret; + INIT_HLIST_HEAD(&se_tpg->tpg_lun_hlist); se_tpg->proto_id = proto_id; se_tpg->se_tpg_tfo = tfo; @@ -523,8 +506,16 @@ int core_tpg_register( mutex_init(&se_tpg->acl_node_mutex); if (se_tpg->proto_id >= 0) { - if (core_tpg_setup_virtual_lun0(se_tpg) < 0) - return -ENOMEM; + se_tpg->tpg_virt_lun0 = core_tpg_alloc_lun(se_tpg, 0); + if (IS_ERR(se_tpg->tpg_virt_lun0)) + return PTR_ERR(se_tpg->tpg_virt_lun0); + + ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0, + TRANSPORT_LUNFLAGS_READ_ONLY, g_lun0_dev); + if (ret < 0) { + kfree(se_tpg->tpg_virt_lun0); + return ret; + } } spin_lock_bh(&tpg_lock); @@ -575,8 +566,10 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) kfree(nacl); } - if (se_tpg->proto_id >= 0) - core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); + if (se_tpg->proto_id >= 0) { + core_tpg_remove_lun(se_tpg, se_tpg->tpg_virt_lun0); + kfree_rcu(se_tpg->tpg_virt_lun0, rcu_head); + } return 0; } @@ -607,6 +600,15 @@ struct se_lun *core_tpg_alloc_lun( atomic_set(&lun->lun_acl_count, 0); spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); + INIT_LIST_HEAD(&lun->lun_deve_list); + INIT_LIST_HEAD(&lun->lun_dev_link); + atomic_set(&lun->lun_tg_pt_secondary_offline, 0); + spin_lock_init(&lun->lun_deve_lock); + mutex_init(&lun->lun_tg_pt_md_mutex); + INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link); + spin_lock_init(&lun->lun_tg_pt_gp_lock); + atomic_set(&lun->lun_active, 0); + lun->lun_tpg = tpg; return lun; } @@ -622,21 +624,40 @@ int core_tpg_add_lun( ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0, GFP_KERNEL); if (ret < 0) - return ret; + goto out; - ret = core_dev_export(dev, tpg, lun); - if (ret < 0) { - percpu_ref_exit(&lun->lun_ref); - return ret; - } + ret = core_alloc_rtpi(lun, dev); + if (ret) + goto out_kill_ref; + + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); mutex_lock(&tpg->tpg_lun_mutex); + + spin_lock(&lun->lun_sep_lock); + lun->lun_index = dev->dev_index; + lun->lun_se_dev = dev; + spin_unlock(&lun->lun_sep_lock); + + spin_lock(&dev->se_port_lock); + rcu_assign_pointer(lun->lun_se_dev, dev); + dev->export_count++; + list_add_tail(&lun->lun_dev_link, &dev->dev_sep_list); + spin_unlock(&dev->se_port_lock); + lun->lun_access = lun_access; if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist); mutex_unlock(&tpg->tpg_lun_mutex); return 0; + +out_kill_ref: + percpu_ref_exit(&lun->lun_ref); +out: + return ret; } void core_tpg_remove_lun( @@ -648,9 +669,19 @@ void core_tpg_remove_lun( core_clear_lun_from_tpg(lun, tpg); transport_clear_lun_ref(lun); - core_dev_unexport(lun->lun_se_dev, tpg, lun); - mutex_lock(&tpg->tpg_lun_mutex); + if (lun->lun_se_dev) { + while (atomic_read(&lun->lun_active)) + cpu_relax(); + + target_detach_tg_pt_gp(lun); + + spin_lock(&dev->se_port_lock); + list_del(&lun->lun_dev_link); + dev->export_count--; + rcu_assign_pointer(lun->lun_se_dev, NULL); + spin_unlock(&dev->se_port_lock); + } if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_del_rcu(&lun->link); mutex_unlock(&tpg->tpg_lun_mutex); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7c0518a28186..ef6fdd8a47c4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -60,7 +60,6 @@ struct kmem_cache *t10_pr_reg_cache; struct kmem_cache *t10_alua_lu_gp_cache; struct kmem_cache *t10_alua_lu_gp_mem_cache; struct kmem_cache *t10_alua_tg_pt_gp_cache; -struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; struct kmem_cache *t10_alua_lba_map_cache; struct kmem_cache *t10_alua_lba_map_mem_cache; @@ -119,16 +118,6 @@ int init_se_kmem_caches(void) "cache failed\n"); goto out_free_lu_gp_mem_cache; } - t10_alua_tg_pt_gp_mem_cache = kmem_cache_create( - "t10_alua_tg_pt_gp_mem_cache", - sizeof(struct t10_alua_tg_pt_gp_member), - __alignof__(struct t10_alua_tg_pt_gp_member), - 0, NULL); - if (!t10_alua_tg_pt_gp_mem_cache) { - pr_err("kmem_cache_create() for t10_alua_tg_pt_gp_" - "mem_t failed\n"); - goto out_free_tg_pt_gp_cache; - } t10_alua_lba_map_cache = kmem_cache_create( "t10_alua_lba_map_cache", sizeof(struct t10_alua_lba_map), @@ -136,7 +125,7 @@ int init_se_kmem_caches(void) if (!t10_alua_lba_map_cache) { pr_err("kmem_cache_create() for t10_alua_lba_map_" "cache failed\n"); - goto out_free_tg_pt_gp_mem_cache; + goto out_free_tg_pt_gp_cache; } t10_alua_lba_map_mem_cache = kmem_cache_create( "t10_alua_lba_map_mem_cache", @@ -159,8 +148,6 @@ out_free_lba_map_mem_cache: kmem_cache_destroy(t10_alua_lba_map_mem_cache); out_free_lba_map_cache: kmem_cache_destroy(t10_alua_lba_map_cache); -out_free_tg_pt_gp_mem_cache: - kmem_cache_destroy(t10_alua_tg_pt_gp_mem_cache); out_free_tg_pt_gp_cache: kmem_cache_destroy(t10_alua_tg_pt_gp_cache); out_free_lu_gp_mem_cache: @@ -186,7 +173,6 @@ void release_se_kmem_caches(void) kmem_cache_destroy(t10_alua_lu_gp_cache); kmem_cache_destroy(t10_alua_lu_gp_mem_cache); kmem_cache_destroy(t10_alua_tg_pt_gp_cache); - kmem_cache_destroy(t10_alua_tg_pt_gp_mem_cache); kmem_cache_destroy(t10_alua_lba_map_cache); kmem_cache_destroy(t10_alua_lba_map_mem_cache); } @@ -1277,8 +1263,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE; spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) - cmd->se_lun->lun_sep->sep_stats.cmd_pdus++; + cmd->se_lun->lun_stats.cmd_pdus++; spin_unlock(&cmd->se_lun->lun_sep_lock); return 0; } @@ -2076,10 +2061,7 @@ queue_rsp: switch (cmd->data_direction) { case DMA_FROM_DEVICE: spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.tx_data_octets += - cmd->data_length; - } + cmd->se_lun->lun_stats.tx_data_octets += cmd->data_length; spin_unlock(&cmd->se_lun->lun_sep_lock); /* * Perform READ_STRIP of PI using software emulation when @@ -2104,20 +2086,14 @@ queue_rsp: break; case DMA_TO_DEVICE: spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.rx_data_octets += - cmd->data_length; - } + cmd->se_lun->lun_stats.rx_data_octets += cmd->data_length; spin_unlock(&cmd->se_lun->lun_sep_lock); /* * Check if we need to send READ payload for BIDI-COMMAND */ if (cmd->se_cmd_flags & SCF_BIDI) { spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.tx_data_octets += - cmd->data_length; - } + cmd->se_lun->lun_stats.tx_data_octets += cmd->data_length; spin_unlock(&cmd->se_lun->lun_sep_lock); ret = cmd->se_tfo->queue_data_in(cmd); if (ret == -EAGAIN || ret == -ENOMEM) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 3556a9dcee46..49111d39e795 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -348,8 +348,7 @@ struct xcopy_pt_cmd { unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; }; -static struct se_port xcopy_pt_port; -static struct se_portal_group xcopy_pt_tpg; +struct se_portal_group xcopy_pt_tpg; static struct se_session xcopy_pt_sess; static struct se_node_acl xcopy_pt_nacl; @@ -439,17 +438,11 @@ int target_xcopy_setup_pt(void) return -ENOMEM; } - memset(&xcopy_pt_port, 0, sizeof(struct se_port)); - INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list); - INIT_LIST_HEAD(&xcopy_pt_port.sep_list); - mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex); - memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group)); INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node); INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list); INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list); - xcopy_pt_port.sep_tpg = &xcopy_pt_tpg; xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo; memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl)); @@ -490,10 +483,6 @@ static void target_xcopy_setup_pt_port( */ if (remote_port) { xpt_cmd->remote_port = remote_port; - pt_cmd->se_lun->lun_sep = &xcopy_pt_port; - pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to" - " cmd->se_lun->lun_sep for X-COPY data PUSH\n", - pt_cmd->se_lun->lun_sep); } else { pt_cmd->se_lun = ec_cmd->se_lun; pt_cmd->se_dev = ec_cmd->se_dev; @@ -513,10 +502,6 @@ static void target_xcopy_setup_pt_port( */ if (remote_port) { xpt_cmd->remote_port = remote_port; - pt_cmd->se_lun->lun_sep = &xcopy_pt_port; - pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to" - " cmd->se_lun->lun_sep for X-COPY data PULL\n", - pt_cmd->se_lun->lun_sep); } else { pt_cmd->se_lun = ec_cmd->se_lun; pt_cmd->se_dev = ec_cmd->se_dev; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index ee7abdd6b7a2..1927dd5947a7 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -304,22 +304,13 @@ struct t10_alua_tg_pt_gp { struct se_device *tg_pt_gp_dev; struct config_group tg_pt_gp_group; struct list_head tg_pt_gp_list; - struct list_head tg_pt_gp_mem_list; - struct se_port *tg_pt_gp_alua_port; + struct list_head tg_pt_gp_lun_list; + struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; struct delayed_work tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; -struct t10_alua_tg_pt_gp_member { - bool tg_pt_gp_assoc; - atomic_t tg_pt_gp_mem_ref_cnt; - spinlock_t tg_pt_gp_mem_lock; - struct t10_alua_tg_pt_gp *tg_pt_gp; - struct se_port *tg_pt; - struct list_head tg_pt_gp_mem_list; -}; - struct t10_vpd { unsigned char device_identifier[INQUIRY_VPD_DEVICE_IDENTIFIER_LEN]; int protocol_identifier_set; @@ -650,6 +641,7 @@ struct se_dev_entry { #define DEF_PR_REG_ACTIVE 1 unsigned long deve_flags; struct list_head alua_port_list; + struct list_head lun_link; struct list_head ua_list; struct hlist_node link; struct rcu_head rcu_head; @@ -697,7 +689,14 @@ struct se_port_stat_grps { struct config_group scsi_transport_group; }; +struct scsi_port_stats { + u32 cmd_pdus; + u64 tx_data_octets; + u64 rx_data_octets; +}; + struct se_lun { + /* RELATIVE TARGET PORT IDENTIFER */ u16 lun_rtpi; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; @@ -707,12 +706,30 @@ struct se_lun { u32 lun_index; atomic_t lun_acl_count; spinlock_t lun_sep_lock; - struct se_device *lun_se_dev; - struct se_port *lun_sep; + struct se_device __rcu *lun_se_dev; + + struct list_head lun_deve_list; + spinlock_t lun_deve_lock; + + /* ALUA state */ + int lun_tg_pt_secondary_stat; + int lun_tg_pt_secondary_write_md; + atomic_t lun_tg_pt_secondary_offline; + struct mutex lun_tg_pt_md_mutex; + + /* ALUA target port group linkage */ + struct list_head lun_tg_pt_gp_link; + struct t10_alua_tg_pt_gp *lun_tg_pt_gp; + spinlock_t lun_tg_pt_gp_lock; + + atomic_t lun_active; + struct se_portal_group *lun_tpg; + struct scsi_port_stats lun_stats; struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; struct percpu_ref lun_ref; + struct list_head lun_dev_link; struct hlist_node link; struct rcu_head rcu_head; }; @@ -737,7 +754,6 @@ struct se_device { #define DF_EMULATED_VPD_UNIT_SERIAL 0x00000004 #define DF_USING_UDEV_PATH 0x00000008 #define DF_USING_ALIAS 0x00000010 - u32 dev_port_count; /* Physical device queue depth */ u32 queue_depth; /* Used for SPC-2 reservations enforce of ISIDs */ @@ -754,7 +770,7 @@ struct se_device { atomic_t dev_ordered_id; atomic_t dev_ordered_sync; atomic_t dev_qf_count; - int export_count; + u32 export_count; spinlock_t delayed_cmd_lock; spinlock_t execute_task_lock; spinlock_t dev_reservation_lock; @@ -821,32 +837,6 @@ struct se_hba { struct target_backend *backend; }; -struct scsi_port_stats { - u64 cmd_pdus; - u64 tx_data_octets; - u64 rx_data_octets; -}; - -struct se_port { - /* RELATIVE TARGET PORT IDENTIFER */ - u16 sep_rtpi; - int sep_tg_pt_secondary_stat; - int sep_tg_pt_secondary_write_md; - u32 sep_index; - struct scsi_port_stats sep_stats; - /* Used for ALUA Target Port Groups membership */ - atomic_t sep_tg_pt_secondary_offline; - /* Used for PR ALL_TG_PT=1 */ - atomic_t sep_tg_pt_ref_cnt; - spinlock_t sep_alua_lock; - struct mutex sep_tg_pt_md_mutex; - struct t10_alua_tg_pt_gp_member *sep_alua_tg_pt_gp_mem; - struct se_lun *sep_lun; - struct se_portal_group *sep_tpg; - struct list_head sep_alua_list; - struct list_head sep_list; -}; - struct se_tpg_np { struct se_portal_group *tpg_np_parent; struct config_group tpg_np_group; @@ -872,7 +862,7 @@ struct se_portal_group { /* linked list for initiator ACL list */ struct list_head acl_node_list; struct hlist_head tpg_lun_hlist; - struct se_lun tpg_virt_lun0; + struct se_lun *tpg_virt_lun0; /* List of TCM sessions associated wth this TPG */ struct list_head tpg_sess_list; /* Pointer to $FABRIC_MOD dependent code */ From 4cc987eaff9144182cde88d6d132420c28d3f81b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 19 May 2015 00:03:07 -0700 Subject: [PATCH 085/839] target: Drop lun_sep_lock for se_lun->lun_se_dev RCU usage With se_port and t10_alua_tg_pt_gp_member being absored into se_lun, there is no need for an extra lock to protect se_lun->lun_se_dev assignment. This patch also converts backend drivers to use call_rcu() release to allow any se_device readers to complete. The call_rcu() instead of kfree_rcu() is required here because se_device is embedded into the backend driver specific structure. Also, convert se_lun->lun_stats to use atomic_long_t within the target_complete_ok_work() completion callback, and add FIXME for transport_lookup_tmr_lun() with se_lun->lun_ref. Finally, update sbp_update_unit_directory() special case usage with proper rcu_dereference_raw() and configfs symlink comment. Reported-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Cc: Paul E. McKenney Cc: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/sbp/sbp_target.c | 7 +- drivers/target/target_core_alua.c | 12 ++- drivers/target/target_core_device.c | 47 ++++++---- drivers/target/target_core_file.c | 11 ++- drivers/target/target_core_iblock.c | 10 +- drivers/target/target_core_pscsi.c | 11 ++- drivers/target/target_core_rd.c | 10 +- drivers/target/target_core_spc.c | 2 +- drivers/target/target_core_stat.c | 123 +++++++++++++------------ drivers/target/target_core_tpg.c | 16 ++-- drivers/target/target_core_transport.c | 20 ++-- drivers/target/target_core_user.c | 11 ++- include/target/target_core_base.h | 10 +- 13 files changed, 175 insertions(+), 115 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 47fb12fbaf47..28e3adf1eb85 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1905,8 +1905,11 @@ static int sbp_update_unit_directory(struct sbp_tport *tport) hlist_for_each_entry_rcu(lun, &tport->tpg->se_tpg.tpg_lun_hlist, link) { struct se_device *dev; int type; - - dev = lun->lun_se_dev; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + dev = rcu_dereference_raw(lun->lun_se_dev); type = dev->transport->get_device_type(dev); /* logical_unit_number */ diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 1109c2833fe6..0a0087311cfb 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1935,7 +1935,11 @@ ssize_t core_alua_store_tg_pt_gp_info( size_t count) { struct se_portal_group *tpg = lun->lun_tpg; - struct se_device *dev = lun->lun_se_dev; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); struct t10_alua_tg_pt_gp *tg_pt_gp = NULL, *tg_pt_gp_new = NULL; unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; @@ -2189,7 +2193,11 @@ ssize_t core_alua_store_offline_bit( const char *page, size_t count) { - struct se_device *dev = lun->lun_se_dev; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); unsigned long tmp; int ret; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 8485e9a789fc..3baa6cd7fded 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -61,7 +61,6 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; - struct se_device *dev; struct se_dev_entry *deve; if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) @@ -128,16 +127,21 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; } + /* + * RCU reference protected by percpu se_lun->lun_ref taken above that + * must drop to zero (including initial reference) before this se_lun + * pointer can be kfree_rcu() by the final se_lun->lun_group put via + * target_core_fabric_configfs.c:target_fabric_port_release + */ + se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); + atomic_long_inc(&se_cmd->se_dev->num_cmds); - /* Directly associate cmd with se_dev */ - se_cmd->se_dev = se_lun->lun_se_dev; - - dev = se_lun->lun_se_dev; - atomic_long_inc(&dev->num_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - atomic_long_add(se_cmd->data_length, &dev->write_bytes); + atomic_long_add(se_cmd->data_length, + &se_cmd->se_dev->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - atomic_long_add(se_cmd->data_length, &dev->read_bytes); + atomic_long_add(se_cmd->data_length, + &se_cmd->se_dev->read_bytes); return 0; } @@ -173,10 +177,11 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) unpacked_lun); return -ENODEV; } - - /* Directly associate cmd with se_dev */ - se_cmd->se_dev = se_lun->lun_se_dev; - se_tmr->tmr_dev = se_lun->lun_se_dev; + /* + * XXX: Add percpu se_lun->lun_ref reference count for TMR + */ + se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); + se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); @@ -389,6 +394,11 @@ void core_disable_device_list_for_node( struct se_node_acl *nacl, struct se_portal_group *tpg) { + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); /* * If the MappedLUN entry is being disabled, the entry in * lun->lun_deve_list must be removed now before clearing the @@ -426,7 +436,7 @@ void core_disable_device_list_for_node( kfree_rcu(orig, rcu_head); - core_scsi3_free_pr_reg_from_nacl(lun->lun_se_dev, nacl); + core_scsi3_free_pr_reg_from_nacl(dev, nacl); } /* core_clear_lun_from_tpg(): @@ -629,6 +639,11 @@ int core_dev_add_initiator_node_lun_acl( u32 lun_access) { struct se_node_acl *nacl = lacl->se_lun_nacl; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); if (!nacl) return -EINVAL; @@ -652,7 +667,7 @@ int core_dev_add_initiator_node_lun_acl( * Check to see if there are any existing persistent reservation APTPL * pre-registrations that need to be enabled for this LUN ACL.. */ - core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl, + core_scsi3_check_aptpl_registration(dev, tpg, lun, nacl, lacl->mapped_lun); return 0; } @@ -746,6 +761,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->se_hba = hba; dev->transport = hba->backend->ops; dev->prot_length = sizeof(struct se_dif_v1_tuple); + dev->hba_index = hba->hba_index; INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_sep_list); @@ -802,8 +818,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; xcopy_lun = &dev->xcopy_lun; - xcopy_lun->lun_se_dev = dev; - spin_lock_init(&xcopy_lun->lun_sep_lock); + rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_ref_comp); INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 948b61f59a55..238e3a256d04 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -241,6 +241,14 @@ fail: return ret; } +static void fd_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct fd_dev *fd_dev = FD_DEV(dev); + + kfree(fd_dev); +} + static void fd_free_device(struct se_device *dev) { struct fd_dev *fd_dev = FD_DEV(dev); @@ -249,8 +257,7 @@ static void fd_free_device(struct se_device *dev) filp_close(fd_dev->fd_file, NULL); fd_dev->fd_file = NULL; } - - kfree(fd_dev); + call_rcu(&dev->rcu_head, fd_dev_call_rcu); } static int fd_do_rw(struct se_cmd *cmd, struct file *fd, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 6d4738252564..ae8ad2da6632 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -191,6 +191,14 @@ out: return ret; } +static void iblock_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + + kfree(ib_dev); +} + static void iblock_free_device(struct se_device *dev) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); @@ -200,7 +208,7 @@ static void iblock_free_device(struct se_device *dev) if (ib_dev->ibd_bio_set != NULL) bioset_free(ib_dev->ibd_bio_set); - kfree(ib_dev); + call_rcu(&dev->rcu_head, iblock_dev_call_rcu); } static unsigned long long iblock_emulate_read_cap_with_block_size( diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index afb87a8d5668..c673980ded04 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -579,6 +579,14 @@ static int pscsi_configure_device(struct se_device *dev) return -ENODEV; } +static void pscsi_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); + + kfree(pdv); +} + static void pscsi_free_device(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); @@ -610,8 +618,7 @@ static void pscsi_free_device(struct se_device *dev) pdv->pdv_sd = NULL; } - - kfree(pdv); + call_rcu(&dev->rcu_head, pscsi_dev_call_rcu); } static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg, diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 55dd73e7f213..e98432705f39 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -350,12 +350,20 @@ fail: return ret; } +static void rd_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct rd_dev *rd_dev = RD_DEV(dev); + + kfree(rd_dev); +} + static void rd_free_device(struct se_device *dev) { struct rd_dev *rd_dev = RD_DEV(dev); rd_release_device_space(rd_dev); - kfree(rd_dev); + call_rcu(&dev->rcu_head, rd_dev_call_rcu); } static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 34d8292a4432..a2b377e06cfa 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -692,7 +692,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - if (dev == tpg->tpg_virt_lun0->lun_se_dev) + if (dev == rcu_access_pointer(tpg->tpg_virt_lun0->lun_se_dev)) buf[0] = 0x3f; /* Not connected */ else buf[0] = dev->transport->get_device_type(dev); diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 8e080efb0188..79c785242375 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -545,11 +545,11 @@ static ssize_t target_stat_scsi_port_show_attr_inst( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(inst); @@ -561,11 +561,11 @@ static ssize_t target_stat_scsi_port_show_attr_dev( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(dev); @@ -577,11 +577,11 @@ static ssize_t target_stat_scsi_port_show_attr_indx( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(indx); @@ -593,11 +593,11 @@ static ssize_t target_stat_scsi_port_show_attr_role( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(role); @@ -609,13 +609,13 @@ static ssize_t target_stat_scsi_port_show_attr_busy_count( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) { /* FIXME: scsiPortBusyStatuses */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(busy_count); @@ -666,11 +666,11 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_inst( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(inst); @@ -682,11 +682,11 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_dev( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(dev); @@ -698,11 +698,11 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_indx( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(indx); @@ -715,13 +715,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_name( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", tpg->se_tpg_tfo->get_fabric_name(), lun->lun_rtpi); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(name); @@ -734,13 +734,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_port_index( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%s%s%d\n", tpg->se_tpg_tfo->tpg_get_wwn(tpg), "+t+", tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(port_index); @@ -752,11 +752,12 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_in_cmds( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_stats.cmd_pdus); - spin_unlock(&lun->lun_sep_lock); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&lun->lun_stats.cmd_pdus)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(in_cmds); @@ -768,12 +769,12 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_write_mbytes( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(lun->lun_stats.rx_data_octets >> 20)); - spin_unlock(&lun->lun_sep_lock); + (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(write_mbytes); @@ -785,12 +786,12 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_read_mbytes( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(lun->lun_stats.tx_data_octets >> 20)); - spin_unlock(&lun->lun_sep_lock); + (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(read_mbytes); @@ -802,13 +803,13 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_hs_in_cmds( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) { /* FIXME: scsiTgtPortHsInCommands */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(hs_in_cmds); @@ -865,11 +866,11 @@ static ssize_t target_stat_scsi_transport_show_attr_inst( struct se_device *dev; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->se_hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(inst); @@ -882,14 +883,14 @@ static ssize_t target_stat_scsi_transport_show_attr_device( struct se_portal_group *tpg = lun->lun_tpg; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) { /* scsiTransportType */ ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", tpg->se_tpg_tfo->get_fabric_name()); } - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(device); @@ -902,12 +903,12 @@ static ssize_t target_stat_scsi_transport_show_attr_indx( struct se_portal_group *tpg = lun->lun_tpg; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(indx); @@ -916,13 +917,13 @@ static ssize_t target_stat_scsi_transport_show_attr_dev_name( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; + struct se_device *dev; struct se_portal_group *tpg = lun->lun_tpg; struct t10_wwn *wwn; ssize_t ret = -ENODEV; - spin_lock(&lun->lun_sep_lock); - dev = lun->lun_se_dev; + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); if (dev) { wwn = &dev->t10_wwn; /* scsiTransportDevName */ @@ -931,7 +932,7 @@ static ssize_t target_stat_scsi_transport_show_attr_dev_name( (strlen(wwn->unit_serial)) ? wwn->unit_serial : wwn->vendor); } - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(dev_name); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 499b1399035a..5b30940ccaf2 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -102,7 +102,8 @@ void core_tpg_add_node_to_devs( if (lun_orig && lun != lun_orig) continue; - dev = lun->lun_se_dev; + dev = rcu_dereference_check(lun->lun_se_dev, + lockdep_is_held(&tpg->tpg_lun_mutex)); /* * By default in LIO-Target $FABRIC_MOD, * demo_mode_write_protect is ON, or READ_ONLY; @@ -598,7 +599,6 @@ struct se_lun *core_tpg_alloc_lun( lun->unpacked_lun = unpacked_lun; lun->lun_link_magic = SE_LUN_LINK_MAGIC; atomic_set(&lun->lun_acl_count, 0); - spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); INIT_LIST_HEAD(&lun->lun_deve_list); INIT_LIST_HEAD(&lun->lun_dev_link); @@ -636,12 +636,8 @@ int core_tpg_add_lun( mutex_lock(&tpg->tpg_lun_mutex); - spin_lock(&lun->lun_sep_lock); - lun->lun_index = dev->dev_index; - lun->lun_se_dev = dev; - spin_unlock(&lun->lun_sep_lock); - spin_lock(&dev->se_port_lock); + lun->lun_index = dev->dev_index; rcu_assign_pointer(lun->lun_se_dev, dev); dev->export_count++; list_add_tail(&lun->lun_dev_link, &dev->dev_sep_list); @@ -664,7 +660,11 @@ void core_tpg_remove_lun( struct se_portal_group *tpg, struct se_lun *lun) { - struct se_device *dev = lun->lun_se_dev; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); core_clear_lun_from_tpg(lun, tpg); transport_clear_lun_ref(lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ef6fdd8a47c4..d8a59122fe3b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1261,10 +1261,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) return ret; cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE; - - spin_lock(&cmd->se_lun->lun_sep_lock); - cmd->se_lun->lun_stats.cmd_pdus++; - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_inc(&cmd->se_lun->lun_stats.cmd_pdus); return 0; } EXPORT_SYMBOL(target_setup_cmd_from_cdb); @@ -2060,9 +2057,8 @@ static void target_complete_ok_work(struct work_struct *work) queue_rsp: switch (cmd->data_direction) { case DMA_FROM_DEVICE: - spin_lock(&cmd->se_lun->lun_sep_lock); - cmd->se_lun->lun_stats.tx_data_octets += cmd->data_length; - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.tx_data_octets); /* * Perform READ_STRIP of PI using software emulation when * backend had PI enabled, if the transport will not be @@ -2085,16 +2081,14 @@ queue_rsp: goto queue_full; break; case DMA_TO_DEVICE: - spin_lock(&cmd->se_lun->lun_sep_lock); - cmd->se_lun->lun_stats.rx_data_octets += cmd->data_length; - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.rx_data_octets); /* * Check if we need to send READ payload for BIDI-COMMAND */ if (cmd->se_cmd_flags & SCF_BIDI) { - spin_lock(&cmd->se_lun->lun_sep_lock); - cmd->se_lun->lun_stats.tx_data_octets += cmd->data_length; - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.tx_data_octets); ret = cmd->se_tfo->queue_data_in(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index aebaad55e23f..949e6165ef8a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -950,6 +950,14 @@ static int tcmu_check_pending_cmd(int id, void *p, void *data) return -EINVAL; } +static void tcmu_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct tcmu_dev *udev = TCMU_DEV(dev); + + kfree(udev); +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -975,8 +983,7 @@ static void tcmu_free_device(struct se_device *dev) kfree(udev->uio_info.name); kfree(udev->name); } - - kfree(udev); + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); } enum { diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 1927dd5947a7..b82a989a4d3b 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -690,9 +690,9 @@ struct se_port_stat_grps { }; struct scsi_port_stats { - u32 cmd_pdus; - u64 tx_data_octets; - u64 rx_data_octets; + atomic_long_t cmd_pdus; + atomic_long_t tx_data_octets; + atomic_long_t rx_data_octets; }; struct se_lun { @@ -705,7 +705,6 @@ struct se_lun { u32 unpacked_lun; u32 lun_index; atomic_t lun_acl_count; - spinlock_t lun_sep_lock; struct se_device __rcu *lun_se_dev; struct list_head lun_deve_list; @@ -818,6 +817,9 @@ struct se_device { struct se_lun xcopy_lun; /* Protection Information */ int prot_length; + /* For se_lun->lun_se_dev RCU read-side critical access */ + u32 hba_index; + struct rcu_head rcu_head; }; struct se_hba { From 9e37d042cfcb003b885bb4c531cd6f07f62647d1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 20 May 2015 21:21:08 -0700 Subject: [PATCH 086/839] target: Drop se_lun->lun_active for existing percpu lun_ref With se_port_t and t10_alua_tg_pt_gp_member being absored into se_lun, there is no need for an extra atomic_t based reference count for PR ALL_TG_PT=1 and ALUA access state transition. Go ahead and use the existing percpu se_lun->lun_ref instead, and convert the two special cases to percpu_ref_tryget_live() to avoid se_lun if transport_clear_lun_ref() has already been invoked to shutdown the se_lun. Cc: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 5 +++-- drivers/target/target_core_pr.c | 9 +++++---- drivers/target/target_core_tpg.c | 10 ++++++---- include/target/target_core_base.h | 1 - 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 0a0087311cfb..02d8e1a8a6a5 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -969,7 +969,8 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) * every I_T nexus other than the I_T nexus on which the SET * TARGET PORT GROUPS command */ - atomic_inc_mb(&lun->lun_active); + if (!percpu_ref_tryget_live(&lun->lun_ref)) + continue; spin_unlock(&tg_pt_gp->tg_pt_gp_lock); spin_lock_bh(&lun->lun_deve_lock); @@ -998,7 +999,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) spin_unlock_bh(&lun->lun_deve_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - atomic_dec_mb(&lun->lun_active); + percpu_ref_put(&lun->lun_ref); } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); /* diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 60624bb6c598..94935eaa7fa7 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -704,7 +704,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( */ spin_lock(&dev->se_port_lock); list_for_each_entry_safe(lun_tmp, next, &dev->dev_sep_list, lun_dev_link) { - atomic_inc_mb(&lun_tmp->lun_active); + if (!percpu_ref_tryget_live(&lun_tmp->lun_ref)) + continue; spin_unlock(&dev->se_port_lock); spin_lock_bh(&lun_tmp->lun_deve_lock); @@ -751,7 +752,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (ret < 0) { pr_err("core_scsi3_lunacl_depend" "_item() failed\n"); - atomic_dec_mb(&lun->lun_active); + percpu_ref_put(&lun_tmp->lun_ref); kref_put(&deve_tmp->pr_kref, target_pr_kref_release); goto out; } @@ -770,7 +771,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( deve_tmp->mapped_lun, NULL, sa_res_key, all_tg_pt, aptpl); if (!pr_reg_atp) { - atomic_dec_mb(&lun_tmp->lun_active); + percpu_ref_put(&lun_tmp->lun_ref); core_scsi3_lunacl_undepend_item(deve_tmp); goto out; } @@ -782,7 +783,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( spin_unlock_bh(&lun_tmp->lun_deve_lock); spin_lock(&dev->se_port_lock); - atomic_dec_mb(&lun_tmp->lun_active); + percpu_ref_put(&lun_tmp->lun_ref); } spin_unlock(&dev->se_port_lock); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 5b30940ccaf2..b9fcf2c4898e 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -607,7 +607,6 @@ struct se_lun *core_tpg_alloc_lun( mutex_init(&lun->lun_tg_pt_md_mutex); INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link); spin_lock_init(&lun->lun_tg_pt_gp_lock); - atomic_set(&lun->lun_active, 0); lun->lun_tpg = tpg; return lun; @@ -667,13 +666,16 @@ void core_tpg_remove_lun( struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); core_clear_lun_from_tpg(lun, tpg); + /* + * Wait for any active I/O references to percpu se_lun->lun_ref to + * be released. Also, se_lun->lun_ref is now used by PR and ALUA + * logic when referencing a remote target port during ALL_TGT_PT=1 + * and generating UNIT_ATTENTIONs for ALUA access state transition. + */ transport_clear_lun_ref(lun); mutex_lock(&tpg->tpg_lun_mutex); if (lun->lun_se_dev) { - while (atomic_read(&lun->lun_active)) - cpu_relax(); - target_detach_tg_pt_gp(lun); spin_lock(&dev->se_port_lock); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index b82a989a4d3b..eefc2b0cfaa3 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -721,7 +721,6 @@ struct se_lun { struct t10_alua_tg_pt_gp *lun_tg_pt_gp; spinlock_t lun_tg_pt_gp_lock; - atomic_t lun_active; struct se_portal_group *lun_tpg; struct scsi_port_stats lun_stats; struct config_group lun_group; From cf561f0d2eb74574ad9985a2feab134267a9d298 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:27 +0200 Subject: [PATCH 087/839] virtio: introduce virtio_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/virtio_config.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 1e306f727edc..170947eb11b5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) { - return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) { - return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); } static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) { - return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) { - return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); } static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) { - return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { - return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } /* Config space accessors. */ From 25bd55bbabd8957693c1ffc2fe1bf16cb4fdabd4 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:38 +0200 Subject: [PATCH 088/839] tun: add tun_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- drivers/net/tun.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e470ae59d405..301e87c89a9f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -206,14 +206,19 @@ struct tun_struct { u32 flow_count; }; +static inline bool tun_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_LE; +} + static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) { - return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val); + return __virtio16_to_cpu(tun_is_little_endian(tun), val); } static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) { - return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val); + return __cpu_to_virtio16(tun_is_little_endian(tun), val); } static inline u32 tun_hashfn(u32 rxhash) From 5b11e15f24744d2ea6210451529d2b180337965a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:48 +0200 Subject: [PATCH 089/839] macvtap: introduce macvtap_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- drivers/net/macvtap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ad..4fdb88102082 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -49,14 +49,19 @@ struct macvtap_queue { #define MACVTAP_VNET_LE 0x80000000 +static inline bool macvtap_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_LE; +} + static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) { - return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val); + return __virtio16_to_cpu(macvtap_is_little_endian(q), val); } static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) { - return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val); + return __cpu_to_virtio16(macvtap_is_little_endian(q), val); } static struct proto macvtap_proto = { From 5da7b160b36a42f161980c5e20d3960d6d076fb8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:58 +0200 Subject: [PATCH 090/839] vringh: introduce vringh_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/vringh.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/vringh.h b/include/linux/vringh.h index a3fa537e717a..3ed62efc2bc5 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -226,33 +226,38 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline bool vringh_is_little_endian(const struct vringh *vrh) +{ + return vrh->little_endian; +} + static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) { - return __virtio16_to_cpu(vrh->little_endian, val); + return __virtio16_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) { - return __cpu_to_virtio16(vrh->little_endian, val); + return __cpu_to_virtio16(vringh_is_little_endian(vrh), val); } static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) { - return __virtio32_to_cpu(vrh->little_endian, val); + return __virtio32_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) { - return __cpu_to_virtio32(vrh->little_endian, val); + return __cpu_to_virtio32(vringh_is_little_endian(vrh), val); } static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) { - return __virtio64_to_cpu(vrh->little_endian, val); + return __virtio64_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { - return __cpu_to_virtio64(vrh->little_endian, val); + return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } #endif /* _LINUX_VRINGH_H */ From ab27c07f607253f928cbc8c64d9c0d273df09e6b Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:25:12 +0200 Subject: [PATCH 091/839] vhost: introduce vhost_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- drivers/vhost/vhost.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 8c1c792900ba..6a499603d856 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -173,34 +173,39 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) return vq->acked_features & (1ULL << bit); } +static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) +{ + return vhost_has_feature(vq, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val) { - return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val) { - return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(vhost_is_little_endian(vq), val); } static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val) { - return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val) { - return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(vhost_is_little_endian(vq), val); } static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val) { - return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val) { - return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(vhost_is_little_endian(vq), val); } #endif From 7d82410950aa74adccf035c332e409af2bb93e92 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:26:24 +0200 Subject: [PATCH 092/839] virtio: add explicit big-endian support to memory accessors The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: David Gibson --- drivers/net/macvtap.c | 3 ++- drivers/net/tun.c | 3 ++- drivers/vhost/vhost.h | 3 ++- include/linux/virtio_byteorder.h | 24 ++++++++++++++---------- include/linux/virtio_config.h | 3 ++- include/linux/vringh.h | 3 ++- 6 files changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 4fdb88102082..072ccbaf7c45 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -51,7 +51,8 @@ struct macvtap_queue { static inline bool macvtap_is_little_endian(struct macvtap_queue *q) { - return q->flags & MACVTAP_VNET_LE; + return q->flags & MACVTAP_VNET_LE || + virtio_legacy_is_little_endian(); } static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 301e87c89a9f..b210139bef8f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -208,7 +208,8 @@ struct tun_struct { static inline bool tun_is_little_endian(struct tun_struct *tun) { - return tun->flags & TUN_VNET_LE; + return tun->flags & TUN_VNET_LE || + virtio_legacy_is_little_endian(); } static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 6a499603d856..a4fa33a79bf2 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -175,7 +175,8 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { - return vhost_has_feature(vq, VIRTIO_F_VERSION_1); + return vhost_has_feature(vq, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h index 51865d05b267..ce63a2c3a612 100644 --- a/include/linux/virtio_byteorder.h +++ b/include/linux/virtio_byteorder.h @@ -3,17 +3,21 @@ #include #include -/* - * Low-level memory accessors for handling virtio in modern little endian and in - * compatibility native endian format. - */ +static inline bool virtio_legacy_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) { if (little_endian) return le16_to_cpu((__force __le16)val); else - return (__force u16)val; + return be16_to_cpu((__force __be16)val); } static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) if (little_endian) return (__force __virtio16)cpu_to_le16(val); else - return (__force __virtio16)val; + return (__force __virtio16)cpu_to_be16(val); } static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) if (little_endian) return le32_to_cpu((__force __le32)val); else - return (__force u32)val; + return be32_to_cpu((__force __be32)val); } static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) if (little_endian) return (__force __virtio32)cpu_to_le32(val); else - return (__force __virtio32)val; + return (__force __virtio32)cpu_to_be32(val); } static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) if (little_endian) return le64_to_cpu((__force __le64)val); else - return (__force u64)val; + return be64_to_cpu((__force __be64)val); } static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) if (little_endian) return (__force __virtio64)cpu_to_le64(val); else - return (__force __virtio64)val; + return (__force __virtio64)cpu_to_be64(val); } #endif /* _LINUX_VIRTIO_BYTEORDER */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 170947eb11b5..e5ce8ab0b8b0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -207,7 +207,8 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) static inline bool virtio_is_little_endian(struct virtio_device *vdev) { - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 3ed62efc2bc5..bc6c28d04263 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -228,7 +228,8 @@ static inline void vringh_notify(struct vringh *vrh) static inline bool vringh_is_little_endian(const struct vringh *vrh) { - return vrh->little_endian; + return vrh->little_endian || + virtio_legacy_is_little_endian(); } static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) From 2751c9882b947292fcfb084c4f604e01724af804 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:27:24 +0200 Subject: [PATCH 093/839] vhost: cross-endian support for legacy devices This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The vq->is_le boolean field is added to cache the endianness to be used for ring accesses. It defaults to native endian, as expected by legacy virtio devices. When the ring gets active, we force little endian if the device is modern. When the ring is deactivated, we revert to the native endian default. If cross-endian was compiled in, a vq->user_be boolean field is added so that userspace may request a specific endianness. This field is used to override the default when activating the ring of a legacy device. It has no effect on modern devices. Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: David Gibson --- drivers/vhost/Kconfig | 15 +++++++ drivers/vhost/vhost.c | 85 +++++++++++++++++++++++++++++++++++++- drivers/vhost/vhost.h | 11 ++++- include/uapi/linux/vhost.h | 14 +++++++ 4 files changed, 122 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 017a1e8a8f6f..533eaf04f12f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -32,3 +32,18 @@ config VHOST ---help--- This option is selected by any driver which needs to access the core of vhost. + +config VHOST_CROSS_ENDIAN_LEGACY + bool "Cross-endian support for vhost" + default n + ---help--- + This option allows vhost to support guests with a different byte + ordering from host while using legacy virtio. + + Userspace programs can control the feature using the + VHOST_SET_VRING_ENDIAN and VHOST_GET_VRING_ENDIAN ioctls. + + This is only useful on a few platforms (ppc64 and arm64). Since it + adds some overhead, it is disabled by default. + + If unsure, say "N". diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee28266fd07..9e8e004bb1c3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -36,6 +36,77 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ + vq->user_be = !virtio_legacy_is_little_endian(); +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + struct vhost_vring_state s; + + if (vq->private_data) + return -EBUSY; + + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + + if (s.num != VHOST_VRING_LITTLE_ENDIAN && + s.num != VHOST_VRING_BIG_ENDIAN) + return -EINVAL; + + vq->user_be = s.num; + + return 0; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + struct vhost_vring_state s = { + .index = idx, + .num = vq->user_be + }; + + if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + + return 0; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + /* Note for legacy virtio: user_be is initialized at reset time + * according to the host endianness. If userspace does not set an + * explicit endianness, the default behavior is native endian, as + * expected by legacy virtio. + */ + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; +} +#else +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq->is_le = true; +} +#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -199,6 +270,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call = NULL; vq->log_ctx = NULL; vq->memory = NULL; + vq->is_le = virtio_legacy_is_little_endian(); + vhost_vq_reset_user_be(vq); } static int vhost_worker(void *data) @@ -806,6 +879,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } else filep = eventfp; break; + case VHOST_SET_VRING_ENDIAN: + r = vhost_set_vring_endian(vq, argp); + break; + case VHOST_GET_VRING_ENDIAN: + r = vhost_get_vring_endian(vq, idx, argp); + break; default: r = -ENOIOCTLCMD; } @@ -1044,8 +1123,12 @@ int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq->private_data) + if (!vq->private_data) { + vq->is_le = virtio_legacy_is_little_endian(); return 0; + } + + vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index a4fa33a79bf2..ce6f6da4b09f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -106,6 +106,14 @@ struct vhost_virtqueue { /* Log write descriptors */ void __user *log_base; struct vhost_log *log; + + /* Ring endianness. Defaults to legacy native endianness. + * Set to true when starting a modern virtio device. */ + bool is_le; +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY + /* Ring endianness requested by userspace for cross-endian support. */ + bool user_be; +#endif }; struct vhost_dev { @@ -175,8 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { - return vhost_has_feature(vq, VIRTIO_F_VERSION_1) || - virtio_legacy_is_little_endian(); + return vq->is_le; } /* Memory accessors */ diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bb6a5b4cb3c5..ab3731917bac 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -103,6 +103,20 @@ struct vhost_memory { /* Get accessor: reads index, writes value in num */ #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN + * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL). + * The byte order cannot be changed while the device is active: trying to do so + * returns -EBUSY. + * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is + * set. + * Not all kernel configurations support this ioctl, but all configurations that + * support SET also support GET. + */ +#define VHOST_VRING_LITTLE_ENDIAN 0 +#define VHOST_VRING_BIG_ENDIAN 1 +#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) +#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) + /* The following ioctls use eventfd file descriptors to signal and poll * for events. */ From 8b8e658b16336f0f50aba733f51db636ef121f50 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:50:36 +0200 Subject: [PATCH 094/839] macvtap/tun: cross-endian support for little-endian hosts The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers that are always little-endian. It can also be used to handle the special case of a legacy little-endian device implemented by a big-endian host. Let's add a flag and ioctls for big-endian devices as well. If both flags are set, little-endian wins. Since this is isn't a common usecase, the feature is controlled by a kernel config option (not set by default). Both macvtap and tun are covered by this patch since they share the same API with userland. Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: David Gibson --- drivers/net/Kconfig | 14 +++++++++ drivers/net/macvtap.c | 57 ++++++++++++++++++++++++++++++++++- drivers/net/tun.c | 59 ++++++++++++++++++++++++++++++++++++- include/uapi/linux/if_tun.h | 6 ++++ 4 files changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d6025a90..71ac0ece2d75 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -244,6 +244,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TUN_VNET_CROSS_LE + bool "Support for cross-endian vnet headers on little-endian kernels" + default n + ---help--- + This option allows TUN/TAP and MACVTAP device drivers in a + little-endian kernel to parse vnet headers that come from a + big-endian legacy virtio device. + + Userspace programs can control the feature using the TUNSETVNETBE + and TUNGETVNETBE ioctls. + + Unless you have a little-endian system hosting a big-endian virtual + machine with a legacy virtio NIC, you should say N. + config VETH tristate "Virtual ethernet pair device" ---help--- diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 072ccbaf7c45..928f3f4db629 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -48,11 +48,60 @@ struct macvtap_queue { #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define MACVTAP_VNET_LE 0x80000000 +#define MACVTAP_VNET_BE 0x40000000 + +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_BE ? false : + virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s = !!(q->flags & MACVTAP_VNET_BE); + + if (put_user(s, sp)) + return -EFAULT; + + return 0; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s; + + if (get_user(s, sp)) + return -EFAULT; + + if (s) + q->flags |= MACVTAP_VNET_BE; + else + q->flags &= ~MACVTAP_VNET_BE; + + return 0; +} +#else +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ static inline bool macvtap_is_little_endian(struct macvtap_queue *q) { return q->flags & MACVTAP_VNET_LE || - virtio_legacy_is_little_endian(); + macvtap_legacy_is_little_endian(q); } static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) @@ -1096,6 +1145,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, q->flags &= ~MACVTAP_VNET_LE; return 0; + case TUNGETVNETBE: + return macvtap_get_vnet_be(q, sp); + + case TUNSETVNETBE: + return macvtap_set_vnet_be(q, sp); + case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b210139bef8f..cb376b2d548a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -111,6 +111,7 @@ do { \ #define TUN_FASYNC IFF_ATTACH_QUEUE /* High bits in flags field are unused. */ #define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE) @@ -206,10 +207,58 @@ struct tun_struct { u32 flow_count; }; +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_BE ? false : + virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be = !!(tun->flags & TUN_VNET_BE); + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + tun->flags |= TUN_VNET_BE; + else + tun->flags &= ~TUN_VNET_BE; + + return 0; +} +#else +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ + static inline bool tun_is_little_endian(struct tun_struct *tun) { return tun->flags & TUN_VNET_LE || - virtio_legacy_is_little_endian(); + tun_legacy_is_little_endian(tun); } static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) @@ -2062,6 +2111,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun->flags &= ~TUN_VNET_LE; break; + case TUNGETVNETBE: + ret = tun_get_vnet_be(tun, argp); + break; + + case TUNSETVNETBE: + ret = tun_set_vnet_be(tun, argp); + break; + case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 50ae24335444..3cb5e1d85ddd 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -50,6 +50,12 @@ #define TUNGETFILTER _IOR('T', 219, struct sock_fprog) #define TUNSETVNETLE _IOW('T', 220, int) #define TUNGETVNETLE _IOR('T', 221, int) +/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on + * little-endian hosts. Not all kernel configurations support them, but all + * configurations that support SET also support GET. + */ +#define TUNSETVNETBE _IOW('T', 222, int) +#define TUNGETVNETBE _IOR('T', 223, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 From e8d975e73e5fa05f983fbf2723120edcf68e0b38 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 15 May 2015 11:45:31 -0400 Subject: [PATCH 095/839] fixing infinite OPEN loop in 4.0 stateid recovery Problem: When an operation like WRITE receives a BAD_STATEID, even though recovery code clears the RECLAIM_NOGRACE recovery flag before recovering the open state, because of clearing delegation state for the associated inode, nfs_inode_find_state_and_recover() gets called and it makes the same state with RECLAIM_NOGRACE flag again. As a results, when we restart looking over the open states, we end up in the infinite loop instead of breaking out in the next test of state flags. Solution: unset the RECLAIM_NOGRACE set because of calling of nfs_inode_find_state_and_recover() after returning from calling recover_open() function. Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca2265..ddef1dc80cf7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1482,6 +1482,8 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } From 28a7eedd1109c9277390f44aa11de76b673996cd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Jun 2015 15:00:44 -0700 Subject: [PATCH 096/839] memory: omap-gpmc: Fix parsing of devices We currently artificially limit the parsing of GPMC connected devices based on the device name. Let's stop doing that, it's confusing as adding devices to .dts files with using normal names like fpga and usb will currently cause them to not probe. Cc: Roger Quadros Reported-by: Brian Hutchinson Signed-off-by: Tony Lindgren --- drivers/memory/omap-gpmc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index c94ea0d68746..0e524a1de56d 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -2074,14 +2074,8 @@ static int gpmc_probe_dt(struct platform_device *pdev) ret = gpmc_probe_nand_child(pdev, child); else if (of_node_cmp(child->name, "onenand") == 0) ret = gpmc_probe_onenand_child(pdev, child); - else if (of_node_cmp(child->name, "ethernet") == 0 || - of_node_cmp(child->name, "nor") == 0 || - of_node_cmp(child->name, "uart") == 0) + else ret = gpmc_probe_generic_child(pdev, child); - - if (WARN(ret < 0, "%s: probing gpmc child %s failed\n", - __func__, child->full_name)) - of_node_put(child); } return 0; From e3abe2556b2a689b28926cd1581f0b97e9d2afa4 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Thu, 28 May 2015 11:00:05 -0400 Subject: [PATCH 097/839] ARM: OMAP2+: Remove unnessary return statement from the void function, omap2_show_dma_caps This removes the no longer required return statement at the end of the void function, omap2_show_dma_cap due to no need for a return statement due to this function always running successfully. Signed-off-by: Nicholas Krause Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index e1a56d87599e..1ed4be184a29 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -117,7 +117,6 @@ static void omap2_show_dma_caps(void) u8 revision = dma_read(REVISION, 0) & 0xff; printk(KERN_INFO "OMAP DMA hardware revision %d.%d\n", revision >> 4, revision & 0xf); - return; } static unsigned configure_dma_errata(void) From 5cae02f42793130e1387f4ec09c4d07056ce9fa5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 10:30:11 -0400 Subject: [PATCH 098/839] NFSv4: Always drain the slot table before re-establishing the lease While the NFSv4.1 code has always drained the slot tables in order to stop non-recovery related RPC calls when doing lease recovery, the NFSv4 code did not. The reason for the difference in behaviour is that NFSv4 does not have session state, and so RPC calls can in theory proceed while recovery is happening. In practice, however, anything I/O or state related needs to wait until recovery is over. This patch changes the behaviour of NFSv4 to match that of NFSv4.1 so that we can simplify the state recovery code by assuming that we do not have to deal with races between recovery and ordinary I/O. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ddef1dc80cf7..605840dc89cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; @@ -1832,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; + nfs4_begin_drain_session(clp); cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; From 7ef5ca4fe41e6a51d9bdcbbf7c66f203675e8500 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 8 May 2015 13:10:40 +1000 Subject: [PATCH 099/839] NFS: report more appropriate block size for directories. In glibc 2.21 (and several previous), a call to opendir() will result in a 32K (BUFSIZ*4) buffer being allocated and passed to getdents. However a call to fdopendir() results in an 'fstat' request to determine block size and a matching buffer allocated for subsequent use with getdents. This will typically be 1M. The first getdents call on an NFS directory will always use READDIR_PLUS (or NFSv4 equivalent) if available. Subsequent getdents calls only use this more expensive version if some 'stat' requests are made between the getdents calls. For this reason it is good to keep at least that first getdents call relatively short. When fdopendir() and readdir() is used on a large directory, it takes approximately 32 times as long to complete as using "opendir". Current versions of 'find' use fdopendir() and demonstrate this slowness. 'stat' on a directory currently returns the 'wsize'. This number has no meaning on directories. Actual READDIR requests are limited to ->dtsize, which itself is capped at 4 pages, coincidently the same as BUFSIZ*4. So this is a meaningful number to use as the blocksize on directories, and has the effect of making 'find' on large directories go a lot faster. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d24..14b4709b9242 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + if (S_ISDIR(inode->i_mode)) + stat->blksize = NFS_SERVER(inode)->dtsize; } out: trace_nfs_getattr_exit(inode, err); From d683cc49daf7c5afca8cd9654aaa1bf63cdf2ad9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:52 -0400 Subject: [PATCH 100/839] NFS: Fix size of NFSACL SETACL operations When encoding the NFSACL SETACL operation, reserve just the estimated size of the ACL rather than a fixed maximum. This eliminates needless zero padding on the wire that the server ignores. Fixes: ee5dc7732bd5 ('NFS: Fix "kernel BUG at fs/nfs/nfs3xdr.c:1338!"') Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88b..9b04c2e6fffc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? From 9300fdba250cbb71574eb3c6af2540219d926717 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 6 May 2015 09:32:03 +0100 Subject: [PATCH 101/839] SUNRPC: drop stale doc comments in xprtsock.c Several functions have outdated arguments listed in the doc comments. Drop documentation for arguments that no longer exist. Signed-off-by: Stefan Hajnoczi Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 66891e32c5e3..b142feef0cf4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -901,7 +901,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) /** * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets * @sk: socket with data to read - * @len: how much data to read * * Currently this assumes we can read the whole reply in a single gulp. */ @@ -965,7 +964,6 @@ static void xs_local_data_ready(struct sock *sk) /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read - * @len: how much data to read * */ static void xs_udp_data_ready(struct sock *sk) @@ -1389,7 +1387,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read - * @bytes: how much data to read * */ static void xs_tcp_data_ready(struct sock *sk) @@ -1886,9 +1883,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, /** * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint - * @xprt: RPC transport to connect * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type */ static int xs_local_setup_socket(struct sock_xprt *transport) { @@ -2125,9 +2120,6 @@ out: /** * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint - * @xprt: RPC transport to connect - * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ From 13985b1f77d09623f4114ce2d57e69a027ed9a09 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 May 2015 23:02:18 +0200 Subject: [PATCH 102/839] NFS: drop unneeded goto Delete jump to a label on the next line, when that label is not used elsewhere. A simplified version of the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier l; @@ -if (...) goto l; -l: // Also drop the unnecessary ret variable. Signed-off-by: Julia Lawall Signed-off-by: Trond Myklebust --- fs/nfs/nfs4idmap.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e1737c40a29..535dfc69c628 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp) int nfs_idmap_init(void) { - int ret; - ret = nfs_idmap_init_keyring(); - if (ret != 0) - goto out; -out: - return ret; + return nfs_idmap_init_keyring(); } void nfs_idmap_quit(void) From 88de6af24f2b48b06c514d3c3d0a8f22fafe30bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 15:10:25 -0400 Subject: [PATCH 103/839] SUNRPC: Fix a memory leak in the backchannel code req->rq_private_buf isn't initialised when xprt_setup_backchannel calls xprt_free_allocation. Fixes: fb7a0b9addbdb ("nfs41: New backchannel helper routines") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463..28504dfd3dad 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -60,7 +60,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) dprintk("RPC: free allocations for req= %p\n", req); WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); - xbufp = &req->rq_private_buf; + xbufp = &req->rq_rcv_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); From 1193d58f75faec6ccdaf24cbd3471f5460a0a906 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Jun 2015 11:53:21 -0400 Subject: [PATCH 104/839] SUNRPC: Backchannel handle socket nospace If the socket was busy due to a socket nospace error, then we should retry the send. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e6ce1517367f..fa70f663eb92 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1951,24 +1951,22 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt_prepare_transmit(task)) { - /* - * Could not reserve the transport. Try again after the - * transport is released. - */ - task->tk_status = 0; - task->tk_action = call_bc_transmit; - return; - } + if (!xprt_prepare_transmit(task)) + goto out_retry; - task->tk_action = rpc_exit_task; if (task->tk_status < 0) { printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); - return; + goto out_done; } + if (req->rq_connect_cookie != req->rq_xprt->connect_cookie) + req->rq_bytes_sent = 0; xprt_transmit(task); + + if (task->tk_status == -EAGAIN) + goto out_nospace; + xprt_end_transmit(task); dprint_status(task); switch (task->tk_status) { @@ -2002,6 +2000,13 @@ call_bc_transmit(struct rpc_task *task) break; } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +out_done: + task->tk_action = rpc_exit_task; + return; +out_nospace: + req->rq_connect_cookie = req->rq_xprt->connect_cookie; +out_retry: + task->tk_status = 0; } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ From 632dda833e28fe43049a01b4bc53e409176e7843 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:04:50 -0400 Subject: [PATCH 105/839] SUNRPC: Clean up bc_send() Clean up: Merge bc_send() into bc_svc_process(). Note: even thought this touches svc.c, it is a client-side change. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 - net/sunrpc/Makefile | 2 +- net/sunrpc/bc_svc.c | 63 ---------------------------------- net/sunrpc/svc.c | 33 +++++++++++++----- 4 files changed, 26 insertions(+), 73 deletions(-) delete mode 100644 net/sunrpc/bc_svc.c diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 15e6f6c23c5d..1b8e68d0e690 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -15,6 +15,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o -sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c deleted file mode 100644 index 15c7a8a1c24f..000000000000 --- a/net/sunrpc/bc_svc.c +++ /dev/null @@ -1,63 +0,0 @@ -/****************************************************************************** - -(c) 2007 Network Appliance, Inc. All Rights Reserved. -(c) 2009 NetApp. All Rights Reserved. - -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ - -/* - * The NFSv4.1 callback service helper routines. - * They implement the transport level processing required to send the - * reply over an existing open connection previously established by the client. - */ - -#include - -#include -#include -#include - -#define RPCDBG_FACILITY RPCDBG_SVCDSP - -/* Empty callback ops */ -static const struct rpc_call_ops nfs41_callback_ops = { -}; - - -/* - * Send the callback reply - */ -int bc_send(struct rpc_rqst *req) -{ - struct rpc_task *task; - int ret; - - dprintk("RPC: bc_send req= %p\n", req); - task = rpc_run_bc_task(req, &nfs41_callback_ops); - if (IS_ERR(task)) - ret = PTR_ERR(task); - else { - WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - ret = task->tk_status; - rpc_put_task(task); - } - dprintk("RPC: bc_send ret= %d\n", ret); - return ret; -} - diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 78974e4d9ad2..e144902d382e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; + static const struct rpc_call_ops reply_ops = { }; + struct rpc_task *task; + int error; + + dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xprt = serv->sv_bc_xprt; @@ -1372,21 +1377,33 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* * Skip the next two words because they've already been - * processed in the trasport + * processed in the transport */ svc_getu32(argv); /* XID */ svc_getnl(argv); /* CALLDIR */ - /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) { - memcpy(&req->rq_snd_buf, &rqstp->rq_res, - sizeof(req->rq_snd_buf)); - return bc_send(req); - } else { - /* drop request */ + /* Parse and execute the bc call */ + if (!svc_process_common(rqstp, argv, resv)) { + /* Processing error: drop the request */ xprt_free_bc_request(req); return 0; } + + /* Finally, send the reply synchronously */ + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + task = rpc_run_bc_task(req, &reply_ops); + if (IS_ERR(task)) { + error = PTR_ERR(task); + goto out; + } + + WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); + error = task->tk_status; + rpc_put_task(task); + +out: + dprintk("svc: %s(), error=%d\n", __func__, error); + return error; } EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ From 39c3f1ba5ee3b937c43747f60dfa869d9776a4ac Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Sun, 31 May 2015 14:35:51 -0300 Subject: [PATCH 106/839] scripts/kconfig/Makefile: Cosmetic fixes Signed-off-by: Diego Viola Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 3b48f7afb894..9e0be1cb0813 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -86,7 +86,7 @@ $(simple-targets): $(obj)/conf PHONY += oldnoconfig savedefconfig defconfig # oldnoconfig is an alias of olddefconfig, because people already are dependent -# on its behavior(sets new symbols to their default value but not 'n') with the +# on its behavior (sets new symbols to their default value but not 'n') with the # counter-intuitive name. oldnoconfig: olddefconfig @@ -125,7 +125,7 @@ help: @echo ' nconfig - Update current config utilising a ncurses menu based program' @echo ' menuconfig - Update current config utilising a menu based program' @echo ' xconfig - Update current config utilising a Qt based front-end' - @echo ' gconfig - Update current config utilising a GTK based front-end' + @echo ' gconfig - Update current config utilising a GTK+ based front-end' @echo ' oldconfig - Update current config utilising a provided .config as base' @echo ' localmodconfig - Update current config disabling modules not loaded' @echo ' localyesconfig - Update current config converting local mods to core' @@ -160,7 +160,7 @@ HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \ # qconf: Used for the xconfig target # Based on Qt which needs to be installed to compile it # gconf: Used for the gconfig target -# Based on GTK which needs to be installed to compile it +# Based on GTK+ which needs to be installed to compile it # object files used by all kconfig flavours lxdialog := lxdialog/checklist.o lxdialog/util.o lxdialog/inputbox.o @@ -273,7 +273,7 @@ $(obj)/gconf.o: $(obj)/.tmp_gtkcheck ifeq ($(MAKECMDGOALS),gconfig) -include $(obj)/.tmp_gtkcheck -# GTK needs some extra effort, too... +# GTK+ needs some extra effort, too... $(obj)/.tmp_gtkcheck: @if `pkg-config --exists gtk+-2.0 gmodule-2.0 libglade-2.0`; then \ if `pkg-config --atleast-version=2.0.0 gtk+-2.0`; then \ @@ -304,7 +304,7 @@ quiet_cmd_moc = MOC $@ $(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck $(call cmd,moc) -# Extract gconf menu items for I18N support +# Extract gconf menu items for i18n support $(obj)/gconf.glade.h: $(obj)/gconf.glade $(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \ $(obj)/gconf.glade From fa75a727c01963088299a7fa691e71ed90018a7d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 May 2015 13:39:54 +0200 Subject: [PATCH 107/839] kconfig: Wrap long "make help" text lines Some "make help" text lines extend beyond 80 characters. Wrap them at 79 characters. Signed-off-by: Geert Uytterhoeven Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 9e0be1cb0813..930de225d52e 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -122,7 +122,8 @@ tinyconfig: # Help text used by make help help: @echo ' config - Update current config utilising a line-oriented program' - @echo ' nconfig - Update current config utilising a ncurses menu based program' + @echo ' nconfig - Update current config utilising a ncurses menu based' + @echo ' program' @echo ' menuconfig - Update current config utilising a menu based program' @echo ' xconfig - Update current config utilising a Qt based front-end' @echo ' gconfig - Update current config utilising a GTK+ based front-end' @@ -138,7 +139,8 @@ help: @echo ' alldefconfig - New config with all symbols set to default' @echo ' randconfig - New config with random answer to all options' @echo ' listnewconfig - List new options' - @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value' + @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their' + @echo ' default value' @echo ' kvmconfig - Enable additional options for guest kernel support' @echo ' tinyconfig - Configure the tiniest possible kernel' From 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:49:47 -0500 Subject: [PATCH 108/839] mnt: Modify fs_fully_visible to deal with locked ro nodev and atime Ignore an existing mount if the locked readonly, nodev or atime attributes are less permissive than the desired attributes of the new mount. On success ensure the new mount locks all of the same readonly, nodev and atime attributes as the old mount. The nosuid and noexec attributes are not checked here as this change is destined for stable and enforcing those attributes causes a regression in lxc and libvirt-lxc where those applications will not start and there are no known executables on sysfs or proc and no known way to create exectuables without code modifications Cc: stable@vger.kernel.org Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8e7edaf60fe1..63b9806235e6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,7 +2332,7 @@ unlock: return err; } -static bool fs_fully_visible(struct file_system_type *fs_type); +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); /* * create a new mount for userspace and request it to be added into the @@ -2366,7 +2366,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type)) + if (!fs_fully_visible(type, &mnt_flags)) return -EPERM; } } @@ -3170,9 +3170,10 @@ bool current_chrooted(void) return chrooted; } -static bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3191,6 +3192,19 @@ static bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + /* This mount is not fully visible if there are any child mounts * that cover anything except for empty directories. */ @@ -3201,6 +3215,10 @@ static bool fs_fully_visible(struct file_system_type *type) if (inode->i_nlink > 2) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; From 8eee52af27b96047894bd18649102a0d2de3c3bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 13:51:13 -0400 Subject: [PATCH 109/839] NFSv4: nfs4_handle_delegation_recall_error should ignore EAGAIN EAGAIN is a valid return code from nfs4_open_recover(), and should be handled by nfs4_handle_delegation_recall_error by simply passing it through. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..d25de2ab12fb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1684,6 +1684,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct "%d.\n", __func__, err); case 0: case -ENOENT: + case -EAGAIN: case -ESTALE: break; case -NFS4ERR_BADSESSION: From f2af7d25b4aa5b01203cc76e7530ea7fd18864a0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 4 Jun 2015 14:18:49 +0800 Subject: [PATCH 110/839] x86/boot/setup: Clean up the e820_reserve_setup_data() code Deobfuscate the 'found' logic, it can be replaced with a simple: if (!pa_data) return; and 'found' can be eliminated. Signed-off-by: Wei Yang Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1433398729-8314-1-git-send-email-weiyang@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae..c3992c3a4378 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -461,19 +461,18 @@ static void __init e820_reserve_setup_data(void) { struct setup_data *data; u64 pa_data; - int found = 0; pa_data = boot_params.hdr.setup_data; + if (!pa_data) + return; + while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); e820_update_range(pa_data, sizeof(*data)+data->len, E820_RAM, E820_RESERVED_KERN); - found = 1; pa_data = data->next; early_memunmap(data, sizeof(*data)); } - if (!found) - return; sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); memcpy(&e820_saved, &e820, sizeof(struct e820map)); From 0f41979164a52a1ca0f0a601f90000fc18e3a396 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 22:59:08 -0400 Subject: [PATCH 111/839] SUNRPC: Remove unused argument 'tk_ops' in rpc_run_bc_task Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- net/sunrpc/clnt.c | 6 ++---- net/sunrpc/svc.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..2aa68976a482 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fa70f663eb92..f6717170480e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1031,15 +1031,13 @@ EXPORT_SYMBOL_GPL(rpc_call_async); * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it * @req: RPC request - * @tk_ops: RPC call ops */ -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { - .callback_ops = tk_ops, + .callback_ops = &rpc_default_ops, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e144902d382e..51c8cad5e765 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1350,7 +1350,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - static const struct rpc_call_ops reply_ops = { }; struct rpc_task *task; int error; @@ -1391,7 +1390,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* Finally, send the reply synchronously */ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); - task = rpc_run_bc_task(req, &reply_ops); + task = rpc_run_bc_task(req); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; From 1dddda86c056ab4cf49e4206824a9de3e0c4159a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 15:05:38 -0400 Subject: [PATCH 112/839] SUNRPC: Clean up allocation and freeing of back channel requests Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 95 ++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 28504dfd3dad..1baa41099469 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -67,6 +67,55 @@ static void xprt_free_allocation(struct rpc_rqst *req) kfree(req); } +static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) +{ + struct page *page; + /* Preallocate one XDR receive buffer */ + page = alloc_page(gfp_flags); + if (page == NULL) + return -ENOMEM; + buf->head[0].iov_base = page_address(page); + buf->head[0].iov_len = PAGE_SIZE; + buf->tail[0].iov_base = NULL; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = PAGE_SIZE; + return 0; +} + +static +struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req; + + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(*req), gfp_flags); + if (req == NULL) + return NULL; + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + req->rq_rcv_buf.len = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + return req; +out_free: + xprt_free_allocation(req); + return NULL; +} + /* * Preallocate up to min_reqs structures and related buffers for use * by the backchannel. This function can be called multiple times @@ -87,9 +136,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { - struct page *page_rcv = NULL, *page_snd = NULL; - struct xdr_buf *xbufp = NULL; - struct rpc_rqst *req, *tmp; + struct rpc_rqst *req; struct list_head tmp_list; int i; @@ -106,7 +153,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) INIT_LIST_HEAD(&tmp_list); for (i = 0; i < min_reqs; i++) { /* Pre-allocate one backchannel rpc_rqst */ - req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + req = xprt_alloc_bc_req(xprt, GFP_KERNEL); if (req == NULL) { printk(KERN_ERR "Failed to create bc rpc_rqst\n"); goto out_free; @@ -115,41 +162,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) /* Add the allocated buffer to the tmp list */ dprintk("RPC: adding req= %p\n", req); list_add(&req->rq_bc_pa_list, &tmp_list); - - req->rq_xprt = xprt; - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_bc_list); - - /* Preallocate one XDR receive buffer */ - page_rcv = alloc_page(GFP_KERNEL); - if (page_rcv == NULL) { - printk(KERN_ERR "Failed to create bc receive xbuf\n"); - goto out_free; - } - xbufp = &req->rq_rcv_buf; - xbufp->head[0].iov_base = page_address(page_rcv); - xbufp->head[0].iov_len = PAGE_SIZE; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = PAGE_SIZE; - xbufp->buflen = PAGE_SIZE; - - /* Preallocate one XDR send buffer */ - page_snd = alloc_page(GFP_KERNEL); - if (page_snd == NULL) { - printk(KERN_ERR "Failed to create bc snd xbuf\n"); - goto out_free; - } - - xbufp = &req->rq_snd_buf; - xbufp->head[0].iov_base = page_address(page_snd); - xbufp->head[0].iov_len = 0; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = 0; - xbufp->buflen = PAGE_SIZE; } /* @@ -167,7 +179,10 @@ out_free: /* * Memory allocation failed, free the temporary list */ - list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { + while (!list_empty(&tmp_list)) { + req = list_first_entry(&tmp_list, + struct rpc_rqst, + rq_bc_pa_list); list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); } From 0d2a970d0ae55086520e1e58e572a7acd519429c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 15:37:10 -0400 Subject: [PATCH 113/839] SUNRPC: Fix a backchannel race We need to allow the server to send a new request immediately after we've replied to the previous one. Right now, there is a window between the send and the release of the old request in rpc_put_task(), where the server could send us a new backchannel RPC call, and we have no request to service it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/backchannel_rqst.c | 37 +++++++++++++++++++++++------------ net/sunrpc/svc.c | 6 +++++- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..bc9c39d6d30d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -212,7 +212,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 1baa41099469..9825ff0f91d6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count > 0; + return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); } static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_add(n, &xprt->bc_free_slots); xprt->bc_alloc_count += n; } static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_sub(n, &xprt->bc_free_slots); return xprt->bc_alloc_count -= n; } @@ -232,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (list_empty(&xprt->bc_pa_list)) + if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; - + if (list_empty(&xprt->bc_pa_list)) { + req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); + if (!req) + goto not_found; + /* Note: this 'free' request adds it to xprt->bc_pa_list */ + xprt_free_bc_request(req); + } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; @@ -260,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req) req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_atomic(); - WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); smp_mb__after_atomic(); - if (!xprt_need_to_requeue(xprt)) { + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + if (xprt_need_to_requeue(xprt)) { + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + if (req != NULL) { /* * The last remaining session was destroyed while this * entry was in use. Free the entry and don't attempt @@ -275,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req) xprt_free_allocation(req); return; } - - /* - * Return it to the list of preallocations so that it - * may be reused by a new callback request. - */ - spin_lock_bh(&xprt->bc_pa_lock); - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); } /* @@ -326,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 51c8cad5e765..b47f02f60b9c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1351,6 +1351,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; struct rpc_task *task; + int proc_error; int error; dprintk("svc: %s(%p)\n", __func__, req); @@ -1382,7 +1383,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, svc_getnl(argv); /* CALLDIR */ /* Parse and execute the bc call */ - if (!svc_process_common(rqstp, argv, resv)) { + proc_error = svc_process_common(rqstp, argv, resv); + + atomic_inc(&req->rq_xprt->bc_free_slots); + if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); return 0; From ee83bd3b6483d59b0d4b509e46dae688d8e5759d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 9 Jun 2015 23:35:00 -0700 Subject: [PATCH 114/839] serial: omap: Switch wake-up interrupt to generic wakeirq We can now use generic wakeirq handling and remove the custom handling for the wake-up interrupts. Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- drivers/tty/serial/omap-serial.c | 35 ++++---------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd86..7a2172b5e93c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -160,7 +161,6 @@ struct uart_omap_port { unsigned long port_activity; int context_loss_cnt; u32 errata; - u8 wakeups_enabled; u32 features; int rts_gpio; @@ -209,28 +209,11 @@ static int serial_omap_get_context_loss_count(struct uart_omap_port *up) return pdata->get_context_loss_count(up->dev); } -static inline void serial_omap_enable_wakeirq(struct uart_omap_port *up, - bool enable) -{ - if (!up->wakeirq) - return; - - if (enable) - enable_irq(up->wakeirq); - else - disable_irq_nosync(up->wakeirq); -} - +/* REVISIT: Remove this when omap3 boots in device tree only mode */ static void serial_omap_enable_wakeup(struct uart_omap_port *up, bool enable) { struct omap_uart_port_info *pdata = dev_get_platdata(up->dev); - if (enable == up->wakeups_enabled) - return; - - serial_omap_enable_wakeirq(up, enable); - up->wakeups_enabled = enable; - if (!pdata || !pdata->enable_wakeup) return; @@ -750,13 +733,11 @@ static int serial_omap_startup(struct uart_port *port) /* Optional wake-up IRQ */ if (up->wakeirq) { - retval = request_irq(up->wakeirq, serial_omap_irq, - up->port.irqflags, up->name, up); + retval = dev_pm_set_dedicated_wake_irq(up->dev, up->wakeirq); if (retval) { free_irq(up->port.irq, up); return retval; } - disable_irq(up->wakeirq); } dev_dbg(up->port.dev, "serial_omap_startup+%d\n", up->port.line); @@ -845,8 +826,7 @@ static void serial_omap_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); free_irq(up->port.irq, up); - if (up->wakeirq) - free_irq(up->wakeirq, up); + dev_pm_clear_wake_irq(up->dev); } static void serial_omap_uart_qos_work(struct work_struct *work) @@ -1139,13 +1119,6 @@ serial_omap_pm(struct uart_port *port, unsigned int state, serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); - if (!device_may_wakeup(up->dev)) { - if (!state) - pm_runtime_forbid(up->dev); - else - pm_runtime_allow(up->dev); - } - pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); } From a3e362f18c01f28abcb6d8e5d00be8b5d97ea09d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 9 Jun 2015 23:35:00 -0700 Subject: [PATCH 115/839] serial: 8250_omap: Move wake-up interrupt to generic wakeirq We can now use generic wakeirq handling and remove the custom handling for the wake-up interrupts. Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- drivers/tty/serial/8250/8250_omap.c | 58 ++--------------------------- 1 file changed, 4 insertions(+), 54 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 978204333c94..d75a66c72750 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "8250.h" @@ -552,17 +553,6 @@ static void omap8250_uart_qos_work(struct work_struct *work) pm_qos_update_request(&priv->pm_qos_request, priv->latency); } -static irqreturn_t omap_wake_irq(int irq, void *dev_id) -{ - struct uart_port *port = dev_id; - int ret; - - ret = port->handle_irq(port); - if (ret) - return IRQ_HANDLED; - return IRQ_NONE; -} - #ifdef CONFIG_SERIAL_8250_DMA static int omap_8250_dma_handle_irq(struct uart_port *port); #endif @@ -596,11 +586,9 @@ static int omap_8250_startup(struct uart_port *port) int ret; if (priv->wakeirq) { - ret = request_irq(priv->wakeirq, omap_wake_irq, - port->irqflags, "uart wakeup irq", port); + ret = dev_pm_set_dedicated_wake_irq(port->dev, priv->wakeirq); if (ret) return ret; - disable_irq(priv->wakeirq); } pm_runtime_get_sync(port->dev); @@ -649,8 +637,7 @@ static int omap_8250_startup(struct uart_port *port) err: pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); return ret; } @@ -682,10 +669,8 @@ static void omap_8250_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - free_irq(port->irq, port); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); } static void omap_8250_throttle(struct uart_port *port) @@ -1226,31 +1211,6 @@ static int omap8250_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM - -static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv, - bool enable) -{ - if (!priv->wakeirq) - return; - - if (enable) - enable_irq(priv->wakeirq); - else - disable_irq_nosync(priv->wakeirq); -} - -static void omap8250_enable_wakeup(struct omap8250_priv *priv, - bool enable) -{ - if (enable == priv->wakeups_enabled) - return; - - omap8250_enable_wakeirq(priv, enable); - priv->wakeups_enabled = enable; -} -#endif - #ifdef CONFIG_PM_SLEEP static int omap8250_prepare(struct device *dev) { @@ -1277,11 +1237,6 @@ static int omap8250_suspend(struct device *dev) serial8250_suspend_port(priv->line); flush_work(&priv->qos_work); - - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, true); - else - omap8250_enable_wakeup(priv, false); return 0; } @@ -1289,9 +1244,6 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, false); - serial8250_resume_port(priv->line); return 0; } @@ -1333,7 +1285,6 @@ static int omap8250_runtime_suspend(struct device *dev) return -EBUSY; } - omap8250_enable_wakeup(priv, true); if (up->dma) omap_8250_rx_dma(up, UART_IIR_RX_TIMEOUT); @@ -1354,7 +1305,6 @@ static int omap8250_runtime_resume(struct device *dev) return 0; up = serial8250_get_port(priv->line); - omap8250_enable_wakeup(priv, false); loss_cntx = omap8250_lost_context(up); if (loss_cntx) From 27b0d37e4209f3fc40c65e2b7936ee4154f4070d Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 20 May 2015 08:08:27 +0000 Subject: [PATCH 116/839] ARM: dts: atlas7: add pinctrl and gpio descriptions This patch adds pinctrl and gpio stuff according to the atlas7 pinctrl driver. Signed-off-by: Wei Chen Signed-off-by: Barry Song Acked-by: Linus Walleij --- arch/arm/boot/dts/atlas7.dtsi | 1042 ++++++++++++++++++++++++++++++++- 1 file changed, 1041 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi index a753178abc85..1928f78f5254 100644 --- a/arch/arm/boot/dts/atlas7.dtsi +++ b/arch/arm/boot/dts/atlas7.dtsi @@ -120,6 +120,1025 @@ compatible = "sirf,atlas7-ioc"; reg = <0x18880000 0x1000>, <0x10E40000 0x1000>; + + audio_ac97_pmx: audio_ac97@0 { + audio_ac97 { + groups = "audio_ac97_grp"; + function = "audio_ac97"; + }; + }; + + audio_func_dbg_pmx: audio_func_dbg@0 { + audio_func_dbg { + groups = "audio_func_dbg_grp"; + function = "audio_func_dbg"; + }; + }; + + audio_i2s_pmx: audio_i2s@0 { + audio_i2s { + groups = "audio_i2s_grp"; + function = "audio_i2s"; + }; + }; + + audio_i2s_2ch_pmx: audio_i2s_2ch@0 { + audio_i2s_2ch { + groups = "audio_i2s_2ch_grp"; + function = "audio_i2s_2ch"; + }; + }; + + audio_i2s_extclk_pmx: audio_i2s_extclk@0 { + audio_i2s_extclk { + groups = "audio_i2s_extclk_grp"; + function = "audio_i2s_extclk"; + }; + }; + + audio_uart0_pmx: audio_uart0@0 { + audio_uart0 { + groups = "audio_uart0_grp"; + function = "audio_uart0"; + }; + }; + + audio_uart1_pmx: audio_uart1@0 { + audio_uart1 { + groups = "audio_uart1_grp"; + function = "audio_uart1"; + }; + }; + + audio_uart2_pmx0: audio_uart2@0 { + audio_uart2_0 { + groups = "audio_uart2_grp0"; + function = "audio_uart2_m0"; + }; + }; + + audio_uart2_pmx1: audio_uart2@1 { + audio_uart2_1 { + groups = "audio_uart2_grp1"; + function = "audio_uart2_m1"; + }; + }; + + c_can_trnsvr_pmx: c_can_trnsvr@0 { + c_can_trnsvr { + groups = "c_can_trnsvr_grp"; + function = "c_can_trnsvr"; + }; + }; + + c0_can_pmx0: c0_can@0 { + c0_can_0 { + groups = "c0_can_grp0"; + function = "c0_can_m0"; + }; + }; + + c0_can_pmx1: c0_can@1 { + c0_can_1 { + groups = "c0_can_grp1"; + function = "c0_can_m1"; + }; + }; + + c1_can_pmx0: c1_can@0 { + c1_can_0 { + groups = "c1_can_grp0"; + function = "c1_can_m0"; + }; + }; + + c1_can_pmx1: c1_can@1 { + c1_can_1 { + groups = "c1_can_grp1"; + function = "c1_can_m1"; + }; + }; + + c1_can_pmx2: c1_can@2 { + c1_can_2 { + groups = "c1_can_grp2"; + function = "c1_can_m2"; + }; + }; + + ca_audio_lpc_pmx: ca_audio_lpc@0 { + ca_audio_lpc { + groups = "ca_audio_lpc_grp"; + function = "ca_audio_lpc"; + }; + }; + + ca_bt_lpc_pmx: ca_bt_lpc@0 { + ca_bt_lpc { + groups = "ca_bt_lpc_grp"; + function = "ca_bt_lpc"; + }; + }; + + ca_coex_pmx: ca_coex@0 { + ca_coex { + groups = "ca_coex_grp"; + function = "ca_coex"; + }; + }; + + ca_curator_lpc_pmx: ca_curator_lpc@0 { + ca_curator_lpc { + groups = "ca_curator_lpc_grp"; + function = "ca_curator_lpc"; + }; + }; + + ca_pcm_debug_pmx: ca_pcm_debug@0 { + ca_pcm_debug { + groups = "ca_pcm_debug_grp"; + function = "ca_pcm_debug"; + }; + }; + + ca_pio_pmx: ca_pio@0 { + ca_pio { + groups = "ca_pio_grp"; + function = "ca_pio"; + }; + }; + + ca_sdio_debug_pmx: ca_sdio_debug@0 { + ca_sdio_debug { + groups = "ca_sdio_debug_grp"; + function = "ca_sdio_debug"; + }; + }; + + ca_spi_pmx: ca_spi@0 { + ca_spi { + groups = "ca_spi_grp"; + function = "ca_spi"; + }; + }; + + ca_trb_pmx: ca_trb@0 { + ca_trb { + groups = "ca_trb_grp"; + function = "ca_trb"; + }; + }; + + ca_uart_debug_pmx: ca_uart_debug@0 { + ca_uart_debug { + groups = "ca_uart_debug_grp"; + function = "ca_uart_debug"; + }; + }; + + clkc_pmx0: clkc@0 { + clkc_0 { + groups = "clkc_grp0"; + function = "clkc_m0"; + }; + }; + + clkc_pmx1: clkc@1 { + clkc_1 { + groups = "clkc_grp1"; + function = "clkc_m1"; + }; + }; + + gn_gnss_i2c_pmx: gn_gnss_i2c@0 { + gn_gnss_i2c { + groups = "gn_gnss_i2c_grp"; + function = "gn_gnss_i2c"; + }; + }; + + gn_gnss_uart_nopause_pmx: gn_gnss_uart_nopause@0 { + gn_gnss_uart_nopause { + groups = "gn_gnss_uart_nopause_grp"; + function = "gn_gnss_uart_nopause"; + }; + }; + + gn_gnss_uart_pmx: gn_gnss_uart@0 { + gn_gnss_uart { + groups = "gn_gnss_uart_grp"; + function = "gn_gnss_uart"; + }; + }; + + gn_trg_spi_pmx0: gn_trg_spi@0 { + gn_trg_spi_0 { + groups = "gn_trg_spi_grp0"; + function = "gn_trg_spi_m0"; + }; + }; + + gn_trg_spi_pmx1: gn_trg_spi@1 { + gn_trg_spi_1 { + groups = "gn_trg_spi_grp1"; + function = "gn_trg_spi_m1"; + }; + }; + + cvbs_dbg_pmx: cvbs_dbg@0 { + cvbs_dbg { + groups = "cvbs_dbg_grp"; + function = "cvbs_dbg"; + }; + }; + + cvbs_dbg_test_pmx0: cvbs_dbg_test@0 { + cvbs_dbg_test_0 { + groups = "cvbs_dbg_test_grp0"; + function = "cvbs_dbg_test_m0"; + }; + }; + + cvbs_dbg_test_pmx1: cvbs_dbg_test@1 { + cvbs_dbg_test_1 { + groups = "cvbs_dbg_test_grp1"; + function = "cvbs_dbg_test_m1"; + }; + }; + + cvbs_dbg_test_pmx2: cvbs_dbg_test@2 { + cvbs_dbg_test_2 { + groups = "cvbs_dbg_test_grp2"; + function = "cvbs_dbg_test_m2"; + }; + }; + + cvbs_dbg_test_pmx3: cvbs_dbg_test@3 { + cvbs_dbg_test_3 { + groups = "cvbs_dbg_test_grp3"; + function = "cvbs_dbg_test_m3"; + }; + }; + + cvbs_dbg_test_pmx4: cvbs_dbg_test@4 { + cvbs_dbg_test_4 { + groups = "cvbs_dbg_test_grp4"; + function = "cvbs_dbg_test_m4"; + }; + }; + + cvbs_dbg_test_pmx5: cvbs_dbg_test@5 { + cvbs_dbg_test_5 { + groups = "cvbs_dbg_test_grp5"; + function = "cvbs_dbg_test_m5"; + }; + }; + + cvbs_dbg_test_pmx6: cvbs_dbg_test@6 { + cvbs_dbg_test_6 { + groups = "cvbs_dbg_test_grp6"; + function = "cvbs_dbg_test_m6"; + }; + }; + + cvbs_dbg_test_pmx7: cvbs_dbg_test@7 { + cvbs_dbg_test_7 { + groups = "cvbs_dbg_test_grp7"; + function = "cvbs_dbg_test_m7"; + }; + }; + + cvbs_dbg_test_pmx8: cvbs_dbg_test@8 { + cvbs_dbg_test_8 { + groups = "cvbs_dbg_test_grp8"; + function = "cvbs_dbg_test_m8"; + }; + }; + + cvbs_dbg_test_pmx9: cvbs_dbg_test@9 { + cvbs_dbg_test_9 { + groups = "cvbs_dbg_test_grp9"; + function = "cvbs_dbg_test_m9"; + }; + }; + + cvbs_dbg_test_pmx10: cvbs_dbg_test@10 { + cvbs_dbg_test_10 { + groups = "cvbs_dbg_test_grp10"; + function = "cvbs_dbg_test_m10"; + }; + }; + + cvbs_dbg_test_pmx11: cvbs_dbg_test@11 { + cvbs_dbg_test_11 { + groups = "cvbs_dbg_test_grp11"; + function = "cvbs_dbg_test_m11"; + }; + }; + + cvbs_dbg_test_pmx12: cvbs_dbg_test@12 { + cvbs_dbg_test_12 { + groups = "cvbs_dbg_test_grp12"; + function = "cvbs_dbg_test_m12"; + }; + }; + + cvbs_dbg_test_pmx13: cvbs_dbg_test@13 { + cvbs_dbg_test_13 { + groups = "cvbs_dbg_test_grp13"; + function = "cvbs_dbg_test_m13"; + }; + }; + + cvbs_dbg_test_pmx14: cvbs_dbg_test@14 { + cvbs_dbg_test_14 { + groups = "cvbs_dbg_test_grp14"; + function = "cvbs_dbg_test_m14"; + }; + }; + + cvbs_dbg_test_pmx15: cvbs_dbg_test@15 { + cvbs_dbg_test_15 { + groups = "cvbs_dbg_test_grp15"; + function = "cvbs_dbg_test_m15"; + }; + }; + + gn_gnss_power_pmx: gn_gnss_power@0 { + gn_gnss_power { + groups = "gn_gnss_power_grp"; + function = "gn_gnss_power"; + }; + }; + + gn_gnss_sw_status_pmx: gn_gnss_sw_status@0 { + gn_gnss_sw_status { + groups = "gn_gnss_sw_status_grp"; + function = "gn_gnss_sw_status"; + }; + }; + + gn_gnss_eclk_pmx: gn_gnss_eclk@0 { + gn_gnss_eclk { + groups = "gn_gnss_eclk_grp"; + function = "gn_gnss_eclk"; + }; + }; + + gn_gnss_irq1_pmx0: gn_gnss_irq1@0 { + gn_gnss_irq1_0 { + groups = "gn_gnss_irq1_grp0"; + function = "gn_gnss_irq1_m0"; + }; + }; + + gn_gnss_irq2_pmx0: gn_gnss_irq2@0 { + gn_gnss_irq2_0 { + groups = "gn_gnss_irq2_grp0"; + function = "gn_gnss_irq2_m0"; + }; + }; + + gn_gnss_tm_pmx: gn_gnss_tm@0 { + gn_gnss_tm { + groups = "gn_gnss_tm_grp"; + function = "gn_gnss_tm"; + }; + }; + + gn_gnss_tsync_pmx: gn_gnss_tsync@0 { + gn_gnss_tsync { + groups = "gn_gnss_tsync_grp"; + function = "gn_gnss_tsync"; + }; + }; + + gn_io_gnsssys_sw_cfg_pmx: gn_io_gnsssys_sw_cfg@0 { + gn_io_gnsssys_sw_cfg { + groups = "gn_io_gnsssys_sw_cfg_grp"; + function = "gn_io_gnsssys_sw_cfg"; + }; + }; + + gn_trg_pmx0: gn_trg@0 { + gn_trg_0 { + groups = "gn_trg_grp0"; + function = "gn_trg_m0"; + }; + }; + + gn_trg_pmx1: gn_trg@1 { + gn_trg_1 { + groups = "gn_trg_grp1"; + function = "gn_trg_m1"; + }; + }; + + gn_trg_shutdown_pmx0: gn_trg_shutdown@0 { + gn_trg_shutdown_0 { + groups = "gn_trg_shutdown_grp0"; + function = "gn_trg_shutdown_m0"; + }; + }; + + gn_trg_shutdown_pmx1: gn_trg_shutdown@1 { + gn_trg_shutdown_1 { + groups = "gn_trg_shutdown_grp1"; + function = "gn_trg_shutdown_m1"; + }; + }; + + gn_trg_shutdown_pmx2: gn_trg_shutdown@2 { + gn_trg_shutdown_2 { + groups = "gn_trg_shutdown_grp2"; + function = "gn_trg_shutdown_m2"; + }; + }; + + gn_trg_shutdown_pmx3: gn_trg_shutdown@3 { + gn_trg_shutdown_3 { + groups = "gn_trg_shutdown_grp3"; + function = "gn_trg_shutdown_m3"; + }; + }; + + i2c0_pmx: i2c0@0 { + i2c0 { + groups = "i2c0_grp"; + function = "i2c0"; + }; + }; + + i2c1_pmx: i2c1@0 { + i2c1 { + groups = "i2c1_grp"; + function = "i2c1"; + }; + }; + + jtag_pmx0: jtag@0 { + jtag_0 { + groups = "jtag_grp0"; + function = "jtag_m0"; + }; + }; + + ks_kas_spi_pmx0: ks_kas_spi@0 { + ks_kas_spi_0 { + groups = "ks_kas_spi_grp0"; + function = "ks_kas_spi_m0"; + }; + }; + + ld_ldd_pmx: ld_ldd@0 { + ld_ldd { + groups = "ld_ldd_grp"; + function = "ld_ldd"; + }; + }; + + ld_ldd_16bit_pmx: ld_ldd_16bit@0 { + ld_ldd_16bit { + groups = "ld_ldd_16bit_grp"; + function = "ld_ldd_16bit"; + }; + }; + + ld_ldd_fck_pmx: ld_ldd_fck@0 { + ld_ldd_fck { + groups = "ld_ldd_fck_grp"; + function = "ld_ldd_fck"; + }; + }; + + ld_ldd_lck_pmx: ld_ldd_lck@0 { + ld_ldd_lck { + groups = "ld_ldd_lck_grp"; + function = "ld_ldd_lck"; + }; + }; + + lr_lcdrom_pmx: lr_lcdrom@0 { + lr_lcdrom { + groups = "lr_lcdrom_grp"; + function = "lr_lcdrom"; + }; + }; + + lvds_analog_pmx: lvds_analog@0 { + lvds_analog { + groups = "lvds_analog_grp"; + function = "lvds_analog"; + }; + }; + + nd_df_pmx: nd_df@0 { + nd_df { + groups = "nd_df_grp"; + function = "nd_df"; + }; + }; + + nd_df_nowp_pmx: nd_df_nowp@0 { + nd_df_nowp { + groups = "nd_df_nowp_grp"; + function = "nd_df_nowp"; + }; + }; + + ps_pmx: ps@0 { + ps { + groups = "ps_grp"; + function = "ps"; + }; + }; + + pwc_core_on_pmx: pwc_core_on@0 { + pwc_core_on { + groups = "pwc_core_on_grp"; + function = "pwc_core_on"; + }; + }; + + pwc_ext_on_pmx: pwc_ext_on@0 { + pwc_ext_on { + groups = "pwc_ext_on_grp"; + function = "pwc_ext_on"; + }; + }; + + pwc_gpio3_clk_pmx: pwc_gpio3_clk@0 { + pwc_gpio3_clk { + groups = "pwc_gpio3_clk_grp"; + function = "pwc_gpio3_clk"; + }; + }; + + pwc_io_on_pmx: pwc_io_on@0 { + pwc_io_on { + groups = "pwc_io_on_grp"; + function = "pwc_io_on"; + }; + }; + + pwc_lowbatt_b_pmx0: pwc_lowbatt_b@0 { + pwc_lowbatt_b_0 { + groups = "pwc_lowbatt_b_grp0"; + function = "pwc_lowbatt_b_m0"; + }; + }; + + pwc_mem_on_pmx: pwc_mem_on@0 { + pwc_mem_on { + groups = "pwc_mem_on_grp"; + function = "pwc_mem_on"; + }; + }; + + pwc_on_key_b_pmx0: pwc_on_key_b@0 { + pwc_on_key_b_0 { + groups = "pwc_on_key_b_grp0"; + function = "pwc_on_key_b_m0"; + }; + }; + + pwc_wakeup_src0_pmx: pwc_wakeup_src0@0 { + pwc_wakeup_src0 { + groups = "pwc_wakeup_src0_grp"; + function = "pwc_wakeup_src0"; + }; + }; + + pwc_wakeup_src1_pmx: pwc_wakeup_src1@0 { + pwc_wakeup_src1 { + groups = "pwc_wakeup_src1_grp"; + function = "pwc_wakeup_src1"; + }; + }; + + pwc_wakeup_src2_pmx: pwc_wakeup_src2@0 { + pwc_wakeup_src2 { + groups = "pwc_wakeup_src2_grp"; + function = "pwc_wakeup_src2"; + }; + }; + + pwc_wakeup_src3_pmx: pwc_wakeup_src3@0 { + pwc_wakeup_src3 { + groups = "pwc_wakeup_src3_grp"; + function = "pwc_wakeup_src3"; + }; + }; + + pw_cko0_pmx0: pw_cko0@0 { + pw_cko0_0 { + groups = "pw_cko0_grp0"; + function = "pw_cko0_m0"; + }; + }; + + pw_cko0_pmx1: pw_cko0@1 { + pw_cko0_1 { + groups = "pw_cko0_grp1"; + function = "pw_cko0_m1"; + }; + }; + + pw_cko0_pmx2: pw_cko0@2 { + pw_cko0_2 { + groups = "pw_cko0_grp2"; + function = "pw_cko0_m2"; + }; + }; + + pw_cko1_pmx0: pw_cko1@0 { + pw_cko1_0 { + groups = "pw_cko1_grp0"; + function = "pw_cko1_m0"; + }; + }; + + pw_cko1_pmx1: pw_cko1@1 { + pw_cko1_1 { + groups = "pw_cko1_grp1"; + function = "pw_cko1_m1"; + }; + }; + + pw_i2s01_clk_pmx0: pw_i2s01_clk@0 { + pw_i2s01_clk_0 { + groups = "pw_i2s01_clk_grp0"; + function = "pw_i2s01_clk_m0"; + }; + }; + + pw_i2s01_clk_pmx1: pw_i2s01_clk@1 { + pw_i2s01_clk_1 { + groups = "pw_i2s01_clk_grp1"; + function = "pw_i2s01_clk_m1"; + }; + }; + + pw_pwm0_pmx: pw_pwm0@0 { + pw_pwm0 { + groups = "pw_pwm0_grp"; + function = "pw_pwm0"; + }; + }; + + pw_pwm1_pmx: pw_pwm1@0 { + pw_pwm1 { + groups = "pw_pwm1_grp"; + function = "pw_pwm1"; + }; + }; + + pw_pwm2_pmx0: pw_pwm2@0 { + pw_pwm2_0 { + groups = "pw_pwm2_grp0"; + function = "pw_pwm2_m0"; + }; + }; + + pw_pwm2_pmx1: pw_pwm2@1 { + pw_pwm2_1 { + groups = "pw_pwm2_grp1"; + function = "pw_pwm2_m1"; + }; + }; + + pw_pwm3_pmx0: pw_pwm3@0 { + pw_pwm3_0 { + groups = "pw_pwm3_grp0"; + function = "pw_pwm3_m0"; + }; + }; + + pw_pwm3_pmx1: pw_pwm3@1 { + pw_pwm3_1 { + groups = "pw_pwm3_grp1"; + function = "pw_pwm3_m1"; + }; + }; + + pw_pwm_cpu_vol_pmx0: pw_pwm_cpu_vol@0 { + pw_pwm_cpu_vol_0 { + groups = "pw_pwm_cpu_vol_grp0"; + function = "pw_pwm_cpu_vol_m0"; + }; + }; + + pw_pwm_cpu_vol_pmx1: pw_pwm_cpu_vol@1 { + pw_pwm_cpu_vol_1 { + groups = "pw_pwm_cpu_vol_grp1"; + function = "pw_pwm_cpu_vol_m1"; + }; + }; + + pw_backlight_pmx0: pw_backlight@0 { + pw_backlight_0 { + groups = "pw_backlight_grp0"; + function = "pw_backlight_m0"; + }; + }; + + pw_backlight_pmx1: pw_backlight@1 { + pw_backlight_1 { + groups = "pw_backlight_grp1"; + function = "pw_backlight_m1"; + }; + }; + + rg_eth_mac_pmx: rg_eth_mac@0 { + rg_eth_mac { + groups = "rg_eth_mac_grp"; + function = "rg_eth_mac"; + }; + }; + + rg_gmac_phy_intr_n_pmx: rg_gmac_phy_intr_n@0 { + rg_gmac_phy_intr_n { + groups = "rg_gmac_phy_intr_n_grp"; + function = "rg_gmac_phy_intr_n"; + }; + }; + + rg_rgmii_mac_pmx: rg_rgmii_mac@0 { + rg_rgmii_mac { + groups = "rg_rgmii_mac_grp"; + function = "rg_rgmii_mac"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx0: rg_rgmii_phy_ref_clk@0 { + rg_rgmii_phy_ref_clk_0 { + groups = + "rg_rgmii_phy_ref_clk_grp0"; + function = + "rg_rgmii_phy_ref_clk_m0"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx1: rg_rgmii_phy_ref_clk@1 { + rg_rgmii_phy_ref_clk_1 { + groups = + "rg_rgmii_phy_ref_clk_grp1"; + function = + "rg_rgmii_phy_ref_clk_m1"; + }; + }; + + sd0_pmx: sd0@0 { + sd0 { + groups = "sd0_grp"; + function = "sd0"; + }; + }; + + sd0_4bit_pmx: sd0_4bit@0 { + sd0_4bit { + groups = "sd0_4bit_grp"; + function = "sd0_4bit"; + }; + }; + + sd1_pmx: sd1@0 { + sd1 { + groups = "sd1_grp"; + function = "sd1"; + }; + }; + + sd1_4bit_pmx0: sd1_4bit@0 { + sd1_4bit_0 { + groups = "sd1_4bit_grp0"; + function = "sd1_4bit_m0"; + }; + }; + + sd1_4bit_pmx1: sd1_4bit@1 { + sd1_4bit_1 { + groups = "sd1_4bit_grp1"; + function = "sd1_4bit_m1"; + }; + }; + + sd2_pmx0: sd2@0 { + sd2_0 { + groups = "sd2_grp0"; + function = "sd2_m0"; + }; + }; + + sd2_no_cdb_pmx0: sd2_no_cdb@0 { + sd2_no_cdb_0 { + groups = "sd2_no_cdb_grp0"; + function = "sd2_no_cdb_m0"; + }; + }; + + sd3_pmx: sd3@0 { + sd3 { + groups = "sd3_grp"; + function = "sd3"; + }; + }; + + sd5_pmx: sd5@0 { + sd5 { + groups = "sd5_grp"; + function = "sd5"; + }; + }; + + sd6_pmx0: sd6@0 { + sd6_0 { + groups = "sd6_grp0"; + function = "sd6_m0"; + }; + }; + + sd6_pmx1: sd6@1 { + sd6_1 { + groups = "sd6_grp1"; + function = "sd6_m1"; + }; + }; + + sp0_ext_ldo_on_pmx: sp0_ext_ldo_on@0 { + sp0_ext_ldo_on { + groups = "sp0_ext_ldo_on_grp"; + function = "sp0_ext_ldo_on"; + }; + }; + + sp0_qspi_pmx: sp0_qspi@0 { + sp0_qspi { + groups = "sp0_qspi_grp"; + function = "sp0_qspi"; + }; + }; + + sp1_spi_pmx: sp1_spi@0 { + sp1_spi { + groups = "sp1_spi_grp"; + function = "sp1_spi"; + }; + }; + + tpiu_trace_pmx: tpiu_trace@0 { + tpiu_trace { + groups = "tpiu_trace_grp"; + function = "tpiu_trace"; + }; + }; + + uart0_pmx: uart0@0 { + uart0 { + groups = "uart0_grp"; + function = "uart0"; + }; + }; + + uart0_nopause_pmx: uart0_nopause@0 { + uart0_nopause { + groups = "uart0_nopause_grp"; + function = "uart0_nopause"; + }; + }; + + uart1_pmx: uart1@0 { + uart1 { + groups = "uart1_grp"; + function = "uart1"; + }; + }; + + uart2_pmx: uart2@0 { + uart2 { + groups = "uart2_grp"; + function = "uart2"; + }; + }; + + uart3_pmx0: uart3@0 { + uart3_0 { + groups = "uart3_grp0"; + function = "uart3_m0"; + }; + }; + + uart3_pmx1: uart3@1 { + uart3_1 { + groups = "uart3_grp1"; + function = "uart3_m1"; + }; + }; + + uart3_pmx2: uart3@2 { + uart3_2 { + groups = "uart3_grp2"; + function = "uart3_m2"; + }; + }; + + uart3_pmx3: uart3@3 { + uart3_3 { + groups = "uart3_grp3"; + function = "uart3_m3"; + }; + }; + + uart3_nopause_pmx0: uart3_nopause@0 { + uart3_nopause_0 { + groups = "uart3_nopause_grp0"; + function = "uart3_nopause_m0"; + }; + }; + + uart3_nopause_pmx1: uart3_nopause@1 { + uart3_nopause_1 { + groups = "uart3_nopause_grp1"; + function = "uart3_nopause_m1"; + }; + }; + + uart4_pmx0: uart4@0 { + uart4_0 { + groups = "uart4_grp0"; + function = "uart4_m0"; + }; + }; + + uart4_pmx1: uart4@1 { + uart4_1 { + groups = "uart4_grp1"; + function = "uart4_m1"; + }; + }; + + uart4_pmx2: uart4@2 { + uart4_2 { + groups = "uart4_grp2"; + function = "uart4_m2"; + }; + }; + + uart4_nopause_pmx: uart4_nopause@0 { + uart4_nopause { + groups = "uart4_nopause_grp"; + function = "uart4_nopause"; + }; + }; + + usb0_drvvbus_pmx: usb0_drvvbus@0 { + usb0_drvvbus { + groups = "usb0_drvvbus_grp"; + function = "usb0_drvvbus"; + }; + }; + + usb1_drvvbus_pmx: usb1_drvvbus@0 { + usb1_drvvbus { + groups = "usb1_drvvbus_grp"; + function = "usb1_drvvbus"; + }; + }; + + visbus_dout_pmx: visbus_dout@0 { + visbus_dout { + groups = "visbus_dout_grp"; + function = "visbus_dout"; + }; + }; + + vi_vip1_pmx: vi_vip1@0 { + vi_vip1 { + groups = "vi_vip1_grp"; + function = "vi_vip1"; + }; + }; + + vi_vip1_ext_pmx: vi_vip1_ext@0 { + vi_vip1_ext { + groups = "vi_vip1_ext_grp"; + function = "vi_vip1_ext"; + }; + }; + + vi_vip1_low8bit_pmx: vi_vip1_low8bit@0 { + vi_vip1_low8bit { + groups = "vi_vip1_low8bit_grp"; + function = "vi_vip1_low8bit"; + }; + }; + + vi_vip1_high8bit_pmx: vi_vip1_high8bit@0 { + vi_vip1_high8bit { + groups = "vi_vip1_high8bit_grp"; + function = "vi_vip1_high8bit"; + }; + }; }; pmipc { @@ -341,6 +1360,12 @@ clock-names = "gpio0_io"; gpio-controller; interrupt-controller; + + gpio-banks = <2>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>; + gpio-ranges-group-names = "lvds_gpio_grp", + "uart_nand_gpio_grp"; }; nand@17050000 { @@ -446,11 +1471,22 @@ #interrupt-cells = <2>; compatible = "sirf,atlas7-gpio"; reg = <0x13300000 0x1000>; - interrupts = <0 43 0>, <0 44 0>, <0 45 0>; + interrupts = <0 43 0>, <0 44 0>, + <0 45 0>, <0 46 0>; clocks = <&car 84>; clock-names = "gpio1_io"; gpio-controller; interrupt-controller; + + gpio-banks = <4>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>, + <&pinctrl 64 0 0>, + <&pinctrl 96 0 0>; + gpio-ranges-group-names = "gnss_gpio_grp", + "lcd_vip_gpio_grp", + "sdio_i2s_gpio_grp", + "sp_rgmii_gpio_grp"; }; sd2: sdhci@14200000 { @@ -729,6 +1765,10 @@ interrupts = <0 47 0>; gpio-controller; interrupt-controller; + + gpio-banks = <1>; + gpio-ranges = <&pinctrl 0 0 0>; + gpio-ranges-group-names = "rtc_gpio_grp"; }; rtc-iobg@18840000 { From b1999477ed91c3c33891acfe0e18a4457e5c4915 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Tue, 14 Apr 2015 11:55:55 +0000 Subject: [PATCH 117/839] ARM: prima2: move to use REGMAP APIs for rtciobrg all devices behind rtciobrg needs a special way to access. currently they are using a platform-specific API. this patch moves to REGMAP, then clients can use regmap APIs to read/write. for the moment, old APIs are still kept, once all clients move to regmap, old APIs will be dropped. this patch also does minor clean for comments, authors statement. Signed-off-by: Guo Zeng Signed-off-by: Barry Song --- arch/arm/mach-prima2/Kconfig | 1 + arch/arm/mach-prima2/rtciobrg.c | 48 ++++++++++++++++++++++++++-- include/linux/rtc/sirfsoc_rtciobrg.h | 4 +++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index e03d8b5c9ad0..9ab8932403e5 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -4,6 +4,7 @@ menuconfig ARCH_SIRF select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP select NO_IOPORT_MAP + select REGMAP select PINCTRL select PINCTRL_SIRF help diff --git a/arch/arm/mach-prima2/rtciobrg.c b/arch/arm/mach-prima2/rtciobrg.c index 8f66d8f7ca75..d4852d24dc7d 100644 --- a/arch/arm/mach-prima2/rtciobrg.c +++ b/arch/arm/mach-prima2/rtciobrg.c @@ -1,5 +1,5 @@ /* - * RTC I/O Bridge interfaces for CSR SiRFprimaII + * RTC I/O Bridge interfaces for CSR SiRFprimaII/atlas7 * ARM access the registers of SYSRTC, GPSRTC and PWRC through this module * * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ u32 sirfsoc_rtc_iobrg_readl(u32 addr) { unsigned long flags, val; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); val = __sirfsoc_rtc_iobrg_readl(addr); @@ -90,6 +92,7 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) { unsigned long flags; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); sirfsoc_rtc_iobrg_pre_writel(val, addr); @@ -102,6 +105,45 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) } EXPORT_SYMBOL_GPL(sirfsoc_rtc_iobrg_writel); + +static int regmap_iobg_regwrite(void *context, unsigned int reg, + unsigned int val) +{ + sirfsoc_rtc_iobrg_writel(val, reg); + return 0; +} + +static int regmap_iobg_regread(void *context, unsigned int reg, + unsigned int *val) +{ + *val = (u32)sirfsoc_rtc_iobrg_readl(reg); + return 0; +} + +static struct regmap_bus regmap_iobg = { + .reg_write = regmap_iobg_regwrite, + .reg_read = regmap_iobg_regread, +}; + +/** + * devm_regmap_init_iobg(): Initialise managed register map + * + * @iobg: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config) +{ + const struct regmap_bus *bus = ®map_iobg; + + return devm_regmap_init(dev, bus, dev, config); +} +EXPORT_SYMBOL_GPL(devm_regmap_init_iobg); + static const struct of_device_id rtciobrg_ids[] = { { .compatible = "sirf,prima2-rtciobg" }, {} @@ -132,7 +174,7 @@ static int __init sirfsoc_rtciobrg_init(void) } postcore_initcall(sirfsoc_rtciobrg_init); -MODULE_AUTHOR("Zhiwu Song , " - "Barry Song "); +MODULE_AUTHOR("Zhiwu Song "); +MODULE_AUTHOR("Barry Song "); MODULE_DESCRIPTION("CSR SiRFprimaII rtc io bridge"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h index 2c92e1c8e055..aefd997262e4 100644 --- a/include/linux/rtc/sirfsoc_rtciobrg.h +++ b/include/linux/rtc/sirfsoc_rtciobrg.h @@ -9,10 +9,14 @@ #ifndef _SIRFSOC_RTC_IOBRG_H_ #define _SIRFSOC_RTC_IOBRG_H_ +struct regmap_config; + extern void sirfsoc_rtc_iobrg_besyncing(void); extern u32 sirfsoc_rtc_iobrg_readl(u32 addr); extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr); +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config); #endif From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: [PATCH 118/839] sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++------ include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 16 +++++++++ net/sunrpc/clnt.c | 67 +++++++++++++++++++++++++++++++----- net/sunrpc/xprtsock.c | 3 +- 5 files changed, 78 insertions(+), 24 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2aa68976a482..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -268,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f6717170480e..0ba65156a62b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (sk_memalloc_socks()) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - if (xprt->swapper) - task->tk_flags |= RPC_TASK_SWAPPER; - rcu_read_unlock(); - } + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -2479,3 +2472,59 @@ void rpc_show_tasks(struct net *net) spin_unlock(&sn->rpc_client_lock); } #endif + +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + int ret = 0; + struct rpc_xprt *xprt; + + if (atomic_inc_return(&clnt->cl_swapper) == 1) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + ret = xs_swapper(xprt, 1); + xprt_put(xprt); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); + +void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + xs_swapper(xprt, 0); + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); +#endif /* CONFIG_SUNRPC_SWAP */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b142feef0cf4..f344c0f8974c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1955,7 +1955,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) msleep_interruptible(15000); } -#ifdef CONFIG_SUNRPC_SWAP +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, @@ -1987,7 +1987,6 @@ int xs_swapper(struct rpc_xprt *xprt, int enable) return err; } -EXPORT_SYMBOL_GPL(xs_swapper); #else static void xs_set_memalloc(struct rpc_xprt *xprt) { From 8e2281330f9930bccf77cf04027ec60b6cc0fb34 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:26 -0400 Subject: [PATCH 119/839] sunrpc: make xprt->swapper an atomic_t Split xs_swapper into enable/disable functions and eliminate the "enable" flag. Currently, it's racy if you have multiple swapon/swapoff operations running in parallel over the same xprt. Also fix it so that we only set it to a memalloc socket on a 0->1 transition and only clear it on a 1->0 transition. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 +++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtsock.c | 38 ++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc9c39d6d30d..9a3308703c15 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -180,7 +180,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -346,7 +346,8 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); +int xs_swapper_enable(struct rpc_xprt *xprt); +void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0ba65156a62b..801044aeeedd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper(xprt, 1); + ret = xs_swapper_enable(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper(xprt, 0); + xs_swapper_disable(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f344c0f8974c..6538e6fbb7ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1961,31 +1961,43 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt->swapper) + if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } /** - * xs_swapper - Tag this transport as being used for swap. + * xs_swapper_enable - Tag this transport as being used for swap. * @xprt: transport to tag - * @enable: enable/disable * + * Take a reference to this transport on behalf of the rpc_clnt, and + * optionally mark it for swapping if it wasn't already. */ -int xs_swapper(struct rpc_xprt *xprt, int enable) +int +xs_swapper_enable(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int err = 0; - if (enable) { - xprt->swapper++; - xs_set_memalloc(xprt); - } else if (xprt->swapper) { - xprt->swapper--; + if (atomic_inc_return(&xprt->swapper) == 1) + sk_set_memalloc(transport->inet); + return 0; +} + +/** + * xs_swapper_disable - Untag this transport as being used for swap. + * @xprt: transport to tag + * + * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the + * swapper refcount goes to 0, untag the socket as a memalloc socket. + */ +void +xs_swapper_disable(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (atomic_dec_and_test(&xprt->swapper)) sk_clear_memalloc(transport->inet); - } - - return err; } #else static void xs_set_memalloc(struct rpc_xprt *xprt) From 264d1df3b34804a7d440d77771020f616a573528 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:27 -0400 Subject: [PATCH 120/839] sunrpc: if we're closing down a socket, clear memalloc on it first We currently increment the memalloc_socks counter if we have a xprt that is associated with a swapfile. That socket can be replaced however during a reconnect event, and the memalloc_socks counter is never decremented if that occurs. When tearing down a xprt socket, check to see if the xprt is set up for swapping and sk_clear_memalloc before releasing the socket if so. Acked-by: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6538e6fbb7ba..31b856a2935c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -827,6 +827,9 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + if (atomic_read(&transport->xprt.swapper)) + sk_clear_memalloc(sk); + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; From d6e971d8ecafee18ff9cc1ac646eb0817ab8b143 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:28 -0400 Subject: [PATCH 121/839] sunrpc: lock xprt before trying to set memalloc on the sockets It's possible that we could race with a call to xs_reset_transport, in which case the xprt->inet pointer could be zeroed out while we're accessing it. Lock the xprt before we try to set memalloc on it. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 31b856a2935c..e0efd8514886 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1959,11 +1959,22 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) +/* + * Note that this should be called with XPRT_LOCKED held (or when we otherwise + * know that we have exclusive access to the socket), to guard against + * races with xs_reset_transport. + */ static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + /* + * If there's no sock, then we have nothing to set. The + * reconnecting process will get it for us. + */ + if (!transport->inet) + return; if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } @@ -1978,11 +1989,15 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) int xs_swapper_enable(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_inc_return(&xprt->swapper) == 1) - sk_set_memalloc(transport->inet); + if (atomic_inc_return(&xprt->swapper) != 1) + return 0; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return -ERESTARTSYS; + if (xs->inet) + sk_set_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); return 0; } @@ -1996,11 +2011,15 @@ xs_swapper_enable(struct rpc_xprt *xprt) void xs_swapper_disable(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_dec_and_test(&xprt->swapper)) - sk_clear_memalloc(transport->inet); + if (!atomic_dec_and_test(&xprt->swapper)) + return; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return; + if (xs->inet) + sk_clear_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) From d67fa4d85a2143b46052b2e9ccc6749a4c97b2de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:29 -0400 Subject: [PATCH 122/839] sunrpc: turn swapper_enable/disable functions into rpc_xprt_ops RDMA xprts don't have a sock_xprt, but an rdma_xprt, so the xs_swapper_enable/disable functions will likely oops when fed an RDMA xprt. Turn these functions into rpc_xprt_ops so that that doesn't occur. For now the RDMA versions are no-ops that just return -EINVAL on an attempt to swapon. Cc: Chuck Lever Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 15 ++++++++++++++- net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++------ 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9a3308703c15..b6096592b1d4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,8 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); }; /* @@ -328,6 +330,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -346,8 +360,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper_enable(struct rpc_xprt *xprt); -void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 801044aeeedd..576d6ae39f25 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper_enable(xprt); + ret = xprt_enable_swap(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper_disable(xprt); + xprt_disable_swap(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 54f23b1be986..ebf6fe759f0e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -682,6 +682,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count); } +static int +xprt_rdma_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xprt_rdma_disable_swap(struct rpc_xprt *xprt) +{ +} + /* * Plumbing for rpc transport switch and kernel module */ @@ -700,7 +711,9 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats + .print_stats = xprt_rdma_print_stats, + .enable_swap = xprt_rdma_enable_swap, + .disable_swap = xprt_rdma_disable_swap, }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e0efd8514886..bf20726d4ab5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1980,14 +1980,14 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) } /** - * xs_swapper_enable - Tag this transport as being used for swap. + * xs_enable_swap - Tag this transport as being used for swap. * @xprt: transport to tag * * Take a reference to this transport on behalf of the rpc_clnt, and * optionally mark it for swapping if it wasn't already. */ -int -xs_swapper_enable(struct rpc_xprt *xprt) +static int +xs_enable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2002,14 +2002,14 @@ xs_swapper_enable(struct rpc_xprt *xprt) } /** - * xs_swapper_disable - Untag this transport as being used for swap. + * xs_disable_swap - Untag this transport as being used for swap. * @xprt: transport to tag * * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the * swapper refcount goes to 0, untag the socket as a memalloc socket. */ -void -xs_swapper_disable(struct rpc_xprt *xprt) +static void +xs_disable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2025,6 +2025,17 @@ xs_swapper_disable(struct rpc_xprt *xprt) static void xs_set_memalloc(struct rpc_xprt *xprt) { } + +static int +xs_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ +} #endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2488,6 +2499,8 @@ static struct rpc_xprt_ops xs_local_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_udp_ops = { @@ -2507,6 +2520,8 @@ static struct rpc_xprt_ops xs_udp_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_udp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2523,6 +2538,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; /* @@ -2540,6 +2557,8 @@ static struct rpc_xprt_ops bc_tcp_ops = { .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) From c86c90c656fa2c94c2491dd86cf09501193f0dad Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 4 Jun 2015 17:04:17 +0200 Subject: [PATCH 123/839] NFSv4: handle nfs4_get_referral failure nfs4_proc_lookup_common is supposed to return a posix error, we have to handle any error returned that isn't errno Reported-by: Olga Kornievskaia Signed-off-by: Frank S. Filz Signed-off-by: Dominique Martinet Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..d689ea37be84 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3355,6 +3355,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, goto out; case -NFS4ERR_MOVED: err = nfs4_get_referral(client, dir, name, fattr, fhandle); + if (err == -NFS4ERR_MOVED) + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); goto out; case -NFS4ERR_WRONGSEC: err = -EPERM; From 11598b8ff2b97cf034d0c025cf125c92b574bafc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 10 Jun 2015 16:54:28 -0400 Subject: [PATCH 124/839] NFS: Remove unused nfs_rw_ops->rw_release() function This was only ever set to nfs_writeback_release_common(), a function which is completely empty. Let's just drop this function pointer and simplify the code a bit. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 -- fs/nfs/write.c | 6 ------ include/linux/nfs_page.h | 1 - 3 files changed, 9 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..e9953ab1ac40 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -690,8 +690,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - if (hdr->rw_ops->rw_release) - hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a1..d0f1f210342e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1347,11 +1347,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) -{ - /* do nothing! */ -} - /* * Special version of should_remove_suid() that ignores capabilities. */ @@ -2012,7 +2007,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, - .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, .rw_initiate = nfs_initiate_write, diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); From 4a06825839889cc1756d0dd8a52d6b1071ee0263 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:02:25 -0400 Subject: [PATCH 125/839] SUNRPC: Transport fault injection It has been exceptionally useful to exercise the logic that handles local immediate errors and RDMA connection loss. To enable developers to test this regularly and repeatably, add logic to simulate connection loss every so often. Fault injection is disabled by default. It is enabled with $ sudo echo xxx > /sys/kernel/debug/sunrpc/inject_fault/disconnect where "xxx" is a large positive number of transport method calls before a disconnect. A value of several thousand is usually a good number that allows reasonable forward progress while still causing a lot of connection drops. These hooks are disabled when SUNRPC_DEBUG is turned off. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 19 ++++++++ net/sunrpc/clnt.c | 1 + net/sunrpc/debugfs.c | 77 +++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 2 + net/sunrpc/xprtrdma/transport.c | 11 +++++ net/sunrpc/xprtsock.c | 10 +++++ 6 files changed, 120 insertions(+) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b6096592b1d4..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -135,6 +135,7 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -244,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -445,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 576d6ae39f25..f41ed882ed3c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1605,6 +1605,7 @@ call_allocate(struct rpc_task *task) req->rq_callsize + req->rq_rcvsize); if (req->rq_buffer != NULL) return; + xprt_inject_disconnect(xprt); dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..7cc1b8a6ef6d 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -10,9 +10,12 @@ #include "netns.h" static struct dentry *topdir; +static struct dentry *rpc_fault_dir; static struct dentry *rpc_clnt_dir; static struct dentry *rpc_xprt_dir; +unsigned int rpc_inject_disconnect; + struct rpc_clnt_iter { struct rpc_clnt *clnt; loff_t pos; @@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; } + + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); } void @@ -266,11 +271,78 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) xprt->debugfs = NULL; } +static int +fault_open(struct inode *inode, struct file *filp) +{ + filp->private_data = kmalloc(128, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int +fault_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static ssize_t +fault_disconnect_read(struct file *filp, char __user *user_buf, + size_t len, loff_t *offset) +{ + char *buffer = (char *)filp->private_data; + size_t size; + + size = sprintf(buffer, "%u\n", rpc_inject_disconnect); + return simple_read_from_buffer(user_buf, len, offset, buffer, size); +} + +static ssize_t +fault_disconnect_write(struct file *filp, const char __user *user_buf, + size_t len, loff_t *offset) +{ + char buffer[16]; + + len = min(len, sizeof(buffer) - 1); + if (copy_from_user(buffer, user_buf, len)) + return -EFAULT; + buffer[len] = '\0'; + if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) + return -EINVAL; + return len; +} + +static const struct file_operations fault_disconnect_fops = { + .owner = THIS_MODULE, + .open = fault_open, + .read = fault_disconnect_read, + .write = fault_disconnect_write, + .release = fault_release, +}; + +static struct dentry * +inject_fault_dir(struct dentry *topdir) +{ + struct dentry *faultdir; + + faultdir = debugfs_create_dir("inject_fault", topdir); + if (!faultdir) + return NULL; + + if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + NULL, &fault_disconnect_fops)) + return NULL; + + return faultdir; +} + void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; rpc_xprt_dir = NULL; } @@ -282,6 +354,10 @@ sunrpc_debugfs_init(void) if (!topdir) return; + rpc_fault_dir = inject_fault_dir(topdir); + if (!rpc_fault_dir) + goto out_remove; + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) goto out_remove; @@ -294,5 +370,6 @@ sunrpc_debugfs_init(void) out_remove: debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..e1fb538e10e0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -967,6 +967,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; @@ -1285,6 +1286,7 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ebf6fe759f0e..b8aac23b1b0c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -246,6 +246,16 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connecting(xprt); } +static void +xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, + rx_xprt); + + pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + rdma_disconnect(r_xprt->rx_ia.ri_id); +} + /* * xprt_rdma_destroy * @@ -714,6 +724,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, + .inject_disconnect = xprt_rdma_inject_disconnect }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf20726d4ab5..fda8ec8c74c0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -866,6 +866,13 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_inject_disconnect(struct rpc_xprt *xprt) +{ + dprintk("RPC: injecting transport disconnect on xprt=%p\n", + xprt); + xprt_disconnect_done(xprt); +} + static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -2522,6 +2529,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .print_stats = xs_udp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2540,6 +2548,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; /* @@ -2559,6 +2568,7 @@ static struct rpc_xprt_ops bc_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) From 4ed0d83d057c16efeca4210ba8fb2b5029333fb3 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 10 Jun 2015 20:15:29 +0530 Subject: [PATCH 126/839] NFS: Convert use of __constant_htonl to htonl In little endian cases, the macro htonl unfolds to __swab32 which provides special case for constants. In big endian cases, __constant_htonl and htonl expand directly to the same expression. So, replace __constant_htonl with htonl with the goal of getting rid of the definition of __constant_htonl completely. The semantic patch that performs this transformation is as follows: @@expression x;@@ - __constant_htonl(x) + htonl(x) Signed-off-by: Vaishali Thakkar Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 19ca95cdfd9b..6b1697a01dde 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r xdr_init_encode(&xdr_out, &rqstp->rq_res, p); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); - if (status == __constant_htonl(NFS4ERR_RESOURCE)) + if (status == htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; if (hdr_arg.minorversion == 0) { From 2e0d737fc76f8d31e2265b3f0392749f75efd735 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 20 Jan 2015 12:52:48 +0000 Subject: [PATCH 127/839] kconfig: don't silently ignore unhandled characters At the very least we should tell people that what they wrote is not what the utility understands. Signed-off-by: Jan Beulich Acked-by: Paul Bolle Signed-off-by: Michal Marek --- scripts/kconfig/zconf.l | 7 +- scripts/kconfig/zconf.lex.c_shipped | 203 +++++++++++++++------------- 2 files changed, 117 insertions(+), 93 deletions(-) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 6c62d93b4ffb..16741396d264 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -141,7 +141,12 @@ n [A-Za-z0-9_] } #.* /* comment */ \\\n current_file->lineno++; - . + [[:blank:]]+ + . { + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *yytext); + } <> { BEGIN(INITIAL); } diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index 349a7f24315b..746691ed010e 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -414,14 +414,14 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39 }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39 }, { @@ -435,7 +435,7 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, -13, 39, 40, -13, -13, 41, -13, -13, -13, + 11, -13, 40, 41, -13, -13, 42, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13 }, @@ -446,8 +446,8 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43 }, { @@ -462,28 +462,28 @@ static yyconst flex_int16_t yy_nxt[][17] = { 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, - -18, -18, -18, 44, -18, -18, -18 + -18, -18, -18, 45, -18, -18, -18 }, { - 11, 45, 45, -19, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, 46, 46, -19, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46 }, { - 11, -20, 46, 47, -20, -20, -20, -20, -20, -20, + 11, -20, 47, 48, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20 }, { - 11, 48, -21, -21, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, 49, -21, -21, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49 }, { - 11, 49, 49, 50, 49, -22, 49, 49, -22, 49, - 49, 49, 49, 49, 49, -22, 49 + 11, 50, 50, 51, 50, -22, 50, 50, -22, 50, + 50, 50, 50, 50, 50, -22, 50 }, { @@ -498,8 +498,8 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, 51, 51, 52, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 51, 51, 51 + 11, 52, 52, 53, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52 }, { @@ -508,33 +508,33 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, -27, -27, -27, -27, -27, -27, -27, -27, -27, + 11, -27, 54, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27 }, { 11, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, 53, -28, -28 + -28, -28, -28, -28, -28, -28, -28 }, { 11, -29, -29, -29, -29, -29, -29, -29, -29, -29, - -29, -29, -29, -29, -29, -29, -29 + -29, -29, -29, -29, 55, -29, -29 }, { - 11, 54, 54, -30, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -30, -30, -30, -30, -30, -30, -30, -30, -30, + -30, -30, -30, -30, -30, -30, -30 }, { - 11, -31, -31, -31, -31, -31, -31, 55, -31, -31, - -31, -31, -31, -31, -31, -31, -31 + 11, 56, 56, -31, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56 }, { - 11, -32, -32, -32, -32, -32, -32, -32, -32, -32, + 11, -32, -32, -32, -32, -32, -32, 57, -32, -32, -32, -32, -32, -32, -32, -32, -32 }, @@ -545,100 +545,100 @@ static yyconst flex_int16_t yy_nxt[][17] = { 11, -34, -34, -34, -34, -34, -34, -34, -34, -34, - -34, 56, 57, 57, -34, -34, -34 + -34, -34, -34, -34, -34, -34, -34 }, { 11, -35, -35, -35, -35, -35, -35, -35, -35, -35, - -35, 57, 57, 57, -35, -35, -35 + -35, 58, 59, 59, -35, -35, -35 }, { 11, -36, -36, -36, -36, -36, -36, -36, -36, -36, - -36, -36, -36, -36, -36, -36, -36 + -36, 59, 59, 59, -36, -36, -36 }, { - 11, -37, -37, 58, -37, -37, -37, -37, -37, -37, + 11, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37, -37 }, { - 11, -38, -38, -38, -38, -38, -38, -38, -38, -38, - -38, -38, -38, -38, -38, -38, 59 + 11, -38, -38, 60, -38, -38, -38, -38, -38, -38, + -38, -38, -38, -38, -38, -38, -38 }, { - 11, -39, 39, 40, -39, -39, 41, -39, -39, -39, - -39, -39, -39, -39, -39, -39, -39 + 11, -39, -39, -39, -39, -39, -39, -39, -39, -39, + -39, -39, -39, -39, -39, -39, 61 }, { - 11, -40, -40, -40, -40, -40, -40, -40, -40, -40, + 11, -40, 40, 41, -40, -40, 42, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, -41, -41, -41, -41, -41, -41, -41, -41, -41, + -41, -41, -41, -41, -41, -41, -41 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43 }, { - 11, -43, -43, -43, -43, -43, -43, -43, -43, -43, - -43, -43, -43, -43, -43, -43, -43 + 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43 }, { 11, -44, -44, -44, -44, -44, -44, -44, -44, -44, - -44, -44, -44, 44, -44, -44, -44 + -44, -44, -44, -44, -44, -44, -44 }, { - 11, 45, 45, -45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, -45, -45, -45, -45, -45, -45, -45, -45, -45, + -45, -45, -45, 45, -45, -45, -45 }, { - 11, -46, 46, 47, -46, -46, -46, -46, -46, -46, - -46, -46, -46, -46, -46, -46, -46 + 11, 46, 46, -46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46 }, { - 11, 48, -47, -47, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, -47, 47, 48, -47, -47, -47, -47, -47, -47, + -47, -47, -47, -47, -47, -47, -47 }, { - 11, -48, -48, -48, -48, -48, -48, -48, -48, -48, - -48, -48, -48, -48, -48, -48, -48 + 11, 49, -48, -48, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49 }, { - 11, 49, 49, 50, 49, -49, 49, 49, -49, 49, - 49, 49, 49, 49, 49, -49, 49 + 11, -49, -49, -49, -49, -49, -49, -49, -49, -49, + -49, -49, -49, -49, -49, -49, -49 }, { - 11, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50 + 11, 50, 50, 51, 50, -50, 50, 50, -50, 50, + 50, 50, 50, 50, 50, -50, 50 }, { - 11, -51, -51, 52, -51, -51, -51, -51, -51, -51, + 11, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51, -51 }, { - 11, -52, -52, -52, -52, -52, -52, -52, -52, -52, + 11, -52, -52, 53, -52, -52, -52, -52, -52, -52, -52, -52, -52, -52, -52, -52, -52 }, @@ -648,8 +648,8 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, 54, 54, -54, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -54, 54, -54, -54, -54, -54, -54, -54, -54, + -54, -54, -54, -54, -54, -54, -54 }, @@ -659,29 +659,39 @@ static yyconst flex_int16_t yy_nxt[][17] = }, { - 11, -56, -56, -56, -56, -56, -56, -56, -56, -56, - -56, 60, 57, 57, -56, -56, -56 + 11, 56, 56, -56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56 }, { 11, -57, -57, -57, -57, -57, -57, -57, -57, -57, - -57, 57, 57, 57, -57, -57, -57 + -57, -57, -57, -57, -57, -57, -57 }, { 11, -58, -58, -58, -58, -58, -58, -58, -58, -58, - -58, -58, -58, -58, -58, -58, -58 + -58, 62, 59, 59, -58, -58, -58 }, { 11, -59, -59, -59, -59, -59, -59, -59, -59, -59, - -59, -59, -59, -59, -59, -59, -59 + -59, 59, 59, 59, -59, -59, -59 }, { 11, -60, -60, -60, -60, -60, -60, -60, -60, -60, - -60, 57, 57, 57, -60, -60, -60 + -60, -60, -60, -60, -60, -60, -60 + }, + + { + 11, -61, -61, -61, -61, -61, -61, -61, -61, -61, + -61, -61, -61, -61, -61, -61, -61 + }, + + { + 11, -62, -62, -62, -62, -62, -62, -62, -62, -62, + -62, 59, 59, 59, -62, -62, -62 }, } ; @@ -701,8 +711,8 @@ static void yy_fatal_error (yyconst char msg[] ); *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 33 -#define YY_END_OF_BUFFER 34 +#define YY_NUM_RULES 34 +#define YY_END_OF_BUFFER 35 /* This struct is not used in this scanner, but its presence is necessary. */ struct yy_trans_info @@ -710,14 +720,15 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static yyconst flex_int16_t yy_accept[61] = +static yyconst flex_int16_t yy_accept[63] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 34, 5, 4, 2, 3, 7, 8, 6, 32, 29, - 31, 24, 28, 27, 26, 22, 17, 13, 16, 20, - 22, 11, 12, 19, 19, 14, 22, 22, 4, 2, - 3, 3, 1, 6, 32, 29, 31, 30, 24, 23, - 26, 25, 15, 20, 9, 19, 19, 21, 10, 18 + 35, 5, 4, 2, 3, 7, 8, 6, 33, 30, + 32, 25, 29, 28, 27, 23, 22, 17, 13, 16, + 20, 23, 11, 12, 19, 19, 14, 23, 23, 4, + 2, 3, 3, 1, 6, 33, 30, 32, 31, 25, + 24, 27, 26, 22, 15, 20, 9, 19, 19, 21, + 10, 18 } ; static yyconst flex_int32_t yy_ec[256] = @@ -920,7 +931,7 @@ static int input (void ); /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ -#define ECHO do { if (fwrite( zconftext, zconfleng, 1, zconfout )) {} } while (0) +#define ECHO fwrite( zconftext, zconfleng, 1, zconfout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -1182,6 +1193,14 @@ current_file->lineno++; case 22: YY_RULE_SETUP + YY_BREAK +case 23: +YY_RULE_SETUP +{ + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *zconftext); + } YY_BREAK case YY_STATE_EOF(PARAM): { @@ -1189,8 +1208,8 @@ case YY_STATE_EOF(PARAM): } YY_BREAK -case 23: -/* rule 23 can match eol */ +case 24: +/* rule 24 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1201,14 +1220,14 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 24: -YY_RULE_SETUP -{ - append_string(zconftext, zconfleng); - } - YY_BREAK case 25: -/* rule 25 can match eol */ +YY_RULE_SETUP +{ + append_string(zconftext, zconfleng); + } + YY_BREAK +case 26: +/* rule 26 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1219,13 +1238,13 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 26: +case 27: YY_RULE_SETUP { append_string(zconftext + 1, zconfleng - 1); } YY_BREAK -case 27: +case 28: YY_RULE_SETUP { if (str == zconftext[0]) { @@ -1236,8 +1255,8 @@ YY_RULE_SETUP append_string(zconftext, 1); } YY_BREAK -case 28: -/* rule 28 can match eol */ +case 29: +/* rule 29 can match eol */ YY_RULE_SETUP { printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno()); @@ -1252,7 +1271,7 @@ case YY_STATE_EOF(STRING): } YY_BREAK -case 29: +case 30: YY_RULE_SETUP { ts = 0; @@ -1277,8 +1296,8 @@ YY_RULE_SETUP } } YY_BREAK -case 30: -/* rule 30 can match eol */ +case 31: +/* rule 31 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1289,15 +1308,15 @@ YY_RULE_SETUP return T_HELPTEXT; } YY_BREAK -case 31: -/* rule 31 can match eol */ +case 32: +/* rule 32 can match eol */ YY_RULE_SETUP { current_file->lineno++; append_string("\n", 1); } YY_BREAK -case 32: +case 33: YY_RULE_SETUP { while (zconfleng) { @@ -1328,7 +1347,7 @@ case YY_STATE_EOF(COMMAND): yyterminate(); } YY_BREAK -case 33: +case 34: YY_RULE_SETUP YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK From 50ab9a69275dca588cb5ad5038657f069bbdeb56 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sat, 21 Mar 2015 02:50:01 +0100 Subject: [PATCH 128/839] kbuild: include core debug info when DEBUG_INFO_REDUCED With CONFIG_DEBUG_INFO_REDUCED, we do get quite a lot of debug info (around 22.7 MB for a defconfig+DEBUG_INFO_REDUCED). However, the "basenames must match" rule used by -femit-struct-debug-baseonly option means that we miss some core data structures, such as struct {device, file, inode, mm_struct, page} etc. We can easily get these included as well, while still getting the benefits of CONFIG_DEBUG_INFO_REDUCED (faster build times and smaller individual object files): All it takes is a dummy translation unit including a few strategic headers and compiled with a flag overriding -femit-struct-debug-baseonly. This increases the size of .debug_info by ~0.3%, but these 90 KB contain some rather useful info. Signed-off-by: Rasmus Villemoes Signed-off-by: Michal Marek --- lib/Makefile | 3 +++ lib/debug_info.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 lib/debug_info.c diff --git a/lib/Makefile b/lib/Makefile index 6c37933336a0..c918538110ed 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,9 @@ CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o +CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o diff --git a/lib/debug_info.c b/lib/debug_info.c new file mode 100644 index 000000000000..2edbe27517ed --- /dev/null +++ b/lib/debug_info.c @@ -0,0 +1,27 @@ +/* + * This file exists solely to ensure debug information for some core + * data structures is included in the final image even for + * CONFIG_DEBUG_INFO_REDUCED. Please do not add actual code. However, + * adding appropriate #includes is fine. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include From e937ee714b2df638275a61a99e0d033682f764d2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:58:46 +0800 Subject: [PATCH 129/839] nfs: Only update callback sequnce id when CB_SEQUENCE success When testing pnfs layout, nfsd got error NFS4ERR_SEQ_MISORDERED. It is caused by nfs return NFS4ERR_DELAY before validate_seqid(), don't update the sequnce id, but nfsd updates the sequnce id !!! According to RFC5661 20.9.3, " If CB_SEQUENCE returns an error, then the state of the slot (sequence ID, cached reply) MUST NOT change. " Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 197806fb87ff..7e9653aae5d8 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { - slot->seq_nr++; + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) goto out_ok; - } /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { @@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_process_state *cps) { struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -429,7 +428,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) goto out; + tbl = &clp->cl_session->bc_slot_table; + slot = tbl->slots + args->csa_slotid; spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ @@ -444,7 +445,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } - status = validate_seqid(&clp->cl_session->bc_slot_table, args); + status = validate_seqid(tbl, args); spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; @@ -468,6 +469,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + /* + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + slot->seq_nr++; + out: cps->clp = clp; /* put in nfs4_callback_compound */ for (i = 0; i < args->csa_nrclists; i++) From 5fd23f7e1d74c0fd100ffb0b04dc85727760d9ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Jun 2015 13:47:10 -0400 Subject: [PATCH 130/839] SUNRPC: Address kbuild warning in net/sunrpc/debugfs.c Cross-compile test on ARCH=mn10300: In file included from include/linux/list.h:8:0, from include/linux/wait.h:6, from include/linux/fs.h:6, from include/linux/debugfs.h:18, from net/sunrpc/debugfs.c:7: net/sunrpc/debugfs.c: In function 'fault_disconnect_write': include/linux/kernel.h:723:17: warning: comparison of distinct pointer types lacks a cast (void) (&_min1 == &_min2); \ ^ >> net/sunrpc/debugfs.c:307:8: note: in expansion of macro 'min' len = min(len, sizeof(buffer) - 1); Fixes: ('SUNRPC: Transport fault injection') Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 7cc1b8a6ef6d..e7b4d93566df 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -304,7 +304,8 @@ fault_disconnect_write(struct file *filp, const char __user *user_buf, { char buffer[16]; - len = min(len, sizeof(buffer) - 1); + if (len >= sizeof(buffer)) + len = sizeof(buffer) - 1; if (copy_from_user(buffer, user_buf, len)) return -EFAULT; buffer[len] = '\0'; From 6f02dc88be1d5ecfcc2b708250f4e5d49295326c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 4 Jun 2015 18:40:13 -0400 Subject: [PATCH 131/839] nfs: deny backchannel RPCs with an incorrect authflavor instead of dropping them A drop should really only be done when the frame is malformed or we have reason to think that there is some sort of DoS going on. When we get an RPC with bad auth, we should send back an error instead. Cc: Andy Adamson Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8d129bb7355a..682529c00996 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) * pg_authenticate method for nfsv4 callback threads. * * The authflavor has been negotiated, so an incorrect flavor is a server - * bug. Drop packets with incorrect authflavor. + * bug. Deny packets with incorrect authflavor. * * All other checking done after NFS decoding where the nfs_client can be * found in nfs4_callback_compound @@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) - return SVC_DROP; + return SVC_DENIED; break; case RPC_AUTH_GSS: /* No RPC_AUTH_GSS support yet in NFSv4.1 */ if (svc_is_backchannel(rqstp)) - return SVC_DROP; + return SVC_DENIED; } return SVC_OK; } From 0579c8d2084adf932f09fe7edb5ee9328795d89f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:59:01 +0800 Subject: [PATCH 132/839] nfs: Initialize cb_sequenceres information before validate_seqid() For a cb_layoutrecall replay, nfsd got CB_SEQUENCE status of zero, but all informations of cb_sequenceres are zero too !!! validate_seqid() return NFS4ERR_RETRY_UNCACHED_REP for a replay, and skip the initlize cb_sequenceres. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7e9653aae5d8..17a5c571a006 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -445,6 +445,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + status = validate_seqid(tbl, args); spin_unlock(&tbl->slot_tbl_lock); if (status) @@ -462,13 +469,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } - memcpy(&res->csr_sessionid, &args->csa_sessionid, - sizeof(res->csr_sessionid)); - res->csr_sequenceid = args->csa_sequenceid; - res->csr_slotid = args->csa_slotid; - res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - /* * RFC5661 20.9.3 * If CB_SEQUENCE returns an error, then the state of the slot From 4e54ab8d8c744f977ba13404342aeb260e353330 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Jun 2015 21:13:58 -0400 Subject: [PATCH 133/839] NFS: Ensure that we update the sequence id under the slot table lock Fix a callback slot table regression. Fixes: e937ee714b2d ("nfs: Only update callback sequnce id when CB_SEQUENCE success") Cc: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 17a5c571a006..29e3c1b011b7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -435,14 +435,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { - spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session * in order to reset it. */ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) status = htonl(NFS4ERR_BADSESSION); - goto out; + goto out_unlock; } memcpy(&res->csr_sessionid, &args->csa_sessionid, @@ -453,9 +452,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; status = validate_seqid(tbl, args); - spin_unlock(&tbl->slot_tbl_lock); if (status) - goto out; + goto out_unlock; cps->slotid = args->csa_slotid; @@ -466,7 +464,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, */ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { status = htonl(NFS4ERR_DELAY); - goto out; + goto out_unlock; } /* @@ -475,6 +473,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, * (sequence ID, cached reply) MUST NOT change. */ slot->seq_nr++; +out_unlock: + spin_unlock(&tbl->slot_tbl_lock); out: cps->clp = clp; /* put in nfs4_callback_compound */ From cc16d664e21ef640faaf51e9952384cf90b92d9f Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 26 May 2015 08:28:29 +0000 Subject: [PATCH 134/839] hwspinlock: add a CSR atlas7 driver Add hwspinlock support for the CSR atlas7 SoC. The Hardware Spinlock device on atlas7 provides hardware assistance for synchronization between the multiple processors in the system (dual Cortex-A7, CAN bus Cortex-M3 and audio DSP). Reviewed-by: Suman Anna Reviewed-by: Bjorn Andersson Signed-off-by: Wei Chen Signed-off-by: Barry Song Signed-off-by: Ohad Ben-Cohen --- drivers/hwspinlock/Kconfig | 12 +++ drivers/hwspinlock/Makefile | 1 + drivers/hwspinlock/sirf_hwspinlock.c | 136 +++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 drivers/hwspinlock/sirf_hwspinlock.c diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig index b5b4f52cef73..73a401662853 100644 --- a/drivers/hwspinlock/Kconfig +++ b/drivers/hwspinlock/Kconfig @@ -30,6 +30,18 @@ config HWSPINLOCK_QCOM If unsure, say N. +config HWSPINLOCK_SIRF + tristate "SIRF Hardware Spinlock device" + depends on ARCH_SIRF + select HWSPINLOCK + help + Say y here to support the SIRF Hardware Spinlock device, which + provides a synchronisation mechanism for the various processors + on the SoC. + + It's safe to say n here if you're not interested in SIRF hardware + spinlock or just want a bare minimum kernel. + config HSEM_U8500 tristate "STE Hardware Semaphore functionality" depends on ARCH_U8500 diff --git a/drivers/hwspinlock/Makefile b/drivers/hwspinlock/Makefile index 68f95d9a4855..6b59cb5a4f3a 100644 --- a/drivers/hwspinlock/Makefile +++ b/drivers/hwspinlock/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_HWSPINLOCK) += hwspinlock_core.o obj-$(CONFIG_HWSPINLOCK_OMAP) += omap_hwspinlock.o obj-$(CONFIG_HWSPINLOCK_QCOM) += qcom_hwspinlock.o +obj-$(CONFIG_HWSPINLOCK_SIRF) += sirf_hwspinlock.o obj-$(CONFIG_HSEM_U8500) += u8500_hsem.o diff --git a/drivers/hwspinlock/sirf_hwspinlock.c b/drivers/hwspinlock/sirf_hwspinlock.c new file mode 100644 index 000000000000..16018544d431 --- /dev/null +++ b/drivers/hwspinlock/sirf_hwspinlock.c @@ -0,0 +1,136 @@ +/* + * SIRF hardware spinlock driver + * + * Copyright (c) 2015 Cambridge Silicon Radio Limited, a CSR plc group company. + * + * Licensed under GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hwspinlock_internal.h" + +struct sirf_hwspinlock { + void __iomem *io_base; + struct hwspinlock_device bank; +}; + +/* Number of Hardware Spinlocks*/ +#define HW_SPINLOCK_NUMBER 30 + +/* Hardware spinlock register offsets */ +#define HW_SPINLOCK_BASE 0x404 +#define HW_SPINLOCK_OFFSET(x) (HW_SPINLOCK_BASE + 0x4 * (x)) + +static int sirf_hwspinlock_trylock(struct hwspinlock *lock) +{ + void __iomem *lock_addr = lock->priv; + + /* attempt to acquire the lock by reading value == 1 from it */ + return !!readl(lock_addr); +} + +static void sirf_hwspinlock_unlock(struct hwspinlock *lock) +{ + void __iomem *lock_addr = lock->priv; + + /* release the lock by writing 0 to it */ + writel(0, lock_addr); +} + +static const struct hwspinlock_ops sirf_hwspinlock_ops = { + .trylock = sirf_hwspinlock_trylock, + .unlock = sirf_hwspinlock_unlock, +}; + +static int sirf_hwspinlock_probe(struct platform_device *pdev) +{ + struct sirf_hwspinlock *hwspin; + struct hwspinlock *hwlock; + int idx, ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + hwspin = devm_kzalloc(&pdev->dev, sizeof(*hwspin) + + sizeof(*hwlock) * HW_SPINLOCK_NUMBER, GFP_KERNEL); + if (!hwspin) + return -ENOMEM; + + /* retrieve io base */ + hwspin->io_base = of_iomap(pdev->dev.of_node, 0); + if (!hwspin->io_base) + return -ENOMEM; + + for (idx = 0; idx < HW_SPINLOCK_NUMBER; idx++) { + hwlock = &hwspin->bank.lock[idx]; + hwlock->priv = hwspin->io_base + HW_SPINLOCK_OFFSET(idx); + } + + platform_set_drvdata(pdev, hwspin); + + pm_runtime_enable(&pdev->dev); + + ret = hwspin_lock_register(&hwspin->bank, &pdev->dev, + &sirf_hwspinlock_ops, 0, + HW_SPINLOCK_NUMBER); + if (ret) + goto reg_failed; + + return 0; + +reg_failed: + pm_runtime_disable(&pdev->dev); + iounmap(hwspin->io_base); + + return ret; +} + +static int sirf_hwspinlock_remove(struct platform_device *pdev) +{ + struct sirf_hwspinlock *hwspin = platform_get_drvdata(pdev); + int ret; + + ret = hwspin_lock_unregister(&hwspin->bank); + if (ret) { + dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + pm_runtime_disable(&pdev->dev); + + iounmap(hwspin->io_base); + + return 0; +} + +static const struct of_device_id sirf_hwpinlock_ids[] = { + { .compatible = "sirf,hwspinlock", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sirf_hwpinlock_ids); + +static struct platform_driver sirf_hwspinlock_driver = { + .probe = sirf_hwspinlock_probe, + .remove = sirf_hwspinlock_remove, + .driver = { + .name = "atlas7_hwspinlock", + .of_match_table = of_match_ptr(sirf_hwpinlock_ids), + }, +}; + +module_platform_driver(sirf_hwspinlock_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SIRF Hardware spinlock driver"); +MODULE_AUTHOR("Wei Chen "); From b97cadee805e49e823ffd5ad692bb4a9bbda248e Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 26 May 2015 08:28:30 +0000 Subject: [PATCH 135/839] DT: hwspinlock: add the CSR atlas7 hwspinlock bindings document The Hardware Spinlock device on atlas7 provides hardware assistance for synchronization between the multiple processors in the system (dual Cortex-A7, CAN bus Cortex-M3 and audio DSP). This patch adds the DT bindings information for this hwspinlock module. Reviewed-by: Suman Anna Reviewed-by: Bjorn Andersson Signed-off-by: Wei Chen Signed-off-by: Barry Song Signed-off-by: Ohad Ben-Cohen --- .../bindings/hwlock/sirf,hwspinlock.txt | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt diff --git a/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt new file mode 100644 index 000000000000..9bb1240a68e0 --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt @@ -0,0 +1,28 @@ +SIRF Hardware spinlock device Binding +----------------------------------------------- + +Required properties : +- compatible : shall contain only one of the following: + "sirf,hwspinlock" + +- reg : the register address of hwspinlock + +- #hwlock-cells : hwlock users only use the hwlock id to represent a specific + hwlock, so the number of cells should be <1> here. + +Please look at the generic hwlock binding for usage information for consumers, +"Documentation/devicetree/bindings/hwlock/hwlock.txt" + +Example of hwlock provider: + hwlock { + compatible = "sirf,hwspinlock"; + reg = <0x13240000 0x00010000>; + #hwlock-cells = <1>; + }; + +Example of hwlock users: + node { + ... + hwlocks = <&hwlock 2>; + ... + }; From 6d44698d548b1d238836cfb3b090b0c6b16db3cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:51:27 -0400 Subject: [PATCH 136/839] xprtrdma: Warn when there are orphaned IB objects WARN during transport destruction if ib_dealloc_pd() fails. This is a sign that xprtrdma orphaned one or more RDMA API objects at some point, which can pin lower layer kernel modules and cause shutdown to hang. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4870d272e006..51900e6a2ab6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -702,17 +702,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) dprintk("RPC: %s: ib_dereg_mr returned %i\n", __func__, rc); } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id->qp) rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; } - if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { - rc = ib_dealloc_pd(ia->ri_pd); - dprintk("RPC: %s: ib_dealloc_pd returned %i\n", - __func__, rc); - } + + /* If the pd is still busy, xprtrdma missed freeing a resource */ + if (ia->ri_pd && !IS_ERR(ia->ri_pd)) + WARN_ON(ib_dealloc_pd(ia->ri_pd)); } /* From fed171b35c7c0777fa0d6aeb3f25cc9b0d5f56ad Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:51:37 -0400 Subject: [PATCH 137/839] xprtrdma: Replace rpcrdma_rep::rr_buffer with rr_rxprt Clean up: Instead of carrying a pointer to the buffer pool and the rpc_xprt, carry a pointer to the controlling rpcrdma_xprt. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 7 ++----- net/sunrpc/xprtrdma/verbs.c | 8 +++++--- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2c53ea9e1b83..98a3b959cd5a 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -732,8 +732,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpcrdma_msg *headerp; struct rpcrdma_req *req; struct rpc_rqst *rqst; - struct rpc_xprt *xprt = rep->rr_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; __be32 *iptr; int rdmalen, status; unsigned long cwnd; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 54f23b1be986..ce95eb3595d0 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -617,12 +617,9 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); - - if (req->rl_reply) { + /* rpcrdma_recv_buffer_get may have set rl_reply, so check again */ + if (req->rl_reply) req->rl_reply->rr_func = rpcrdma_reply_handler; - /* this need only be done once, but... */ - req->rl_reply->rr_xprt = xprt; - } /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 51900e6a2ab6..c55bfbcb5bff 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -278,6 +278,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) { struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; + struct rpcrdma_ia *ia; /* WARNING: Only wr_id and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) @@ -290,8 +291,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", __func__, rep, wc->byte_len); + ia = &rep->rr_rxprt->rx_ia; rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, + ib_dma_sync_single_for_cpu(ia->ri_id->device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); prefetch(rdmab_to_msg(rep->rr_rdmabuf)); @@ -1053,7 +1055,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) goto out_free; } - rep->rr_buffer = &r_xprt->rx_buf; + rep->rr_rxprt = r_xprt; return rep; out_free: @@ -1423,7 +1425,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { - struct rpcrdma_buffer *buffers = rep->rr_buffer; + struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; unsigned long flags; rep->rr_func = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 78e0b8beaa36..c3d57c0fd4a5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -173,8 +173,7 @@ struct rpcrdma_buffer; struct rpcrdma_rep { unsigned int rr_len; - struct rpcrdma_buffer *rr_buffer; - struct rpc_xprt *rr_xprt; + struct rpcrdma_xprt *rr_rxprt; void (*rr_func)(struct rpcrdma_rep *); struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; From 494ae30d2a47cf439c6a680cc62e09ae0c51d190 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:51:46 -0400 Subject: [PATCH 138/839] xprtrdma: Remove rr_func A posted rpcrdma_rep never has rr_func set to anything but rpcrdma_reply_handler. Signed-off-by: Chuck Lever Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 1 - net/sunrpc/xprtrdma/transport.c | 3 --- net/sunrpc/xprtrdma/verbs.c | 10 +--------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 4 files changed, 1 insertion(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 98a3b959cd5a..3f422ca9f0f7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -770,7 +770,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) rep->rr_len); repost: r_xprt->rx_stats.bad_reply_count++; - rep->rr_func = rpcrdma_reply_handler; if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) rpcrdma_recv_buffer_put(rep); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ce95eb3595d0..91b34348ace0 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -617,9 +617,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); - /* rpcrdma_recv_buffer_get may have set rl_reply, so check again */ - if (req->rl_reply) - req->rl_reply->rr_func = rpcrdma_reply_handler; /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c55bfbcb5bff..8e0bd84c8df8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -80,7 +80,6 @@ static void rpcrdma_run_tasklet(unsigned long data) { struct rpcrdma_rep *rep; - void (*func)(struct rpcrdma_rep *); unsigned long flags; data = data; @@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data) rep = list_entry(rpcrdma_tasklets_g.next, struct rpcrdma_rep, rr_list); list_del(&rep->rr_list); - func = rep->rr_func; - rep->rr_func = NULL; spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - if (func) - func(rep); - else - rpcrdma_recv_buffer_put(rep); + rpcrdma_reply_handler(rep); spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); } @@ -1213,7 +1207,6 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) req->rl_niovs = 0; if (req->rl_reply) { buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; - req->rl_reply->rr_func = NULL; req->rl_reply = NULL; } } @@ -1428,7 +1421,6 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; unsigned long flags; - rep->rr_func = NULL; spin_lock_irqsave(&buffers->rb_lock, flags); buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; spin_unlock_irqrestore(&buffers->rb_lock, flags); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c3d57c0fd4a5..230e7fe5e31a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -174,7 +174,6 @@ struct rpcrdma_buffer; struct rpcrdma_rep { unsigned int rr_len; struct rpcrdma_xprt *rr_rxprt; - void (*rr_func)(struct rpcrdma_rep *); struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; }; From 89e0d11258e9a69d550fd4bfb4609e955bdd84ee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:51:56 -0400 Subject: [PATCH 139/839] xprtrdma: Use ib_device pointer safely The connect worker can replace ri_id, but prevents ri_id->device from changing during the lifetime of a transport instance. The old ID is kept around until a new ID is created and the ->device is confirmed to be the same. Cache a copy of ri_id->device in rpcrdma_ia and in rpcrdma_rep. The cached copy can be used safely in code that does not serialize with the connect worker. Other code can use it to save an extra address generation (one pointer dereference instead of two). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 8 +--- net/sunrpc/xprtrdma/frwr_ops.c | 12 +++--- net/sunrpc/xprtrdma/physical_ops.c | 8 +--- net/sunrpc/xprtrdma/verbs.c | 61 ++++++++++++++++-------------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 + 5 files changed, 43 insertions(+), 48 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 302d4ebf6fbf..0a96155bb03a 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -85,7 +85,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_mw *mw = seg1->rl_mw; @@ -137,17 +137,13 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mr_seg *seg1 = seg; - struct ib_device *device; int rc, nsegs = seg->mr_nsegs; LIST_HEAD(l); list_add(&seg1->rl_mw->r.fmr->list, &l); rc = ib_unmap_fmr(&l); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); - read_unlock(&ia->ri_qplock); + rpcrdma_unmap_one(ia->ri_device, seg++); if (rc) goto out_err; return nsegs; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index dff0481dbcf8..66a85faf8bd9 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -137,7 +137,7 @@ static int frwr_op_init(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; + struct ib_device *device = r_xprt->rx_ia.ri_device; unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; struct ib_pd *pd = r_xprt->rx_ia.ri_pd; int i; @@ -178,7 +178,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_mw *mw = seg1->rl_mw; @@ -263,7 +263,6 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; - struct ib_device *device; seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; @@ -273,10 +272,9 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); + rpcrdma_unmap_one(ia->ri_device, seg++); + read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); if (rc) @@ -304,7 +302,7 @@ static void frwr_op_reset(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; + struct ib_device *device = r_xprt->rx_ia.ri_device; unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; struct ib_pd *pd = r_xprt->rx_ia.ri_pd; struct rpcrdma_mw *r; diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index ba518af16787..da149e892858 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - rpcrdma_map_one(ia->ri_id->device, seg, - rpcrdma_data_dir(writing)); + rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); seg->mr_rkey = ia->ri_bind_mem->rkey; seg->mr_base = seg->mr_dma; seg->mr_nsegs = 1; @@ -65,10 +64,7 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - read_lock(&ia->ri_qplock); - rpcrdma_unmap_one(ia->ri_id->device, seg); - read_unlock(&ia->ri_qplock); - + rpcrdma_unmap_one(ia->ri_device, seg); return 1; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8e0bd84c8df8..ddd5b362da35 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -272,7 +272,6 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) { struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; - struct rpcrdma_ia *ia; /* WARNING: Only wr_id and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS) @@ -285,9 +284,8 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", __func__, rep, wc->byte_len); - ia = &rep->rr_rxprt->rx_ia; rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu(ia->ri_id->device, + ib_dma_sync_single_for_cpu(rep->rr_device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); prefetch(rdmab_to_msg(rep->rr_rdmabuf)); @@ -483,7 +481,7 @@ connected: pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", sap, rpc_get_port(sap), - ia->ri_id->device->name, + ia->ri_device->name, ia->ri_ops->ro_displayname, xprt->rx_buf.rb_max_requests, ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); @@ -584,8 +582,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rc = PTR_ERR(ia->ri_id); goto out1; } + ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + ia->ri_pd = ib_alloc_pd(ia->ri_device); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); dprintk("RPC: %s: ib_alloc_pd() failed %i\n", @@ -593,7 +592,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - rc = ib_query_device(ia->ri_id->device, devattr); + rc = ib_query_device(ia->ri_device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); @@ -602,7 +601,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; - ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { @@ -617,7 +616,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } } if (memreg == RPCRDMA_MTHCAFMR) { - if (!ia->ri_id->device->alloc_fmr) { + if (!ia->ri_device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); memreg = RPCRDMA_ALLPHYSICAL; @@ -767,9 +766,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); - sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, - ep->rep_attr.cap.max_send_wr + 1, 0); + sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, + rpcrdma_cq_async_error_upcall, ep, + ep->rep_attr.cap.max_send_wr + 1, 0); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -784,9 +783,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, goto out2; } - recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, - ep->rep_attr.cap.max_recv_wr + 1, 0); + recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, + rpcrdma_cq_async_error_upcall, ep, + ep->rep_attr.cap.max_recv_wr + 1, 0); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -907,7 +906,7 @@ retry: * More stuff I haven't thought of! * Rrrgh! */ - if (ia->ri_id->device != id->device) { + if (ia->ri_device != id->device) { printk("RPC: %s: can't reconnect on " "different device!\n", __func__); rdma_destroy_id(id); @@ -1049,6 +1048,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) goto out_free; } + rep->rr_device = ia->ri_device; rep->rr_rxprt = r_xprt; return rep; @@ -1449,9 +1449,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, /* * All memory passed here was kmalloc'ed, therefore phys-contiguous. */ - iov->addr = ib_dma_map_single(ia->ri_id->device, + iov->addr = ib_dma_map_single(ia->ri_device, va, len, DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) + if (ib_dma_mapping_error(ia->ri_device, iov->addr)) return -ENOMEM; iov->length = len; @@ -1495,8 +1495,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, { int rc; - ib_dma_unmap_single(ia->ri_id->device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); + ib_dma_unmap_single(ia->ri_device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); if (NULL == mr) return 0; @@ -1589,15 +1589,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, send_wr.num_sge = req->rl_niovs; send_wr.opcode = IB_WR_SEND; if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[3].addr, req->rl_send_iov[3].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[1].addr, req->rl_send_iov[1].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[0].addr, req->rl_send_iov[0].length, - DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[3].addr, + req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[1].addr, + req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[0].addr, + req->rl_send_iov[0].length, + DMA_TO_DEVICE); if (DECR_CQCOUNT(ep) > 0) send_wr.send_flags = 0; @@ -1630,7 +1633,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; - ib_dma_sync_single_for_cpu(ia->ri_id->device, + ib_dma_sync_single_for_cpu(ia->ri_device, rdmab_addr(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf), DMA_BIDIRECTIONAL); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 230e7fe5e31a..300423dea19c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -62,6 +62,7 @@ struct rpcrdma_ia { const struct rpcrdma_memreg_ops *ri_ops; rwlock_t ri_qplock; + struct ib_device *ri_device; struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; @@ -173,6 +174,7 @@ struct rpcrdma_buffer; struct rpcrdma_rep { unsigned int rr_len; + struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; From 346aa66b2ab7988ca105f7fee5a968c11712b0d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:06 -0400 Subject: [PATCH 140/839] xprtrdma: Introduce helpers for allocating MWs We eventually want to handle allocating MWs one at a time, as needed, instead of grabbing 64 and throwing them at each RPC in the pipeline. Add a helper for grabbing an MW off rb_mws, and a helper for returning an MW to rb_mws. These will be used in a subsequent patch. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 31 +++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ddd5b362da35..b7ca73e7e2e6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1173,6 +1173,37 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +struct rpcrdma_mw * +rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_mw *mw = NULL; + unsigned long flags; + + spin_lock_irqsave(&buf->rb_lock, flags); + if (!list_empty(&buf->rb_mws)) { + mw = list_first_entry(&buf->rb_mws, + struct rpcrdma_mw, mw_list); + list_del_init(&mw->mw_list); + } + spin_unlock_irqrestore(&buf->rb_lock, flags); + + if (!mw) + pr_err("RPC: %s: no MWs available\n", __func__); + return mw; +} + +void +rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + unsigned long flags; + + spin_lock_irqsave(&buf->rb_lock, flags); + list_add_tail(&mw->mw_list, &buf->rb_mws); + spin_unlock_irqrestore(&buf->rb_lock, flags); +} + /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving * some req segments uninitialized. */ diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 300423dea19c..5b801d561e52 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -413,6 +413,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); +struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); +void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); From fc7fbb59e70c65e2bd6170a6de139d5de62dd2be Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:16 -0400 Subject: [PATCH 141/839] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external() Acquiring 64 FMRs in rpcrdma_buffer_get() while holding the buffer pool lock is expensive, and unnecessary because FMR mode can transfer up to a 1MB payload using just a single ib_fmr. Instead, acquire ib_fmrs one-at-a-time as chunks are registered, and return them to rb_mws immediately during deregistration. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 52 ++++++++++++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 26 ------------------ 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 0a96155bb03a..53fb649071d9 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -11,6 +11,21 @@ * can take tens of usecs to complete. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using the + * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is + * finished, the Memory Region is unmapped using the ib_unmap_fmr + * verb (fmr_op_unmap). + */ + +/* Transport recovery + * + * After a transport reconnect, fmr_op_map re-uses the MR already + * allocated for the RPC, but generates a fresh rkey then maps the + * MR again. This process is synchronous. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -77,6 +92,15 @@ out_fmr_err: return rc; } +static int +__fmr_unmap(struct rpcrdma_mw *r) +{ + LIST_HEAD(l); + + list_add(&r->r.fmr->list, &l); + return ib_unmap_fmr(&l); +} + /* Use the ib_map_phys_fmr() verb to register a memory region * for remote access via RDMA READ or RDMA WRITE. */ @@ -88,9 +112,22 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; int len, pageoff, i, rc; + struct rpcrdma_mw *mw; + + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + if (!mw) { + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } else { + /* this is a retransmit; generate a fresh rkey */ + rc = __fmr_unmap(mw); + if (rc) + return rc; + } pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -114,6 +151,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (rc) goto out_maperr; + seg1->rl_mw = mw; seg1->mr_rkey = mw->r.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; @@ -137,18 +175,24 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mr_seg *seg1 = seg; + struct rpcrdma_mw *mw = seg1->rl_mw; int rc, nsegs = seg->mr_nsegs; - LIST_HEAD(l); - list_add(&seg1->rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); + dprintk("RPC: %s: FMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; while (seg1->mr_nsegs--) rpcrdma_unmap_one(ia->ri_device, seg++); + rc = __fmr_unmap(mw); if (rc) goto out_err; + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: + /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy + * will attempt to release it when the transport is destroyed. + */ dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); return nsegs; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b7ca73e7e2e6..3188e368e0a2 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1324,28 +1324,6 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, return NULL; } -static struct rpcrdma_req * -rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - /* * Get a set of request/reply buffers. * @@ -1387,9 +1365,6 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) case RPCRDMA_FRMR: req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); break; - case RPCRDMA_MTHCAFMR: - req = rpcrdma_buffer_get_fmrs(req, buffers); - break; default: break; } @@ -1414,7 +1389,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) rpcrdma_buffer_put_sendbuf(req, buffers); switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - case RPCRDMA_MTHCAFMR: rpcrdma_buffer_put_mrs(req, buffers); break; default: From 951e721ca0d665ece6175b8d8edf93cf7b215bd5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:25 -0400 Subject: [PATCH 142/839] xprtrdma: Introduce an FRMR recovery workqueue After a transport disconnect, FRMRs can be left in an undetermined state. In particular, the MR's rkey is no good. Currently, FRMRs are fixed up by the transport connect worker, but that can race with ->ro_unmap if an RPC happens to exit while the transport connect worker is running. A better way of dealing with broken FRMRs is to detect them before they are re-used by ->ro_map. Such FRMRs are either already invalid or are owned by the sending RPC, and thus no race with ->ro_unmap is possible. Introduce a mechanism for handing broken FRMRs to a workqueue to be reset in a context that is appropriate for allocating resources (ie. an ib_alloc_fast_reg_mr() API call). This mechanism is not yet used, but will be in subsequent patches. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-By: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 71 ++++++++++++++++++++++++++++++++- net/sunrpc/xprtrdma/transport.c | 11 ++++- net/sunrpc/xprtrdma/xprt_rdma.h | 5 +++ 3 files changed, 84 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 66a85faf8bd9..a06d9a359402 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -17,6 +17,74 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif +static struct workqueue_struct *frwr_recovery_wq; + +#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM) + +int +frwr_alloc_recovery_wq(void) +{ + frwr_recovery_wq = alloc_workqueue("frwr_recovery", + FRWR_RECOVERY_WQ_FLAGS, 0); + return !frwr_recovery_wq ? -ENOMEM : 0; +} + +void +frwr_destroy_recovery_wq(void) +{ + struct workqueue_struct *wq; + + if (!frwr_recovery_wq) + return; + + wq = frwr_recovery_wq; + frwr_recovery_wq = NULL; + destroy_workqueue(wq); +} + +/* Deferred reset of a single FRMR. Generate a fresh rkey by + * replacing the MR. + * + * There's no recovery if this fails. The FRMR is abandoned, but + * remains in rb_all. It will be cleaned up when the transport is + * destroyed. + */ +static void +__frwr_recovery_worker(struct work_struct *work) +{ + struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, + r.frmr.fr_work); + struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; + unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; + + if (ib_dereg_mr(r->r.frmr.fr_mr)) + goto out_fail; + + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); + if (IS_ERR(r->r.frmr.fr_mr)) + goto out_fail; + + dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); + r->r.frmr.fr_state = FRMR_IS_INVALID; + rpcrdma_put_mw(r_xprt, r); + return; + +out_fail: + pr_warn("RPC: %s: FRMR %p unrecovered\n", + __func__, r); +} + +/* A broken MR was discovered in a context that can't sleep. + * Defer recovery to the recovery worker. + */ +static void +__frwr_queue_recovery(struct rpcrdma_mw *r) +{ + INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); + queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); +} + static int __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, unsigned int depth) @@ -128,7 +196,7 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: frmr %p (stale), status %d\n", + pr_warn("RPC: %s: frmr %p flushed, status %d\n", __func__, r, wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } @@ -165,6 +233,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_all, &buf->rb_all); r->mw_sendcompletion = frwr_sendcompletion; + r->r.frmr.fr_xprt = r_xprt; } return 0; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 91b34348ace0..ed861ff3586d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -720,17 +720,24 @@ static void __exit xprt_rdma_cleanup(void) if (rc) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); + + frwr_destroy_recovery_wq(); } static int __init xprt_rdma_init(void) { int rc; - rc = xprt_register_transport(&xprt_rdma); - + rc = frwr_alloc_recovery_wq(); if (rc) return rc; + rc = xprt_register_transport(&xprt_rdma); + if (rc) { + frwr_destroy_recovery_wq(); + return rc; + } + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); dprintk("Defaults:\n"); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5b801d561e52..c5862a4a5192 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -203,6 +203,8 @@ struct rpcrdma_frmr { struct ib_fast_reg_page_list *fr_pgl; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; + struct work_struct fr_work; + struct rpcrdma_xprt *fr_xprt; }; struct rpcrdma_mw { @@ -427,6 +429,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); +int frwr_alloc_recovery_wq(void); +void frwr_destroy_recovery_wq(void); + /* * Wrappers for chunk registration, shared by read/write chunk code. */ From c14d86e5913564a6e9313a78604a7caf899c063f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:35 -0400 Subject: [PATCH 143/839] xprtrdma: Acquire MRs in rpcrdma_register_external() Acquiring 64 MRs in rpcrdma_buffer_get() while holding the buffer pool lock is expensive, and unnecessary because most modern adapters can transfer 100s of KBs of payload using just a single MR. Instead, acquire MRs one-at-a-time as chunks are registered, and return them to rb_mws immediately during deregistration. Note: commit 539431a437d2 ("xprtrdma: Don't invalidate FRMRs if registration fails") is reverted: There is now a valid case where registration can fail (with -ENOMEM) but the QP is still in RTS. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 100 +++++++++++++++++++++++++++++---- net/sunrpc/xprtrdma/rpc_rdma.c | 3 - net/sunrpc/xprtrdma/verbs.c | 21 +------ 3 files changed, 89 insertions(+), 35 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index a06d9a359402..133edf695a26 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -11,6 +11,62 @@ * but most complex memory registration mode. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG + * Work Request (frmr_op_map). When the RDMA operation is finished, this + * Memory Region is invalidated using a LOCAL_INV Work Request + * (frmr_op_unmap). + * + * Typically these Work Requests are not signaled, and neither are RDMA + * SEND Work Requests (with the exception of signaling occasionally to + * prevent provider work queue overflows). This greatly reduces HCA + * interrupt workload. + * + * As an optimization, frwr_op_unmap marks MRs INVALID before the + * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on + * rb_mws immediately so that no work (like managing a linked list + * under a spinlock) is needed in the completion upcall. + * + * But this means that frwr_op_map() can occasionally encounter an MR + * that is INVALID but the LOCAL_INV WR has not completed. Work Queue + * ordering prevents a subsequent FAST_REG WR from executing against + * that MR while it is still being invalidated. + */ + +/* Transport recovery + * + * ->op_map and the transport connect worker cannot run at the same + * time, but ->op_unmap can fire while the transport connect worker + * is running. Thus MR recovery is handled in ->op_map, to guarantee + * that recovered MRs are owned by a sending RPC, and not one where + * ->op_unmap could fire at the same time transport reconnect is + * being done. + * + * When the underlying transport disconnects, MRs are left in one of + * three states: + * + * INVALID: The MR was not in use before the QP entered ERROR state. + * (Or, the LOCAL_INV WR has not completed or flushed yet). + * + * STALE: The MR was being registered or unregistered when the QP + * entered ERROR state, and the pending WR was flushed. + * + * VALID: The MR was registered before the QP entered ERROR state. + * + * When frwr_op_map encounters STALE and VALID MRs, they are recovered + * with ib_dereg_mr and then are re-initialized. Beause MR recovery + * allocates fresh resources, it is deferred to a workqueue, and the + * recovered MRs are placed back on the rb_mws list when recovery is + * complete. frwr_op_map allocates another MR for the current RPC while + * the broken MR is reset. + * + * To ensure that frwr_op_map doesn't encounter an MR that is marked + * INVALID but that is about to be flushed due to a previous transport + * disconnect, the transport connect worker attempts to drain all + * pending send queue WRs before the transport is reconnected. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -250,9 +306,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; - struct rpcrdma_frmr *frmr = &mw->r.frmr; - struct ib_mr *mr = frmr->fr_mr; + struct rpcrdma_mw *mw; + struct rpcrdma_frmr *frmr; + struct ib_mr *mr; struct ib_send_wr fastreg_wr, *bad_wr; u8 key; int len, pageoff; @@ -261,12 +317,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, u64 pa; int page_no; + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + do { + if (mw) + __frwr_queue_recovery(mw); + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); + frmr = &mw->r.frmr; + frmr->fr_state = FRMR_IS_VALID; + pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_len += pageoff; len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; + for (page_no = i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); pa = seg->mr_dma; @@ -285,8 +354,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", __func__, mw, i, len); - frmr->fr_state = FRMR_IS_VALID; - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); fastreg_wr.wr_id = (unsigned long)(void *)mw; fastreg_wr.opcode = IB_WR_FAST_REG_MR; @@ -298,6 +365,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, fastreg_wr.wr.fast_reg.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; + mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); fastreg_wr.wr.fast_reg.rkey = mr->rkey; @@ -307,6 +375,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (rc) goto out_senderr; + seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; @@ -315,10 +384,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - ib_update_fast_reg_key(mr, --key); - frmr->fr_state = FRMR_IS_INVALID; while (i--) rpcrdma_unmap_one(device, --seg); + __frwr_queue_recovery(mw); return rc; } @@ -330,15 +398,19 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mw *mw = seg1->rl_mw; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + dprintk("RPC: %s: FRMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; + mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); while (seg1->mr_nsegs--) @@ -348,12 +420,13 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) read_unlock(&ia->ri_qplock); if (rc) goto out_err; + + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: - /* Force rpcrdma_buffer_get() to retry */ - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); + __frwr_queue_recovery(mw); return nsegs; } @@ -400,6 +473,9 @@ frwr_op_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_mw *r; + /* Ensure stale MWs for "buf" are no longer in flight */ + flush_workqueue(frwr_recovery_wq); + while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3f422ca9f0f7..84ea37daef36 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, return (unsigned char *)iptr - (unsigned char *)headerp; out: - if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) - return n; - for (pos = 0; nchunks--;) pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, &req->rl_segments[pos]); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3188e368e0a2..768bb77af850 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1336,12 +1336,11 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); - struct list_head stale; struct rpcrdma_req *req; unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { spin_unlock_irqrestore(&buffers->rb_lock, flags); dprintk("RPC: %s: out of request buffers\n", __func__); @@ -1360,17 +1359,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; - INIT_LIST_HEAD(&stale); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); - if (!list_empty(&stale)) - rpcrdma_retry_flushed_linv(&stale, buffers); return req; } @@ -1382,18 +1371,10 @@ void rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); rpcrdma_buffer_put_sendbuf(req, buffers); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - rpcrdma_buffer_put_mrs(req, buffers); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); } From 06b00880b0abd60c595088ae0bd7d210ee953f15 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:44 -0400 Subject: [PATCH 144/839] xprtrdma: Remove unused LOCAL_INV recovery logic Clean up: Remove functions no longer used to recover broken FRMRs. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 109 ------------------------------------ 1 file changed, 109 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 768bb77af850..a891cf769199 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1204,33 +1204,6 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock_irqrestore(&buf->rb_lock, flags); } -/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving - * some req segments uninitialized. - */ -static void -rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf) -{ - if (*mw) { - list_add_tail(&(*mw)->mw_list, &buf->rb_mws); - *mw = NULL; - } -} - -/* Cycle mw's back in reverse order, and "spin" them. - * This delays and scrambles reuse as much as possible. - */ -static void -rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - struct rpcrdma_mr_seg *seg = req->rl_segments; - struct rpcrdma_mr_seg *seg1 = seg; - int i; - - for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); -} - static void rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) { @@ -1242,88 +1215,6 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) } } -/* rpcrdma_unmap_one() was already done during deregistration. - * Redo only the ib_post_send(). - */ -static void -rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) -{ - struct rpcrdma_xprt *r_xprt = - container_of(ia, struct rpcrdma_xprt, rx_ia); - struct ib_send_wr invalidate_wr, *bad_wr; - int rc; - - dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); - - /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ - r->r.frmr.fr_state = FRMR_IS_INVALID; - - memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)r; - invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; - DECR_CQCOUNT(&r_xprt->rx_ep); - - dprintk("RPC: %s: frmr %p invalidating rkey %08x\n", - __func__, r, r->r.frmr.fr_mr->rkey); - - read_lock(&ia->ri_qplock); - rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); - read_unlock(&ia->ri_qplock); - if (rc) { - /* Force rpcrdma_buffer_get() to retry */ - r->r.frmr.fr_state = FRMR_IS_STALE; - dprintk("RPC: %s: ib_post_send failed, %i\n", - __func__, rc); - } -} - -static void -rpcrdma_retry_flushed_linv(struct list_head *stale, - struct rpcrdma_buffer *buf) -{ - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - struct list_head *pos; - struct rpcrdma_mw *r; - unsigned long flags; - - list_for_each(pos, stale) { - r = list_entry(pos, struct rpcrdma_mw, mw_list); - rpcrdma_retry_local_inv(r, ia); - } - - spin_lock_irqsave(&buf->rb_lock, flags); - list_splice_tail(stale, &buf->rb_mws); - spin_unlock_irqrestore(&buf->rb_lock, flags); -} - -static struct rpcrdma_req * -rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, - struct list_head *stale) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - if (r->r.frmr.fr_state == FRMR_IS_STALE) { - list_add(&r->mw_list, stale); - continue; - } - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - /* * Get a set of request/reply buffers. * From 3269a94b6206d4fe10dd96cb37e6b0035ee42cd2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:52:54 -0400 Subject: [PATCH 145/839] xprtrdma: Remove ->ro_reset An RPC can exit at any time. When it does so, xprt_rdma_free() is called, and it calls ->op_unmap(). If ->ro_reset() is running due to a transport disconnect, the two methods can race while processing the same rpcrdma_mw. The results are unpredictable. Because of this, in previous patches I've altered ->ro_map() to handle MR reset. ->ro_reset() is no longer needed and can be removed. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 23 ------------------ net/sunrpc/xprtrdma/frwr_ops.c | 39 ------------------------------ net/sunrpc/xprtrdma/physical_ops.c | 6 ----- net/sunrpc/xprtrdma/verbs.c | 2 -- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 5 files changed, 71 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 53fb649071d9..5dd77dac094c 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -197,28 +197,6 @@ out_err: return nsegs; } -/* After a disconnect, unmap all FMRs. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_fmr_external(). - */ -static void -fmr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_mw *r; - LIST_HEAD(list); - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) - list_add(&r->r.fmr->list, &list); - - rc = ib_unmap_fmr(&list); - if (rc) - dprintk("RPC: %s: ib_unmap_fmr failed %i\n", - __func__, rc); -} - static void fmr_op_destroy(struct rpcrdma_buffer *buf) { @@ -242,7 +220,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_open = fmr_op_open, .ro_maxpages = fmr_op_maxpages, .ro_init = fmr_op_init, - .ro_reset = fmr_op_reset, .ro_destroy = fmr_op_destroy, .ro_displayname = "fmr", }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 133edf695a26..862279267fb8 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -430,44 +430,6 @@ out_err: return nsegs; } -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in - * an unusable state. Find FRMRs in this state and dereg / reg - * each. FRMRs that are VALID and attached to an rpcrdma_req are - * also torn down. - * - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_frmr_external(). - */ -static void -frwr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_device; - unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; - struct ib_pd *pd = r_xprt->rx_ia.ri_pd; - struct rpcrdma_mw *r; - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) { - if (r->r.frmr.fr_state == FRMR_IS_INVALID) - continue; - - __frwr_release(r); - rc = __frwr_init(r, pd, device, depth); - if (rc) { - dprintk("RPC: %s: mw %p left %s\n", - __func__, r, - (r->r.frmr.fr_state == FRMR_IS_STALE ? - "stale" : "valid")); - continue; - } - - r->r.frmr.fr_state = FRMR_IS_INVALID; - } -} - static void frwr_op_destroy(struct rpcrdma_buffer *buf) { @@ -490,7 +452,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_open = frwr_op_open, .ro_maxpages = frwr_op_maxpages, .ro_init = frwr_op_init, - .ro_reset = frwr_op_reset, .ro_destroy = frwr_op_destroy, .ro_displayname = "frwr", }; diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index da149e892858..41985d07fdb7 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -68,11 +68,6 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) return 1; } -static void -physical_op_reset(struct rpcrdma_xprt *r_xprt) -{ -} - static void physical_op_destroy(struct rpcrdma_buffer *buf) { @@ -84,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { .ro_open = physical_op_open, .ro_maxpages = physical_op_maxpages, .ro_init = physical_op_init, - .ro_reset = physical_op_reset, .ro_destroy = physical_op_destroy, .ro_displayname = "physical", }; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a891cf769199..db9303a6a145 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -891,8 +891,6 @@ retry: rpcrdma_flush_cqs(ep); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - ia->ri_ops->ro_reset(xprt); - id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr); if (IS_ERR(id)) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c5862a4a5192..f19376d35750 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -352,7 +352,6 @@ struct rpcrdma_memreg_ops { struct rpcrdma_create_data_internal *); size_t (*ro_maxpages)(struct rpcrdma_xprt *); int (*ro_init)(struct rpcrdma_xprt *); - void (*ro_reset)(struct rpcrdma_xprt *); void (*ro_destroy)(struct rpcrdma_buffer *); const char *ro_displayname; }; From 7e53df111beea8db2543424d07bdee2a630698c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:04 -0400 Subject: [PATCH 146/839] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Clean up: This field is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 3 ++- net/sunrpc/xprtrdma/verbs.c | 3 --- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index db9303a6a145..cc1a52609974 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -665,9 +665,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) dprintk("RPC: %s: memory registration strategy is '%s'\n", __func__, ia->ri_ops->ro_displayname); - /* Else will do memory reg/dereg for each chunk */ - ia->ri_memreg_strategy = memreg; - rwlock_init(&ia->ri_qplock); return 0; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f19376d35750..3ecee38bf1a0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,7 +70,6 @@ struct rpcrdma_ia { int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; - enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; struct ib_device_attr ri_devattr; struct ib_qp_attr ri_qp_attr; From 58d1dcf5a8ebb0ce8a521286a99efdd636012bf0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:13 -0400 Subject: [PATCH 147/839] xprtrdma: Split rb_lock /proc/lock_stat showed contention between rpcrdma_buffer_get/put and the MR allocation functions during I/O intensive workloads. Now that MRs are no longer allocated in rpcrdma_buffer_get(), there's no reason the rb_mws list has to be managed using the same lock as the send/receive buffers. Split that lock. The new lock does not need to disable interrupts because buffer get/put is never called in an interrupt context. struct rpcrdma_buffer is re-arranged to ensure rb_mwlock and rb_mws are always in a different cacheline than rb_lock and the buffer pointers. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 1 + net/sunrpc/xprtrdma/frwr_ops.c | 1 + net/sunrpc/xprtrdma/verbs.c | 10 ++++------ net/sunrpc/xprtrdma/xprt_rdma.h | 16 +++++++++------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 5dd77dac094c..52f9ad5fe19b 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -65,6 +65,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *r; int i, rc; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 862279267fb8..18b7305d249f 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -266,6 +266,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) struct ib_pd *pd = r_xprt->rx_ia.ri_pd; int i; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cc1a52609974..234083560d0e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1173,15 +1173,14 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_mw *mw = NULL; - unsigned long flags; - spin_lock_irqsave(&buf->rb_lock, flags); + spin_lock(&buf->rb_mwlock); if (!list_empty(&buf->rb_mws)) { mw = list_first_entry(&buf->rb_mws, struct rpcrdma_mw, mw_list); list_del_init(&mw->mw_list); } - spin_unlock_irqrestore(&buf->rb_lock, flags); + spin_unlock(&buf->rb_mwlock); if (!mw) pr_err("RPC: %s: no MWs available\n", __func__); @@ -1192,11 +1191,10 @@ void rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - unsigned long flags; - spin_lock_irqsave(&buf->rb_lock, flags); + spin_lock(&buf->rb_mwlock); list_add_tail(&mw->mw_list, &buf->rb_mws); - spin_unlock_irqrestore(&buf->rb_lock, flags); + spin_unlock(&buf->rb_mwlock); } static void diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3ecee38bf1a0..df92884400c4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -282,15 +282,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) * One of these is associated with a transport instance */ struct rpcrdma_buffer { - spinlock_t rb_lock; /* protects indexes */ - u32 rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ - struct list_head rb_all; - int rb_send_index; + spinlock_t rb_mwlock; /* protect rb_mws list */ + struct list_head rb_mws; + struct list_head rb_all; + char *rb_pool; + + spinlock_t rb_lock; /* protect buf arrays */ + u32 rb_max_requests; + int rb_send_index; + int rb_recv_index; struct rpcrdma_req **rb_send_bufs; - int rb_recv_index; struct rpcrdma_rep **rb_recv_bufs; - char *rb_pool; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) From acb9da7a57d7905c46d0b69d30fcf944eae261de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:23 -0400 Subject: [PATCH 148/839] xprtrdma: Stack relief in fmr_op_map() fmr_op_map() declares a 64 element array of u64 in automatic storage. This is 512 bytes (8 * 64) on the stack. Instead, when FMR memory registration is in use, pre-allocate a physaddr array for each rpcrdma_mw. This is a pre-requisite for increasing the r/wsize maximum for FMR on platforms with 4KB pages. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 32 ++++++++++++++++++++++---------- net/sunrpc/xprtrdma/xprt_rdma.h | 7 ++++++- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 52f9ad5fe19b..4a53ad5ab4b3 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -72,13 +72,19 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); + rc = -ENOMEM; while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); if (!r) - return -ENOMEM; + goto out; - r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); - if (IS_ERR(r->r.fmr)) + r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * + sizeof(u64), GFP_KERNEL); + if (!r->r.fmr.physaddrs) + goto out_free; + + r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); + if (IS_ERR(r->r.fmr.fmr)) goto out_fmr_err; list_add(&r->mw_list, &buf->rb_mws); @@ -87,9 +93,12 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) return 0; out_fmr_err: - rc = PTR_ERR(r->r.fmr); + rc = PTR_ERR(r->r.fmr.fmr); dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); + kfree(r->r.fmr.physaddrs); +out_free: kfree(r); +out: return rc; } @@ -98,7 +107,7 @@ __fmr_unmap(struct rpcrdma_mw *r) { LIST_HEAD(l); - list_add(&r->r.fmr->list, &l); + list_add(&r->r.fmr.fmr->list, &l); return ib_unmap_fmr(&l); } @@ -113,7 +122,6 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; int len, pageoff, i, rc; struct rpcrdma_mw *mw; @@ -138,7 +146,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, nsegs = RPCRDMA_MAX_FMR_SGES; for (i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); - physaddrs[i] = seg->mr_dma; + mw->r.fmr.physaddrs[i] = seg->mr_dma; len += seg->mr_len; ++seg; ++i; @@ -148,12 +156,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, break; } - rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, + i, seg1->mr_dma); if (rc) goto out_maperr; seg1->rl_mw = mw; - seg1->mr_rkey = mw->r.fmr->rkey; + seg1->mr_rkey = mw->r.fmr.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -207,10 +216,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); - rc = ib_dealloc_fmr(r->r.fmr); + kfree(r->r.fmr.physaddrs); + + rc = ib_dealloc_fmr(r->r.fmr.fmr); if (rc) dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", __func__, rc); + kfree(r); } } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index df92884400c4..110d685b4ac5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -206,9 +206,14 @@ struct rpcrdma_frmr { struct rpcrdma_xprt *fr_xprt; }; +struct rpcrdma_fmr { + struct ib_fmr *fmr; + u64 *physaddrs; +}; + struct rpcrdma_mw { union { - struct ib_fmr *fmr; + struct rpcrdma_fmr fmr; struct rpcrdma_frmr frmr; } r; void (*mw_sendcompletion)(struct ib_wc *); From 40c6ed0c8a7f2c5d67f5d6774bbce230a42176db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:33 -0400 Subject: [PATCH 149/839] xprtrdma: Reduce per-transport MR allocation Reduce resource consumption per-transport to make way for increasing the credit limit and maximum r/wsize. Pre-allocate fewer MRs. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 6 ++++-- net/sunrpc/xprtrdma/frwr_ops.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 4a53ad5ab4b3..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -69,8 +69,10 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); rc = -ENOMEM; while (i--) { diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 18b7305d249f..661fbc1784ab 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -270,8 +270,10 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); while (i--) { struct rpcrdma_mw *r; From 3f4a9494104cbadfa05cb2da9ca04b205712360f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Jun 2015 09:15:55 -0400 Subject: [PATCH 150/839] ncpfs: successful rename() should invalidate caches for parents Signed-off-by: Al Viro --- fs/ncpfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 80021c709af9..93575e91a7aa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1145,6 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, case 0x00: ncp_dbg(1, "renamed %pd -> %pd\n", old_dentry, new_dentry); + ncp_d_prune(old_dentry); + ncp_d_prune(new_dentry); break; case 0x9E: error = -ENAMETOOLONG; From 13b987ea275840d74d9df9a44326632fab1894da Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: [PATCH 151/839] fs/ufs: revert "ufs: fix deadlocks introduced by sb mutex merge" This reverts commit 9ef7db7f38d0 ("ufs: fix deadlocks introduced by sb mutex merge") That patch tried to solve commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") which is itself partially reverted due to multiple deadlocks. Signed-off-by: Fabian Frederick Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Cc: Signed-off-by: Andrew Morton --- fs/ufs/inode.c | 5 ++++- fs/ufs/namei.c | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index be7d42c7d938..2d93ab07da8a 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,6 +902,9 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) + if (want_delete) { + lock_ufs(inode->i_sb); ufs_free_inode(inode); + unlock_ufs(inode->i_sb); + } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index e491a93a7e9a..1f5223c9e1e2 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -128,12 +128,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_notlocked; + goto out; - lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -184,9 +184,13 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + inode = ufs_new_inode(dir, S_IFDIR|mode); + err = PTR_ERR(inode); if (IS_ERR(inode)) - return PTR_ERR(inode); + goto out_dir; inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -194,9 +198,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -215,6 +216,7 @@ out_fail: inode_dec_link_count(inode); unlock_new_inode(inode); iput (inode); +out_dir: inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; From 31847b67bec0e989961118520406a63fdeac7246 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 15 Jun 2015 13:00:21 +0100 Subject: [PATCH 152/839] kconfig: allow use of relations other than (in)equality Over the years I found it desirable to be able to use all sorts of relations, not just (in)equality. And apparently I'm not the only one, as there's at least one example in the tree where the programmer assumed this would work (see DEBUG_UART_8250_WORD in arch/arm/Kconfig.debug). Another possible use would e.g. be to fold the two SMP/NR_CPUS prompts into one: SMP could be promptless, simply depending on NR_CPUS > 1. A (desirable) side effect of this change - resulting from numeric values now necessarily being compared as numbers rather than as strings - is that comparing hex values now works as expected: Other than int ones (which aren't allowed to have leading zeroes), zeroes following the 0x prefix made them compare unequal even if their values were equal. Signed-off-by: Jan Beulich Signed-off-by: Michal Marek --- scripts/kconfig/expr.c | 167 ++++++++++++++++++++++++++++++++++++--- scripts/kconfig/expr.h | 4 +- scripts/kconfig/symbol.c | 4 + scripts/kconfig/zconf.l | 4 + scripts/kconfig/zconf.y | 9 +++ 5 files changed, 177 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index 0d7364ce9132..667d1aa23711 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -79,6 +79,10 @@ struct expr *expr_copy(const struct expr *org) e->left.expr = expr_copy(org->left.expr); break; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: e->left.sym = org->left.sym; e->right.sym = org->right.sym; @@ -111,6 +115,10 @@ void expr_free(struct expr *e) expr_free(e->left.expr); return; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: break; case E_OR: @@ -197,6 +205,10 @@ static int expr_eq(struct expr *e1, struct expr *e2) return 0; switch (e1->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return e1->left.sym == e2->left.sym && e1->right.sym == e2->right.sym; case E_SYMBOL: @@ -587,6 +599,10 @@ struct expr *expr_transform(struct expr *e) return NULL; switch (e->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: case E_SYMBOL: case E_LIST: @@ -659,6 +675,22 @@ struct expr *expr_transform(struct expr *e) e = tmp; e->type = e->type == E_EQUAL ? E_UNEQUAL : E_EQUAL; break; + case E_LEQ: + case E_GEQ: + // !a<='x' -> a>'x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LEQ ? E_GTH : E_LTH; + break; + case E_LTH: + case E_GTH: + // !a<'x' -> a>='x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LTH ? E_GEQ : E_LEQ; + break; case E_OR: // !(a || b) -> !a && !b tmp = e->left.expr; @@ -729,6 +761,10 @@ int expr_contains_symbol(struct expr *dep, struct symbol *sym) case E_SYMBOL: return dep->left.sym == sym; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return dep->left.sym == sym || dep->right.sym == sym; @@ -803,6 +839,10 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb case E_NOT: return expr_trans_compare(e->left.expr, type == E_EQUAL ? E_UNEQUAL : E_EQUAL, sym); case E_UNEQUAL: + case E_LTH: + case E_LEQ: + case E_GTH: + case E_GEQ: case E_EQUAL: if (type == E_EQUAL) { if (sym == &symbol_yes) @@ -830,10 +870,57 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb return NULL; } +enum string_value_kind { + k_string, + k_signed, + k_unsigned, + k_invalid +}; + +union string_value { + unsigned long long u; + signed long long s; +}; + +static enum string_value_kind expr_parse_string(const char *str, + enum symbol_type type, + union string_value *val) +{ + char *tail; + enum string_value_kind kind; + + errno = 0; + switch (type) { + case S_BOOLEAN: + case S_TRISTATE: + return k_string; + case S_INT: + val->s = strtoll(str, &tail, 10); + kind = k_signed; + break; + case S_HEX: + val->u = strtoull(str, &tail, 16); + kind = k_unsigned; + break; + case S_STRING: + case S_UNKNOWN: + val->s = strtoll(str, &tail, 0); + kind = k_signed; + break; + default: + return k_invalid; + } + return !errno && !*tail && tail > str && isxdigit(tail[-1]) + ? kind : k_string; +} + tristate expr_calc_value(struct expr *e) { tristate val1, val2; const char *str1, *str2; + enum string_value_kind k1 = k_string, k2 = k_string; + union string_value lval = {}, rval = {}; + int res; if (!e) return yes; @@ -854,21 +941,57 @@ tristate expr_calc_value(struct expr *e) val1 = expr_calc_value(e->left.expr); return EXPR_NOT(val1); case E_EQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? yes : no; + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? no : yes; + break; default: printf("expr_calc_value: %d?\n", e->type); return no; } + + sym_calc_value(e->left.sym); + sym_calc_value(e->right.sym); + str1 = sym_get_string_value(e->left.sym); + str2 = sym_get_string_value(e->right.sym); + + if (e->left.sym->type != S_STRING || e->right.sym->type != S_STRING) { + k1 = expr_parse_string(str1, e->left.sym->type, &lval); + k2 = expr_parse_string(str2, e->right.sym->type, &rval); + } + + if (k1 == k_string || k2 == k_string) + res = strcmp(str1, str2); + else if (k1 == k_invalid || k2 == k_invalid) { + if (e->type != E_EQUAL && e->type != E_UNEQUAL) { + printf("Cannot compare \"%s\" and \"%s\"\n", str1, str2); + return no; + } + res = strcmp(str1, str2); + } else if (k1 == k_unsigned || k2 == k_unsigned) + res = (lval.u > rval.u) - (lval.u < rval.u); + else /* if (k1 == k_signed && k2 == k_signed) */ + res = (lval.s > rval.s) - (lval.s < rval.s); + + switch(e->type) { + case E_EQUAL: + return res ? no : yes; + case E_GEQ: + return res >= 0 ? yes : no; + case E_GTH: + return res > 0 ? yes : no; + case E_LEQ: + return res <= 0 ? yes : no; + case E_LTH: + return res < 0 ? yes : no; + case E_UNEQUAL: + return res ? yes : no; + default: + printf("expr_calc_value: relation %d?\n", e->type); + return no; + } } static int expr_compare_type(enum expr_type t1, enum expr_type t2) @@ -876,6 +999,12 @@ static int expr_compare_type(enum expr_type t1, enum expr_type t2) if (t1 == t2) return 0; switch (t1) { + case E_LEQ: + case E_LTH: + case E_GEQ: + case E_GTH: + if (t2 == E_EQUAL || t2 == E_UNEQUAL) + return 1; case E_EQUAL: case E_UNEQUAL: if (t2 == E_NOT) @@ -969,6 +1098,24 @@ void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char * fn(data, NULL, "="); fn(data, e->right.sym, e->right.sym->name); break; + case E_LEQ: + case E_LTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, ""); + fn(data, NULL, e->type == E_LEQ ? "<=" : "<"); + fn(data, e->right.sym, e->right.sym->name); + break; + case E_GEQ: + case E_GTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, ""); + fn(data, NULL, e->type == E_LEQ ? ">=" : ">"); + fn(data, e->right.sym, e->right.sym->name); + break; case E_UNEQUAL: if (e->left.sym->name) fn(data, e->left.sym, e->left.sym->name); diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index a2fc96a2bd2c..973b6f733368 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -29,7 +29,9 @@ typedef enum tristate { } tristate; enum expr_type { - E_NONE, E_OR, E_AND, E_NOT, E_EQUAL, E_UNEQUAL, E_LIST, E_SYMBOL, E_RANGE + E_NONE, E_OR, E_AND, E_NOT, + E_EQUAL, E_UNEQUAL, E_LTH, E_LEQ, E_GTH, E_GEQ, + E_LIST, E_SYMBOL, E_RANGE }; union expr_data { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 6731377f9bb2..70c5ee189dce 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -1166,6 +1166,10 @@ static struct symbol *sym_check_expr_deps(struct expr *e) case E_NOT: return sym_check_expr_deps(e->left.expr); case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: sym = sym_check_deps(e->left.sym); if (sym) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 16741396d264..200a3fe30091 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -122,6 +122,10 @@ n [A-Za-z0-9_] "!" return T_NOT; "=" return T_EQUAL; "!=" return T_UNEQUAL; + "<=" return T_LESS_EQUAL; + ">=" return T_GREATER_EQUAL; + "<" return T_LESS; + ">" return T_GREATER; \"|\' { str = yytext[0]; new_string(); diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 0f683cfa53e9..71bf8bff696a 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -69,6 +69,10 @@ static struct menu *current_menu, *current_entry; %token T_WORD %token T_WORD_QUOTE %token T_UNEQUAL +%token T_LESS +%token T_LESS_EQUAL +%token T_GREATER +%token T_GREATER_EQUAL %token T_CLOSE_PAREN %token T_OPEN_PAREN %token T_EOL @@ -76,6 +80,7 @@ static struct menu *current_menu, *current_entry; %left T_OR %left T_AND %left T_EQUAL T_UNEQUAL +%left T_LESS T_LESS_EQUAL T_GREATER T_GREATER_EQUAL %nonassoc T_NOT %type prompt @@ -467,6 +472,10 @@ if_expr: /* empty */ { $$ = NULL; } ; expr: symbol { $$ = expr_alloc_symbol($1); } + | symbol T_LESS symbol { $$ = expr_alloc_comp(E_LTH, $1, $3); } + | symbol T_LESS_EQUAL symbol { $$ = expr_alloc_comp(E_LEQ, $1, $3); } + | symbol T_GREATER symbol { $$ = expr_alloc_comp(E_GTH, $1, $3); } + | symbol T_GREATER_EQUAL symbol { $$ = expr_alloc_comp(E_GEQ, $1, $3); } | symbol T_EQUAL symbol { $$ = expr_alloc_comp(E_EQUAL, $1, $3); } | symbol T_UNEQUAL symbol { $$ = expr_alloc_comp(E_UNEQUAL, $1, $3); } | T_OPEN_PAREN expr T_CLOSE_PAREN { $$ = $2; } From 4a47f1eb35c5933ef32b2b99ef20f8fe4a251429 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 15 Jun 2015 13:00:59 +0100 Subject: [PATCH 153/839] kconfig: re-generate *.c_shipped files after previous change Signed-off-by: Jan Beulich Signed-off-by: Michal Marek --- scripts/kconfig/zconf.lex.c_shipped | 349 +++++++++--------- scripts/kconfig/zconf.tab.c_shipped | 524 +++++++++++++++------------- 2 files changed, 476 insertions(+), 397 deletions(-) diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index 746691ed010e..dd4e86c82521 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -365,333 +365,354 @@ int zconflineno = 1; extern char *zconftext; #define yytext_ptr zconftext -static yyconst flex_int16_t yy_nxt[][17] = +static yyconst flex_int16_t yy_nxt[][19] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, - 34, 35, 36, 36, 37, 38, 39 + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, - 34, 35, 36, 36, 37, 38, 39 + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, - -11, -11, -11, -11, -11, -11, -11 + -11, -11, -11, -11, -11, -11, -11, -11, -11 }, { 11, -12, -12, -12, -12, -12, -12, -12, -12, -12, - -12, -12, -12, -12, -12, -12, -12 + -12, -12, -12, -12, -12, -12, -12, -12, -12 }, { - 11, -13, 40, 41, -13, -13, 42, -13, -13, -13, - -13, -13, -13, -13, -13, -13, -13 + 11, -13, 42, 43, -13, -13, 44, -13, -13, -13, + -13, -13, -13, -13, -13, -13, -13, -13, -13 }, { 11, -14, -14, -14, -14, -14, -14, -14, -14, -14, - -14, -14, -14, -14, -14, -14, -14 + -14, -14, -14, -14, -14, -14, -14, -14, -14 }, { - 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { 11, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16 + -16, -16, -16, -16, -16, -16, -16, -16, -16 }, { 11, -17, -17, -17, -17, -17, -17, -17, -17, -17, - -17, -17, -17, -17, -17, -17, -17 + -17, -17, -17, -17, -17, -17, -17, -17, -17 }, { 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, - -18, -18, -18, 45, -18, -18, -18 + -18, -18, -18, 47, -18, -18, -18, -18, -18 }, { - 11, 46, 46, -19, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46 + 11, 48, 48, -19, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, -20, 47, 48, -20, -20, -20, -20, -20, -20, - -20, -20, -20, -20, -20, -20, -20 + 11, -20, 49, 50, -20, -20, -20, -20, -20, -20, + -20, -20, -20, -20, -20, -20, -20, -20, -20 }, { - 11, 49, -21, -21, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49 + 11, 51, -21, -21, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { - 11, 50, 50, 51, 50, -22, 50, 50, -22, 50, - 50, 50, 50, 50, 50, -22, 50 + 11, 52, 52, 53, 52, -22, 52, 52, -22, 52, + 52, 52, 52, 52, 52, 52, 52, -22, 52 }, { 11, -23, -23, -23, -23, -23, -23, -23, -23, -23, - -23, -23, -23, -23, -23, -23, -23 + -23, -23, -23, -23, -23, -23, -23, -23, -23 }, { 11, -24, -24, -24, -24, -24, -24, -24, -24, -24, - -24, -24, -24, -24, -24, -24, -24 + -24, -24, -24, -24, -24, -24, -24, -24, -24 }, { - 11, 52, 52, 53, 52, 52, 52, 52, 52, 52, - 52, 52, 52, 52, 52, 52, 52 + 11, 54, 54, 55, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54 }, { 11, -26, -26, -26, -26, -26, -26, -26, -26, -26, - -26, -26, -26, -26, -26, -26, -26 + -26, -26, -26, -26, -26, -26, -26, -26, -26 }, { - 11, -27, 54, -27, -27, -27, -27, -27, -27, -27, - -27, -27, -27, -27, -27, -27, -27 + 11, -27, 56, -27, -27, -27, -27, -27, -27, -27, + -27, -27, -27, -27, -27, -27, -27, -27, -27 }, { 11, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, -28, -28, -28 + -28, -28, -28, -28, -28, -28, -28, -28, -28 }, { 11, -29, -29, -29, -29, -29, -29, -29, -29, -29, - -29, -29, -29, -29, 55, -29, -29 + -29, -29, -29, -29, -29, 57, -29, -29, -29 }, { 11, -30, -30, -30, -30, -30, -30, -30, -30, -30, - -30, -30, -30, -30, -30, -30, -30 + -30, -30, -30, -30, -30, -30, -30, -30, -30 }, { - 11, 56, 56, -31, 56, 56, 56, 56, 56, 56, - 56, 56, 56, 56, 56, 56, 56 + 11, 58, 58, -31, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { - 11, -32, -32, -32, -32, -32, -32, 57, -32, -32, - -32, -32, -32, -32, -32, -32, -32 + 11, -32, -32, -32, -32, -32, -32, 59, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32 }, { 11, -33, -33, -33, -33, -33, -33, -33, -33, -33, - -33, -33, -33, -33, -33, -33, -33 + -33, -33, -33, -33, -33, -33, -33, -33, -33 }, { 11, -34, -34, -34, -34, -34, -34, -34, -34, -34, - -34, -34, -34, -34, -34, -34, -34 + -34, -34, -34, -34, -34, -34, -34, -34, -34 }, { 11, -35, -35, -35, -35, -35, -35, -35, -35, -35, - -35, 58, 59, 59, -35, -35, -35 + -35, 60, 61, 61, -35, -35, -35, -35, -35 }, { 11, -36, -36, -36, -36, -36, -36, -36, -36, -36, - -36, 59, 59, 59, -36, -36, -36 + -36, 61, 61, 61, -36, -36, -36, -36, -36 }, { 11, -37, -37, -37, -37, -37, -37, -37, -37, -37, - -37, -37, -37, -37, -37, -37, -37 + -37, -37, -37, -37, -37, 62, -37, -37, -37 }, { - 11, -38, -38, 60, -38, -38, -38, -38, -38, -38, - -38, -38, -38, -38, -38, -38, -38 + 11, -38, -38, -38, -38, -38, -38, -38, -38, -38, + -38, -38, -38, -38, -38, -38, -38, -38, -38 }, { 11, -39, -39, -39, -39, -39, -39, -39, -39, -39, - -39, -39, -39, -39, -39, -39, 61 + -39, -39, -39, -39, -39, 63, -39, -39, -39 }, { - 11, -40, 40, 41, -40, -40, 42, -40, -40, -40, - -40, -40, -40, -40, -40, -40, -40 + 11, -40, -40, 64, -40, -40, -40, -40, -40, -40, + -40, -40, -40, -40, -40, -40, -40, -40, -40 }, { 11, -41, -41, -41, -41, -41, -41, -41, -41, -41, - -41, -41, -41, -41, -41, -41, -41 + -41, -41, -41, -41, -41, -41, -41, -41, 65 }, { - 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43 + 11, -42, 42, 43, -42, -42, 44, -42, -42, -42, + -42, -42, -42, -42, -42, -42, -42, -42, -42 }, { - 11, 43, 43, 44, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43 + 11, -43, -43, -43, -43, -43, -43, -43, -43, -43, + -43, -43, -43, -43, -43, -43, -43, -43, -43 }, { - 11, -44, -44, -44, -44, -44, -44, -44, -44, -44, - -44, -44, -44, -44, -44, -44, -44 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, -45, -45, -45, -45, -45, -45, -45, -45, -45, - -45, -45, -45, 45, -45, -45, -45 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, 46, 46, -46, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46 + 11, -46, -46, -46, -46, -46, -46, -46, -46, -46, + -46, -46, -46, -46, -46, -46, -46, -46, -46 }, { - 11, -47, 47, 48, -47, -47, -47, -47, -47, -47, - -47, -47, -47, -47, -47, -47, -47 + 11, -47, -47, -47, -47, -47, -47, -47, -47, -47, + -47, -47, -47, 47, -47, -47, -47, -47, -47 }, { - 11, 49, -48, -48, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49 + 11, 48, 48, -48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, -49, -49, -49, -49, -49, -49, -49, -49, -49, - -49, -49, -49, -49, -49, -49, -49 + 11, -49, 49, 50, -49, -49, -49, -49, -49, -49, + -49, -49, -49, -49, -49, -49, -49, -49, -49 }, { - 11, 50, 50, 51, 50, -50, 50, 50, -50, 50, - 50, 50, 50, 50, 50, -50, 50 + 11, 51, -50, -50, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { 11, -51, -51, -51, -51, -51, -51, -51, -51, -51, - -51, -51, -51, -51, -51, -51, -51 + -51, -51, -51, -51, -51, -51, -51, -51, -51 }, { - 11, -52, -52, 53, -52, -52, -52, -52, -52, -52, - -52, -52, -52, -52, -52, -52, -52 + 11, 52, 52, 53, 52, -52, 52, 52, -52, 52, + 52, 52, 52, 52, 52, 52, 52, -52, 52 }, { 11, -53, -53, -53, -53, -53, -53, -53, -53, -53, - -53, -53, -53, -53, -53, -53, -53 + -53, -53, -53, -53, -53, -53, -53, -53, -53 }, { - 11, -54, 54, -54, -54, -54, -54, -54, -54, -54, - -54, -54, -54, -54, -54, -54, -54 + 11, -54, -54, 55, -54, -54, -54, -54, -54, -54, + -54, -54, -54, -54, -54, -54, -54, -54, -54 }, { 11, -55, -55, -55, -55, -55, -55, -55, -55, -55, - -55, -55, -55, -55, -55, -55, -55 + -55, -55, -55, -55, -55, -55, -55, -55, -55 }, { - 11, 56, 56, -56, 56, 56, 56, 56, 56, 56, - 56, 56, 56, 56, 56, 56, 56 + 11, -56, 56, -56, -56, -56, -56, -56, -56, -56, + -56, -56, -56, -56, -56, -56, -56, -56, -56 }, { 11, -57, -57, -57, -57, -57, -57, -57, -57, -57, - -57, -57, -57, -57, -57, -57, -57 + -57, -57, -57, -57, -57, -57, -57, -57, -57 }, { - 11, -58, -58, -58, -58, -58, -58, -58, -58, -58, - -58, 62, 59, 59, -58, -58, -58 + 11, 58, 58, -58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { 11, -59, -59, -59, -59, -59, -59, -59, -59, -59, - -59, 59, 59, 59, -59, -59, -59 + -59, -59, -59, -59, -59, -59, -59, -59, -59 }, { 11, -60, -60, -60, -60, -60, -60, -60, -60, -60, - -60, -60, -60, -60, -60, -60, -60 + -60, 66, 61, 61, -60, -60, -60, -60, -60 }, { 11, -61, -61, -61, -61, -61, -61, -61, -61, -61, - -61, -61, -61, -61, -61, -61, -61 + -61, 61, 61, 61, -61, -61, -61, -61, -61 }, { 11, -62, -62, -62, -62, -62, -62, -62, -62, -62, - -62, 59, 59, 59, -62, -62, -62 + -62, -62, -62, -62, -62, -62, -62, -62, -62 + }, + + { + 11, -63, -63, -63, -63, -63, -63, -63, -63, -63, + -63, -63, -63, -63, -63, -63, -63, -63, -63 + }, + + { + 11, -64, -64, -64, -64, -64, -64, -64, -64, -64, + -64, -64, -64, -64, -64, -64, -64, -64, -64 + + }, + + { + 11, -65, -65, -65, -65, -65, -65, -65, -65, -65, + -65, -65, -65, -65, -65, -65, -65, -65, -65 + }, + + { + 11, -66, -66, -66, -66, -66, -66, -66, -66, -66, + -66, 61, 61, 61, -66, -66, -66, -66, -66 }, } ; @@ -711,8 +732,8 @@ static void yy_fatal_error (yyconst char msg[] ); *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 34 -#define YY_END_OF_BUFFER 35 +#define YY_NUM_RULES 38 +#define YY_END_OF_BUFFER 39 /* This struct is not used in this scanner, but its presence is necessary. */ struct yy_trans_info @@ -720,15 +741,15 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static yyconst flex_int16_t yy_accept[63] = +static yyconst flex_int16_t yy_accept[67] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 35, 5, 4, 2, 3, 7, 8, 6, 33, 30, - 32, 25, 29, 28, 27, 23, 22, 17, 13, 16, - 20, 23, 11, 12, 19, 19, 14, 23, 23, 4, - 2, 3, 3, 1, 6, 33, 30, 32, 31, 25, - 24, 27, 26, 22, 15, 20, 9, 19, 19, 21, - 10, 18 + 39, 5, 4, 2, 3, 7, 8, 6, 37, 34, + 36, 29, 33, 32, 31, 27, 26, 21, 13, 20, + 24, 27, 11, 12, 23, 23, 18, 14, 19, 27, + 27, 4, 2, 3, 3, 1, 6, 37, 34, 36, + 35, 29, 28, 31, 30, 26, 15, 24, 9, 23, + 23, 16, 17, 25, 10, 22 } ; static yyconst flex_int32_t yy_ec[256] = @@ -738,15 +759,15 @@ static yyconst flex_int32_t yy_ec[256] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 5, 6, 1, 1, 7, 8, 9, 10, 1, 1, 1, 11, 12, 12, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 1, 1, 1, - 14, 1, 1, 1, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 1, 1, 14, + 15, 16, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 1, 15, 1, 1, 13, 1, 13, 13, 13, 13, + 1, 17, 1, 1, 13, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 1, 16, 1, 1, 1, 1, 1, 1, + 13, 13, 1, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -1153,22 +1174,38 @@ return T_UNEQUAL; YY_BREAK case 16: YY_RULE_SETUP +return T_LESS_EQUAL; + YY_BREAK +case 17: +YY_RULE_SETUP +return T_GREATER_EQUAL; + YY_BREAK +case 18: +YY_RULE_SETUP +return T_LESS; + YY_BREAK +case 19: +YY_RULE_SETUP +return T_GREATER; + YY_BREAK +case 20: +YY_RULE_SETUP { str = zconftext[0]; new_string(); BEGIN(STRING); } YY_BREAK -case 17: -/* rule 17 can match eol */ +case 21: +/* rule 21 can match eol */ YY_RULE_SETUP BEGIN(INITIAL); current_file->lineno++; return T_EOL; YY_BREAK -case 18: +case 22: YY_RULE_SETUP /* ignore */ YY_BREAK -case 19: +case 23: YY_RULE_SETUP { const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng); @@ -1181,20 +1218,20 @@ YY_RULE_SETUP return T_WORD; } YY_BREAK -case 20: +case 24: YY_RULE_SETUP /* comment */ YY_BREAK -case 21: -/* rule 21 can match eol */ +case 25: +/* rule 25 can match eol */ YY_RULE_SETUP current_file->lineno++; YY_BREAK -case 22: +case 26: YY_RULE_SETUP YY_BREAK -case 23: +case 27: YY_RULE_SETUP { fprintf(stderr, @@ -1208,43 +1245,43 @@ case YY_STATE_EOF(PARAM): } YY_BREAK -case 24: -/* rule 24 can match eol */ -*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ -(yy_c_buf_p) = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up zconftext again */ -YY_RULE_SETUP -{ - append_string(zconftext, zconfleng); - zconflval.string = text; - return T_WORD_QUOTE; - } - YY_BREAK -case 25: -YY_RULE_SETUP -{ - append_string(zconftext, zconfleng); - } - YY_BREAK -case 26: -/* rule 26 can match eol */ -*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ -(yy_c_buf_p) = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up zconftext again */ -YY_RULE_SETUP -{ - append_string(zconftext + 1, zconfleng - 1); - zconflval.string = text; - return T_WORD_QUOTE; - } - YY_BREAK -case 27: -YY_RULE_SETUP -{ - append_string(zconftext + 1, zconfleng - 1); - } - YY_BREAK case 28: +/* rule 28 can match eol */ +*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ +(yy_c_buf_p) = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up zconftext again */ +YY_RULE_SETUP +{ + append_string(zconftext, zconfleng); + zconflval.string = text; + return T_WORD_QUOTE; + } + YY_BREAK +case 29: +YY_RULE_SETUP +{ + append_string(zconftext, zconfleng); + } + YY_BREAK +case 30: +/* rule 30 can match eol */ +*yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ +(yy_c_buf_p) = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up zconftext again */ +YY_RULE_SETUP +{ + append_string(zconftext + 1, zconfleng - 1); + zconflval.string = text; + return T_WORD_QUOTE; + } + YY_BREAK +case 31: +YY_RULE_SETUP +{ + append_string(zconftext + 1, zconfleng - 1); + } + YY_BREAK +case 32: YY_RULE_SETUP { if (str == zconftext[0]) { @@ -1255,8 +1292,8 @@ YY_RULE_SETUP append_string(zconftext, 1); } YY_BREAK -case 29: -/* rule 29 can match eol */ +case 33: +/* rule 33 can match eol */ YY_RULE_SETUP { printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno()); @@ -1271,7 +1308,7 @@ case YY_STATE_EOF(STRING): } YY_BREAK -case 30: +case 34: YY_RULE_SETUP { ts = 0; @@ -1296,8 +1333,8 @@ YY_RULE_SETUP } } YY_BREAK -case 31: -/* rule 31 can match eol */ +case 35: +/* rule 35 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1308,15 +1345,15 @@ YY_RULE_SETUP return T_HELPTEXT; } YY_BREAK -case 32: -/* rule 32 can match eol */ +case 36: +/* rule 36 can match eol */ YY_RULE_SETUP { current_file->lineno++; append_string("\n", 1); } YY_BREAK -case 33: +case 37: YY_RULE_SETUP { while (zconfleng) { @@ -1347,7 +1384,7 @@ case YY_STATE_EOF(COMMAND): yyterminate(); } YY_BREAK -case 34: +case 38: YY_RULE_SETUP YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped index de5e84ed3f96..7a4d658c2066 100644 --- a/scripts/kconfig/zconf.tab.c_shipped +++ b/scripts/kconfig/zconf.tab.c_shipped @@ -1,8 +1,8 @@ -/* A Bison parser, made by GNU Bison 2.5. */ +/* A Bison parser, made by GNU Bison 2.5.1. */ /* Bison implementation for Yacc-like parsers in C - Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. + Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ #define YYBISON 1 /* Bison version. */ -#define YYBISON_VERSION "2.5" +#define YYBISON_VERSION "2.5.1" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -108,6 +108,14 @@ static struct menu *current_menu, *current_entry; +# ifndef YY_NULL +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULL nullptr +# else +# define YY_NULL 0 +# endif +# endif + /* Enabling traces. */ #ifndef YYDEBUG # define YYDEBUG 1 @@ -159,13 +167,17 @@ static struct menu *current_menu, *current_entry; T_WORD = 281, T_WORD_QUOTE = 282, T_UNEQUAL = 283, - T_CLOSE_PAREN = 284, - T_OPEN_PAREN = 285, - T_EOL = 286, - T_OR = 287, - T_AND = 288, - T_EQUAL = 289, - T_NOT = 290 + T_LESS = 284, + T_LESS_EQUAL = 285, + T_GREATER = 286, + T_GREATER_EQUAL = 287, + T_CLOSE_PAREN = 288, + T_OPEN_PAREN = 289, + T_EOL = 290, + T_OR = 291, + T_AND = 292, + T_EQUAL = 293, + T_NOT = 294 }; #endif @@ -304,6 +316,7 @@ YYID (yyi) # if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ || defined __cplusplus || defined _MSC_VER) # include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ # ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 # endif @@ -395,20 +408,20 @@ union yyalloc #endif #if defined YYCOPY_NEEDED && YYCOPY_NEEDED -/* Copy COUNT objects from FROM to TO. The source and destination do +/* Copy COUNT objects from SRC to DST. The source and destination do not overlap. */ # ifndef YYCOPY # if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) # else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ while (YYID (0)) # endif # endif @@ -417,20 +430,20 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 11 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 290 +#define YYLAST 298 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 36 +#define YYNTOKENS 40 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 50 /* YYNRULES -- Number of rules. */ -#define YYNRULES 118 +#define YYNRULES 122 /* YYNRULES -- Number of states. */ -#define YYNSTATES 191 +#define YYNSTATES 199 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ #define YYUNDEFTOK 2 -#define YYMAXUTOK 290 +#define YYMAXUTOK 294 #define YYTRANSLATE(YYX) \ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) @@ -467,7 +480,7 @@ static const yytype_uint8 yytranslate[] = 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35 + 35, 36, 37, 38, 39 }; #if YYDEBUG @@ -486,64 +499,67 @@ static const yytype_uint16 yyprhs[] = 235, 238, 241, 244, 248, 252, 255, 258, 261, 262, 265, 268, 271, 276, 277, 280, 283, 286, 287, 290, 292, 294, 297, 300, 303, 305, 308, 309, 312, 314, - 318, 322, 326, 329, 333, 337, 339, 341, 342 + 318, 322, 326, 330, 334, 338, 342, 345, 349, 353, + 355, 357, 358 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. */ static const yytype_int8 yyrhs[] = { - 37, 0, -1, 81, 38, -1, 38, -1, 63, 39, - -1, 39, -1, -1, 39, 41, -1, 39, 55, -1, - 39, 67, -1, 39, 80, -1, 39, 26, 1, 31, - -1, 39, 40, 1, 31, -1, 39, 1, 31, -1, + 41, 0, -1, 85, 42, -1, 42, -1, 67, 43, + -1, 43, -1, -1, 43, 45, -1, 43, 59, -1, + 43, 71, -1, 43, 84, -1, 43, 26, 1, 35, + -1, 43, 44, 1, 35, -1, 43, 1, 35, -1, 16, -1, 18, -1, 19, -1, 21, -1, 17, -1, - 22, -1, 20, -1, 23, -1, 31, -1, 61, -1, - 71, -1, 44, -1, 46, -1, 69, -1, 26, 1, - 31, -1, 1, 31, -1, 10, 26, 31, -1, 43, - 47, -1, 11, 26, 31, -1, 45, 47, -1, -1, - 47, 48, -1, 47, 49, -1, 47, 75, -1, 47, - 73, -1, 47, 42, -1, 47, 31, -1, 19, 78, - 31, -1, 18, 79, 82, 31, -1, 20, 83, 82, - 31, -1, 21, 26, 82, 31, -1, 22, 84, 84, - 82, 31, -1, 24, 50, 31, -1, -1, 50, 26, - 51, -1, -1, 34, 79, -1, 7, 85, 31, -1, - 52, 56, -1, 80, -1, 53, 58, 54, -1, -1, - 56, 57, -1, 56, 75, -1, 56, 73, -1, 56, - 31, -1, 56, 42, -1, 18, 79, 82, 31, -1, - 19, 78, 31, -1, 17, 31, -1, 20, 26, 82, - 31, -1, -1, 58, 41, -1, 14, 83, 81, -1, - 80, -1, 59, 62, 60, -1, -1, 62, 41, -1, - 62, 67, -1, 62, 55, -1, 3, 79, 81, -1, - 4, 79, 31, -1, 64, 76, 74, -1, 80, -1, - 65, 68, 66, -1, -1, 68, 41, -1, 68, 67, - -1, 68, 55, -1, 6, 79, 31, -1, 9, 79, - 31, -1, 70, 74, -1, 12, 31, -1, 72, 13, - -1, -1, 74, 75, -1, 74, 31, -1, 74, 42, - -1, 16, 25, 83, 31, -1, -1, 76, 77, -1, - 76, 31, -1, 23, 82, -1, -1, 79, 82, -1, - 26, -1, 27, -1, 5, 31, -1, 8, 31, -1, - 15, 31, -1, 31, -1, 81, 31, -1, -1, 14, - 83, -1, 84, -1, 84, 34, 84, -1, 84, 28, - 84, -1, 30, 83, 29, -1, 35, 83, -1, 83, - 32, 83, -1, 83, 33, 83, -1, 26, -1, 27, - -1, -1, 26, -1 + 22, -1, 20, -1, 23, -1, 35, -1, 65, -1, + 75, -1, 48, -1, 50, -1, 73, -1, 26, 1, + 35, -1, 1, 35, -1, 10, 26, 35, -1, 47, + 51, -1, 11, 26, 35, -1, 49, 51, -1, -1, + 51, 52, -1, 51, 53, -1, 51, 79, -1, 51, + 77, -1, 51, 46, -1, 51, 35, -1, 19, 82, + 35, -1, 18, 83, 86, 35, -1, 20, 87, 86, + 35, -1, 21, 26, 86, 35, -1, 22, 88, 88, + 86, 35, -1, 24, 54, 35, -1, -1, 54, 26, + 55, -1, -1, 38, 83, -1, 7, 89, 35, -1, + 56, 60, -1, 84, -1, 57, 62, 58, -1, -1, + 60, 61, -1, 60, 79, -1, 60, 77, -1, 60, + 35, -1, 60, 46, -1, 18, 83, 86, 35, -1, + 19, 82, 35, -1, 17, 35, -1, 20, 26, 86, + 35, -1, -1, 62, 45, -1, 14, 87, 85, -1, + 84, -1, 63, 66, 64, -1, -1, 66, 45, -1, + 66, 71, -1, 66, 59, -1, 3, 83, 85, -1, + 4, 83, 35, -1, 68, 80, 78, -1, 84, -1, + 69, 72, 70, -1, -1, 72, 45, -1, 72, 71, + -1, 72, 59, -1, 6, 83, 35, -1, 9, 83, + 35, -1, 74, 78, -1, 12, 35, -1, 76, 13, + -1, -1, 78, 79, -1, 78, 35, -1, 78, 46, + -1, 16, 25, 87, 35, -1, -1, 80, 81, -1, + 80, 35, -1, 23, 86, -1, -1, 83, 86, -1, + 26, -1, 27, -1, 5, 35, -1, 8, 35, -1, + 15, 35, -1, 35, -1, 85, 35, -1, -1, 14, + 87, -1, 88, -1, 88, 29, 88, -1, 88, 30, + 88, -1, 88, 31, 88, -1, 88, 32, 88, -1, + 88, 38, 88, -1, 88, 28, 88, -1, 34, 87, + 33, -1, 39, 87, -1, 87, 36, 87, -1, 87, + 37, 87, -1, 26, -1, 27, -1, -1, 26, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 103, 103, 103, 105, 105, 107, 109, 110, 111, - 112, 113, 114, 118, 122, 122, 122, 122, 122, 122, - 122, 122, 126, 127, 128, 129, 130, 131, 135, 136, - 142, 150, 156, 164, 174, 176, 177, 178, 179, 180, - 181, 184, 192, 198, 208, 214, 220, 223, 225, 236, - 237, 242, 251, 256, 264, 267, 269, 270, 271, 272, - 273, 276, 282, 293, 299, 309, 311, 316, 324, 332, - 335, 337, 338, 339, 344, 351, 358, 363, 371, 374, - 376, 377, 378, 381, 389, 396, 403, 409, 416, 418, - 419, 420, 423, 431, 433, 434, 437, 444, 446, 451, - 452, 455, 456, 457, 461, 462, 465, 466, 469, 470, - 471, 472, 473, 474, 475, 478, 479, 482, 483 + 0, 108, 108, 108, 110, 110, 112, 114, 115, 116, + 117, 118, 119, 123, 127, 127, 127, 127, 127, 127, + 127, 127, 131, 132, 133, 134, 135, 136, 140, 141, + 147, 155, 161, 169, 179, 181, 182, 183, 184, 185, + 186, 189, 197, 203, 213, 219, 225, 228, 230, 241, + 242, 247, 256, 261, 269, 272, 274, 275, 276, 277, + 278, 281, 287, 298, 304, 314, 316, 321, 329, 337, + 340, 342, 343, 344, 349, 356, 363, 368, 376, 379, + 381, 382, 383, 386, 394, 401, 408, 414, 421, 423, + 424, 425, 428, 436, 438, 439, 442, 449, 451, 456, + 457, 460, 461, 462, 466, 467, 470, 471, 474, 475, + 476, 477, 478, 479, 480, 481, 482, 483, 484, 487, + 488, 491, 492 }; #endif @@ -557,6 +573,7 @@ static const char *const yytname[] = "T_MENUCONFIG", "T_HELP", "T_HELPTEXT", "T_IF", "T_ENDIF", "T_DEPENDS", "T_OPTIONAL", "T_PROMPT", "T_TYPE", "T_DEFAULT", "T_SELECT", "T_RANGE", "T_VISIBLE", "T_OPTION", "T_ON", "T_WORD", "T_WORD_QUOTE", "T_UNEQUAL", + "T_LESS", "T_LESS_EQUAL", "T_GREATER", "T_GREATER_EQUAL", "T_CLOSE_PAREN", "T_OPEN_PAREN", "T_EOL", "T_OR", "T_AND", "T_EQUAL", "T_NOT", "$accept", "input", "start", "stmt_list", "option_name", "common_stmt", "option_error", "config_entry_start", "config_stmt", @@ -568,7 +585,7 @@ static const char *const yytname[] = "menu_entry", "menu_end", "menu_stmt", "menu_block", "source_stmt", "comment", "comment_stmt", "help_start", "help", "depends_list", "depends", "visibility_list", "visible", "prompt_stmt_opt", "prompt", - "end", "nl", "if_expr", "expr", "symbol", "word_opt", 0 + "end", "nl", "if_expr", "expr", "symbol", "word_opt", YY_NULL }; #endif @@ -580,25 +597,26 @@ static const yytype_uint16 yytoknum[] = 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290 + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294 }; # endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { - 0, 36, 37, 37, 38, 38, 39, 39, 39, 39, - 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, - 40, 40, 41, 41, 41, 41, 41, 41, 42, 42, - 43, 44, 45, 46, 47, 47, 47, 47, 47, 47, - 47, 48, 48, 48, 48, 48, 49, 50, 50, 51, - 51, 52, 53, 54, 55, 56, 56, 56, 56, 56, - 56, 57, 57, 57, 57, 58, 58, 59, 60, 61, - 62, 62, 62, 62, 63, 64, 65, 66, 67, 68, - 68, 68, 68, 69, 70, 71, 72, 73, 74, 74, - 74, 74, 75, 76, 76, 76, 77, 78, 78, 79, - 79, 80, 80, 80, 81, 81, 82, 82, 83, 83, - 83, 83, 83, 83, 83, 84, 84, 85, 85 + 0, 40, 41, 41, 42, 42, 43, 43, 43, 43, + 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, + 44, 44, 45, 45, 45, 45, 45, 45, 46, 46, + 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, + 51, 52, 52, 52, 52, 52, 53, 54, 54, 55, + 55, 56, 57, 58, 59, 60, 60, 60, 60, 60, + 60, 61, 61, 61, 61, 62, 62, 63, 64, 65, + 66, 66, 66, 66, 67, 68, 69, 70, 71, 72, + 72, 72, 72, 73, 74, 75, 76, 77, 78, 78, + 78, 78, 79, 80, 80, 80, 81, 82, 82, 83, + 83, 84, 84, 84, 85, 85, 86, 86, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, + 88, 89, 89 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ @@ -615,7 +633,8 @@ static const yytype_uint8 yyr2[] = 2, 2, 2, 3, 3, 2, 2, 2, 0, 2, 2, 2, 4, 0, 2, 2, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 3, - 3, 3, 2, 3, 3, 1, 1, 0, 1 + 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, + 1, 0, 1 }; /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. @@ -624,72 +643,72 @@ static const yytype_uint8 yyr2[] = static const yytype_uint8 yydefact[] = { 6, 0, 104, 0, 3, 0, 6, 6, 99, 100, - 0, 1, 0, 0, 0, 0, 117, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 0, 14, 18, 15, 16, 20, 17, 19, 21, 0, 22, 0, 7, 34, 25, 34, 26, 55, 65, 8, 70, 23, 93, 79, 9, 27, 88, 24, - 10, 0, 105, 2, 74, 13, 0, 101, 0, 118, - 0, 102, 0, 0, 0, 115, 116, 0, 0, 0, + 10, 0, 105, 2, 74, 13, 0, 101, 0, 122, + 0, 102, 0, 0, 0, 119, 120, 0, 0, 0, 108, 103, 0, 0, 0, 0, 0, 0, 0, 88, - 0, 0, 75, 83, 51, 84, 30, 32, 0, 112, - 0, 0, 67, 0, 0, 11, 12, 0, 0, 0, - 0, 97, 0, 0, 0, 47, 0, 40, 39, 35, - 36, 0, 38, 37, 0, 0, 97, 0, 59, 60, - 56, 58, 57, 66, 54, 53, 71, 73, 69, 72, - 68, 106, 95, 0, 94, 80, 82, 78, 81, 77, - 90, 91, 89, 111, 113, 114, 110, 109, 29, 86, - 0, 106, 0, 106, 106, 106, 0, 0, 0, 87, - 63, 106, 0, 106, 0, 96, 0, 0, 41, 98, - 0, 0, 106, 49, 46, 28, 0, 62, 0, 107, - 92, 42, 43, 44, 0, 0, 48, 61, 64, 45, - 50 + 0, 0, 75, 83, 51, 84, 30, 32, 0, 116, + 0, 0, 67, 0, 0, 0, 0, 0, 0, 11, + 12, 0, 0, 0, 0, 97, 0, 0, 0, 47, + 0, 40, 39, 35, 36, 0, 38, 37, 0, 0, + 97, 0, 59, 60, 56, 58, 57, 66, 54, 53, + 71, 73, 69, 72, 68, 106, 95, 0, 94, 80, + 82, 78, 81, 77, 90, 91, 89, 115, 117, 118, + 114, 109, 110, 111, 112, 113, 29, 86, 0, 106, + 0, 106, 106, 106, 0, 0, 0, 87, 63, 106, + 0, 106, 0, 96, 0, 0, 41, 98, 0, 0, + 106, 49, 46, 28, 0, 62, 0, 107, 92, 42, + 43, 44, 0, 0, 48, 61, 64, 45, 50 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { - -1, 3, 4, 5, 33, 34, 108, 35, 36, 37, - 38, 74, 109, 110, 157, 186, 39, 40, 124, 41, - 76, 120, 77, 42, 128, 43, 78, 6, 44, 45, - 137, 46, 80, 47, 48, 49, 111, 112, 81, 113, - 79, 134, 152, 153, 50, 7, 165, 69, 70, 60 + -1, 3, 4, 5, 33, 34, 112, 35, 36, 37, + 38, 74, 113, 114, 165, 194, 39, 40, 128, 41, + 76, 124, 77, 42, 132, 43, 78, 6, 44, 45, + 141, 46, 80, 47, 48, 49, 115, 116, 81, 117, + 79, 138, 160, 161, 50, 7, 173, 69, 70, 60 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -#define YYPACT_NINF -90 +#define YYPACT_NINF -91 static const yytype_int16 yypact[] = { - 4, 42, -90, 96, -90, 111, -90, 15, -90, -90, - 75, -90, 82, 42, 104, 42, 110, 107, 42, 115, - 125, -4, 121, -90, -90, -90, -90, -90, -90, -90, - -90, 162, -90, 163, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 139, -90, -90, 138, -90, 142, -90, 143, -90, - 152, -90, 164, 167, 168, -90, -90, -4, -4, 77, - -18, -90, 177, 185, 33, 71, 195, 247, 236, -2, - 236, 171, -90, -90, -90, -90, -90, -90, 41, -90, - -4, -4, 138, 97, 97, -90, -90, 186, 187, 194, - 42, 42, -4, 196, 97, -90, 219, -90, -90, -90, - -90, 210, -90, -90, 204, 42, 42, 199, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 222, -90, 223, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, 215, -90, -90, -90, -90, -90, - -4, 222, 228, 222, -5, 222, 97, 35, 229, -90, - -90, 222, 232, 222, -4, -90, 135, 233, -90, -90, - 234, 235, 222, 240, -90, -90, 237, -90, 239, -13, - -90, -90, -90, -90, 244, 42, -90, -90, -90, -90, - -90 + 19, 37, -91, 13, -91, 79, -91, 20, -91, -91, + -16, -91, 21, 37, 25, 37, 41, 36, 37, 78, + 83, 31, 56, -91, -91, -91, -91, -91, -91, -91, + -91, 116, -91, 127, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, 147, -91, -91, 105, -91, 109, -91, 111, -91, + 114, -91, 136, 137, 142, -91, -91, 31, 31, 76, + 254, -91, 143, 146, 27, 115, 207, 258, 243, -14, + 243, 179, -91, -91, -91, -91, -91, -91, -7, -91, + 31, 31, 105, 51, 51, 51, 51, 51, 51, -91, + -91, 156, 168, 181, 37, 37, 31, 178, 51, -91, + 206, -91, -91, -91, -91, 196, -91, -91, 175, 37, + 37, 185, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, 214, -91, 230, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 183, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 31, 214, + 194, 214, 45, 214, 51, 26, 195, -91, -91, 214, + 197, 214, 31, -91, 139, 208, -91, -91, 220, 224, + 214, 222, -91, -91, 226, -91, 227, 123, -91, -91, + -91, -91, 235, 37, -91, -91, -91, -91, -91 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { - -90, -90, 269, 271, -90, 23, -70, -90, -90, -90, - -90, 243, -90, -90, -90, -90, -90, -90, -90, -48, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, -20, -90, -90, -90, -90, -90, 206, 205, -68, - -90, -90, 169, -1, 27, -7, 118, -66, -89, -90 + -91, -91, 264, 268, -91, 30, -65, -91, -91, -91, + -91, 238, -91, -91, -91, -91, -91, -91, -91, -12, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -5, -91, -91, -91, -91, -91, 200, 209, -61, + -91, -91, 170, -1, 65, 0, 118, -66, -90, -91 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If @@ -698,102 +717,102 @@ static const yytype_int16 yypgoto[] = #define YYTABLE_NINF -86 static const yytype_int16 yytable[] = { - 10, 88, 89, 54, 146, 147, 119, 1, 122, 164, - 93, 141, 56, 142, 58, 156, 94, 62, 1, 90, - 91, 131, 65, 66, 144, 145, 67, 90, 91, 132, - 127, 68, 136, -31, 97, 2, 154, -31, -31, -31, - -31, -31, -31, -31, -31, 98, 52, -31, -31, 99, - -31, 100, 101, 102, 103, 104, -31, 105, 129, 106, - 138, 173, 92, 141, 107, 142, 174, 172, 8, 9, - 143, -33, 97, 90, 91, -33, -33, -33, -33, -33, - -33, -33, -33, 98, 166, -33, -33, 99, -33, 100, - 101, 102, 103, 104, -33, 105, 11, 106, 179, 151, - 123, 126, 107, 135, 125, 130, 2, 139, 2, 90, - 91, -5, 12, 55, 161, 13, 14, 15, 16, 17, - 18, 19, 20, 65, 66, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 57, 59, 31, 61, -4, - 12, 63, 32, 13, 14, 15, 16, 17, 18, 19, - 20, 64, 71, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 72, 73, 31, 180, 90, 91, 52, - 32, -85, 97, 82, 83, -85, -85, -85, -85, -85, - -85, -85, -85, 84, 190, -85, -85, 99, -85, -85, - -85, -85, -85, -85, -85, 85, 97, 106, 86, 87, - -52, -52, 140, -52, -52, -52, -52, 98, 95, -52, - -52, 99, 114, 115, 116, 117, 96, 148, 149, 150, - 158, 106, 155, 159, 97, 163, 118, -76, -76, -76, - -76, -76, -76, -76, -76, 160, 164, -76, -76, 99, - 13, 14, 15, 16, 17, 18, 19, 20, 91, 106, - 21, 22, 14, 15, 140, 17, 18, 19, 20, 168, - 175, 21, 22, 177, 181, 182, 183, 32, 187, 167, - 188, 169, 170, 171, 185, 189, 53, 51, 32, 176, - 75, 178, 121, 0, 133, 162, 0, 0, 0, 0, - 184 + 10, 88, 89, 150, 151, 152, 153, 154, 155, 135, + 54, 123, 56, 11, 58, 126, 145, 62, 164, 2, + 146, 136, 1, 1, 148, 149, 147, -31, 101, 90, + 91, -31, -31, -31, -31, -31, -31, -31, -31, 102, + 162, -31, -31, 103, -31, 104, 105, 106, 107, 108, + -31, 109, 181, 110, 2, 52, 55, 65, 66, 172, + 57, 182, 111, 8, 9, 67, 131, 59, 140, 92, + 68, 61, 145, 133, 180, 142, 146, 65, 66, -5, + 12, 90, 91, 13, 14, 15, 16, 17, 18, 19, + 20, 71, 174, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 159, 63, 31, 187, 127, 130, 64, + 139, 2, 90, 91, 32, -33, 101, 72, 169, -33, + -33, -33, -33, -33, -33, -33, -33, 102, 73, -33, + -33, 103, -33, 104, 105, 106, 107, 108, -33, 109, + 52, 110, 129, 134, 82, 143, 83, -4, 12, 84, + 111, 13, 14, 15, 16, 17, 18, 19, 20, 90, + 91, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 85, 86, 31, 188, 90, 91, 87, 99, -85, + 101, 100, 32, -85, -85, -85, -85, -85, -85, -85, + -85, 156, 198, -85, -85, 103, -85, -85, -85, -85, + -85, -85, -85, 157, 163, 110, 158, 166, 101, 167, + 168, 171, -52, -52, 144, -52, -52, -52, -52, 102, + 91, -52, -52, 103, 118, 119, 120, 121, 172, 176, + 183, 101, 185, 110, -76, -76, -76, -76, -76, -76, + -76, -76, 122, 189, -76, -76, 103, 13, 14, 15, + 16, 17, 18, 19, 20, 190, 110, 21, 22, 191, + 193, 195, 196, 14, 15, 144, 17, 18, 19, 20, + 197, 53, 21, 22, 51, 75, 125, 175, 32, 177, + 178, 179, 93, 94, 95, 96, 97, 184, 137, 186, + 170, 0, 98, 32, 0, 0, 0, 0, 192 }; #define yypact_value_is_default(yystate) \ - ((yystate) == (-90)) + ((yystate) == (-91)) #define yytable_value_is_error(yytable_value) \ YYID (0) static const yytype_int16 yycheck[] = { - 1, 67, 68, 10, 93, 94, 76, 3, 76, 14, - 28, 81, 13, 81, 15, 104, 34, 18, 3, 32, - 33, 23, 26, 27, 90, 91, 30, 32, 33, 31, - 78, 35, 80, 0, 1, 31, 102, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 31, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 78, 26, - 80, 26, 69, 133, 31, 133, 31, 156, 26, 27, - 29, 0, 1, 32, 33, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 150, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 0, 26, 164, 100, - 77, 78, 31, 80, 77, 78, 31, 80, 31, 32, - 33, 0, 1, 31, 115, 4, 5, 6, 7, 8, - 9, 10, 11, 26, 27, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 26, 26, 31, 0, - 1, 26, 31, 4, 5, 6, 7, 8, 9, 10, - 11, 26, 31, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 1, 1, 26, 31, 32, 33, 31, - 31, 0, 1, 31, 31, 4, 5, 6, 7, 8, - 9, 10, 11, 31, 185, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 1, 26, 31, 31, - 5, 6, 31, 8, 9, 10, 11, 12, 31, 14, - 15, 16, 17, 18, 19, 20, 31, 31, 31, 25, - 1, 26, 26, 13, 1, 26, 31, 4, 5, 6, - 7, 8, 9, 10, 11, 31, 14, 14, 15, 16, - 4, 5, 6, 7, 8, 9, 10, 11, 33, 26, - 14, 15, 5, 6, 31, 8, 9, 10, 11, 31, - 31, 14, 15, 31, 31, 31, 31, 31, 31, 151, - 31, 153, 154, 155, 34, 31, 7, 6, 31, 161, - 37, 163, 76, -1, 79, 116, -1, -1, -1, -1, - 172 + 1, 67, 68, 93, 94, 95, 96, 97, 98, 23, + 10, 76, 13, 0, 15, 76, 81, 18, 108, 35, + 81, 35, 3, 3, 90, 91, 33, 0, 1, 36, + 37, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 106, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 26, 26, 35, 35, 35, 26, 27, 14, + 35, 35, 35, 26, 27, 34, 78, 26, 80, 69, + 39, 35, 137, 78, 164, 80, 137, 26, 27, 0, + 1, 36, 37, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 158, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 104, 26, 26, 172, 77, 78, 26, + 80, 35, 36, 37, 35, 0, 1, 1, 119, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 1, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 35, 26, 77, 78, 35, 80, 35, 0, 1, 35, + 35, 4, 5, 6, 7, 8, 9, 10, 11, 36, + 37, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 35, 35, 26, 35, 36, 37, 35, 35, 0, + 1, 35, 35, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 193, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 35, 26, 26, 25, 1, 1, 13, + 35, 26, 5, 6, 35, 8, 9, 10, 11, 12, + 37, 14, 15, 16, 17, 18, 19, 20, 14, 35, + 35, 1, 35, 26, 4, 5, 6, 7, 8, 9, + 10, 11, 35, 35, 14, 15, 16, 4, 5, 6, + 7, 8, 9, 10, 11, 35, 26, 14, 15, 35, + 38, 35, 35, 5, 6, 35, 8, 9, 10, 11, + 35, 7, 14, 15, 6, 37, 76, 159, 35, 161, + 162, 163, 28, 29, 30, 31, 32, 169, 79, 171, + 120, -1, 38, 35, -1, -1, -1, -1, 180 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_uint8 yystos[] = { - 0, 3, 31, 37, 38, 39, 63, 81, 26, 27, - 79, 0, 1, 4, 5, 6, 7, 8, 9, 10, + 0, 3, 35, 41, 42, 43, 67, 85, 26, 27, + 83, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 26, 31, 40, 41, 43, 44, 45, 46, 52, - 53, 55, 59, 61, 64, 65, 67, 69, 70, 71, - 80, 39, 31, 38, 81, 31, 79, 31, 79, 26, - 85, 31, 79, 26, 26, 26, 27, 30, 35, 83, - 84, 31, 1, 1, 47, 47, 56, 58, 62, 76, - 68, 74, 31, 31, 31, 31, 31, 31, 83, 83, - 32, 33, 81, 28, 34, 31, 31, 1, 12, 16, - 18, 19, 20, 21, 22, 24, 26, 31, 42, 48, - 49, 72, 73, 75, 17, 18, 19, 20, 31, 42, - 57, 73, 75, 41, 54, 80, 41, 55, 60, 67, - 80, 23, 31, 74, 77, 41, 55, 66, 67, 80, - 31, 42, 75, 29, 83, 83, 84, 84, 31, 31, - 25, 79, 78, 79, 83, 26, 84, 50, 1, 13, - 31, 79, 78, 26, 14, 82, 83, 82, 31, 82, - 82, 82, 84, 26, 31, 31, 82, 31, 82, 83, - 31, 31, 31, 31, 82, 34, 51, 31, 31, 31, - 79 + 23, 26, 35, 44, 45, 47, 48, 49, 50, 56, + 57, 59, 63, 65, 68, 69, 71, 73, 74, 75, + 84, 43, 35, 42, 85, 35, 83, 35, 83, 26, + 89, 35, 83, 26, 26, 26, 27, 34, 39, 87, + 88, 35, 1, 1, 51, 51, 60, 62, 66, 80, + 72, 78, 35, 35, 35, 35, 35, 35, 87, 87, + 36, 37, 85, 28, 29, 30, 31, 32, 38, 35, + 35, 1, 12, 16, 18, 19, 20, 21, 22, 24, + 26, 35, 46, 52, 53, 76, 77, 79, 17, 18, + 19, 20, 35, 46, 61, 77, 79, 45, 58, 84, + 45, 59, 64, 71, 84, 23, 35, 78, 81, 45, + 59, 70, 71, 84, 35, 46, 79, 33, 87, 87, + 88, 88, 88, 88, 88, 88, 35, 35, 25, 83, + 82, 83, 87, 26, 88, 54, 1, 13, 35, 83, + 82, 26, 14, 86, 87, 86, 35, 86, 86, 86, + 88, 26, 35, 35, 86, 35, 86, 87, 35, 35, + 35, 35, 86, 38, 55, 35, 35, 35, 83 }; #define yyerrok (yyerrstatus = 0) @@ -823,17 +842,18 @@ static const yytype_uint8 yystos[] = #define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ yyerror (YY_("syntax error: cannot back up")); \ YYERROR; \ } \ @@ -928,6 +948,8 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep) YYSTYPE const * const yyvaluep; #endif { + FILE *yyo = yyoutput; + YYUSE (yyo); if (!yyvaluep) return; # ifdef YYPRINT @@ -1179,12 +1201,12 @@ static int yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, yytype_int16 *yyssp, int yytoken) { - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); + YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; /* Internationalized format string. */ - const char *yyformat = 0; + const char *yyformat = YY_NULL; /* Arguments of yyformat. */ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; /* Number of reported tokens (one for the "unexpected", one per @@ -1244,7 +1266,7 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, break; } yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]); if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) return 2; @@ -1329,7 +1351,7 @@ yydestruct (yymsg, yytype, yyvaluep) switch (yytype) { - case 53: /* "choice_entry" */ + case 57: /* "choice_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1339,7 +1361,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 59: /* "if_entry" */ + case 63: /* "if_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1349,7 +1371,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 65: /* "menu_entry" */ + case 69: /* "menu_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1426,7 +1448,7 @@ yyparse () `yyss': related to states. `yyvs': related to semantic values. - Refer to the stacks thru separate pointers, to allow yyoverflow + Refer to the stacks through separate pointers, to allow yyoverflow to reallocate them elsewhere. */ /* The state stack. */ @@ -2012,46 +2034,66 @@ yyreduce: case 109: - { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 110: - { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 111: - { (yyval.expr) = (yyvsp[(2) - (3)].expr); } + { (yyval.expr) = expr_alloc_comp(E_GTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 112: - { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_GEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 113: - { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 114: - { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 115: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = (yyvsp[(2) - (3)].expr); } break; case 116: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } break; case 117: + { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 118: + + { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 119: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + break; + + case 120: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + break; + + case 121: + { (yyval.string) = NULL; } break; @@ -2243,7 +2285,7 @@ yyabortlab: yyresult = 1; goto yyreturn; -#if !defined(yyoverflow) || YYERROR_VERBOSE +#if !defined yyoverflow || YYERROR_VERBOSE /*-------------------------------------------------. | yyexhaustedlab -- memory exhaustion comes here. | `-------------------------------------------------*/ From cdd9eefdf905e92e7fc6cc393314efe68dc6ff66 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: [PATCH 154/839] fs/ufs: restore s_lock mutex Commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") generated deadlocks in read/write mode on mkdir. This patch partially reverts it keeping fixes by Andrew Morton and mutex_destroy() [AV: fixed a missing bit in ufs_remount()] Signed-off-by: Fabian Frederick Reported-by: Ian Campbell Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/ufs/balloc.c | 34 +++++++++++++++++----------------- fs/ufs/ialloc.c | 16 ++++++++-------- fs/ufs/super.c | 10 ++++++++++ fs/ufs/ufs.h | 1 + 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d52..a7106eda5024 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - - lock_ufs(sb); + + mutex_lock(&UFS_SB(sb)->s_lock); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - - unlock_ufs(sb); + + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); do_more: overflow = 0; @@ -211,12 +211,12 @@ do_more: } ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed_unlock: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa01652888..fd0203ce1f7f 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - lock_ufs(sb); + mutex_lock(&sbi->s_lock); /* * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ cg_found: sync_dirty_buffer(bh); brelse(bh); } - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); clear_nlink(inode); unlock_new_inode(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b3bc3e7ae79d..afe9955654c8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -694,6 +694,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -711,6 +712,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -1277,6 +1279,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1290,6 +1293,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1297,12 +1301,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1326,6 +1332,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1335,11 +1342,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1347,6 +1356,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9e..cf6368d42d4a 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct mutex s_lock; }; struct ufs_inode_info { From 12ecbb4b1d765a5076920999298d9625439dbe58 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jun 2015 14:52:04 +0200 Subject: [PATCH 155/839] ufs: Fix warning from unlock_new_inode() Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) introduced unlock_new_inode() call into ufs_add_nondir(). However that function gets called also from ufs_link() which hands it already initialized inode and thus unlock_new_inode() complains. The problem is harmless but annoying. Fix the problem by opencoding necessary stuff in ufs_link() Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ufs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 1f5223c9e1e2..2346b83fa12b 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -174,7 +174,12 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); ihold(inode); - error = ufs_add_nondir(dentry, inode); + error = ufs_add_link(dentry, inode); + if (error) { + inode_dec_link_count(inode); + iput(inode); + } else + d_instantiate(dentry, inode); unlock_ufs(dir->i_sb); return error; } From 514d748f69c97a51a2645eb198ac5c6218f22ff9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 Jun 2015 11:26:34 +0200 Subject: [PATCH 156/839] ufs: Fix possible deadlock when looking up directories Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) made ufs create inodes with I_NEW flag set. However ufs_mkdir() never cleared this flag. Thus if someone ever tried to lookup the directory by inode number, he would deadlock waiting for I_NEW to be cleared. Luckily this mostly happens only if the filesystem is exported over NFS since otherwise we have the inode attached to dentry and don't look it up by inode number. In rare cases dentry can get freed without inode being freed and then we'd hit the deadlock even without NFS export. Fix the problem by clearing I_NEW before instantiating new directory inode. Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Reported-by: Fabian Frederick Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/ufs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2346b83fa12b..60ee32249b72 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -212,6 +212,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) goto out_fail; unlock_ufs(dir->i_sb); + unlock_new_inode(inode); d_instantiate(dentry, inode); out: return err; From a50e4a02ad6957ef6f77cccaa8ef6a337f1856af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 01:50:43 -0400 Subject: [PATCH 157/839] ufs: don't bother with lock_ufs()/unlock_ufs() for directory access We are already serialized by ->i_mutex and operations on different directories are independent. These calls are just rudiments of blind BKL conversion and they should've been removed back then. Signed-off-by: Al Viro --- fs/ufs/namei.c | 54 +++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 60ee32249b72..3429079c11e2 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -56,11 +56,9 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi if (dentry->d_name.len > UFS_MAXNAMLEN) return ERR_PTR(-ENAMETOOLONG); - lock_ufs(dir->i_sb); ino = ufs_inode_by_name(dir, &dentry->d_name); if (ino) inode = ufs_iget(dir->i_sb, ino); - unlock_ufs(dir->i_sb); return d_splice_alias(inode, dentry); } @@ -76,24 +74,16 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { struct inode *inode; - int err; - - UFSD("BEGIN\n"); inode = ufs_new_inode(dir, mode); - err = PTR_ERR(inode); + if (IS_ERR(inode)) + return PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ufs_file_inode_operations; - inode->i_fop = &ufs_file_operations; - inode->i_mapping->a_ops = &ufs_aops; - mark_inode_dirty(inode); - lock_ufs(dir->i_sb); - err = ufs_add_nondir(dentry, inode); - unlock_ufs(dir->i_sb); - } - UFSD("END: err=%d\n", err); - return err; + inode->i_op = &ufs_file_inode_operations; + inode->i_fop = &ufs_file_operations; + inode->i_mapping->a_ops = &ufs_aops; + mark_inode_dirty(inode); + return ufs_add_nondir(dentry, inode); } static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) @@ -110,9 +100,7 @@ static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev init_special_inode(inode, mode, rdev); ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev); mark_inode_dirty(inode); - lock_ufs(dir->i_sb); err = ufs_add_nondir(dentry, inode); - unlock_ufs(dir->i_sb); } return err; } @@ -121,18 +109,17 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, const char * symname) { struct super_block * sb = dir->i_sb; - int err = -ENAMETOOLONG; + int err; unsigned l = strlen(symname)+1; struct inode * inode; if (l > sb->s_blocksize) - goto out_notlocked; + return -ENAMETOOLONG; - lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + return err; if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ @@ -149,17 +136,13 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, } mark_inode_dirty(inode); - err = ufs_add_nondir(dentry, inode); -out: - unlock_ufs(dir->i_sb); -out_notlocked: - return err; + return ufs_add_nondir(dentry, inode); out_fail: inode_dec_link_count(inode); unlock_new_inode(inode); iput(inode); - goto out; + return err; } static int ufs_link (struct dentry * old_dentry, struct inode * dir, @@ -168,8 +151,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, struct inode *inode = d_inode(old_dentry); int error; - lock_ufs(dir->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); ihold(inode); @@ -180,7 +161,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, iput(inode); } else d_instantiate(dentry, inode); - unlock_ufs(dir->i_sb); return error; } @@ -189,7 +169,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; - lock_ufs(dir->i_sb); inode_inc_link_count(dir); inode = ufs_new_inode(dir, S_IFDIR|mode); @@ -210,12 +189,10 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) err = ufs_add_link(dentry, inode); if (err) goto out_fail; - unlock_ufs(dir->i_sb); unlock_new_inode(inode); d_instantiate(dentry, inode); -out: - return err; + return 0; out_fail: inode_dec_link_count(inode); @@ -224,8 +201,7 @@ out_fail: iput (inode); out_dir: inode_dec_link_count(dir); - unlock_ufs(dir->i_sb); - goto out; + return err; } static int ufs_unlink(struct inode *dir, struct dentry *dentry) @@ -255,7 +231,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = d_inode(dentry); int err= -ENOTEMPTY; - lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); if (!err) { @@ -264,7 +239,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) inode_dec_link_count(dir); } } - unlock_ufs(dir->i_sb); return err; } From 70d45cdb664390a5573c05f1eecc8432dcc6581b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2015 01:56:23 -0400 Subject: [PATCH 158/839] ufs: don't touch mtime/ctime of directory being moved See "ext2: Do not update mtime of a moved directory" (and followup in "ext2: fix unbalanced kmap()/kunmap()") for background; this is UFS equivalent - the same problem exists here. Signed-off-by: Al Viro --- fs/ufs/dir.c | 6 ++++-- fs/ufs/namei.c | 9 +++++++-- fs/ufs/ufs.h | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 1bfe8cabff0f..862d284d438e 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -87,7 +87,8 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr) /* Releases the page */ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, - struct page *page, struct inode *inode) + struct page *page, struct inode *inode, + bool update_times) { loff_t pos = page_offset(page) + (char *) de - (char *) page_address(page); @@ -103,7 +104,8 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, err = ufs_commit_chunk(page, pos, len); ufs_put_page(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + if (update_times) + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 3429079c11e2..d3fdfa22add5 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -276,7 +276,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; - ufs_set_link(new_dir, new_de, new_page, old_inode); + ufs_set_link(new_dir, new_de, new_page, old_inode, 1); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) drop_nlink(new_inode); @@ -299,7 +299,12 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_inode); if (dir_de) { - ufs_set_link(old_inode, dir_de, dir_page, new_dir); + if (old_dir != new_dir) + ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0); + else { + kunmap(dir_page); + page_cache_release(dir_page); + } inode_dec_link_count(old_dir); } return 0; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index cf6368d42d4a..2e31ea2e35a3 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page extern int ufs_empty_dir (struct inode *); extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, - struct page *page, struct inode *inode); + struct page *page, struct inode *inode, bool update_times); /* file.c */ extern const struct inode_operations ufs_file_inode_operations; From bc0c94b1404b225b19b6b53a0e508f43e269ed1e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 20 May 2015 21:48:03 -0700 Subject: [PATCH 159/839] target: Drop unnecessary core_tpg_register TFO parameter This patch drops unnecessary target_core_fabric_ops parameter usage for core_tpg_register() during fabric driver TFO->fabric_make_tpg() se_portal_group creation callback execution. Instead, use the existing se_wwn->wwn_tf->tf_ops pointer to ensure fabric driver is really using the same TFO provided at module_init time. Also go ahead and drop the forward TFO declarations tree-wide, and handling the special case for iscsi-target discovery TPG. Cc: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/srpt/ib_srpt.c | 4 +-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 9 ++---- drivers/target/iscsi/iscsi_target_configfs.c | 3 +- drivers/target/iscsi/iscsi_target_tpg.c | 8 ++++-- drivers/target/loopback/tcm_loop.c | 5 +--- drivers/target/sbp/sbp_target.c | 2 +- drivers/target/target_core_tpg.c | 30 ++++++++++++++++---- drivers/target/tcm_fc/tfc_conf.c | 5 +--- drivers/usb/gadget/legacy/tcm_usb_gadget.c | 5 +--- drivers/vhost/scsi.c | 4 +-- drivers/xen/xen-scsiback.c | 5 +--- include/target/target_core_fabric.h | 3 +- 12 files changed, 42 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dbad5c67a294..cea207e8b5d2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -92,7 +92,6 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; -static const struct target_core_fabric_ops srpt_template; static void srpt_release_channel(struct srpt_rdma_ch *ch); static int srpt_queue_status(struct se_cmd *cmd); @@ -3733,8 +3732,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, int res; /* Initialize sport->port_wwn and sport->port_tpg_1 */ - res = core_tpg_register(&srpt_template, &sport->port_wwn, - &sport->port_tpg_1, SCSI_PROTOCOL_SRP); + res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP); if (res) return ERR_PTR(res); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index cb53144c72d0..e14a7fd7d810 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -52,9 +52,6 @@ static struct workqueue_struct *tcm_qla2xxx_free_wq; static struct workqueue_struct *tcm_qla2xxx_cmd_wq; -static const struct target_core_fabric_ops tcm_qla2xxx_ops; -static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops; - /* * Parse WWN. * If strict, we require lower-case hex and colon separators to be sure @@ -1004,8 +1001,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_ops, wwn, &tpg->se_tpg, - SCSI_PROTOCOL_FCP); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; @@ -1124,8 +1120,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn, &tpg->se_tpg, - SCSI_PROTOCOL_FCP); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 523ae556e22c..c1898c84b3d2 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1419,8 +1419,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( if (!tpg) return NULL; - ret = core_tpg_register(&iscsi_ops, wwn, &tpg->tpg_se_tpg, - SCSI_PROTOCOL_ISCSI); + ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI); if (ret < 0) return NULL; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index de8829102ab4..968068ffcb1c 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -66,8 +66,12 @@ int iscsit_load_discovery_tpg(void) pr_err("Unable to allocate struct iscsi_portal_group\n"); return -1; } - - ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg, -1); + /* + * Save iscsi_ops pointer for special case discovery TPG that + * doesn't exist as se_wwn->wwn_group within configfs. + */ + tpg->tpg_se_tpg.se_tpg_tfo = &iscsi_ops; + ret = core_tpg_register(NULL, &tpg->tpg_se_tpg, -1); if (ret < 0) { kfree(tpg); return -1; diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index b0c17614493e..bd9d11a6f823 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -40,8 +40,6 @@ #define to_tcm_loop_hba(hba) container_of(hba, struct tcm_loop_hba, dev) -static const struct target_core_fabric_ops loop_ops; - static struct workqueue_struct *tcm_loop_workqueue; static struct kmem_cache *tcm_loop_cmd_cache; @@ -1081,8 +1079,7 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( /* * Register the tl_tpg as a emulated SAS TCM Target Endpoint */ - ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, - tl_hba->tl_proto_id); + ret = core_tpg_register(wwn, &tl_tpg->tl_se_tpg, tl_hba->tl_proto_id); if (ret < 0) return ERR_PTR(-ENOMEM); diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 28e3adf1eb85..42f82d32ca85 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2055,7 +2055,7 @@ static struct se_portal_group *sbp_make_tpg( goto out_free_tpg; } - ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, SCSI_PROTOCOL_SBP); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SBP); if (ret < 0) goto out_unreg_mgt_agt; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index b9fcf2c4898e..2e77eebe1671 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -37,6 +37,7 @@ #include #include +#include #include #include "target_core_internal.h" @@ -487,16 +488,34 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) } int core_tpg_register( - const struct target_core_fabric_ops *tfo, struct se_wwn *se_wwn, struct se_portal_group *se_tpg, int proto_id) { int ret; + if (!se_tpg) + return -EINVAL; + /* + * For the typical case where core_tpg_register() is called by a + * fabric driver from target_core_fabric_ops->fabric_make_tpg() + * configfs context, use the original tf_ops pointer already saved + * by target-core in target_fabric_make_wwn(). + * + * Otherwise, for special cases like iscsi-target discovery TPGs + * the caller is responsible for setting ->se_tpg_tfo ahead of + * calling core_tpg_register(). + */ + if (se_wwn) + se_tpg->se_tpg_tfo = se_wwn->wwn_tf->tf_ops; + + if (!se_tpg->se_tpg_tfo) { + pr_err("Unable to locate se_tpg->se_tpg_tfo pointer\n"); + return -EINVAL; + } + INIT_HLIST_HEAD(&se_tpg->tpg_lun_hlist); se_tpg->proto_id = proto_id; - se_tpg->se_tpg_tfo = tfo; se_tpg->se_tpg_wwn = se_wwn; atomic_set(&se_tpg->tpg_pr_ref_count, 0); INIT_LIST_HEAD(&se_tpg->acl_node_list); @@ -524,9 +543,10 @@ int core_tpg_register( spin_unlock_bh(&tpg_lock); pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, " - "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), - tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL, - se_tpg->proto_id, tfo->tpg_get_tag(se_tpg)); + "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->get_fabric_name(), + se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) ? + se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) : NULL, + se_tpg->proto_id, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); return 0; } diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 8e1a54f2642c..80a4477fa7ee 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -47,8 +47,6 @@ #include "tcm_fc.h" -static const struct target_core_fabric_ops ft_fabric_ops; - static LIST_HEAD(ft_wwn_list); DEFINE_MUTEX(ft_lport_lock); @@ -282,8 +280,7 @@ static struct se_portal_group *ft_add_tpg( return NULL; } - ret = core_tpg_register(&ft_fabric_ops, wwn, &tpg->se_tpg, - SCSI_PROTOCOL_FCP); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { destroy_workqueue(wq); kfree(tpg); diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 16b9a16678fe..4ff426edf22a 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -28,8 +28,6 @@ USB_GADGET_COMPOSITE_OPTIONS(); -static const struct target_core_fabric_ops usbg_ops; - static inline struct f_uas *to_f_uas(struct usb_function *f) { return container_of(f, struct f_uas, function); @@ -1418,8 +1416,7 @@ static struct se_portal_group *usbg_make_tpg( * SPC doesn't assign a protocol identifier for USB-SCSI, so we * pretend to be SAS.. */ - ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, - SCSI_PROTOCOL_SAS); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SAS); if (ret < 0) { destroy_workqueue(tpg->workqueue); kfree(tpg); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index dcd228b59ded..450aece0227c 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -206,7 +206,6 @@ struct vhost_scsi { int vs_events_nr; /* num of pending events, protected by vq->mutex */ }; -static struct target_core_fabric_ops vhost_scsi_ops; static struct workqueue_struct *vhost_scsi_workqueue; /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ @@ -2001,8 +2000,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&vhost_scsi_ops, wwn, &tpg->se_tpg, - tport->tport_proto_id); + ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 555033bd9239..25144a0ff07c 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -203,8 +203,6 @@ static LIST_HEAD(scsiback_free_pages); static DEFINE_MUTEX(scsiback_mutex); static LIST_HEAD(scsiback_list); -static const struct target_core_fabric_ops scsiback_ops; - static void scsiback_get(struct vscsibk_info *info) { atomic_inc(&info->nr_unreplied_reqs); @@ -1765,8 +1763,7 @@ scsiback_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&scsiback_ops, wwn, &tpg->se_tpg, - tport->tport_proto_id); + ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index b1e00a7d66de..d6216b761333 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -161,8 +161,7 @@ int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *, unsigned char *, u32, int); int core_tpg_set_initiator_node_tag(struct se_portal_group *, struct se_node_acl *, const char *); -int core_tpg_register(const struct target_core_fabric_ops *, - struct se_wwn *, struct se_portal_group *, int); +int core_tpg_register(struct se_wwn *, struct se_portal_group *, int); int core_tpg_deregister(struct se_portal_group *); /* From e34d366273ce17bba902975f7c1e64a24a399024 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 31 May 2015 22:14:03 -0700 Subject: [PATCH 160/839] target: Drop duplicate + unused se_dev_check_wce Reported-by: kbuild test robot Reported-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 3baa6cd7fded..db531fb5e787 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -538,18 +538,6 @@ static u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) return aligned_max_sectors; } -bool se_dev_check_wce(struct se_device *dev) -{ - bool wce = false; - - if (dev->transport->get_write_cache) - wce = dev->transport->get_write_cache(dev); - else if (dev->dev_attrib.emulate_write_cache > 0) - wce = true; - - return wce; -} - int core_dev_add_lun( struct se_portal_group *tpg, struct se_device *dev, From f2d30680204f20b815e6796437923fb870b6c193 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 10 Jun 2015 08:41:22 +0200 Subject: [PATCH 161/839] target: use 64-bit LUNs As we're now using a list to hold the LUNs the target core can now converted to use 64-bit LUNs internally. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_tmr.c | 4 +- drivers/target/loopback/tcm_loop.c | 4 +- drivers/target/target_core_alua.c | 2 +- drivers/target/target_core_configfs.c | 12 +++--- drivers/target/target_core_device.c | 30 +++++++------- drivers/target/target_core_fabric_configfs.c | 18 +++----- drivers/target/target_core_internal.h | 10 ++--- drivers/target/target_core_pr.c | 43 ++++++++++---------- drivers/target/target_core_pr.h | 6 +-- drivers/target/target_core_tpg.c | 6 +-- drivers/target/target_core_transport.c | 6 +-- drivers/target/target_core_ua.c | 8 ++-- drivers/target/target_core_ua.h | 2 +- include/target/target_core_base.h | 19 +++++---- include/target/target_core_fabric.h | 10 ++--- 15 files changed, 88 insertions(+), 92 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index b0224a77e26d..515988258e9e 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -120,7 +120,7 @@ u8 iscsit_tmr_task_reassign( struct iscsi_tmr_req *tmr_req = cmd->tmr_req; struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; struct iscsi_tm *hdr = (struct iscsi_tm *) buf; - int ret, ref_lun; + u64 ret, ref_lun; pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x," " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n", @@ -164,7 +164,7 @@ u8 iscsit_tmr_task_reassign( ref_lun = scsilun_to_int(&hdr->lun); if (ref_lun != ref_cmd->se_cmd.orig_fe_lun) { pr_err("Unable to perform connection recovery for" - " differing ref_lun: %d ref_cmd orig_fe_lun: %u\n", + " differing ref_lun: %llu ref_cmd orig_fe_lun: %llu\n", ref_lun, ref_cmd->se_cmd.orig_fe_lun); return ISCSI_TMF_RSP_REJECTED; } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index bd9d11a6f823..a556bdebd775 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -215,7 +215,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * to struct scsi_device */ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, - int lun, int task, enum tcm_tmreq_table tmr) + u64 lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; struct se_session *se_sess; @@ -747,7 +747,7 @@ static void tcm_loop_port_unlink( se_lun->unpacked_lun); if (!sd) { pr_err("Unable to locate struct scsi_device for %d:%d:" - "%d\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); + "%llu\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); return; } /* diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 02d8e1a8a6a5..2318e6e5ce52 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1257,7 +1257,7 @@ static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun) atomic_read(&lun->lun_tg_pt_secondary_offline), lun->lun_tg_pt_secondary_stat); - snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%u", + snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%llu", se_tpg->se_tpg_tfo->get_fabric_name(), wwn, lun->unpacked_lun); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index f97b969e6714..68addbc7eb7b 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1438,7 +1438,7 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( tfo->tpg_get_wwn(se_tpg)); len += sprintf(page+len, "SPC-3 Reservation: Relative Port" " Identifier Tag: %hu %s Portal Group Tag: %hu" - " %s Logical Unit: %u\n", pr_reg->tg_pt_sep_rtpi, + " %s Logical Unit: %llu\n", pr_reg->tg_pt_sep_rtpi, tfo->get_fabric_name(), tfo->tpg_get_tag(se_tpg), tfo->get_fabric_name(), pr_reg->pr_aptpl_target_lun); @@ -1565,12 +1565,12 @@ static match_table_t tokens = { {Opt_res_type, "res_type=%d"}, {Opt_res_scope, "res_scope=%d"}, {Opt_res_all_tg_pt, "res_all_tg_pt=%d"}, - {Opt_mapped_lun, "mapped_lun=%d"}, + {Opt_mapped_lun, "mapped_lun=%lld"}, {Opt_target_fabric, "target_fabric=%s"}, {Opt_target_node, "target_node=%s"}, {Opt_tpgt, "tpgt=%d"}, {Opt_port_rtpi, "port_rtpi=%d"}, - {Opt_target_lun, "target_lun=%d"}, + {Opt_target_lun, "target_lun=%lld"}, {Opt_err, NULL} }; @@ -1585,7 +1585,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( substring_t args[MAX_OPT_ARGS]; unsigned long long tmp_ll; u64 sa_res_key = 0; - u32 mapped_lun = 0, target_lun = 0; + u64 mapped_lun = 0, target_lun = 0; int ret = -1, res_holder = 0, all_tg_pt = 0, arg, token; u16 tpgt = 0; u8 type = 0; @@ -1675,7 +1675,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_mapped_lun: match_int(args, &arg); - mapped_lun = (u32)arg; + mapped_lun = (u64)arg; break; /* * PR APTPL Metadata for Target Port @@ -1710,7 +1710,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_target_lun: match_int(args, &arg); - target_lun = (u32)arg; + target_lun = (u64)arg; break; default: break; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index db531fb5e787..2ea7322c4dd4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -56,7 +56,7 @@ static struct se_hba *lun0_hba; struct se_device *g_lun0_dev; sense_reason_t -transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) +transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) { struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; @@ -74,7 +74,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) if ((se_cmd->data_direction == DMA_TO_DEVICE) && (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY)) { pr_err("TARGET_CORE[%s]: Detected WRITE_PROTECTED LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); rcu_read_unlock(); @@ -107,7 +107,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) */ if (unpacked_lun != 0) { pr_err("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); return TCM_NON_EXISTENT_LUN; @@ -147,7 +147,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) } EXPORT_SYMBOL(transport_lookup_cmd_lun); -int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) +int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) { struct se_dev_entry *deve; struct se_lun *se_lun = NULL; @@ -172,7 +172,7 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) if (!se_lun) { pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); return -ENODEV; @@ -260,7 +260,7 @@ void core_free_device_list_for_node( } void core_update_device_list_access( - u32 mapped_lun, + u64 mapped_lun, u32 lun_access, struct se_node_acl *nacl) { @@ -283,7 +283,7 @@ void core_update_device_list_access( /* * Called with rcu_read_lock or nacl->device_list_lock held. */ -struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u32 mapped_lun) +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u64 mapped_lun) { struct se_dev_entry *deve; @@ -309,7 +309,7 @@ void target_pr_kref_release(struct kref *kref) int core_enable_device_list_for_node( struct se_lun *lun, struct se_lun_acl *lun_acl, - u32 mapped_lun, + u64 mapped_lun, u32 lun_access, struct se_node_acl *nacl, struct se_portal_group *tpg) @@ -550,7 +550,7 @@ int core_dev_add_lun( if (rc < 0) return rc; - pr_debug("%s_TPG[%u]_LUN[%u] - Activated %s Logical Unit from" + pr_debug("%s_TPG[%u]_LUN[%llu] - Activated %s Logical Unit from" " CORE HBA: %u\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, tpg->se_tpg_tfo->get_fabric_name(), dev->se_hba->hba_id); @@ -583,7 +583,7 @@ void core_dev_del_lun( struct se_portal_group *tpg, struct se_lun *lun) { - pr_debug("%s_TPG[%u]_LUN[%u] - Deactivating %s Logical Unit from" + pr_debug("%s_TPG[%u]_LUN[%llu] - Deactivating %s Logical Unit from" " device object\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, tpg->se_tpg_tfo->get_fabric_name()); @@ -594,7 +594,7 @@ void core_dev_del_lun( struct se_lun_acl *core_dev_init_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_node_acl *nacl, - u32 mapped_lun, + u64 mapped_lun, int *ret) { struct se_lun_acl *lacl; @@ -646,7 +646,7 @@ int core_dev_add_initiator_node_lun_acl( lun_access, nacl, tpg) < 0) return -EINVAL; - pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for " + pr_debug("%s_TPG[%hu]_LUN[%llu->%llu] - Added %s ACL for " " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO", @@ -678,8 +678,8 @@ int core_dev_del_initiator_node_lun_acl( core_disable_device_list_for_node(lun, deve, nacl, tpg); mutex_unlock(&nacl->lun_entry_mutex); - pr_debug("%s_TPG[%hu]_LUN[%u] - Removed ACL for" - " InitiatorNode: %s Mapped LUN: %u\n", + pr_debug("%s_TPG[%hu]_LUN[%llu] - Removed ACL for" + " InitiatorNode: %s Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->initiatorname, lacl->mapped_lun); @@ -692,7 +692,7 @@ void core_dev_free_initiator_node_lun_acl( struct se_lun_acl *lacl) { pr_debug("%s_TPG[%hu] - Freeing ACL for %s InitiatorNode: %s" - " Mapped LUN: %u\n", tpg->se_tpg_tfo->get_fabric_name(), + " Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), tpg->se_tpg_tfo->get_fabric_name(), lacl->initiatorname, lacl->mapped_lun); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 0ee182fce1a6..2fd493e416b0 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -202,7 +202,7 @@ static ssize_t target_fabric_mappedlun_store_write_protect( lacl->se_lun_nacl); pr_debug("%s_ConfigFS: Changed Initiator ACL: %s" - " Mapped LUN: %u Write Protect bit to %s\n", + " Mapped LUN: %llu Write Protect bit to %s\n", se_tpg->se_tpg_tfo->get_fabric_name(), lacl->initiatorname, lacl->mapped_lun, (op) ? "ON" : "OFF"); @@ -322,7 +322,7 @@ static struct config_group *target_fabric_make_mappedlun( struct config_item *acl_ci; struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL; char *buf; - unsigned long mapped_lun; + unsigned long long mapped_lun; int ret = 0; acl_ci = &group->cg_item; @@ -350,15 +350,11 @@ static struct config_group *target_fabric_make_mappedlun( * Determine the Mapped LUN value. This is what the SCSI Initiator * Port will actually see. */ - ret = kstrtoul(buf + 4, 0, &mapped_lun); + ret = kstrtoull(buf + 4, 0, &mapped_lun); if (ret) goto out; - if (mapped_lun > UINT_MAX) { - ret = -EINVAL; - goto out; - } if (mapped_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("Mapped LUN: %lu exceeds TRANSPORT_MAX_LUNS_PER_TPG" + pr_err("Mapped LUN: %llu exceeds TRANSPORT_MAX_LUNS_PER_TPG" "-1: %u for Target Portal Group: %u\n", mapped_lun, TRANSPORT_MAX_LUNS_PER_TPG-1, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); @@ -881,7 +877,7 @@ static struct config_group *target_fabric_make_lun( struct se_portal_group, tpg_lun_group); struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct config_group *lun_cg = NULL, *port_stat_grp = NULL; - unsigned long unpacked_lun; + unsigned long long unpacked_lun; int errno; if (strstr(name, "lun_") != name) { @@ -889,11 +885,9 @@ static struct config_group *target_fabric_make_lun( " \"lun_$LUN_NUMBER\"\n"); return ERR_PTR(-EINVAL); } - errno = kstrtoul(name + 4, 0, &unpacked_lun); + errno = kstrtoull(name + 4, 0, &unpacked_lun); if (errno) return ERR_PTR(errno); - if (unpacked_lun > UINT_MAX) - return ERR_PTR(-EINVAL); lun = core_tpg_alloc_lun(se_tpg, unpacked_lun); if (IS_ERR(lun)) diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 8d8737a13e6f..511178969df3 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -26,10 +26,10 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16); void target_pr_kref_release(struct kref *); void core_free_device_list_for_node(struct se_node_acl *, struct se_portal_group *); -void core_update_device_list_access(u32, u32, struct se_node_acl *); -struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u32); +void core_update_device_list_access(u64, u32, struct se_node_acl *); +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u64); int core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *, - u32, u32, struct se_node_acl *, struct se_portal_group *); + u64, u32, struct se_node_acl *, struct se_portal_group *); void core_disable_device_list_for_node(struct se_lun *, struct se_dev_entry *, struct se_node_acl *, struct se_portal_group *); void core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *); @@ -37,7 +37,7 @@ int core_dev_add_lun(struct se_portal_group *, struct se_device *, struct se_lun *lun); void core_dev_del_lun(struct se_portal_group *, struct se_lun *); struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *, - struct se_node_acl *, u32, int *); + struct se_node_acl *, u64, int *); int core_dev_add_initiator_node_lun_acl(struct se_portal_group *, struct se_lun_acl *, struct se_lun *lun, u32); int core_dev_del_initiator_node_lun_acl(struct se_lun *, @@ -80,7 +80,7 @@ struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tp void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *, struct se_lun *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); -struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32); +struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64); int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, u32, struct se_device *); void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 94935eaa7fa7..436e30b14a11 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -229,8 +229,9 @@ target_scsi2_reservation_release(struct se_cmd *cmd) dev->dev_reservation_flags &= ~DRF_SPC2_RESERVATIONS_WITH_ISID; } tpg = sess->se_tpg; - pr_debug("SCSI-2 Released reservation for %s LUN: %u ->" - " MAPPED LUN: %u for %s\n", tpg->se_tpg_tfo->get_fabric_name(), + pr_debug("SCSI-2 Released reservation for %s LUN: %llu ->" + " MAPPED LUN: %llu for %s\n", + tpg->se_tpg_tfo->get_fabric_name(), cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); @@ -275,10 +276,10 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) (dev->dev_reserved_node_acl != sess->se_node_acl)) { pr_err("SCSI-2 RESERVATION CONFLIFT for %s fabric\n", tpg->se_tpg_tfo->get_fabric_name()); - pr_err("Original reserver LUN: %u %s\n", + pr_err("Original reserver LUN: %llu %s\n", cmd->se_lun->unpacked_lun, dev->dev_reserved_node_acl->initiatorname); - pr_err("Current attempt - LUN: %u -> MAPPED LUN: %u" + pr_err("Current attempt - LUN: %llu -> MAPPED LUN: %llu" " from %s \n", cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); @@ -292,7 +293,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) dev->dev_res_bin_isid = sess->sess_bin_isid; dev->dev_reservation_flags |= DRF_SPC2_RESERVATIONS_WITH_ISID; } - pr_debug("SCSI-2 Reserved %s LUN: %u -> MAPPED LUN: %u" + pr_debug("SCSI-2 Reserved %s LUN: %llu -> MAPPED LUN: %llu" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); @@ -618,7 +619,7 @@ static struct t10_pr_registration *__core_scsi3_do_alloc_registration( struct se_node_acl *nacl, struct se_lun *lun, struct se_dev_entry *deve, - u32 mapped_lun, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -671,7 +672,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( struct se_node_acl *nacl, struct se_lun *lun, struct se_dev_entry *deve, - u32 mapped_lun, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -804,10 +805,10 @@ int core_scsi3_alloc_aptpl_registration( u64 sa_res_key, unsigned char *i_port, unsigned char *isid, - u32 mapped_lun, + u64 mapped_lun, unsigned char *t_port, u16 tpgt, - u32 target_lun, + u64 target_lun, int res_holder, int all_tg_pt, u8 type) @@ -901,9 +902,9 @@ static int __core_scsi3_check_aptpl_registration( struct se_device *dev, struct se_portal_group *tpg, struct se_lun *lun, - u32 target_lun, + u64 target_lun, struct se_node_acl *nacl, - u32 mapped_lun) + u64 mapped_lun) { struct t10_pr_registration *pr_reg, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; @@ -972,7 +973,7 @@ int core_scsi3_check_aptpl_registration( struct se_portal_group *tpg, struct se_lun *lun, struct se_node_acl *nacl, - u32 mapped_lun) + u64 mapped_lun) { if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; @@ -1094,7 +1095,7 @@ static int core_scsi3_alloc_registration( struct se_node_acl *nacl, struct se_lun *lun, struct se_dev_entry *deve, - u32 mapped_lun, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -1668,7 +1669,7 @@ core_scsi3_decode_spec_i_port( } pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node: %s" - " dest_se_deve mapped_lun: %u\n", + " dest_se_deve mapped_lun: %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(), dest_node_acl->initiatorname, dest_se_deve->mapped_lun); @@ -1780,7 +1781,7 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully" " registered Transport ID for Node: %s%s Mapped LUN:" - " %u\n", dest_tpg->se_tpg_tfo->get_fabric_name(), + " %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(), dest_node_acl->initiatorname, i_buf, (dest_se_deve) ? dest_se_deve->mapped_lun : 0); @@ -1873,7 +1874,7 @@ static int core_scsi3_update_aptpl_buf( "sa_res_key=%llu\n" "res_holder=1\nres_type=%02x\n" "res_scope=%02x\nres_all_tg_pt=%d\n" - "mapped_lun=%u\n", reg_count, + "mapped_lun=%llu\n", reg_count, tpg->se_tpg_tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_res_type, @@ -1883,7 +1884,7 @@ static int core_scsi3_update_aptpl_buf( snprintf(tmp, 512, "PR_REG_START: %d\n" "initiator_fabric=%s\ninitiator_node=%s\n%s" "sa_res_key=%llu\nres_holder=0\n" - "res_all_tg_pt=%d\nmapped_lun=%u\n", + "res_all_tg_pt=%d\nmapped_lun=%llu\n", reg_count, tpg->se_tpg_tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_reg_all_tg_pt, @@ -1902,7 +1903,7 @@ static int core_scsi3_update_aptpl_buf( * Include information about the associated SCSI target port. */ snprintf(tmp, 512, "target_fabric=%s\ntarget_node=%s\n" - "tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%u\nPR_REG_END:" + "tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%llu\nPR_REG_END:" " %d\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_wwn(tpg), tpg->se_tpg_tfo->tpg_get_tag(tpg), @@ -2646,7 +2647,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) struct se_session *se_sess = cmd->se_sess; struct t10_reservation *pr_tmpl = &dev->t10_pr; struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; - u32 pr_res_mapped_lun = 0; + u64 pr_res_mapped_lun = 0; int calling_it_nexus = 0; /* * Locate the existing *pr_reg via struct se_node_acl pointers @@ -2802,7 +2803,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, LIST_HEAD(preempt_and_abort_list); struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; struct t10_reservation *pr_tmpl = &dev->t10_pr; - u32 pr_res_mapped_lun = 0; + u64 pr_res_mapped_lun = 0; int all_reg = 0, calling_it_nexus = 0; bool sa_res_key_unmatched = sa_res_key != 0; int prh_type = 0, prh_scope = 0; @@ -3350,7 +3351,7 @@ after_iport_check: } pr_debug("SPC-3 PR REGISTER_AND_MOVE: Located %s node %s LUN" - " ACL for dest_se_deve->mapped_lun: %u\n", + " ACL for dest_se_deve->mapped_lun: %llu\n", dest_tf_ops->get_fabric_name(), dest_node_acl->initiatorname, dest_se_deve->mapped_lun); diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index 749fd7bb7510..e3d26e9126a0 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -56,11 +56,11 @@ extern sense_reason_t target_scsi2_reservation_release(struct se_cmd *); extern sense_reason_t target_scsi2_reservation_reserve(struct se_cmd *); extern int core_scsi3_alloc_aptpl_registration( struct t10_reservation *, u64, - unsigned char *, unsigned char *, u32, - unsigned char *, u16, u32, int, int, u8); + unsigned char *, unsigned char *, u64, + unsigned char *, u16, u64, int, int, u8); extern int core_scsi3_check_aptpl_registration(struct se_device *, struct se_portal_group *, struct se_lun *, - struct se_node_acl *, u32); + struct se_node_acl *, u64); extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *, struct se_node_acl *); extern void core_scsi3_free_all_registrations(struct se_device *); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 2e77eebe1671..82ed8a0f3f0e 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -122,7 +122,7 @@ void core_tpg_add_node_to_devs( lun_access = TRANSPORT_LUNFLAGS_READ_WRITE; } - pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%u] - Adding %s" + pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s" " access for LUN in Demo Mode\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, @@ -598,12 +598,12 @@ EXPORT_SYMBOL(core_tpg_deregister); struct se_lun *core_tpg_alloc_lun( struct se_portal_group *tpg, - u32 unpacked_lun) + u64 unpacked_lun) { struct se_lun *lun; if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER_TPG" + pr_err("%s LUN: %llu exceeds TRANSPORT_MAX_LUNS_PER_TPG" "-1: %u for Target Portal Group: %u\n", tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, TRANSPORT_MAX_LUNS_PER_TPG-1, diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d8a59122fe3b..eed9580b7f6c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1366,7 +1366,7 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, * assumes internal allocation of fabric payload buffer by target-core. */ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, + unsigned char *cdb, unsigned char *sense, u64 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags, struct scatterlist *sgl, u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count, @@ -1502,7 +1502,7 @@ EXPORT_SYMBOL(target_submit_cmd_map_sgls); * It also assumes interal target core SGL memory allocation. */ int target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, + unsigned char *cdb, unsigned char *sense, u64 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags) { return target_submit_cmd_map_sgls(se_cmd, se_sess, cdb, sense, @@ -1539,7 +1539,7 @@ static void target_complete_tmr_failure(struct work_struct *work) **/ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *sense, u32 unpacked_lun, + unsigned char *sense, u64 unpacked_lun, void *fabric_tmr_ptr, unsigned char tm_type, gfp_t gfp, unsigned int tag, int flags) { diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index 6c9616d212b1..e53d4eec2ebc 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -88,7 +88,7 @@ target_scsi3_ua_check(struct se_cmd *cmd) int core_scsi3_ua_allocate( struct se_node_acl *nacl, - u32 unpacked_lun, + u64 unpacked_lun, u8 asc, u8 ascq) { @@ -177,7 +177,7 @@ int core_scsi3_ua_allocate( list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - pr_debug("[%s]: Allocated UNIT ATTENTION, mapped LUN: %u, ASC:" + pr_debug("[%s]: Allocated UNIT ATTENTION, mapped LUN: %llu, ASC:" " 0x%02x, ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, asc, ascq); @@ -267,7 +267,7 @@ void core_scsi3_ua_for_check_condition( rcu_read_unlock(); pr_debug("[%s]: %s UNIT ATTENTION condition with" - " INTLCK_CTRL: %d, mapped LUN: %u, got CDB: 0x%02x" + " INTLCK_CTRL: %d, mapped LUN: %llu, got CDB: 0x%02x" " reported ASC: 0x%02x, ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), (dev->dev_attrib.emulate_ua_intlck_ctrl != 0) ? "Reporting" : @@ -329,7 +329,7 @@ int core_scsi3_ua_clear_for_request_sense( rcu_read_unlock(); pr_debug("[%s]: Released UNIT ATTENTION condition, mapped" - " LUN: %u, got REQUEST_SENSE reported ASC: 0x%02x," + " LUN: %llu, got REQUEST_SENSE reported ASC: 0x%02x," " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), cmd->orig_fe_lun, *asc, *ascq); diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index a6b56b364e7a..6e592b10a8c0 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -28,7 +28,7 @@ extern struct kmem_cache *se_ua_cache; extern sense_reason_t target_scsi3_ua_check(struct se_cmd *); -extern int core_scsi3_ua_allocate(struct se_node_acl *, u32, u8, u8); +extern int core_scsi3_ua_allocate(struct se_node_acl *, u64, u8, u8); extern void core_scsi3_ua_release_all(struct se_dev_entry *); extern void core_scsi3_ua_for_check_condition(struct se_cmd *, u8 *, u8 *); extern int core_scsi3_ua_clear_for_request_sense(struct se_cmd *, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index eefc2b0cfaa3..836ff8abdf18 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -355,8 +355,8 @@ struct t10_pr_registration { int pr_res_scope; /* Used for fabric initiator WWPNs using a ISID */ bool isid_present_at_reg; - u32 pr_res_mapped_lun; - u32 pr_aptpl_target_lun; + u64 pr_res_mapped_lun; + u64 pr_aptpl_target_lun; u16 tg_pt_sep_rtpi; u32 pr_res_generation; u64 pr_reg_bin_isid; @@ -476,7 +476,7 @@ struct se_cmd { /* Total size in bytes associated with command */ u32 data_length; u32 residual_count; - u32 orig_fe_lun; + u64 orig_fe_lun; /* Persistent Reservation key */ u64 pr_res_key; /* Used for sense data */ @@ -614,7 +614,7 @@ struct se_ml_stat_grps { struct se_lun_acl { char initiatorname[TRANSPORT_IQN_LEN]; - u32 mapped_lun; + u64 mapped_lun; struct se_node_acl *se_lun_nacl; struct se_lun *se_lun; struct config_group se_lun_group; @@ -623,10 +623,10 @@ struct se_lun_acl { struct se_dev_entry { /* See transport_lunflags_table */ - u32 lun_flags; - u32 mapped_lun; + u64 mapped_lun; u64 pr_res_key; u64 creation_time; + u32 lun_flags; u32 attach_count; atomic_long_t total_cmds; atomic_long_t read_bytes; @@ -696,14 +696,15 @@ struct scsi_port_stats { }; struct se_lun { - /* RELATIVE TARGET PORT IDENTIFER */ - u16 lun_rtpi; + u64 unpacked_lun; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; u32 lun_access; u32 lun_flags; - u32 unpacked_lun; u32 lun_index; + + /* RELATIVE TARGET PORT IDENTIFER */ + u16 lun_rtpi; atomic_t lun_acl_count; struct se_device __rcu *lun_se_dev; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index d6216b761333..18afef91b447 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -116,16 +116,16 @@ void transport_deregister_session(struct se_session *); void transport_init_se_cmd(struct se_cmd *, const struct target_core_fabric_ops *, struct se_session *, u32, int, int, unsigned char *); -sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u32); +sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u64); sense_reason_t target_setup_cmd_from_cdb(struct se_cmd *, unsigned char *); int target_submit_cmd_map_sgls(struct se_cmd *, struct se_session *, - unsigned char *, unsigned char *, u32, u32, int, int, int, + unsigned char *, unsigned char *, u64, u32, int, int, int, struct scatterlist *, u32, struct scatterlist *, u32, struct scatterlist *, u32); int target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *, - unsigned char *, u32, u32, int, int, int); + unsigned char *, u64, u32, int, int, int); int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *sense, u32 unpacked_lun, + unsigned char *sense, u64 unpacked_lun, void *fabric_tmr_ptr, unsigned char tm_type, gfp_t, unsigned int, int); int transport_handle_cdb_direct(struct se_cmd *); @@ -151,7 +151,7 @@ void core_tmr_release_req(struct se_tmr_req *); int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); -int transport_lookup_tmr_lun(struct se_cmd *, u32); +int transport_lookup_tmr_lun(struct se_cmd *, u64); struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, unsigned char *); From 196e2e2aa362850bf45bcb14b9517124b23b921e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 10 Jun 2015 08:41:23 +0200 Subject: [PATCH 162/839] target: Remove TARGET_MAX_LUNS_PER_TRANSPORT LUN allocation is now fully dynamic, so there is no need to artificially restrain the number of exported LUNs. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 9 --------- drivers/target/target_core_fabric_configfs.c | 8 -------- drivers/target/target_core_tpg.c | 9 --------- drivers/xen/xen-scsiback.c | 7 ++++--- include/target/target_core_base.h | 3 --- 5 files changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 2ea7322c4dd4..ed084023e7d4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -63,9 +63,6 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) struct se_node_acl *nacl = se_sess->se_node_acl; struct se_dev_entry *deve; - if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return TCM_NON_EXISTENT_LUN; - rcu_read_lock(); deve = target_nacl_find_deve(nacl, unpacked_lun); if (deve) { @@ -156,9 +153,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; unsigned long flags; - if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return -ENODEV; - rcu_read_lock(); deve = target_nacl_find_deve(nacl, unpacked_lun); if (deve) { @@ -197,9 +191,6 @@ bool target_lun_is_rdonly(struct se_cmd *cmd) struct se_dev_entry *deve; bool ret; - if (cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) - return true; - rcu_read_lock(); deve = target_nacl_find_deve(se_sess->se_node_acl, cmd->orig_fe_lun); ret = (deve && deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 2fd493e416b0..6cfee595f3f2 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -353,14 +353,6 @@ static struct config_group *target_fabric_make_mappedlun( ret = kstrtoull(buf + 4, 0, &mapped_lun); if (ret) goto out; - if (mapped_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("Mapped LUN: %llu exceeds TRANSPORT_MAX_LUNS_PER_TPG" - "-1: %u for Target Portal Group: %u\n", mapped_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); - ret = -EINVAL; - goto out; - } lacl = core_dev_init_initiator_node_lun_acl(se_tpg, se_nacl, mapped_lun, &ret); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 82ed8a0f3f0e..aa39bc89227b 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -602,15 +602,6 @@ struct se_lun *core_tpg_alloc_lun( { struct se_lun *lun; - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %llu exceeds TRANSPORT_MAX_LUNS_PER_TPG" - "-1: %u for Target Portal Group: %u\n", - tpg->se_tpg_tfo->get_fabric_name(), - unpacked_lun, TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - return ERR_PTR(-EOVERFLOW); - } - lun = kzalloc(sizeof(*lun), GFP_KERNEL); if (!lun) { pr_err("Unable to allocate se_lun memory\n"); diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 25144a0ff07c..10d67802a2fb 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -864,7 +864,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, struct list_head *head = &(info->v2p_entry_lists); unsigned long flags; char *lunp; - unsigned int unpacked_lun; + unsigned long long unpacked_lun; struct se_lun *se_lun; struct scsiback_tpg *tpg_entry, *tpg = NULL; char *error = "doesn't exist"; @@ -876,9 +876,10 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, } *lunp = 0; lunp++; - if (kstrtouint(lunp, 10, &unpacked_lun) || unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) { + err = kstrtoull(lunp, 10, &unpacked_lun); + if (err < 0) { pr_err("lun number not valid: %s\n", lunp); - return -EINVAL; + return err; } mutex_lock(&scsiback_mutex); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 836ff8abdf18..8cb612613c10 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -13,9 +13,6 @@ #define TARGET_CORE_MOD_VERSION "v4.1.0" #define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION -/* Maximum Number of LUNs per Target Portal Group */ -/* Don't raise above 511 or REPORT_LUNS needs to handle >1 page */ -#define TRANSPORT_MAX_LUNS_PER_TPG 256 /* * By default we use 32-byte CDBs in TCM Core and subsystem plugin code. * From 0fb1f14e88f6f814fc578d0b8b3de666d1c9327e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 16 Jun 2015 00:37:16 -0700 Subject: [PATCH 163/839] xen-scsiback: Fix compile warning for 64-bit LUN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/xen/xen-scsiback.c: In function ‘scsiback_add_translation_entry’: drivers/xen/xen-scsiback.c:911:3: warning: format ‘%d’ expects argument of type ‘int’, but argument 3 has type ‘long long unsigned int’ [-Wformat] Signed-off-by: Nicholas Bellinger --- drivers/xen/xen-scsiback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 10d67802a2fb..823bdf4cc36e 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -908,7 +908,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("%s:%d %s\n", phy, unpacked_lun, error); + pr_err("%s:%llu %s\n", phy, unpacked_lun, error); return -ENODEV; } From ee8c07d0c32622b6fa44108b0141434575e43188 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:24 +0200 Subject: [PATCH 164/839] target_core_alua: Correct UA handling when switching states When switching target port group ALUA states we need to send one UA when setting the ALUA state to 'transitioning', and another one once the final state has been set. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 2318e6e5ce52..228a3c7925e5 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -941,16 +941,11 @@ static int core_alua_update_tpg_primary_metadata( return rc; } -static void core_alua_do_transition_tg_pt_work(struct work_struct *work) +static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) { - struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; struct se_dev_entry *se_deve; struct se_lun *lun; struct se_lun_acl *lacl; - bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == - ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); spin_lock(&tg_pt_gp->tg_pt_gp_lock); list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, @@ -1002,6 +997,16 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) percpu_ref_put(&lun->lun_ref); } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); +} + +static void core_alua_do_transition_tg_pt_work(struct work_struct *work) +{ + struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; + bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); + /* * Update the ALUA metadata buf that has been allocated in * core_alua_do_port_transition(), this metadata will be written @@ -1031,6 +1036,9 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) tg_pt_gp->tg_pt_gp_id, core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state), core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state)); + + core_alua_queue_state_change_ua(tg_pt_gp); + spin_lock(&dev->t10_alua.tg_pt_gps_lock); atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); @@ -1083,6 +1091,8 @@ static int core_alua_do_transition_tg_pt( ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; + core_alua_queue_state_change_ua(tg_pt_gp); + /* * Check for the optional ALUA primary state transition delay */ From 316058134ffa0017199b460318e109aa79432cc6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:25 +0200 Subject: [PATCH 165/839] target: Remove 'ua_nacl' pointer from se_ua structure Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_ua.c | 1 - include/target/target_core_base.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index e53d4eec2ebc..e5062245171e 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -107,7 +107,6 @@ int core_scsi3_ua_allocate( } INIT_LIST_HEAD(&ua->ua_nacl_list); - ua->ua_nacl = nacl; ua->ua_asc = asc; ua->ua_ascq = ascq; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 8cb612613c10..8a4f861f12b7 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -548,7 +548,6 @@ struct se_cmd { struct se_ua { u8 ua_asc; u8 ua_ascq; - struct se_node_acl *ua_nacl; struct list_head ua_nacl_list; }; From 298073181112a6ab6c30fe7971b99de968daf81e Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 15 Jun 2015 15:55:30 +1000 Subject: [PATCH 166/839] SUNRPC: never enqueue a ->rq_cong request on ->sending If the sending queue has a task without ->rq_cong set at the front, and then a number of tasks with ->rq_cong set such that they use the entire congestion window, then the queue deadlocks. The first entry cannot be processed until later entries complete. This scenario has been seen with a client using UDP to access a server, and the network connection breaking for a period of time - it doesn't recover. It never really makes sense for an ->rq_cong request to be on the ->sending queue, but it can happen when a request is being retried, and finds the transport if locked (XPRT_LOCKED). In this case we simple call __xprt_put_cong() and the deadlock goes away. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1fb538e10e0..3ca31f20b97c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); @@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } xprt_clear_locked(xprt); out_sleep: + if (req) + __xprt_put_cong(xprt, req); dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; From 455b6ee6452751dc471b5a434f9398fb825d5149 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 12 Jun 2015 18:58:50 +0200 Subject: [PATCH 167/839] pnfs/flexfiles: use swap() in ff_layout_sort_mirrors() Use kernel.h macro definition. Thanks to Julia Lawall for Coccinelle scripting support. Signed-off-by: Fabian Frederick Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6..0f1410c94827 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -182,17 +182,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) { - struct nfs4_ff_layout_mirror *tmp; int i, j; for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (j = i + 1; j < fls->mirror_array_cnt; j++) if (fls->mirror_array[i]->efficiency < - fls->mirror_array[j]->efficiency) { - tmp = fls->mirror_array[i]; - fls->mirror_array[i] = fls->mirror_array[j]; - fls->mirror_array[j] = tmp; - } + fls->mirror_array[j]->efficiency) + swap(fls->mirror_array[i], + fls->mirror_array[j]); } } From 764ad8ba8cd4c6f836fca9378f8c5121aece0842 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:56 -0400 Subject: [PATCH 168/839] nfs: increase size of EXCHANGE_ID name string buffer The current buffer is much too small if you have a relatively long hostname. Bring it up to the size of the one that SETCLIENTID has. Cc: Reported-by: Michael Skralivetsky Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..e9e9a8dcfb47 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; From 3a6bb738792500e8b4534c0350c13a132bac0492 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:57 -0400 Subject: [PATCH 169/839] nfs: convert setclientid and exchange_id encoders to use clp->cl_owner_id ...instead of buffers that are part of their arg structs. We already hold a reference to the client, so we might as well use the allocated buffer. In the event that we can't allocate the clp->cl_owner_id, then just return -ENOMEM. Note too that we switch from a GFP_KERNEL allocation here to GFP_NOFS. It's possible we could end up trying to do a SETCLIENTID or EXCHANGE_ID in order to reclaim some memory, and the GFP_KERNEL allocations in the existing code could cause recursion back into NFS reclaim. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 ++++++++++++++++----- fs/nfs/nfs4xdr.c | 8 +++++--- include/linux/nfs_xdr.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d689ea37be84..b2afe9556147 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5046,7 +5046,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -5076,6 +5076,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, nfs4_init_nonuniform_client_string(clp, setclientid.sc_name, sizeof(setclientid.sc_name)); + + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + /* cb_client4 */ setclientid.sc_netid_len = nfs4_init_callback_netid(clp, @@ -5085,9 +5091,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); + clp->cl_owner_id); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { status = PTR_ERR(task); @@ -6850,9 +6856,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, nfs4_init_boot_verifier(clp, &verifier); args.id_len = nfs4_init_uniform_client_string(clp, args.id, sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + + dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + clp->cl_owner_id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..f58e8a979397 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1667,13 +1667,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); - encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), + setclientid->sc_clnt->cl_owner_id); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_prog); encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); - *p = cpu_to_be32(setclientid->sc_cb_ident); + *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1747,7 +1748,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); - encode_string(xdr, args->id_len, args->id); + encode_string(xdr, strlen(args->client->cl_owner_id), + args->client->cl_owner_id); encode_uint32(xdr, args->flags); encode_uint32(xdr, args->state_protect.how); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9e9a8dcfb47..6c0b423d781b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -994,7 +994,7 @@ struct nfs4_setclientid { char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; From b8fb2f595e8009ad245d564b7a443d108a2334cd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:58 -0400 Subject: [PATCH 170/839] nfs: update maxsz values for SETCLIENTID and EXCHANGE_ID The spec allows for up to NFS4_OPAQUE_LIMIT (1k). While we'll almost certainly never use that much, these ops are generally the only ones in the compound so we might as well allow for them to be that large. Also, the existing code didn't add in a word for the opaque length field for either name string. Fix that while we're in there. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f58e8a979397..a07555d6968c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int); #define encode_setclientid_maxsz \ (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ - XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + /* client name */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* sc_prog */ + \ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ @@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int); #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ 1 /* co_ownerid.len */ + \ - XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + /* eia_clientowner */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ /* max is SP4_MACH_CRED (for now) */ + \ From a319268891089386bd303bb90ace89d75e47102f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:59 -0400 Subject: [PATCH 171/839] nfs: make nfs4_init_nonuniform_client_string use a dynamically allocated buffer The way the *_client_string functions work is a little goofy. They build the string in an on-stack buffer and then use kstrdup to copy it. This is not only stack-heavy but artificially limits the size of the client name string. Change it so that we determine the length of the string, allocate it and then scnprintf into it. Since the contents of the nonuniform string depend on rcu-managed data structures, it's possible that they'll change between when we allocate the string and when we go to fill it. If that happens, free the string, recalculate the length and try again. If it the mismatch isn't resolved on the second try then just give up and return -EINVAL. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 74 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2afe9556147..c20322636a68 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4957,25 +4957,54 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -static unsigned int -nfs4_init_nonuniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_nonuniform_client_string(struct nfs_client *clp) { - unsigned int result; + int result; + size_t len; + char *str; + bool retried = false; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; +retry: + rcu_read_lock(); + len = 10 + strlen(clp->cl_ipaddr) + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + + 1; + rcu_read_unlock(); + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; rcu_read_lock(); - result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); rcu_read_unlock(); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + + /* Did something change? */ + if (result >= len) { + kfree(str); + if (retried) + return -EINVAL; + retried = true; + goto retry; + } + clp->cl_owner_id = str; + return 0; } static unsigned int @@ -5066,20 +5095,19 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) { setclientid.sc_name_len = nfs4_init_uniform_client_string(clp, setclientid.sc_name, sizeof(setclientid.sc_name)); - else - setclientid.sc_name_len = - nfs4_init_nonuniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); - - if (!clp->cl_owner_id) { - status = -ENOMEM; - goto out; + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + } else { + status = nfs4_init_nonuniform_client_string(clp); + if (status) + goto out; } /* cb_client4 */ From 873e385116b2cc5c7daca8f51881371fadb90970 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:44:00 -0400 Subject: [PATCH 172/839] nfs: make nfs4_init_uniform_client_string use a dynamically allocated buffer Change the uniform client string generator to dynamically allocate the NFSv4 client name string buffer. With this patch, we can eliminate the buffers that are embedded within the "args" structs and simply use the name string that is hanging off the client. This uniform string case is a little simpler than the nonuniform since we don't need to deal with RCU, but we do have two different cases, depending on whether there is a uniquifier or not. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 112 +++++++++++++++++++++++++++------------- include/linux/nfs_xdr.h | 6 --- 2 files changed, 77 insertions(+), 41 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c20322636a68..d181090124db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5007,28 +5007,78 @@ retry: return 0; } -static unsigned int -nfs4_init_uniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_uniquifier_client_string(struct nfs_client *clp) { - const char *nodename = clp->cl_rpcclient->cl_nodename; - unsigned int result; + int result; + size_t len; + char *str; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(nfs4_client_id_uniquifier) + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) +{ + int result; + size_t len; + char *str; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; if (nfs4_client_id_uniquifier[0] != '\0') - result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, - clp->cl_minorversion, - nfs4_client_id_uniquifier, - nodename); - else - result = scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + return nfs4_init_uniquifier_client_string(clp); + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; } /* @@ -5095,20 +5145,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) { - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); - if (!clp->cl_owner_id) { - status = -ENOMEM; - goto out; - } - } else { + + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + status = nfs4_init_uniform_client_string(clp); + else status = nfs4_init_nonuniform_client_string(clp); - if (status) - goto out; - } + + if (status) + goto out; /* cb_client4 */ setclientid.sc_netid_len = @@ -6882,12 +6926,10 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - if (!clp->cl_owner_id) { - status = -ENOMEM; + + status = nfs4_init_uniform_client_string(clp); + if (status) goto out; - } dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6c0b423d781b..c959e7eb5bd4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -984,11 +984,8 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; @@ -1142,12 +1139,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; From df05a49f7204d81ba0e29fda3d56ccb730f05d87 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 13 Jun 2015 10:07:00 +0800 Subject: [PATCH 173/839] nfs: Fix showing truncated fsid/dev in, /proc/net/nfsfs/volumes A truncated fsid showing from /proc/fs/nfsfs/volumes as, NV SERVER PORT DEV FSID FSC v4 c0a80881 801 0:43 34931f044c2a439b no It should be as, NV SERVER PORT DEV FSID FSC v4 c0a80881 801 0:43 34931f044c2a439b:954c5d830fa4be8c no The max buffer length for storing "%llx:%llx" format should be 16 + 1 + 16 + 1 = 34 (16 for %llx, 1 for ':', 1 for '\0'). Also, for storing "%u:%u" of MAJOR() and MINOR() should be 8 + 1 + 3 + 1 = 13 (8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'). v2, add comments for dev/fsid buffer and use sizeof in snprintf. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff3630..1e37491904ee 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1364,27 +1364,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) { struct nfs_server *server; struct nfs_client *clp; - char dev[8], fsid[17]; + char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0' + char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0' struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { - seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + seq_puts(m, "NV SERVER PORT DEV FSID" + " FSC\n"); return 0; } /* display one transport per line on subsequent lines */ server = list_entry(v, struct nfs_server, master_link); clp = server->nfs_client; - snprintf(dev, 8, "%u:%u", + snprintf(dev, sizeof(dev), "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - snprintf(fsid, 17, "%llx:%llx", + snprintf(fsid, sizeof(fsid), "%llx:%llx", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); rcu_read_lock(); - seq_printf(m, "v%u %s %s %-7s %-17s %s\n", + seq_printf(m, "v%u %s %s %-12s %-33s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), From ae2ffef383fad723690a4c5bad70f7035fd7d71a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 12 Jun 2015 16:53:30 -0400 Subject: [PATCH 174/839] Recover from stateid-type error on SETATTR Client can receives stateid-type error (eg., BAD_STATEID) on SETATTR when delegation stateid was used. When no open state exists, in case of application calling truncate() on the file, client has no state to recover and fails with EIO. Instead, upon such error, return the bad delegation and then resend the SETATTR with a zero stateid. Signed-off: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d181090124db..6bcdecd61125 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -370,8 +370,14 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) + if (state == NULL) { + if (inode && nfs4_have_delegation(inode, + FMODE_READ)) { + nfs4_inode_return_delegation(inode); + exception->retry = 1; + } break; + } ret = nfs4_schedule_stateid_recovery(server, state); if (ret < 0) break; From 5ba12443a132420be45d089e4c4ac8a1e26b7da8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Jun 2015 11:26:35 -0400 Subject: [PATCH 175/839] NFSv4: Fix stateid recovery on revoked delegations Ensure that we fix the non-NULL stateid case as well. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6bcdecd61125..0fc1b0cdda98 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case 0: return 0; case -NFS4ERR_OPENMODE: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs4_inode_return_delegation(inode); exception->retry = 1; @@ -367,21 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) { - if (inode && nfs4_have_delegation(inode, - FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - } - break; - } - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { ret = nfs4_schedule_stateid_recovery(server, state); From fec47d863587c272d6fbf4e50066209c953d5e60 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:27 -0500 Subject: [PATCH 176/839] remoteproc: introduce rproc_get_by_phandle API Allow users of remoteproc the ability to get a handle to an rproc by passing a phandle supplied in the user's device tree node. This is useful in situations that require manual booting of the rproc. This patch uses the code removed by commit 40e575b1d0b3 ("remoteproc: remove the get_by_name/put API") for the ref counting but is modified to use a simple list and locking mechanism and has rproc_get_by_name replaced with an rproc_get_by_phandle API. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna [fix order of Signed-off-by tags] Signed-off-by: Ohad Ben-Cohen --- Documentation/remoteproc.txt | 6 ++++ drivers/remoteproc/remoteproc_core.c | 50 ++++++++++++++++++++++++++++ include/linux/remoteproc.h | 7 ++-- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt index e6469fdcf89a..ef0219fa4bb4 100644 --- a/Documentation/remoteproc.txt +++ b/Documentation/remoteproc.txt @@ -51,6 +51,12 @@ cost. rproc_shutdown() returns, and users can still use it with a subsequent rproc_boot(), if needed. + struct rproc *rproc_get_by_phandle(phandle phandle) + - Find an rproc handle using a device tree phandle. Returns the rproc + handle on success, and NULL on failure. This function increments + the remote processor's refcount, so always use rproc_put() to + decrement it back once rproc isn't needed anymore. + 3. Typical usage #include diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index e991512e04ed..e1a6d693b8bb 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -44,6 +44,9 @@ #include "remoteproc_internal.h" +static DEFINE_MUTEX(rproc_list_mutex); +static LIST_HEAD(rproc_list); + typedef int (*rproc_handle_resources_t)(struct rproc *rproc, struct resource_table *table, int len); typedef int (*rproc_handle_resource_t)(struct rproc *rproc, @@ -1144,6 +1147,43 @@ out: } EXPORT_SYMBOL(rproc_shutdown); +/** + * rproc_get_by_phandle() - find a remote processor by phandle + * @phandle: phandle to the rproc + * + * Finds an rproc handle using the remote processor's phandle, and then + * return a handle to the rproc. + * + * This function increments the remote processor's refcount, so always + * use rproc_put() to decrement it back once rproc isn't needed anymore. + * + * Returns the rproc handle on success, and NULL on failure. + */ +struct rproc *rproc_get_by_phandle(phandle phandle) +{ + struct rproc *rproc = NULL, *r; + struct device_node *np; + + np = of_find_node_by_phandle(phandle); + if (!np) + return NULL; + + mutex_lock(&rproc_list_mutex); + list_for_each_entry(r, &rproc_list, node) { + if (r->dev.parent && r->dev.parent->of_node == np) { + rproc = r; + get_device(&rproc->dev); + break; + } + } + mutex_unlock(&rproc_list_mutex); + + of_node_put(np); + + return rproc; +} +EXPORT_SYMBOL(rproc_get_by_phandle); + /** * rproc_add() - register a remote processor * @rproc: the remote processor handle to register @@ -1173,6 +1213,11 @@ int rproc_add(struct rproc *rproc) if (ret < 0) return ret; + /* expose to rproc_get_by_phandle users */ + mutex_lock(&rproc_list_mutex); + list_add(&rproc->node, &rproc_list); + mutex_unlock(&rproc_list_mutex); + dev_info(dev, "%s is available\n", rproc->name); dev_info(dev, "Note: remoteproc is still under development and considered experimental.\n"); @@ -1360,6 +1405,11 @@ int rproc_del(struct rproc *rproc) /* Free the copy of the resource table */ kfree(rproc->cached_table); + /* the rproc is downref'ed as soon as it's removed from the klist */ + mutex_lock(&rproc_list_mutex); + list_del(&rproc->node); + mutex_unlock(&rproc_list_mutex); + device_del(&rproc->dev); return 0; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 78b8a9b9d40a..56739e5df1e6 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -36,11 +36,11 @@ #define REMOTEPROC_H #include -#include #include #include #include #include +#include /** * struct resource_table - firmware resource table header @@ -375,7 +375,7 @@ enum rproc_crash_type { /** * struct rproc - represents a physical remote processor device - * @node: klist node of this rproc object + * @node: list node of this rproc object * @domain: iommu domain * @name: human readable name of the rproc * @firmware: name of firmware file to be loaded @@ -407,7 +407,7 @@ enum rproc_crash_type { * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { - struct klist_node node; + struct list_head node; struct iommu_domain *domain; const char *name; const char *firmware; @@ -481,6 +481,7 @@ struct rproc_vdev { u32 rsc_offset; }; +struct rproc *rproc_get_by_phandle(phandle phandle); struct rproc *rproc_alloc(struct device *dev, const char *name, const struct rproc_ops *ops, const char *firmware, int len); From c51c8e7bcac966f209da83630fc8ca7e6cad279b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:26 +0200 Subject: [PATCH 177/839] target: use 'se_dev_entry' when allocating UAs We need to use 'se_dev_entry' as argument when allocating UAs, otherwise we'll never see any UAs for an implicit ALUA state transition triggered from userspace. (Add target_ua_allocate_lun() common caller - nab) Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 29 ++++++++++++------ drivers/target/target_core_pr.c | 12 ++++---- drivers/target/target_core_transport.c | 12 ++++---- drivers/target/target_core_ua.c | 42 ++++++++++++++------------ drivers/target/target_core_ua.h | 3 +- 5 files changed, 55 insertions(+), 43 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 228a3c7925e5..aa2e4b103d43 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -972,23 +972,32 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) { lacl = rcu_dereference_check(se_deve->se_lun_acl, lockdep_is_held(&lun->lun_deve_lock)); - /* - * se_deve->se_lun_acl pointer may be NULL for a - * entry created without explicit Node+MappedLUN ACLs - */ - if (!lacl) - continue; + /* + * spc4r37 p.242: + * After an explicit target port asymmetric access + * state change, a device server shall establish a + * unit attention condition with the additional sense + * code set to ASYMMETRIC ACCESS STATE CHANGED for + * the initiator port associated with every I_T nexus + * other than the I_T nexus on which the SET TARGET + * PORT GROUPS command was received. + */ if ((tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && - (tg_pt_gp->tg_pt_gp_alua_nacl != NULL) && - (tg_pt_gp->tg_pt_gp_alua_nacl == lacl->se_lun_nacl) && (tg_pt_gp->tg_pt_gp_alua_lun != NULL) && (tg_pt_gp->tg_pt_gp_alua_lun == lun)) continue; - core_scsi3_ua_allocate(lacl->se_lun_nacl, - se_deve->mapped_lun, 0x2A, + /* + * se_deve->se_lun_acl pointer may be NULL for a + * entry created without explicit Node+MappedLUN ACLs + */ + if (lacl && (tg_pt_gp->tg_pt_gp_alua_nacl != NULL) && + (tg_pt_gp->tg_pt_gp_alua_nacl == lacl->se_lun_nacl)) + continue; + + core_scsi3_ua_allocate(se_deve, 0x2A, ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED); } spin_unlock_bh(&lun->lun_deve_lock); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 436e30b14a11..0bb329243dba 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2197,7 +2197,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, &pr_tmpl->registration_list, pr_reg_list) { - core_scsi3_ua_allocate( + target_ua_allocate_lun( pr_reg_p->pr_reg_nacl, pr_reg_p->pr_res_mapped_lun, 0x2A, @@ -2624,7 +2624,7 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, if (pr_reg_p == pr_reg) continue; - core_scsi3_ua_allocate(pr_reg_p->pr_reg_nacl, + target_ua_allocate_lun(pr_reg_p->pr_reg_nacl, pr_reg_p->pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_RELEASED); } @@ -2709,7 +2709,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) * additional sense code set to RESERVATIONS PREEMPTED. */ if (!calling_it_nexus) - core_scsi3_ua_allocate(pr_reg_nacl, pr_res_mapped_lun, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_PREEMPTED); } spin_unlock(&pr_tmpl->registration_lock); @@ -2918,7 +2918,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, NULL, 0); } if (!calling_it_nexus) - core_scsi3_ua_allocate(pr_reg_nacl, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_REGISTRATIONS_PREEMPTED); } @@ -3024,7 +3024,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * persistent reservation and/or registration, with the * additional sense code set to REGISTRATIONS PREEMPTED; */ - core_scsi3_ua_allocate(pr_reg_nacl, pr_res_mapped_lun, 0x2A, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_REGISTRATIONS_PREEMPTED); } spin_unlock(&pr_tmpl->registration_lock); @@ -3057,7 +3057,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if (calling_it_nexus) continue; - core_scsi3_ua_allocate(pr_reg->pr_reg_nacl, + target_ua_allocate_lun(pr_reg->pr_reg_nacl, pr_reg->pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_RELEASED); } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index eed9580b7f6c..0364534f8d46 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1677,13 +1677,13 @@ void transport_generic_request_failure(struct se_cmd *cmd, * See spc4r17, section 7.4.6 Control Mode Page, Table 349 */ if (cmd->se_sess && - cmd->se_dev->dev_attrib.emulate_ua_intlck_ctrl == 2) - core_scsi3_ua_allocate(cmd->se_sess->se_node_acl, - cmd->orig_fe_lun, 0x2C, - ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS); - + cmd->se_dev->dev_attrib.emulate_ua_intlck_ctrl == 2) { + target_ua_allocate_lun(cmd->se_sess->se_node_acl, + cmd->orig_fe_lun, 0x2C, + ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS); + } trace_target_cmd_complete(cmd); - ret = cmd->se_tfo-> queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; goto check_stop; diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index e5062245171e..fc095aed5a88 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -87,18 +87,11 @@ target_scsi3_ua_check(struct se_cmd *cmd) } int core_scsi3_ua_allocate( - struct se_node_acl *nacl, - u64 unpacked_lun, + struct se_dev_entry *deve, u8 asc, u8 ascq) { - struct se_dev_entry *deve; struct se_ua *ua, *ua_p, *ua_tmp; - /* - * PASSTHROUGH OPS - */ - if (!nacl) - return -EINVAL; ua = kmem_cache_zalloc(se_ua_cache, GFP_ATOMIC); if (!ua) { @@ -110,12 +103,6 @@ int core_scsi3_ua_allocate( ua->ua_asc = asc; ua->ua_ascq = ascq; - rcu_read_lock(); - deve = target_nacl_find_deve(nacl, unpacked_lun); - if (!deve) { - rcu_read_unlock(); - return -EINVAL; - } spin_lock(&deve->ua_lock); list_for_each_entry_safe(ua_p, ua_tmp, &deve->ua_list, ua_nacl_list) { /* @@ -123,7 +110,6 @@ int core_scsi3_ua_allocate( */ if ((ua_p->ua_asc == asc) && (ua_p->ua_ascq == ascq)) { spin_unlock(&deve->ua_lock); - rcu_read_unlock(); kmem_cache_free(se_ua_cache, ua); return 0; } @@ -170,22 +156,38 @@ int core_scsi3_ua_allocate( spin_unlock(&deve->ua_lock); atomic_inc_mb(&deve->ua_count); - rcu_read_unlock(); return 0; } list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - pr_debug("[%s]: Allocated UNIT ATTENTION, mapped LUN: %llu, ASC:" - " 0x%02x, ASCQ: 0x%02x\n", - nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, + pr_debug("Allocated UNIT ATTENTION, mapped LUN: %llu, ASC:" + " 0x%02x, ASCQ: 0x%02x\n", deve->mapped_lun, asc, ascq); atomic_inc_mb(&deve->ua_count); - rcu_read_unlock(); return 0; } +void target_ua_allocate_lun(struct se_node_acl *nacl, + u32 unpacked_lun, u8 asc, u8 ascq) +{ + struct se_dev_entry *deve; + + if (!nacl) + return; + + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (!deve) { + rcu_read_unlock(); + return; + } + + core_scsi3_ua_allocate(deve, asc, ascq); + rcu_read_unlock(); +} + void core_scsi3_ua_release_all( struct se_dev_entry *deve) { diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index 6e592b10a8c0..96460bff490f 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -28,7 +28,8 @@ extern struct kmem_cache *se_ua_cache; extern sense_reason_t target_scsi3_ua_check(struct se_cmd *); -extern int core_scsi3_ua_allocate(struct se_node_acl *, u64, u8, u8); +extern int core_scsi3_ua_allocate(struct se_dev_entry *, u8, u8); +extern void target_ua_allocate_lun(struct se_node_acl *, u32, u8, u8); extern void core_scsi3_ua_release_all(struct se_dev_entry *); extern void core_scsi3_ua_for_check_condition(struct se_cmd *, u8 *, u8 *); extern int core_scsi3_ua_clear_for_request_sense(struct se_cmd *, From a01f7cd657c95941079d548ef7fbf0cc370c1259 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 22 May 2015 15:45:28 -0500 Subject: [PATCH 178/839] remoteproc: add a rproc ops for performing address translation The rproc_da_to_va API is currently used to perform any device to kernel address translations to meet the different needs of the remoteproc core/drivers (eg: loading). The functionality is achieved within the remoteproc core, and is limited only for carveouts allocated within the core. A new rproc ops, da_to_va, is added to provide flexibility to platform implementations to perform the address translation themselves when the above conditions cannot be met by the implementations. The rproc_da_to_va() API is extended to invoke this ops if present, and fallback to regular processing if the platform implementation cannot provide the translation. This will allow any remoteproc implementations to translate addresses for dedicated memories like internal memories. While at this, also update the rproc_da_to_va() documentation since it is an exported function. Signed-off-by: Suman Anna Signed-off-by: Dave Gerlach Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_core.c | 31 ++++++++++++++++++++++------ include/linux/remoteproc.h | 2 ++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index e1a6d693b8bb..b4ab38015cf2 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -137,28 +137,46 @@ static void rproc_disable_iommu(struct rproc *rproc) iommu_domain_free(domain); } -/* +/** + * rproc_da_to_va() - lookup the kernel virtual address for a remoteproc address + * @rproc: handle of a remote processor + * @da: remoteproc device address to translate + * @len: length of the memory region @da is pointing to + * * Some remote processors will ask us to allocate them physically contiguous * memory regions (which we call "carveouts"), and map them to specific - * device addresses (which are hardcoded in the firmware). + * device addresses (which are hardcoded in the firmware). They may also have + * dedicated memory regions internal to the processors, and use them either + * exclusively or alongside carveouts. * * They may then ask us to copy objects into specific device addresses (e.g. * code/data sections) or expose us certain symbols in other device address * (e.g. their trace buffer). * - * This function is an internal helper with which we can go over the allocated - * carveouts and translate specific device address to kernel virtual addresses - * so we can access the referenced memory. + * This function is a helper function with which we can go over the allocated + * carveouts and translate specific device addresses to kernel virtual addresses + * so we can access the referenced memory. This function also allows to perform + * translations on the internal remoteproc memory regions through a platform + * implementation specific da_to_va ops, if present. + * + * The function returns a valid kernel address on success or NULL on failure. * * Note: phys_to_virt(iommu_iova_to_phys(rproc->domain, da)) will work too, * but only on kernel direct mapped RAM memory. Instead, we're just using - * here the output of the DMA API, which should be more correct. + * here the output of the DMA API for the carveouts, which should be more + * correct. */ void *rproc_da_to_va(struct rproc *rproc, u64 da, int len) { struct rproc_mem_entry *carveout; void *ptr = NULL; + if (rproc->ops->da_to_va) { + ptr = rproc->ops->da_to_va(rproc, da, len); + if (ptr) + goto out; + } + list_for_each_entry(carveout, &rproc->carveouts, node) { int offset = da - carveout->da; @@ -175,6 +193,7 @@ void *rproc_da_to_va(struct rproc *rproc, u64 da, int len) break; } +out: return ptr; } EXPORT_SYMBOL(rproc_da_to_va); diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 56739e5df1e6..9c4e1384f636 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -330,11 +330,13 @@ struct rproc; * @start: power on the device and boot it * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) + * @da_to_va: optional platform hook to perform address translations */ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); + void * (*da_to_va)(struct rproc *rproc, u64 da, int len); }; /** From ccbbb9faac946ce61c241ce9f08b3486fabf031d Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:29 -0500 Subject: [PATCH 179/839] Documentation: dt: add bindings for TI Wakeup M3 processor Add the device tree bindings document for the TI Wakeup M3 remote processor devices on AM33xx and AM43xx SoCs. These devices are used to offload low-level power management functionality, and are handled by the wkup_m3 remoteproc driver. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna Acked-by: Tony Lindgren Signed-off-by: Ohad Ben-Cohen --- .../bindings/remoteproc/wkup_m3_rproc.txt | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt diff --git a/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt b/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt new file mode 100644 index 000000000000..3a70073797eb --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt @@ -0,0 +1,52 @@ +TI Wakeup M3 Remoteproc Driver +============================== + +The TI AM33xx and AM43xx family of devices use a small Cortex M3 co-processor +(commonly referred to as Wakeup M3 or CM3) to help with various low power tasks +that cannot be controlled from the MPU. This CM3 processor requires a firmware +binary to accomplish this. The wkup_m3 remoteproc driver handles the loading of +the firmware and booting of the CM3. + +Wkup M3 Device Node: +==================== +A wkup_m3 device node is used to represent the Wakeup M3 processor instance +within the SoC. It is added as a child node of the parent interconnect bus +(l4_wkup) through which it is accessible to the MPU. + +Required properties: +-------------------- +- compatible: Should be one of, + "ti,am3352-wkup-m3" for AM33xx SoCs + "ti,am4372-wkup-m3" for AM43xx SoCs +- reg: Should contain the address ranges for the two internal + memory regions, UMEM and DMEM. The parent node should + provide an appropriate ranges property for properly + translating these into bus addresses. +- reg-names: Contains the corresponding names for the two memory + regions. These should be named "umem" & "dmem". +- ti,hwmods: Name of the hwmod associated with the wkupm3 device. +- ti,pm-firmware: Name of firmware file to be used for loading and + booting the wkup_m3 remote processor. + +Example: +-------- +/* AM33xx */ +ocp { + l4_wkup: l4_wkup@44c00000 { + compatible = "am335-l4-wkup", "simple-bus"; + ranges = <0 0x44c00000 0x400000>; + #address-cells = <1>; + #size-cells = <1>; + + wkup_m3: wkup_m3@100000 { + compatible = "ti,am3352-wkup-m3"; + reg = <0x100000 0x4000>, + <0x180000 0x2000>; + reg-names = "umem", "dmem"; + ti,hwmods = "wkup_m3"; + ti,pm-firmware = "am335x-pm-firmware.elf"; + }; + }; + + ... +}; From a01bc0d5f557bd8becd0ba75d09c39192004697e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:30 -0500 Subject: [PATCH 180/839] remoteproc/wkup_m3: add a remoteproc driver for TI Wakeup M3 Add a remoteproc driver to load the firmware and boot a small Wakeup M3 processor present on TI AM33xx and AM43xx SoCs. This Wakeup M3 remote processor is an integrated Cortex M3 that allows the SoC to enter the lowest possible power state by taking control from the MPU after it has gone into its own low power state and shutting off any additional peripherals. The Wakeup M3 processor has two internal memory regions - 16 kB of unified instruction memory called UMEM used to store executable code, and 8 kB of data memory called DMEM used for all data sections. The Wakeup M3 processor executes its code entirely from within the UMEM and uses the DMEM for any data. It does not use any external memory or any other external resources. The device address view has the UMEM at address 0x0 and DMEM at address 0x80000, and these are computed automatically within the driver based on relative address calculation from the corresponding device tree IOMEM resources. These device addresses are used to aid the core remoteproc ELF loader code to properly translate and load the firmware segments through the .rproc_da_to_va ops. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/Kconfig | 13 ++ drivers/remoteproc/Makefile | 1 + drivers/remoteproc/wkup_m3_rproc.c | 257 ++++++++++++++++++++++++++ include/linux/platform_data/wkup_m3.h | 30 +++ 4 files changed, 301 insertions(+) create mode 100644 drivers/remoteproc/wkup_m3_rproc.c create mode 100644 include/linux/platform_data/wkup_m3.h diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 5e343bab9458..28c711f0ac6b 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -41,6 +41,19 @@ config STE_MODEM_RPROC This can be either built-in or a loadable module. If unsure say N. +config WKUP_M3_RPROC + tristate "AMx3xx Wakeup M3 remoteproc support" + depends on SOC_AM33XX || SOC_AM43XX + select REMOTEPROC + help + Say y here to support Wakeup M3 remote processor on TI AM33xx + and AM43xx family of SoCs. + + Required for Suspend-to-RAM on AM33xx and AM43xx SoCs. Also needed + for deep CPUIdle states on AM33xx SoCs. Allows for loading of the + firmware onto these remote processors. + If unsure say N. + config DA8XX_REMOTEPROC tristate "DA8xx/OMAP-L13x remoteproc support" depends on ARCH_DAVINCI_DA8XX diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index ac2ff75686d2..81b04d1e2e58 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -9,4 +9,5 @@ remoteproc-y += remoteproc_virtio.o remoteproc-y += remoteproc_elf_loader.o obj-$(CONFIG_OMAP_REMOTEPROC) += omap_remoteproc.o obj-$(CONFIG_STE_MODEM_RPROC) += ste_modem_rproc.o +obj-$(CONFIG_WKUP_M3_RPROC) += wkup_m3_rproc.o obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o diff --git a/drivers/remoteproc/wkup_m3_rproc.c b/drivers/remoteproc/wkup_m3_rproc.c new file mode 100644 index 000000000000..edf81819cce1 --- /dev/null +++ b/drivers/remoteproc/wkup_m3_rproc.c @@ -0,0 +1,257 @@ +/* + * TI AMx3 Wakeup M3 Remote Processor driver + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach + * Suman Anna + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "remoteproc_internal.h" + +#define WKUPM3_MEM_MAX 2 + +/** + * struct wkup_m3_mem - WkupM3 internal memory structure + * @cpu_addr: MPU virtual address of the memory region + * @bus_addr: Bus address used to access the memory region + * @dev_addr: Device address from Wakeup M3 view + * @size: Size of the memory region + */ +struct wkup_m3_mem { + void __iomem *cpu_addr; + phys_addr_t bus_addr; + u32 dev_addr; + size_t size; +}; + +/** + * struct wkup_m3_rproc - WkupM3 remote processor state + * @rproc: rproc handle + * @pdev: pointer to platform device + * @mem: WkupM3 memory information + */ +struct wkup_m3_rproc { + struct rproc *rproc; + struct platform_device *pdev; + struct wkup_m3_mem mem[WKUPM3_MEM_MAX]; +}; + +static int wkup_m3_rproc_start(struct rproc *rproc) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + struct platform_device *pdev = wkupm3->pdev; + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev_get_platdata(dev); + + if (pdata->deassert_reset(pdev, pdata->reset_name)) { + dev_err(dev, "Unable to reset wkup_m3!\n"); + return -ENODEV; + } + + return 0; +} + +static int wkup_m3_rproc_stop(struct rproc *rproc) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + struct platform_device *pdev = wkupm3->pdev; + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev_get_platdata(dev); + + if (pdata->assert_reset(pdev, pdata->reset_name)) { + dev_err(dev, "Unable to assert reset of wkup_m3!\n"); + return -ENODEV; + } + + return 0; +} + +static void *wkup_m3_rproc_da_to_va(struct rproc *rproc, u64 da, int len) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + void *va = NULL; + int i; + u32 offset; + + if (len <= 0) + return NULL; + + for (i = 0; i < WKUPM3_MEM_MAX; i++) { + if (da >= wkupm3->mem[i].dev_addr && da + len <= + wkupm3->mem[i].dev_addr + wkupm3->mem[i].size) { + offset = da - wkupm3->mem[i].dev_addr; + /* __force to make sparse happy with type conversion */ + va = (__force void *)(wkupm3->mem[i].cpu_addr + offset); + break; + } + } + + return va; +} + +static struct rproc_ops wkup_m3_rproc_ops = { + .start = wkup_m3_rproc_start, + .stop = wkup_m3_rproc_stop, + .da_to_va = wkup_m3_rproc_da_to_va, +}; + +static const struct of_device_id wkup_m3_rproc_of_match[] = { + { .compatible = "ti,am3352-wkup-m3", }, + { .compatible = "ti,am4372-wkup-m3", }, + {}, +}; + +static int wkup_m3_rproc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev->platform_data; + /* umem always needs to be processed first */ + const char *mem_names[WKUPM3_MEM_MAX] = { "umem", "dmem" }; + struct wkup_m3_rproc *wkupm3; + const char *fw_name; + struct rproc *rproc; + struct resource *res; + const __be32 *addrp; + u32 l4_offset = 0; + u64 size; + int ret; + int i; + + if (!(pdata && pdata->deassert_reset && pdata->assert_reset && + pdata->reset_name)) { + dev_err(dev, "Platform data missing!\n"); + return -ENODEV; + } + + ret = of_property_read_string(dev->of_node, "ti,pm-firmware", + &fw_name); + if (ret) { + dev_err(dev, "No firmware filename given\n"); + return -ENODEV; + } + + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "pm_runtime_get_sync() failed\n"); + goto err; + } + + rproc = rproc_alloc(dev, "wkup_m3", &wkup_m3_rproc_ops, + fw_name, sizeof(*wkupm3)); + if (!rproc) { + ret = -ENOMEM; + goto err; + } + + wkupm3 = rproc->priv; + wkupm3->rproc = rproc; + wkupm3->pdev = pdev; + + for (i = 0; i < ARRAY_SIZE(mem_names); i++) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + mem_names[i]); + wkupm3->mem[i].cpu_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(wkupm3->mem[i].cpu_addr)) { + dev_err(&pdev->dev, "devm_ioremap_resource failed for resource %d\n", + i); + ret = PTR_ERR(wkupm3->mem[i].cpu_addr); + goto err; + } + wkupm3->mem[i].bus_addr = res->start; + wkupm3->mem[i].size = resource_size(res); + addrp = of_get_address(dev->of_node, i, &size, NULL); + /* + * The wkupm3 has umem at address 0 in its view, so the device + * addresses for each memory region is computed as a relative + * offset of the bus address for umem, and therefore needs to be + * processed first. + */ + if (!strcmp(mem_names[i], "umem")) + l4_offset = be32_to_cpu(*addrp); + wkupm3->mem[i].dev_addr = be32_to_cpu(*addrp) - l4_offset; + } + + dev_set_drvdata(dev, rproc); + + ret = rproc_add(rproc); + if (ret) { + dev_err(dev, "rproc_add failed\n"); + goto err_put_rproc; + } + + return 0; + +err_put_rproc: + rproc_put(rproc); +err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; +} + +static int wkup_m3_rproc_remove(struct platform_device *pdev) +{ + struct rproc *rproc = platform_get_drvdata(pdev); + + rproc_del(rproc); + rproc_put(rproc); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM +static int wkup_m3_rpm_suspend(struct device *dev) +{ + return -EBUSY; +} + +static int wkup_m3_rpm_resume(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops wkup_m3_rproc_pm_ops = { + SET_RUNTIME_PM_OPS(wkup_m3_rpm_suspend, wkup_m3_rpm_resume, NULL) +}; + +static struct platform_driver wkup_m3_rproc_driver = { + .probe = wkup_m3_rproc_probe, + .remove = wkup_m3_rproc_remove, + .driver = { + .name = "wkup_m3_rproc", + .of_match_table = wkup_m3_rproc_of_match, + .pm = &wkup_m3_rproc_pm_ops, + }, +}; + +module_platform_driver(wkup_m3_rproc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("TI Wakeup M3 remote processor control driver"); +MODULE_AUTHOR("Dave Gerlach "); diff --git a/include/linux/platform_data/wkup_m3.h b/include/linux/platform_data/wkup_m3.h new file mode 100644 index 000000000000..3f1d77effd71 --- /dev/null +++ b/include/linux/platform_data/wkup_m3.h @@ -0,0 +1,30 @@ +/* + * TI Wakeup M3 remote processor platform data + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_PLATFORM_DATA_WKUP_M3_H +#define _LINUX_PLATFORM_DATA_WKUP_M3_H + +struct platform_device; + +struct wkup_m3_platform_data { + const char *reset_name; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); +}; + +#endif /* _LINUX_PLATFORM_DATA_WKUP_M3_H */ From 8c19a731a1a8b1f447a972174d2b2fe30d625518 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 11 Jun 2015 13:56:32 +0200 Subject: [PATCH 181/839] ARM: mvebu: adjust Armada XP DT spi muxing after pinctrl function rename Following the merge of "pinctrl: mvebu: armada-xp: rename spi to spi0" by Linus Walleij, we need to adjust the Armada XP Device Tree accordingly, by adjusting the pinctrl configuration for SPI pins. Signed-off-by: Thomas Petazzoni Acked-by: Gregory CLEMENT Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index e78ce4ab6b75..58eaef46efd5 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -297,7 +297,7 @@ spi0_pins: spi0-pins { marvell,pins = "mpp36", "mpp37", "mpp38", "mpp39"; - marvell,function = "spi"; + marvell,function = "spi0"; }; uart2_pins: uart2-pins { From 2f5bc307be2480ba89e4c5d118f406f04a4a7299 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 16 Jun 2015 14:12:57 +0200 Subject: [PATCH 182/839] ARM: mvebu: fix suspend to RAM on big-endian configurations The current Armada XP suspend to RAM implementation, as added in commit 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") does not handle big-endian configurations properly: the small bit of assembly code putting the DRAM in self-refresh and toggling the GPIOs to turn off power forgets to convert the values to little-endian. This commit fixes that by making sure the two values we will write to the DRAM controller register and GPIO register are already in little-endian before entering the critical assembly code. Signed-off-by: Thomas Petazzoni Cc: # v3.19+ Fixes: 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") --- arch/arm/mach-mvebu/pm-board.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2a..301ab38d38ba 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. From e4f95517f18271b1da36cfc5d700e46844396d6e Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 17 Jun 2015 18:15:45 +0200 Subject: [PATCH 183/839] fs/ufs: restore s_lock mutex_init() Add last missing line in commit "cdd9eefdf905" ("fs/ufs: restore s_lock mutex") Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/ufs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index afe9955654c8..dc33f9416340 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -801,6 +801,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); mutex_init(&sbi->mutex); + mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* From 1ca018d28d96d07788474abf66a5f3e9594841f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:41:51 -0400 Subject: [PATCH 184/839] pNFS: Fix a memory leak when attempted pnfs fails pnfs_do_write() expects the call to pnfs_write_through_mds() to free the pgio header and to release the layout segment before exiting. The problem is that nfs_pgio_data_destroy() doesn't actually do this; it only frees the memory allocated by nfs_generic_pgio(). Ditto for pnfs_do_read()... Fix in both cases is to add a call to hdr->release(hdr). Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6..219ee6a3f1b3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1865,6 +1865,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1980,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* From c70701131f7a8edea91fc49d11796d342cff7c62 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:56:22 -0400 Subject: [PATCH 185/839] NFS: Ensure we set NFS_CONTEXT_RESEND_WRITES when requeuing writes If a write attempt fails, and the write is queued up for resending to the server, as opposed to being dropped, then we need to set the appropriate flag so that nfs_file_fsync() does the right thing. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + fs/nfs/write.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 219ee6a3f1b3..d47c188682b1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a1..daf355642845 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); From 8de3dbd0895bebe52d069a82feae8e3fb51c1bdf Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 18 Jun 2015 11:44:41 +0300 Subject: [PATCH 186/839] remoteproc: fix !CONFIG_OF build breakage Fix this: drivers/remoteproc/remoteproc_core.c: In function 'rproc_get_by_phandle': >> drivers/remoteproc/remoteproc_core.c:1167:2: error: implicit declaration of function 'of_find_node_by_phandle' [-Werror=implicit-function-declaration] np = of_find_node_by_phandle(phandle); Reported-by: kbuild test robot Cc: Dave Gerlach Cc: Suman Anna Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index b4ab38015cf2..8b3130f22b42 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1178,6 +1178,7 @@ EXPORT_SYMBOL(rproc_shutdown); * * Returns the rproc handle on success, and NULL on failure. */ +#ifdef CONFIG_OF struct rproc *rproc_get_by_phandle(phandle phandle) { struct rproc *rproc = NULL, *r; @@ -1201,6 +1202,12 @@ struct rproc *rproc_get_by_phandle(phandle phandle) return rproc; } +#else +struct rproc *rproc_get_by_phandle(phandle phandle) +{ + return NULL; +} +#endif EXPORT_SYMBOL(rproc_get_by_phandle); /** From d1381929deaa530b0b9faddc7cbb62e05766b866 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 16 Jun 2015 21:57:52 +0200 Subject: [PATCH 187/839] sunrpc: use sg_init_one() in krb5_rc4_setup_enc/seq_key() Don't opencode sg_init_one() Signed-off-by: Fabian Frederick Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index b5408e8a37f2..fee3c15a4b52 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, if (err) goto out_err; - sg_init_table(sg, 1); - sg_set_buf(sg, &zeroconstant, 4); - + sg_init_one(sg, &zeroconstant, 4); err = crypto_hash_digest(&desc, sg, 4, Kseq); if (err) goto out_err; @@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, if (err) goto out_err; - sg_init_table(sg, 1); - sg_set_buf(sg, zeroconstant, 4); - + sg_init_one(sg, zeroconstant, 4); err = crypto_hash_digest(&desc, sg, 4, Kcrypt); if (err) goto out_err; From dfad7000f32e3a6187e1ee36d52494d821553f2b Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 18 Jun 2015 19:37:13 +0800 Subject: [PATCH 188/839] nfs: Fix comment for nfs_pageio_init() and nfs_pageio_complete_mirror() Signed-off-by: Yijing Wang Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e9953ab1ac40..d70228a81056 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -709,7 +709,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor * @inode: pointer to inode - * @doio: pointer to io function + * @pg_ops: pointer to pageio operations + * @compl_ops: pointer to pageio completion operations + * @rw_ops: pointer to nfs read/write operations * @bsize: io block size * @io_flags: extra parameters for the io function */ @@ -1184,6 +1186,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an * nfs_pageio_descriptor * @desc: pointer to io descriptor + * @mirror_idx: pointer to mirror index */ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) From f25801ee4680ef1db21e15c112e6e5fe3ffe8da5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:23 +0100 Subject: [PATCH 189/839] overlay: Call ovl_drop_write() earlier in ovl_dentry_open() Call ovl_drop_write() earlier in ovl_dentry_open() before we call vfs_open() as we've done the copy up for which we needed the freeze-write lock by that point. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/overlayfs/inode.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 308379b2d0b2..21079d1ca2aa 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -343,31 +343,25 @@ static int ovl_dentry_open(struct dentry *dentry, struct file *file, int err; struct path realpath; enum ovl_path_type type; - bool want_write = false; type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { - want_write = true; err = ovl_want_write(dentry); if (err) - goto out; + return err; if (file->f_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); + ovl_drop_write(dentry); if (err) - goto out_drop_write; + return err; ovl_path_upper(dentry, &realpath); } - err = vfs_open(&realpath, file, cred); -out_drop_write: - if (want_write) - ovl_drop_write(dentry); -out: - return err; + return vfs_open(&realpath, file, cred); } static const struct inode_operations ovl_file_inode_operations = { From 4bacc9c9234c7c8eec44f5ed4e960d9f96fa0f01 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:31 +0100 Subject: [PATCH 190/839] overlayfs: Make f_path always point to the overlay and f_inode to the underlay Make file->f_path always point to the overlay dentry so that the path in /proc/pid/fd is correct and to ensure that label-based LSMs have access to the overlay as well as the underlay (path-based LSMs probably don't need it). Using my union testsuite to set things up, before the patch I see: [root@andromeda union-testsuite]# bash 5 /a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 13381 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 13381 Links: 1 ... After the patch: [root@andromeda union-testsuite]# bash 5 /mnt/a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 40346 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 40346 Links: 1 ... Note the change in where /proc/$$/fd/5 points to in the ls command. It was pointing to /a/foo107 (which doesn't exist) and now points to /mnt/a/foo107 (which is correct). The inode accessed, however, is the lower layer. The union layer is on device 25h/37d and the upper layer on 24h/36d. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 5 +++- fs/internal.h | 1 + fs/open.c | 49 +++++++++++++++++++++------------------- fs/overlayfs/inode.c | 14 +++++------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/super.c | 1 + include/linux/dcache.h | 2 ++ include/linux/fs.h | 2 -- 8 files changed, 41 insertions(+), 34 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 37b5afdaf698..c4ce35110704 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1673,7 +1673,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_DELETE )); + DCACHE_OP_DELETE | + DCACHE_OP_SELECT_INODE)); dentry->d_op = op; if (!op) return; @@ -1689,6 +1690,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; + if (op->d_select_inode) + dentry->d_flags |= DCACHE_OP_SELECT_INODE; } EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/internal.h b/fs/internal.h index 01dce1d1476b..4d5af583ab03 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); extern int open_check_o_direct(struct file *f); +extern int vfs_open(const struct path *, struct file *, const struct cred *); /* * inode.c diff --git a/fs/open.c b/fs/open.c index e0250bdcc440..b1c5823b7f11 100644 --- a/fs/open.c +++ b/fs/open.c @@ -678,18 +678,18 @@ int open_check_o_direct(struct file *f) } static int do_dentry_open(struct file *f, + struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; - struct inode *inode; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); - inode = f->f_inode = f->f_path.dentry->d_inode; + f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { @@ -793,7 +793,8 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, open, current_cred()); + error = do_dentry_open(file, d_backing_inode(dentry), open, + current_cred()); if (!error) *opened |= FILE_OPENED; @@ -822,6 +823,28 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *file, + const struct cred *cred) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = dentry->d_inode; + + file->f_path = *path; + if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { + inode = dentry->d_op->d_select_inode(dentry, file->f_flags); + if (IS_ERR(inode)) + return PTR_ERR(inode); + } + + return do_dentry_open(file, inode, NULL, cred); +} + struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { @@ -853,26 +876,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -/** - * vfs_open - open the file at the given path - * @path: path to open - * @filp: newly allocated file with f_flag initialized - * @cred: credentials to use - */ -int vfs_open(const struct path *path, struct file *filp, - const struct cred *cred) -{ - struct inode *inode = path->dentry->d_inode; - - if (inode->i_op->dentry_open) - return inode->i_op->dentry_open(path->dentry, filp, cred); - else { - filp->f_path = *path; - return do_dentry_open(filp, NULL, cred); - } -} -EXPORT_SYMBOL(vfs_open); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 21079d1ca2aa..f140e3dbfb7b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -337,31 +337,30 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, return true; } -static int ovl_dentry_open(struct dentry *dentry, struct file *file, - const struct cred *cred) +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) { int err; struct path realpath; enum ovl_path_type type; type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { + if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); if (err) - return err; + return ERR_PTR(err); - if (file->f_flags & O_TRUNC) + if (file_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); ovl_drop_write(dentry); if (err) - return err; + return ERR_PTR(err); ovl_path_upper(dentry, &realpath); } - return vfs_open(&realpath, file, cred); + return d_backing_inode(realpath.dentry); } static const struct inode_operations ovl_file_inode_operations = { @@ -372,7 +371,6 @@ static const struct inode_operations ovl_file_inode_operations = { .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, .removexattr = ovl_removexattr, - .dentry_open = ovl_dentry_open, }; static const struct inode_operations ovl_symlink_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 17ac5afc9ffb..ea5a40b06e3a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, struct ovl_entry *oe); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5f0d1993e6e3..84c5e27fbfd9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -275,6 +275,7 @@ static void ovl_dentry_release(struct dentry *dentry) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, + .d_select_inode = ovl_d_select_inode, }; static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d..167ec0934049 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; diff --git a/include/linux/fs.h b/include/linux/fs.h index b577e801b4af..2bd77e10e8e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,7 +1641,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! */ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2194,7 +2193,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); From 3832591e6fa53d8126d5e21446f225cda278d91a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Jun 2015 13:04:13 -0400 Subject: [PATCH 191/839] SUNRPC: Handle connection issues correctly on the back channel If the back channel is disconnected, we can and should just fail the transmission. The expectation is that the NFSv4.1 server will always retransmit any outstanding callbacks once the connection is re-established. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f41ed882ed3c..cbc6af923dd1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1031,6 +1031,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { .callback_ops = &rpc_default_ops, + .flags = RPC_TASK_SOFTCONN, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); @@ -1964,10 +1965,15 @@ call_bc_transmit(struct rpc_task *task) switch (task->tk_status) { case 0: /* Success */ - break; case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + case -ECONNRESET: + case -ECONNREFUSED: + case -EADDRINUSE: + case -ENOTCONN: + case -EPIPE: + break; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the From 4876cc779ff525b9c2376d8076edf47815e71f2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Jun 2015 16:17:57 -0400 Subject: [PATCH 192/839] SUNRPC: Ensure we release the TCP socket once it has been closed This fixes a regression introduced by commit caf4ccd4e88cf2 ("SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release"). Prior to that commit, the autoclose feature would ensure that an idle connection would result in the socket being both disconnected and released, whereas now only gets disconnected. While the current behaviour is harmless, it does leave the port bound until either RPC traffic resumes or the RPC client is shut down. Reported-by: Steven Rostedt Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3ca31f20b97c..ab5dd621ae0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -611,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); - xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fda8ec8c74c0..ee0715dfc3c7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -622,24 +622,6 @@ process_status: return status; } -/** - * xs_tcp_shutdown - gracefully shut down a TCP socket - * @xprt: transport - * - * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_RDWR); - */ -static void xs_tcp_shutdown(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; - - if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - trace_rpc_socket_shutdown(xprt, sock); - } -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); + xprt_force_disconnect(xprt); } /** @@ -2103,6 +2086,27 @@ out: xprt_wake_pending_tasks(xprt, status); } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_RDWR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock == NULL) + return; + if (xprt_connected(xprt)) { + kernel_sock_shutdown(sock, SHUT_RDWR); + trace_rpc_socket_shutdown(xprt, sock); + } else + xs_reset_transport(transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); From 775f06ab49f5f9e2f6bca9292ef57efa868a0f67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Jun 2015 15:31:54 -0400 Subject: [PATCH 193/839] SUNRPC: Set the TCP user timeout option on client sockets Use the TCP_USER_TIMEOUT socket option to advertise to the server how long we will keep the connection open if there is unacknowledged data. See RFC5482. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee0715dfc3c7..ee359fc7af16 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2117,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) unsigned int keepidle = xprt->timeout->to_initval / HZ; unsigned int keepcnt = xprt->timeout->to_retries + 1; unsigned int opt_on = 1; + unsigned int timeo; /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2128,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); + /* TCP user timeout (see RFC5482) */ + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); From a6f15d9a756571babbb2b2cd4fdd1b64a5de232b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 22 Jun 2015 13:53:48 +0200 Subject: [PATCH 194/839] ovl: don't traverse automount points NFS and other distributed filesystems may place automount points in the tree. Previoulsy overlayfs refused to mount such filesystems types (based on the existence of the .d_automount callback), even if the actual export didn't have any automount points. It cannot be determined in advance whether the filesystem has automount points or not. The solution is to allow fs with .d_automount but refuse to traverse any automount points encountered. Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bf8537c7f455..de9d2ee68ccf 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -303,6 +303,10 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, } else if (!dentry->d_inode) { dput(dentry); dentry = NULL; + } else if (dentry->d_flags & DCACHE_MANAGED_DENTRY) { + dput(dentry); + /* Don't support traversing automounts */ + dentry = ERR_PTR(-EREMOTE); } return dentry; } @@ -700,12 +704,12 @@ static bool ovl_is_allowed_fs_type(struct dentry *root) /* * We don't support: - * - automount filesystems + * - autofs * - filesystems with revalidate (FIXME for lower layer) * - filesystems with case insensitive names */ if (dop && - (dop->d_manage || dop->d_automount || + (dop->d_manage || dop->d_revalidate || dop->d_weak_revalidate || dop->d_compare || dop->d_hash)) { return false; From 7c03b5d45b8eebf0111125053d8fe887cc262ba6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 22 Jun 2015 13:53:48 +0200 Subject: [PATCH 195/839] ovl: allow distributed fs as lower layer Allow filesystems with .d_revalidate as lower layer(s), but not as upper layer. For local filesystems the rule was that modifications on the layers directly while being part of the overlay results in undefined behavior. This can easily be extended to distributed filesystems: we assume the tree used as lower layer is static, which means ->d_revalidate() should always return "1". If that is not the case, return -ESTALE, don't try to work around the modification. Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 113 +++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index de9d2ee68ccf..8a08c582bc22 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -273,10 +273,57 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (ret < 0) + return ret; + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + return -ESTALE; + } + } + } + return 1; +} + +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { + ret = d->d_op->d_weak_revalidate(d, flags); + if (ret <= 0) + break; + } + } + return ret; +} + static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, }; +static const struct dentry_operations ovl_reval_dentry_operations = { + .d_release = ovl_dentry_release, + .d_revalidate = ovl_dentry_revalidate, + .d_weak_revalidate = ovl_dentry_weak_revalidate, +}; + static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -288,6 +335,20 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) return oe; } +static bool ovl_dentry_remote(struct dentry *dentry) +{ + return dentry->d_flags & + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +static bool ovl_dentry_weird(struct dentry *dentry) +{ + return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | + DCACHE_MANAGE_TRANSIT | + DCACHE_OP_HASH | + DCACHE_OP_COMPARE); +} + static inline struct dentry *ovl_lookup_real(struct dentry *dir, struct qstr *name) { @@ -303,9 +364,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, } else if (!dentry->d_inode) { dput(dentry); dentry = NULL; - } else if (dentry->d_flags & DCACHE_MANAGED_DENTRY) { + } else if (ovl_dentry_weird(dentry)) { dput(dentry); - /* Don't support traversing automounts */ + /* Don't support traversing automounts and other weirdness */ dentry = ERR_PTR(-EREMOTE); } return dentry; @@ -354,6 +415,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out; if (this) { + if (unlikely(ovl_dentry_remote(this))) { + dput(this); + err = -EREMOTE; + goto out; + } if (ovl_is_whiteout(this)) { dput(this); this = NULL; @@ -698,25 +764,6 @@ static void ovl_unescape(char *s) } } -static bool ovl_is_allowed_fs_type(struct dentry *root) -{ - const struct dentry_operations *dop = root->d_op; - - /* - * We don't support: - * - autofs - * - filesystems with revalidate (FIXME for lower layer) - * - filesystems with case insensitive names - */ - if (dop && - (dop->d_manage || - dop->d_revalidate || dop->d_weak_revalidate || - dop->d_compare || dop->d_hash)) { - return false; - } - return true; -} - static int ovl_mount_dir_noesc(const char *name, struct path *path) { int err = -EINVAL; @@ -731,7 +778,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) goto out; } err = -EINVAL; - if (!ovl_is_allowed_fs_type(path->dentry)) { + if (ovl_dentry_weird(path->dentry)) { pr_err("overlayfs: filesystem on '%s' not supported\n", name); goto out_put; } @@ -755,13 +802,21 @@ static int ovl_mount_dir(const char *name, struct path *path) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); + + if (!err) + if (ovl_dentry_remote(path->dentry)) { + pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", + tmp); + path_put(path); + err = -EINVAL; + } kfree(tmp); } return err; } static int ovl_lower_dir(const char *name, struct path *path, long *namelen, - int *stack_depth) + int *stack_depth, bool *remote) { int err; struct kstatfs statfs; @@ -778,6 +833,9 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen, *namelen = max(*namelen, statfs.f_namelen); *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); + if (ovl_dentry_remote(path->dentry)) + *remote = true; + return 0; out_put: @@ -831,6 +889,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int numlower; unsigned int stacklen = 0; unsigned int i; + bool remote = false; int err; err = -ENOMEM; @@ -904,7 +963,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { err = ovl_lower_dir(lower, &stack[numlower], - &ufs->lower_namelen, &sb->s_stack_depth); + &ufs->lower_namelen, &sb->s_stack_depth, + &remote); if (err) goto out_put_lowerpath; @@ -962,7 +1022,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; - sb->s_d_op = &ovl_dentry_operations; + if (remote) + sb->s_d_op = &ovl_reval_dentry_operations; + else + sb->s_d_op = &ovl_dentry_operations; err = -ENOMEM; oe = ovl_alloc_entry(numlower); From cdb672795876d7bc1870aed9a2d7cb59f43d1d96 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 22 Jun 2015 13:53:48 +0200 Subject: [PATCH 196/839] ovl: lookup whiteouts outside iterate_dir() If jffs2 can deadlock on overlayfs readdir because it takes the same lock on ->iterate() as in ->lookup(). Fix by moving whiteout checking outside iterate_dir(). Optimized by collecting potential whiteouts (DT_CHR) in a temporary list and if non-empty iterating throug these and checking for a 0/0 chardev. Signed-off-by: Miklos Szeredi Fixes: 49c21e1cacd7 ("ovl: check whiteout while reading directory") Reported-by: Roman Yeryomin --- fs/overlayfs/readdir.c | 77 +++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 907870e81a72..70e9af551600 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else @@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; From 13d1e536b14ec2d404319a25e681a3287ca084ad Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 21 Jun 2015 12:41:16 +0900 Subject: [PATCH 197/839] perf top: Move toggling event logic into hists browser Current 'f' key action to enable/disable events won't work if there're more than one event since perf_evsel_menu__run() doesn't return the key. So move it to the hists browser loop so that it can be processed as like other key action, and it's more natural to handle it there IMHO. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1434858076-6533-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 24 +++--------------------- tools/perf/ui/browsers/hists.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 619a8696fda7..ecf319728f25 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -586,27 +586,9 @@ static void *display_thread_tui(void *arg) hists->uid_filter_str = top->record_opts.target.uid_str; } - while (true) { - int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, - top->min_percent, - &top->session->header.env); - - if (key != 'f') - break; - - perf_evlist__toggle_enable(top->evlist); - /* - * No need to refresh, resort/decay histogram entries - * if we are not collecting samples: - */ - if (top->evlist->enabled) { - hbt.refresh = top->delay_secs; - help = "Press 'f' to disable the events or 'h' to see other hotkeys"; - } else { - help = "Press 'f' again to re-enable the events"; - hbt.refresh = 0; - } - } + perf_evlist__tui_browse_hists(top->evlist, help, &hbt, + top->min_percent, + &top->session->header.env); done = 1; return NULL; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index c42adb600091..7629bef2fd79 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1902,8 +1902,23 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case CTRL('c'): goto out_free_stack; case 'f': - if (!is_report_browser(hbt)) - goto out_free_stack; + if (!is_report_browser(hbt)) { + struct perf_top *top = hbt->arg; + + perf_evlist__toggle_enable(top->evlist); + /* + * No need to refresh, resort/decay histogram + * entries if we are not collecting samples: + */ + if (top->evlist->enabled) { + helpline = "Press 'f' to disable the events or 'h' to see other hotkeys"; + hbt->refresh = delay_secs; + } else { + helpline = "Press 'f' again to re-enable the events"; + hbt->refresh = 0; + } + continue; + } /* Fall thru */ default: helpline = "Press '?' for help on key bindings"; From 502819c5f35ef44eb7151fb85cd883c5b76b506d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Jun 2015 14:50:50 +0200 Subject: [PATCH 198/839] perf tests: Add testing for Makefile.perf Currently we test only builds through top level Makefile, but seems like there's a bunch of users using Makefile.perf directly. Changing the make suite to be run for Makefile.perf as well. It takes now considerable amount of time, but hopefully we catch more issues. Also fixing the output indentation for make_kernelsrc and make_kernelsrc_tools tests. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Lukas Wunner Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1434977452-32520-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 65280d28662e..bfe1962da0df 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -1,5 +1,16 @@ +ifndef MK +ifeq ($(MAKECMDGOALS),) +# no target specified, trigger the whole suite +all: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile + @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf +else +# run only specific test over 'Makefile' +%: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile $@ +endif +else PERF := . -MK := Makefile include config/Makefile.arch @@ -57,7 +68,12 @@ make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 # $(run) contains all available tests run := make_pure +# Targets 'clean all' can be run together only through top level +# Makefile because we detect clean target in Makefile.perf and +# disable features detection +ifeq ($(MK),Makefile) run += make_clean_all +endif run += make_python_perf_so run += make_debug run += make_no_libperl @@ -226,13 +242,13 @@ tarpkg: ( eval $$cmd ) >> $@ 2>&1 make_kernelsrc: - @echo " - make -C tools/perf" + @echo "- make -C tools/perf" $(call clean); \ (make -C ../.. tools/perf) > $@ 2>&1 && \ test -x perf && rm -f $@ || (cat $@ ; false) make_kernelsrc_tools: - @echo " - make -C /tools perf" + @echo "- make -C /tools perf" $(call clean); \ (make -C ../../tools perf) > $@ 2>&1 && \ test -x perf && rm -f $@ || (cat $@ ; false) @@ -244,3 +260,4 @@ out: $(run_O) @echo OK .PHONY: all $(run) $(run_O) tarpkg clean +endif # ifndef MK From eb30d2c5077b30f9f8e00eb2e5fadba18b148538 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Jun 2015 14:50:51 +0200 Subject: [PATCH 199/839] perf tests: Add test for make install with prefix Lukas Wunner reported issue (and fix[1]) with 'make install prefix=...'. Adding automated test for this, so it wouldn't happen again. [1]: 75e84ab906ef ("perf tools: Fix build breakage if prefix= is specified") Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Lukas Wunner Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1434977452-32520-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index bfe1962da0df..729112f4cfaa 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -58,6 +58,7 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf +make_install_prefix := install prefix=/tmp/krava make_static := LDFLAGS=-static # all the NO_* variable combined @@ -99,6 +100,7 @@ run += make_util_map_o run += make_util_pmu_bison_o run += make_install run += make_install_bin +run += make_install_prefix # FIXME 'install-*' commented out till they're fixed # run += make_install_doc # run += make_install_man @@ -173,6 +175,12 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) +# We prefix all installed files for make_install_prefix +# with '/tmp/krava' to match installed/prefix-ed files. +installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) +test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) + # FIXME nothing gets installed test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 test_make_install_man_O := $(test_make_install_man) From 8e55735150934f9ab2ce8a8005626e5693a6b61d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Jun 2015 14:50:52 +0200 Subject: [PATCH 200/839] perf build: Fix single target build dependency check Currently if we build a single target like: $ touch util/map.c && make util/map.o It will not rebuild util/map.o if it already exists and util/map.c is modified. The reason is that the top-level 'Makefile' processes util/map.o as an implicit rule and if util/map.o exists make considers the 'util/map.o' target as done and will not nest into Makefile.perf. Adding FORCE for '%', because that's what we want to nest into Makefile.perf for any target. Adding Makefile into phony targets, because make tries to rebuild it and it's also resolved as '%' target. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Lukas Wunner Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1434977452-32520-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d31a7bbd7cee..480546d5f13b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -83,8 +83,8 @@ build-test: # # All other targets get passed through: # -%: +%: FORCE $(print_msg) $(make) -.PHONY: tags TAGS +.PHONY: tags TAGS FORCE Makefile From e998200c1912f22cf257ae88771e56de588ee8fb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 22 Jun 2015 09:22:34 -0700 Subject: [PATCH 201/839] Input: imx_keypad - check for clk_prepare_enable() error clk_prepare_enable() may fail, so we should better check its return value and propagate it in the case of error. Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/imx_keypad.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c index 2e855e6f3565..d2ea863d6a45 100644 --- a/drivers/input/keyboard/imx_keypad.c +++ b/drivers/input/keyboard/imx_keypad.c @@ -506,7 +506,9 @@ static int imx_keypad_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad); /* Ensure that the keypad will stay dormant until opened */ - clk_prepare_enable(keypad->clk); + error = clk_prepare_enable(keypad->clk); + if (error) + return error; imx_keypad_inhibit(keypad); clk_disable_unprepare(keypad->clk); From f992efbb0326ddf57c9d2f759c44a3339ab56a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 21 Jun 2015 10:39:26 +0200 Subject: [PATCH 202/839] dell-laptop: Update information about wireless control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that all existing SMBIOS calls for wireless control are properly documented. This commit also add new documentation released by Dell. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 158 ++++++++++++++++++++++------- 1 file changed, 119 insertions(+), 39 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 83e3d7f9d923..ab89103150b5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -423,45 +423,125 @@ static inline int dell_smi_error(int value) } } -/* Derived from information in DellWirelessCtl.cpp: - Class 17, select 11 is radio control. It returns an array of 32-bit values. - - Input byte 0 = 0: Wireless information - - result[0]: return code - result[1]: - Bit 0: Hardware switch supported - Bit 1: Wifi locator supported - Bit 2: Wifi is supported - Bit 3: Bluetooth is supported - Bit 4: WWAN is supported - Bit 5: Wireless keyboard supported - Bits 6-7: Reserved - Bit 8: Wifi is installed - Bit 9: Bluetooth is installed - Bit 10: WWAN is installed - Bits 11-15: Reserved - Bit 16: Hardware switch is on - Bit 17: Wifi is blocked - Bit 18: Bluetooth is blocked - Bit 19: WWAN is blocked - Bits 20-31: Reserved - result[2]: NVRAM size in bytes - result[3]: NVRAM format version number - - Input byte 0 = 2: Wireless switch configuration - result[0]: return code - result[1]: - Bit 0: Wifi controlled by switch - Bit 1: Bluetooth controlled by switch - Bit 2: WWAN controlled by switch - Bits 3-6: Reserved - Bit 7: Wireless switch config locked - Bit 8: Wifi locator enabled - Bits 9-14: Reserved - Bit 15: Wifi locator setting locked - Bits 16-31: Reserved -*/ +/* + * Derived from information in smbios-wireless-ctl: + * + * cbSelect 17, Value 11 + * + * Return Wireless Info + * cbArg1, byte0 = 0x00 + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 Info bit flags: + * + * 0 Hardware switch supported (1) + * 1 WiFi locator supported (1) + * 2 WLAN supported (1) + * 3 Bluetooth (BT) supported (1) + * 4 WWAN supported (1) + * 5 Wireless KBD supported (1) + * 6 Uw b supported (1) + * 7 WiGig supported (1) + * 8 WLAN installed (1) + * 9 BT installed (1) + * 10 WWAN installed (1) + * 11 Uw b installed (1) + * 12 WiGig installed (1) + * 13-15 Reserved (0) + * 16 Hardware (HW) switch is On (1) + * 17 WLAN disabled (1) + * 18 BT disabled (1) + * 19 WWAN disabled (1) + * 20 Uw b disabled (1) + * 21 WiGig disabled (1) + * 20-31 Reserved (0) + * + * cbRes3 NVRAM size in bytes + * cbRes4, byte 0 NVRAM format version number + * + * + * Set QuickSet Radio Disable Flag + * cbArg1, byte0 = 0x01 + * cbArg1, byte1 + * Radio ID value: + * 0 Radio Status + * 1 WLAN ID + * 2 BT ID + * 3 WWAN ID + * 4 UWB ID + * 5 WIGIG ID + * cbArg1, byte2 Flag bits: + * 0 QuickSet disables radio (1) + * 1-7 Reserved (0) + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 QuickSet (QS) radio disable bit map: + * 0 QS disables WLAN + * 1 QS disables BT + * 2 QS disables WWAN + * 3 QS disables UWB + * 4 QS disables WIGIG + * 5-31 Reserved (0) + * + * Wireless Switch Configuration + * cbArg1, byte0 = 0x02 + * + * cbArg1, byte1 + * Subcommand: + * 0 Get config + * 1 Set config + * 2 Set WiFi locator enable/disable + * cbArg1,byte2 + * Switch settings (if byte 1==1): + * 0 WLAN sw itch control (1) + * 1 BT sw itch control (1) + * 2 WWAN sw itch control (1) + * 3 UWB sw itch control (1) + * 4 WiGig sw itch control (1) + * 5-7 Reserved (0) + * cbArg1, byte2 Enable bits (if byte 1==2): + * 0 Enable WiFi locator (1) + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 QuickSet radio disable bit map: + * 0 WLAN controlled by sw itch (1) + * 1 BT controlled by sw itch (1) + * 2 WWAN controlled by sw itch (1) + * 3 UWB controlled by sw itch (1) + * 4 WiGig controlled by sw itch (1) + * 5-6 Reserved (0) + * 7 Wireless sw itch config locked (1) + * 8 WiFi locator enabled (1) + * 9-14 Reserved (0) + * 15 WiFi locator setting locked (1) + * 16-31 Reserved (0) + * + * Read Local Config Data (LCD) + * cbArg1, byte0 = 0x10 + * cbArg1, byte1 NVRAM index low byte + * cbArg1, byte2 NVRAM index high byte + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 4 bytes read from LCD[index] + * cbRes3 4 bytes read from LCD[index+4] + * cbRes4 4 bytes read from LCD[index+8] + * + * Write Local Config Data (LCD) + * cbArg1, byte0 = 0x11 + * cbArg1, byte1 NVRAM index low byte + * cbArg1, byte2 NVRAM index high byte + * cbArg2 4 bytes to w rite at LCD[index] + * cbArg3 4 bytes to w rite at LCD[index+4] + * cbArg4 4 bytes to w rite at LCD[index+8] + * cbRes1 Standard return codes (0, -1, -2) + * + * Populate Local Config Data from NVRAM + * cbArg1, byte0 = 0x12 + * cbRes1 Standard return codes (0, -1, -2) + * + * Commit Local Config Data to NVRAM + * cbArg1, byte0 = 0x13 + * cbRes1 Standard return codes (0, -1, -2) + */ static int dell_rfkill_set(void *data, bool blocked) { From 2e19f93fecc502e68131e4433ef8d57ffa22bc3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 21 Jun 2015 10:41:42 +0200 Subject: [PATCH 203/839] dell-laptop: Show info about WiGig and UWB in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit show additional information about rfkill state in debugfs based on newly released documentation by Dell. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index ab89103150b5..9724613c28a6 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -629,12 +629,21 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (status & BIT(4)) >> 4); seq_printf(s, "Bit 5 : Wireless keyboard supported: %lu\n", (status & BIT(5)) >> 5); + seq_printf(s, "Bit 6 : UWB supported: %lu\n", + (status & BIT(6)) >> 6); + seq_printf(s, "Bit 7 : WiGig supported: %lu\n", + (status & BIT(7)) >> 7); seq_printf(s, "Bit 8 : Wifi is installed: %lu\n", (status & BIT(8)) >> 8); seq_printf(s, "Bit 9 : Bluetooth is installed: %lu\n", (status & BIT(9)) >> 9); seq_printf(s, "Bit 10: WWAN is installed: %lu\n", (status & BIT(10)) >> 10); + seq_printf(s, "Bit 11: UWB installed: %lu\n", + (status & BIT(11)) >> 11); + seq_printf(s, "Bit 12: WiGig installed: %lu\n", + (status & BIT(12)) >> 12); + seq_printf(s, "Bit 16: Hardware switch is on: %lu\n", (status & BIT(16)) >> 16); seq_printf(s, "Bit 17: Wifi is blocked: %lu\n", @@ -643,6 +652,10 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (status & BIT(18)) >> 18); seq_printf(s, "Bit 19: WWAN is blocked: %lu\n", (status & BIT(19)) >> 19); + seq_printf(s, "Bit 20: UWB is blocked: %lu\n", + (status & BIT(20)) >> 20); + seq_printf(s, "Bit 21: WiGig is blocked: %lu\n", + (status & BIT(21)) >> 21); seq_printf(s, "\nhwswitch_state:\t0x%X\n", hwswitch_state); seq_printf(s, "Bit 0 : Wifi controlled by switch: %lu\n", @@ -651,6 +664,10 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (hwswitch_state & BIT(1)) >> 1); seq_printf(s, "Bit 2 : WWAN controlled by switch: %lu\n", (hwswitch_state & BIT(2)) >> 2); + seq_printf(s, "Bit 3 : UWB controlled by switch: %lu\n", + (hwswitch_state & BIT(3)) >> 3); + seq_printf(s, "Bit 4 : WiGig controlled by switch: %lu\n", + (hwswitch_state & BIT(4)) >> 4); seq_printf(s, "Bit 7 : Wireless switch config locked: %lu\n", (hwswitch_state & BIT(7)) >> 7); seq_printf(s, "Bit 8 : Wifi locator enabled: %lu\n", From 75b7f05d2798ee3a1cc5bbdd54acd0e318a80396 Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Mon, 22 Jun 2015 14:10:36 -0700 Subject: [PATCH 204/839] Input: xpad - set the LEDs properly on XBox Wireless controllers Based on Patch by Pierre-Loup A. Griffais : Add the logic to set the LEDs on XBox Wireless controllers. Command sequence found by sniffing the Windows data stream when plugging the device in. Updated based on comments on linux-input: unify codepaths in xpad_send_led_command for wired/ wireless controller. Also document command values for clarification. All values tested on Xbox 360 Wireless Controller. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 50 +++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 61c761156371..6f8755e2c491 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -881,17 +881,57 @@ struct xpad_led { struct usb_xpad *xpad; }; +/** + * @param command + * 0: off + * 1: all blink, then previous setting + * 2: 1/top-left blink, then on + * 3: 2/top-right blink, then on + * 4: 3/bottom-left blink, then on + * 5: 4/bottom-right blink, then on + * 6: 1/top-left on + * 7: 2/top-right on + * 8: 3/bottom-left on + * 9: 4/bottom-right on + * 10: rotate + * 11: blink, based on previous setting + * 12: slow blink, based on previous setting + * 13: rotate with two lights + * 14: persistent slow all blink + * 15: blink once, then previous setting + */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { - if (command >= 0 && command < 14) { - mutex_lock(&xpad->odata_mutex); + command %= 16; + + mutex_lock(&xpad->odata_mutex); + + switch (xpad->xtype) { + case XTYPE_XBOX360: xpad->odata[0] = 0x01; xpad->odata[1] = 0x03; xpad->odata[2] = command; xpad->irq_out->transfer_buffer_length = 3; - usb_submit_urb(xpad->irq_out, GFP_KERNEL); - mutex_unlock(&xpad->odata_mutex); + break; + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x00; + xpad->odata[2] = 0x08; + xpad->odata[3] = 0x40 + command; + xpad->odata[4] = 0x00; + xpad->odata[5] = 0x00; + xpad->odata[6] = 0x00; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + break; } + + usb_submit_urb(xpad->irq_out, GFP_KERNEL); + mutex_unlock(&xpad->odata_mutex); } static void xpad_led_set(struct led_classdev *led_cdev, @@ -911,7 +951,7 @@ static int xpad_led_probe(struct usb_xpad *xpad) struct led_classdev *led_cdev; int error; - if (xpad->xtype != XTYPE_XBOX360) + if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(struct xpad_led), GFP_KERNEL); From cae705baa40b2c20de8f9bb79ef4bcc6b2418c7c Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Mon, 22 Jun 2015 14:11:30 -0700 Subject: [PATCH 205/839] Input: xpad - re-send LED command on present event The controller only receives commands when its present. So for the correct LED to be lit the LED command has to be sent on the present event. Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 6f8755e2c491..f8850f9cb331 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -344,6 +344,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ + unsigned long led_no; /* led to lit on xbox360 controllers */ }; /* @@ -488,6 +489,8 @@ static void xpad360_process_packet(struct usb_xpad *xpad, input_sync(dev); } +static void xpad_identify_controller(struct usb_xpad *xpad); + /* * xpad360w_process_packet * @@ -510,6 +513,11 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha if (data[1] & 0x80) { xpad->pad_present = 1; usb_submit_urb(xpad->bulk_out, GFP_ATOMIC); + /* + * Light up the segment corresponding to + * controller number. + */ + xpad_identify_controller(xpad); } else xpad->pad_present = 0; } @@ -934,6 +942,12 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command) mutex_unlock(&xpad->odata_mutex); } +static void xpad_identify_controller(struct usb_xpad *xpad) +{ + /* Light up the segment corresponding to controller number */ + xpad_send_led_command(xpad, (xpad->led_no % 4) + 2); +} + static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { @@ -945,8 +959,7 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(-1); - unsigned long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); struct xpad_led *led; struct led_classdev *led_cdev; int error; @@ -958,9 +971,9 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - led_no = atomic_inc_return(&led_seq); + xpad->led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->led_no); led->xpad = xpad; led_cdev = &led->led_cdev; @@ -974,10 +987,8 @@ static int xpad_led_probe(struct usb_xpad *xpad) return error; } - /* - * Light up the segment corresponding to controller number - */ - xpad_send_led_command(xpad, (led_no % 4) + 2); + /* Light up the segment corresponding to controller number */ + xpad_identify_controller(xpad); return 0; } @@ -994,6 +1005,7 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } +static void xpad_identify_controller(struct usb_xpad *xpad) { } #endif From 1adff1b3a7f75a1c255b7fcab5676edf29d4a5d8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 22 Jun 2015 23:44:05 -0700 Subject: [PATCH 206/839] target: Convert se_lun->lun_deve_lock to normal spinlock This patch converts se_lun->lun_deve_lock acquire/release access to use a normal, non bottom-half spin_lock_t for protecting se_lun->lun_deve_list access. Reported-by: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 4 ++-- drivers/target/target_core_device.c | 12 ++++++------ drivers/target/target_core_pr.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index aa2e4b103d43..c56ae024c42e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -968,7 +968,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) continue; spin_unlock(&tg_pt_gp->tg_pt_gp_lock); - spin_lock_bh(&lun->lun_deve_lock); + spin_lock(&lun->lun_deve_lock); list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) { lacl = rcu_dereference_check(se_deve->se_lun_acl, lockdep_is_held(&lun->lun_deve_lock)); @@ -1000,7 +1000,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) core_scsi3_ua_allocate(se_deve, 0x2A, ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED); } - spin_unlock_bh(&lun->lun_deve_lock); + spin_unlock(&lun->lun_deve_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); percpu_ref_put(&lun->lun_ref); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ed084023e7d4..b6df5b9a6890 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -352,10 +352,10 @@ int core_enable_device_list_for_node( hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); mutex_unlock(&nacl->lun_entry_mutex); - spin_lock_bh(&lun->lun_deve_lock); + spin_lock(&lun->lun_deve_lock); list_del(&orig->lun_link); list_add_tail(&new->lun_link, &lun->lun_deve_list); - spin_unlock_bh(&lun->lun_deve_lock); + spin_unlock(&lun->lun_deve_lock); kref_put(&orig->pr_kref, target_pr_kref_release); wait_for_completion(&orig->pr_comp); @@ -369,9 +369,9 @@ int core_enable_device_list_for_node( hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); mutex_unlock(&nacl->lun_entry_mutex); - spin_lock_bh(&lun->lun_deve_lock); + spin_lock(&lun->lun_deve_lock); list_add_tail(&new->lun_link, &lun->lun_deve_list); - spin_unlock_bh(&lun->lun_deve_lock); + spin_unlock(&lun->lun_deve_lock); return 0; } @@ -403,9 +403,9 @@ void core_disable_device_list_for_node( * NodeACL context specific PR metadata for demo-mode * MappedLUN *deve will be released below.. */ - spin_lock_bh(&lun->lun_deve_lock); + spin_lock(&lun->lun_deve_lock); list_del(&orig->lun_link); - spin_unlock_bh(&lun->lun_deve_lock); + spin_unlock(&lun->lun_deve_lock); /* * Disable struct se_dev_entry LUN ACL mapping */ diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 0bb329243dba..7403b03d428f 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -709,7 +709,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( continue; spin_unlock(&dev->se_port_lock); - spin_lock_bh(&lun_tmp->lun_deve_lock); + spin_lock(&lun_tmp->lun_deve_lock); list_for_each_entry(deve_tmp, &lun_tmp->lun_deve_list, lun_link) { /* * This pointer will be NULL for demo mode MappedLUNs @@ -742,7 +742,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( continue; kref_get(&deve_tmp->pr_kref); - spin_unlock_bh(&lun_tmp->lun_deve_lock); + spin_unlock(&lun_tmp->lun_deve_lock); /* * Grab a configfs group dependency that is released * for the exception path at label out: below, or upon @@ -779,9 +779,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( list_add_tail(&pr_reg_atp->pr_reg_atp_mem_list, &pr_reg->pr_reg_atp_list); - spin_lock_bh(&lun_tmp->lun_deve_lock); + spin_lock(&lun_tmp->lun_deve_lock); } - spin_unlock_bh(&lun_tmp->lun_deve_lock); + spin_unlock(&lun_tmp->lun_deve_lock); spin_lock(&dev->se_port_lock); percpu_ref_put(&lun_tmp->lun_ref); From 3dd348fcaa407181585fcadc04b0cac6e31f9be0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:27 +0200 Subject: [PATCH 207/839] target: Send UA on ALUA target port group change When the ALUA target port group changes an INQUIRY DATA CHANGE UA needs to be sent. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 7 +++++++ drivers/target/target_core_ua.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index c56ae024c42e..744f308053e0 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1880,12 +1880,19 @@ static void core_alua_put_tg_pt_gp_from_name( static void __target_attach_tg_pt_gp(struct se_lun *lun, struct t10_alua_tg_pt_gp *tg_pt_gp) { + struct se_dev_entry *se_deve; + assert_spin_locked(&lun->lun_tg_pt_gp_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); lun->lun_tg_pt_gp = tg_pt_gp; list_add_tail(&lun->lun_tg_pt_gp_link, &tg_pt_gp->tg_pt_gp_lun_list); tg_pt_gp->tg_pt_gp_members++; + spin_lock(&lun->lun_deve_lock); + list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) + core_scsi3_ua_allocate(se_deve, 0x3f, + ASCQ_3FH_INQUIRY_DATA_HAS_CHANGED); + spin_unlock(&lun->lun_deve_lock); spin_unlock(&tg_pt_gp->tg_pt_gp_lock); } diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index 96460bff490f..59278d662e66 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -25,6 +25,8 @@ #define ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS 0x09 +#define ASCQ_3FH_INQUIRY_DATA_HAS_CHANGED 0x03 + extern struct kmem_cache *se_ua_cache; extern sense_reason_t target_scsi3_ua_check(struct se_cmd *); From b5aafb16ebdb713e9e5ac60db2940fe4baf6328f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:28 +0200 Subject: [PATCH 208/839] target: Send UA upon LUN RESET tmr completion SAM mandates that an BUS DEVICE RESET FUNCTION OCCURRED UA needs to be send after a LUN RESET tmr has completed. (Update to use target_ua_allocate_lun - hch) Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0364534f8d46..e5088c059b4f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2983,6 +2983,11 @@ static void target_tmr_work(struct work_struct *work) ret = core_tmr_lun_reset(dev, tmr, NULL, NULL); tmr->response = (!ret) ? TMR_FUNCTION_COMPLETE : TMR_FUNCTION_REJECTED; + if (tmr->response == TMR_FUNCTION_COMPLETE) { + target_ua_allocate_lun(cmd->se_sess->se_node_acl, + cmd->orig_fe_lun, 0x29, + ASCQ_29H_BUS_DEVICE_RESET_FUNCTION_OCCURRED); + } break; case TMR_TARGET_WARM_RESET: tmr->response = TMR_FUNCTION_REJECTED; From 7c0d0d51d26497866d2951a35f1736fc765e4fcf Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 11 Jun 2015 10:01:29 +0200 Subject: [PATCH 209/839] target: Send UA when changing LUN inventory When changind the LUN inventory via core_enable_device_list_for_node() or core_disable_device_list_for_node() a REPORTED LUNS DATA HAS CHANGED UA should be send. (Convert to target_luns_data_has_changed helper usage - hch) Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 23 +++++++++++++++++++---- drivers/target/target_core_ua.h | 1 + 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index b6df5b9a6890..52448483ed9b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -293,10 +293,22 @@ void target_pr_kref_release(struct kref *kref) complete(&deve->pr_comp); } -/* core_enable_device_list_for_node(): - * - * - */ +static void +target_luns_data_has_changed(struct se_node_acl *nacl, struct se_dev_entry *new, + bool skip_new) +{ + struct se_dev_entry *tmp; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, &nacl->lun_entry_hlist, link) { + if (skip_new && tmp == new) + continue; + core_scsi3_ua_allocate(tmp, 0x3F, + ASCQ_3FH_REPORTED_LUNS_DATA_HAS_CHANGED); + } + rcu_read_unlock(); +} + int core_enable_device_list_for_node( struct se_lun *lun, struct se_lun_acl *lun_acl, @@ -360,6 +372,7 @@ int core_enable_device_list_for_node( kref_put(&orig->pr_kref, target_pr_kref_release); wait_for_completion(&orig->pr_comp); + target_luns_data_has_changed(nacl, new, true); kfree_rcu(orig, rcu_head); return 0; } @@ -373,6 +386,7 @@ int core_enable_device_list_for_node( list_add_tail(&new->lun_link, &lun->lun_deve_list); spin_unlock(&lun->lun_deve_lock); + target_luns_data_has_changed(nacl, new, true); return 0; } @@ -428,6 +442,7 @@ void core_disable_device_list_for_node( kfree_rcu(orig, rcu_head); core_scsi3_free_pr_reg_from_nacl(dev, nacl); + target_luns_data_has_changed(nacl, NULL, false); } /* core_clear_lun_from_tpg(): diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index 59278d662e66..bd6e78ba153d 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -26,6 +26,7 @@ #define ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS 0x09 #define ASCQ_3FH_INQUIRY_DATA_HAS_CHANGED 0x03 +#define ASCQ_3FH_REPORTED_LUNS_DATA_HAS_CHANGED 0x0E extern struct kmem_cache *se_ua_cache; From e2e21bd8f979a24462070cc89fae11e819cae90a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 11 Jun 2015 19:58:34 +0300 Subject: [PATCH 210/839] target/user: Fix inconsistent kmap_atomic/kunmap_atomic Pointers that are mapped by kmap_atomic() + offset must be unmapped without the offset. That would cause problems if the SG element length exceeds the PAGE_SIZE limit. Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 949e6165ef8a..078ef6e3eb70 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -260,7 +260,8 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev, /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ if (sg->length != copy_bytes) { - from += copy_bytes; + void *from_skip = from + copy_bytes; + copy_bytes = sg->length - copy_bytes; (*iov)->iov_len = copy_bytes; @@ -270,7 +271,7 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev, if (copy_data) { to = (void *) udev->mb_addr + udev->data_off + udev->data_head; - memcpy(to, from, copy_bytes); + memcpy(to, from_skip, copy_bytes); tcmu_flush_dcache_range(to, copy_bytes); } @@ -281,7 +282,7 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev, copy_bytes, udev->data_size); } - kunmap_atomic(from); + kunmap_atomic(from - sg->offset); } } @@ -309,18 +310,19 @@ static void gather_and_free_data_area(struct tcmu_dev *udev, /* Uh oh, wrapped the data buffer for this sg's data */ if (sg->length != copy_bytes) { + void *to_skip = to + copy_bytes; + from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; WARN_ON(udev->data_tail); - to += copy_bytes; copy_bytes = sg->length - copy_bytes; tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to, from, copy_bytes); + memcpy(to_skip, from, copy_bytes); UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); } - kunmap_atomic(to); + kunmap_atomic(to - sg->offset); } } From 7a971b1b3055f0e76ff09b8fd0dd809ea3b48279 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:10:58 +0200 Subject: [PATCH 211/839] target: replace se_cmd->execute_rw with a protocol_data field Instead of leaking this SBC read/write implementation detail just add an opaqueue protocol specific pointer to struct se_cmd that we can assign the sbc_ops vector to. Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 20 +++++++------------- include/target/target_core_base.h | 3 +-- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 31b2ae356120..287843e19275 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -381,7 +381,9 @@ out: static sense_reason_t sbc_execute_rw(struct se_cmd *cmd) { - return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, + struct sbc_ops *ops = cmd->protocol_data; + + return ops->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, cmd->data_direction); } @@ -560,6 +562,7 @@ out: static sense_reason_t sbc_compare_and_write(struct se_cmd *cmd) { + struct sbc_ops *ops = cmd->protocol_data; struct se_device *dev = cmd->se_dev; sense_reason_t ret; int rc; @@ -579,7 +582,7 @@ sbc_compare_and_write(struct se_cmd *cmd) */ cmd->data_length = cmd->t_task_nolb * dev->dev_attrib.block_size; - ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, + ret = ops->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, DMA_FROM_DEVICE); if (ret) { cmd->transport_complete_callback = NULL; @@ -766,12 +769,13 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) u32 sectors = 0; sense_reason_t ret; + cmd->protocol_data = ops; + switch (cdb[0]) { case READ_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_10: @@ -786,7 +790,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_12: @@ -801,7 +804,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_16: @@ -816,14 +818,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_10: @@ -839,7 +839,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_12: @@ -854,7 +853,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_16: @@ -869,7 +867,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case XDWRITEREAD_10: @@ -887,7 +884,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) /* * Setup BIDI XOR callback to be run after I/O completion. */ - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; break; @@ -911,7 +907,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Setup BIDI XOR callback to be run during after I/O * completion. */ - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; break; @@ -955,7 +950,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd->t_task_lba = get_unaligned_be64(&cdb[2]); cmd->t_task_nolb = sectors; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_compare_and_write; cmd->transport_complete_callback = compare_and_write_callback; break; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 8a4f861f12b7..273818403a3e 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -490,9 +490,8 @@ struct se_cmd { struct kref cmd_kref; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, - u32, enum dma_data_direction); sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); + void *protocol_data; unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; From 62e4694256dd3cbe301ebc0da799df8779b97014 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:10:59 +0200 Subject: [PATCH 212/839] target: simplify UNMAP handling Move a little more processing into the core code, and lift the previous do_unmap callback into the sbc_ops structure. Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.c | 17 +++-------------- drivers/target/target_core_iblock.c | 16 +++------------- drivers/target/target_core_sbc.c | 14 ++++++-------- include/target/target_core_backend.h | 7 ++----- 4 files changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 238e3a256d04..ced8c4fdc23d 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -475,9 +475,9 @@ fd_do_prot_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) } static sense_reason_t -fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb) +fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { - struct file *file = priv; + struct file *file = FD_DEV(cmd->se_dev)->fd_file; struct inode *inode = file->f_mapping->host; int ret; @@ -521,9 +521,6 @@ fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb) static sense_reason_t fd_execute_write_same_unmap(struct se_cmd *cmd) { - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *fd_dev = FD_DEV(se_dev); - struct file *file = fd_dev->fd_file; sector_t lba = cmd->t_task_lba; sector_t nolb = sbc_get_write_same_sectors(cmd); sense_reason_t ret; @@ -533,7 +530,7 @@ fd_execute_write_same_unmap(struct se_cmd *cmd) return 0; } - ret = fd_do_unmap(cmd, file, lba, nolb); + ret = fd_execute_unmap(cmd, lba, nolb); if (ret) return ret; @@ -541,14 +538,6 @@ fd_execute_write_same_unmap(struct se_cmd *cmd) return 0; } -static sense_reason_t -fd_execute_unmap(struct se_cmd *cmd) -{ - struct file *file = FD_DEV(cmd->se_dev)->fd_file; - - return sbc_execute_unmap(cmd, fd_do_unmap, file); -} - static sense_reason_t fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index ae8ad2da6632..0a2308891c81 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -416,10 +416,9 @@ iblock_execute_sync_cache(struct se_cmd *cmd) } static sense_reason_t -iblock_do_unmap(struct se_cmd *cmd, void *priv, - sector_t lba, sector_t nolb) +iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { - struct block_device *bdev = priv; + struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; int ret; ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0); @@ -431,23 +430,14 @@ iblock_do_unmap(struct se_cmd *cmd, void *priv, return 0; } -static sense_reason_t -iblock_execute_unmap(struct se_cmd *cmd) -{ - struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; - - return sbc_execute_unmap(cmd, iblock_do_unmap, bdev); -} - static sense_reason_t iblock_execute_write_same_unmap(struct se_cmd *cmd) { - struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; sector_t lba = cmd->t_task_lba; sector_t nolb = sbc_get_write_same_sectors(cmd); sense_reason_t ret; - ret = iblock_do_unmap(cmd, bdev, lba, nolb); + ret = iblock_execute_unmap(cmd, lba, nolb); if (ret) return ret; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 287843e19275..c16a66698725 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -38,6 +38,7 @@ static sense_reason_t sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool); +static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd); static sense_reason_t sbc_emulate_readcapacity(struct se_cmd *cmd) @@ -999,7 +1000,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return TCM_UNSUPPORTED_SCSI_OPCODE; } size = get_unaligned_be16(&cdb[7]); - cmd->execute_cmd = ops->execute_unmap; + cmd->execute_cmd = sbc_execute_unmap; break; case WRITE_SAME_16: sectors = transport_get_sectors_16(cdb); @@ -1087,12 +1088,10 @@ u32 sbc_get_device_type(struct se_device *dev) } EXPORT_SYMBOL(sbc_get_device_type); -sense_reason_t -sbc_execute_unmap(struct se_cmd *cmd, - sense_reason_t (*do_unmap_fn)(struct se_cmd *, void *, - sector_t, sector_t), - void *priv) +static sense_reason_t +sbc_execute_unmap(struct se_cmd *cmd) { + struct sbc_ops *ops = cmd->protocol_data; struct se_device *dev = cmd->se_dev; unsigned char *buf, *ptr = NULL; sector_t lba; @@ -1156,7 +1155,7 @@ sbc_execute_unmap(struct se_cmd *cmd, goto err; } - ret = do_unmap_fn(cmd, priv, lba, range); + ret = ops->execute_unmap(cmd, lba, range); if (ret) goto err; @@ -1170,7 +1169,6 @@ err: target_complete_cmd(cmd, GOOD); return ret; } -EXPORT_SYMBOL(sbc_execute_unmap); void sbc_dif_generate(struct se_cmd *cmd) diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 566c7d27f6d0..ff399369a21a 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -50,7 +50,8 @@ struct sbc_ops { sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); - sense_reason_t (*execute_unmap)(struct se_cmd *cmd); + sense_reason_t (*execute_unmap)(struct se_cmd *cmd, + sector_t lba, sector_t nolb); }; int transport_backend_register(const struct target_backend_ops *); @@ -68,10 +69,6 @@ sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops); u32 sbc_get_device_rev(struct se_device *dev); u32 sbc_get_device_type(struct se_device *dev); sector_t sbc_get_write_same_sectors(struct se_cmd *cmd); -sense_reason_t sbc_execute_unmap(struct se_cmd *cmd, - sense_reason_t (*do_unmap_fn)(struct se_cmd *cmd, void *priv, - sector_t lba, sector_t nolb), - void *priv); void sbc_dif_generate(struct se_cmd *); sense_reason_t sbc_dif_verify(struct se_cmd *, sector_t, unsigned int, unsigned int, struct scatterlist *, int); From b753d643555e548163adfa2de9d75e0257e4b356 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:11:00 +0200 Subject: [PATCH 213/839] target: implement WRITE_SAME with UNMAP bit using ->execute_unmap Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.c | 21 --------------------- drivers/target/target_core_iblock.c | 16 ---------------- drivers/target/target_core_sbc.c | 21 +++++++++++++++++++-- include/target/target_core_backend.h | 1 - 4 files changed, 19 insertions(+), 40 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index ced8c4fdc23d..f5da2c1891b5 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -518,26 +518,6 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) return 0; } -static sense_reason_t -fd_execute_write_same_unmap(struct se_cmd *cmd) -{ - sector_t lba = cmd->t_task_lba; - sector_t nolb = sbc_get_write_same_sectors(cmd); - sense_reason_t ret; - - if (!nolb) { - target_complete_cmd(cmd, SAM_STAT_GOOD); - return 0; - } - - ret = fd_execute_unmap(cmd, lba, nolb); - if (ret) - return ret; - - target_complete_cmd(cmd, GOOD); - return 0; -} - static sense_reason_t fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) @@ -827,7 +807,6 @@ static struct sbc_ops fd_sbc_ops = { .execute_rw = fd_execute_rw, .execute_sync_cache = fd_execute_sync_cache, .execute_write_same = fd_execute_write_same, - .execute_write_same_unmap = fd_execute_write_same_unmap, .execute_unmap = fd_execute_unmap, }; diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 0a2308891c81..a869022b136e 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -430,21 +430,6 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) return 0; } -static sense_reason_t -iblock_execute_write_same_unmap(struct se_cmd *cmd) -{ - sector_t lba = cmd->t_task_lba; - sector_t nolb = sbc_get_write_same_sectors(cmd); - sense_reason_t ret; - - ret = iblock_execute_unmap(cmd, lba, nolb); - if (ret) - return ret; - - target_complete_cmd(cmd, GOOD); - return 0; -} - static sense_reason_t iblock_execute_write_same(struct se_cmd *cmd) { @@ -836,7 +821,6 @@ static struct sbc_ops iblock_sbc_ops = { .execute_rw = iblock_execute_rw, .execute_sync_cache = iblock_execute_sync_cache, .execute_write_same = iblock_execute_write_same, - .execute_write_same_unmap = iblock_execute_write_same_unmap, .execute_unmap = iblock_execute_unmap, }; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index c16a66698725..9a5e7d094a5d 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -177,6 +177,23 @@ sector_t sbc_get_write_same_sectors(struct se_cmd *cmd) } EXPORT_SYMBOL(sbc_get_write_same_sectors); +static sense_reason_t +sbc_execute_write_same_unmap(struct se_cmd *cmd) +{ + struct sbc_ops *ops = cmd->protocol_data; + sector_t nolb = sbc_get_write_same_sectors(cmd); + sense_reason_t ret; + + if (nolb) { + ret = ops->execute_unmap(cmd, cmd->t_task_lba, nolb); + if (ret) + return ret; + } + + target_complete_cmd(cmd, GOOD); + return 0; +} + static sense_reason_t sbc_emulate_noop(struct se_cmd *cmd) { @@ -300,7 +317,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o * translated into block discard requests within backend code. */ if (flags[0] & 0x08) { - if (!ops->execute_write_same_unmap) + if (!ops->execute_unmap) return TCM_UNSUPPORTED_SCSI_OPCODE; if (!dev->dev_attrib.emulate_tpws) { @@ -308,7 +325,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o " has emulate_tpws disabled\n"); return TCM_UNSUPPORTED_SCSI_OPCODE; } - cmd->execute_cmd = ops->execute_write_same_unmap; + cmd->execute_cmd = sbc_execute_write_same_unmap; return 0; } if (!ops->execute_write_same) diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index ff399369a21a..1e5c8f949bae 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -49,7 +49,6 @@ struct sbc_ops { u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); - sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); sense_reason_t (*execute_unmap)(struct se_cmd *cmd, sector_t lba, sector_t nolb); }; From ce8dd25d0ebb1d868802e1d1c770f27f2249fae4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:14:39 +0200 Subject: [PATCH 214/839] target: consolidate version defines Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 6 +++--- drivers/target/target_core_file.c | 2 +- drivers/target/target_core_iblock.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- drivers/target/target_core_rd.c | 2 +- include/target/target_core_base.h | 3 +-- include/target/target_core_configfs.h | 1 - 7 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 68addbc7eb7b..6003921a1c56 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -103,7 +103,7 @@ static ssize_t target_core_attr_show(struct config_item *item, char *page) { return sprintf(page, "Target Engine Core ConfigFS Infrastructure %s" - " on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_CONFIGFS_VERSION, + " on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine); } @@ -3235,7 +3235,7 @@ static ssize_t target_core_hba_show_attr_hba_info( { return sprintf(page, "HBA Index: %d plugin: %s version: %s\n", hba->hba_id, hba->backend->ops->name, - TARGET_CORE_CONFIGFS_VERSION); + TARGET_CORE_VERSION); } SE_HBA_ATTR_RO(hba_info); @@ -3507,7 +3507,7 @@ static int __init target_core_init_configfs(void) goto out_global; } pr_debug("TARGET_CORE[0]: Initialized ConfigFS Fabric" - " Infrastructure: "TARGET_CORE_CONFIGFS_VERSION" on %s/%s" + " Infrastructure: "TARGET_CORE_VERSION" on %s/%s" " on "UTS_RELEASE"\n", utsname()->sysname, utsname()->machine); /* * Register built-in RAMDISK subsystem logic for virtual LUN 0 diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index f5da2c1891b5..7a0ef95b1407 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -61,7 +61,7 @@ static int fd_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM FILEIO HBA Driver %s on Generic" " Target Core Stack %s\n", hba->hba_id, FD_VERSION, - TARGET_CORE_MOD_VERSION); + TARGET_CORE_VERSION); pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic\n", hba->hba_id, fd_host->fd_host_id); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index a869022b136e..e37db742fdaf 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -57,7 +57,7 @@ static int iblock_attach_hba(struct se_hba *hba, u32 host_id) { pr_debug("CORE_HBA[%d] - TCM iBlock HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - IBLOCK_VERSION, TARGET_CORE_MOD_VERSION); + IBLOCK_VERSION, TARGET_CORE_VERSION); return 0; } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index c673980ded04..f757178ed414 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -80,7 +80,7 @@ static int pscsi_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM SCSI HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - PSCSI_VERSION, TARGET_CORE_MOD_VERSION); + PSCSI_VERSION, TARGET_CORE_VERSION); pr_debug("CORE_HBA[%d] - Attached SCSI HBA to Generic\n", hba->hba_id); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index e98432705f39..723c97a25cbc 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -58,7 +58,7 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM Ramdisk HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - RD_HBA_VERSION, TARGET_CORE_MOD_VERSION); + RD_HBA_VERSION, TARGET_CORE_VERSION); return 0; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 273818403a3e..a35303302df3 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -10,8 +10,7 @@ #include #include -#define TARGET_CORE_MOD_VERSION "v4.1.0" -#define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION +#define TARGET_CORE_VERSION "v4.1.0" /* * By default we use 32-byte CDBs in TCM Core and subsystem plugin code. diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index abd063b8b301..4012ed22d783 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -1,4 +1,3 @@ -#define TARGET_CORE_CONFIGFS_VERSION TARGET_CORE_MOD_VERSION #define TARGET_CORE_CONFIG_ROOT "/sys/kernel/config" From 1315c7c7bbea6e0aba49a867400e7b8ccb6a5d16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:14:40 +0200 Subject: [PATCH 215/839] target: remove unused TARGET_CORE_CONFIG_ROOT define Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/target/target_core_configfs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 4012ed22d783..d19dbeeab930 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -1,6 +1,4 @@ -#define TARGET_CORE_CONFIG_ROOT "/sys/kernel/config" - #define TARGET_CORE_NAME_MAX_LEN 64 #define TARGET_FABRIC_NAME_SIZE 32 From 6de2ce5b8cb6ff3562de34ea2584b0e501373f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jun 2015 15:14:41 +0200 Subject: [PATCH 216/839] target: remove target_core_configfs.h The remaining defintions are private to the target core and can be merged into target_core_internal.h. Signed-off-by: Christoph Hellwig Reviewed-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 1 - drivers/target/target_core_fabric_configfs.c | 1 - drivers/target/target_core_internal.h | 34 ++++++++++++++++++++ drivers/target/target_core_tpg.c | 1 - include/target/target_core_configfs.h | 34 -------------------- 5 files changed, 34 insertions(+), 37 deletions(-) delete mode 100644 include/target/target_core_configfs.h diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 6003921a1c56..0b0de3647478 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include "target_core_internal.h" diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 6cfee595f3f2..48a36989c1a6 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include "target_core_internal.h" diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 511178969df3..99c24acfe676 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -1,6 +1,9 @@ #ifndef TARGET_CORE_INTERNAL_H #define TARGET_CORE_INTERNAL_H +#define TARGET_CORE_NAME_MAX_LEN 64 +#define TARGET_FABRIC_NAME_SIZE 32 + struct target_backend { struct list_head list; @@ -14,6 +17,37 @@ struct target_backend { struct config_item_type tb_dev_stat_cit; }; +struct target_fabric_configfs { + atomic_t tf_access_cnt; + struct list_head tf_list; + struct config_group tf_group; + struct config_group tf_disc_group; + struct config_group *tf_default_groups[2]; + const struct target_core_fabric_ops *tf_ops; + + struct config_item_type tf_discovery_cit; + struct config_item_type tf_wwn_cit; + struct config_item_type tf_wwn_fabric_stats_cit; + struct config_item_type tf_tpg_cit; + struct config_item_type tf_tpg_base_cit; + struct config_item_type tf_tpg_lun_cit; + struct config_item_type tf_tpg_port_cit; + struct config_item_type tf_tpg_port_stat_cit; + struct config_item_type tf_tpg_np_cit; + struct config_item_type tf_tpg_np_base_cit; + struct config_item_type tf_tpg_attrib_cit; + struct config_item_type tf_tpg_auth_cit; + struct config_item_type tf_tpg_param_cit; + struct config_item_type tf_tpg_nacl_cit; + struct config_item_type tf_tpg_nacl_base_cit; + struct config_item_type tf_tpg_nacl_attrib_cit; + struct config_item_type tf_tpg_nacl_auth_cit; + struct config_item_type tf_tpg_nacl_param_cit; + struct config_item_type tf_tpg_nacl_stat_cit; + struct config_item_type tf_tpg_mappedlun_cit; + struct config_item_type tf_tpg_mappedlun_stat_cit; +}; + /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index aa39bc89227b..10321a8ffbb0 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -37,7 +37,6 @@ #include #include -#include #include #include "target_core_internal.h" diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h deleted file mode 100644 index d19dbeeab930..000000000000 --- a/include/target/target_core_configfs.h +++ /dev/null @@ -1,34 +0,0 @@ - -#define TARGET_CORE_NAME_MAX_LEN 64 -#define TARGET_FABRIC_NAME_SIZE 32 - -struct target_fabric_configfs { - atomic_t tf_access_cnt; - struct list_head tf_list; - struct config_group tf_group; - struct config_group tf_disc_group; - struct config_group *tf_default_groups[2]; - const struct target_core_fabric_ops *tf_ops; - - struct config_item_type tf_discovery_cit; - struct config_item_type tf_wwn_cit; - struct config_item_type tf_wwn_fabric_stats_cit; - struct config_item_type tf_tpg_cit; - struct config_item_type tf_tpg_base_cit; - struct config_item_type tf_tpg_lun_cit; - struct config_item_type tf_tpg_port_cit; - struct config_item_type tf_tpg_port_stat_cit; - struct config_item_type tf_tpg_np_cit; - struct config_item_type tf_tpg_np_base_cit; - struct config_item_type tf_tpg_attrib_cit; - struct config_item_type tf_tpg_auth_cit; - struct config_item_type tf_tpg_param_cit; - struct config_item_type tf_tpg_nacl_cit; - struct config_item_type tf_tpg_nacl_base_cit; - struct config_item_type tf_tpg_nacl_attrib_cit; - struct config_item_type tf_tpg_nacl_auth_cit; - struct config_item_type tf_tpg_nacl_param_cit; - struct config_item_type tf_tpg_nacl_stat_cit; - struct config_item_type tf_tpg_mappedlun_cit; - struct config_item_type tf_tpg_mappedlun_stat_cit; -}; From 2ec1e9e20701f37a06562966dbd37e7dd072fcb8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 23 Jun 2015 00:45:14 -0700 Subject: [PATCH 217/839] target: Bump core version to v5.0 Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a35303302df3..0b9a331e5492 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -10,7 +10,7 @@ #include #include -#define TARGET_CORE_VERSION "v4.1.0" +#define TARGET_CORE_VERSION "v5.0" /* * By default we use 32-byte CDBs in TCM Core and subsystem plugin code. From 52c1d8038594ffd82a6cf88d3dc0ca68d8179a36 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 19 Jun 2015 11:38:16 +0800 Subject: [PATCH 218/839] ACPICA: Linuxize: Reduce divergences for 20150616 release This patch reduces source code differences between the Linux kernel and the ACPICA upstream so that the linuxized ACPICA 20150616 release can be applied with reduced human intervention. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dsobject.c | 2 +- drivers/acpi/acpica/hwxfsleep.c | 2 -- drivers/acpi/acpica/rscreate.c | 2 +- drivers/acpi/acpica/rsutils.c | 2 +- drivers/acpi/acpica/tbxface.c | 1 + include/acpi/platform/acenv.h | 1 - 6 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index 8a7b07b6adc8..4a4b2f343713 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -751,7 +751,7 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state, obj_desc->string.pointer = op->common.value.string; obj_desc->string.length = - (u32) ACPI_STRLEN(op->common.value.string); + (u32)ACPI_STRLEN(op->common.value.string); /* * The string is contained in the ACPI table, don't ever try diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 3b3767698827..82e310b94ae4 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -138,7 +138,6 @@ acpi_status acpi_set_firmware_waking_vector64(u64 physical_address) { ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - /* Determine if the 64-bit vector actually exists */ if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) { @@ -154,7 +153,6 @@ acpi_status acpi_set_firmware_waking_vector64(u64 physical_address) ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64) #endif - /******************************************************************************* * * FUNCTION: acpi_enter_sleep_state_s4bios diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c index 15434e4c9b34..f30f35e6ff2a 100644 --- a/drivers/acpi/acpica/rscreate.c +++ b/drivers/acpi/acpica/rscreate.c @@ -353,7 +353,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* +1 to include null terminator */ user_prt->length += - (u32) ACPI_STRLEN(user_prt->source) + 1; + (u32)ACPI_STRLEN(user_prt->source) + 1; break; case ACPI_TYPE_STRING: diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index ece3cd60cc6a..90417de38dab 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -367,7 +367,7 @@ acpi_rs_get_resource_source(acpi_rs_length resource_length, (u32) ACPI_STRLEN(ACPI_CAST_PTR(char, &aml_resource_source[1])) + 1; - total_length = (u32) ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); + total_length = (u32)ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); ACPI_MEMSET(resource_source->string_ptr, 0, total_length); diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index 60e94f87f27a..54b9f7957ff1 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -242,6 +242,7 @@ acpi_get_table_header(char *signature, if (!header) { return (AE_NO_MEMORY); } + ACPI_MEMCPY(out_table_header, header, sizeof(struct acpi_table_header)); acpi_os_unmap_memory(header, diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 073997d729e9..1e84e62b448d 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -359,7 +359,6 @@ #define ACPI_MEMCMP(s1,s2,n) memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) #define ACPI_MEMCPY(d,s,n) (void) memcpy((d), (s), (acpi_size)(n)) #define ACPI_MEMSET(d,s,n) (void) memset((d), (s), (acpi_size)(n)) - #define ACPI_TOUPPER(i) toupper((int) (i)) #define ACPI_TOLOWER(i) tolower((int) (i)) #define ACPI_IS_XDIGIT(i) isxdigit((int) (i)) From c5de47f2e803c7e6ffc0a34b174d4d009c8bd8e1 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 10 Jun 2015 00:25:07 -0700 Subject: [PATCH 219/839] perf pmu: Use __weak definition from Jiri Olsa pointed out, that the defines the attribute '__weak'. We might as well use that. Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1433921123-25327-4-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 0fcc624eb767..c6b16b1db6d0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -436,7 +437,7 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } -struct perf_event_attr *__attribute__((weak)) +struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { return NULL; From 70c646e0e47b51c8fa431f9ec1962f7d6e512860 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 10 Jun 2015 00:25:08 -0700 Subject: [PATCH 220/839] perf pmu: Split perf_pmu__new_alias() Separate the event parsing code in perf_pmu__new_alias() out into a separate function __perf_pmu__new_alias() so that code can be called indepdently. This is based on an earlier patch from Andi Kleen. Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1433921123-25327-5-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c6b16b1db6d0..7bcb8c315615 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -206,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, + char *desc __maybe_unused, char *val) { struct perf_pmu_alias *alias; - char buf[256]; int ret; - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - buf[ret] = 0; - alias = malloc(sizeof(*alias)); if (!alias) return -ENOMEM; @@ -226,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI alias->unit[0] = '\0'; alias->per_pkg = false; - ret = parse_events_terms(&alias->terms, buf); + ret = parse_events_terms(&alias->terms, val); if (ret) { + pr_err("Cannot parse alias %s: %d\n", val, ret); free(alias); return ret; } alias->name = strdup(name); - /* - * load unit name and scale if available - */ - perf_pmu__parse_unit(alias, dir, name); - perf_pmu__parse_scale(alias, dir, name); - perf_pmu__parse_per_pkg(alias, dir, name); - perf_pmu__parse_snapshot(alias, dir, name); + if (dir) { + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); + } list_add_tail(&alias->list, list); return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +{ + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + + buf[ret] = 0; + + return __perf_pmu__new_alias(list, dir, name, NULL, buf); +} + static inline bool pmu_alias_info_file(char *name) { size_t len; From 5b021ddf81b0bcd17d88186b605f53a2ca452d3a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 10 Jun 2015 00:25:11 -0700 Subject: [PATCH 221/839] perf tools: Allow events with dot The Intel events use a dot to separate event name and unit mask. Allow dot in names in the scanner, and remove special handling of dot as EOF. Also remove the hack in jevents to replace dot with underscore. This way dotted events can be specified directly by the user. I'm not fully sure this change to the scanner is correct (what was the dot special case good for?), but I haven't found anything that breaks with it so far at least. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1433921123-25327-8-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 09e738fe9ea2..13cef3c65565 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -119,8 +119,8 @@ event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?][a-zA-Z0-9_*?]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* +name [a-zA-Z_*?][a-zA-Z0-9_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpGHSDI]+ modifier_bp [rwx]{1,3} @@ -165,7 +165,6 @@ modifier_bp [rwx]{1,3} return PE_EVENT_NAME; } -. | <> { BEGIN(INITIAL); REWIND(0); From 5531e16227cc18bc13b028a052a609a233b49c07 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 23 Jun 2015 10:52:48 +0300 Subject: [PATCH 222/839] perf session: Print a newline when dumping PERF_RECORD_FINISHED_ROUND With 'perf report -D' the PERF_RECORD_FINISHED_ROUND event was printed without a newline, resulting in: 0x91a18 [0x8]: PERF_RECORD_FINISHED_ROUNDAggregated stats Other events print their details, but PERF_RECORD_FINISHED_ROUND doesn't have any so just add a print for a newline. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1435045969-15999-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index aa482c10469d..aac1c4cee1e8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -686,6 +686,8 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe) { + if (dump_trace) + fprintf(stdout, "\n"); return ordered_events__flush(oe, OE_FLUSH__ROUND); } From fe692ac86adbbbbf9745909aafe0c87fd7fc6368 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 23 Jun 2015 10:52:49 +0300 Subject: [PATCH 223/839] perf tools: Print a newline before dumping Aggregated stats When dumping events with 'perf report -D' the event print always starts with a newline (see dump_event()). Do the same with the "Aggregated stats" print so that it is not jammed up against the last event print. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1435045969-15999-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index aac1c4cee1e8..b5549b58bb2b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1728,7 +1728,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)"; - ret = fprintf(fp, "Aggregated stats:%s\n", msg); + ret = fprintf(fp, "\nAggregated stats:%s\n", msg); ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; From 7c31bb8c95ed269062ff7c7cc4a28b84a2b0f3a6 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 18 Jun 2015 02:49:10 +0000 Subject: [PATCH 224/839] perf probe: Fix failure to probe events on arm Fix failure to probe events on arm, the problem was introduced by commit 5a51fcd1f30c ("perf probe: Skip kernel symbols which is out of .text"). For some architectures, the '_etext' label is not in the .text section (in the .notes section for arm/arm64). Labels out of the .text section are not loaded as symbols and we get a zero value when looking up its addresses, which causes all events to be wrongly skipped. This patch skips checking the text address range when failing to get the address of '_etext' and thus fixes the problem. The problem can be reproduced on arm as follows: # perf probe --add='generic_perform_write' generic_perform_write+0 is out of .text, skip it. Probe point 'generic_perform_write' not found. Error: Failed to add events. After this patch: # perf probe --add='generic_perform_write' Added new event: probe:generic_perform_write (on generic_perform_write) You can now use it in all perf tools, such as: perf record -e probe:generic_perform_write -aR sleep 1 Signed-off-by: He Kuang Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1434595750-129791-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 076527b639bd..381f23a443c7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -249,8 +249,12 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) static bool kprobe_blacklist__listed(unsigned long address); static bool kprobe_warn_out_range(const char *symbol, unsigned long address) { + u64 etext_addr; + /* Get the address of _etext for checking non-probable text symbol */ - if (kernel_get_symbol_address_by_name("_etext", false) < address) + etext_addr = kernel_get_symbol_address_by_name("_etext", false); + + if (etext_addr != 0 && etext_addr < address) pr_warning("%s is out of .text, skip it.\n", symbol); else if (kprobe_blacklist__listed(address)) pr_warning("%s is blacklisted function, skip it.\n", symbol); From e13798c77bdfed1da497f5e076b105b2be17b44f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:02 +0200 Subject: [PATCH 225/839] perf thread_map: Don't access the array entries directly Instead provide a method to set the array entries, and another to access the contents. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-2-git-send-email-jolsa@kernel.org [ Split providing the set/get accessors from transforming the entries structs ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/event.c | 6 +++--- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evsel.c | 2 +- tools/perf/util/thread_map.c | 24 +++++++++++---------- tools/perf/util/thread_map.h | 10 +++++++++ 8 files changed, 34 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index de5d277d1ad7..2bf2ca771ca5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2325,7 +2325,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) */ if (trace->filter_pids.nr > 0) err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); - else if (evlist->threads->map[0] == -1) + else if (thread_map__pid(evlist->threads, 0) == -1) err = perf_evlist__set_filter_pid(evlist, getpid()); if (err < 0) { @@ -2343,7 +2343,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (forks) perf_evlist__start_workload(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || + trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || evlist->threads->nr > 1 || perf_evlist__first(evlist)->attr.inherit; again: diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 6245221479d7..01a19626c846 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -45,7 +45,7 @@ int test__syscall_openat_tp_fields(void) perf_evsel__config(evsel, &opts); - evlist->threads->map[0] = getpid(); + thread_map__set_pid(evlist->threads, 0, getpid()); err = perf_evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index df66966cfde7..3dab006b4a03 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -119,12 +119,12 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, if (per_cpu) { mp->cpu = evlist->cpus->map[idx]; if (evlist->threads) - mp->tid = evlist->threads->map[0]; + mp->tid = thread_map__pid(evlist->threads, 0); else mp->tid = -1; } else { mp->cpu = -1; - mp->tid = evlist->threads->map[idx]; + mp->tid = thread_map__pid(evlist->threads, idx); } } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index d7d986d8f23e..67a977e5d0ab 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -504,7 +504,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, fork_event, - threads->map[thread], 0, + thread_map__pid(threads, thread), 0, process, tool, machine, mmap_data, proc_map_timeout)) { err = -1; @@ -515,12 +515,12 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, * comm.pid is set to thread group id by * perf_event__synthesize_comm */ - if ((int) comm_event->comm.pid != threads->map[thread]) { + if ((int) comm_event->comm.pid != thread_map__pid(threads, thread)) { bool need_leader = true; /* is thread group leader in thread_map? */ for (j = 0; j < threads->nr; ++j) { - if ((int) comm_event->comm.pid == threads->map[j]) { + if ((int) comm_event->comm.pid == thread_map__pid(threads, j)) { need_leader = false; break; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8366511b45f8..d29df901be3e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -548,7 +548,7 @@ static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, else sid->cpu = -1; if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = evlist->threads->map[thread]; + sid->tid = thread_map__pid(evlist->threads, thread); else sid->tid = -1; } @@ -1475,7 +1475,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar __func__, __LINE__); goto out_close_pipes; } - evlist->threads->map[0] = evlist->workload.pid; + thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); } close(child_ready_pipe[1]); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 33449decf7bd..1b56047af96b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1167,7 +1167,7 @@ retry_sample_id: int group_fd; if (!evsel->cgrp && !evsel->system_wide) - pid = threads->map[thread]; + pid = thread_map__pid(threads, thread); group_fd = get_group_fd(evsel, cpu, thread); retry_open: diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index f4822bd03709..8c3c3a0751bd 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -45,7 +45,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) threads = thread_map__alloc(items); if (threads != NULL) { for (i = 0; i < items; i++) - threads->map[i] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; } @@ -61,8 +61,8 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = tid; - threads->nr = 1; + thread_map__set_pid(threads, 0, tid); + threads->nr = 1; } return threads; @@ -123,8 +123,10 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) threads = tmp; } - for (i = 0; i < items; i++) - threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + for (i = 0; i < items; i++) { + thread_map__set_pid(threads, threads->nr + i, + atoi(namelist[i]->d_name)); + } for (i = 0; i < items; i++) zfree(&namelist[i]); @@ -201,7 +203,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) threads = nt; for (i = 0; i < items; i++) { - threads->map[j++] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name)); zfree(&namelist[i]); } threads->nr = total_tasks; @@ -227,8 +229,8 @@ struct thread_map *thread_map__new_dummy(void) struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = -1; - threads->nr = 1; + thread_map__set_pid(threads, 0, -1); + threads->nr = 1; } return threads; } @@ -267,8 +269,8 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) goto out_free_threads; threads = nt; - threads->map[ntasks - 1] = tid; - threads->nr = ntasks; + thread_map__set_pid(threads, ntasks - 1, tid); + threads->nr = ntasks; } out: return threads; @@ -301,7 +303,7 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) size_t printed = fprintf(fp, "%d thread%s: ", threads->nr, threads->nr > 1 ? "s" : ""); for (i = 0; i < threads->nr; ++i) - printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + printed += fprintf(fp, "%s%d", i ? ", " : "", thread_map__pid(threads, i)); return printed + fprintf(fp, "\n"); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 95313f43cc0f..e22570390470 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -27,4 +27,14 @@ static inline int thread_map__nr(struct thread_map *threads) return threads ? threads->nr : 1; } +static inline pid_t thread_map__pid(struct thread_map *map, int thread) +{ + return map->map[thread]; +} + +static inline void +thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) +{ + map->map[thread] = pid; +} #endif /* __PERF_THREAD_MAP_H */ From 38e89d2b1771649c006720903c06d8b8bedd4d83 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:02 +0200 Subject: [PATCH 226/839] perf thread_map: Change map entries into a struct We need to store command names with the pid. Changing map entries to be a struct holding pid. Process name is coming in shortly. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-2-git-send-email-jolsa@kernel.org [ Split providing the set/get accessors from transforming the entries structs ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index e22570390470..b9f40679f589 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -4,9 +4,13 @@ #include #include +struct thread_map_data { + pid_t pid; +}; + struct thread_map { int nr; - pid_t map[]; + struct thread_map_data map[]; }; struct thread_map *thread_map__new_dummy(void); @@ -29,12 +33,12 @@ static inline int thread_map__nr(struct thread_map *threads) static inline pid_t thread_map__pid(struct thread_map *map, int thread) { - return map->map[thread]; + return map->map[thread].pid; } static inline void thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) { - map->map[thread] = pid; + map->map[thread].pid = pid; } #endif /* __PERF_THREAD_MAP_H */ From 83b2ea257eb1d43e52f76d756722aeb899a2852c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 29 May 2015 16:33:38 +0300 Subject: [PATCH 227/839] perf tools: Allow auxtrace data alignment Allow auxtrace data to be a multiple of something other than page size. That is needed for BTS where the buffer contains 24-byte records. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1432906425-9911-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 7 +++++++ tools/perf/util/auxtrace.h | 1 + 2 files changed, 8 insertions(+) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3dab006b4a03..7e7405c9b936 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1182,6 +1182,13 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, data2 = NULL; } + if (itr->alignment) { + unsigned int unwanted = len1 % itr->alignment; + + len1 -= unwanted; + size -= unwanted; + } + /* padding must be written by fn() e.g. record__process_auxtrace() */ padding = size & 7; if (padding) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a171abbe7301..471aecbc4d68 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -303,6 +303,7 @@ struct auxtrace_record { const char *str); u64 (*reference)(struct auxtrace_record *itr); int (*read_finish)(struct auxtrace_record *itr, int idx); + unsigned int alignment; }; #ifdef HAVE_AUXTRACE_SUPPORT From 9bf39ab2adafd7cf8740859cb49e7b7952813a5d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:29:13 +0200 Subject: [PATCH 228/839] vfs: add file_path() helper Turn d_path(&file->f_path, ...); into file_path(file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- arch/arc/kernel/troubleshoot.c | 10 +++------- arch/blackfin/kernel/trace.c | 2 +- arch/tile/kernel/stack.c | 2 +- arch/tile/mm/elf.c | 2 +- drivers/block/loop.c | 2 +- drivers/md/bitmap.c | 2 +- drivers/md/md.c | 2 +- drivers/usb/gadget/function/f_mass_storage.c | 2 +- drivers/usb/gadget/function/storage_common.c | 2 +- fs/binfmt_elf.c | 4 ++-- fs/coredump.c | 2 +- fs/ext4/super.c | 2 +- fs/open.c | 6 ++++++ include/linux/fs.h | 2 ++ kernel/events/core.c | 2 +- mm/memory.c | 2 +- 16 files changed, 25 insertions(+), 21 deletions(-) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e00a01879025..9f80c5adcb68 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -67,15 +67,12 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf) mmput(mm); if (exe_file) { - path = exe_file->f_path; - path_get(&exe_file->f_path); + path_nm = file_path(exe_file, buf, 255); fput(exe_file); - path_nm = d_path(&path, buf, 255); - path_put(&path); } done: - pr_info("Path: %s\n", path_nm); + pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?"); } static void show_faulting_vma(unsigned long address, char *buf) @@ -99,8 +96,7 @@ static void show_faulting_vma(unsigned long address, char *buf) if (vma && (vma->vm_start <= address)) { struct file *file = vma->vm_file; if (file) { - struct path *path = &file->f_path; - nm = d_path(path, buf, PAGE_SIZE - 1); + nm = file_path(file, buf, PAGE_SIZE - 1); inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index c36efa0c7163..719dd796c12c 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -136,7 +136,7 @@ void decode_address(char *buf, unsigned long address) struct file *file = vma->vm_file; if (file) { - char *d_name = d_path(&file->f_path, _tmpbuf, + char *d_name = file_path(file, _tmpbuf, sizeof(_tmpbuf)); if (!IS_ERR(d_name)) name = d_name; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index c42dce50acd8..8d62cf12c2c0 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -334,7 +334,7 @@ static void describe_addr(struct KBacktraceIterator *kbt, } if (vma->vm_file) { - p = d_path(&vma->vm_file->f_path, buf, bufsize); + p = file_path(vma->vm_file, buf, bufsize); if (IS_ERR(p)) p = "?"; name = kbasename(p); diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index f7ddae3725a4..6225cc998db1 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -56,7 +56,7 @@ static int notify_exec(struct mm_struct *mm) if (exe_file == NULL) goto done_free; - path = d_path(&exe_file->f_path, buf, PAGE_SIZE); + path = file_path(exe_file, buf, PAGE_SIZE); if (IS_ERR(path)) goto done_put; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d7173cb1ea76..0d8ad59413cd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -568,7 +568,7 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) spin_lock_irq(&lo->lo_lock); if (lo->lo_backing_file) - p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); + p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1); spin_unlock_irq(&lo->lo_lock); if (IS_ERR_OR_NULL(p)) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2bc56e2a3526..dda33d648354 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -834,7 +834,7 @@ static void bitmap_file_kick(struct bitmap *bitmap) if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); if (path) - ptr = d_path(&bitmap->storage.file->f_path, + ptr = file_path(bitmap->storage.file, path, PAGE_SIZE); printk(KERN_ALERT diff --git a/drivers/md/md.c b/drivers/md/md.c index 593a02476c78..e67f3ac137bf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5741,7 +5741,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg) /* bitmap disabled, zero the first byte and copy out */ if (!mddev->bitmap_info.file) file->pathname[0] = '\0'; - else if ((ptr = d_path(&mddev->bitmap_info.file->f_path, + else if ((ptr = file_path(mddev->bitmap_info.file, file->pathname, sizeof(file->pathname))), IS_ERR(ptr)) err = PTR_ERR(ptr); diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3cc109f3c9c8..d2259c663996 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2936,7 +2936,7 @@ int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg, if (fsg_lun_is_open(lun)) { p = "(error)"; if (pathbuf) { - p = d_path(&lun->filp->f_path, pathbuf, PATH_MAX); + p = file_path(lun->filp, pathbuf, PATH_MAX); if (IS_ERR(p)) p = "(error)"; } diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index 648f9e489b39..d62683017cf3 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -341,7 +341,7 @@ ssize_t fsg_show_file(struct fsg_lun *curlun, struct rw_semaphore *filesem, down_read(filesem); if (fsg_lun_is_open(curlun)) { /* Get the complete pathname */ - p = d_path(&curlun->filp->f_path, buf, PAGE_SIZE - 1); + p = file_path(curlun->filp, buf, PAGE_SIZE - 1); if (IS_ERR(p)) rc = PTR_ERR(p); else { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 241ef68d2893..5046b6247103 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1530,7 +1530,7 @@ static int fill_files_note(struct memelfnote *note) file = vma->vm_file; if (!file) continue; - filename = d_path(&file->f_path, name_curpos, remaining); + filename = file_path(file, name_curpos, remaining); if (IS_ERR(filename)) { if (PTR_ERR(filename) == -ENAMETOOLONG) { vfree(data); @@ -1540,7 +1540,7 @@ static int fill_files_note(struct memelfnote *note) continue; } - /* d_path() fills at the end, move name down */ + /* file_path() fills at the end, move name down */ /* n = strlen(filename) + 1: */ n = (name_curpos + remaining) - filename; remaining = filename - name_curpos; diff --git a/fs/coredump.c b/fs/coredump.c index bbbe139ab280..5b771b36cc6e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -138,7 +138,7 @@ static int cn_print_exe_file(struct core_name *cn) goto put_exe_file; } - path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); + path = file_path(exe_file, pathbuf, PATH_MAX); if (IS_ERR(path)) { ret = PTR_ERR(path); goto free_buf; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f06d0589ddba..0ae853d2e1f1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -449,7 +449,7 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { - path = d_path(&(file->f_path), pathname, sizeof(pathname)); + path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) path = "(unknown)"; va_start(args, fmt); diff --git a/fs/open.c b/fs/open.c index b1c5823b7f11..1dbc79358d59 100644 --- a/fs/open.c +++ b/fs/open.c @@ -823,6 +823,12 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); +char *file_path(struct file *filp, char *buf, int buflen) +{ + return d_path(&filp->f_path, buf, buflen); +} +EXPORT_SYMBOL(file_path); + /** * vfs_open - open the file at the given path * @path: path to open diff --git a/include/linux/fs.h b/include/linux/fs.h index 2bd77e10e8e5..2c135ad741a9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2500,6 +2500,8 @@ extern struct file * open_exec(const char *); extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); +extern char *file_path(struct file *, char *, int); + #include /* needed for stackable file system support */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..5c964e845483 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5791,7 +5791,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); + name = file_path(file, buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; diff --git a/mm/memory.c b/mm/memory.c index 22e037e3364e..28c10da1efbc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3724,7 +3724,7 @@ void print_vma_addr(char *prefix, unsigned long ip) if (buf) { char *p; - p = d_path(&f->f_path, buf, PAGE_SIZE); + p = file_path(f, buf, PAGE_SIZE); if (IS_ERR(p)) p = "?"; printk("%s%s[%lx+%lx]", prefix, kbasename(p), From 2726d56620ce71f40dd583d51391b86e1ab8cc57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:30:28 +0200 Subject: [PATCH 229/839] vfs: add seq_file_path() helper Turn seq_path(..., &file->f_path, ...); into seq_file_path(..., file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- drivers/md/bitmap.c | 2 +- fs/proc/nommu.c | 2 +- fs/proc/task_mmu.c | 4 ++-- fs/proc/task_nommu.c | 2 +- fs/seq_file.c | 14 ++++++++++++++ include/linux/seq_file.h | 1 + mm/swapfile.c | 2 +- 7 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index dda33d648354..3813fdfee4be 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1922,7 +1922,7 @@ void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) chunk_kb ? "KB" : "B"); if (bitmap->storage.file) { seq_printf(seq, ", file: "); - seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); + seq_file_path(seq, bitmap->storage.file, " \t\n"); } seq_printf(seq, "\n"); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index d4a35746cab9..f8595e8b5cd0 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -64,7 +64,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } seq_putc(m, '\n'); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6dee68d013ff..ca1e091881d4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -310,7 +310,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) */ if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, "\n"); + seq_file_path(m, file, "\n"); goto done; } @@ -1509,7 +1509,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (file) { seq_puts(m, " file="); - seq_path(m, &file->f_path, "\n\t= "); + seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); } else { diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 599ec2e20104..e0d64c92e4f6 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -180,7 +180,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } else if (mm) { pid_t tid = pid_of_stack(priv, vma, is_pid); diff --git a/fs/seq_file.c b/fs/seq_file.c index 555f82155be8..d8a0545ad7ea 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -487,6 +487,20 @@ int seq_path(struct seq_file *m, const struct path *path, const char *esc) } EXPORT_SYMBOL(seq_path); +/** + * seq_file_path - seq_file interface to print a pathname of a file + * @m: the seq_file handle + * @file: the struct file to print + * @esc: set of characters to escape in the output + * + * return the absolute path to the file. + */ +int seq_file_path(struct seq_file *m, struct file *file, const char *esc) +{ + return seq_path(m, &file->f_path, esc); +} +EXPORT_SYMBOL(seq_file_path); + /* * Same as seq_path, but relative to supplied root. */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c77..912a7c482649 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,7 @@ __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); +int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); diff --git a/mm/swapfile.c b/mm/swapfile.c index a7e72103f23b..41e4581af7c5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2032,7 +2032,7 @@ static int swap_show(struct seq_file *swap, void *v) } file = si->swap_file; - len = seq_path(swap, &file->f_path, " \t\n\\"); + len = seq_file_path(swap, file, " \t\n\\"); seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? From 6b6dabc8dcefaf9997ce037c70b32d570ced9d3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Jun 2015 01:37:24 -0400 Subject: [PATCH 230/839] nilfs2_direct_IO(): remove dead code Signed-off-by: Al Viro --- fs/nilfs2/inode.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 258d9fe2521a..4a73d6dffabf 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -307,31 +307,13 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, static ssize_t nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_mapping->host; - size_t count = iov_iter_count(iter); - ssize_t size; + struct inode *inode = file_inode(iocb->ki_filp); if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. - */ - if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + count; - - if (end > isize) - nilfs_write_failed(mapping, end); - } - - return size; + return blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); } const struct address_space_operations nilfs_aops = { From c0c3a718e3ab2430a52a60d614b109e5e48e83e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Jun 2015 09:00:55 +1000 Subject: [PATCH 231/839] fs/posix_acl.c: make posix_acl_create() safer and cleaner If posix_acl_create() returns an error code then "*acl" and "*default_acl" can be uninitialized or point to freed memory. This is a dangerous thing to do. For example, it causes a problem in ocfs2_reflink(): fs/ocfs2/refcounttree.c:4327 ocfs2_reflink() error: potentially using uninitialized 'default_acl'. I've re-written this so we set the pointers to NULL at the start. I've added a temporary "clone" variable to hold the value of "*acl" until end. Setting them to NULL means means we don't need the "no_acl" label. We may as well remove the "apply_umask" stuff forward and remove that label as well. Signed-off-by: Dan Carpenter Cc: Alexander Viro Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/posix_acl.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 84bb65b83570..4fb17ded7d47 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -547,51 +547,45 @@ posix_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { struct posix_acl *p; + struct posix_acl *clone; int ret; + *acl = NULL; + *default_acl = NULL; + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) - goto no_acl; + return 0; p = get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(p)) { - if (p == ERR_PTR(-EOPNOTSUPP)) - goto apply_umask; - return PTR_ERR(p); + if (!p || p == ERR_PTR(-EOPNOTSUPP)) { + *mode &= ~current_umask(); + return 0; } + if (IS_ERR(p)) + return PTR_ERR(p); - if (!p) - goto apply_umask; - - *acl = posix_acl_clone(p, GFP_NOFS); - if (!*acl) + clone = posix_acl_clone(p, GFP_NOFS); + if (!clone) goto no_mem; - ret = posix_acl_create_masq(*acl, mode); + ret = posix_acl_create_masq(clone, mode); if (ret < 0) goto no_mem_clone; - if (ret == 0) { - posix_acl_release(*acl); - *acl = NULL; - } + if (ret == 0) + posix_acl_release(clone); + else + *acl = clone; - if (!S_ISDIR(*mode)) { + if (!S_ISDIR(*mode)) posix_acl_release(p); - *default_acl = NULL; - } else { + else *default_acl = p; - } - return 0; -apply_umask: - *mode &= ~current_umask(); -no_acl: - *default_acl = NULL; - *acl = NULL; return 0; no_mem_clone: - posix_acl_release(*acl); + posix_acl_release(clone); no_mem: posix_acl_release(p); return -ENOMEM; From 2426f3910069ed47c0cc58559a6d088af7920201 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:52 +0200 Subject: [PATCH 232/839] fs: Fix S_NOSEC handling file_remove_suid() could mistakenly set S_NOSEC inode bit when root was modifying the file. As a result following writes to the file by ordinary user would avoid clearing suid or sgid bits. Fix the bug by checking actual mode bits before setting S_NOSEC. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index e8d62688ed91..07f4cb5eab4b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1706,8 +1706,8 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; + if (!error) + inode_has_no_xattr(inode); return error; } From 5fa8e0a1c6a762857ae67d1628c58b9a02362003 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:53 +0200 Subject: [PATCH 233/839] fs: Rename file_remove_suid() to file_remove_privs() file_remove_suid() is a misnomer since it removes also file capabilities stored in xattrs and sets S_NOSEC flag. Also should_remove_suid() tells something else than whether file_remove_suid() call is necessary which leads to bugs. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 2 +- fs/fuse/file.c | 2 +- fs/inode.c | 13 ++++++++----- fs/ntfs/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 2 +- 8 files changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b072e17479aa..86f97282779a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, } current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b6b522b4b31..e55fe32c6224 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -959,7 +959,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) pos = iocb->ki_pos; count = iov_iter_count(from); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ef05b5c4cff..1344647965dc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1169,7 +1169,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err <= 0) goto out; - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; diff --git a/fs/inode.c b/fs/inode.c index 07f4cb5eab4b..849210c155dc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1685,7 +1685,11 @@ static int __remove_suid(struct dentry *dentry, int kill) return notify_change(dentry, &newattrs, NULL); } -int file_remove_suid(struct file *file) +/* + * Remove special file priviledges (suid, capabilities) when file is written + * to or truncated. + */ +int file_remove_privs(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); @@ -1711,7 +1715,7 @@ int file_remove_suid(struct file *file) return error; } -EXPORT_SYMBOL(file_remove_suid); +EXPORT_SYMBOL(file_remove_privs); /** * file_update_time - update mtime and ctime time @@ -1966,9 +1970,8 @@ EXPORT_SYMBOL(inode_dio_wait); * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify * i_flags actually followed this rule, is that there is at least one - * code path which doesn't today --- for example, - * __generic_file_aio_write() calls file_remove_suid() without holding - * i_mutex --- so we use cmpxchg() out of an abundance of caution. + * code path which doesn't today so we use cmpxchg() out of an abundance + * of caution. * * In the long run, i_mutex is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 7bb487e663b4..182bb93aa79c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -382,7 +382,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, base_ni = ni; if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; - err = file_remove_suid(file); + err = file_remove_privs(file); if (unlikely(err)) goto out; /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8121e75352ee..f3e4fbb59985 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -623,7 +623,7 @@ restart: * setgid bits if the process is not being run by root. This keeps * people from modifying setuid and setgid binaries. */ - return file_remove_suid(file); + return file_remove_privs(file); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c135ad741a9..641e68d850cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2553,7 +2553,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); +extern int file_remove_privs(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) diff --git a/mm/filemap.c b/mm/filemap.c index 6bf5e42d560a..f851e36802d5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2536,7 +2536,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; From dbfae0cdcd87602737101d4417811f4323156b54 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:54 +0200 Subject: [PATCH 234/839] fs: Provide function telling whether file_remove_privs() will do anything Provide function telling whether file_remove_privs() will do anything. Currently we only have should_remove_suid() and that does something slightly different. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/inode.c | 44 ++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 1 + 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 849210c155dc..8c2dd74455c9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1673,7 +1673,32 @@ int should_remove_suid(struct dentry *dentry) } EXPORT_SYMBOL(should_remove_suid); -static int __remove_suid(struct dentry *dentry, int kill) +/* + * Return mask of changes for notify_change() that need to be done as a + * response to write or truncate. Return 0 if nothing has to be changed. + * Negative value on error (change should be denied). + */ +int file_needs_remove_privs(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = d_inode(dentry); + int mask = 0; + int ret; + + if (IS_NOSEC(inode)) + return 0; + + mask = should_remove_suid(dentry); + ret = security_inode_need_killpriv(dentry); + if (ret < 0) + return ret; + if (ret) + mask |= ATTR_KILL_PRIV; + return mask; +} +EXPORT_SYMBOL(file_needs_remove_privs); + +static int __remove_privs(struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1693,23 +1718,18 @@ int file_remove_privs(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); - int killsuid; - int killpriv; + int kill; int error = 0; /* Fast path for nothing security related */ if (IS_NOSEC(inode)) return 0; - killsuid = should_remove_suid(dentry); - killpriv = security_inode_need_killpriv(dentry); - - if (killpriv < 0) - return killpriv; - if (killpriv) - error = security_inode_killpriv(dentry); - if (!error && killsuid) - error = __remove_suid(dentry, killsuid); + kill = file_needs_remove_privs(file); + if (kill < 0) + return kill; + if (kill) + error = __remove_privs(dentry, kill); if (!error) inode_has_no_xattr(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 641e68d850cf..ee60e8ab210f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,6 +2554,7 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); +extern int file_needs_remove_privs(struct file *file); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) From 45f147a1bc97c743c6101a8d2741c69a51f583e4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:55 +0200 Subject: [PATCH 235/839] fs: Call security_ops->inode_killpriv on truncate Comment in include/linux/security.h says that ->inode_killpriv() should be called when setuid bit is being removed and that similar security labels (in fact this applies only to file capabilities) should be removed at this time as well. However we don't call ->inode_killpriv() when we remove suid bit on truncate. We fix the problem by calling ->inode_need_killpriv() and subsequently ->inode_killpriv() on truncate the same way as we do it on file write. After this patch there's only one user of should_remove_suid() - ocfs2 - and indeed it's buggy because it doesn't call ->inode_killpriv() on write. However fixing it is difficult because of special locking constraints. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/inode.c | 5 ++--- fs/open.c | 6 ++++-- include/linux/fs.h | 6 +++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 8c2dd74455c9..0401d2c6d087 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1678,9 +1678,8 @@ EXPORT_SYMBOL(should_remove_suid); * response to write or truncate. Return 0 if nothing has to be changed. * Negative value on error (change should be denied). */ -int file_needs_remove_privs(struct file *file) +int dentry_needs_remove_privs(struct dentry *dentry) { - struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); int mask = 0; int ret; @@ -1696,7 +1695,7 @@ int file_needs_remove_privs(struct file *file) mask |= ATTR_KILL_PRIV; return mask; } -EXPORT_SYMBOL(file_needs_remove_privs); +EXPORT_SYMBOL(dentry_needs_remove_privs); static int __remove_privs(struct dentry *dentry, int kill) { diff --git a/fs/open.c b/fs/open.c index 1dbc79358d59..e33dab287fa0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -51,8 +51,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ATTR_FILE; } - /* Remove suid/sgid on truncate too */ - ret = should_remove_suid(dentry); + /* Remove suid, sgid, and file capabilities on truncate too */ + ret = dentry_needs_remove_privs(dentry); + if (ret < 0) + return ret; if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; diff --git a/include/linux/fs.h b/include/linux/fs.h index ee60e8ab210f..1e658b11c265 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,7 +2554,11 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); -extern int file_needs_remove_privs(struct file *file); +extern int dentry_needs_remove_privs(struct dentry *dentry); +static inline int file_needs_remove_privs(struct file *file) +{ + return dentry_needs_remove_privs(file->f_path.dentry); +} extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) From a6de82cab123beaf9406024943caa0242f0618b0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:56 +0200 Subject: [PATCH 236/839] xfs: Correctly lock inode when removing suid and file capabilities Currently XFS calls file_remove_privs() without holding i_mutex. This is wrong because that function can end up messing with file permissions and file capabilities stored in xattrs for which we need i_mutex held. Fix the problem by grabbing iolock exclusively when we will need to change anything in permissions / xattrs. Reviewed-by: Dave Chinner Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/xfs/xfs_file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f3e4fbb59985..71c2c712e609 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -563,6 +563,13 @@ restart: if (error) return error; + /* For changing security info in file_remove_privs() we need i_mutex */ + if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + goto restart; + } /* * If the offset is beyond the size of the file, we need to zero any * blocks that fall between the existing EOF and the start of this @@ -623,7 +630,9 @@ restart: * setgid bits if the process is not being run by root. This keeps * people from modifying setuid and setgid binaries. */ - return file_remove_privs(file); + if (!IS_NOSEC(inode)) + return file_remove_privs(file); + return 0; } /* From db6172c41194576ba2a27e64fa2a5576d11d6eb9 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 19 Mar 2015 12:28:04 +0100 Subject: [PATCH 237/839] fs: cleanup slight list_entry abuse list_entry is just a wrapper for container_of, but it is arguably wrong (and slightly confusing) to use it when the pointed-to struct member is not a struct list_head. Use container_of directly instead. Signed-off-by: Rasmus Villemoes Signed-off-by: Al Viro --- fs/affs/affs.h | 2 +- fs/befs/befs.h | 2 +- fs/coda/coda_linux.h | 2 +- fs/hfs/hfs_fs.h | 2 +- fs/hfsplus/hfsplus_fs.h | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jfs/jfs_incore.h | 2 +- fs/minix/minix.h | 2 +- fs/ntfs/inode.h | 2 +- fs/squashfs/squashfs_fs_i.h | 2 +- fs/sysv/sysv.h | 2 +- fs/udf/udf_i.h | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index cffe8370fb44..c69a87eaf57d 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -64,7 +64,7 @@ struct affs_inode_info { /* short cut to get to the affs specific inode data */ static inline struct affs_inode_info *AFFS_I(struct inode *inode) { - return list_entry(inode, struct affs_inode_info, vfs_inode); + return container_of(inode, struct affs_inode_info, vfs_inode); } /* diff --git a/fs/befs/befs.h b/fs/befs/befs.h index 1fead8d56a98..35d19e8731e3 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -112,7 +112,7 @@ BEFS_SB(const struct super_block *super) static inline struct befs_inode_info * BEFS_I(const struct inode *inode) { - return list_entry(inode, struct befs_inode_info, vfs_inode); + return container_of(inode, struct befs_inode_info, vfs_inode); } static inline befs_blocknr_t diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index d6f7a76a1f5b..f829fe963f5b 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -79,7 +79,7 @@ void coda_sysctl_clean(void); static inline struct coda_inode_info *ITOC(struct inode *inode) { - return list_entry(inode, struct coda_inode_info, vfs_inode); + return container_of(inode, struct coda_inode_info, vfs_inode); } static __inline__ struct CodaFid *coda_i2f(struct inode *inode) diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 95d255219b1e..1f1c7dcbcc2f 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -252,7 +252,7 @@ extern void hfs_mark_mdb_dirty(struct super_block *sb); #define __hfs_u_to_mtime(sec) cpu_to_be32(sec + 2082844800U - sys_tz.tz_minuteswest * 60) #define __hfs_m_to_utime(sec) (be32_to_cpu(sec) - 2082844800U + sys_tz.tz_minuteswest * 60) -#define HFS_I(inode) (list_entry(inode, struct hfs_inode_info, vfs_inode)) +#define HFS_I(inode) (container_of(inode, struct hfs_inode_info, vfs_inode)) #define HFS_SB(sb) ((struct hfs_sb_info *)(sb)->s_fs_info) #define hfs_m_to_utime(time) (struct timespec){ .tv_sec = __hfs_m_to_utime(time) } diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index b0441d65fa54..f91a1faf819e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -263,7 +263,7 @@ struct hfsplus_inode_info { static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) { - return list_entry(inode, struct hfsplus_inode_info, vfs_inode); + return container_of(inode, struct hfsplus_inode_info, vfs_inode); } /* diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b63b75fa00e7..bb04b58d1d69 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -304,7 +304,7 @@ extern const struct address_space_operations hpfs_symlink_aops; static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) { - return list_entry(inode, struct hpfs_inode_info, vfs_inode); + return container_of(inode, struct hpfs_inode_info, vfs_inode); } static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index d200a9b8fd5e..824e61ede465 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -19,7 +19,7 @@ struct kstatfs; struct kvec; -#define JFFS2_INODE_INFO(i) (list_entry(i, struct jffs2_inode_info, vfs_inode)) +#define JFFS2_INODE_INFO(i) (container_of(i, struct jffs2_inode_info, vfs_inode)) #define OFNI_EDONI_2SFFJ(f) (&(f)->vfs_inode) #define JFFS2_SB_INFO(sb) (sb->s_fs_info) #define OFNI_BS_2SFFJ(c) ((struct super_block *)c->os_priv) diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index fa7e795bd8ae..1f26d1910409 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -206,7 +206,7 @@ struct jfs_sb_info { static inline struct jfs_inode_info *JFS_IP(struct inode *inode) { - return list_entry(inode, struct jfs_inode_info, vfs_inode); + return container_of(inode, struct jfs_inode_info, vfs_inode); } static inline int jfs_dirtable_inline(struct inode *inode) diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 1ebd11854622..01ad81dcacc5 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -84,7 +84,7 @@ static inline struct minix_sb_info *minix_sb(struct super_block *sb) static inline struct minix_inode_info *minix_i(struct inode *inode) { - return list_entry(inode, struct minix_inode_info, vfs_inode); + return container_of(inode, struct minix_inode_info, vfs_inode); } static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize) diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 76b6cfb579d7..b3c3469de6cb 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -239,7 +239,7 @@ typedef struct { */ static inline ntfs_inode *NTFS_I(struct inode *inode) { - return (ntfs_inode *)list_entry(inode, big_ntfs_inode, vfs_inode); + return (ntfs_inode *)container_of(inode, big_ntfs_inode, vfs_inode); } static inline struct inode *VFS_I(ntfs_inode *ni) diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index 73588e7700ed..d09fcd6fb85d 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h @@ -49,6 +49,6 @@ struct squashfs_inode_info { static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) { - return list_entry(inode, struct squashfs_inode_info, vfs_inode); + return container_of(inode, struct squashfs_inode_info, vfs_inode); } #endif diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 2c13525131cd..6c212288adcb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -73,7 +73,7 @@ struct sysv_inode_info { static inline struct sysv_inode_info *SYSV_I(struct inode *inode) { - return list_entry(inode, struct sysv_inode_info, vfs_inode); + return container_of(inode, struct sysv_inode_info, vfs_inode); } static inline struct sysv_sb_info *SYSV_SB(struct super_block *sb) diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index b5cd8ed2aa12..b1b9a63d8cf3 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -56,7 +56,7 @@ struct udf_inode_info { static inline struct udf_inode_info *UDF_I(struct inode *inode) { - return list_entry(inode, struct udf_inode_info, vfs_inode); + return container_of(inode, struct udf_inode_info, vfs_inode); } #endif /* _UDF_I_H) */ From e5e6e97fe0f63b374e44a22f5a5c2d151c7fa8c5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Jun 2015 21:49:23 -0400 Subject: [PATCH 238/839] remove the pointless include of lglock.h Signed-off-by: Al Viro --- fs/file_table.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index 294174dcc226..7f9d407c7595 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include From b57c2cb9ea1a02c2ae08e16de8c20cc13ffbf85a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 24 May 2015 17:19:41 +0200 Subject: [PATCH 239/839] pagemap.h: move dir_pages() over there That function was declared in a lot of filesystems to calculate directory pages. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/exofs/dir.c | 6 ------ fs/ext2/dir.c | 5 ----- fs/freevxfs/vxfs_lookup.c | 7 ------- fs/minix/dir.c | 5 ----- fs/nilfs2/dir.c | 5 ----- fs/qnx6/dir.c | 5 ----- fs/sysv/dir.c | 5 ----- include/linux/pagemap.h | 6 ++++++ 8 files changed, 6 insertions(+), 38 deletions(-) diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 4deb0b05b011..e5bb2abf77f9 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -44,12 +44,6 @@ static inline void exofs_put_page(struct page *page) page_cache_release(page); } -/* Accesses dir's inode->i_size must be called under inode lock */ -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr) { loff_t last_byte = inode->i_size; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 796b491e6978..0c6638b40f21 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -70,11 +70,6 @@ static inline void ext2_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 99c7f0a37af4..484b32d3234a 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -61,13 +61,6 @@ const struct file_operations vxfs_dir_operations = { .iterate = vxfs_readdir, }; - -static inline u_long -dir_pages(struct inode *inode) -{ - return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - static inline u_long dir_blocks(struct inode *ip) { diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 118e4e7bc935..d19ac258105a 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -45,11 +45,6 @@ minix_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { struct address_space *mapping = page->mapping; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 0ee0bed3649b..6b8b92b19cec 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -61,11 +61,6 @@ static inline void nilfs_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index 8d64bb5366bf..e1f37278cf97 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -32,11 +32,6 @@ static struct page *qnx6_get_page(struct inode *dir, unsigned long n) return page; } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static unsigned last_entry(struct inode *inode, unsigned long page_nr) { unsigned long last_byte = inode->i_size; diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 8f3555f00c54..63c1bcb224ee 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -33,11 +33,6 @@ static inline void dir_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { struct address_space *mapping = page->mapping; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3736f7065c..808942d31062 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -670,4 +670,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +static inline unsigned long dir_pages(struct inode *inode) +{ + return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; +} + #endif /* _LINUX_PAGEMAP_H */ From 5d754ced150e392c7b5dbf73ed2feddb60cd579a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 24 May 2015 17:19:43 +0200 Subject: [PATCH 240/839] ufs: use dir_pages instead of ufs_dir_pages() dir_pages was declared in a lot of filesystems. Use newly dir_pages() from pagemap.h Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/ufs/dir.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 862d284d438e..74f2e80288bf 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -65,11 +65,6 @@ static inline void ufs_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long ufs_dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr) { ino_t res = 0; @@ -258,7 +253,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr, int namelen = qstr->len; unsigned reclen = UFS_DIR_REC_LEN(namelen); unsigned long start, n; - unsigned long npages = ufs_dir_pages(dir); + unsigned long npages = dir_pages(dir); struct page *page = NULL; struct ufs_inode_info *ui = UFS_I(dir); struct ufs_dir_entry *de; @@ -322,7 +317,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) unsigned short rec_len, name_len; struct page *page = NULL; struct ufs_dir_entry *de; - unsigned long npages = ufs_dir_pages(dir); + unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; loff_t pos; @@ -439,7 +434,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; - unsigned long npages = ufs_dir_pages(inode); + unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); int need_revalidate = file->f_version != inode->i_version; unsigned flags = UFS_SB(sb)->s_flags; @@ -610,7 +605,7 @@ int ufs_empty_dir(struct inode * inode) { struct super_block *sb = inode->i_sb; struct page *page = NULL; - unsigned long i, npages = ufs_dir_pages(inode); + unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { char *kaddr; From dc3f4198eac14e52a98dfc79cd84b45e280f59cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 May 2015 10:10:34 -0400 Subject: [PATCH 241/839] make simple_positive() public Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/s390/hypfs/inode.c | 7 +------ drivers/block/drbd/drbd_debugfs.c | 10 +--------- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- fs/autofs4/autofs_i.h | 5 ----- fs/ceph/dir.c | 2 +- fs/configfs/inode.c | 2 +- fs/debugfs/inode.c | 11 +++-------- fs/libfs.c | 5 ----- fs/nfs/dir.c | 2 +- fs/tracefs/inode.c | 11 +++-------- include/linux/dcache.h | 5 +++++ security/inode.c | 19 ++++++------------- 14 files changed, 25 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1ba6307be4db..11634fa7ab3c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&d_inode(dir)->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b98..8ffad5437232 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -62,18 +62,13 @@ static void hypfs_add_dentry(struct dentry *dentry) hypfs_last_dentry = dentry; } -static inline int hypfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static void hypfs_remove(struct dentry *dentry) { struct dentry *parent; parent = dentry->d_parent; mutex_lock(&d_inode(parent)->i_mutex); - if (hypfs_positive(dentry)) { + if (simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(d_inode(parent), dentry); else diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index a6ee3d750c30..6b88a35fb048 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -419,14 +419,6 @@ static int in_flight_summary_show(struct seq_file *m, void *pos) return 0; } -/* simple_positive(file->f_path.dentry) respectively debugfs_positive(), - * but neither is "reachable" from here. - * So we have our own inline version of it above. :-( */ -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - /* make sure at *open* time that the respective object won't go away. */ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data, struct kref *kref, @@ -444,7 +436,7 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo /* serialize with d_delete() */ mutex_lock(&d_inode(parent)->i_mutex); /* Make sure the object is still alive */ - if (debugfs_positive(file->f_path.dentry) + if (simple_positive(file->f_path.dentry) && kref_get_unless_zero(kref)) ret = 0; mutex_unlock(&d_inode(parent)->i_mutex); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 1ca8e32a9592..25422a3a7238 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,7 +277,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index bdd5d3857203..13ef22bd9459 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(d_inode(parent), tmp); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5b700ef1e59d..c37149b929be 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -238,11 +238,6 @@ static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) return d_inode(sbi->sb->s_root)->i_ino; } -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static inline void __autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4248307fea90..edbb8da02a6a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -161,7 +161,7 @@ more: } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (di->lease_shared_gen == shared_gen && - !d_unhashed(dentry) && d_really_is_positive(dentry) && + simple_positive(dentry) && ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR && ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH && fpos_cmp(ctx->pos, di->offset) <= 0) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 8d89f5fd0331..eae87575e681 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -236,7 +236,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) if (dentry) { spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7eaec88ea970..ef86ad6bdc3e 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -44,11 +44,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb) return inode; } -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - struct debugfs_mount_opts { kuid_t uid; kgid_t gid; @@ -522,7 +517,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (debugfs_positive(dentry)) { + if (simple_positive(dentry)) { dget(dentry); if (d_is_dir(dentry)) ret = simple_rmdir(d_inode(parent), dentry); @@ -602,7 +597,7 @@ void debugfs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!debugfs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -623,7 +618,7 @@ void debugfs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * debugfs_positive() check. + * simple_positive() check. */ goto loop; } diff --git a/fs/libfs.c b/fs/libfs.c index 65e1feca8b98..4d9e6c118fe1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,11 +20,6 @@ #include "internal.h" -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..b9108f4254a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1771,7 +1771,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { - if (d_really_is_positive(dentry) && !d_unhashed(dentry)) + if (simple_positive(dentry)) d_delete(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a..6e8a1400d662 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -496,16 +496,11 @@ struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *pare return dentry; } -static inline int tracefs_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (tracefs_positive(dentry)) { + if (simple_positive(dentry)) { if (dentry->d_inode) { dget(dentry); switch (dentry->d_inode->i_mode & S_IFMT) { @@ -582,7 +577,7 @@ void tracefs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!tracefs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -603,7 +598,7 @@ void tracefs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * tracefs_positive() check. + * simple_positive() check. */ goto loop; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 167ec0934049..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -507,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } +static inline int simple_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8..6df0d8dae1e0 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,11 +25,6 @@ static struct vfsmount *mount; static int mount_count; -static inline int positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static int fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr files[] = {{""}}; @@ -201,14 +196,12 @@ void securityfs_remove(struct dentry *dentry) return; mutex_lock(&d_inode(parent)->i_mutex); - if (positive(dentry)) { - if (d_really_is_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); - dput(dentry); - } + if (simple_positive(dentry)) { + if (d_is_dir(dentry)) + simple_rmdir(d_inode(parent), dentry); + else + simple_unlink(d_inode(parent), dentry); + dput(dentry); } mutex_unlock(&d_inode(parent)->i_mutex); simple_release_fs(&mount, &mount_count); From 59a5b0f7bf74f88da6670bcbf924d8cc1e75b1ee Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 24 Jun 2015 07:54:15 +0200 Subject: [PATCH 242/839] virtio-pci: alloc only resources actually used. Move resource allocation from common code to legacy and modern code. Only request resources actually used, i.e. bar0 in legacy mode and the bar(s) specified by capabilities in modern mode. Signed-off-by: Gerd Hoffmann Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 7 ------- drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 13 ++++++++++++- drivers/virtio/virtio_pci_modern.c | 24 ++++++++++++++++++------ 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index e894eb278d83..4fd036c3468f 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -509,10 +509,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, if (rc) goto err_enable_device; - rc = pci_request_regions(pci_dev, "virtio-pci"); - if (rc) - goto err_request_regions; - if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ @@ -542,8 +538,6 @@ err_register: else virtio_pci_modern_remove(vp_dev); err_probe: - pci_release_regions(pci_dev); -err_request_regions: pci_disable_device(pci_dev); err_enable_device: kfree(vp_dev); @@ -561,7 +555,6 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) else virtio_pci_modern_remove(vp_dev); - pci_release_regions(pci_dev); pci_disable_device(pci_dev); } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 28ee4e56badf..b976d968e793 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -75,6 +75,8 @@ struct virtio_pci_device { /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; + int modern_bars; + /* Legacy only field */ /* the IO mapping for the PCI config space */ void __iomem *ioaddr; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 256a5278a515..48bc9797e530 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -215,6 +215,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) { struct pci_dev *pci_dev = vp_dev->pci_dev; + int rc; /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) @@ -226,9 +227,14 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) return -ENODEV; } + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + if (rc) + return rc; + + rc = -ENOMEM; vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); if (!vp_dev->ioaddr) - return -ENOMEM; + goto err_iomap; vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; @@ -246,6 +252,10 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->del_vq = del_vq; return 0; + +err_iomap: + pci_release_region(pci_dev, 0); + return rc; } void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) @@ -253,4 +263,5 @@ void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) struct pci_dev *pci_dev = vp_dev->pci_dev; pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_region(pci_dev, 0); } diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e88e0997a889..8e5cf194cc0b 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -499,7 +499,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { * Returns offset of the capability, or 0. */ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, - u32 ioresource_types) + u32 ioresource_types, int *bars) { int pos; @@ -520,8 +520,10 @@ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, if (type == cfg_type) { if (pci_resource_len(dev, bar) && - pci_resource_flags(dev, bar) & ioresource_types) + pci_resource_flags(dev, bar) & ioresource_types) { + *bars |= (1 << bar); return pos; + } } } return 0; @@ -617,7 +619,8 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) /* check for a common config: if not, use legacy mode (bar 0). */ common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); if (!common) { dev_info(&pci_dev->dev, "virtio_pci: leaving for legacy driver\n"); @@ -626,9 +629,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) /* If common is there, these should be too... */ isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); if (!isr || !notify) { dev_err(&pci_dev->dev, "virtio_pci: missing capabilities %i/%i/%i\n", @@ -640,7 +645,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) * device-specific configuration. */ device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); + + err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars, + "virtio-pci-modern"); + if (err) + return err; err = -EINVAL; vp_dev->common = map_capability(pci_dev, common, @@ -727,4 +738,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) pci_iounmap(pci_dev, vp_dev->notify_base); pci_iounmap(pci_dev, vp_dev->isr); pci_iounmap(pci_dev, vp_dev->common); + pci_release_selected_regions(pci_dev, vp_dev->modern_bars); } From a903e3b64adfc2e126237f06e0b3ea7d2d8d13b5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 17 Mar 2015 15:31:11 +0200 Subject: [PATCH 243/839] drm/omap: return error if dma_alloc_writecombine fails On a platform with no TILER (e.g. omap3, am43xx), when the user wants to allocate buffer with flag OMAP_BO_SCANOUT, the buffer needs to be allocated with dma_alloc_writecombine. For some reason the driver does not return an error if that alloc fails, instead it continues without backing memory. This leads to errors later when the user tries to use the buffer. This patch makes the driver return an error if dma_alloc_writecombine fails. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_gem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 2ab77801cf5f..874eb6dcf94b 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1378,11 +1378,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL); if (!omap_obj) - goto fail; - - spin_lock(&priv->list_lock); - list_add(&omap_obj->mm_list, &priv->obj_list); - spin_unlock(&priv->list_lock); + return NULL; obj = &omap_obj->base; @@ -1392,11 +1388,19 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size, &omap_obj->paddr, GFP_KERNEL); - if (omap_obj->vaddr) - flags |= OMAP_BO_DMA; + if (!omap_obj->vaddr) { + kfree(omap_obj); + return NULL; + } + + flags |= OMAP_BO_DMA; } + spin_lock(&priv->list_lock); + list_add(&omap_obj->mm_list, &priv->obj_list); + spin_unlock(&priv->list_lock); + omap_obj->flags = flags; if (flags & OMAP_BO_TILED) { From 5d6bed2a9c8bc161bff4cc7cede00f2e0e27a7e7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 22 Jun 2015 18:39:43 +0100 Subject: [PATCH 244/839] ARM: dove: fix legacy dove IRQ numbers v3.18 changed handle_IRQ() to call __handle_domain_irq(), which now rejects attempts to deliver IRQ0. Since IRQ 0 is used as the timer interrupt (just like the PIT on x86), this causes boot to fail as the bogomips calibration never completes. Fix this by shuffling all interrupts up by one. Fixes: a71b092a9c68 ("ARM: Convert handle_IRQ to use __handle_domain_irq") Signed-off-by: Russell King Signed-off-by: Gregory CLEMENT --- arch/arm/mach-dove/include/mach/irqs.h | 118 ++++++++++++------------- arch/arm/mach-dove/irq.c | 8 +- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/arch/arm/mach-dove/include/mach/irqs.h b/arch/arm/mach-dove/include/mach/irqs.h index 03d401d20453..3f29e6bca058 100644 --- a/arch/arm/mach-dove/include/mach/irqs.h +++ b/arch/arm/mach-dove/include/mach/irqs.h @@ -14,73 +14,73 @@ /* * Dove Low Interrupt Controller */ -#define IRQ_DOVE_BRIDGE 0 -#define IRQ_DOVE_H2C 1 -#define IRQ_DOVE_C2H 2 -#define IRQ_DOVE_NAND 3 -#define IRQ_DOVE_PDMA 4 -#define IRQ_DOVE_SPI1 5 -#define IRQ_DOVE_SPI0 6 -#define IRQ_DOVE_UART_0 7 -#define IRQ_DOVE_UART_1 8 -#define IRQ_DOVE_UART_2 9 -#define IRQ_DOVE_UART_3 10 -#define IRQ_DOVE_I2C 11 -#define IRQ_DOVE_GPIO_0_7 12 -#define IRQ_DOVE_GPIO_8_15 13 -#define IRQ_DOVE_GPIO_16_23 14 -#define IRQ_DOVE_PCIE0_ERR 15 -#define IRQ_DOVE_PCIE0 16 -#define IRQ_DOVE_PCIE1_ERR 17 -#define IRQ_DOVE_PCIE1 18 -#define IRQ_DOVE_I2S0 19 -#define IRQ_DOVE_I2S0_ERR 20 -#define IRQ_DOVE_I2S1 21 -#define IRQ_DOVE_I2S1_ERR 22 -#define IRQ_DOVE_USB_ERR 23 -#define IRQ_DOVE_USB0 24 -#define IRQ_DOVE_USB1 25 -#define IRQ_DOVE_GE00_RX 26 -#define IRQ_DOVE_GE00_TX 27 -#define IRQ_DOVE_GE00_MISC 28 -#define IRQ_DOVE_GE00_SUM 29 -#define IRQ_DOVE_GE00_ERR 30 -#define IRQ_DOVE_CRYPTO 31 +#define IRQ_DOVE_BRIDGE (1 + 0) +#define IRQ_DOVE_H2C (1 + 1) +#define IRQ_DOVE_C2H (1 + 2) +#define IRQ_DOVE_NAND (1 + 3) +#define IRQ_DOVE_PDMA (1 + 4) +#define IRQ_DOVE_SPI1 (1 + 5) +#define IRQ_DOVE_SPI0 (1 + 6) +#define IRQ_DOVE_UART_0 (1 + 7) +#define IRQ_DOVE_UART_1 (1 + 8) +#define IRQ_DOVE_UART_2 (1 + 9) +#define IRQ_DOVE_UART_3 (1 + 10) +#define IRQ_DOVE_I2C (1 + 11) +#define IRQ_DOVE_GPIO_0_7 (1 + 12) +#define IRQ_DOVE_GPIO_8_15 (1 + 13) +#define IRQ_DOVE_GPIO_16_23 (1 + 14) +#define IRQ_DOVE_PCIE0_ERR (1 + 15) +#define IRQ_DOVE_PCIE0 (1 + 16) +#define IRQ_DOVE_PCIE1_ERR (1 + 17) +#define IRQ_DOVE_PCIE1 (1 + 18) +#define IRQ_DOVE_I2S0 (1 + 19) +#define IRQ_DOVE_I2S0_ERR (1 + 20) +#define IRQ_DOVE_I2S1 (1 + 21) +#define IRQ_DOVE_I2S1_ERR (1 + 22) +#define IRQ_DOVE_USB_ERR (1 + 23) +#define IRQ_DOVE_USB0 (1 + 24) +#define IRQ_DOVE_USB1 (1 + 25) +#define IRQ_DOVE_GE00_RX (1 + 26) +#define IRQ_DOVE_GE00_TX (1 + 27) +#define IRQ_DOVE_GE00_MISC (1 + 28) +#define IRQ_DOVE_GE00_SUM (1 + 29) +#define IRQ_DOVE_GE00_ERR (1 + 30) +#define IRQ_DOVE_CRYPTO (1 + 31) /* * Dove High Interrupt Controller */ -#define IRQ_DOVE_AC97 32 -#define IRQ_DOVE_PMU 33 -#define IRQ_DOVE_CAM 34 -#define IRQ_DOVE_SDIO0 35 -#define IRQ_DOVE_SDIO1 36 -#define IRQ_DOVE_SDIO0_WAKEUP 37 -#define IRQ_DOVE_SDIO1_WAKEUP 38 -#define IRQ_DOVE_XOR_00 39 -#define IRQ_DOVE_XOR_01 40 -#define IRQ_DOVE_XOR0_ERR 41 -#define IRQ_DOVE_XOR_10 42 -#define IRQ_DOVE_XOR_11 43 -#define IRQ_DOVE_XOR1_ERR 44 -#define IRQ_DOVE_LCD_DCON 45 -#define IRQ_DOVE_LCD1 46 -#define IRQ_DOVE_LCD0 47 -#define IRQ_DOVE_GPU 48 -#define IRQ_DOVE_PERFORM_MNTR 49 -#define IRQ_DOVE_VPRO_DMA1 51 -#define IRQ_DOVE_SSP_TIMER 54 -#define IRQ_DOVE_SSP 55 -#define IRQ_DOVE_MC_L2_ERR 56 -#define IRQ_DOVE_CRYPTO_ERR 59 -#define IRQ_DOVE_GPIO_24_31 60 -#define IRQ_DOVE_HIGH_GPIO 61 -#define IRQ_DOVE_SATA 62 +#define IRQ_DOVE_AC97 (1 + 32) +#define IRQ_DOVE_PMU (1 + 33) +#define IRQ_DOVE_CAM (1 + 34) +#define IRQ_DOVE_SDIO0 (1 + 35) +#define IRQ_DOVE_SDIO1 (1 + 36) +#define IRQ_DOVE_SDIO0_WAKEUP (1 + 37) +#define IRQ_DOVE_SDIO1_WAKEUP (1 + 38) +#define IRQ_DOVE_XOR_00 (1 + 39) +#define IRQ_DOVE_XOR_01 (1 + 40) +#define IRQ_DOVE_XOR0_ERR (1 + 41) +#define IRQ_DOVE_XOR_10 (1 + 42) +#define IRQ_DOVE_XOR_11 (1 + 43) +#define IRQ_DOVE_XOR1_ERR (1 + 44) +#define IRQ_DOVE_LCD_DCON (1 + 45) +#define IRQ_DOVE_LCD1 (1 + 46) +#define IRQ_DOVE_LCD0 (1 + 47) +#define IRQ_DOVE_GPU (1 + 48) +#define IRQ_DOVE_PERFORM_MNTR (1 + 49) +#define IRQ_DOVE_VPRO_DMA1 (1 + 51) +#define IRQ_DOVE_SSP_TIMER (1 + 54) +#define IRQ_DOVE_SSP (1 + 55) +#define IRQ_DOVE_MC_L2_ERR (1 + 56) +#define IRQ_DOVE_CRYPTO_ERR (1 + 59) +#define IRQ_DOVE_GPIO_24_31 (1 + 60) +#define IRQ_DOVE_HIGH_GPIO (1 + 61) +#define IRQ_DOVE_SATA (1 + 62) /* * DOVE General Purpose Pins */ -#define IRQ_DOVE_GPIO_START 64 +#define IRQ_DOVE_GPIO_START 65 #define NR_GPIO_IRQS 64 /* diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 4a5a7aedcb76..df0223f76fa9 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF); if (stat) { - unsigned int hwirq = 32 + __fls(stat); + unsigned int hwirq = 33 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -144,8 +144,8 @@ void __init dove_init_irq(void) { int i; - orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); - orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); + orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); + orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(dove_legacy_handle_irq); From a8955cc3a504e43ecd606e9f0d9bc25de240aacb Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Wed, 24 Jun 2015 17:52:44 +0800 Subject: [PATCH 245/839] nios2: check number of timer instances Display error message if number of timers is less than 2. Signed-off-by: Ley Foon Tan --- arch/nios2/kernel/time.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index 7f4547418ee1..bea8839d56d8 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -18,7 +18,9 @@ #include #include -#define ALTERA_TIMER_STATUS_REG 0 +#define ALTR_TIMER_COMPATIBLE "altr,timer-1.0" + +#define ALTERA_TIMER_STATUS_REG 0 #define ALTERA_TIMER_CONTROL_REG 4 #define ALTERA_TIMER_PERIODL_REG 8 #define ALTERA_TIMER_PERIODH_REG 12 @@ -302,7 +304,16 @@ void read_persistent_clock(struct timespec *ts) void __init time_init(void) { + struct device_node *np; + int count = 0; + + for_each_compatible_node(np, NULL, ALTR_TIMER_COMPATIBLE) + count++; + + if (count < 2) + panic("%d timer is found, it needs 2 timers in system\n", count); + clocksource_of_init(); } -CLOCKSOURCE_OF_DECLARE(nios2_timer, "altr,timer-1.0", nios2_time_init); +CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: [PATCH 246/839] NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 9 ++- fs/nfs/nfs42proc.c | 27 +++++++++ fs/nfs/nfs42xdr.c | 122 ++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 4 +- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 43 ++++++++++++++ 8 files changed, 205 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..ac929685350b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -165,3 +165,30 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static const struct rpc_call_ops nfs42_layoutstat_ops = { +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..9aae0205ef11 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,28 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_LAYOUTSTATS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + + res->rpc_status = be32_to_cpup(p++); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* * Decode ALLOCATE request */ @@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9cd45f1d60f..643ce3a91b22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -476,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a07555d6968c..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c959e7eb5bd4..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; From 1b4a4bd82c308aed36f13fff4beb33d3840cfa0e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:56 +0800 Subject: [PATCH 247/839] pNFS: fill in nfs42_layoutstat_ops Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ac929685350b..a0530418255f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -166,7 +171,44 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + /* well, we don't care about errors at all! */ + if (task->tk_status) + dprintk("%s server returns %d\n", __func__, task->tk_status); +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, }; int nfs42_proc_layoutstats_generic(struct nfs_server *server, @@ -186,6 +228,11 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, }; struct rpc_task *task; + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); task = rpc_run_task(&task_setup); if (IS_ERR(task)) From 8733408d6ed713d080c325262d7b51a780136d41 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:57 +0800 Subject: [PATCH 248/839] pnfs: add pnfs_report_layoutstat helper function Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 4 ++++ fs/nfs/pnfs.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 3 +++ 3 files changed, 56 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a0530418255f..ee0248340a42 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -198,6 +198,10 @@ static void nfs42_layoutstat_release(void *calldata) { struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); nfs_iput_and_deactive(data->inode); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d47c188682b1..6279cff7df74 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -2250,3 +2251,51 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..0a4723945db2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,6 +292,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); +int pnfs_report_layoutstat(struct inode *inode); /* nfs4_deviceid_flags */ enum { From 27797d1bb36bf9e21fa14094f23ab785b442bd76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:58 +0800 Subject: [PATCH 249/839] pNFS/flexfiles: Remove unused struct members user_name, group_name Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d..275e4b735247 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -48,8 +48,6 @@ struct nfs4_ff_layout_mirror { u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; From abcb7bfc9fdecd1550b98a5e13d40bfba6c0649b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:59 +0800 Subject: [PATCH 250/839] pNFS/flexfiles: add layoutstats tracking Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 192 +++++++++++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.h | 22 +++ 2 files changed, 203 insertions(+), 11 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0f1410c94827..8ac31fbf040a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -412,6 +412,130 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t old, new; + + /* + * Note: careful here! + * If the counter is zero, then we must not increment it until after + * we've set the start_time. + * If we were instead to use atomic_inc_return(), then another + * request might come in, bump, and then call end_busy_timer() + * before we've set the timer->start_time. + */ + old = timer->start_time; + if (atomic_inc_not_zero(&timer->n_ops) == 0) { + new = ktime_get(); + cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); + atomic_inc(&timer->n_ops); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + now = ktime_get(); + start.tv64 = xchg(&timer->start_time.tv64, now.tv64); + atomic_dec(&timer->n_ops); + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static void +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +{ + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -906,6 +1030,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -959,15 +1087,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -979,6 +1107,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1080,6 +1212,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1113,15 +1249,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1131,6 +1267,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1149,8 +1290,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1158,10 +1308,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1202,14 +1372,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1253,7 +1423,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1382,6 +1551,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 275e4b735247..f7493f7cf13c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -41,6 +41,26 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { u32 ds_count; u32 efficiency; @@ -52,6 +72,8 @@ struct nfs4_ff_layout_mirror { u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; }; struct nfs4_ff_layout_segment { From d983803d3818a7aef3200147c72a45f573d84537 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:00 +0800 Subject: [PATCH 251/839] pNFS/flexfiles: track when layout is first used So that we can report cumulative time since the beginning of statistics collection of the layout. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 +++++++--- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 8ac31fbf040a..4215278c9c76 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -451,9 +451,13 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) } static void -nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) { + static const ktime_t notime = {0}; + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + cmpxchg(&mirror->start_time, notime, ktime_get()); } static void @@ -491,7 +495,7 @@ nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); } @@ -514,7 +518,7 @@ nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f7493f7cf13c..0e7366f44d11 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -74,6 +74,7 @@ struct nfs4_ff_layout_mirror { spinlock_t lock; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; }; struct nfs4_ff_layout_segment { From ad4dc53e6496ba0b3c18aeda6f3367d9800b6ebf Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:01 +0800 Subject: [PATCH 252/839] pnfs/flexfiles: add ff_layout_prepare_layoutstats It fills in the generic part of LAYOUTSTATS call. One thing to note is that we don't really track if IO is continuous or not. So just fake to use the completed bytes for it. Still missing flexfiles specific part, which will be included in the next patch. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 71 ++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4215278c9c76..cd999af504d9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -1659,6 +1660,75 @@ out: dprintk("%s: Return\n", __func__); } +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = NULL; + devinfo->layout_private = NULL; + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1681,6 +1751,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, }; static int __init nfs4flexfilelayout_init(void) From 27c430644369ccd9a2272492ba6d0e85e2e4800b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:02 +0800 Subject: [PATCH 253/839] pnfs/flexfiles: encode LAYOUTSTATS flexfiles specific data Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 178 ++++++++++++++++++++++++- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 177 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cd999af504d9..6bcb6d6c9dc3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -272,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -1660,6 +1661,161 @@ out: dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + static bool ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, struct pnfs_layout_segment *pls, @@ -1674,6 +1830,8 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, if (*dev_count >= dev_limit) break; mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; dev = FF_LAYOUT_DEVID_NODE(pls, i); devinfo = &args->devinfo[*dev_count]; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); @@ -1685,8 +1843,10 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->layoutstats_encode = NULL; - devinfo->layout_private = NULL; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); ++(*dev_count); } @@ -1729,6 +1889,19 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return 0; } +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1752,6 +1925,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 0e7366f44d11..7e248874f46d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -75,6 +75,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; + struct pnfs_layout_segment *lseg; /* back pointer */ }; struct nfs4_ff_layout_segment { From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: [PATCH 254/839] nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 +++ fs/nfs/pnfs.c | 7 +++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ee0248340a42..06c74cd93875 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -204,6 +204,9 @@ nfs42_layoutstat_release(void *calldata) nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); nfs_iput_and_deactive(data->inode); kfree(data->args.devinfo); kfree(data); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6279cff7df74..5b5224341bcd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2257,6 +2257,7 @@ pnfs_report_layoutstat(struct inode *inode) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); struct nfs42_layoutstat_data *data; struct pnfs_layout_hdr *hdr; int status = 0; @@ -2264,6 +2265,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + spin_lock(&inode->i_lock); if (!NFS_I(inode)->layout) { spin_unlock(&inode->i_lock); @@ -2296,6 +2300,9 @@ out_free: kfree(data); out_put: pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { From 97ba375b5df43790aa82c256f7f16ee8eb95c272 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:04 +0800 Subject: [PATCH 255/839] pnfs/flexfiles: report layoutstat regularly As a simple scheme, report every minute if IO is still going on. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 27 ++++++++++++++++++++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 6 +++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6bcb6d6c9dc3..d3b3b6236711 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -452,14 +452,23 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) return ktime_sub(ktime_get(), task->tk_start); } -static void +static bool nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, struct nfs4_ff_layoutstat *layoutstat) { static const ktime_t notime = {0}; + ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time, notime, ktime_get()); + cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); + cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; } static void @@ -496,10 +505,15 @@ static void nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void @@ -519,10 +533,15 @@ static void nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 7e248874f46d..6fcd8d5e8e3d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -15,6 +15,9 @@ * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -62,6 +65,7 @@ struct nfs4_ff_layoutstat { }; struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; @@ -75,7 +79,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; - struct pnfs_layout_segment *lseg; /* back pointer */ + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { From 7011bf4eccb63b4f34e4caa37a3a86f59d4031d4 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 19 Jun 2015 11:38:22 +0800 Subject: [PATCH 256/839] ACPICA: Linuxize: Replace __FUNCTION__ with __func__ ACPICA commit cb3d1c79f862cd368d749c9b8d9dced40111b0d0 __FUNCTION__ is MSVC only, in Linux, it is __func__. Lv Zheng. As noted by Christoph Hellwig: "__func__ is in C99 and never. __FUNCTION__ is an old extension supported by various compilers." In ACPICA, this is achieved by string replacement in release script and this patch contains the source code difference between the Linux upstream and ACPICA that is caused by the back porting. Link: https://github.com/acpica/acpica/commit/cb3d1c79 Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utdebug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index 4f3f888d33bb..cd02693841db 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -111,8 +111,8 @@ void acpi_ut_track_stack_ptr(void) * RETURN: Updated pointer to the function name * * DESCRIPTION: Remove the "Acpi" prefix from the function name, if present. - * This allows compiler macros such as __func__ to be used with no - * change to the debug output. + * This allows compiler macros such as __func__ to be used + * with no change to the debug output. * ******************************************************************************/ From eeeee40fcc3f92745639cfb166854656dddeb040 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 24 Jun 2015 14:25:54 -0700 Subject: [PATCH 257/839] Input: axp20x-pek - fix reporting button state as inverted Currently we are reporting the button state as inverted on all boards with an axp209 pmic, tested on a ba10-tvbox, bananapi, bananapro, cubietruck and utoo-p66 tablet. The axp209 datasheet clearly states that the power button must be connected between the PWRON key and ground. Which means that on a press we will get a falling edge (dbf) irq not a rising one, and likewise on release we will get a rising edge (dbr) irq, not a falling one. This commit swaps the check for the 2 irqs fixing the inverted reporting of the power button state. Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Acked-by: Carlo Caione Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index f1c844739cd7..10e140af5aac 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -167,9 +167,13 @@ static irqreturn_t axp20x_pek_irq(int irq, void *pwr) struct input_dev *idev = pwr; struct axp20x_pek *axp20x_pek = input_get_drvdata(idev); - if (irq == axp20x_pek->irq_dbr) + /* + * The power-button is connected to ground so a falling edge (dbf) + * means it is pressed. + */ + if (irq == axp20x_pek->irq_dbf) input_report_key(idev, KEY_POWER, true); - else if (irq == axp20x_pek->irq_dbf) + else if (irq == axp20x_pek->irq_dbr) input_report_key(idev, KEY_POWER, false); input_sync(idev); From 3e30c11c8a902de350281a0d821e17cdb45ef156 Mon Sep 17 00:00:00 2001 From: HungNien Chen Date: Mon, 15 Jun 2015 18:57:08 +0300 Subject: [PATCH 258/839] Input: add a driver for wdt87xx touchscreen controller This is a driver for Weida HiTech WDT87xx series touchscreen controller. Signed-off-by: HungNien Chen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 12 + drivers/input/touchscreen/Makefile | 1 + drivers/input/touchscreen/wdt87xx_i2c.c | 1140 +++++++++++++++++++++++ 3 files changed, 1153 insertions(+) create mode 100644 drivers/input/touchscreen/wdt87xx_i2c.c diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index a398d636ca39..aa2b5f21b89b 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -658,6 +658,18 @@ config TOUCHSCREEN_PIXCIR To compile this driver as a module, choose M here: the module will be called pixcir_i2c_ts. +config TOUCHSCREEN_WDT87XX_I2C + tristate "Weida HiTech I2C touchscreen" + depends on I2C + help + Say Y here if you have a Weida WDT87XX I2C touchscreen + connected to your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called wdt87xx_i2c. + config TOUCHSCREEN_WM831X tristate "Support for WM831x touchscreen controllers" depends on MFD_WM831X diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 44deea743d02..fa3d33bac7fc 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_TOUCHSCREEN_TSC2007) += tsc2007.o obj-$(CONFIG_TOUCHSCREEN_UCB1400) += ucb1400_ts.o obj-$(CONFIG_TOUCHSCREEN_WACOM_W8001) += wacom_w8001.o obj-$(CONFIG_TOUCHSCREEN_WACOM_I2C) += wacom_i2c.o +obj-$(CONFIG_TOUCHSCREEN_WDT87XX_I2C) += wdt87xx_i2c.o obj-$(CONFIG_TOUCHSCREEN_WM831X) += wm831x-ts.o obj-$(CONFIG_TOUCHSCREEN_WM97XX) += wm97xx-ts.o wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9705) += wm9705.o diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c new file mode 100644 index 000000000000..c073ea9345e2 --- /dev/null +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -0,0 +1,1140 @@ +/* + * Weida HiTech WDT87xx TouchScreen I2C driver + * + * Copyright (c) 2015 Weida Hi-Tech Co., Ltd. + * HN Chen + * + * This software is licensed under the terms of the GNU General Public + * License, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDT87XX_NAME "wdt87xx_i2c" +#define WDT87XX_DRV_VER "0.9.5" +#define WDT87XX_FW_NAME "wdt87xx_fw.bin" +#define WDT87XX_CFG_NAME "wdt87xx_cfg.bin" + +#define MODE_ACTIVE 0x01 +#define MODE_READY 0x02 +#define MODE_IDLE 0x03 +#define MODE_SLEEP 0x04 +#define MODE_STOP 0xFF + +#define WDT_MAX_FINGER 10 +#define WDT_RAW_BUF_COUNT 54 +#define WDT_V1_RAW_BUF_COUNT 74 +#define WDT_FIRMWARE_ID 0xa9e368f5 + +#define PG_SIZE 0x1000 +#define MAX_RETRIES 3 + +#define MAX_UNIT_AXIS 0x7FFF + +#define PKT_READ_SIZE 72 +#define PKT_WRITE_SIZE 80 + +/* the finger definition of the report event */ +#define FINGER_EV_OFFSET_ID 0 +#define FINGER_EV_OFFSET_X 1 +#define FINGER_EV_OFFSET_Y 3 +#define FINGER_EV_SIZE 5 + +#define FINGER_EV_V1_OFFSET_ID 0 +#define FINGER_EV_V1_OFFSET_W 1 +#define FINGER_EV_V1_OFFSET_P 2 +#define FINGER_EV_V1_OFFSET_X 3 +#define FINGER_EV_V1_OFFSET_Y 5 +#define FINGER_EV_V1_SIZE 7 + +/* The definition of a report packet */ +#define TOUCH_PK_OFFSET_REPORT_ID 0 +#define TOUCH_PK_OFFSET_EVENT 1 +#define TOUCH_PK_OFFSET_SCAN_TIME 51 +#define TOUCH_PK_OFFSET_FNGR_NUM 53 + +#define TOUCH_PK_V1_OFFSET_REPORT_ID 0 +#define TOUCH_PK_V1_OFFSET_EVENT 1 +#define TOUCH_PK_V1_OFFSET_SCAN_TIME 71 +#define TOUCH_PK_V1_OFFSET_FNGR_NUM 73 + +/* The definition of the controller parameters */ +#define CTL_PARAM_OFFSET_FW_ID 0 +#define CTL_PARAM_OFFSET_PLAT_ID 2 +#define CTL_PARAM_OFFSET_XMLS_ID1 4 +#define CTL_PARAM_OFFSET_XMLS_ID2 6 +#define CTL_PARAM_OFFSET_PHY_CH_X 8 +#define CTL_PARAM_OFFSET_PHY_CH_Y 10 +#define CTL_PARAM_OFFSET_PHY_X0 12 +#define CTL_PARAM_OFFSET_PHY_X1 14 +#define CTL_PARAM_OFFSET_PHY_Y0 16 +#define CTL_PARAM_OFFSET_PHY_Y1 18 +#define CTL_PARAM_OFFSET_PHY_W 22 +#define CTL_PARAM_OFFSET_PHY_H 24 + +/* Communication commands */ +#define PACKET_SIZE 56 +#define VND_REQ_READ 0x06 +#define VND_READ_DATA 0x07 +#define VND_REQ_WRITE 0x08 + +#define VND_CMD_START 0x00 +#define VND_CMD_STOP 0x01 +#define VND_CMD_RESET 0x09 + +#define VND_CMD_ERASE 0x1A + +#define VND_GET_CHECKSUM 0x66 + +#define VND_SET_DATA 0x83 +#define VND_SET_COMMAND_DATA 0x84 +#define VND_SET_CHECKSUM_CALC 0x86 +#define VND_SET_CHECKSUM_LENGTH 0x87 + +#define VND_CMD_SFLCK 0xFC +#define VND_CMD_SFUNL 0xFD + +#define CMD_SFLCK_KEY 0xC39B +#define CMD_SFUNL_KEY 0x95DA + +#define STRIDX_PLATFORM_ID 0x80 +#define STRIDX_PARAMETERS 0x81 + +#define CMD_BUF_SIZE 8 +#define PKT_BUF_SIZE 64 + +/* The definition of the command packet */ +#define CMD_REPORT_ID_OFFSET 0x0 +#define CMD_TYPE_OFFSET 0x1 +#define CMD_INDEX_OFFSET 0x2 +#define CMD_KEY_OFFSET 0x3 +#define CMD_LENGTH_OFFSET 0x4 +#define CMD_DATA_OFFSET 0x8 + +/* The definition of firmware chunk tags */ +#define FOURCC_ID_RIFF 0x46464952 +#define FOURCC_ID_WHIF 0x46494857 +#define FOURCC_ID_FRMT 0x544D5246 +#define FOURCC_ID_FRWR 0x52575246 +#define FOURCC_ID_CNFG 0x47464E43 + +#define CHUNK_ID_FRMT FOURCC_ID_FRMT +#define CHUNK_ID_FRWR FOURCC_ID_FRWR +#define CHUNK_ID_CNFG FOURCC_ID_CNFG + +#define FW_FOURCC1_OFFSET 0 +#define FW_SIZE_OFFSET 4 +#define FW_FOURCC2_OFFSET 8 +#define FW_PAYLOAD_OFFSET 40 + +#define FW_CHUNK_ID_OFFSET 0 +#define FW_CHUNK_SIZE_OFFSET 4 +#define FW_CHUNK_TGT_START_OFFSET 8 +#define FW_CHUNK_PAYLOAD_LEN_OFFSET 12 +#define FW_CHUNK_SRC_START_OFFSET 16 +#define FW_CHUNK_VERSION_OFFSET 20 +#define FW_CHUNK_ATTR_OFFSET 24 +#define FW_CHUNK_PAYLOAD_OFFSET 32 + +/* Controller requires minimum 300us between commands */ +#define WDT_COMMAND_DELAY_MS 2 +#define WDT_FLASH_WRITE_DELAY_MS 4 + +struct wdt87xx_sys_param { + u16 fw_id; + u16 plat_id; + u16 xmls_id1; + u16 xmls_id2; + u16 phy_ch_x; + u16 phy_ch_y; + u16 phy_w; + u16 phy_h; + u32 max_x; + u32 max_y; +}; + +struct wdt87xx_data { + struct i2c_client *client; + struct input_dev *input; + /* Mutex for fw update to prevent concurrent access */ + struct mutex fw_mutex; + struct wdt87xx_sys_param param; + u8 phys[32]; +}; + +static int wdt87xx_i2c_xfer(struct i2c_client *client, + void *txdata, size_t txlen, + void *rxdata, size_t rxlen) +{ + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = 0, + .len = txlen, + .buf = txdata, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = rxlen, + .buf = rxdata, + }, + }; + int error; + int ret; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret != ARRAY_SIZE(msgs)) { + error = ret < 0 ? ret : -EIO; + dev_err(&client->dev, "%s: i2c transfer failed: %d\n", + __func__, error); + return error; + } + + return 0; +} + +static int wdt87xx_get_string(struct i2c_client *client, u8 str_idx, + u8 *buf, size_t len) +{ + u8 tx_buf[] = { 0x22, 0x00, 0x13, 0x0E, str_idx, 0x23, 0x00 }; + u8 rx_buf[PKT_WRITE_SIZE]; + size_t rx_len = len + 2; + int error; + + if (rx_len > sizeof(rx_buf)) + return -EINVAL; + + error = wdt87xx_i2c_xfer(client, tx_buf, sizeof(tx_buf), + rx_buf, rx_len); + if (error) { + dev_err(&client->dev, "get string failed: %d\n", error); + return error; + } + + if (rx_buf[1] != 0x03) { + dev_err(&client->dev, "unexpected response to get string: %d\n", + rx_buf[1]); + return -EINVAL; + } + + rx_len = min_t(size_t, len, rx_buf[0]); + memcpy(buf, &rx_buf[2], rx_len); + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_get_feature(struct i2c_client *client, + u8 *buf, size_t buf_size) +{ + u8 tx_buf[8]; + u8 rx_buf[PKT_WRITE_SIZE]; + size_t tx_len = 0; + size_t rx_len = buf_size + 2; + int error; + + if (rx_len > sizeof(rx_buf)) + return -EINVAL; + + /* Get feature command packet */ + tx_buf[tx_len++] = 0x22; + tx_buf[tx_len++] = 0x00; + if (buf[CMD_REPORT_ID_OFFSET] > 0xF) { + tx_buf[tx_len++] = 0x30; + tx_buf[tx_len++] = 0x02; + tx_buf[tx_len++] = buf[CMD_REPORT_ID_OFFSET]; + } else { + tx_buf[tx_len++] = 0x30 | buf[CMD_REPORT_ID_OFFSET]; + tx_buf[tx_len++] = 0x02; + } + tx_buf[tx_len++] = 0x23; + tx_buf[tx_len++] = 0x00; + + error = wdt87xx_i2c_xfer(client, tx_buf, tx_len, rx_buf, rx_len); + if (error) { + dev_err(&client->dev, "get feature failed: %d\n", error); + return error; + } + + rx_len = min_t(size_t, buf_size, get_unaligned_le16(rx_buf)); + memcpy(buf, &rx_buf[2], rx_len); + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_set_feature(struct i2c_client *client, + const u8 *buf, size_t buf_size) +{ + u8 tx_buf[PKT_WRITE_SIZE]; + int tx_len = 0; + int error; + + /* Set feature command packet */ + tx_buf[tx_len++] = 0x22; + tx_buf[tx_len++] = 0x00; + if (buf[CMD_REPORT_ID_OFFSET] > 0xF) { + tx_buf[tx_len++] = 0x30; + tx_buf[tx_len++] = 0x03; + tx_buf[tx_len++] = buf[CMD_REPORT_ID_OFFSET]; + } else { + tx_buf[tx_len++] = 0x30 | buf[CMD_REPORT_ID_OFFSET]; + tx_buf[tx_len++] = 0x03; + } + tx_buf[tx_len++] = 0x23; + tx_buf[tx_len++] = 0x00; + tx_buf[tx_len++] = (buf_size & 0xFF); + tx_buf[tx_len++] = ((buf_size & 0xFF00) >> 8); + + if (tx_len + buf_size > sizeof(tx_buf)) + return -EINVAL; + + memcpy(&tx_buf[tx_len], buf, buf_size); + tx_len += buf_size; + + error = i2c_master_send(client, tx_buf, tx_len); + if (error < 0) { + dev_err(&client->dev, "set feature failed: %d\n", error); + return error; + } + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_send_command(struct i2c_client *client, int cmd, int value) +{ + u8 cmd_buf[CMD_BUF_SIZE]; + + /* Set the command packet */ + cmd_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_WRITE; + cmd_buf[CMD_TYPE_OFFSET] = VND_SET_COMMAND_DATA; + put_unaligned_le16((u16)cmd, &cmd_buf[CMD_INDEX_OFFSET]); + + switch (cmd) { + case VND_CMD_START: + case VND_CMD_STOP: + case VND_CMD_RESET: + /* Mode selector */ + put_unaligned_le32((value & 0xFF), &cmd_buf[CMD_LENGTH_OFFSET]); + break; + + case VND_CMD_SFLCK: + put_unaligned_le16(CMD_SFLCK_KEY, &cmd_buf[CMD_KEY_OFFSET]); + break; + + case VND_CMD_SFUNL: + put_unaligned_le16(CMD_SFUNL_KEY, &cmd_buf[CMD_KEY_OFFSET]); + break; + + case VND_CMD_ERASE: + case VND_SET_CHECKSUM_CALC: + case VND_SET_CHECKSUM_LENGTH: + put_unaligned_le32(value, &cmd_buf[CMD_KEY_OFFSET]); + break; + + default: + cmd_buf[CMD_REPORT_ID_OFFSET] = 0; + dev_err(&client->dev, "Invalid command: %d\n", cmd); + return -EINVAL; + } + + return wdt87xx_set_feature(client, cmd_buf, sizeof(cmd_buf)); +} + +static int wdt87xx_sw_reset(struct i2c_client *client) +{ + int error; + + dev_dbg(&client->dev, "resetting device now\n"); + + error = wdt87xx_send_command(client, VND_CMD_RESET, 0); + if (error) { + dev_err(&client->dev, "reset failed\n"); + return error; + } + + /* Wait the device to be ready */ + msleep(200); + + return 0; +} + +static const void *wdt87xx_get_fw_chunk(const struct firmware *fw, u32 id) +{ + size_t pos = FW_PAYLOAD_OFFSET; + u32 chunk_id, chunk_size; + + while (pos < fw->size) { + chunk_id = get_unaligned_le32(fw->data + + pos + FW_CHUNK_ID_OFFSET); + if (chunk_id == id) + return fw->data + pos; + + chunk_size = get_unaligned_le32(fw->data + + pos + FW_CHUNK_SIZE_OFFSET); + pos += chunk_size + 2 * sizeof(u32); /* chunk ID + size */ + } + + return NULL; +} + +static int wdt87xx_get_sysparam(struct i2c_client *client, + struct wdt87xx_sys_param *param) +{ + u8 buf[PKT_READ_SIZE]; + int error; + + error = wdt87xx_get_string(client, STRIDX_PARAMETERS, buf, 32); + if (error) { + dev_err(&client->dev, "failed to get parameters\n"); + return error; + } + + param->xmls_id1 = get_unaligned_le16(buf + CTL_PARAM_OFFSET_XMLS_ID1); + param->xmls_id2 = get_unaligned_le16(buf + CTL_PARAM_OFFSET_XMLS_ID2); + param->phy_ch_x = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_CH_X); + param->phy_ch_y = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_CH_Y); + param->phy_w = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_W) / 10; + param->phy_h = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_H) / 10; + + param->max_x = MAX_UNIT_AXIS; + param->max_y = DIV_ROUND_CLOSEST(MAX_UNIT_AXIS * param->phy_h, + param->phy_w); + + error = wdt87xx_get_string(client, STRIDX_PLATFORM_ID, buf, 8); + if (error) { + dev_err(&client->dev, "failed to get platform id\n"); + return error; + } + + param->plat_id = buf[1]; + + buf[0] = 0xf2; + error = wdt87xx_get_feature(client, buf, 16); + if (error) { + dev_err(&client->dev, "failed to get firmware id\n"); + return error; + } + + if (buf[0] != 0xf2) { + dev_err(&client->dev, "wrong id of fw response: 0x%x\n", + buf[0]); + return -EINVAL; + } + + param->fw_id = get_unaligned_le16(&buf[1]); + + dev_info(&client->dev, + "fw_id: 0x%x, plat_id: 0x%x\nxml_id1: %04x, xml_id2: %04x\n", + param->fw_id, param->plat_id, + param->xmls_id1, param->xmls_id2); + + return 0; +} + +static int wdt87xx_validate_firmware(struct wdt87xx_data *wdt, + const struct firmware *fw) +{ + const void *fw_chunk; + u32 data1, data2; + u32 size; + u8 fw_chip_id; + u8 chip_id; + + data1 = get_unaligned_le32(fw->data + FW_FOURCC1_OFFSET); + data2 = get_unaligned_le32(fw->data + FW_FOURCC2_OFFSET); + if (data1 != FOURCC_ID_RIFF || data2 != FOURCC_ID_WHIF) { + dev_err(&wdt->client->dev, "check fw tag failed\n"); + return -EINVAL; + } + + size = get_unaligned_le32(fw->data + FW_SIZE_OFFSET); + if (size != fw->size) { + dev_err(&wdt->client->dev, + "fw size mismatch: expected %d, actual %zd\n", + size, fw->size); + return -EINVAL; + } + + /* + * Get the chip_id from the firmware. Make sure that it is the + * right controller to do the firmware and config update. + */ + fw_chunk = wdt87xx_get_fw_chunk(fw, CHUNK_ID_FRWR); + if (!fw_chunk) { + dev_err(&wdt->client->dev, + "unable to locate firmware chunk\n"); + return -EINVAL; + } + + fw_chip_id = (get_unaligned_le32(fw_chunk + + FW_CHUNK_VERSION_OFFSET) >> 12) & 0xF; + chip_id = (wdt->param.fw_id >> 12) & 0xF; + + if (fw_chip_id != chip_id) { + dev_err(&wdt->client->dev, + "fw version mismatch: fw %d vs. chip %d\n", + fw_chip_id, chip_id); + return -ENODEV; + } + + return 0; +} + +static int wdt87xx_validate_fw_chunk(const void *data, int id) +{ + if (id == CHUNK_ID_FRWR) { + u32 fw_id; + + fw_id = get_unaligned_le32(data + FW_CHUNK_PAYLOAD_OFFSET); + if (fw_id != WDT_FIRMWARE_ID) + return -EINVAL; + } + + return 0; +} + +static int wdt87xx_write_data(struct i2c_client *client, const char *data, + u32 address, int length) +{ + u16 packet_size; + int count = 0; + int error; + u8 pkt_buf[PKT_BUF_SIZE]; + + /* Address and length should be 4 bytes aligned */ + if ((address & 0x3) != 0 || (length & 0x3) != 0) { + dev_err(&client->dev, + "addr & len must be 4 bytes aligned %x, %x\n", + address, length); + return -EINVAL; + } + + while (length) { + packet_size = min(length, PACKET_SIZE); + + pkt_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_WRITE; + pkt_buf[CMD_TYPE_OFFSET] = VND_SET_DATA; + put_unaligned_le16(packet_size, &pkt_buf[CMD_INDEX_OFFSET]); + put_unaligned_le32(address, &pkt_buf[CMD_LENGTH_OFFSET]); + memcpy(&pkt_buf[CMD_DATA_OFFSET], data, packet_size); + + error = wdt87xx_set_feature(client, pkt_buf, sizeof(pkt_buf)); + if (error) + return error; + + length -= packet_size; + data += packet_size; + address += packet_size; + + /* Wait for the controller to finish the write */ + mdelay(WDT_FLASH_WRITE_DELAY_MS); + + if ((++count % 32) == 0) { + /* Delay for fw to clear watch dog */ + msleep(20); + } + } + + return 0; +} + +static u16 misr(u16 cur_value, u8 new_value) +{ + u32 a, b; + u32 bit0; + u32 y; + + a = cur_value; + b = new_value; + bit0 = a ^ (b & 1); + bit0 ^= a >> 1; + bit0 ^= a >> 2; + bit0 ^= a >> 4; + bit0 ^= a >> 5; + bit0 ^= a >> 7; + bit0 ^= a >> 11; + bit0 ^= a >> 15; + y = (a << 1) ^ b; + y = (y & ~1) | (bit0 & 1); + + return (u16)y; +} + +static u16 wdt87xx_calculate_checksum(const u8 *data, size_t length) +{ + u16 checksum = 0; + size_t i; + + for (i = 0; i < length; i++) + checksum = misr(checksum, data[i]); + + return checksum; +} + +static int wdt87xx_get_checksum(struct i2c_client *client, u16 *checksum, + u32 address, int length) +{ + int error; + int time_delay; + u8 pkt_buf[PKT_BUF_SIZE]; + u8 cmd_buf[CMD_BUF_SIZE]; + + error = wdt87xx_send_command(client, VND_SET_CHECKSUM_LENGTH, length); + if (error) { + dev_err(&client->dev, "failed to set checksum length\n"); + return error; + } + + error = wdt87xx_send_command(client, VND_SET_CHECKSUM_CALC, address); + if (error) { + dev_err(&client->dev, "failed to set checksum address\n"); + return error; + } + + /* Wait the operation to complete */ + time_delay = DIV_ROUND_UP(length, 1024); + msleep(time_delay * 30); + + memset(cmd_buf, 0, sizeof(cmd_buf)); + cmd_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_READ; + cmd_buf[CMD_TYPE_OFFSET] = VND_GET_CHECKSUM; + error = wdt87xx_set_feature(client, cmd_buf, sizeof(cmd_buf)); + if (error) { + dev_err(&client->dev, "failed to request checksum\n"); + return error; + } + + memset(pkt_buf, 0, sizeof(pkt_buf)); + pkt_buf[CMD_REPORT_ID_OFFSET] = VND_READ_DATA; + error = wdt87xx_get_feature(client, pkt_buf, sizeof(pkt_buf)); + if (error) { + dev_err(&client->dev, "failed to read checksum\n"); + return error; + } + + *checksum = get_unaligned_le16(&pkt_buf[CMD_DATA_OFFSET]); + return 0; +} + +static int wdt87xx_write_firmware(struct i2c_client *client, const void *chunk) +{ + u32 start_addr = get_unaligned_le32(chunk + FW_CHUNK_TGT_START_OFFSET); + u32 size = get_unaligned_le32(chunk + FW_CHUNK_PAYLOAD_LEN_OFFSET); + const void *data = chunk + FW_CHUNK_PAYLOAD_OFFSET; + int error; + int err1; + int page_size; + int retry = 0; + u16 device_checksum, firmware_checksum; + + dev_dbg(&client->dev, "start 4k page program\n"); + + error = wdt87xx_send_command(client, VND_CMD_STOP, MODE_STOP); + if (error) { + dev_err(&client->dev, "stop report mode failed\n"); + return error; + } + + error = wdt87xx_send_command(client, VND_CMD_SFUNL, 0); + if (error) { + dev_err(&client->dev, "unlock failed\n"); + goto out_enable_reporting; + } + + mdelay(10); + + while (size) { + dev_dbg(&client->dev, "%s: %x, %x\n", __func__, + start_addr, size); + + page_size = min_t(u32, size, PG_SIZE); + size -= page_size; + + for (retry = 0; retry < MAX_RETRIES; retry++) { + error = wdt87xx_send_command(client, VND_CMD_ERASE, + start_addr); + if (error) { + dev_err(&client->dev, + "erase failed at %#08x\n", start_addr); + break; + } + + msleep(50); + + error = wdt87xx_write_data(client, data, start_addr, + page_size); + if (error) { + dev_err(&client->dev, + "write failed at %#08x (%d bytes)\n", + start_addr, page_size); + break; + } + + error = wdt87xx_get_checksum(client, &device_checksum, + start_addr, page_size); + if (error) { + dev_err(&client->dev, + "failed to retrieve checksum for %#08x (len: %d)\n", + start_addr, page_size); + break; + } + + firmware_checksum = + wdt87xx_calculate_checksum(data, page_size); + + if (device_checksum == firmware_checksum) + break; + + dev_err(&client->dev, + "checksum fail: %d vs %d, retry %d\n", + device_checksum, firmware_checksum, retry); + } + + if (retry == MAX_RETRIES) { + dev_err(&client->dev, "page write failed\n"); + error = -EIO; + goto out_lock_device; + } + + start_addr = start_addr + page_size; + data = data + page_size; + } + +out_lock_device: + err1 = wdt87xx_send_command(client, VND_CMD_SFLCK, 0); + if (err1) + dev_err(&client->dev, "lock failed\n"); + + mdelay(10); + +out_enable_reporting: + err1 = wdt87xx_send_command(client, VND_CMD_START, 0); + if (err1) + dev_err(&client->dev, "start to report failed\n"); + + return error ? error : err1; +} + +static int wdt87xx_load_chunk(struct i2c_client *client, + const struct firmware *fw, u32 ck_id) +{ + const void *chunk; + int error; + + chunk = wdt87xx_get_fw_chunk(fw, ck_id); + if (!chunk) { + dev_err(&client->dev, "unable to locate chunk (type %d)\n", + ck_id); + return -EINVAL; + } + + error = wdt87xx_validate_fw_chunk(chunk, ck_id); + if (error) { + dev_err(&client->dev, "invalid chunk (type %d): %d\n", + ck_id, error); + return error; + } + + error = wdt87xx_write_firmware(client, chunk); + if (error) { + dev_err(&client->dev, + "failed to write fw chunk (type %d): %d\n", + ck_id, error); + return error; + } + + return 0; +} + +static int wdt87xx_do_update_firmware(struct i2c_client *client, + const struct firmware *fw, + unsigned int chunk_id) +{ + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + int error; + + error = wdt87xx_validate_firmware(wdt, fw); + if (error) + return error; + + error = mutex_lock_interruptible(&wdt->fw_mutex); + if (error) + return error; + + disable_irq(client->irq); + + error = wdt87xx_load_chunk(client, fw, chunk_id); + if (error) { + dev_err(&client->dev, + "firmware load failed (type: %d): %d\n", + chunk_id, error); + goto out; + } + + error = wdt87xx_sw_reset(client); + if (error) { + dev_err(&client->dev, "soft reset failed: %d\n", error); + goto out; + } + + /* Refresh the parameters */ + error = wdt87xx_get_sysparam(client, &wdt->param); + if (error) + dev_err(&client->dev, + "failed to refresh system paramaters: %d\n", error); +out: + enable_irq(client->irq); + mutex_unlock(&wdt->fw_mutex); + + return error ? error : 0; +} + +static int wdt87xx_update_firmware(struct device *dev, + const char *fw_name, unsigned int chunk_id) +{ + struct i2c_client *client = to_i2c_client(dev); + const struct firmware *fw; + int error; + + error = request_firmware(&fw, fw_name, dev); + if (error) { + dev_err(&client->dev, "unable to retrieve firmware %s: %d\n", + fw_name, error); + return error; + } + + error = wdt87xx_do_update_firmware(client, fw, chunk_id); + + release_firmware(fw); + + return error ? error : 0; +} + +static ssize_t config_csum_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + u32 cfg_csum; + + cfg_csum = wdt->param.xmls_id1; + cfg_csum = (cfg_csum << 16) | wdt->param.xmls_id2; + + return scnprintf(buf, PAGE_SIZE, "%x\n", cfg_csum); +} + +static ssize_t fw_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + + return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.fw_id); +} + +static ssize_t plat_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + + return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.plat_id); +} + +static ssize_t update_config_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int error; + + error = wdt87xx_update_firmware(dev, WDT87XX_CFG_NAME, CHUNK_ID_CNFG); + + return error ? error : count; +} + +static ssize_t update_fw_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int error; + + error = wdt87xx_update_firmware(dev, WDT87XX_FW_NAME, CHUNK_ID_FRWR); + + return error ? error : count; +} + +static DEVICE_ATTR_RO(config_csum); +static DEVICE_ATTR_RO(fw_version); +static DEVICE_ATTR_RO(plat_id); +static DEVICE_ATTR_WO(update_config); +static DEVICE_ATTR_WO(update_fw); + +static struct attribute *wdt87xx_attrs[] = { + &dev_attr_config_csum.attr, + &dev_attr_fw_version.attr, + &dev_attr_plat_id.attr, + &dev_attr_update_config.attr, + &dev_attr_update_fw.attr, + NULL +}; + +static const struct attribute_group wdt87xx_attr_group = { + .attrs = wdt87xx_attrs, +}; + +static void wdt87xx_report_contact(struct input_dev *input, + struct wdt87xx_sys_param *param, + u8 *buf) +{ + int finger_id; + u32 x, y; + u8 w, p; + + finger_id = (buf[FINGER_EV_V1_OFFSET_ID] >> 3) - 1; + if (finger_id < 0) + return; + + /* Check if this is an active contact */ + if (!(buf[FINGER_EV_V1_OFFSET_ID] & 0x1)) + return; + + w = buf[FINGER_EV_V1_OFFSET_W]; + p = buf[FINGER_EV_V1_OFFSET_P]; + + x = get_unaligned_le16(buf + FINGER_EV_V1_OFFSET_X); + + y = get_unaligned_le16(buf + FINGER_EV_V1_OFFSET_Y); + y = DIV_ROUND_CLOSEST(y * param->phy_h, param->phy_w); + + /* Refuse incorrect coordinates */ + if (x > param->max_x || y > param->max_y) + return; + + dev_dbg(input->dev.parent, "tip on (%d), x(%d), y(%d)\n", + finger_id, x, y); + + input_mt_slot(input, finger_id); + input_mt_report_slot_state(input, MT_TOOL_FINGER, 1); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, w); + input_report_abs(input, ABS_MT_PRESSURE, p); + input_report_abs(input, ABS_MT_POSITION_X, x); + input_report_abs(input, ABS_MT_POSITION_Y, y); +} + +static irqreturn_t wdt87xx_ts_interrupt(int irq, void *dev_id) +{ + struct wdt87xx_data *wdt = dev_id; + struct i2c_client *client = wdt->client; + int i, fingers; + int error; + u8 raw_buf[WDT_V1_RAW_BUF_COUNT] = {0}; + + error = i2c_master_recv(client, raw_buf, WDT_V1_RAW_BUF_COUNT); + if (error < 0) { + dev_err(&client->dev, "read v1 raw data failed: %d\n", error); + goto irq_exit; + } + + fingers = raw_buf[TOUCH_PK_V1_OFFSET_FNGR_NUM]; + if (!fingers) + goto irq_exit; + + for (i = 0; i < WDT_MAX_FINGER; i++) + wdt87xx_report_contact(wdt->input, + &wdt->param, + &raw_buf[TOUCH_PK_V1_OFFSET_EVENT + + i * FINGER_EV_V1_SIZE]); + + input_mt_sync_frame(wdt->input); + input_sync(wdt->input); + +irq_exit: + return IRQ_HANDLED; +} + +static int wdt87xx_ts_create_input_device(struct wdt87xx_data *wdt) +{ + struct device *dev = &wdt->client->dev; + struct input_dev *input; + unsigned int res = DIV_ROUND_CLOSEST(MAX_UNIT_AXIS, wdt->param.phy_w); + int error; + + input = devm_input_allocate_device(dev); + if (!input) { + dev_err(dev, "failed to allocate input device\n"); + return -ENOMEM; + } + wdt->input = input; + + input->name = "WDT87xx Touchscreen"; + input->id.bustype = BUS_I2C; + input->phys = wdt->phys; + + input_set_abs_params(input, ABS_MT_POSITION_X, 0, + wdt->param.max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, + wdt->param.max_y, 0, 0); + input_abs_set_res(input, ABS_MT_POSITION_X, res); + input_abs_set_res(input, ABS_MT_POSITION_Y, res); + + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0xFF, 0, 0); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xFF, 0, 0); + + input_mt_init_slots(input, WDT_MAX_FINGER, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + + error = input_register_device(input); + if (error) { + dev_err(dev, "failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +static int wdt87xx_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct wdt87xx_data *wdt; + int error; + + dev_dbg(&client->dev, "adapter=%d, client irq: %d\n", + client->adapter->nr, client->irq); + + /* Check if the I2C function is ok in this adaptor */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENXIO; + + wdt = devm_kzalloc(&client->dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdt->client = client; + mutex_init(&wdt->fw_mutex); + i2c_set_clientdata(client, wdt); + + snprintf(wdt->phys, sizeof(wdt->phys), "i2c-%u-%04x/input0", + client->adapter->nr, client->addr); + + error = wdt87xx_get_sysparam(client, &wdt->param); + if (error) + return error; + + error = wdt87xx_ts_create_input_device(wdt); + if (error) + return error; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, wdt87xx_ts_interrupt, + IRQF_ONESHOT, + client->name, wdt); + if (error) { + dev_err(&client->dev, "request irq failed: %d\n", error); + return error; + } + + error = sysfs_create_group(&client->dev.kobj, &wdt87xx_attr_group); + if (error) { + dev_err(&client->dev, "create sysfs failed: %d\n", error); + return error; + } + + return 0; +} + +static int wdt87xx_ts_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &wdt87xx_attr_group); + + return 0; +} + +static int __maybe_unused wdt87xx_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error; + + disable_irq(client->irq); + + error = wdt87xx_send_command(client, VND_CMD_STOP, MODE_IDLE); + if (error) { + enable_irq(client->irq); + dev_err(&client->dev, + "failed to stop device when suspending: %d\n", + error); + return error; + } + + return 0; +} + +static int __maybe_unused wdt87xx_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error; + + /* + * The chip may have been reset while system is resuming, + * give it some time to settle. + */ + mdelay(100); + + error = wdt87xx_send_command(client, VND_CMD_START, 0); + if (error) + dev_err(&client->dev, + "failed to start device when resuming: %d\n", + error); + + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(wdt87xx_pm_ops, wdt87xx_suspend, wdt87xx_resume); + +static const struct i2c_device_id wdt87xx_dev_id[] = { + { WDT87XX_NAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wdt87xx_dev_id); + +static const struct acpi_device_id wdt87xx_acpi_id[] = { + { "WDHT0001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, wdt87xx_acpi_id); + +static struct i2c_driver wdt87xx_driver = { + .probe = wdt87xx_ts_probe, + .remove = wdt87xx_ts_remove, + .id_table = wdt87xx_dev_id, + .driver = { + .name = WDT87XX_NAME, + .pm = &wdt87xx_pm_ops, + .acpi_match_table = ACPI_PTR(wdt87xx_acpi_id), + }, +}; +module_i2c_driver(wdt87xx_driver); + +MODULE_AUTHOR("HN Chen "); +MODULE_DESCRIPTION("WeidaHiTech WDT87XX Touchscreen driver"); +MODULE_VERSION(WDT87XX_DRV_VER); +MODULE_LICENSE("GPL"); From b8830a4e71b15d0364ac8e6c55301eea73f211da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 23 Jun 2015 10:11:19 +0200 Subject: [PATCH 259/839] dell-laptop: Fix allocating & freeing SMI buffer page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fix kernel crash when probing for rfkill devices in dell-laptop driver failed. Function free_page() was incorrectly used on struct page * instead of virtual address of SMI buffer. This commit also simplify allocating page for SMI buffer by using __get_free_page() function instead of sequential call of functions alloc_page() and page_address(). Signed-off-by: Pali Rohár Acked-by: Michal Hocko Cc: stable@vger.kernel.org Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 9724613c28a6..35758cbc6bc8 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -306,7 +306,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }; static struct calling_interface_buffer *buffer; -static struct page *bufferpage; static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; @@ -2068,12 +2067,11 @@ static int __init dell_init(void) * Allocate buffer below 4GB for SMI data--only 32-bit physical addr * is passed to SMI handler. */ - bufferpage = alloc_page(GFP_KERNEL | GFP_DMA32); - if (!bufferpage) { + buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32); + if (!buffer) { ret = -ENOMEM; goto fail_buffer; } - buffer = page_address(bufferpage); ret = dell_setup_rfkill(); @@ -2135,7 +2133,7 @@ static int __init dell_init(void) fail_backlight: dell_cleanup_rfkill(); fail_rfkill: - free_page((unsigned long)bufferpage); + free_page((unsigned long)buffer); fail_buffer: platform_device_del(platform_device); fail_platform_device2: From 66ba609f7b96096116ca7bbc21ec6922ea41a992 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 11:02:35 +0800 Subject: [PATCH 260/839] libceph: properly release STAT request's raw_data_in Signed-off-by: Yan, Zheng Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c4ec9239249a..72459b9df8a1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_CMPXATTR: ceph_osd_data_release(&op->xattr.osd_data); break; + case CEPH_OSD_OP_STAT: + ceph_osd_data_release(&op->raw_data_in); + break; default: break; } From 144cba1493fdd6e3e1980e439a31df877831ebcd Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 11:09:54 +0800 Subject: [PATCH 261/839] libceph: allow setting osd_req_op's flags Signed-off-by: Yan, Zheng Reviewed-by: Alex Elder --- drivers/block/rbd.c | 4 ++-- fs/ceph/addr.c | 3 ++- fs/ceph/file.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 24 +++++++++++++++--------- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec6c5c6e1ac9..349115ae3bc2 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2376,7 +2376,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, } if (opcode == CEPH_OSD_OP_DELETE) - osd_req_op_init(osd_request, num_ops, opcode); + osd_req_op_init(osd_request, num_ops, opcode, 0); else osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, 0, 0); @@ -2848,7 +2848,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) goto out; stat_request->callback = rbd_img_obj_exists_callback; - osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0); osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, false, false); rbd_osd_req_format_read(stat_request); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e162bcd105ee..feeaf3b65fa0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -884,7 +884,8 @@ get_more_pages: } if (do_sync) - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, + CEPH_OSD_OP_STARTSYNC, 0); req->r_callback = writepages_finish; req->r_inode = inode; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b6b522b4b31..777eb21d556f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -614,7 +614,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) break; } - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); n = iov_iter_get_pages_alloc(from, &pages, len, &start); if (unlikely(n < 0)) { diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 72459b9df8a1..4cb4fab46e4f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -453,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE) */ static struct ceph_osd_req_op * _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, - u16 opcode) + u16 opcode, u32 flags) { struct ceph_osd_req_op *op; @@ -463,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, op = &osd_req->r_ops[which]; memset(op, 0, sizeof (*op)); op->op = opcode; + op->flags = flags; return op; } void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode) + unsigned int which, u16 opcode, u32 flags) { - (void)_osd_req_op_init(osd_req, which, opcode); + (void)_osd_req_op_init(osd_req, which, opcode, flags); } EXPORT_SYMBOL(osd_req_op_init); @@ -479,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && @@ -518,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update); void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; @@ -555,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; @@ -588,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); @@ -605,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_write_size) { struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - CEPH_OSD_OP_SETALLOCHINT); + CEPH_OSD_OP_SETALLOCHINT, + 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; @@ -789,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { - osd_req_op_init(req, which, opcode); + osd_req_op_init(req, which, opcode, 0); } else { u32 object_size = le32_to_cpu(layout->fl_object_size); u32 object_base = off - objoff; From 10183a69551f76702ac68bc74a437b25419c6de0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 15:33:28 +0800 Subject: [PATCH 262/839] ceph: check OSD caps before read/write Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 203 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/caps.c | 4 + fs/ceph/inode.c | 3 + fs/ceph/mds_client.c | 4 + fs/ceph/mds_client.h | 9 ++ fs/ceph/super.c | 15 +++- fs/ceph/super.h | 17 ++-- 7 files changed, 249 insertions(+), 6 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index feeaf3b65fa0..b96027727248 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1598,3 +1598,206 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &ceph_vmops; return 0; } + +enum { + POOL_READ = 1, + POOL_WRITE = 2, +}; + +static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) +{ + struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; + struct rb_node **p, *parent; + struct ceph_pool_perm *perm; + struct page **pages; + int err = 0, err2 = 0, have = 0; + + down_read(&mdsc->pool_perm_rwsem); + p = &mdsc->pool_perm_tree.rb_node; + while (*p) { + perm = rb_entry(*p, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + up_read(&mdsc->pool_perm_rwsem); + if (*p) + goto out; + + dout("__ceph_pool_perm_get pool %u no perm cached\n", pool); + + down_write(&mdsc->pool_perm_rwsem); + parent = NULL; + while (*p) { + parent = *p; + perm = rb_entry(parent, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + if (*p) { + up_write(&mdsc->pool_perm_rwsem); + goto out; + } + + rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ci->i_snap_realm->cached_context, + 1, false, GFP_NOFS); + if (!rd_req) { + err = -ENOMEM; + goto out_unlock; + } + + rd_req->r_flags = CEPH_OSD_FLAG_READ; + osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); + rd_req->r_base_oloc.pool = pool; + snprintf(rd_req->r_base_oid.name, sizeof(rd_req->r_base_oid.name), + "%llx.00000000", ci->i_vino.ino); + rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); + + wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ci->i_snap_realm->cached_context, + 1, false, GFP_NOFS); + if (!wr_req) { + err = -ENOMEM; + goto out_unlock; + } + + wr_req->r_flags = CEPH_OSD_FLAG_WRITE | + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; + osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); + wr_req->r_base_oloc.pool = pool; + wr_req->r_base_oid = rd_req->r_base_oid; + + /* one page should be large enough for STAT data */ + pages = ceph_alloc_page_vector(1, GFP_KERNEL); + if (IS_ERR(pages)) { + err = PTR_ERR(pages); + goto out_unlock; + } + + osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE, + 0, false, true); + ceph_osdc_build_request(rd_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); + + ceph_osdc_build_request(wr_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); + + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req); + if (!err2) + err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req); + + if (err >= 0 || err == -ENOENT) + have |= POOL_READ; + else if (err != -EPERM) + goto out_unlock; + + if (err2 == 0 || err2 == -EEXIST) + have |= POOL_WRITE; + else if (err2 != -EPERM) { + err = err2; + goto out_unlock; + } + + perm = kmalloc(sizeof(*perm), GFP_NOFS); + if (!perm) { + err = -ENOMEM; + goto out_unlock; + } + + perm->pool = pool; + perm->perm = have; + rb_link_node(&perm->node, parent, p); + rb_insert_color(&perm->node, &mdsc->pool_perm_tree); + err = 0; +out_unlock: + up_write(&mdsc->pool_perm_rwsem); + + if (rd_req) + ceph_osdc_put_request(rd_req); + if (wr_req) + ceph_osdc_put_request(wr_req); +out: + if (!err) + err = have; + dout("__ceph_pool_perm_get pool %u result = %d\n", pool, err); + return err; +} + +int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) +{ + u32 pool; + int ret, flags; + + if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), + NOPOOLPERM)) + return 0; + + spin_lock(&ci->i_ceph_lock); + flags = ci->i_ceph_flags; + pool = ceph_file_layout_pg_pool(ci->i_layout); + spin_unlock(&ci->i_ceph_lock); +check: + if (flags & CEPH_I_POOL_PERM) { + if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { + dout("ceph_pool_perm_check pool %u no read perm\n", + pool); + return -EPERM; + } + if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { + dout("ceph_pool_perm_check pool %u no write perm\n", + pool); + return -EPERM; + } + return 0; + } + + ret = __ceph_pool_perm_get(ci, pool); + if (ret < 0) + return ret; + + flags = CEPH_I_POOL_PERM; + if (ret & POOL_READ) + flags |= CEPH_I_POOL_RD; + if (ret & POOL_WRITE) + flags |= CEPH_I_POOL_WR; + + spin_lock(&ci->i_ceph_lock); + if (pool == ceph_file_layout_pg_pool(ci->i_layout)) { + ci->i_ceph_flags = flags; + } else { + pool = ceph_file_layout_pg_pool(ci->i_layout); + flags = ci->i_ceph_flags; + } + spin_unlock(&ci->i_ceph_lock); + goto check; +} + +void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc) +{ + struct ceph_pool_perm *perm; + struct rb_node *n; + + while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) { + n = rb_first(&mdsc->pool_perm_tree); + perm = rb_entry(n, struct ceph_pool_perm, node); + rb_erase(n, &mdsc->pool_perm_tree); + kfree(perm); + } +} diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index be5ea6af8366..7c8e93aeb01e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2233,6 +2233,10 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, { int _got, check_max, ret, err = 0; + ret = ceph_pool_perm_check(ci, need); + if (ret < 0) + return ret; + retry: if (endoff > 0) check_max_size(&ci->vfs_inode, endoff); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e876e1944519..1a68c0e38a52 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -753,7 +753,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) + ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, le32_to_cpu(info->truncate_seq), le64_to_cpu(info->truncate_size), diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84f37f34f9aa..f125e06dacb8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3414,6 +3414,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ceph_caps_init(mdsc); ceph_adjust_min_caps(mdsc, fsc->min_caps); + init_rwsem(&mdsc->pool_perm_rwsem); + mdsc->pool_perm_tree = RB_ROOT; + return 0; } @@ -3607,6 +3610,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); ceph_caps_finalize(mdsc); + ceph_pool_perm_destroy(mdsc); } void ceph_mdsc_destroy(struct ceph_fs_client *fsc) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 1875b5d985c6..d474141c034a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -260,6 +260,12 @@ struct ceph_mds_request { int r_num_caps; }; +struct ceph_pool_perm { + struct rb_node node; + u32 pool; + int perm; +}; + /* * mds client state */ @@ -328,6 +334,9 @@ struct ceph_mds_client { spinlock_t dentry_lru_lock; struct list_head dentry_lru; int num_dentry; + + struct rw_semaphore pool_perm_rwsem; + struct rb_root pool_perm_tree; }; extern const char *ceph_mds_op_name(int op); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9905374078..9a5350030af8 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -134,10 +134,12 @@ enum { Opt_noino32, Opt_fscache, Opt_nofscache, + Opt_poolperm, + Opt_nopoolperm, #ifdef CONFIG_CEPH_FS_POSIX_ACL Opt_acl, #endif - Opt_noacl + Opt_noacl, }; static match_table_t fsopt_tokens = { @@ -165,6 +167,8 @@ static match_table_t fsopt_tokens = { {Opt_noino32, "noino32"}, {Opt_fscache, "fsc"}, {Opt_nofscache, "nofsc"}, + {Opt_poolperm, "poolperm"}, + {Opt_nopoolperm, "nopoolperm"}, #ifdef CONFIG_CEPH_FS_POSIX_ACL {Opt_acl, "acl"}, #endif @@ -268,6 +272,13 @@ static int parse_fsopt_token(char *c, void *private) case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; + case Opt_poolperm: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; + printk ("pool perm"); + break; + case Opt_nopoolperm: + fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= MS_POSIXACL; @@ -436,6 +447,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) seq_puts(m, ",fsc"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) + seq_puts(m, ",nopoolperm"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & MS_POSIXACL) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fa20e1318939..18b917c7feb4 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -35,6 +35,7 @@ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ +#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \ CEPH_MOUNT_OPT_DCACHE) @@ -438,10 +439,14 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. */ -#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ +#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ +#define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */ +#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH (1 << 3) /* do not flush dirty caps */ +#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ +#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ +#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ + static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, int release_count, int ordered_count) @@ -879,6 +884,9 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); /* addr.c */ extern const struct address_space_operations ceph_aops; extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); +extern int ceph_uninline_data(struct file *filp, struct page *locked_page); +extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need); +extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); /* file.c */ extern const struct file_operations ceph_file_fops; @@ -890,7 +898,6 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -int ceph_uninline_data(struct file *filp, struct page *locked_page); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; From b01da6a08c523f5d1cd1a51107f8cca061040f45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 4 May 2015 14:10:11 +0300 Subject: [PATCH 263/839] libceph: use kvfree() instead of open-coding it This one sneaked in through vfs tree with commit 2b777c9dd9eb ("ceph_sync_read: stop poking into iov_iter guts"). Signed-off-by: Ilya Dryomov --- net/ceph/pagevec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 096d91447e06..d4f5f220a8e5 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); } - if (is_vmalloc_addr(pages)) - vfree(pages); - else - kfree(pages); + kvfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); From 7b06a826e7c52d77ce801e5960ecf0338eafe886 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 1 May 2015 10:03:40 +0800 Subject: [PATCH 264/839] ceph: use empty snap context for uninline_data and get_pool_perm Cached_context in ceph_snap_realm is directly accessed by uninline_data() and get_pool_perm(). This is racy in theory. both uninline_data() and get_pool_perm() do not modify existing object, they only create new object. So we can pass the empty snap context to them. Unlike cached_context in ceph_snap_realm, we do not need to protect the empty snap context. Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 9 ++++----- fs/ceph/snap.c | 18 +++++++++--------- fs/ceph/super.h | 1 + 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b96027727248..ccc4325aa5c5 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1510,8 +1510,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 0, 1, CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, - 0, 0, false); + ceph_empty_snapc, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -1529,7 +1528,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 1, 3, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, + ceph_empty_snapc, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -1653,7 +1652,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) } rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, - ci->i_snap_realm->cached_context, + ceph_empty_snapc, 1, false, GFP_NOFS); if (!rd_req) { err = -ENOMEM; @@ -1668,7 +1667,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, - ci->i_snap_realm->cached_context, + ceph_empty_snapc, 1, false, GFP_NOFS); if (!wr_req) { err = -ENOMEM; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index a97e39f09ba6..b2a945345d2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -296,7 +296,7 @@ static int cmpu64_rev(const void *a, const void *b) } -static struct ceph_snap_context *empty_snapc; +struct ceph_snap_context *ceph_empty_snapc; /* * build the snap context for a given realm. @@ -338,9 +338,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } - if (num == 0 && realm->seq == empty_snapc->seq) { - ceph_get_snap_context(empty_snapc); - snapc = empty_snapc; + if (num == 0 && realm->seq == ceph_empty_snapc->seq) { + ceph_get_snap_context(ceph_empty_snapc); + snapc = ceph_empty_snapc; goto done; } @@ -482,7 +482,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); - } else if (ci->i_snap_realm->cached_context == empty_snapc) { + } else if (ci->i_snap_realm->cached_context == ceph_empty_snapc) { dout("queue_cap_snap %p empty snapc\n", inode); kfree(capsnap); } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| @@ -964,14 +964,14 @@ out: int __init ceph_snap_init(void) { - empty_snapc = ceph_create_snap_context(0, GFP_NOFS); - if (!empty_snapc) + ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS); + if (!ceph_empty_snapc) return -ENOMEM; - empty_snapc->seq = 1; + ceph_empty_snapc->seq = 1; return 0; } void ceph_snap_exit(void) { - ceph_put_snap_context(empty_snapc); + ceph_put_snap_context(ceph_empty_snapc); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 18b917c7feb4..b182fd7499d9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -692,6 +692,7 @@ static inline int default_congestion_kb(void) /* snap.c */ +extern struct ceph_snap_context *ceph_empty_snapc; struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino); extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc, From 5dda377cf0a6bd43f64a3c1efb670d7c668e7b29 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 30 Apr 2015 14:40:54 +0800 Subject: [PATCH 265/839] ceph: set i_head_snapc when getting CEPH_CAP_FILE_WR reference In most cases that snap context is needed, we are holding reference of CEPH_CAP_FILE_WR. So we can set ceph inode's i_head_snapc when getting the CEPH_CAP_FILE_WR reference, and make codes get snap context from i_head_snapc. This makes the code simpler. Another benefit of this change is that we can handle snap notification more elegantly. Especially when snap context is updated while someone else is doing write. The old queue cap_snap code may set cap_snap's context to ether the old context or the new snap context, depending on if i_head_snapc is set. The new queue capp_snap code always set cap_snap's context to the old snap context. Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 37 +++++------- fs/ceph/caps.c | 157 ++++++++++++++++++++++++++++++++++--------------- fs/ceph/file.c | 31 +++++++--- fs/ceph/snap.c | 136 ++++++++++++++++++++++-------------------- 4 files changed, 218 insertions(+), 143 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ccc4325aa5c5..5f53ac0d9d7c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -87,17 +87,21 @@ static int ceph_set_page_dirty(struct page *page) inode = mapping->host; ci = ceph_inode(inode); - /* - * Note that we're grabbing a snapc ref here without holding - * any locks! - */ - snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); - /* dirty the head */ spin_lock(&ci->i_ceph_lock); - if (ci->i_head_snapc == NULL) - ci->i_head_snapc = ceph_get_snap_context(snapc); - ++ci->i_wrbuffer_ref_head; + BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + capsnap->dirty_pages++; + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + ++ci->i_wrbuffer_ref_head; + } if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; @@ -1033,7 +1037,6 @@ static int ceph_update_writeable_page(struct file *file, { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; loff_t page_off = pos & PAGE_CACHE_MASK; int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; @@ -1045,10 +1048,6 @@ retry_locked: /* writepages currently holds page lock, but if we change that later, */ wait_on_page_writeback(page); - /* check snap context */ - BUG_ON(!ci->i_snap_realm); - down_read(&mdsc->snap_rwsem); - BUG_ON(!ci->i_snap_realm->cached_context); snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* @@ -1056,7 +1055,6 @@ retry_locked: * context! is it writeable now? */ oldest = get_oldest_context(inode, NULL); - up_read(&mdsc->snap_rwsem); if (snapc->seq > oldest->seq) { ceph_put_snap_context(oldest); @@ -1113,7 +1111,6 @@ retry_locked: } /* we need to read it. */ - up_read(&mdsc->snap_rwsem); r = readpage_nounlock(file, page); if (r < 0) goto fail_nosnap; @@ -1158,16 +1155,13 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, /* * we don't do anything in here that simple_write_end doesn't do - * except adjust dirty page accounting and drop read lock on - * mdsc->snap_rwsem. + * except adjust dirty page accounting */ static int ceph_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; @@ -1189,7 +1183,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); unlock_page(page); - up_read(&mdsc->snap_rwsem); page_cache_release(page); if (check_cap) @@ -1315,7 +1308,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct page *page = vmf->page; loff_t off = page_offset(page); loff_t size = i_size_read(inode); @@ -1374,7 +1366,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret == 0) { /* success. we'll keep the page locked. */ set_page_dirty(page); - up_read(&mdsc->snap_rwsem); ret = VM_FAULT_LOCKED; } else { if (ret == -ENOMEM) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7c8e93aeb01e..feb8ec92f1b4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2073,7 +2073,8 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, * * Protected by i_ceph_lock. */ -static void __take_cap_refs(struct ceph_inode_info *ci, int got) +static void __take_cap_refs(struct ceph_inode_info *ci, int got, + bool snap_rwsem_locked) { if (got & CEPH_CAP_PIN) ci->i_pin_ref++; @@ -2081,8 +2082,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) ci->i_rd_ref++; if (got & CEPH_CAP_FILE_CACHE) ci->i_rdcache_ref++; - if (got & CEPH_CAP_FILE_WR) + if (got & CEPH_CAP_FILE_WR) { + if (ci->i_wr_ref == 0 && !ci->i_head_snapc) { + BUG_ON(!snap_rwsem_locked); + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + } ci->i_wr_ref++; + } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) ihold(&ci->vfs_inode); @@ -2100,16 +2107,19 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - loff_t endoff, int *got, int *check_max, int *err) + loff_t endoff, bool nonblock, int *got, int *err) { struct inode *inode = &ci->vfs_inode; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; int ret = 0; int have, implemented; int file_wanted; + bool snap_rwsem_locked = false; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2125,6 +2135,10 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, /* finish pending truncate */ while (ci->i_truncate_pending) { spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) { + up_read(&mdsc->snap_rwsem); + snap_rwsem_locked = false; + } __ceph_do_pending_vmtruncate(inode); spin_lock(&ci->i_ceph_lock); } @@ -2136,7 +2150,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, dout("get_cap_refs %p endoff %llu > maxsize %llu\n", inode, endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) { - *check_max = 1; + *err = -EAGAIN; ret = 1; } goto out_unlock; @@ -2164,8 +2178,29 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { + if (!snap_rwsem_locked && + !ci->i_head_snapc && + (need & CEPH_CAP_FILE_WR)) { + if (!down_read_trylock(&mdsc->snap_rwsem)) { + /* + * we can not call down_read() when + * task isn't in TASK_RUNNING state + */ + if (nonblock) { + *err = -EAGAIN; + ret = 1; + goto out_unlock; + } + + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + snap_rwsem_locked = true; + goto again; + } + snap_rwsem_locked = true; + } *got = need | (have & want); - __take_cap_refs(ci, *got); + __take_cap_refs(ci, *got, true); ret = 1; } } else { @@ -2189,6 +2224,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, } out_unlock: spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) + up_read(&mdsc->snap_rwsem); dout("get_cap_refs %p ret %d got %s\n", inode, ret, ceph_cap_string(*got)); @@ -2231,54 +2268,70 @@ static void check_max_size(struct inode *inode, loff_t endoff) int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, loff_t endoff, int *got, struct page **pinned_page) { - int _got, check_max, ret, err = 0; + int _got, ret, err = 0; ret = ceph_pool_perm_check(ci, need); if (ret < 0) return ret; -retry: - if (endoff > 0) - check_max_size(&ci->vfs_inode, endoff); - _got = 0; - check_max = 0; - ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, endoff, - &_got, &check_max, &err)); - if (err) - ret = err; - if (ret < 0) - return ret; + while (true) { + if (endoff > 0) + check_max_size(&ci->vfs_inode, endoff); - if (check_max) - goto retry; - - if (ci->i_inline_version != CEPH_INLINE_NONE && - (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && - i_size_read(&ci->vfs_inode) > 0) { - struct page *page = find_get_page(ci->vfs_inode.i_mapping, 0); - if (page) { - if (PageUptodate(page)) { - *pinned_page = page; - goto out; - } - page_cache_release(page); - } - /* - * drop cap refs first because getattr while holding - * caps refs can cause deadlock. - */ - ceph_put_cap_refs(ci, _got); + err = 0; _got = 0; + ret = try_get_cap_refs(ci, need, want, endoff, + false, &_got, &err); + if (ret) { + if (err == -EAGAIN) + continue; + if (err < 0) + return err; + } else { + ret = wait_event_interruptible(ci->i_cap_wq, + try_get_cap_refs(ci, need, want, endoff, + true, &_got, &err)); + if (err == -EAGAIN) + continue; + if (err < 0) + ret = err; + if (ret < 0) + return ret; + } - /* getattr request will bring inline data into page cache */ - ret = __ceph_do_getattr(&ci->vfs_inode, NULL, - CEPH_STAT_CAP_INLINE_DATA, true); - if (ret < 0) - return ret; - goto retry; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(&ci->vfs_inode) > 0) { + struct page *page = + find_get_page(ci->vfs_inode.i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + break; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while + * holding * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* + * getattr request will bring inline data into + * page cache + */ + ret = __ceph_do_getattr(&ci->vfs_inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, + true); + if (ret < 0) + return ret; + continue; + } + break; } -out: + *got = _got; return 0; } @@ -2290,7 +2343,7 @@ out: void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) { spin_lock(&ci->i_ceph_lock); - __take_cap_refs(ci, caps); + __take_cap_refs(ci, caps, false); spin_unlock(&ci->i_ceph_lock); } @@ -2341,6 +2394,13 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) wake = 1; } } + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + BUG_ON(!ci->i_head_snapc); + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } /* see comment in __ceph_remove_cap() */ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) drop_inode_snap_realm(ci); @@ -2384,7 +2444,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; if (ci->i_wrbuffer_ref_head == 0 && - ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; @@ -2775,7 +2837,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, dout(" inode %p now clean\n", inode); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = 1; - if (ci->i_wrbuffer_ref_head == 0) { + if (ci->i_wr_ref == 0 && + ci->i_wrbuffer_ref_head == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 777eb21d556f..0a76a370d798 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -557,13 +557,13 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -600,7 +600,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t start; ssize_t n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, @@ -674,13 +673,13 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -717,7 +716,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t left; int n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, 1, @@ -996,14 +994,30 @@ retry_snap: if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + struct ceph_snap_context *snapc; struct iov_iter data; mutex_unlock(&inode->i_mutex); + + spin_lock(&ci->i_ceph_lock); + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + } + spin_unlock(&ci->i_ceph_lock); + /* we might need to revert back to that point */ data = *from; if (iocb->ki_flags & IOCB_DIRECT) - written = ceph_sync_direct_write(iocb, &data, pos); + written = ceph_sync_direct_write(iocb, &data, pos, + snapc); else - written = ceph_sync_write(iocb, &data, pos); + written = ceph_sync_write(iocb, &data, pos, snapc); if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", @@ -1014,6 +1028,7 @@ retry_snap: } if (written > 0) iov_iter_advance(from, written); + ceph_put_snap_context(snapc); } else { loff_t old_size = inode->i_size; /* diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b2a945345d2b..5bfdab9a465e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -455,6 +455,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; + struct ceph_snap_context *old_snapc; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -467,6 +468,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); + old_snapc = ci->i_head_snapc; + /* * If there is a write in progress, treat that as a dirty Fw, * even though it hasn't completed yet; by the time we finish @@ -481,76 +484,79 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) writes in progress now were started before the previous cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); - kfree(capsnap); - } else if (ci->i_snap_realm->cached_context == ceph_empty_snapc) { + goto update_snapc; + } + if (ci->i_snap_realm->cached_context == ceph_empty_snapc) { dout("queue_cap_snap %p empty snapc\n", inode); - kfree(capsnap); - } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| - CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { - struct ceph_snap_context *snapc = ci->i_head_snapc; - - /* - * if we are a sync write, we may need to go to the snaprealm - * to get the current snapc. - */ - if (!snapc) - snapc = ci->i_snap_realm->cached_context; - - dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", - inode, capsnap, snapc, ceph_cap_string(dirty)); - ihold(inode); - - atomic_set(&capsnap->nref, 1); - capsnap->ci = ci; - INIT_LIST_HEAD(&capsnap->ci_item); - INIT_LIST_HEAD(&capsnap->flushing_item); - - capsnap->follows = snapc->seq; - capsnap->issued = __ceph_caps_issued(ci, NULL); - capsnap->dirty = dirty; - - capsnap->mode = inode->i_mode; - capsnap->uid = inode->i_uid; - capsnap->gid = inode->i_gid; - - if (dirty & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); - capsnap->xattr_blob = - ceph_buffer_get(ci->i_xattrs.blob); - capsnap->xattr_version = ci->i_xattrs.version; - } else { - capsnap->xattr_blob = NULL; - capsnap->xattr_version = 0; - } - - capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; - - /* dirty page count moved from _head to this cap_snap; - all subsequent writes page dirties occur _after_ this - snapshot. */ - capsnap->dirty_pages = ci->i_wrbuffer_ref_head; - ci->i_wrbuffer_ref_head = 0; - capsnap->context = snapc; - ci->i_head_snapc = - ceph_get_snap_context(ci->i_snap_realm->cached_context); - dout(" new snapc is %p\n", ci->i_head_snapc); - list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); - - if (used & CEPH_CAP_FILE_WR) { - dout("queue_cap_snap %p cap_snap %p snapc %p" - " seq %llu used WR, now pending\n", inode, - capsnap, snapc, snapc->seq); - capsnap->writing = 1; - } else { - /* note mtime, size NOW. */ - __ceph_finish_cap_snap(ci, capsnap); - } - } else { + goto update_snapc; + } + if (!(dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| + CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { dout("queue_cap_snap %p nothing dirty|writing\n", inode); - kfree(capsnap); + goto update_snapc; } + BUG_ON(!old_snapc); + + dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", + inode, capsnap, old_snapc, ceph_cap_string(dirty)); + ihold(inode); + + atomic_set(&capsnap->nref, 1); + capsnap->ci = ci; + INIT_LIST_HEAD(&capsnap->ci_item); + INIT_LIST_HEAD(&capsnap->flushing_item); + + capsnap->follows = old_snapc->seq; + capsnap->issued = __ceph_caps_issued(ci, NULL); + capsnap->dirty = dirty; + + capsnap->mode = inode->i_mode; + capsnap->uid = inode->i_uid; + capsnap->gid = inode->i_gid; + + if (dirty & CEPH_CAP_XATTR_EXCL) { + __ceph_build_xattrs_blob(ci); + capsnap->xattr_blob = + ceph_buffer_get(ci->i_xattrs.blob); + capsnap->xattr_version = ci->i_xattrs.version; + } else { + capsnap->xattr_blob = NULL; + capsnap->xattr_version = 0; + } + + capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + + /* dirty page count moved from _head to this cap_snap; + all subsequent writes page dirties occur _after_ this + snapshot. */ + capsnap->dirty_pages = ci->i_wrbuffer_ref_head; + ci->i_wrbuffer_ref_head = 0; + capsnap->context = old_snapc; + list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); + old_snapc = NULL; + + if (used & CEPH_CAP_FILE_WR) { + dout("queue_cap_snap %p cap_snap %p snapc %p" + " seq %llu used WR, now pending\n", inode, + capsnap, old_snapc, old_snapc->seq); + capsnap->writing = 1; + } else { + /* note mtime, size NOW. */ + __ceph_finish_cap_snap(ci, capsnap); + } + capsnap = NULL; + +update_snapc: + if (ci->i_head_snapc) { + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + dout(" new snapc is %p\n", ci->i_head_snapc); + } spin_unlock(&ci->i_ceph_lock); + + kfree(capsnap); + ceph_put_snap_context(old_snapc); } /* From 860560904962d08fd38666207c910065fe53e074 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 1 May 2015 16:57:16 +0800 Subject: [PATCH 266/839] ceph: avoid sending unnessesary FLUSHSNAP message when a snap notification contains no new snapshot, we can avoid sending FLUSHSNAP message to MDS. But we still need to create cap_snap in some case because it's required by write path and page writeback path Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 69 +++++++++++++++++++++++++++---------------------- fs/ceph/snap.c | 49 ++++++++++++++++++++++++++--------- fs/ceph/super.h | 5 ++-- 3 files changed, 78 insertions(+), 45 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index feb8ec92f1b4..f1dbcae7c75c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1297,11 +1297,8 @@ retry: if (capsnap->dirty_pages || capsnap->writing) break; - /* - * if cap writeback already occurred, we should have dropped - * the capsnap in ceph_put_wrbuffer_cap_refs. - */ - BUG_ON(capsnap->dirty == 0); + /* should be removed by ceph_try_drop_cap_snap() */ + BUG_ON(!capsnap->need_flush); /* pick mds, take s_mutex */ if (ci->i_auth_cap == NULL) { @@ -2347,6 +2344,27 @@ void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) spin_unlock(&ci->i_ceph_lock); } + +/* + * drop cap_snap that is not associated with any snapshot. + * we don't need to send FLUSHSNAP message for it. + */ +static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap) +{ + if (!capsnap->need_flush && + !capsnap->writing && !capsnap->dirty_pages) { + + dout("dropping cap_snap %p follows %llu\n", + capsnap, capsnap->follows); + ceph_put_snap_context(capsnap->context); + list_del(&capsnap->ci_item); + list_del(&capsnap->flushing_item); + ceph_put_cap_snap(capsnap); + return 1; + } + return 0; +} + /* * Release cap refs. * @@ -2360,7 +2378,6 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) { struct inode *inode = &ci->vfs_inode; int last = 0, put = 0, flushsnaps = 0, wake = 0; - struct ceph_cap_snap *capsnap; spin_lock(&ci->i_ceph_lock); if (had & CEPH_CAP_PIN) @@ -2382,17 +2399,17 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) if (had & CEPH_CAP_FILE_WR) if (--ci->i_wr_ref == 0) { last++; - if (!list_empty(&ci->i_cap_snaps)) { - capsnap = list_first_entry(&ci->i_cap_snaps, - struct ceph_cap_snap, - ci_item); - if (capsnap->writing) { - capsnap->writing = 0; - flushsnaps = - __ceph_finish_cap_snap(ci, - capsnap); - wake = 1; - } + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + capsnap->writing = 0; + if (ceph_try_drop_cap_snap(capsnap)) + put++; + else if (__ceph_finish_cap_snap(ci, capsnap)) + flushsnaps = 1; + wake = 1; } if (ci->i_wrbuffer_ref_head == 0 && ci->i_dirty_caps == 0 && @@ -2416,7 +2433,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) ceph_flush_snaps(ci); if (wake) wake_up_all(&ci->i_cap_wq); - if (put) + while (put-- > 0) iput(inode); } @@ -2467,25 +2484,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = 1; - if (capsnap->dirty == 0) - /* cap writeback completed before we created - * the cap_snap; no FLUSHSNAP is needed */ - drop_capsnap = 1; + drop_capsnap = ceph_try_drop_cap_snap(capsnap); } dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s%s\n", + " snap %lld %d/%d -> %d/%d %s%s\n", inode, capsnap, capsnap->context->seq, ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, ci->i_wrbuffer_ref, capsnap->dirty_pages, last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : "", - drop_capsnap ? " (drop capsnap)" : ""); - if (drop_capsnap) { - ceph_put_snap_context(capsnap->context); - list_del(&capsnap->ci_item); - list_del(&capsnap->flushing_item); - ceph_put_cap_snap(capsnap); - } + complete_capsnap ? " (complete capsnap)" : ""); } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 5bfdab9a465e..ba708017d60b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -436,6 +436,14 @@ static int dup_array(u64 **dst, __le64 *src, u32 num) return 0; } +static bool has_new_snaps(struct ceph_snap_context *o, + struct ceph_snap_context *n) +{ + if (n->num_snaps == 0) + return false; + /* snaps are in descending order */ + return n->snaps[0] > o->seq; +} /* * When a snapshot is applied, the size/mtime inode metadata is queued @@ -455,7 +463,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; - struct ceph_snap_context *old_snapc; + struct ceph_snap_context *old_snapc, *new_snapc; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -469,6 +477,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) dirty = __ceph_caps_dirty(ci); old_snapc = ci->i_head_snapc; + new_snapc = ci->i_snap_realm->cached_context; /* * If there is a write in progress, treat that as a dirty Fw, @@ -486,20 +495,37 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) dout("queue_cap_snap %p already pending\n", inode); goto update_snapc; } - if (ci->i_snap_realm->cached_context == ceph_empty_snapc) { - dout("queue_cap_snap %p empty snapc\n", inode); - goto update_snapc; - } - if (!(dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| - CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { + if (ci->i_wrbuffer_ref_head == 0 && + !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { dout("queue_cap_snap %p nothing dirty|writing\n", inode); goto update_snapc; } BUG_ON(!old_snapc); - dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", - inode, capsnap, old_snapc, ceph_cap_string(dirty)); + /* + * There is no need to send FLUSHSNAP message to MDS if there is + * no new snapshot. But when there is dirty pages or on-going + * writes, we still need to create cap_snap. cap_snap is needed + * by the write path and page writeback path. + * + * also see ceph_try_drop_cap_snap() + */ + if (has_new_snaps(old_snapc, new_snapc)) { + if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR)) + capsnap->need_flush = true; + } else { + if (!(used & CEPH_CAP_FILE_WR) && + ci->i_wrbuffer_ref_head == 0) { + dout("queue_cap_snap %p " + "no new_snap|dirty_page|writing\n", inode); + goto update_snapc; + } + } + + dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n", + inode, capsnap, old_snapc, ceph_cap_string(dirty), + capsnap->need_flush ? "" : "no_flush"); ihold(inode); atomic_set(&capsnap->nref, 1); @@ -549,9 +575,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) update_snapc: if (ci->i_head_snapc) { - ci->i_head_snapc = ceph_get_snap_context( - ci->i_snap_realm->cached_context); - dout(" new snapc is %p\n", ci->i_head_snapc); + ci->i_head_snapc = ceph_get_snap_context(new_snapc); + dout(" new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b182fd7499d9..4ef1ae92c2a6 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -164,6 +164,7 @@ struct ceph_cap_snap { int writing; /* a sync write is still in progress */ int dirty_pages; /* dirty pages awaiting writeback */ bool inline_data; + bool need_flush; }; static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) @@ -719,8 +720,8 @@ extern void ceph_snap_exit(void); static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) { return !list_empty(&ci->i_cap_snaps) && - list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap, - ci_item)->writing; + list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap, + ci_item)->writing; } /* inode.c */ From 604d1b0245b97738cde4341944ad93edff4b2827 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 1 May 2015 17:49:16 +0800 Subject: [PATCH 267/839] ceph: take snap_rwsem when accessing snap realm's cached_context When ceph inode's i_head_snapc is NULL, __ceph_mark_dirty_caps() accesses snap realm's cached_context. So we need take read lock of snap_rwsem. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 4 +++- fs/ceph/inode.c | 15 +++++++++++++++ fs/ceph/xattr.c | 45 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f1dbcae7c75c..900c05fd77d8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1413,9 +1413,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { - if (!ci->i_head_snapc) + if (!ci->i_head_snapc) { + WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); + } dout(" inode %p now dirty snapc %p auth cap %p\n", &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1a68c0e38a52..1c991df276c9 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1727,6 +1727,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) int mask = 0; int err = 0; int inode_dirty_flags = 0; + bool lock_snap_rwsem = false; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1742,6 +1743,18 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) spin_lock(&ci->i_ceph_lock); issued = __ceph_caps_issued(ci, NULL); + + if (!ci->i_head_snapc && + (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + issued = __ceph_caps_issued(ci, NULL); + } + } + dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ia_valid & ATTR_UID) { @@ -1890,6 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) release &= issued; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index cd7ffad4041d..c6f7d9b82085 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -911,6 +911,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; int issued; int err; int dirty = 0; @@ -920,6 +921,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, char *newval = NULL; struct ceph_inode_xattr *xattr = NULL; int required_blob_size; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -951,9 +953,20 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, name_len, val_len); @@ -966,7 +979,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -984,6 +997,8 @@ retry: } spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); return err; @@ -991,6 +1006,8 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); err = ceph_sync_setxattr(dentry, name, value, size, flags); out: kfree(newname); @@ -1044,10 +1061,12 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; int issued; int err; int required_blob_size; int dirty; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -1064,10 +1083,21 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); + __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, 0, 0); @@ -1080,7 +1110,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -1094,14 +1124,17 @@ retry: ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); err = ceph_send_removexattr(dentry, name); -out: return err; } From 622f3e250f498976ad4cbae6f2be5cb359ded4f5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 7 May 2015 10:59:47 +0800 Subject: [PATCH 268/839] ceph: don't trim auth cap when there are cap snaps Signed-off-by: Yan, Zheng --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f125e06dacb8..88010f9a254d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1371,7 +1371,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), ceph_cap_string(used), ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { - if (ci->i_dirty_caps | ci->i_flushing_caps) + if (ci->i_dirty_caps || ci->i_flushing_caps || + !list_empty(&ci->i_cap_snaps)) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; From affbc19a68f9966ad65a773db405f78e2bafc07b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 5 May 2015 21:22:13 +0800 Subject: [PATCH 269/839] ceph: make sure syncfs flushes all cap snaps Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 18 ++++++---- fs/ceph/mds_client.c | 86 +++++++++++++++++++++++++++++++------------- fs/ceph/mds_client.h | 1 + fs/ceph/snap.c | 2 ++ 4 files changed, 76 insertions(+), 31 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 900c05fd77d8..bbd969e16a01 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * - * Unless @again is true, skip cap_snaps that were already sent to + * Unless @kick is true, skip cap_snaps that were already sent to * the MDS (i.e., during this session). * * Called under i_ceph_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession, - int again) + int kick) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { @@ -1307,7 +1307,7 @@ retry: } /* only flush each capsnap once */ - if (!again && !list_empty(&capsnap->flushing_item)) { + if (!kick && !list_empty(&capsnap->flushing_item)) { dout("already flushed %p, skipping\n", capsnap); continue; } @@ -1317,6 +1317,9 @@ retry: if (session && session->s_mds != mds) { dout("oops, wrong session %p mutex\n", session); + if (kick) + goto out; + mutex_unlock(&session->s_mutex); ceph_put_mds_session(session); session = NULL; @@ -1342,10 +1345,9 @@ retry: capsnap->flush_tid = ++ci->i_cap_flush_last_tid; atomic_inc(&capsnap->nref); - if (!list_empty(&capsnap->flushing_item)) - list_del_init(&capsnap->flushing_item); - list_add_tail(&capsnap->flushing_item, - &session->s_cap_snaps_flushing); + if (list_empty(&capsnap->flushing_item)) + list_add_tail(&capsnap->flushing_item, + &session->s_cap_snaps_flushing); spin_unlock(&ci->i_ceph_lock); dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", @@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap; int drop = 0; @@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, list_del(&capsnap->ci_item); list_del(&capsnap->flushing_item); ceph_put_cap_snap(capsnap); + wake_up_all(&mdsc->cap_flushing_wq); drop = 1; break; } else { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 88010f9a254d..2bb9264b9225 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1488,17 +1488,22 @@ out_unlocked: return err; } -static int check_cap_flush(struct inode *inode, u64 want_flush_seq) +static int check_cap_flush(struct ceph_inode_info *ci, + u64 want_flush_seq, u64 want_snap_seq) { - struct ceph_inode_info *ci = ceph_inode(inode); - int ret; + int ret1 = 1, ret2 = 1; spin_lock(&ci->i_ceph_lock); - if (ci->i_flushing_caps) - ret = ci->i_cap_flush_seq >= want_flush_seq; - else - ret = 1; + if (want_flush_seq > 0 && ci->i_flushing_caps) + ret1 = ci->i_cap_flush_seq >= want_flush_seq; + + if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, ci_item); + ret2 = capsnap->follows >= want_snap_seq; + } spin_unlock(&ci->i_ceph_lock); - return ret; + return ret1 && ret2; } /* @@ -1506,44 +1511,71 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq) * * returns true if we've flushed through want_flush_seq */ -static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) +static void wait_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_seq, u64 want_snap_seq) { int mds; dout("check_cap_flush want %lld\n", want_flush_seq); mutex_lock(&mdsc->mutex); - for (mds = 0; mds < mdsc->max_sessions; mds++) { + for (mds = 0; mds < mdsc->max_sessions; ) { struct ceph_mds_session *session = mdsc->sessions[mds]; - struct inode *inode = NULL; + struct inode *inode1 = NULL, *inode2 = NULL; - if (!session) + if (!session) { + mds++; continue; + } get_session(session); mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); if (!list_empty(&session->s_cap_flushing)) { struct ceph_inode_info *ci = - list_entry(session->s_cap_flushing.next, - struct ceph_inode_info, - i_flushing_item); + list_first_entry(&session->s_cap_flushing, + struct ceph_inode_info, + i_flushing_item); - if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { + if (!check_cap_flush(ci, want_flush_seq, 0)) { dout("check_cap_flush still flushing %p " "seq %lld <= %lld to mds%d\n", &ci->vfs_inode, ci->i_cap_flush_seq, - want_flush_seq, session->s_mds); - inode = igrab(&ci->vfs_inode); + want_flush_seq, mds); + inode1 = igrab(&ci->vfs_inode); + } + } + if (!list_empty(&session->s_cap_snaps_flushing)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&session->s_cap_snaps_flushing, + struct ceph_cap_snap, + flushing_item); + struct ceph_inode_info *ci = capsnap->ci; + if (!check_cap_flush(ci, 0, want_snap_seq)) { + dout("check_cap_flush still flushing snap %p " + "follows %lld <= %lld to mds%d\n", + &ci->vfs_inode, capsnap->follows, + want_snap_seq, mds); + inode2 = igrab(&ci->vfs_inode); } } mutex_unlock(&session->s_mutex); ceph_put_mds_session(session); - if (inode) { + if (inode1) { wait_event(mdsc->cap_flushing_wq, - check_cap_flush(inode, want_flush_seq)); - iput(inode); + check_cap_flush(ceph_inode(inode1), + want_flush_seq, 0)); + iput(inode1); } + if (inode2) { + wait_event(mdsc->cap_flushing_wq, + check_cap_flush(ceph_inode(inode2), + 0, want_snap_seq)); + iput(inode2); + } + + if (!inode1 && !inode2) + mds++; mutex_lock(&mdsc->mutex); } @@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; INIT_LIST_HEAD(&mdsc->snap_empty); @@ -3517,7 +3550,7 @@ restart: void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { - u64 want_tid, want_flush; + u64 want_tid, want_flush, want_snap; if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) return; @@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) want_flush = mdsc->cap_flush_seq; spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); + down_read(&mdsc->snap_rwsem); + want_snap = mdsc->last_snap_seq; + up_read(&mdsc->snap_rwsem); + + dout("sync want tid %lld flush_seq %lld snap_seq %lld\n", + want_tid, want_flush, want_snap); wait_unsafe_requests(mdsc, want_tid); - wait_caps_flush(mdsc, want_flush); + wait_caps_flush(mdsc, want_flush, want_snap); } /* diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d474141c034a..bf24d88cfeb2 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -290,6 +290,7 @@ struct ceph_mds_client { * references (implying they contain no inodes with caps) that * should be destroyed. */ + u64 last_snap_seq; struct rw_semaphore snap_rwsem; struct rb_root snap_realms; struct list_head snap_empty; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index ba708017d60b..233d906aec02 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -730,6 +730,8 @@ more: /* queue realm for cap_snap creation */ list_add(&realm->dirty_item, &dirty_realms); + if (realm->seq > mdsc->last_snap_seq) + mdsc->last_snap_seq = realm->seq; invalidate = 1; } else if (!realm->cached_context) { From 745a8e3bccbc6adae69a98ddc525e529aa44636e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 14 May 2015 17:22:42 +0800 Subject: [PATCH 270/839] ceph: don't pre-allocate space for cap release messages Previously we pre-allocate cap release messages for each caps. This wastes lots of memory when there are large amount of caps. This patch make the code not pre-allocate the cap release messages. Instead, we add the corresponding ceph_cap struct to a list when releasing a cap. Later when flush cap releases is needed, we allocate the cap release messages dynamically. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 85 ++++++---------- fs/ceph/mds_client.c | 231 +++++++++++++++++-------------------------- fs/ceph/mds_client.h | 3 - fs/ceph/super.h | 20 ++-- 4 files changed, 133 insertions(+), 206 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bbd969e16a01..245ca381a6dc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -926,16 +926,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) /* remove from session list */ spin_lock(&session->s_cap_lock); - /* - * s_cap_reconnect is protected by s_cap_lock. no one changes - * s_cap_gen while session is in the reconnect state. - */ - if (queue_release && - (!session->s_cap_reconnect || - cap->cap_gen == session->s_cap_gen)) - __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, - cap->mseq, cap->issue_seq); - if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ dout("__ceph_remove_cap delaying %p removal from session %p\n", @@ -948,6 +938,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } /* protect backpointer with s_cap_lock: see iterate_session_caps */ cap->ci = NULL; + + /* + * s_cap_reconnect is protected by s_cap_lock. no one changes + * s_cap_gen while session is in the reconnect state. + */ + if (queue_release && + (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) { + cap->queue_release = 1; + if (removed) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + removed = 0; + } + } else { + cap->queue_release = 0; + } + cap->cap_ino = ci->i_vino.ino; + spin_unlock(&session->s_cap_lock); /* remove from inode list */ @@ -1053,44 +1062,6 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } -void __queue_cap_release(struct ceph_mds_session *session, - u64 ino, u64 cap_id, u32 migrate_seq, - u32 issue_seq) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %llx release to mds%d msg %p (%d left)\n", - ino, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - le32_add_cpu(&head->num, 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ino); - item->cap_id = cpu_to_le64(cap_id); - item->migrate_seq = cpu_to_le32(migrate_seq); - item->seq = cpu_to_le32(issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } -} - /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_ceph_lock. @@ -3051,7 +3022,6 @@ retry: mutex_lock_nested(&session->s_mutex, SINGLE_DEPTH_NESTING); } - ceph_add_cap_releases(mdsc, tsession); new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); @@ -3247,16 +3217,20 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); - if (op == CEPH_CAP_OP_IMPORT) - ceph_add_cap_releases(mdsc, session); - if (!inode) { dout(" i don't have ino %llx\n", vino.ino); if (op == CEPH_CAP_OP_IMPORT) { + cap = ceph_get_cap(mdsc, NULL); + cap->cap_ino = vino.ino; + cap->queue_release = 1; + cap->cap_id = cap_id; + cap->mseq = mseq; + cap->seq = seq; spin_lock(&session->s_cap_lock); - __queue_cap_release(session, vino.ino, cap_id, - mseq, seq); + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; spin_unlock(&session->s_cap_lock); } goto flush_cap_releases; @@ -3332,11 +3306,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, flush_cap_releases: /* - * send any full release message to try to move things + * send any cap release message to try to move things * along for the mds (who clearly thinks we still have this * cap). */ - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); done: diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2bb9264b9225..76eb14489bfa 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -458,7 +458,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_cap_reconnect = 0; s->s_cap_iterator = NULL; INIT_LIST_HEAD(&s->s_cap_releases); - INIT_LIST_HEAD(&s->s_cap_releases_done); INIT_LIST_HEAD(&s->s_cap_flushing); INIT_LIST_HEAD(&s->s_cap_snaps_flushing); @@ -998,27 +997,25 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* - * Free preallocated cap messages assigned to this session - */ -static void cleanup_cap_releases(struct ceph_mds_session *session) +/* caller holds s_cap_lock, we drop it */ +static void cleanup_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) + __releases(session->s_cap_lock) { - struct ceph_msg *msg; - - spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); - } - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); - } + LIST_HEAD(tmp_list); + list_splice_init(&session->s_cap_releases, &tmp_list); + session->s_num_cap_releases = 0; spin_unlock(&session->s_cap_lock); + + dout("cleanup_cap_releases mds%d\n", session->s_mds); + while (!list_empty(&tmp_list)) { + struct ceph_cap *cap; + /* zero out the in-progress message */ + cap = list_first_entry(&tmp_list, + struct ceph_cap, session_caps); + list_del(&cap->session_caps); + ceph_put_cap(mdsc, cap); + } } static void cleanup_session_requests(struct ceph_mds_client *mdsc, @@ -1095,10 +1092,16 @@ static int iterate_session_caps(struct ceph_mds_session *session, dout("iterate_session_caps finishing cap %p removal\n", cap); BUG_ON(cap->session != session); + cap->session = NULL; list_del_init(&cap->session_caps); session->s_nr_caps--; - cap->session = NULL; - old_cap = cap; /* put_cap it w/o locks held */ + if (cap->queue_release) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + } else { + old_cap = cap; /* put_cap it w/o locks held */ + } } if (ret < 0) goto out; @@ -1191,11 +1194,12 @@ static void remove_session_caps(struct ceph_mds_session *session) spin_lock(&session->s_cap_lock); } } - spin_unlock(&session->s_cap_lock); + + // drop cap expires and unlock s_cap_lock + cleanup_cap_releases(session->s_mdsc, session); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); - cleanup_cap_releases(session); } /* @@ -1418,76 +1422,10 @@ static int trim_caps(struct ceph_mds_client *mdsc, session->s_trim_caps = 0; } - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); return 0; } -/* - * Allocate cap_release messages. If there is a partially full message - * in the queue, try to allocate enough to cover it's remainder, so that - * we can send it immediately. - * - * Called under s_mutex. - */ -int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) -{ - struct ceph_msg *msg, *partial = NULL; - struct ceph_mds_cap_release *head; - int err = -ENOMEM; - int extra = mdsc->fsc->mount_options->cap_release_safety; - int num; - - dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, - extra); - - spin_lock(&session->s_cap_lock); - - if (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, - list_head); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - if (num) { - dout(" partial %p with (%d/%d)\n", msg, num, - (int)CEPH_CAPS_PER_RELEASE); - extra += CEPH_CAPS_PER_RELEASE - num; - partial = msg; - } - } - while (session->s_num_cap_releases < session->s_nr_caps + extra) { - spin_unlock(&session->s_cap_lock); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, - GFP_NOFS, false); - if (!msg) - goto out_unlocked; - dout("add_cap_releases %p msg %p now %d\n", session, msg, - (int)msg->front.iov_len); - head = msg->front.iov_base; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - spin_lock(&session->s_cap_lock); - list_add(&msg->list_head, &session->s_cap_releases); - session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; - } - - if (partial) { - head = partial->front.iov_base; - num = le32_to_cpu(head->num); - dout(" queueing partial %p with %d/%d\n", partial, num, - (int)CEPH_CAPS_PER_RELEASE); - list_move_tail(&partial->list_head, - &session->s_cap_releases_done); - session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; - } - err = 0; - spin_unlock(&session->s_cap_lock); -out_unlocked: - return err; -} - static int check_cap_flush(struct ceph_inode_info *ci, u64 want_flush_seq, u64 want_snap_seq) { @@ -1590,62 +1528,76 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { - struct ceph_msg *msg; + struct ceph_msg *msg = NULL; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + struct ceph_cap *cap; + LIST_HEAD(tmp_list); + int num_cap_releases; - dout("send_cap_releases mds%d\n", session->s_mds); spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - spin_unlock(&session->s_cap_lock); +again: + list_splice_init(&session->s_cap_releases, &tmp_list); + num_cap_releases = session->s_num_cap_releases; + session->s_num_cap_releases = 0; + spin_unlock(&session->s_cap_lock); + + while (!list_empty(&tmp_list)) { + if (!msg) { + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, + PAGE_CACHE_SIZE, GFP_NOFS, false); + if (!msg) + goto out_err; + head = msg->front.iov_base; + head->num = cpu_to_le32(0); + msg->front.iov_len = sizeof(*head); + } + cap = list_first_entry(&tmp_list, struct ceph_cap, + session_caps); + list_del(&cap->session_caps); + num_cap_releases--; + + head = msg->front.iov_base; + le32_add_cpu(&head->num, 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(cap->cap_ino); + item->cap_id = cpu_to_le64(cap->cap_id); + item->migrate_seq = cpu_to_le32(cap->mseq); + item->seq = cpu_to_le32(cap->issue_seq); + msg->front.iov_len += sizeof(*item); + + ceph_put_cap(mdsc, cap); + + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + ceph_con_send(&session->s_con, msg); + msg = NULL; + } + } + + BUG_ON(num_cap_releases != 0); + + spin_lock(&session->s_cap_lock); + if (!list_empty(&session->s_cap_releases)) + goto again; + spin_unlock(&session->s_cap_lock); + + if (msg) { msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); dout("send_cap_releases mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); - spin_lock(&session->s_cap_lock); } + return; +out_err: + pr_err("send_cap_releases mds%d, failed to allocate message\n", + session->s_mds); + spin_lock(&session->s_cap_lock); + list_splice(&tmp_list, &session->s_cap_releases); + session->s_num_cap_releases += num_cap_releases; spin_unlock(&session->s_cap_lock); } -static void discard_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - unsigned num; - - dout("discard_cap_releases mds%d\n", session->s_mds); - - if (!list_empty(&session->s_cap_releases)) { - /* zero out the in-progress message */ - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", - session->s_mds, msg, num); - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - session->s_num_cap_releases += num; - } - - /* requeue completed messages */ - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, - num); - session->s_num_cap_releases += num; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - list_add(&msg->list_head, &session->s_cap_releases); - } -} - /* * requests */ @@ -2529,7 +2481,6 @@ out_err: } mutex_unlock(&mdsc->mutex); - ceph_add_cap_releases(mdsc, req->r_session); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2921,8 +2872,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - discard_cap_releases(mdsc, session); - spin_unlock(&session->s_cap_lock); + cleanup_cap_releases(mdsc, session); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) @@ -3385,7 +3335,6 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - ceph_add_cap_releases(mdsc, s); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) ceph_send_cap_releases(mdsc, s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index bf24d88cfeb2..294fa23a7df6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -139,7 +139,6 @@ struct ceph_mds_session { int s_cap_reconnect; int s_readonly; struct list_head s_cap_releases; /* waiting cap_release messages */ - struct list_head s_cap_releases_done; /* ready to send */ struct ceph_cap *s_cap_iterator; /* protected by mutex */ @@ -389,8 +388,6 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } -extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session); extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4ef1ae92c2a6..c4961353d058 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -122,11 +122,21 @@ struct ceph_cap { struct rb_node ci_node; /* per-ci cap tree */ struct ceph_mds_session *session; struct list_head session_caps; /* per-session caplist */ - int mds; u64 cap_id; /* unique cap id (mds provided) */ - int issued; /* latest, from the mds */ - int implemented; /* implemented superset of issued (for revocation) */ - int mds_wanted; + union { + /* in-use caps */ + struct { + int issued; /* latest, from the mds */ + int implemented; /* implemented superset of + issued (for revocation) */ + int mds, mds_wanted; + }; + /* caps to release */ + struct { + u64 cap_ino; + int queue_release; + }; + }; u32 seq, issue_seq, mseq; u32 cap_gen; /* active/stale cycle */ unsigned long last_used; @@ -845,8 +855,6 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); extern int ceph_is_any_caps(struct inode *inode); -extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino, - u64 cap_id, u32 migrate_seq, u32 issue_seq); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); extern int ceph_fsync(struct file *file, loff_t start, loff_t end, From e8a7b8b12b13831467c6158c1e82801e25b5dd98 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 19 May 2015 18:54:40 +0800 Subject: [PATCH 271/839] ceph: exclude setfilelock requests when calculating oldest tid setfilelock requests can block for a long time, which can prevent client from advancing its oldest tid. Signed-off-by: Yan, Zheng --- fs/ceph/mds_client.c | 30 +++++++++++++++++++++++------- fs/ceph/mds_client.h | 2 ++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 76eb14489bfa..69a36f40517f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -628,6 +628,9 @@ static void __register_request(struct ceph_mds_client *mdsc, req->r_uid = current_fsuid(); req->r_gid = current_fsgid(); + if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) + mdsc->oldest_tid = req->r_tid; + if (dir) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -643,6 +646,21 @@ static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + + if (req->r_tid == mdsc->oldest_tid) { + struct rb_node *p = rb_next(&req->r_node); + mdsc->oldest_tid = 0; + while (p) { + struct ceph_mds_request *next_req = + rb_entry(p, struct ceph_mds_request, r_node); + if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) { + mdsc->oldest_tid = next_req->r_tid; + break; + } + p = rb_next(p); + } + } + rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); @@ -1682,13 +1700,9 @@ static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) struct ceph_mds_request, r_node); } -static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) +static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) { - struct ceph_mds_request *req = __get_oldest_req(mdsc); - - if (req) - return req->r_tid; - return 0; + return mdsc->oldest_tid; } /* @@ -3378,6 +3392,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->snap_empty); spin_lock_init(&mdsc->snap_empty_lock); mdsc->last_tid = 0; + mdsc->oldest_tid = 0; mdsc->request_tree = RB_ROOT; INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; @@ -3471,7 +3486,8 @@ restart: nextreq = rb_entry(n, struct ceph_mds_request, r_node); else nextreq = NULL; - if ((req->r_op & CEPH_MDS_OP_WRITE)) { + if (req->r_op != CEPH_MDS_OP_SETFILELOCK && + (req->r_op & CEPH_MDS_OP_WRITE)) { /* write op */ ceph_mdsc_get_request(req); if (nextreq) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 294fa23a7df6..2ef799961ebb 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -296,6 +296,8 @@ struct ceph_mds_client { spinlock_t snap_empty_lock; /* protect snap_empty */ u64 last_tid; /* most recent mds request */ + u64 oldest_tid; /* oldest incomplete mds request, + excluding setfilelock requests */ struct rb_root request_tree; /* pending mds requests */ struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ From d50c97b566c5bbf990eff472e9feaa58fdebdd33 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 11:52:20 +0300 Subject: [PATCH 272/839] libceph: nuke time_sub() Unused since ceph got merged into mainline I guess. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..85ae9a889a3f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,15 +93,6 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) -{ - BUG_ON(time_after(b, a)); - return (long)a - (long)b; -} - struct ceph_mds_client; /* From a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 12:02:17 +0300 Subject: [PATCH 273/839] libceph: store timeouts in jiffies, verify user input There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc. There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there. Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 5 +++-- fs/ceph/dir.c | 4 ++-- fs/ceph/mds_client.c | 12 +++++------ fs/ceph/mds_client.h | 2 +- fs/ceph/super.c | 2 +- include/linux/ceph/libceph.h | 17 +++++++++------ net/ceph/ceph_common.c | 41 +++++++++++++++++++++++++++--------- net/ceph/mon_client.c | 11 ++++++++-- net/ceph/osd_client.c | 15 ++++++------- 9 files changed, 71 insertions(+), 38 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 349115ae3bc2..992683b6b299 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4963,8 +4963,8 @@ out_err: */ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) { + struct ceph_options *opts = rbdc->client->options; u64 newest_epoch; - unsigned long timeout = rbdc->client->options->mount_timeout * HZ; int tries = 0; int ret; @@ -4979,7 +4979,8 @@ again: if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { ceph_monc_request_next_osdmap(&rbdc->client->monc); (void) ceph_monc_wait_osdmap(&rbdc->client->monc, - newest_epoch, timeout); + newest_epoch, + opts->mount_timeout); goto again; } else { /* the osdmap we have is new enough */ diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4248307fea90..173dd4b58c71 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1259,8 +1259,8 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, inode, req->r_tid, last_tid); if (req->r_timeout) { unsigned long time_left = wait_for_completion_timeout( - &req->r_safe_completion, - req->r_timeout); + &req->r_safe_completion, + ceph_timeout_jiffies(req->r_timeout)); if (time_left > 0) ret = 0; else diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 69a36f40517f..0b0e0a9a81c0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2268,7 +2268,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, dout("do_request waiting\n"); if (req->r_timeout) { err = (long)wait_for_completion_killable_timeout( - &req->r_completion, req->r_timeout); + &req->r_completion, + ceph_timeout_jiffies(req->r_timeout)); if (err == 0) err = -EIO; } else if (req->r_wait_for_completion) { @@ -3424,8 +3425,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; - struct ceph_fs_client *fsc = mdsc->fsc; mutex_lock(&mdsc->mutex); if (__get_oldest_req(mdsc)) { @@ -3433,7 +3434,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, - fsc->client->options->mount_timeout * HZ); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); @@ -3556,10 +3557,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc) */ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_session *session; int i; - struct ceph_fs_client *fsc = mdsc->fsc; - unsigned long timeout = fsc->client->options->mount_timeout * HZ; dout("close_sessions\n"); @@ -3580,7 +3580,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), - timeout); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining sessions */ mutex_lock(&mdsc->mutex); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2ef799961ebb..509d6822e9b1 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -227,7 +227,7 @@ struct ceph_mds_request { int r_err; bool r_aborted; - unsigned long r_timeout; /* optional. jiffies */ + unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ unsigned long r_request_started; /* start time for mds request only, used to measure lease durations */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9a5350030af8..edeb83c43112 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -742,7 +742,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; req->r_started = started; - req->r_timeout = fsc->client->options->mount_timeout * HZ; + req->r_timeout = fsc->client->options->mount_timeout; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 85ae9a889a3f..d73a569f9bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,6 +93,11 @@ enum { CEPH_MOUNT_SHUTDOWN, }; +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) +{ + return timeout ?: MAX_SCHEDULE_TIMEOUT; +} + struct ceph_mds_client; /* diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5b..a80e91c2c9a3 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ @@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name, pr_warn("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: - opt->osd_keepalive_timeout = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osdkeepalive out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_keepalive_timeout = + msecs_to_jiffies(intval * 1000); break; case Opt_osd_idle_ttl: - opt->osd_idle_ttl = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osd_idle_ttl out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000); break; case Opt_mount_timeout: - opt->mount_timeout = intval; + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("mount_timeout out of range\n"); + err = -EINVAL; + goto out; + } + opt->mount_timeout = msecs_to_jiffies(intval * 1000); break; case Opt_share: @@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) seq_puts(m, "notcp_nodelay,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) - seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); + seq_printf(m, "mount_timeout=%d,", + jiffies_to_msecs(opt->mount_timeout) / 1000); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) - seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); + seq_printf(m, "osd_idle_ttl=%d,", + jiffies_to_msecs(opt->osd_idle_ttl) / 1000); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", - opt->osd_keepalive_timeout); + jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) @@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client) int __ceph_open_session(struct ceph_client *client, unsigned long started) { int err; - unsigned long timeout = client->options->mount_timeout * HZ; + unsigned long timeout = client->options->mount_timeout; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); @@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, have_mon_and_osd_map(client) || (client->auth_err < 0), - timeout); + ceph_timeout_jiffies(timeout)); if (err == -EINTR || err == -ERESTARTSYS) return err; if (client->auth_err < 0) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2b3cf05e87b0..0da3bdc116f7 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_request_next_osdmap); +/* + * Wait for an osdmap with a given epoch. + * + * @epoch: epoch to wait for + * @timeout: in jiffies, 0 means "wait forever" + */ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout) { @@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, while (monc->have_osdmap < epoch) { mutex_unlock(&monc->mutex); - if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + if (timeout && time_after_eq(jiffies, started + timeout)) return -ETIMEDOUT; ret = wait_event_interruptible_timeout(monc->client->auth_wq, - monc->have_osdmap >= epoch, timeout); + monc->have_osdmap >= epoch, + ceph_timeout_jiffies(timeout)); if (ret < 0) return ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4cb4fab46e4f..50033677c0fa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, BUG_ON(!list_empty(&osd->o_osd_lru)); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); - osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; + osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; } static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, @@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, - osdc->client->options->osd_keepalive_timeout * HZ); + osdc->client->options->osd_keepalive_timeout); } static void __cancel_osd_timeout(struct ceph_osd_client *osdc) @@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); + struct ceph_options *opts = osdc->client->options; struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long keepalive = - osdc->client->options->osd_keepalive_timeout * HZ; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work) */ INIT_LIST_HEAD(&slow_osds); list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { - if (time_before(jiffies, req->r_stamp + keepalive)) + if (time_before(jiffies, + req->r_stamp + opts->osd_keepalive_timeout)) break; osd = req->r_osd; @@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work) struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, osds_timeout_work.work); - unsigned long delay = - osdc->client->options->osd_idle_ttl * HZ >> 2; + unsigned long delay = osdc->client->options->osd_idle_ttl / 4; dout("osds timeout\n"); down_read(&osdc->map_sem); @@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osdc->event_count = 0; schedule_delayed_work(&osdc->osds_timeout_work, - round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); + round_jiffies_relative(osdc->client->options->osd_idle_ttl)); err = -ENOMEM; osdc->req_mempool = mempool_create_kmalloc_pool(10, From 216639dd5091de4f4d7ad19b0b8dde11fad18286 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 19 May 2015 12:03:33 +0300 Subject: [PATCH 274/839] libceph: a couple tweaks for wait loops - return -ETIMEDOUT instead of -EIO in case of timeout - wait_event_interruptible_timeout() returns time left until timeout and since it can be almost LONG_MAX we had better assign it to long Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/ceph_common.c | 7 +++---- net/ceph/mon_client.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a80e91c2c9a3..925d0c890b80 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -647,8 +647,8 @@ static int have_mon_and_osd_map(struct ceph_client *client) */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - int err; unsigned long timeout = client->options->mount_timeout; + long err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); @@ -656,16 +656,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) return err; while (!have_mon_and_osd_map(client)) { - err = -EIO; if (timeout && time_after_eq(jiffies, started + timeout)) - return err; + return -ETIMEDOUT; /* wait */ dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, have_mon_and_osd_map(client) || (client->auth_err < 0), ceph_timeout_jiffies(timeout)); - if (err == -EINTR || err == -ERESTARTSYS) + if (err < 0) return err; if (client->auth_err < 0) return client->auth_err; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 0da3bdc116f7..9d6ff1215928 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -308,7 +308,7 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout) { unsigned long started = jiffies; - int ret; + long ret; mutex_lock(&monc->mutex); while (monc->have_osdmap < epoch) { From 5be73034771c8f18b241f1974803865a4de2cad1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 19 May 2015 12:05:38 +0300 Subject: [PATCH 275/839] ceph: simplify two mount_timeout sites No need to bifurcate wait now that we've got ceph_timeout_jiffies(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Reviewed-by: Yan, Zheng --- fs/ceph/dir.c | 14 ++++---------- fs/ceph/mds_client.c | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 173dd4b58c71..3dec27e36417 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1257,17 +1257,11 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, dout("dir_fsync %p wait on tid %llu (until %llu)\n", inode, req->r_tid, last_tid); - if (req->r_timeout) { - unsigned long time_left = wait_for_completion_timeout( - &req->r_safe_completion, + ret = !wait_for_completion_timeout(&req->r_safe_completion, ceph_timeout_jiffies(req->r_timeout)); - if (time_left > 0) - ret = 0; - else - ret = -EIO; /* timed out */ - } else { - wait_for_completion(&req->r_safe_completion); - } + if (ret) + ret = -EIO; /* timed out */ + ceph_mdsc_put_request(req); spin_lock(&ci->i_unsafe_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0b0e0a9a81c0..5be2d287a26c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2266,16 +2266,18 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, /* wait */ mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); - if (req->r_timeout) { - err = (long)wait_for_completion_killable_timeout( - &req->r_completion, - ceph_timeout_jiffies(req->r_timeout)); - if (err == 0) - err = -EIO; - } else if (req->r_wait_for_completion) { + if (!req->r_timeout && req->r_wait_for_completion) { err = req->r_wait_for_completion(mdsc, req); } else { - err = wait_for_completion_killable(&req->r_completion); + long timeleft = wait_for_completion_killable_timeout( + &req->r_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (timeleft > 0) + err = 0; + else if (!timeleft) + err = -EIO; /* timed out */ + else + err = timeleft; /* killed */ } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); From 2894e1d769743eb583bf8dc6be149f64a7c6a798 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 12 May 2015 19:53:24 +0300 Subject: [PATCH 276/839] rbd: timeout watch teardown on unmap with mount_timeout As part of unmap sequence, kernel client has to talk to the OSDs to teardown watch on the header object. If none of the OSDs are available it would hang forever, until interrupted by a signal - when that happens we follow through with the rest of unmap procedure (i.e. unregister the device and put all the data structures) and the unmap is still considired successful (rbd cli tool exits with 0). The watch on the userspace side should eventually timeout so that's fine. This isn't very nice, because various userspace tools (pacemaker rbd resource agent, for example) then have to worry about setting up their own timeouts. Timeout it with mount_timeout (60 seconds by default). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 992683b6b299..89fe8a4bc02e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1563,22 +1563,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request) /* * Wait for an object request to complete. If interrupted, cancel the * underlying osd request. + * + * @timeout: in jiffies, 0 means "wait forever" */ -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, + unsigned long timeout) { - int ret; + long ret; dout("%s %p\n", __func__, obj_request); - - ret = wait_for_completion_interruptible(&obj_request->completion); - if (ret < 0) { - dout("%s %p interrupted\n", __func__, obj_request); + ret = wait_for_completion_interruptible_timeout( + &obj_request->completion, + ceph_timeout_jiffies(timeout)); + if (ret <= 0) { + if (ret == 0) + ret = -ETIMEDOUT; rbd_obj_request_end(obj_request); - return ret; + } else { + ret = 0; } - dout("%s %p done\n", __func__, obj_request); - return 0; + dout("%s %p ret %d\n", __func__, obj_request, (int)ret); + return ret; +} + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +{ + return __rbd_obj_request_wait(obj_request, 0); +} + +static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request, + unsigned long timeout) +{ + return __rbd_obj_request_wait(obj_request, timeout); } static void rbd_img_request_complete(struct rbd_img_request *img_request) @@ -3122,6 +3139,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( bool watch) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_options *opts = osdc->client->options; struct rbd_obj_request *obj_request; int ret; @@ -3148,7 +3166,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( if (ret) goto out; - ret = rbd_obj_request_wait(obj_request); + ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout); if (ret) goto out; From 3e0708b990f7e46d87d47b3b06de322490f2f2ee Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 22 May 2015 16:38:02 +0800 Subject: [PATCH 277/839] ceph: ratelimit warn messages for MDS closes session Signed-off-by: Yan, Zheng --- fs/ceph/mds_client.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5be2d287a26c..8080d486a991 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -1048,7 +1049,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); list_del_init(&req->r_unsafe_item); - pr_info(" dropping unsafe request %llu\n", req->r_tid); + pr_warn_ratelimited(" dropping unsafe request %llu\n", + req->r_tid); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1152,7 +1154,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&mdsc->cap_dirty_lock); if (!list_empty(&ci->i_dirty_item)) { - pr_info(" dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty %s state for %p %lld\n", ceph_cap_string(ci->i_dirty_caps), inode, ceph_ino(inode)); ci->i_dirty_caps = 0; @@ -1160,7 +1163,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } if (!list_empty(&ci->i_flushing_item)) { - pr_info(" dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty+flushing %s state for %p %lld\n", ceph_cap_string(ci->i_flushing_caps), inode, ceph_ino(inode)); ci->i_flushing_caps = 0; From 41445999aeec1f0fdf196ab55b2c770473b2ea01 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 25 May 2015 17:36:42 +0800 Subject: [PATCH 278/839] ceph: don't include used caps in cap_wanted when copying files to cephfs, file data may stay in page cache after corresponding file is closed. Cached data use Fc capability. If we include Fc capability in cap_wanted, MDS will treat files with cached data as open files, and journal them in an EOpen event when trimming log segment. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 245ca381a6dc..a80a899e5c41 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1525,13 +1525,13 @@ retry: retry_locked: file_wanted = __ceph_caps_file_wanted(ci); used = __ceph_caps_used(ci); - want = file_wanted | used; issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; - retain = want | CEPH_CAP_PIN; + want = file_wanted; + retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { - if (want) { + if (file_wanted) { retain |= CEPH_CAP_ANY; /* be greedy */ } else if (S_ISDIR(inode->i_mode) && (issued & CEPH_CAP_FILE_SHARED) && From 89b52fe14de4d703ba837a7418bb4cd286dcc87f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 27 May 2015 09:59:48 +0800 Subject: [PATCH 279/839] ceph: fix flushing caps Current ceph_fsync() only flushes dirty caps and wait for them to be flushed. It doesn't wait for caps that has already been flushing. This patch makes ceph_fsync() wait for pending flushing caps too. Besides, this patch also makes caps_are_flushed() peroperly handle tid wrapping. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a80a899e5c41..e9b03b51b874 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1097,8 +1097,7 @@ void ceph_queue_caps_release(struct inode *inode) * caller should hold snap_rwsem (read), s_mutex. */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, - int op, int used, int want, int retain, int flushing, - unsigned *pflush_tid) + int op, int used, int want, int retain, int flushing) __releases(cap->ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; @@ -1170,8 +1169,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * first ack clean Ax. */ flush_tid = ++ci->i_cap_flush_last_tid; - if (pflush_tid) - *pflush_tid = flush_tid; dout(" cap_flush_tid %d\n", (int)flush_tid); for (i = 0; i < CEPH_CAP_BITS; i++) if (flushing & (1 << i)) @@ -1724,7 +1721,7 @@ ack: /* __send_cap drops i_ceph_lock */ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, - want, retain, flushing, NULL); + want, retain, flushing); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -1753,12 +1750,12 @@ ack: /* * Try to flush dirty caps back to the auth mds. */ -static int try_flush_caps(struct inode *inode, unsigned *flush_tid) +static int try_flush_caps(struct inode *inode, u16 flush_tid[]) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - int flushing = 0; struct ceph_mds_session *session = NULL; + int flushing = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1787,17 +1784,19 @@ retry: /* __send_cap drops i_ceph_lock */ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, - cap->issued | cap->implemented, flushing, - flush_tid); - if (!delayed) - goto out_unlocked; + cap->issued | cap->implemented, flushing); spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); + if (delayed) + __cap_delay_requeue(mdsc, ci); } + + flushing = ci->i_flushing_caps; + if (flushing) + memcpy(flush_tid, ci->i_cap_flush_tid, + sizeof(ci->i_cap_flush_tid)); out: spin_unlock(&ci->i_ceph_lock); -out_unlocked: if (session) mutex_unlock(&session->s_mutex); return flushing; @@ -1806,19 +1805,22 @@ out_unlocked: /* * Return true if we've flushed caps through the given flush_tid. */ -static int caps_are_flushed(struct inode *inode, unsigned tid) +static int caps_are_flushed(struct inode *inode, u16 flush_tid[]) { struct ceph_inode_info *ci = ceph_inode(inode); int i, ret = 1; spin_lock(&ci->i_ceph_lock); - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((ci->i_flushing_caps & (1 << i)) && - ci->i_cap_flush_tid[i] <= tid) { + for (i = 0; i < CEPH_CAP_BITS; i++) { + if (!(ci->i_flushing_caps & (1 << i))) + continue; + // tid only has 16 bits. we need to handle wrapping + if ((s16)(ci->i_cap_flush_tid[i] - flush_tid[i]) <= 0) { /* still flushing this bit */ ret = 0; break; } + } spin_unlock(&ci->i_ceph_lock); return ret; } @@ -1871,7 +1873,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u16 flush_tid[CEPH_CAP_BITS]; int ret; int dirty; @@ -1883,7 +1885,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; mutex_lock(&inode->i_mutex); - dirty = try_flush_caps(inode, &flush_tid); + dirty = try_flush_caps(inode, flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); /* @@ -1892,7 +1894,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) * wait for that) */ if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { - dout("fsync waiting for flush_tid %u\n", flush_tid); ret = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } @@ -1911,14 +1912,14 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u16 flush_tid[CEPH_CAP_BITS]; int err = 0; int dirty; int wait = wbc->sync_mode == WB_SYNC_ALL; dout("write_inode %p wait=%d\n", inode, wait); if (wait) { - dirty = try_flush_caps(inode, &flush_tid); + dirty = try_flush_caps(inode, flush_tid); if (dirty) err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); @@ -1988,7 +1989,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, __ceph_caps_used(ci), __ceph_caps_wanted(ci), cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); + ci->i_flushing_caps); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2027,7 +2028,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, __ceph_caps_used(ci), __ceph_caps_wanted(ci), cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); + ci->i_flushing_caps); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); From da819c8150c5b6e6a6a21ee41135b88f6cd18c3e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 27 May 2015 11:19:34 +0800 Subject: [PATCH 280/839] ceph: fix directory fsync fsync() on directory should flush dirty caps and wait for any uncommitted directory opertions to commit. But ceph_dir_fsync() only waits for uncommitted directory opertions. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 73 +++++++++++++++++++++++++++++++++++++++++++------- fs/ceph/dir.c | 56 +------------------------------------- 2 files changed, 65 insertions(+), 64 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e9b03b51b874..dc988337f841 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1838,13 +1838,16 @@ static void sync_write_wait(struct inode *inode) struct ceph_osd_request *req; u64 last_tid; + if (!S_ISREG(inode->i_mode)) + return; + spin_lock(&ci->i_unsafe_lock); if (list_empty(head)) goto out; /* set upper bound as _last_ entry in chain */ - req = list_entry(head->prev, struct ceph_osd_request, - r_unsafe_item); + req = list_last_entry(head, struct ceph_osd_request, + r_unsafe_item); last_tid = req->r_tid; do { @@ -1862,13 +1865,59 @@ static void sync_write_wait(struct inode *inode) */ if (list_empty(head)) break; - req = list_entry(head->next, struct ceph_osd_request, - r_unsafe_item); + req = list_first_entry(head, struct ceph_osd_request, + r_unsafe_item); } while (req->r_tid < last_tid); out: spin_unlock(&ci->i_unsafe_lock); } +/* + * wait for any uncommitted directory operations to commit. + */ +static int unsafe_dirop_wait(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct list_head *head = &ci->i_unsafe_dirops; + struct ceph_mds_request *req; + u64 last_tid; + int ret = 0; + + if (!S_ISDIR(inode->i_mode)) + return 0; + + spin_lock(&ci->i_unsafe_lock); + if (list_empty(head)) + goto out; + + req = list_last_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + last_tid = req->r_tid; + + do { + ceph_mdsc_get_request(req); + spin_unlock(&ci->i_unsafe_lock); + + dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", + inode, req->r_tid, last_tid); + ret = !wait_for_completion_timeout(&req->r_safe_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (ret) + ret = -EIO; /* timed out */ + + ceph_mdsc_put_request(req); + + spin_lock(&ci->i_unsafe_lock); + if (ret || list_empty(head)) + break; + req = list_first_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + } while (req->r_tid < last_tid); +out: + spin_unlock(&ci->i_unsafe_lock); + return ret; +} + int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -1882,24 +1931,30 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) - return ret; + goto out; + + if (datasync) + goto out; + mutex_lock(&inode->i_mutex); dirty = try_flush_caps(inode, flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + ret = unsafe_dirop_wait(inode); + /* * only wait on non-file metadata writeback (the mds * can recover size and mtime, so we don't need to * wait for that) */ - if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { + if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { ret = wait_event_interruptible(ci->i_cap_wq, - caps_are_flushed(inode, flush_tid)); + caps_are_flushed(inode, flush_tid)); } - - dout("fsync %p%s done\n", inode, datasync ? " datasync" : ""); mutex_unlock(&inode->i_mutex); +out: + dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3dec27e36417..424e23138c59 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1223,60 +1223,6 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, return size - left; } -/* - * an fsync() on a dir will wait for any uncommitted directory - * operations to commit. - */ -static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - dout("dir_fsync %p\n", inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; - mutex_lock(&inode->i_mutex); - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_entry(head->prev, - struct ceph_mds_request, r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("dir_fsync %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - ret = !wait_for_completion_timeout(&req->r_safe_completion, - ceph_timeout_jiffies(req->r_timeout)); - if (ret) - ret = -EIO; /* timed out */ - - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_entry(head->next, - struct ceph_mds_request, r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - mutex_unlock(&inode->i_mutex); - - return ret; -} - /* * We maintain a private dentry LRU. * @@ -1347,7 +1293,7 @@ const struct file_operations ceph_dir_fops = { .open = ceph_open, .release = ceph_release, .unlocked_ioctl = ceph_ioctl, - .fsync = ceph_dir_fsync, + .fsync = ceph_fsync, }; const struct file_operations ceph_snapdir_fops = { From 6c13a6bb55df6666275b992ba76620324429d7cf Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Wed, 10 Jun 2015 21:13:25 +0800 Subject: [PATCH 281/839] libceph: fix wrong name "Ceph filesystem for Linux" modinfo libceph prints the module name "Ceph filesystem for Linux", which is same as the real fs module ceph. It's confusing. Signed-off-by: Hong Zhiguo Signed-off-by: Ilya Dryomov --- net/ceph/ceph_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 925d0c890b80..cb7db320dd27 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -741,5 +741,5 @@ module_exit(exit_ceph_lib); MODULE_AUTHOR("Sage Weil "); MODULE_AUTHOR("Yehuda Sadeh "); MODULE_AUTHOR("Patience Warnick "); -MODULE_DESCRIPTION("Ceph filesystem for Linux"); +MODULE_DESCRIPTION("Ceph core library"); MODULE_LICENSE("GPL"); From 553adfd941f8ca622965ef809553d918ea039929 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 9 Jun 2015 15:48:57 +0800 Subject: [PATCH 282/839] ceph: track pending caps flushing accurately Previously we do not trace accurate TID for flushing caps. when MDS failovers, we have no choice but to re-send all flushing caps with a new TID. This can cause problem because MDS can has already flushed some caps and has issued the same caps to other client. The re-sent cap flush has a new TID, which makes MDS unable to detect if it has already processed the cap flush. This patch adds code to track pending caps flushing accurately. When re-sending cap flush is needed, we use its original flush TID. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 245 ++++++++++++++++++++++++++++--------------- fs/ceph/inode.c | 3 +- fs/ceph/mds_client.c | 20 ++++ fs/ceph/mds_client.h | 1 + fs/ceph/super.h | 11 +- 5 files changed, 192 insertions(+), 88 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index dc988337f841..9a25f8d66fbc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1097,7 +1097,8 @@ void ceph_queue_caps_release(struct inode *inode) * caller should hold snap_rwsem (read), s_mutex. */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, - int op, int used, int want, int retain, int flushing) + int op, int used, int want, int retain, int flushing, + u64 flush_tid) __releases(cap->ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; @@ -1115,8 +1116,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 xattr_version = 0; struct ceph_buffer *xattr_blob = NULL; int delayed = 0; - u64 flush_tid = 0; - int i; int ret; bool inline_data; @@ -1160,24 +1159,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, cap->implemented &= cap->issued | used; cap->mds_wanted = want; - if (flushing) { - /* - * assign a tid for flush operations so we can avoid - * flush1 -> dirty1 -> flush2 -> flushack1 -> mark - * clean type races. track latest tid for every bit - * so we can handle flush AxFw, flush Fw, and have the - * first ack clean Ax. - */ - flush_tid = ++ci->i_cap_flush_last_tid; - dout(" cap_flush_tid %d\n", (int)flush_tid); - for (i = 0; i < CEPH_CAP_BITS; i++) - if (flushing & (1 << i)) - ci->i_cap_flush_tid[i] = flush_tid; - - follows = ci->i_head_snapc->seq; - } else { - follows = 0; - } + follows = flushing ? ci->i_head_snapc->seq : 0; keep = cap->implemented; seq = cap->seq; @@ -1311,7 +1293,10 @@ retry: goto retry; } - capsnap->flush_tid = ++ci->i_cap_flush_last_tid; + spin_lock(&mdsc->cap_dirty_lock); + capsnap->flush_tid = ++mdsc->last_cap_flush_tid; + spin_unlock(&mdsc->cap_dirty_lock); + atomic_inc(&capsnap->nref); if (list_empty(&capsnap->flushing_item)) list_add_tail(&capsnap->flushing_item, @@ -1407,6 +1392,29 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) return dirty; } +static void __add_cap_flushing_to_inode(struct ceph_inode_info *ci, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &ci->i_cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, i_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->i_node, parent, p); + rb_insert_color(&cf->i_node, &ci->i_cap_flush_tree); +} + /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. @@ -1414,10 +1422,12 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) * Called under i_ceph_lock. */ static int __mark_caps_flushing(struct inode *inode, - struct ceph_mds_session *session) + struct ceph_mds_session *session, + u64 *flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap_flush *cf; int flushing; BUG_ON(ci->i_dirty_caps == 0); @@ -1432,9 +1442,14 @@ static int __mark_caps_flushing(struct inode *inode, ci->i_dirty_caps = 0; dout(" inode %p now !dirty\n", inode); + cf = kmalloc(sizeof(*cf), GFP_ATOMIC); + cf->caps = flushing; + spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); + cf->tid = ++mdsc->last_cap_flush_tid; + if (list_empty(&ci->i_flushing_item)) { ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); @@ -1448,6 +1463,9 @@ static int __mark_caps_flushing(struct inode *inode, } spin_unlock(&mdsc->cap_dirty_lock); + __add_cap_flushing_to_inode(ci, cf); + + *flush_tid = cf->tid; return flushing; } @@ -1493,6 +1511,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; + u64 flush_tid; int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; @@ -1711,17 +1730,20 @@ ack: took_snap_rwsem = 1; } - if (cap == ci->i_auth_cap && ci->i_dirty_caps) - flushing = __mark_caps_flushing(inode, session); - else + if (cap == ci->i_auth_cap && ci->i_dirty_caps) { + flushing = __mark_caps_flushing(inode, session, + &flush_tid); + } else { flushing = 0; + flush_tid = 0; + } mds = cap->mds; /* remember mds, so we don't repeat */ sent++; /* __send_cap drops i_ceph_lock */ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, - want, retain, flushing); + want, retain, flushing, flush_tid); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -1750,12 +1772,13 @@ ack: /* * Try to flush dirty caps back to the auth mds. */ -static int try_flush_caps(struct inode *inode, u16 flush_tid[]) +static int try_flush_caps(struct inode *inode, u64 *ptid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_session *session = NULL; int flushing = 0; + u64 flush_tid = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1780,46 +1803,52 @@ retry: if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) goto out; - flushing = __mark_caps_flushing(inode, session); + flushing = __mark_caps_flushing(inode, session, &flush_tid); /* __send_cap drops i_ceph_lock */ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, - cap->issued | cap->implemented, flushing); + (cap->issued | cap->implemented), + flushing, flush_tid); - spin_lock(&ci->i_ceph_lock); - if (delayed) + if (delayed) { + spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); + spin_unlock(&ci->i_ceph_lock); + } + } else { + struct rb_node *n = rb_last(&ci->i_cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, i_node); + flush_tid = cf->tid; + } + flushing = ci->i_flushing_caps; + spin_unlock(&ci->i_ceph_lock); } - - flushing = ci->i_flushing_caps; - if (flushing) - memcpy(flush_tid, ci->i_cap_flush_tid, - sizeof(ci->i_cap_flush_tid)); out: - spin_unlock(&ci->i_ceph_lock); if (session) mutex_unlock(&session->s_mutex); + + *ptid = flush_tid; return flushing; } /* * Return true if we've flushed caps through the given flush_tid. */ -static int caps_are_flushed(struct inode *inode, u16 flush_tid[]) +static int caps_are_flushed(struct inode *inode, u64 flush_tid) { struct ceph_inode_info *ci = ceph_inode(inode); - int i, ret = 1; + struct ceph_cap_flush *cf; + struct rb_node *n; + int ret = 1; spin_lock(&ci->i_ceph_lock); - for (i = 0; i < CEPH_CAP_BITS; i++) { - if (!(ci->i_flushing_caps & (1 << i))) - continue; - // tid only has 16 bits. we need to handle wrapping - if ((s16)(ci->i_cap_flush_tid[i] - flush_tid[i]) <= 0) { - /* still flushing this bit */ + n = rb_first(&ci->i_cap_flush_tree); + if (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid <= flush_tid) ret = 0; - break; - } } spin_unlock(&ci->i_ceph_lock); return ret; @@ -1922,7 +1951,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - u16 flush_tid[CEPH_CAP_BITS]; + u64 flush_tid; int ret; int dirty; @@ -1938,7 +1967,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); - dirty = try_flush_caps(inode, flush_tid); + dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); ret = unsafe_dirop_wait(inode); @@ -1967,14 +1996,14 @@ out: int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); - u16 flush_tid[CEPH_CAP_BITS]; + u64 flush_tid; int err = 0; int dirty; int wait = wbc->sync_mode == WB_SYNC_ALL; dout("write_inode %p wait=%d\n", inode, wait); if (wait) { - dirty = try_flush_caps(inode, flush_tid); + dirty = try_flush_caps(inode, &flush_tid); if (dirty) err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); @@ -2022,6 +2051,51 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, } } +static int __kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_inode_info *ci) +{ + struct inode *inode = &ci->vfs_inode; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + int delayed = 0; + u64 first_tid = 0; + + while (true) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", inode, + cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + break; + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid >= first_tid) + break; + } + if (!n) { + spin_unlock(&ci->i_ceph_lock); + break; + } + + cf = rb_entry(n, struct ceph_cap_flush, i_node); + first_tid = cf->tid + 1; + + dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, + cap, cf->tid, ceph_cap_string(cf->caps)); + delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + cf->caps, cf->tid); + } + return delayed; +} + void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { @@ -2031,28 +2105,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { - struct inode *inode = &ci->vfs_inode; - struct ceph_cap *cap; - int delayed = 0; - - spin_lock(&ci->i_ceph_lock); - cap = ci->i_auth_cap; - if (cap && cap->session == session) { - dout("kick_flushing_caps %p cap %p %s\n", inode, - cap, ceph_cap_string(ci->i_flushing_caps)); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps); - if (delayed) { - spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); - spin_unlock(&ci->i_ceph_lock); - } - } else { - pr_err("%p auth cap %p not mds%d ???\n", inode, - cap, session->s_mds); + int delayed = __kick_flushing_caps(mdsc, session, ci); + if (delayed) { + spin_lock(&ci->i_ceph_lock); + __cap_delay_requeue(mdsc, ci); spin_unlock(&ci->i_ceph_lock); } } @@ -2064,7 +2120,6 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; - int delayed = 0; spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; @@ -2074,16 +2129,16 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, __ceph_flush_snaps(ci, &session, 1); if (ci->i_flushing_caps) { + int delayed; + spin_lock(&mdsc->cap_dirty_lock); list_move_tail(&ci->i_flushing_item, &cap->session->s_cap_flushing); spin_unlock(&mdsc->cap_dirty_lock); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps); + spin_unlock(&ci->i_ceph_lock); + + delayed = __kick_flushing_caps(mdsc, session, ci); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2836,16 +2891,29 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_cap_flush *cf; + struct rb_node *n; + LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; int drop = 0; - int i; - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((dirty & (1 << i)) && - (u16)flush_tid == ci->i_cap_flush_tid[i]) - cleaned |= 1 << i; + n = rb_first(&ci->i_cap_flush_tree); + while (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + n = rb_next(&cf->i_node); + if (cf->tid == flush_tid) + cleaned = cf->caps; + if (cf->tid <= flush_tid) { + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add_tail(&cf->list, &to_remove); + } else { + cleaned &= ~cf->caps; + if (!cleaned) + break; + } + } dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," " flushing %s -> %s\n", @@ -2890,6 +2958,13 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, out: spin_unlock(&ci->i_ceph_lock); + + while (!list_empty(&to_remove)) { + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + kfree(cf); + } if (drop) iput(inode); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1c991df276c9..6d3f19db8c8a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -417,8 +417,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_dirty_item); INIT_LIST_HEAD(&ci->i_flushing_item); ci->i_cap_flush_seq = 0; - ci->i_cap_flush_last_tid = 0; - memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid)); + ci->i_cap_flush_tree = RB_ROOT; init_waitqueue_head(&ci->i_cap_wq); ci->i_hold_caps_min = 0; ci->i_hold_caps_max = 0; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8080d486a991..839901f51512 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1142,6 +1142,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + LIST_HEAD(to_remove); int drop = 0; dout("removing cap %p, ci is %p, inode is %p\n", @@ -1149,9 +1150,19 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&ci->i_ceph_lock); __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { + struct ceph_cap_flush *cf; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + while (true) { + struct rb_node *n = rb_first(&ci->i_cap_flush_tree); + if (!n) + break; + cf = rb_entry(n, struct ceph_cap_flush, i_node); + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add(&cf->list, &to_remove); + } + spin_lock(&mdsc->cap_dirty_lock); if (!list_empty(&ci->i_dirty_item)) { pr_warn_ratelimited( @@ -1173,8 +1184,16 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } spin_unlock(&mdsc->cap_dirty_lock); + } spin_unlock(&ci->i_ceph_lock); + while (!list_empty(&to_remove)) { + struct ceph_cap_flush *cf; + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + kfree(cf); + } while (drop--) iput(inode); return 0; @@ -3408,6 +3427,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->cap_flush_seq = 0; + mdsc->last_cap_flush_tid = 1; INIT_LIST_HEAD(&mdsc->cap_dirty); INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); mdsc->num_cap_flushing = 0; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 509d6822e9b1..19f6084203f0 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -307,6 +307,7 @@ struct ceph_mds_client { spinlock_t snap_flush_lock; u64 cap_flush_seq; + u64 last_cap_flush_tid; struct list_head cap_dirty; /* inodes with dirty caps */ struct list_head cap_dirty_migrating; /* ...that are migration... */ int num_cap_flushing; /* # caps we are flushing */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c4961353d058..cc597f52e046 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -186,6 +186,15 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) } } +struct ceph_cap_flush { + u64 tid; + int caps; + union { + struct rb_node i_node; + struct list_head list; + }; +}; + /* * The frag tree describes how a directory is fragmented, potentially across * multiple metadata servers. It is also used to indicate points where @@ -299,7 +308,7 @@ struct ceph_inode_info { /* we need to track cap writeback on a per-cap-bit basis, to allow * overlapping, pipelined cap flushes to the mds. we can probably * reduce the tid to 8 bits if we're concerned about inode size. */ - u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS]; + struct rb_root i_cap_flush_tree; wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ unsigned long i_hold_caps_min; /* jiffies */ unsigned long i_hold_caps_max; /* jiffies */ From 8310b08913eca8aee98744c9aff1ec0d1f603b19 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 9 Jun 2015 17:20:12 +0800 Subject: [PATCH 283/839] ceph: track pending caps flushing globally So we know TID of the oldest pending caps flushing. Later patch will send this information to MDS, so that MDS can trim its completed caps flush list. Tracking pending caps flushing globally also simplifies syncfs code. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 50 +++++++++++++++++++----- fs/ceph/inode.c | 1 - fs/ceph/mds_client.c | 93 +++++++++++++++++++++++--------------------- fs/ceph/mds_client.h | 2 +- fs/ceph/super.h | 2 +- 5 files changed, 91 insertions(+), 57 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9a25f8d66fbc..0295048724d2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1415,6 +1415,29 @@ static void __add_cap_flushing_to_inode(struct ceph_inode_info *ci, rb_insert_color(&cf->i_node, &ci->i_cap_flush_tree); } +static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &mdsc->cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, g_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->g_node, parent, p); + rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); +} + /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. @@ -1449,17 +1472,16 @@ static int __mark_caps_flushing(struct inode *inode, list_del_init(&ci->i_dirty_item); cf->tid = ++mdsc->last_cap_flush_tid; + __add_cap_flushing_to_mdsc(mdsc, cf); if (list_empty(&ci->i_flushing_item)) { - ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); mdsc->num_cap_flushing++; - dout(" inode %p now flushing seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing tid %llu\n", inode, cf->tid); } else { list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); - dout(" inode %p now flushing (more) seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing (more) tid %llu\n", + inode, cf->tid); } spin_unlock(&mdsc->cap_dirty_lock); @@ -2123,8 +2145,8 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; - dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, - ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + dout("kick_flushing_inode_caps %p flushing %s\n", inode, + ceph_cap_string(ci->i_flushing_caps)); __ceph_flush_snaps(ci, &session, 1); @@ -2921,12 +2943,23 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps & ~cleaned)); - if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned)) + if (list_empty(&to_remove) && !cleaned) goto out; ci->i_flushing_caps &= ~cleaned; spin_lock(&mdsc->cap_dirty_lock); + + if (!list_empty(&to_remove)) { + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (!cf || cf->tid > flush_tid) + wake_up_all(&mdsc->cap_flushing_wq); + } + if (ci->i_flushing_caps == 0) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) @@ -2936,7 +2969,6 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, struct ceph_inode_info, i_flushing_item)->vfs_inode); mdsc->num_cap_flushing--; - wake_up_all(&mdsc->cap_flushing_wq); dout(" inode %p now !flushing\n", inode); if (ci->i_dirty_caps == 0) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6d3f19db8c8a..3326302f5884 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -416,7 +416,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_flushing_caps = 0; INIT_LIST_HEAD(&ci->i_dirty_item); INIT_LIST_HEAD(&ci->i_flushing_item); - ci->i_cap_flush_seq = 0; ci->i_cap_flush_tree = RB_ROOT; init_waitqueue_head(&ci->i_cap_wq); ci->i_hold_caps_min = 0; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 839901f51512..31f6a78caa0a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1164,6 +1164,10 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, } spin_lock(&mdsc->cap_dirty_lock); + + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + if (!list_empty(&ci->i_dirty_item)) { pr_warn_ratelimited( " dropping dirty %s state for %p %lld\n", @@ -1467,39 +1471,56 @@ static int trim_caps(struct ceph_mds_client *mdsc, return 0; } -static int check_cap_flush(struct ceph_inode_info *ci, - u64 want_flush_seq, u64 want_snap_seq) +static int check_capsnap_flush(struct ceph_inode_info *ci, + u64 want_snap_seq) { - int ret1 = 1, ret2 = 1; + int ret = 1; spin_lock(&ci->i_ceph_lock); - if (want_flush_seq > 0 && ci->i_flushing_caps) - ret1 = ci->i_cap_flush_seq >= want_flush_seq; - if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) { struct ceph_cap_snap *capsnap = list_first_entry(&ci->i_cap_snaps, struct ceph_cap_snap, ci_item); - ret2 = capsnap->follows >= want_snap_seq; + ret = capsnap->follows >= want_snap_seq; } spin_unlock(&ci->i_ceph_lock); - return ret1 && ret2; + return ret; +} + +static int check_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_tid) +{ + struct rb_node *n; + struct ceph_cap_flush *cf; + int ret = 1; + + spin_lock(&mdsc->cap_dirty_lock); + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (cf && cf->tid <= want_flush_tid) { + dout("check_caps_flush still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); + ret = 0; + } + spin_unlock(&mdsc->cap_dirty_lock); + return ret; } /* * flush all dirty inode data to disk. * - * returns true if we've flushed through want_flush_seq + * returns true if we've flushed through want_flush_tid */ static void wait_caps_flush(struct ceph_mds_client *mdsc, - u64 want_flush_seq, u64 want_snap_seq) + u64 want_flush_tid, u64 want_snap_seq) { int mds; - dout("check_cap_flush want %lld\n", want_flush_seq); + dout("check_caps_flush want %llu snap want %llu\n", + want_flush_tid, want_snap_seq); mutex_lock(&mdsc->mutex); for (mds = 0; mds < mdsc->max_sessions; ) { struct ceph_mds_session *session = mdsc->sessions[mds]; - struct inode *inode1 = NULL, *inode2 = NULL; + struct inode *inode = NULL; if (!session) { mds++; @@ -1509,58 +1530,40 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); - if (!list_empty(&session->s_cap_flushing)) { - struct ceph_inode_info *ci = - list_first_entry(&session->s_cap_flushing, - struct ceph_inode_info, - i_flushing_item); - - if (!check_cap_flush(ci, want_flush_seq, 0)) { - dout("check_cap_flush still flushing %p " - "seq %lld <= %lld to mds%d\n", - &ci->vfs_inode, ci->i_cap_flush_seq, - want_flush_seq, mds); - inode1 = igrab(&ci->vfs_inode); - } - } if (!list_empty(&session->s_cap_snaps_flushing)) { struct ceph_cap_snap *capsnap = list_first_entry(&session->s_cap_snaps_flushing, struct ceph_cap_snap, flushing_item); struct ceph_inode_info *ci = capsnap->ci; - if (!check_cap_flush(ci, 0, want_snap_seq)) { + if (!check_capsnap_flush(ci, want_snap_seq)) { dout("check_cap_flush still flushing snap %p " "follows %lld <= %lld to mds%d\n", &ci->vfs_inode, capsnap->follows, want_snap_seq, mds); - inode2 = igrab(&ci->vfs_inode); + inode = igrab(&ci->vfs_inode); } } mutex_unlock(&session->s_mutex); ceph_put_mds_session(session); - if (inode1) { + if (inode) { wait_event(mdsc->cap_flushing_wq, - check_cap_flush(ceph_inode(inode1), - want_flush_seq, 0)); - iput(inode1); - } - if (inode2) { - wait_event(mdsc->cap_flushing_wq, - check_cap_flush(ceph_inode(inode2), - 0, want_snap_seq)); - iput(inode2); - } - - if (!inode1 && !inode2) + check_capsnap_flush(ceph_inode(inode), + want_snap_seq)); + iput(inode); + } else { mds++; + } mutex_lock(&mdsc->mutex); } - mutex_unlock(&mdsc->mutex); - dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); + + wait_event(mdsc->cap_flushing_wq, + check_caps_flush(mdsc, want_flush_tid)); + + dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); } /* @@ -3426,8 +3429,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); - mdsc->cap_flush_seq = 0; mdsc->last_cap_flush_tid = 1; + mdsc->cap_flush_tree = RB_ROOT; INIT_LIST_HEAD(&mdsc->cap_dirty); INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); mdsc->num_cap_flushing = 0; @@ -3554,7 +3557,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) ceph_flush_dirty_caps(mdsc); spin_lock(&mdsc->cap_dirty_lock); - want_flush = mdsc->cap_flush_seq; + want_flush = mdsc->last_cap_flush_tid; spin_unlock(&mdsc->cap_dirty_lock); down_read(&mdsc->snap_rwsem); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 19f6084203f0..470be4eb25f3 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -306,8 +306,8 @@ struct ceph_mds_client { struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; - u64 cap_flush_seq; u64 last_cap_flush_tid; + struct rb_root cap_flush_tree; struct list_head cap_dirty; /* inodes with dirty caps */ struct list_head cap_dirty_migrating; /* ...that are migration... */ int num_cap_flushing; /* # caps we are flushing */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cc597f52e046..94d91471165f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -189,6 +189,7 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) struct ceph_cap_flush { u64 tid; int caps; + struct rb_node g_node; union { struct rb_node i_node; struct list_head list; @@ -304,7 +305,6 @@ struct ceph_inode_info { struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ struct list_head i_dirty_item, i_flushing_item; - u64 i_cap_flush_seq; /* we need to track cap writeback on a per-cap-bit basis, to allow * overlapping, pipelined cap flushes to the mds. we can probably * reduce the tid to 8 bits if we're concerned about inode size. */ From a2971c8ccb9bd7677a6c43cdbed9aacfef5e9f26 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 11:09:32 +0800 Subject: [PATCH 284/839] ceph: send TID of the oldest pending caps flush to MDS According to this information, MDS can trim its completed caps flush list (which is used to detect duplicated cap flush). Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 67 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0295048724d2..420272788e01 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -986,8 +986,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) static int send_cap_msg(struct ceph_mds_session *session, u64 ino, u64 cid, int op, int caps, int wanted, int dirty, - u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq, - u64 size, u64 max_size, + u32 seq, u64 flush_tid, u64 oldest_flush_tid, + u32 issue_seq, u32 mseq, u64 size, u64 max_size, struct timespec *mtime, struct timespec *atime, u64 time_warp_seq, kuid_t uid, kgid_t gid, umode_t mode, @@ -1001,20 +1001,23 @@ static int send_cap_msg(struct ceph_mds_session *session, size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" - " seq %u/%u mseq %u follows %lld size %llu/%llu" + " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op), cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted), ceph_cap_string(dirty), - seq, issue_seq, mseq, follows, size, max_size, + seq, issue_seq, flush_tid, oldest_flush_tid, + mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - /* flock buffer size + inline version + inline data size */ - extra_len = 4 + 8 + 4; + /* flock buffer size + inline version + inline data size + + * osd_epoch_barrier + oldest_flush_tid */ + extra_len = 4 + 8 + 4 + 4 + 8; msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, GFP_NOFS, false); if (!msg) return -ENOMEM; + msg->hdr.version = cpu_to_le16(6); msg->hdr.tid = cpu_to_le64(flush_tid); fc = msg->front.iov_base; @@ -1050,6 +1053,10 @@ static int send_cap_msg(struct ceph_mds_session *session, ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); /* inline data size */ ceph_encode_32(&p, 0); + /* osd_epoch_barrier */ + ceph_encode_32(&p, 0); + /* oldest_flush_tid */ + ceph_encode_64(&p, oldest_flush_tid); fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { @@ -1098,7 +1105,7 @@ void ceph_queue_caps_release(struct inode *inode) */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, int op, int used, int want, int retain, int flushing, - u64 flush_tid) + u64 flush_tid, u64 oldest_flush_tid) __releases(cap->ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; @@ -1187,7 +1194,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, - op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, + op, keep, want, flushing, seq, + flush_tid, oldest_flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, follows, inline_data); @@ -1307,8 +1315,8 @@ retry: inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, - capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, - capsnap->size, 0, + capsnap->dirty, 0, capsnap->flush_tid, 0, + 0, mseq, capsnap->size, 0, &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, @@ -1438,6 +1446,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); } +static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) +{ + struct rb_node *n = rb_first(&mdsc->cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, g_node); + return cf->tid; + } + return 0; +} + /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. @@ -1446,7 +1465,7 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, */ static int __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session, - u64 *flush_tid) + u64 *flush_tid, u64 *oldest_flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); @@ -1473,6 +1492,7 @@ static int __mark_caps_flushing(struct inode *inode, cf->tid = ++mdsc->last_cap_flush_tid; __add_cap_flushing_to_mdsc(mdsc, cf); + *oldest_flush_tid = __get_oldest_flush_tid(mdsc); if (list_empty(&ci->i_flushing_item)) { list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); @@ -1533,7 +1553,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; - u64 flush_tid; + u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; @@ -1754,10 +1774,14 @@ ack: if (cap == ci->i_auth_cap && ci->i_dirty_caps) { flushing = __mark_caps_flushing(inode, session, - &flush_tid); + &flush_tid, + &oldest_flush_tid); } else { flushing = 0; flush_tid = 0; + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); } mds = cap->mds; /* remember mds, so we don't repeat */ @@ -1765,7 +1789,8 @@ ack: /* __send_cap drops i_ceph_lock */ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, - want, retain, flushing, flush_tid); + want, retain, flushing, + flush_tid, oldest_flush_tid); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -1800,7 +1825,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_session *session = NULL; int flushing = 0; - u64 flush_tid = 0; + u64 flush_tid = 0, oldest_flush_tid = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1825,12 +1850,13 @@ retry: if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) goto out; - flushing = __mark_caps_flushing(inode, session, &flush_tid); + flushing = __mark_caps_flushing(inode, session, &flush_tid, + &oldest_flush_tid); /* __send_cap drops i_ceph_lock */ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, (cap->issued | cap->implemented), - flushing, flush_tid); + flushing, flush_tid, oldest_flush_tid); if (delayed) { spin_lock(&ci->i_ceph_lock); @@ -2083,6 +2109,11 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, struct rb_node *n; int delayed = 0; u64 first_tid = 0; + u64 oldest_flush_tid; + + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); while (true) { spin_lock(&ci->i_ceph_lock); @@ -2113,7 +2144,7 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, __ceph_caps_used(ci), __ceph_caps_wanted(ci), cap->issued | cap->implemented, - cf->caps, cf->tid); + cf->caps, cf->tid, oldest_flush_tid); } return delayed; } From e548e9b93d3e565e42b938a99804114565be1f81 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 15:17:56 +0800 Subject: [PATCH 285/839] ceph: re-send flushing caps (which are revoked) in reconnect stage if flushing caps were revoked, we should re-send the cap flush in client reconnect stage. This guarantees that MDS processes the cap flush message before issuing the flushing caps to other client. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 57 ++++++++++++++++++++++++++++++++++++++++---- fs/ceph/mds_client.c | 3 +++ fs/ceph/super.h | 7 ++++-- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 420272788e01..69a16044ec41 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1486,6 +1486,7 @@ static int __mark_caps_flushing(struct inode *inode, cf = kmalloc(sizeof(*cf), GFP_ATOMIC); cf->caps = flushing; + cf->kick = false; spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); @@ -2101,7 +2102,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, static int __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, - struct ceph_inode_info *ci) + struct ceph_inode_info *ci, + bool kick_all) { struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; @@ -2127,7 +2129,9 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { cf = rb_entry(n, struct ceph_cap_flush, i_node); - if (cf->tid >= first_tid) + if (cf->tid < first_tid) + continue; + if (kick_all || cf->kick) break; } if (!n) { @@ -2136,6 +2140,8 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, } cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = false; + first_tid = cf->tid + 1; dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, @@ -2149,6 +2155,49 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, return delayed; } +void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) +{ + struct ceph_inode_info *ci; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + + dout("early_kick_flushing_caps mds%d\n", session->s_mds); + list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", + &ci->vfs_inode, cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + continue; + } + + + /* + * if flushing caps were revoked, we re-send the cap flush + * in client reconnect stage. This guarantees MDS * processes + * the cap flush message before issuing the flushing caps to + * other client. + */ + if ((cap->issued & ci->i_flushing_caps) != + ci->i_flushing_caps) { + spin_unlock(&ci->i_ceph_lock); + if (!__kick_flushing_caps(mdsc, session, ci, true)) + continue; + spin_lock(&ci->i_ceph_lock); + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = true; + } + + spin_unlock(&ci->i_ceph_lock); + } +} + void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { @@ -2158,7 +2207,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { - int delayed = __kick_flushing_caps(mdsc, session, ci); + int delayed = __kick_flushing_caps(mdsc, session, ci, false); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2191,7 +2240,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, spin_unlock(&ci->i_ceph_lock); - delayed = __kick_flushing_caps(mdsc, session, ci); + delayed = __kick_flushing_caps(mdsc, session, ci, true); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 31f6a78caa0a..89e4305a94d4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2982,6 +2982,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, reply->hdr.data_len = cpu_to_le32(pagelist->length); ceph_msg_data_add_pagelist(reply, pagelist); + + ceph_early_kick_flushing_caps(mdsc, session); + ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 94d91471165f..e7f13f742357 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -189,9 +189,10 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) struct ceph_cap_flush { u64 tid; int caps; - struct rb_node g_node; + bool kick; + struct rb_node g_node; // global union { - struct rb_node i_node; + struct rb_node i_node; // inode struct list_head list; }; }; @@ -868,6 +869,8 @@ extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); extern int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync); +extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, From f66fd9f0952187d274c13c136b74548f792c1925 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 17:26:13 +0800 Subject: [PATCH 286/839] ceph: pre-allocate data structure that tracks caps flushing Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 19 +++++++++++++++---- fs/ceph/caps.c | 26 ++++++++++++++++++++++---- fs/ceph/file.c | 18 ++++++++++++++++-- fs/ceph/inode.c | 15 +++++++++++++-- fs/ceph/mds_client.c | 6 +++++- fs/ceph/super.c | 8 ++++++++ fs/ceph/super.h | 6 +++++- fs/ceph/xattr.c | 20 ++++++++++++++++++-- include/linux/ceph/libceph.h | 1 + 9 files changed, 103 insertions(+), 16 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5f53ac0d9d7c..7edf3c49e661 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1308,12 +1308,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; + struct ceph_cap_flush *prealloc_cf; struct page *page = vmf->page; loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; int want, got, ret; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return VM_FAULT_SIGBUS; + if (ci->i_inline_version != CEPH_INLINE_NONE) { struct page *locked_page = NULL; if (off == 0) { @@ -1323,8 +1328,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) - return VM_FAULT_SIGBUS; + if (ret < 0) { + ret = VM_FAULT_SIGBUS; + goto out_free; + } } if (off + PAGE_CACHE_SIZE <= size) @@ -1346,7 +1353,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) break; if (ret != -ERESTARTSYS) { WARN_ON(1); - return VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; + goto out_free; } } dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1381,7 +1389,8 @@ out: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1390,6 +1399,8 @@ out: dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); +out_free: + ceph_free_cap_flush(prealloc_cf); return ret; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 69a16044ec41..dd7b20adf1d4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1356,7 +1356,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) * Caller is then responsible for calling __mark_inode_dirty with the * returned flags value. */ -int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) +int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; @@ -1376,6 +1377,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { + WARN_ON_ONCE(ci->i_prealloc_cap_flush); + swap(ci->i_prealloc_cap_flush, *pcf); + if (!ci->i_head_snapc) { WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); ci->i_head_snapc = ceph_get_snap_context( @@ -1391,6 +1395,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ihold(inode); dirty |= I_DIRTY_SYNC; } + } else { + WARN_ON_ONCE(!ci->i_prealloc_cap_flush); } BUG_ON(list_empty(&ci->i_dirty_item)); if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && @@ -1446,6 +1452,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); } +struct ceph_cap_flush *ceph_alloc_cap_flush(void) +{ + return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); +} + +void ceph_free_cap_flush(struct ceph_cap_flush *cf) +{ + if (cf) + kmem_cache_free(ceph_cap_flush_cachep, cf); +} + static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) { struct rb_node *n = rb_first(&mdsc->cap_flush_tree); @@ -1469,11 +1486,12 @@ static int __mark_caps_flushing(struct inode *inode, { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap_flush *cf; + struct ceph_cap_flush *cf = NULL; int flushing; BUG_ON(ci->i_dirty_caps == 0); BUG_ON(list_empty(&ci->i_dirty_item)); + BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", @@ -1484,7 +1502,7 @@ static int __mark_caps_flushing(struct inode *inode, ci->i_dirty_caps = 0; dout(" inode %p now !dirty\n", inode); - cf = kmalloc(sizeof(*cf), GFP_ATOMIC); + swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; cf->kick = false; @@ -3075,7 +3093,7 @@ out: cf = list_first_entry(&to_remove, struct ceph_cap_flush, list); list_del(&cf->list); - kfree(cf); + ceph_free_cap_flush(cf); } if (drop) iput(inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0a76a370d798..8a4eb4d21d3c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -939,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; @@ -946,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -1050,7 +1055,8 @@ retry_snap: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1074,6 +1080,7 @@ retry_snap: out: mutex_unlock(&inode->i_mutex); out_unlocked: + ceph_free_cap_flush(prealloc_cf); current->backing_dev_info = NULL; return written ? written : err; } @@ -1270,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; + struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; int ret = 0; @@ -1282,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode, if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -1328,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) { spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1337,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode, ceph_put_cap_refs(ci, got); unlock: mutex_unlock(&inode->i_mutex); + ceph_free_cap_flush(prealloc_cf); return ret; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3326302f5884..e86d1a4efc46 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -416,6 +416,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_flushing_caps = 0; INIT_LIST_HEAD(&ci->i_dirty_item); INIT_LIST_HEAD(&ci->i_flushing_item); + ci->i_prealloc_cap_flush = NULL; ci->i_cap_flush_tree = RB_ROOT; init_waitqueue_head(&ci->i_cap_wq); ci->i_hold_caps_min = 0; @@ -1720,6 +1721,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf; int issued; int release = 0, dirtied = 0; int mask = 0; @@ -1734,10 +1736,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, USE_AUTH_MDS); - if (IS_ERR(req)) + if (IS_ERR(req)) { + ceph_free_cap_flush(prealloc_cf); return PTR_ERR(req); + } spin_lock(&ci->i_ceph_lock); issued = __ceph_caps_issued(ci, NULL); @@ -1895,7 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) dout("setattr %p ATTR_FILE ... hrm!\n", inode); if (dirtied) { - inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); + inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, + &prealloc_cf); inode->i_ctime = CURRENT_TIME; } @@ -1927,9 +1936,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) ceph_mdsc_put_request(req); if (mask & CEPH_SETATTR_SIZE) __ceph_do_pending_vmtruncate(inode); + ceph_free_cap_flush(prealloc_cf); return err; out_put: ceph_mdsc_put_request(req); + ceph_free_cap_flush(prealloc_cf); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 89e4305a94d4..8d73fe9d488b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1189,6 +1189,10 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, } spin_unlock(&mdsc->cap_dirty_lock); + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { + list_add(&ci->i_prealloc_cap_flush->list, &to_remove); + ci->i_prealloc_cap_flush = NULL; + } } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { @@ -1196,7 +1200,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, cf = list_first_entry(&to_remove, struct ceph_cap_flush, list); list_del(&cf->list); - kfree(cf); + ceph_free_cap_flush(cf); } while (drop--) iput(inode); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index edeb83c43112..d1c833c321b9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -622,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; @@ -647,6 +648,10 @@ static int __init init_caches(void) SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (ceph_cap_cachep == NULL) goto bad_cap; + ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_cap_flush_cachep == NULL) + goto bad_cap_flush; ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); @@ -665,6 +670,8 @@ static int __init init_caches(void) bad_file: kmem_cache_destroy(ceph_dentry_cachep); bad_dentry: + kmem_cache_destroy(ceph_cap_flush_cachep); +bad_cap_flush: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); @@ -681,6 +688,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e7f13f742357..4415e977d72b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -309,6 +309,7 @@ struct ceph_inode_info { /* we need to track cap writeback on a per-cap-bit basis, to allow * overlapping, pipelined cap flushes to the mds. we can probably * reduce the tid to 8 bits if we're concerned about inode size. */ + struct ceph_cap_flush *i_prealloc_cap_flush; struct rb_root i_cap_flush_tree; wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ unsigned long i_hold_caps_min; /* jiffies */ @@ -578,7 +579,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) { return ci->i_dirty_caps | ci->i_flushing_caps; } -extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); +extern struct ceph_cap_flush *ceph_alloc_cap_flush(void); +extern void ceph_free_cap_flush(struct ceph_cap_flush *cf); +extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf); extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, struct ceph_cap *ocap, int mask); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c6f7d9b82085..819163d8313b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -912,6 +912,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int dirty = 0; @@ -950,6 +951,10 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, if (!xattr) goto out; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + goto out; + spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); @@ -991,7 +996,8 @@ retry: flags, value ? 1 : -1, &xattr); if (!err) { - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; } @@ -1001,6 +1007,7 @@ retry: up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: @@ -1010,6 +1017,7 @@ do_sync_unlocked: up_read(&mdsc->snap_rwsem); err = ceph_sync_setxattr(dentry, name, value, size, flags); out: + ceph_free_cap_flush(prealloc_cf); kfree(newname); kfree(newval); kfree(xattr); @@ -1062,6 +1070,7 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int required_blob_size; @@ -1079,6 +1088,10 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) goto do_sync_unlocked; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: @@ -1120,7 +1133,8 @@ retry: err = __remove_xattr_by_name(ceph_inode(inode), name); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&ci->i_ceph_lock); @@ -1128,12 +1142,14 @@ retry: up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); + ceph_free_cap_flush(prealloc_cf); err = ceph_send_removexattr(dentry, name); return err; } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d73a569f9bf5..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -174,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; From 687265e5a885d6308f5d73e738efe3c2674fa218 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sat, 13 Jun 2015 17:27:05 +0800 Subject: [PATCH 287/839] ceph: switch some GFP_NOFS memory allocation to GFP_KERNEL GFP_NOFS memory allocation is required for page writeback path. But there is no need to use GFP_NOFS in syscall path and readpage path Signed-off-by: Yan, Zheng --- fs/ceph/acl.c | 4 ++-- fs/ceph/addr.c | 4 ++-- fs/ceph/dir.c | 10 +++++----- fs/ceph/file.c | 8 ++++---- fs/ceph/mds_client.c | 3 ++- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 64fa248343f6..8f84646f10e9 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -187,10 +187,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, val_size2 = posix_acl_xattr_size(default_acl->a_count); err = -ENOMEM; - tmp_buf = kmalloc(max(val_size1, val_size2), GFP_NOFS); + tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_NOFS); + pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL); if (!pagelist) goto out_err; ceph_pagelist_init(pagelist); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7edf3c49e661..97102038fe03 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -350,7 +350,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) /* build page vector */ nr_pages = calc_pages_for(0, len); - pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS); + pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); ret = -ENOMEM; if (!pages) goto out; @@ -362,7 +362,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) dout("start_read %p adding %p idx %lu\n", inode, page, page->index); if (add_to_page_cache_lru(page, &inode->i_data, page->index, - GFP_NOFS)) { + GFP_KERNEL)) { ceph_fscache_uncache_page(inode, page); page_cache_release(page); dout("start_read %p add_to_page_cache failed %p\n", diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 424e23138c59..b99f2ff8189d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -38,7 +38,7 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); + di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -231,7 +231,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name, int len) { kfree(fi->last_name); - fi->last_name = kmalloc(len+1, GFP_NOFS); + fi->last_name = kmalloc(len+1, GFP_KERNEL); if (!fi->last_name) return -ENOMEM; memcpy(fi->last_name, name, len); @@ -342,7 +342,7 @@ more: req->r_direct_hash = ceph_frag_value(frag); req->r_direct_is_hash = true; if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); + req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; @@ -764,7 +764,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, err = PTR_ERR(req); goto out; } - req->r_path2 = kstrdup(dest, GFP_NOFS); + req->r_path2 = kstrdup(dest, GFP_KERNEL); if (!req->r_path2) { err = -ENOMEM; ceph_mdsc_put_request(req); @@ -1189,7 +1189,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, return -EISDIR; if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_NOFS); + cf->dir_info = kmalloc(bufsize, GFP_KERNEL); if (!cf->dir_info) return -ENOMEM; cf->dir_info_len = diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8a4eb4d21d3c..424b5b540207 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -89,7 +89,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); - cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO); + cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO); if (cf == NULL) { ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ return -ENOMEM; @@ -483,7 +483,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, } } else { num_pages = calc_pages_for(off, len); - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); ret = striped_read(inode, off, len, pages, @@ -734,7 +734,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, */ num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -858,7 +858,7 @@ again: struct page *page = NULL; loff_t i_size; if (retry_op == READ_INLINE) { - page = __page_cache_alloc(GFP_NOFS); + page = __page_cache_alloc(GFP_KERNEL); if (!page) return -ENOMEM; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8d73fe9d488b..6aa07af67603 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1668,7 +1668,8 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, order = get_order(size * num_entries); while (order >= 0) { - rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, + rinfo->dir_in = (void*)__get_free_pages(GFP_KERNEL | + __GFP_NOWARN, order); if (rinfo->dir_in) break; From 8f529795bace5d6263b134f4ff3adccfc0a0cce6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 11:20:03 +0300 Subject: [PATCH 288/839] crush: fix crash from invalid 'take' argument Verify that the 'take' argument is a valid device or bucket. Otherwise ignore it (do not add the value to the working vector). Reflects ceph.git commit 9324d0a1af61e1c234cc48e2175b4e6320fff8f4. Signed-off-by: Ilya Dryomov --- net/ceph/crush/mapper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 5b47736d27d9..7568cb59b9e5 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -790,8 +790,15 @@ int crush_do_rule(const struct crush_map *map, switch (curstep->op) { case CRUSH_RULE_TAKE: - w[0] = curstep->arg1; - wsize = 1; + if ((curstep->arg1 >= 0 && + curstep->arg1 < map->max_devices) || + (-1-curstep->arg1 < map->max_buckets && + map->buckets[-1-curstep->arg1])) { + w[0] = curstep->arg1; + wsize = 1; + } else { + dprintk(" bad take value %d\n", curstep->arg1); + } break; case CRUSH_RULE_SET_CHOOSE_TRIES: From b459be739f97e2062b2ba77cfe8ea198dbd58904 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 13:21:07 +0300 Subject: [PATCH 289/839] crush: sync up with userspace .. up to ceph.git commit 1db1abc8328d ("crush: eliminate ad hoc diff between kernel and userspace"). This fixes a bunch of recently pulled coding style issues and makes includes a bit cleaner. A patch "crush:Make the function crush_ln static" from Nicholas Krause is folded in as crush_ln() has been made static in userspace as well. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 40 +++++++++- include/linux/crush/hash.h | 6 ++ include/linux/crush/mapper.h | 2 +- net/ceph/crush/crush.c | 13 ++- net/ceph/crush/crush_ln_table.h | 32 ++++---- net/ceph/crush/hash.c | 8 +- net/ceph/crush/mapper.c | 137 +++++++++++++++++++++----------- 7 files changed, 160 insertions(+), 78 deletions(-) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 9d84ce4ea0df..80d7c3a97cb8 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -1,15 +1,11 @@ - #ifdef __KERNEL__ # include +# include #else -# include -# include -# define kfree(x) do { if (x) free(x); } while (0) -# define BUG_ON(x) assert(!(x)) +# include "crush_compat.h" +# include "crush.h" #endif -#include - const char *crush_bucket_alg_name(int alg) { switch (alg) { @@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map) kfree(map->rules); } +#ifndef __KERNEL__ + kfree(map->choose_tries); +#endif kfree(map); } diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h index 6192c7fc958c..aae534c901a4 100644 --- a/net/ceph/crush/crush_ln_table.h +++ b/net/ceph/crush/crush_ln_table.h @@ -10,20 +10,20 @@ * */ -#if defined(__linux__) -#include -#elif defined(__FreeBSD__) -#include -#endif - #ifndef CEPH_CRUSH_LN_H #define CEPH_CRUSH_LN_H +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif -// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) -// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) - -static int64_t __RH_LH_tbl[128*2+2] = { +/* + * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) + * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) + */ +static __s64 __RH_LH_tbl[128*2+2] = { 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, @@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = { 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, 0x0000800000000000ll, 0x0000ffff00000000ll, - }; +}; - - // LL_tbl[k] = 2^48*log2(1.0+k/2^15); -static int64_t __LL_tbl[256] = { +/* + * LL_tbl[k] = 2^48*log2(1.0+k/2^15) + */ +static __s64 __LL_tbl[256] = { 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, @@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = { 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, }; - - - #endif diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5bb63e37a8a1..ed123af49eba 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -1,6 +1,8 @@ - -#include -#include +#ifdef __KERNEL__ +# include +#else +# include "hash.h" +#endif /* * Robert Jenkins' function for mixing 32-bit values diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 7568cb59b9e5..393bfb22d5bb 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -1,27 +1,31 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2015 Intel Corporation All Rights Reserved + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ #ifdef __KERNEL__ # include # include # include # include -# ifndef dprintk -# define dprintk(args...) -# endif +# include +# include #else -# include -# include -# include -# include -# define BUG_ON(x) assert(!(x)) -# define dprintk(args...) /* printf(args) */ -# define kmalloc(x, f) malloc(x) -# define kfree(x) free(x) +# include "crush_compat.h" +# include "crush.h" +# include "hash.h" #endif - -#include -#include #include "crush_ln_table.h" +#define dprintk(args...) /* printf(args) */ + /* * Implement the core CRUSH mapping algorithm. */ @@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, int i; for (i = bucket->h.size-1; i >= 0; i--) { - __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], + __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i], r, bucket->h.id); w &= 0xffff; dprintk("list_choose i=%d x=%d r=%d item %d weight %x " @@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, return bucket->h.items[high]; } -// compute 2^44*log2(input+1) -uint64_t crush_ln(unsigned xin) +/* compute 2^44*log2(input+1) */ +static __u64 crush_ln(unsigned int xin) { - unsigned x=xin, x1; - int iexpon, index1, index2; - uint64_t RH, LH, LL, xl64, result; + unsigned int x = xin, x1; + int iexpon, index1, index2; + __u64 RH, LH, LL, xl64, result; - x++; + x++; - // normalize input - iexpon = 15; - while(!(x&0x18000)) { x<<=1; iexpon--; } + /* normalize input */ + iexpon = 15; + while (!(x & 0x18000)) { + x <<= 1; + iexpon--; + } - index1 = (x>>8)<<1; - // RH ~ 2^56/index1 - RH = __RH_LH_tbl[index1 - 256]; - // LH ~ 2^48 * log2(index1/256) - LH = __RH_LH_tbl[index1 + 1 - 256]; + index1 = (x >> 8) << 1; + /* RH ~ 2^56/index1 */ + RH = __RH_LH_tbl[index1 - 256]; + /* LH ~ 2^48 * log2(index1/256) */ + LH = __RH_LH_tbl[index1 + 1 - 256]; - // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 - xl64 = (int64_t)x * RH; - xl64 >>= 48; - x1 = xl64; + /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */ + xl64 = (__s64)x * RH; + xl64 >>= 48; + x1 = xl64; - result = iexpon; - result <<= (12 + 32); + result = iexpon; + result <<= (12 + 32); - index2 = x1 & 0xff; - // LL ~ 2^48*log2(1.0+index2/2^15) - LL = __LL_tbl[index2]; + index2 = x1 & 0xff; + /* LL ~ 2^48*log2(1.0+index2/2^15) */ + LL = __LL_tbl[index2]; - LH = LH + LL; + LH = LH + LL; - LH >>= (48-12 - 32); - result += LH; + LH >>= (48 - 12 - 32); + result += LH; - return result; + return result; } @@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin) static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, int x, int r) { - unsigned i, high = 0; - unsigned u; - unsigned w; + unsigned int i, high = 0; + unsigned int u; + unsigned int w; __s64 ln, draw, high_draw = 0; for (i = 0; i < bucket->h.size; i++) { @@ -567,6 +574,10 @@ reject: out[outpos] = item; outpos++; count--; +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif } dprintk("CHOOSE returns %d\n", outpos); @@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map, } for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) { +#ifdef DEBUG_INDEP + if (out2 && ftotal) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif for (rep = outpos; rep < endpos; rep++) { if (out[rep] != CRUSH_ITEM_UNDEF) continue; @@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map, out2[rep] = CRUSH_ITEM_NONE; } } +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif +#ifdef DEBUG_INDEP + if (out2) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif } /** @@ -884,7 +927,7 @@ int crush_do_rule(const struct crush_map *map, 0); } else { out_size = ((numrep < (result_max-osize)) ? - numrep : (result_max-osize)); + numrep : (result_max-osize)); crush_choose_indep( map, map->buckets[-1-w[i]], @@ -930,5 +973,3 @@ int crush_do_rule(const struct crush_map *map, } return result_len; } - - From fdd4e15838e59c394a1ec4963b57c22c12608685 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 16 Jun 2015 20:48:56 +0800 Subject: [PATCH 290/839] ceph: rework dcache readdir Previously our dcache readdir code relies on that child dentries in directory dentry's d_subdir list are sorted by dentry's offset in descending order. When adding dentries to the dcache, if a dentry already exists, our readdir code moves it to head of directory dentry's d_subdir list. This design relies on dcache internals. Al Viro suggests using ncpfs's approach: keeping array of pointers to dentries in page cache of directory inode. the validity of those pointers are presented by directory inode's complete and ordered flags. When a dentry gets pruned, we clear directory inode's complete flag in the d_prune() callback. Before moving a dentry to other directory, we clear the ordered flag for both old and new directory. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 14 +- fs/ceph/dir.c | 319 ++++++++++++++++++++++--------------------- fs/ceph/file.c | 2 +- fs/ceph/inode.c | 118 ++++++++++++---- fs/ceph/mds_client.h | 3 + fs/ceph/super.h | 60 ++++---- 6 files changed, 298 insertions(+), 218 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index dd7b20adf1d4..dc10c9dd36c1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -833,7 +833,9 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) + if (ci->i_rdcache_ref || + (!S_ISDIR(ci->vfs_inode.i_mode) && /* ignore readdir cache */ + ci->vfs_inode.i_data.nrpages)) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -1651,9 +1653,10 @@ retry_locked: * If we fail, it's because pages are locked.... try again later. */ if ((!is_delayed || mdsc->stopping) && - ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ - inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ + !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ + ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ + inode->i_data.nrpages && /* have cached pages */ + (file_wanted == 0 || /* no open files */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ !tried_invalidate) { @@ -2805,7 +2808,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, * try to invalidate (once). (If there are dirty buffers, we * will invalidate _after_ writeback.) */ - if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && + if (!S_ISDIR(inode->i_mode) && /* don't invalidate readdir cache */ + ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && !ci->i_wrbuffer_ref) { if (try_nonblocking_invalidate(inode)) { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index b99f2ff8189d..9314b4ea2375 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -106,6 +106,27 @@ static int fpos_cmp(loff_t l, loff_t r) return (int)(fpos_off(l) - fpos_off(r)); } +/* + * make note of the last dentry we read, so we can + * continue at the same lexicographical point, + * regardless of what dir changes take place on the + * server. + */ +static int note_last_dentry(struct ceph_file_info *fi, const char *name, + int len, unsigned next_offset) +{ + char *buf = kmalloc(len+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + kfree(fi->last_name); + fi->last_name = buf; + memcpy(fi->last_name, name, len); + fi->last_name[len] = 0; + fi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", fi->last_name); + return 0; +} + /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on @@ -123,123 +144,113 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_file_info *fi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); - struct list_head *p; - struct dentry *dentry, *last; + struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *); int err = 0; + loff_t ptr_pos = 0; + struct ceph_readdir_cache_control cache_ctl = {}; - /* claim ref on last dentry we returned */ - last = fi->dentry; - fi->dentry = NULL; + dout("__dcache_readdir %p v%u at %llu\n", dir, shared_gen, ctx->pos); - dout("__dcache_readdir %p v%u at %llu (last %p)\n", - dir, shared_gen, ctx->pos, last); - - spin_lock(&parent->d_lock); - - /* start at beginning? */ - if (ctx->pos == 2 || last == NULL || - fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { - if (list_empty(&parent->d_subdirs)) - goto out_unlock; - p = parent->d_subdirs.prev; - dout(" initial p %p/%p\n", p->prev, p->next); - } else { - p = last->d_child.prev; + /* we can calculate cache index for the first dirfrag */ + if (ceph_frag_is_leftmost(fpos_frag(ctx->pos))) { + cache_ctl.index = fpos_off(ctx->pos) - 2; + BUG_ON(cache_ctl.index < 0); + ptr_pos = cache_ctl.index * sizeof(struct dentry *); } -more: - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - while (1) { - dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, - d_unhashed(dentry) ? "!hashed" : "hashed", - parent->d_subdirs.prev, parent->d_subdirs.next); - if (p == &parent->d_subdirs) { + while (true) { + pgoff_t pgoff; + bool emit_dentry; + + if (ptr_pos >= i_size_read(dir)) { fi->flags |= CEPH_F_ATEND; - goto out_unlock; + err = 0; + break; } - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + err = -EAGAIN; + pgoff = ptr_pos >> PAGE_CACHE_SHIFT; + if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) { + ceph_readdir_cache_release(&cache_ctl); + cache_ctl.page = find_lock_page(&dir->i_data, pgoff); + if (!cache_ctl.page) { + dout(" page %lu not found\n", pgoff); + break; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(cache_ctl.page); + cache_ctl.dentries = kmap(cache_ctl.page); + } + + rcu_read_lock(); + spin_lock(&parent->d_lock); + /* check i_size again here, because empty directory can be + * marked as complete while not holding the i_mutex. */ + if (ceph_dir_is_complete_ordered(dir) && + ptr_pos < i_size_read(dir)) + dentry = cache_ctl.dentries[cache_ctl.index % nsize]; + else + dentry = NULL; + spin_unlock(&parent->d_lock); + if (dentry && !lockref_get_not_dead(&dentry->d_lockref)) + dentry = NULL; + rcu_read_unlock(); + if (!dentry) + break; + + emit_dentry = false; + di = ceph_dentry(dentry); + spin_lock(&dentry->d_lock); if (di->lease_shared_gen == shared_gen && - !d_unhashed(dentry) && d_really_is_positive(dentry) && + d_really_is_positive(dentry) && ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR && ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH && - fpos_cmp(ctx->pos, di->offset) <= 0) - break; - dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry, - dentry, di->offset, - ctx->pos, d_unhashed(dentry) ? " unhashed" : "", - !d_inode(dentry) ? " null" : ""); - spin_unlock(&dentry->d_lock); - p = p->prev; - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - } - - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); - - /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete_ordered(dir)) { - dout(" lost dir complete on %p; falling back to mds\n", dir); - dput(dentry); - err = -EAGAIN; - goto out; - } - - dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, - dentry, dentry, d_inode(dentry)); - if (!dir_emit(ctx, dentry->d_name.name, - dentry->d_name.len, - ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino), - d_inode(dentry)->i_mode >> 12)) { - if (last) { - /* remember our position */ - fi->dentry = last; - fi->next_offset = fpos_off(di->offset); + fpos_cmp(ctx->pos, di->offset) <= 0) { + emit_dentry = true; } - dput(dentry); - return 0; + spin_unlock(&dentry->d_lock); + + if (emit_dentry) { + dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, + dentry, dentry, d_inode(dentry)); + ctx->pos = di->offset; + if (!dir_emit(ctx, dentry->d_name.name, + dentry->d_name.len, + ceph_translate_ino(dentry->d_sb, + d_inode(dentry)->i_ino), + d_inode(dentry)->i_mode >> 12)) { + dput(dentry); + err = 0; + break; + } + ctx->pos++; + + if (last) + dput(last); + last = dentry; + } else { + dput(dentry); + } + + cache_ctl.index++; + ptr_pos += sizeof(struct dentry *); } - - ctx->pos = di->offset + 1; - - if (last) - dput(last); - last = dentry; - - spin_lock(&parent->d_lock); - p = p->prev; /* advance to next dentry */ - goto more; - -out_unlock: - spin_unlock(&parent->d_lock); -out: - if (last) + ceph_readdir_cache_release(&cache_ctl); + if (last) { + int ret; + di = ceph_dentry(last); + ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + fpos_off(di->offset) + 1); + if (ret < 0) + err = ret; dput(last); + } return err; } -/* - * make note of the last dentry we read, so we can - * continue at the same lexicographical point, - * regardless of what dir changes take place on the - * server. - */ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, - int len) -{ - kfree(fi->last_name); - fi->last_name = kmalloc(len+1, GFP_KERNEL); - if (!fi->last_name) - return -ENOMEM; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - dout("note_last_dentry '%s'\n", fi->last_name); - return 0; -} - static int ceph_readdir(struct file *file, struct dir_context *ctx) { struct ceph_file_info *fi = file->private_data; @@ -280,8 +291,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* can we use the dcache? */ spin_lock(&ci->i_ceph_lock); - if ((ctx->pos == 2 || fi->dentry) && - ceph_test_mount_opt(fsc, DCACHE) && + if (ceph_test_mount_opt(fsc, DCACHE) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && @@ -296,24 +306,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } else { spin_unlock(&ci->i_ceph_lock); } - if (fi->dentry) { - err = note_last_dentry(fi, fi->dentry->d_name.name, - fi->dentry->d_name.len); - if (err) - return err; - dput(fi->dentry); - fi->dentry = NULL; - } /* proceed with a normal readdir */ - - if (ctx->pos == 2) { - /* note dir version at start of readdir so we can tell - * if any dentries get dropped */ - fi->dir_release_count = atomic_read(&ci->i_release_count); - fi->dir_ordered_count = ci->i_ordered_count; - } - more: /* do we have the correct frag content buffered? */ if (fi->frag != frag || fi->last_readdir == NULL) { @@ -348,6 +342,9 @@ more: return -ENOMEM; } } + req->r_dir_release_cnt = fi->dir_release_count; + req->r_dir_ordered_cnt = fi->dir_ordered_count; + req->r_readdir_cache_idx = fi->readdir_cache_idx; req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); @@ -364,26 +361,38 @@ more: (int)req->r_reply_info.dir_end, (int)req->r_reply_info.dir_complete); - if (!req->r_did_prepopulate) { - dout("readdir !did_prepopulate"); - /* preclude from marking dir complete */ - fi->dir_release_count--; - } /* note next offset and last dentry name */ rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); - if (ceph_frag_is_leftmost(frag)) - fi->next_offset = 2; - else - fi->next_offset = 0; - off = fi->next_offset; + off = req->r_readdir_offset; + fi->next_offset = off; } + fi->frag = frag; fi->offset = fi->next_offset; fi->last_readdir = req; + if (req->r_did_prepopulate) { + fi->readdir_cache_idx = req->r_readdir_cache_idx; + if (fi->readdir_cache_idx < 0) { + /* preclude from marking dir ordered */ + fi->dir_ordered_count = 0; + } else if (ceph_frag_is_leftmost(frag) && off == 2) { + /* note dir version at start of readdir so + * we can tell if any dentries get dropped */ + fi->dir_release_count = req->r_dir_release_cnt; + fi->dir_ordered_count = req->r_dir_ordered_cnt; + } + } else { + dout("readdir !did_prepopulate"); + /* disable readdir cache */ + fi->readdir_cache_idx = -1; + /* preclude from marking dir complete */ + fi->dir_release_count = 0; + } + if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; @@ -394,10 +403,10 @@ more: } else { err = note_last_dentry(fi, rinfo->dir_dname[rinfo->dir_nr-1], - rinfo->dir_dname_len[rinfo->dir_nr-1]); + rinfo->dir_dname_len[rinfo->dir_nr-1], + fi->next_offset + rinfo->dir_nr); if (err) return err; - fi->next_offset += rinfo->dir_nr; } } @@ -453,16 +462,22 @@ more: * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - spin_lock(&ci->i_ceph_lock); - if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { - if (ci->i_ordered_count == fi->dir_ordered_count) + if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + spin_lock(&ci->i_ceph_lock); + if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); - else + /* use i_size to track number of entries in + * readdir cache */ + BUG_ON(fi->readdir_cache_idx < 0); + i_size_write(inode, fi->readdir_cache_idx * + sizeof(struct dentry*)); + } else { dout(" marking %p complete\n", inode); + } __ceph_dir_set_complete(ci, fi->dir_release_count, fi->dir_ordered_count); + spin_unlock(&ci->i_ceph_lock); } - spin_unlock(&ci->i_ceph_lock); dout("readdir %p file %p done.\n", inode, file); return 0; @@ -476,14 +491,12 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag) } kfree(fi->last_name); fi->last_name = NULL; + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; if (ceph_frag_is_leftmost(frag)) fi->next_offset = 2; /* compensate for . and .. */ else fi->next_offset = 0; - if (fi->dentry) { - dput(fi->dentry); - fi->dentry = NULL; - } fi->flags &= ~CEPH_F_ATEND; } @@ -497,13 +510,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) mutex_lock(&inode->i_mutex); retval = -EINVAL; switch (whence) { - case SEEK_END: - offset += inode->i_size + 2; /* FIXME */ - break; case SEEK_CUR: offset += file->f_pos; case SEEK_SET: break; + case SEEK_END: + retval = -EOPNOTSUPP; default: goto out; } @@ -516,20 +528,18 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } retval = offset; - /* - * discard buffered readdir content on seekdir(0), or - * seek to new frag, or seek prior to current chunk. - */ if (offset == 0 || fpos_frag(offset) != fi->frag || fpos_off(offset) < fi->offset) { + /* discard buffered readdir content on seekdir(0), or + * seek to new frag, or seek prior to current chunk */ dout("dir_llseek dropping %p content\n", file); reset_readdir(fi, fpos_frag(offset)); + } else if (fpos_cmp(offset, old_offset) > 0) { + /* reset dir_release_count if we did a forward seek */ + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; } - - /* bump dir_release_count if we did a forward seek */ - if (fpos_cmp(offset, old_offset) > 0) - fi->dir_release_count--; } out: mutex_unlock(&inode->i_mutex); @@ -985,16 +995,15 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * to do it here. */ + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(old_dir); + ceph_dir_clear_complete(new_dir); + d_move(old_dentry, new_dentry); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(new_dentry); - - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(old_dir); - ceph_dir_clear_complete(new_dir); - } ceph_mdsc_put_request(req); return err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 424b5b540207..faf92095e105 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -96,6 +96,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) } cf->fmode = fmode; cf->next_offset = 2; + cf->readdir_cache_idx = -1; file->private_data = cf; BUG_ON(inode->i_fop->release != ceph_release); break; @@ -324,7 +325,6 @@ int ceph_release(struct inode *inode, struct file *file) ceph_mdsc_put_request(cf->last_readdir); kfree(cf->last_name); kfree(cf->dir_info); - dput(cf->dentry); kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e86d1a4efc46..2a6d93befbae 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -390,9 +390,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; - ci->i_ordered_count = 0; - atomic_set(&ci->i_release_count, 1); - atomic_set(&ci->i_complete_count, 0); + atomic64_set(&ci->i_ordered_count, 1); + atomic64_set(&ci->i_release_count, 1); + atomic64_set(&ci->i_complete_seq[0], 0); + atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); @@ -860,9 +861,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); + i_size_write(inode, 0); __ceph_dir_set_complete(ci, - atomic_read(&ci->i_release_count), - ci->i_ordered_count); + atomic64_read(&ci->i_release_count), + atomic64_read(&ci->i_ordered_count)); } wake = true; @@ -1214,6 +1216,10 @@ retry_lookup: dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); + d_move(req->r_old_dentry, dn); dout(" src %p '%pd' dst %p '%pd'\n", req->r_old_dentry, @@ -1224,10 +1230,6 @@ retry_lookup: rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_ordered(dir); - ceph_dir_clear_ordered(olddir); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1335,6 +1337,49 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, return err; } +void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) +{ + if (ctl->page) { + kunmap(ctl->page); + page_cache_release(ctl->page); + ctl->page = NULL; + } +} + +static int fill_readdir_cache(struct inode *dir, struct dentry *dn, + struct ceph_readdir_cache_control *ctl, + struct ceph_mds_request *req) +{ + struct ceph_inode_info *ci = ceph_inode(dir); + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*); + unsigned idx = ctl->index % nsize; + pgoff_t pgoff = ctl->index / nsize; + + if (!ctl->page || pgoff != page_index(ctl->page)) { + ceph_readdir_cache_release(ctl); + ctl->page = grab_cache_page(&dir->i_data, pgoff); + if (!ctl->page) { + ctl->index = -1; + return -ENOMEM; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(ctl->page); + ctl->dentries = kmap(ctl->page); + } + + if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && + req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { + dout("readdir cache dn %p idx %d\n", dn, ctl->index); + ctl->dentries[idx] = dn; + ctl->index++; + } else { + dout("disable readdir cache\n"); + ctl->index = -1; + } + return 0; +} + int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session) { @@ -1347,8 +1392,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; struct ceph_dentry_info *di; - u64 r_readdir_offset = req->r_readdir_offset; u32 frag = le32_to_cpu(rhead->args.readdir.frag); + struct ceph_readdir_cache_control cache_ctl = {}; + + if (req->r_aborted) + return readdir_prepopulate_inodes_only(req, session); if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -1356,14 +1404,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, frag, le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (ceph_frag_is_leftmost(frag)) - r_readdir_offset = 2; + req->r_readdir_offset = 2; else - r_readdir_offset = 0; + req->r_readdir_offset = 0; } - if (req->r_aborted) - return readdir_prepopulate_inodes_only(req, session); - if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { snapdir = ceph_get_snapdir(d_inode(parent)); parent = d_find_alias(snapdir); @@ -1376,6 +1421,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); } + if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) { + /* note dir version at start of readdir so we can tell + * if any dentries get dropped */ + struct ceph_inode_info *ci = ceph_inode(d_inode(parent)); + req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); + req->r_dir_ordered_cnt = atomic64_read(&ci->i_ordered_count); + req->r_readdir_cache_idx = 0; + } + + cache_ctl.index = req->r_readdir_cache_idx; + /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_vino vino; @@ -1415,13 +1471,6 @@ retry_lookup: d_delete(dn); dput(dn); goto retry_lookup; - } else { - /* reorder parent's d_subdirs */ - spin_lock(&parent->d_lock); - spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_child, &parent->d_subdirs); - spin_unlock(&dn->d_lock); - spin_unlock(&parent->d_lock); } /* inode */ @@ -1438,13 +1487,15 @@ retry_lookup: } } - if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, - req->r_request_started, -1, - &req->r_caps_reservation) < 0) { + ret = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, + req->r_request_started, -1, + &req->r_caps_reservation); + if (ret < 0) { pr_err("fill_inode badness on %p\n", in); if (d_really_is_negative(dn)) iput(in); d_drop(dn); + err = ret; goto next_item; } @@ -1460,19 +1511,28 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, i + r_readdir_offset); + di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); update_dentry_lease(dn, rinfo->dir_dlease[i], req->r_session, req->r_request_started); + + if (err == 0 && cache_ctl.index >= 0) { + ret = fill_readdir_cache(d_inode(parent), dn, + &cache_ctl, req); + if (ret < 0) + err = ret; + } next_item: if (dn) dput(dn); } - if (err == 0) - req->r_did_prepopulate = true; - out: + if (err == 0) { + req->r_did_prepopulate = true; + req->r_readdir_cache_idx = cache_ctl.index; + } + ceph_readdir_cache_release(&cache_ctl); if (snapdir) { iput(snapdir); dput(parent); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 470be4eb25f3..762757e6cebf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -253,6 +253,9 @@ struct ceph_mds_request { bool r_got_unsafe, r_got_safe, r_got_result; bool r_did_prepopulate; + long long r_dir_release_cnt; + long long r_dir_ordered_cnt; + int r_readdir_cache_idx; u32 r_readdir_offset; struct ceph_cap_reservation r_caps_reservation; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4415e977d72b..860cc016e70d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -282,9 +282,9 @@ struct ceph_inode_info { u32 i_time_warp_seq; unsigned i_ceph_flags; - int i_ordered_count; - atomic_t i_release_count; - atomic_t i_complete_count; + atomic64_t i_release_count; + atomic64_t i_ordered_count; + atomic64_t i_complete_seq[2]; struct ceph_dir_layout i_dir_layout; struct ceph_file_layout i_layout; @@ -471,30 +471,36 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, - int release_count, int ordered_count) + long long release_count, + long long ordered_count) { - atomic_set(&ci->i_complete_count, release_count); - if (ci->i_ordered_count == ordered_count) - ci->i_ceph_flags |= CEPH_I_DIR_ORDERED; - else - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; + smp_mb__before_atomic(); + atomic64_set(&ci->i_complete_seq[0], release_count); + atomic64_set(&ci->i_complete_seq[1], ordered_count); } static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) { - atomic_inc(&ci->i_release_count); + atomic64_inc(&ci->i_release_count); +} + +static inline void __ceph_dir_clear_ordered(struct ceph_inode_info *ci) +{ + atomic64_inc(&ci->i_ordered_count); } static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) { - return atomic_read(&ci->i_complete_count) == - atomic_read(&ci->i_release_count); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count); } static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci) { - return __ceph_dir_is_complete(ci) && - (ci->i_ceph_flags & CEPH_I_DIR_ORDERED); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count) && + atomic64_read(&ci->i_complete_seq[1]) == + atomic64_read(&ci->i_ordered_count); } static inline void ceph_dir_clear_complete(struct inode *inode) @@ -504,20 +510,13 @@ static inline void ceph_dir_clear_complete(struct inode *inode) static inline void ceph_dir_clear_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - spin_lock(&ci->i_ceph_lock); - ci->i_ordered_count++; - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; - spin_unlock(&ci->i_ceph_lock); + __ceph_dir_clear_ordered(ceph_inode(inode)); } static inline bool ceph_dir_is_complete_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - bool ret; - spin_lock(&ci->i_ceph_lock); - ret = __ceph_dir_is_complete_ordered(ci); - spin_unlock(&ci->i_ceph_lock); + bool ret = __ceph_dir_is_complete_ordered(ceph_inode(inode)); + smp_rmb(); return ret; } @@ -636,16 +635,20 @@ struct ceph_file_info { unsigned offset; /* offset of last chunk, adjusted for . and .. */ unsigned next_offset; /* offset of next chunk (last_name's + 1) */ char *last_name; /* last entry in previous chunk */ - struct dentry *dentry; /* next dentry (for dcache readdir) */ - int dir_release_count; - int dir_ordered_count; + long long dir_release_count; + long long dir_ordered_count; + int readdir_cache_idx; /* used for -o dirstat read() on directory thing */ char *dir_info; int dir_info_len; }; - +struct ceph_readdir_cache_control { + struct page *page; + struct dentry **dentries; + int index; +}; /* * A "snap realm" describes a subset of the file hierarchy sharing @@ -944,6 +947,7 @@ extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry); +extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl); /* * our d_ops vary depending on whether the inode is live, From be081d9bf3e163a9ed1ca2f0f14f08424c7f9016 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 24 Jun 2015 13:14:18 -0700 Subject: [PATCH 291/839] ARM64: smp: Fix suspicious RCU usage with ipi tracepoints John Stultz reported an RCU splat on ARM with ipi trace events enabled. It looks like the same problem exists on ARM64. At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Cc: John Stultz Cc: Nicolas Pitre Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Cc: # 3.17+ Fixes: 45ed695ac10a ("ARM64: add IPI tracepoints") Signed-off-by: Stephen Boyd Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4b2121bd7f9c..a1883bfdd9d6 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -693,7 +693,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -736,7 +736,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } From 8eee539ddea09bccae2426f09b0ba6a18b72b691 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 25 Jun 2015 05:47:39 -0700 Subject: [PATCH 292/839] arm64: bpf: fix out-of-bounds read in bpf2a64_offset() Problems occur when bpf_to or bpf_from has value prog->len - 1 (e.g., "Very long jump backwards" in test_bpf where the last instruction is a jump): since ctx->offset has length prog->len, ctx->offset[bpf_to + 1] or ctx->offset[bpf_from + 1] will cause an out-of-bounds read, leading to a bogus jump offset and kernel panic. This patch moves updating ctx->offset to after calling build_insn(), and changes indexing to use bpf_to and bpf_from without + 1. Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Cc: # 3.18+ Cc: Zi Shen Lim Cc: Will Deacon Acked-by: Alexei Starovoitov Signed-off-by: Xi Wang Signed-off-by: Catalin Marinas --- arch/arm64/net/bpf_jit_comp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dc6a4842683a..c81ddd4ff7f7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -113,9 +113,9 @@ static inline void emit_a64_mov_i(const int is64, const int reg, static inline int bpf2a64_offset(int bpf_to, int bpf_from, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to + 1]; + int to = ctx->offset[bpf_to]; /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from + 1] - 1; + int from = ctx->offset[bpf_from] - 1; return to - from; } @@ -640,10 +640,11 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + ret = build_insn(insn, ctx); + if (ctx->image == NULL) ctx->offset[i] = ctx->idx; - ret = build_insn(insn, ctx); if (ret > 0) { i++; continue; From 2b42b09b88c831ba4da2d669581dde371c38c2af Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 24 Jun 2015 16:40:04 +0530 Subject: [PATCH 293/839] perf bench numa: Fix to show proper convergence stats With commit: e1e455f4f4d3 (perf tools: Work around lack of sched_getcpu in glibc < 2.6), perf_bench numa mem with -c or -m option is not able to correctly calculate convergence. With the above commit, sched_getcpu always seems to return -1. The intention of commit e1e455f was to add a sched_getcpu in glibc < 2.6. Hence keep the sched_getcpu definition under an ifdef. This regression happened occurred between v4.0 and v4.1 Signed-off-by: Srikar Dronamraju Acked-by: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Vinson Lee Fixes: e1e455f4f4d3 ("perf tools: Work around lack of sched_getcpu in glibc < 2.6") Link: http://lkml.kernel.org/r/20150624111004.GA5220@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cloexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 85b523885f9d..2babddaa2481 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,11 +7,15 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) int __weak sched_getcpu(void) { errno = ENOSYS; return -1; } +#endif +#endif static int perf_flag_probe(void) { From d3834fefcfe5610702379d78596337875df2db5b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 19:19:02 +0300 Subject: [PATCH 294/839] rbd: bump queue_max_segments The default queue_limits::max_segments value (BLK_MAX_SEGMENTS = 128) unnecessarily limits bio sizes to 512k (assuming 4k pages). rbd, being a virtual block device, doesn't have any restrictions on the number of physical segments, so bump max_segments to max_hw_sectors, in theory allowing a sector per segment (although the only case this matters that I can think of is some readv/writev style thing). In practice this is going to give us 1M bios - the number of segments in a bio is limited in bio_get_nr_vecs() by BIO_MAX_PAGES = 256. Note that this doesn't result in any improvement on a typical direct sequential test. This is because on a box with a not too badly fragmented memory the default BLK_MAX_SEGMENTS is enough to see nice rbd object size sized requests. The only difference is the size of bios being merged - 512k vs 1M for something like $ dd if=/dev/zero of=/dev/rbd0 oflag=direct bs=$RBD_OBJ_SIZE $ dd if=/dev/rbd0 iflag=direct of=/dev/null bs=$RBD_OBJ_SIZE Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89fe8a4bc02e..bc88fbcb9715 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3791,6 +3791,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) /* set io sizes to object size */ segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, segment_size / SECTOR_SIZE); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); From e1966b49446a43994c3f25a07d0eb4d05660b429 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 18 Jun 2015 03:10:58 +0800 Subject: [PATCH 295/839] ceph: fix ceph_writepages_start() Before a page get locked, someone else can write data to the page and increase the i_size. So we should re-check the i_size after pages are locked. Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 97102038fe03..890c50971a69 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -440,7 +440,7 @@ out: * only snap context we are allowed to write back. */ static struct ceph_snap_context *get_oldest_context(struct inode *inode, - u64 *snap_size) + loff_t *snap_size) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc = NULL; @@ -480,8 +480,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_osd_client *osdc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); + loff_t snap_size = -1; long writeback_stat; - u64 truncate_size, snap_size = 0; + u64 truncate_size; u32 truncate_seq; int err = 0, len = PAGE_CACHE_SIZE; @@ -516,7 +517,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) + if (snap_size == -1) snap_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); @@ -699,7 +700,8 @@ static int ceph_writepages_start(struct address_space *mapping, unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; int do_sync = 0; - u64 truncate_size, snap_size; + loff_t snap_size, i_size; + u64 truncate_size; u32 truncate_seq; /* @@ -745,7 +747,7 @@ static int ceph_writepages_start(struct address_space *mapping, retry: /* find oldest snap context with dirty data */ ceph_put_snap_context(snapc); - snap_size = 0; + snap_size = -1; snapc = get_oldest_context(inode, &snap_size); if (!snapc) { /* hmm, why does writepages get called when there @@ -753,16 +755,13 @@ retry: dout(" no snap context with dirty data?\n"); goto out; } - if (snap_size == 0) - snap_size = i_size_read(inode); dout(" oldest snapc is %p seq %lld (%d snaps)\n", snapc, snapc->seq, snapc->num_snaps); spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) - snap_size = i_size_read(inode); + i_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); if (last_snapc && snapc != last_snapc) { @@ -832,8 +831,10 @@ get_more_pages: dout("waiting on writeback %p\n", page); wait_on_page_writeback(page); } - if (page_offset(page) >= snap_size) { - dout("%p page eof %llu\n", page, snap_size); + if (page_offset(page) >= + (snap_size == -1 ? i_size : snap_size)) { + dout("%p page eof %llu\n", page, + (snap_size == -1 ? i_size : snap_size)); done = 1; unlock_page(page); break; @@ -949,10 +950,18 @@ get_more_pages: } /* Format the osd request message and submit the write */ - offset = page_offset(pages[0]); - len = min(snap_size - offset, - (u64)locked_pages << PAGE_CACHE_SHIFT); + len = (u64)locked_pages << PAGE_CACHE_SHIFT; + if (snap_size == -1) { + len = min(len, (u64)i_size_read(inode) - offset); + /* writepages_finish() clears writeback pages + * according to the data length, so make sure + * data length covers all locked pages */ + len = max(len, 1 + + ((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT)); + } else { + len = min(len, snap_size - offset); + } dout("writepages got %d pages at %llu~%llu\n", locked_pages, offset, len); From 210c104c544e5be321fc5b78e74b596d36820332 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 22 Jun 2015 13:24:48 +0300 Subject: [PATCH 296/839] rbd: terminate rbd_opts_tokens with Opt_err Also nuke useless Opt_last_bool and don't break lines unnecessarily. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bc88fbcb9715..4de8c9167c4b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -724,7 +724,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) } /* - * mount options + * (Per device) rbd map options */ enum { Opt_last_int, @@ -733,8 +733,7 @@ enum { /* string args above */ Opt_read_only, Opt_read_write, - /* Boolean args above */ - Opt_last_bool, + Opt_err }; static match_table_t rbd_opts_tokens = { @@ -744,8 +743,7 @@ static match_table_t rbd_opts_tokens = { {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ - /* Boolean args above */ - {-1, NULL} + {Opt_err, NULL} }; struct rbd_options { @@ -761,22 +759,15 @@ static int parse_rbd_opts_token(char *c, void *private) int token, intval, ret; token = match_token(c, rbd_opts_tokens, argstr); - if (token < 0) - return -EINVAL; - if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad mount option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else if (token > Opt_last_string && token < Opt_last_bool) { - dout("got Boolean token %d\n", token); + dout("got string token %d val %s\n", token, argstr[0].from); } else { dout("got token %d\n", token); } @@ -789,9 +780,10 @@ static int parse_rbd_opts_token(char *c, void *private) rbd_opts->read_only = false; break; default: - rbd_assert(false); - break; + /* libceph prints "bad option" msg */ + return -EINVAL; } + return 0; } From d147543d7943eaa549a569143b7815482585fb91 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 22 Jun 2015 13:24:48 +0300 Subject: [PATCH 297/839] rbd: store rbd_options in rbd_device Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4de8c9167c4b..e502bce02d2c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -346,6 +346,7 @@ struct rbd_device { struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; + struct rbd_options *opts; char *header_name; @@ -4055,7 +4056,8 @@ static void rbd_spec_free(struct kref *kref) } static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, - struct rbd_spec *spec) + struct rbd_spec *spec, + struct rbd_options *opts) { struct rbd_device *rbd_dev; @@ -4069,8 +4071,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); - rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + rbd_dev->spec = spec; + rbd_dev->opts = opts; /* Initialize the layout used for all rbd requests */ @@ -4086,6 +4089,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev) { rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev->opts); kfree(rbd_dev); } @@ -5160,7 +5164,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) rbdc = __rbd_get_client(rbd_dev->rbd_client); ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); + parent = rbd_dev_create(rbdc, parent_spec, NULL); if (!parent) goto out_err; @@ -5406,9 +5410,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) goto err_out_module; - read_only = rbd_opts->read_only; - kfree(rbd_opts); - rbd_opts = NULL; /* done with this */ rbdc = rbd_get_client(ceph_opts); if (IS_ERR(rbdc)) { @@ -5434,11 +5435,12 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_client; } - rbd_dev = rbd_dev_create(rbdc, spec); + rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); if (!rbd_dev) goto err_out_client; rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ + rbd_opts = NULL; /* rbd_dev now owns this */ rc = rbd_dev_image_probe(rbd_dev, true); if (rc < 0) @@ -5446,6 +5448,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, /* If we are mapping a snapshot it must be marked read-only */ + read_only = rbd_dev->opts->read_only; if (rbd_dev->spec->snap_id != CEPH_NOSNAP) read_only = true; rbd_dev->mapping.read_only = read_only; @@ -5470,6 +5473,7 @@ err_out_client: rbd_put_client(rbdc); err_out_args: rbd_spec_put(spec); + kfree(rbd_opts); err_out_module: module_put(THIS_MODULE); From b55841807fb864eccca0167650a65722fd7cd553 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 Jun 2015 16:21:19 +0300 Subject: [PATCH 298/839] rbd: queue_depth map option nr_requests (/sys/block/rbd/queue/nr_requests) is pretty much irrelevant in blk-mq case because each driver sets its own max depth that it can handle and that's the number of tags that gets preallocated on setup. Users can't increase queue depth beyond that value via writing to nr_requests. For rbd we are happy with the default BLKDEV_MAX_RQ (128) for most cases but we want to give users the opportunity to increase it. Introduce a new per-device queue_depth option to do just that: $ sudo rbd map -o queue_depth=1024 ... Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e502bce02d2c..b316ee48a30b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -728,6 +728,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) * (Per device) rbd map options */ enum { + Opt_queue_depth, Opt_last_int, /* int args above */ Opt_last_string, @@ -738,6 +739,7 @@ enum { }; static match_table_t rbd_opts_tokens = { + {Opt_queue_depth, "queue_depth=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, @@ -748,9 +750,11 @@ static match_table_t rbd_opts_tokens = { }; struct rbd_options { + int queue_depth; bool read_only; }; +#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_READ_ONLY_DEFAULT false static int parse_rbd_opts_token(char *c, void *private) @@ -774,6 +778,13 @@ static int parse_rbd_opts_token(char *c, void *private) } switch (token) { + case Opt_queue_depth: + if (intval < 1) { + pr_err("queue_depth out of range\n"); + return -EINVAL; + } + rbd_opts->queue_depth = intval; + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -3761,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); rbd_dev->tag_set.ops = &rbd_mq_ops; - rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; + rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; - rbd_dev->tag_set.flags = - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; rbd_dev->tag_set.nr_hw_queues = 1; rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); @@ -4948,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf, goto out_mem; rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, From 6ba8edc0bcbdf337293e60123ddac8fc1c895a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Wed, 24 Jun 2015 23:18:39 +0200 Subject: [PATCH 299/839] libceph: Remove spurious kunmap() of the zero page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ceph_tcp_sendpage already does the work of mapping/unmapping the zero page if needed. Signed-off-by: Benoît Canet Reviewed-by: Alex Elder Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f043..38f06a4c3c9e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -278,7 +278,6 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); BUG_ON(zero_page == NULL); - kunmap(zero_page); page_cache_release(zero_page); zero_page = NULL; } From 060664f3b9dff37860e48b5158e8429b2467e526 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jun 2015 14:48:49 -0300 Subject: [PATCH 300/839] perf tools: Future-proof thread_map allocation size calculation Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20150625174840.GH3253@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 8c3c3a0751bd..920136dd8c2e 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -22,7 +22,7 @@ static int filter(const struct dirent *dir) static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) { - size_t size = sizeof(*map) + sizeof(pid_t) * nr; + size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; return realloc(map, size); } From 4cc97614812e96c135e369f3d723fcda07d33437 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 25 Jun 2015 17:12:32 +0200 Subject: [PATCH 301/839] perf header: Delete an unnecessary check before the calling free_event_desc() The free_event_desc() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Cc: Julia Lawall Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/558C2ABA.3000603@users.sourceforge.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 21a77e7a171e..03ace57a800c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1063,8 +1063,7 @@ out: free(buf); return events; error: - if (events) - free_event_desc(events); + free_event_desc(events); events = NULL; goto out; } From f30a79b012e5d9b3887f6a59293d9ef3ca0e2c3e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:04 +0200 Subject: [PATCH 302/839] perf tools: Add reference counting for cpu_map object Adding refference counting for cpu_map object, so it could be easily shared among other objects. Using cpu_map__put instead cpu_map__delete and making cpu_map__delete static. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/cpumap.c | 26 ++++++++++++++++++++++++-- tools/perf/util/cpumap.h | 6 +++++- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 5 ++++- tools/perf/util/python.c | 2 +- tools/perf/util/record.c | 4 ++-- tools/perf/util/session.c | 2 +- tools/perf/util/svghelper.c | 2 +- 13 files changed, 45 insertions(+), 15 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 22f8a00446e1..6b3250f54240 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -545,7 +545,7 @@ out_err: if (evlist) { perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); + cpu_map__put(cpus); thread_map__delete(threads); } machines__destroy_kernel_maps(&machines); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 5b171d1e338b..a330235cefc0 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -144,7 +144,7 @@ out_err: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); + cpu_map__put(cpus); thread_map__delete(threads); } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5855cf471210..5a9ef5833452 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -140,7 +140,7 @@ out_delete_evlist: cpus = NULL; threads = NULL; out_free_cpus: - cpu_map__delete(cpus); + cpu_map__put(cpus); out_free_threads: thread_map__delete(threads); return err; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 0d31403ea593..1b06122beb76 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -560,7 +560,7 @@ out: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); + cpu_map__put(cpus); thread_map__delete(threads); } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c4e55b71010c..3667e2123e5b 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -5,6 +5,7 @@ #include #include #include +#include "asm/bug.h" static struct cpu_map *cpu_map__default_new(void) { @@ -22,6 +23,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -35,6 +37,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -194,14 +197,32 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; + atomic_set(&cpus->refcnt, 1); } return cpus; } -void cpu_map__delete(struct cpu_map *map) +static void cpu_map__delete(struct cpu_map *map) { - free(map); + if (map) { + WARN_ONCE(atomic_read(&map->refcnt) != 0, + "cpu_map refcnt unbalanced\n"); + free(map); + } +} + +struct cpu_map *cpu_map__get(struct cpu_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void cpu_map__put(struct cpu_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + cpu_map__delete(map); } int cpu_map__get_socket(struct cpu_map *map, int idx) @@ -263,6 +284,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); + atomic_set(&cpus->refcnt, 1); *res = c; return 0; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 61a654849002..0af9cecb4c51 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,18 +3,19 @@ #include #include +#include #include "perf.h" #include "util/debug.h" struct cpu_map { + atomic_t refcnt; int nr; int map[]; }; struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); -void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket(struct cpu_map *map, int idx); @@ -22,6 +23,9 @@ int cpu_map__get_core(struct cpu_map *map, int idx); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); +struct cpu_map *cpu_map__get(struct cpu_map *map); +void cpu_map__put(struct cpu_map *map); + static inline int cpu_map__socket(struct cpu_map *sock, int s) { if (!sock || s > sock->nr || s < 0) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d29df901be3e..59498f7b3e9b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -114,7 +114,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) { perf_evlist__munmap(evlist); perf_evlist__close(evlist); - cpu_map__delete(evlist->cpus); + cpu_map__put(evlist->cpus); thread_map__delete(evlist->threads); evlist->cpus = NULL; evlist->threads = NULL; @@ -1353,7 +1353,7 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) out: return err; out_free_cpus: - cpu_map__delete(evlist->cpus); + cpu_map__put(evlist->cpus); evlist->cpus = NULL; goto out; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1b56047af96b..31b0afb68825 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -885,6 +885,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); close_cgroup(evsel->cgrp); + cpu_map__put(evsel->cpus); zfree(&evsel->group_name); zfree(&evsel->name); perf_evsel__object.fini(evsel); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2a4d1ec02846..09f8d2357108 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -17,6 +17,7 @@ #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" +#include "cpumap.h" #include "asm/bug.h" #define MAX_NAME_LEN 100 @@ -285,7 +286,9 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - evsel->cpus = cpus; + if (cpus) + evsel->cpus = cpu_map__get(cpus); + if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index d906d0ad5d40..b106d56df240 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -384,7 +384,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { - cpu_map__delete(pcpus->cpus); + cpu_map__put(pcpus->cpus); pcpus->ob_type->tp_free((PyObject*)pcpus); } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index d457c523a33d..1f7becbe5e18 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) if (!cpus) return false; cpu = cpus->map[0]; - cpu_map__delete(cpus); + cpu_map__put(cpus); do { ret = perf_do_probe_api(fn, cpu, try[i++]); @@ -226,7 +226,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str) struct cpu_map *cpus = cpu_map__new(NULL); cpu = cpus ? cpus->map[0] : 0; - cpu_map__delete(cpus); + cpu_map__put(cpus); } else { cpu = evlist->cpus->map[0]; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b5549b58bb2b..ed9dc2555ec7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1895,7 +1895,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, err = 0; out_delete_map: - cpu_map__delete(map); + cpu_map__put(map); return err; } diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 283d3e73e2f2..eec6c1149f44 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -748,7 +748,7 @@ static int str_to_bitmap(char *s, cpumask_t *b) set_bit(c, cpumask_bits(b)); } - cpu_map__delete(m); + cpu_map__put(m); return ret; } From 186fbb7432f4a740b4fbaf4145375442210110bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:05 +0200 Subject: [PATCH 303/839] perf tools: Add reference counting for thread_map object Adding reference counting for thread_map object, so it could be easily shared among other objects. Using thread_map__put instead thread_map__delete and making thread_map__delete static. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-5-git-send-email-jolsa@kernel.org [ Adjustments to move it ahead of the "comm" patches ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 4 +-- tools/perf/util/python.c | 2 +- tools/perf/util/thread_map.c | 30 ++++++++++++++++++++-- tools/perf/util/thread_map.h | 7 +++-- 11 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6b3250f54240..39c784a100a9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -546,7 +546,7 @@ out_err: perf_evlist__delete(evlist); } else { cpu_map__put(cpus); - thread_map__delete(threads); + thread_map__put(threads); } machines__destroy_kernel_maps(&machines); machine__delete_threads(machine); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index a330235cefc0..4d4b9837b630 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -145,7 +145,7 @@ out_err: perf_evlist__delete(evlist); } else { cpu_map__put(cpus); - thread_map__delete(threads); + thread_map__put(threads); } return err; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5a9ef5833452..666b67a4df9d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -142,6 +142,6 @@ out_delete_evlist: out_free_cpus: cpu_map__put(cpus); out_free_threads: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 7f48efa7e295..145050e2e544 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -143,7 +143,7 @@ static int synth_process(struct machine *machine) perf_event__process, machine, 0, 500); - thread_map__delete(map); + thread_map__put(map); return err; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 9a7a116e09b8..b8d552b13950 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -111,6 +111,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 9f9491bb8e48..bdfa1f446681 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -56,6 +56,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 1b06122beb76..e698742d4fec 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -561,7 +561,7 @@ out: perf_evlist__delete(evlist); } else { cpu_map__put(cpus); - thread_map__delete(threads); + thread_map__put(threads); } return err; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 59498f7b3e9b..a8d18a3d2164 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -115,7 +115,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) perf_evlist__munmap(evlist); perf_evlist__close(evlist); cpu_map__put(evlist->cpus); - thread_map__delete(evlist->threads); + thread_map__put(evlist->threads); evlist->cpus = NULL; evlist->threads = NULL; perf_evlist__purge(evlist); @@ -1120,7 +1120,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) return 0; out_delete_threads: - thread_map__delete(evlist->threads); + thread_map__put(evlist->threads); evlist->threads = NULL; return -1; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b106d56df240..626422eda727 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -453,7 +453,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { - thread_map__delete(pthreads->threads); + thread_map__put(pthreads->threads); pthreads->ob_type->tp_free((PyObject*)pthreads); } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 920136dd8c2e..368cc58c6892 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -8,6 +8,7 @@ #include #include "strlist.h" #include +#include "asm/bug.h" #include "thread_map.h" #include "util.h" @@ -47,6 +48,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) for (i = 0; i < items; i++) thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; + atomic_set(&threads->refcnt, 1); } for (i=0; inr = 1; + atomic_set(&threads->refcnt, 1); } return threads; @@ -84,6 +87,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; + atomic_set(&threads->refcnt, 1); while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -212,6 +216,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -231,6 +237,7 @@ struct thread_map *thread_map__new_dummy(void) if (threads != NULL) { thread_map__set_pid(threads, 0, -1); threads->nr = 1; + atomic_set(&threads->refcnt, 1); } return threads; } @@ -273,6 +280,8 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -292,9 +301,26 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, return thread_map__new_by_tid_str(tid); } -void thread_map__delete(struct thread_map *threads) +static void thread_map__delete(struct thread_map *threads) { - free(threads); + if (threads) { + WARN_ONCE(atomic_read(&threads->refcnt) != 0, + "thread map refcnt unbalanced\n"); + free(threads); + } +} + +struct thread_map *thread_map__get(struct thread_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void thread_map__put(struct thread_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + thread_map__delete(map); } size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index b9f40679f589..6b0cd2dc006b 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,12 +3,14 @@ #include #include +#include struct thread_map_data { pid_t pid; }; struct thread_map { + atomic_t refcnt; int nr; struct thread_map_data map[]; }; @@ -19,11 +21,12 @@ struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct thread_map *thread_map__get(struct thread_map *map); +void thread_map__put(struct thread_map *map); + struct thread_map *thread_map__new_str(const char *pid, const char *tid, uid_t uid); -void thread_map__delete(struct thread_map *threads); - size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); static inline int thread_map__nr(struct thread_map *threads) From b7f0c203586b91419ff9aa9b1115e261082ff5b4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:06 +0200 Subject: [PATCH 304/839] perf evlist: Propagate cpu maps to evsels in an evlist Propagate evlist's cpu_map object through all the evsel objects, while keeping already configured evsel->cpus. It'll be handy to access evsel's cpus directly in following patches. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a8d18a3d2164..214affaf1cf6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1101,6 +1101,29 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } +static int perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct target *target) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (evsel->cpus && target->cpu_list) + cpu_map__put(evsel->cpus); + + if (!evsel->cpus || target->cpu_list) + evsel->cpus = cpu_map__get(evlist->cpus); + + if (!evsel->cpus) + return -ENOMEM; + } + + return 0; +} + int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { evlist->threads = thread_map__new_str(target->pid, target->tid, @@ -1117,7 +1140,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return 0; + return perf_evlist__propagate_maps(evlist, target); out_delete_threads: thread_map__put(evlist->threads); From 578e91ec04d03aca89e300151addb3e3ed5b06ea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:07 +0200 Subject: [PATCH 305/839] perf evlist: Propagate thread maps through the evlist Propagate evlist's thread_map object through all the evsel objects. It'll be handy to access evsel's threads directly in following patches. The reason is there's no link from evsel to evlist which hold threads map now and evlist is not always available. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 +++- tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 214affaf1cf6..6cfdee68e763 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1117,7 +1117,9 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, if (!evsel->cpus || target->cpu_list) evsel->cpus = cpu_map__get(evlist->cpus); - if (!evsel->cpus) + evsel->threads = thread_map__get(evlist->threads); + + if (!evsel->cpus || !evsel->threads) return -ENOMEM; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 31b0afb68825..1b2f480a3e82 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -886,6 +886,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_id(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); perf_evsel__object.fini(evsel); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bb0579e8a10a..9e16a5c4eb01 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -82,6 +82,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct thread_map *threads; unsigned int sample_size; int id_pos; int is_pos; From a22e99cd74a31dee4b5241bd60a256c629c808da Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 23 Jun 2015 00:36:08 +0200 Subject: [PATCH 306/839] perf tools: Make perf_evsel__(nr_)cpus generic Because we now propagate all evlist's cpu_maps and thread_map objects through all evsels, the perf_evsel__(nr_)cpus no longer need to be specific to stat object and check evlist and target objects. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435012588-9007-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ---------- tools/perf/util/evsel.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fcf99bdeb19e..3e1636cae76b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -141,16 +141,6 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) -{ - return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; -} - -static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) -{ - return perf_evsel__cpus(evsel)->nr; -} - static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9e16a5c4eb01..4dbf32d94dfb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -8,6 +8,7 @@ #include #include "xyarray.h" #include "symbol.h" +#include "cpumap.h" struct perf_counts_values { union { @@ -114,6 +115,16 @@ struct thread_map; struct perf_evlist; struct record_opts; +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +{ + return evsel->cpus; +} + +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +{ + return perf_evsel__cpus(evsel)->nr; +} + void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); From 3e2b03dad54bbcab5be948629a644d55ce7b5a2e Mon Sep 17 00:00:00 2001 From: Anshul Garg Date: Thu, 25 Jun 2015 13:33:12 -0700 Subject: [PATCH 307/839] Input: use for_each_set_bit() where appropriate Instead of iterating over all bits in a bitmap and test them individually let's siwtch to for_each_set_bit() which is more compact and is also faster. Also use bitmap_weight() when counting number of set bits. This also fixes INPUT_DO_TOGGLE() implementation as it should have used *_CNT as the upper boundary, not *_MAX. Signed-off-by: Anshul Garg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index f31578423636..78d24990a816 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -677,12 +677,9 @@ static void input_dev_release_keys(struct input_dev *dev) int code; if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { - for (code = 0; code <= KEY_MAX; code++) { - if (is_event_supported(code, dev->keybit, KEY_MAX) && - __test_and_clear_bit(code, dev->key)) { - input_pass_event(dev, EV_KEY, code, 0); - } - } + for_each_set_bit(code, dev->key, KEY_CNT) + input_pass_event(dev, EV_KEY, code, 0); + memset(dev->key, 0, sizeof(dev->key)); input_pass_event(dev, EV_SYN, SYN_REPORT, 1); } } @@ -1626,10 +1623,7 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env) if (!test_bit(EV_##type, dev->evbit)) \ break; \ \ - for (i = 0; i < type##_MAX; i++) { \ - if (!test_bit(i, dev->bits##bit)) \ - continue; \ - \ + for_each_set_bit(i, dev->bits##bit, type##_CNT) { \ active = test_bit(i, dev->bits); \ if (!active && !on) \ continue; \ @@ -1980,22 +1974,12 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ - if (test_bit(EV_ABS, dev->evbit)) { - for (i = 0; i < ABS_CNT; i++) { - if (test_bit(i, dev->absbit)) { - if (input_is_mt_axis(i)) - events += mt_slots; - else - events++; - } - } - } + if (test_bit(EV_ABS, dev->evbit)) + for_each_set_bit(i, dev->absbit, ABS_CNT) + events += input_is_mt_axis(i) ? mt_slots : 1; - if (test_bit(EV_REL, dev->evbit)) { - for (i = 0; i < REL_CNT; i++) - if (test_bit(i, dev->relbit)) - events++; - } + if (test_bit(EV_REL, dev->evbit)) + events += bitmap_weight(dev->relbit, REL_CNT); /* Make room for KEY and MSC events */ events += 7; From 38e1b72bd8a1de4d7adb6a7cea88174e412cf4d7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 1 Jun 2015 10:41:11 -0700 Subject: [PATCH 308/839] Input: edt-ft5x06 - mark as direct input device edt-ft5x06 is a touchscreen and thus a direct input device; let's amrk it as such. This also allows us to drop some initialization code as input_init_mt_slots() will do that for us. Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index e6aef3e48bd9..29d179a1953b 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1035,11 +1035,6 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, input->id.bustype = BUS_I2C; input->dev.parent = &client->dev; - __set_bit(EV_KEY, input->evbit); - __set_bit(EV_ABS, input->evbit); - __set_bit(BTN_TOUCH, input->keybit); - input_set_abs_params(input, ABS_X, 0, tsdata->num_x * 64 - 1, 0, 0); - input_set_abs_params(input, ABS_Y, 0, tsdata->num_y * 64 - 1, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_X, 0, tsdata->num_x * 64 - 1, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, @@ -1048,7 +1043,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, if (!pdata) touchscreen_parse_of_params(input); - error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, 0); + error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, INPUT_MT_DIRECT); if (error) { dev_err(&client->dev, "Unable to init MT slots.\n"); return error; From 7c494375b773497228502133879072a9e959c063 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 1 Jun 2015 10:35:16 -0700 Subject: [PATCH 309/839] Input: improve parsing OF parameters for touchscreens When applying touchscreen parameters specified in device tree let's make sure we keep whatever setup was done by the driver and not reset the missing values to zero. Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 2 +- drivers/input/touchscreen/of_touchscreen.c | 69 ++++++++++++++-------- drivers/input/touchscreen/tsc2005.c | 2 +- include/linux/input/touchscreen.h | 5 +- 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 29d179a1953b..394b1de9a2a3 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1041,7 +1041,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, 0, tsdata->num_y * 64 - 1, 0, 0); if (!pdata) - touchscreen_parse_of_params(input); + touchscreen_parse_of_params(input, true); error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, INPUT_MT_DIRECT); if (error) { diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index b82b5207c78b..806cd0ad160f 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -14,14 +14,22 @@ #include #include -static u32 of_get_optional_u32(struct device_node *np, - const char *property) +static bool touchscreen_get_prop_u32(struct device_node *np, + const char *property, + unsigned int default_value, + unsigned int *value) { - u32 val = 0; + u32 val; + int error; - of_property_read_u32(np, property, &val); + error = of_property_read_u32(np, property, &val); + if (error) { + *value = default_value; + return false; + } - return val; + *value = val; + return true; } static void touchscreen_set_params(struct input_dev *dev, @@ -54,34 +62,45 @@ static void touchscreen_set_params(struct input_dev *dev, * input device accordingly. The function keeps previously setuped default * values if no value is specified via DT. */ -void touchscreen_parse_of_params(struct input_dev *dev) +void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch) { struct device_node *np = dev->dev.parent->of_node; - u32 maximum, fuzz; + unsigned int axis; + unsigned int maximum, fuzz; + bool data_present; input_alloc_absinfo(dev); if (!dev->absinfo) return; - maximum = of_get_optional_u32(np, "touchscreen-size-x"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-x"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_X, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_POSITION_X, maximum, fuzz); - } + axis = multitouch ? ABS_MT_POSITION_X : ABS_X; + data_present = touchscreen_get_prop_u32(np, "touchscreen-size-x", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-x", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); - maximum = of_get_optional_u32(np, "touchscreen-size-y"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-y"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_Y, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_POSITION_Y, maximum, fuzz); - } + axis = multitouch ? ABS_MT_POSITION_Y : ABS_Y; + data_present = touchscreen_get_prop_u32(np, "touchscreen-size-y", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-y", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); - maximum = of_get_optional_u32(np, "touchscreen-max-pressure"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-pressure"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_PRESSURE, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_PRESSURE, maximum, fuzz); - } + axis = multitouch ? ABS_MT_PRESSURE : ABS_PRESSURE; + data_present = touchscreen_get_prop_u32(np, "touchscreen-max-pressure", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-pressure", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); } EXPORT_SYMBOL(touchscreen_parse_of_params); diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index 72657c579430..d8c025b0f88c 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -709,7 +709,7 @@ static int tsc2005_probe(struct spi_device *spi) input_set_abs_params(input_dev, ABS_PRESSURE, 0, max_p, fudge_p, 0); if (np) - touchscreen_parse_of_params(input_dev); + touchscreen_parse_of_params(input_dev, false); input_dev->open = tsc2005_open; input_dev->close = tsc2005_close; diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h index 08a5ef6e8f25..eecc9ea6cd58 100644 --- a/include/linux/input/touchscreen.h +++ b/include/linux/input/touchscreen.h @@ -12,9 +12,10 @@ #include #ifdef CONFIG_OF -void touchscreen_parse_of_params(struct input_dev *dev); +void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch); #else -static inline void touchscreen_parse_of_params(struct input_dev *dev) +static inline void touchscreen_parse_of_params(struct input_dev *dev, + bool multitouch) { } #endif From a24221dca1868101c9b4b5adde4a6a5b1a3a64a7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:23 -0400 Subject: [PATCH 310/839] nfs: fix potential credential leak in ff_layout_update_mirror_cred If we have two tasks racing to update a mirror's credentials, then they can end up leaking one (or more) sets of credentials. The first task will set mirror->cred and then the second task will just overwrite it. Use a cmpxchg to ensure that the creds are only set once. If we get to the point where we would set mirror->cred and find that they're already set, then we just release the creds that were just found. Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12..c19b9a88f748 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; From 0c8315dd56577445dd1afe6b9cfa06b7efdf2f82 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:24 -0400 Subject: [PATCH 311/839] nfs: always update creds in mirror, even when we have an already connected ds A ds can be associated with more than one mirror, but we currently skip setting a mirror's credentials if we find that it's already set up with a connected client. The upshot is that we can end up sending DS writes with MDS credentials instead of properly setting them up. Fix nfs4_ff_layout_prepare_ds to always verify that the mirror's credentials are set up, even when we have a DS that's already connected. Reported-by: Tom Haynes Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index c19b9a88f748..f13e1969eedd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -387,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -431,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: From 18a600897212c1480eb635112baeab017babfc83 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 25 Jun 2015 09:25:50 -0400 Subject: [PATCH 312/839] nfs: verify open flags before allowing open Commit 9597c13b forbade opens with O_APPEND|O_DIRECT for NFSv4: nfs: verify open flags before allowing an atomic open Currently, you can open a NFSv4 file with O_APPEND|O_DIRECT, but cannot fcntl(F_SETFL,...) with those flags. This flag combination is explicitly forbidden on NFSv3 opens, and it seems like it should also be on NFSv4. However, you can still open a file with O_DIRECT|O_APPEND if there exists a cached dentry for the file because nfs4_file_open() is used instead of nfs_atomic_open() and the check is bypassed. Add the check in nfs4_file_open() as well. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f58c17b3b480..dcd39d4e2efe 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp) dprintk("NFS: open file(%pd2)\n", dentry); + err = nfs_check_flags(openflags); + if (err) + return err; + if ((openflags & O_ACCMODE) == 3) openflags--; From ac88cd738425e04dbed3706621cf613a00708834 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 28 May 2015 11:07:11 -0700 Subject: [PATCH 313/839] drm/i915: Fix IPS related flicker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cannot let IPS enabled with no plane on the pipe: BSpec: "IPS cannot be enabled until after at least one plane has been enabled for at least one vertical blank." and "IPS must be disabled while there is still at least one plane enabled on the same pipe as IPS." This restriction apply to HSW and BDW. However a shortcut path on update primary plane function to make primary plane invisible by setting DSPCTRL to 0 was leting IPS enabled while there was no other plane enabled on the pipe causing flickerings that we were believing that it was caused by that other restriction where ips cannot be used when pixel rate is greater than 95% of cdclok. v2: Don't mess with Atomic path as pointed out by Ville. Reference: https://bugs.freedesktop.org/show_bug.cgi?id=85583 Cc: Ville Syrjälä Cc: Paulo Zanoni Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 13 +++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dcb1d25d6f05..1b61f9810387 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13303,6 +13303,16 @@ intel_check_primary_plane(struct drm_plane *plane, intel_crtc->atomic.wait_vblank = true; } + /* + * FIXME: Actually if we will still have any other plane enabled + * on the pipe we could let IPS enabled still, but for + * now lets consider that when we make primary invisible + * by setting DSPCNTR to 0 on update_primary_plane function + * IPS needs to be disable. + */ + if (!state->visible || !fb) + intel_crtc->atomic.disable_ips = true; + intel_crtc->atomic.fb_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); @@ -13400,6 +13410,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc) if (intel_crtc->atomic.disable_fbc) intel_fbc_disable(dev); + if (intel_crtc->atomic.disable_ips) + hsw_disable_ips(intel_crtc); + if (intel_crtc->atomic.pre_disable_primary) intel_pre_disable_primary(crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2afb31a46275..105928382e21 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -485,6 +485,7 @@ struct intel_crtc_atomic_commit { /* Sleepable operations to perform before commit */ bool wait_for_flips; bool disable_fbc; + bool disable_ips; bool pre_disable_primary; bool update_wm; unsigned disabled_planes; From 00245266b4be4fbe989ee073663f56716da6c1f3 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 25 Jun 2015 12:59:38 +0100 Subject: [PATCH 314/839] drm/i915/ppgtt: Break loop in gen8_ppgtt_clear_range failure path If for some reason [1], the page directory/table does not exist, clear_range would end up in an infinite while loop. Introduced by commit 06fda602dbca ("drm/i915: Create page table allocators"). [1] This is already being addressed in one of Mika's patches: http://mid.gmane.org/1432314314-23530-17-git-send-email-mika.kuoppala@intel.com Cc: Mika Kuoppala Cc: stable@vger.kernel.org Reported-by: John Harrison Signed-off-by: Michel Thierry Reviewed-by: Mika Kuoppala Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 619dad1b2386..9daa2883ac18 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -516,17 +516,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table; if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) - continue; + break; pd = ppgtt->pdp.page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) - continue; + break; pt = pd->page_table[pde]; if (WARN_ON(!pt->page)) - continue; + break; page_table = pt->page; From d63903bbc30c7ccad040851dfdb4da12d9a17bcf Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 25 Jun 2015 18:39:15 -0700 Subject: [PATCH 315/839] arm64: bpf: fix endianness conversion bugs Upper bits should be zeroed in endianness conversion: - even when there's no need to change endianness (i.e., BPF_FROM_BE on big endian or BPF_FROM_LE on little endian); - after rev16. This patch fixes such bugs by emitting extra instructions to clear upper bits. Cc: Zi Shen Lim Acked-by: Alexei Starovoitov Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Cc: # 3.18+ Signed-off-by: Xi Wang Signed-off-by: Catalin Marinas --- arch/arm64/net/bpf_jit.h | 4 ++++ arch/arm64/net/bpf_jit_comp.c | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index de0a81a539a0..98a26ce82d26 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -110,6 +110,10 @@ /* Rd = Rn >> shift; signed */ #define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31) +/* Zero extend */ +#define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) +#define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c81ddd4ff7f7..c047598b09e0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -289,23 +289,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_END | BPF_FROM_BE: #ifdef CONFIG_CPU_BIG_ENDIAN if (BPF_SRC(code) == BPF_FROM_BE) - break; + goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ if (BPF_SRC(code) == BPF_FROM_LE) - break; + goto emit_bswap_uxt; #endif switch (imm) { case 16: emit(A64_REV16(is64, dst, dst), ctx); + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: emit(A64_REV32(is64, dst, dst), ctx); + /* upper 32 bits already cleared */ break; case 64: emit(A64_REV64(dst, dst), ctx); break; } break; +emit_bswap_uxt: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit(A64_UXTW(is64, dst, dst), ctx); + break; + case 64: + /* nop */ + break; + } + break; /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: From d0cc439b30be638c3a606767e9469c300d397433 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jun 2015 19:32:33 -0300 Subject: [PATCH 316/839] perf trace: Validate syscall list passed via -e argument The 'trace' tool was accepting any names passed and just looking if syscalls returned via the raw_syscalls:* tracepoints were in that list, leading to it accepting perf events and then never finding any, as those are not valid syscall names, confusing users. Fix it by checking each entry in the list using audit_name_to_syscall, telling the user which entries are invalid and suggesting where to look for valid syscall names. E.g: [root@zoo ~]# trace -e open,foo,bar,close,baz Error: Invalid syscall bar, baz, foo Hint: try 'perf list syscalls:sys_enter_*' Hint: and: 'man syscalls' [root@zoo ~]# Reported-by: Flavio Leitner Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-4g1i3m1z6fzsrznn2umi02wa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2bf2ca771ca5..39ad4d0ca884 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1617,6 +1617,34 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +static int trace__validate_ev_qualifier(struct trace *trace) +{ + int err = 0; + struct str_node *pos; + + strlist__for_each(pos, trace->ev_qualifier) { + const char *sc = pos->s; + + if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { + if (err == 0) { + fputs("Error:\tInvalid syscall ", trace->output); + err = -EINVAL; + } else { + fputs(", ", trace->output); + } + + fputs(sc, trace->output); + } + } + + if (err < 0) { + fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" + "\nHint:\tand: 'man syscalls'\n", trace->output); + } + + return err; +} + /* * args is to be interpreted as a series of longs but we need to handle * 8-byte unaligned accesses. args points to raw_data within the event @@ -2862,6 +2890,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) err = -ENOMEM; goto out_close; } + + err = trace__validate_ev_qualifier(&trace); + if (err) + goto out_close; } err = target__validate(&trace.opts.target); From 62eea464380633b88902da35bf9cbd8515289703 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:06 +0200 Subject: [PATCH 317/839] perf thread_map: Introduce thread_map__reset function We need to reset newly allocated 'struct thread_map_data' entries, because we will introduce new comm memeber, which will get set later or not at all. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-2-git-send-email-jolsa@kernel.org [ Use sizeof(map->map[0]) to be independent of the array entry type ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 368cc58c6892..ed76c179cf4e 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -21,11 +21,26 @@ static int filter(const struct dirent *dir) return 1; } +static void thread_map__reset(struct thread_map *map, int start, int nr) +{ + size_t size = (nr - start) * sizeof(map->map[0]); + + memset(&map->map[start], 0, size); +} + static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) { size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; + int start = map ? map->nr : 0; - return realloc(map, size); + map = realloc(map, size); + /* + * We only realloc to add more items, let's reset new items. + */ + if (map) + thread_map__reset(map, start, nr); + + return map; } #define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr) From 792402fd5c0a5a5300868e9dfc8ee569f3a39169 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:07 +0200 Subject: [PATCH 318/839] perf thrad_map: Add comm string into array Adding support to hold comm name together with pids in 'struct thread_map'. It will be useful for --per-thread option to display task pid together with task name. Adding thread_map__read_comms function that reads/set comm string for the 'struct thread_map'. Getting the task name from /proc/$pid/comm. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python-ext-sources | 1 + tools/perf/util/thread_map.c | 59 ++++++++++++++++++++++++++++++ tools/perf/util/thread_map.h | 8 ++++ 3 files changed, 68 insertions(+) diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 5925fec90562..e23ded40c79e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -20,3 +20,4 @@ util/stat.c util/strlist.c util/trace-event.c ../../lib/rbtree.c +util/string.c diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index ed76c179cf4e..da7646d767fe 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -8,9 +8,11 @@ #include #include "strlist.h" #include +#include #include "asm/bug.h" #include "thread_map.h" #include "util.h" +#include "debug.h" /* Skip "." and ".." directories */ static int filter(const struct dirent *dir) @@ -319,8 +321,12 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, static void thread_map__delete(struct thread_map *threads) { if (threads) { + int i; + WARN_ONCE(atomic_read(&threads->refcnt) != 0, "thread map refcnt unbalanced\n"); + for (i = 0; i < threads->nr; i++) + free(thread_map__comm(threads, i)); free(threads); } } @@ -348,3 +354,56 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) return printed + fprintf(fp, "\n"); } + +static int get_comm(char **comm, pid_t pid) +{ + char *path; + size_t size; + int err; + + if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1) + return -ENOMEM; + + err = filename__read_str(path, comm, &size); + if (!err) { + /* + * We're reading 16 bytes, while filename__read_str + * allocates data per BUFSIZ bytes, so we can safely + * mark the end of the string. + */ + (*comm)[size] = 0; + rtrim(*comm); + } + + free(path); + return err; +} + +static void comm_init(struct thread_map *map, int i) +{ + pid_t pid = thread_map__pid(map, i); + char *comm = NULL; + + /* dummy pid comm initialization */ + if (pid == -1) { + map->map[i].comm = strdup("dummy"); + return; + } + + /* + * The comm name is like extra bonus ;-), + * so just warn if we fail for any reason. + */ + if (get_comm(&comm, pid)) + pr_warning("Couldn't resolve comm name for pid %d\n", pid); + + map->map[i].comm = comm; +} + +void thread_map__read_comms(struct thread_map *threads) +{ + int i; + + for (i = 0; i < threads->nr; ++i) + comm_init(threads, i); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 6b0cd2dc006b..af679d8a50f8 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -7,6 +7,7 @@ struct thread_map_data { pid_t pid; + char *comm; }; struct thread_map { @@ -44,4 +45,11 @@ thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) { map->map[thread].pid = pid; } + +static inline char *thread_map__comm(struct thread_map *map, int thread) +{ + return map->map[thread].comm; +} + +void thread_map__read_comms(struct thread_map *threads); #endif /* __PERF_THREAD_MAP_H */ From 134aa44f6bff6b967efb85255ee9e8982cb8e486 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:08 +0200 Subject: [PATCH 319/839] perf tests: Add thread_map object tests Adding thread_map object tests for comm name values. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/thread-map.c | 38 +++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 tools/perf/tests/thread-map.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index ee41e705b2eb..d20d6e6ab65b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,6 +31,7 @@ perf-y += code-reading.o perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o +perf-y += thread-map.o perf-$(CONFIG_X86) += perf-time-to-tsc.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 87b9961646e4..c1dde733c3a6 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -170,6 +170,10 @@ static struct test { .desc = "Test kmod_path__parse function", .func = test__kmod_path__parse, }, + { + .desc = "Test thread map", + .func = test__thread_map, + }, { .func = NULL, }, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8e5038b48ba8..ebb47d96bc0b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -61,6 +61,7 @@ int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); int test__kmod_path__parse(void); +int test__thread_map(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c new file mode 100644 index 000000000000..5acf000939ea --- /dev/null +++ b/tools/perf/tests/thread-map.c @@ -0,0 +1,38 @@ +#include +#include +#include "tests.h" +#include "thread_map.h" +#include "debug.h" + +int test__thread_map(void) +{ + struct thread_map *map; + + /* test map on current pid */ + map = thread_map__new_by_pid(getpid()); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", + thread_map__pid(map, 0) == getpid()); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "perf")); + thread_map__put(map); + + /* test dummy pid */ + map = thread_map__new_dummy(); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "dummy")); + thread_map__put(map); + return 0; +} From 1ac77e1ce8654ec94ada0c508d58ba80a4647fba Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:09 +0200 Subject: [PATCH 320/839] perf stat: Introduce perf_counts function Introducing perf_counts function, that returns 'struct perf_counts_values' pointer for given cpu. Also moving perf_counts* structures into stat.h. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 ++++++------- tools/perf/tests/openat-syscall-all-cpus.c | 4 ++-- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 18 +---------------- tools/perf/util/stat.h | 23 ++++++++++++++++++++++ 6 files changed, 37 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e1636cae76b..49b90374232c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -316,7 +316,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, count); perf_counts_values__scale(count, scale, NULL); - evsel->counts->cpu[cpu] = *count; + *perf_counts(evsel->counts, cpu) = *count; if (aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; @@ -805,9 +805,9 @@ static void print_aggr(char *prefix) s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; - val += counter->counts->cpu[cpu].val; - ena += counter->counts->cpu[cpu].ena; - run += counter->counts->cpu[cpu].run; + val += perf_counts(counter->counts, cpu)->val; + ena += perf_counts(counter->counts, cpu)->ena; + run += perf_counts(counter->counts, cpu)->run; nr++; } if (prefix) @@ -915,9 +915,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = counter->counts->cpu[cpu].val; - ena = counter->counts->cpu[cpu].ena; - run = counter->counts->cpu[cpu].run; + val = perf_counts(counter->counts, cpu)->val; + ena = perf_counts(counter->counts, cpu)->ena; + run = perf_counts(counter->counts, cpu)->run; if (prefix) fprintf(output, "%s", prefix); diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index b8d552b13950..e8d944fe1bd0 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -98,9 +98,9 @@ int test__openat_syscall_event_on_all_cpus(void) } expected = nr_openat_calls + cpu; - if (evsel->counts->cpu[cpu].val != expected) { + if (perf_counts(evsel->counts, cpu)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu)->val); err = -1; } } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index bdfa1f446681..e86fc477a74f 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -44,7 +44,7 @@ int test__openat_syscall_event(void) goto out_close_fd; } - if (evsel->counts->cpu[0].val != nr_openat_calls) { + if (perf_counts(evsel->counts, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", nr_openat_calls, evsel->counts->cpu[0].val); goto out_close_fd; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1b2f480a3e82..8401b042b9d4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -910,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = evsel->prev_raw_counts->cpu[cpu]; - evsel->prev_raw_counts->cpu[cpu] = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu); + *perf_counts(evsel->prev_raw_counts, cpu) = *count; } count->val = count->val - tmp.val; @@ -972,7 +972,7 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, perf_evsel__compute_deltas(evsel, cpu, &count); perf_counts_values__scale(&count, scale, NULL); - evsel->counts->cpu[cpu] = count; + *perf_counts(evsel->counts, cpu) = count; return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4dbf32d94dfb..b420f8f5fc5d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -9,23 +9,7 @@ #include "xyarray.h" #include "symbol.h" #include "cpumap.h" - -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; -}; - -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct perf_counts_values cpu[]; -}; +#include "stat.h" struct perf_evsel; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 093dc3cb28dd..5e43348836a6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -31,6 +31,29 @@ enum aggr_mode { AGGR_CORE, }; +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct perf_counts_values cpu[]; +}; + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu) +{ + return &counts->cpu[cpu]; +} + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); From a8e02324dfe6bcafc15d02b790f33321ec4facb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:10 +0200 Subject: [PATCH 321/839] perf stat: Use xyarray for cpu evsel counts Switching single dimensional array of 'struct perf_counts_values' with xyarray object, so we could store thread dimension counts. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/util/stat.c | 31 ++++++++++++++++++++++--------- tools/perf/util/stat.h | 7 ++++--- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 49b90374232c..055ce83dd6f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -218,7 +218,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) evlist__for_each(evlist, evsel) { perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); + perf_evsel__reset_counts(evsel); } perf_stat__reset_shadow_stats(); diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index e86fc477a74f..bd882f09ebbc 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -46,7 +46,7 @@ int test__openat_syscall_event(void) if (perf_counts(evsel->counts, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", - nr_openat_calls, evsel->counts->cpu[0].val); + nr_openat_calls, perf_counts(evsel->counts, 0)->val); goto out_close_fd; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4014b709f956..453480aa7650 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -97,26 +97,39 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) struct perf_counts *perf_counts__new(int ncpus) { - int size = sizeof(struct perf_counts) + - ncpus * sizeof(struct perf_counts_values); + struct perf_counts *counts = zalloc(sizeof(*counts)); - return zalloc(size); + if (counts) { + struct xyarray *cpu; + + cpu = xyarray__new(ncpus, 1, sizeof(struct perf_counts_values)); + if (!cpu) { + free(counts); + return NULL; + } + + counts->cpu = cpu; + } + + return counts; } void perf_counts__delete(struct perf_counts *counts) { - free(counts); + if (counts) { + xyarray__delete(counts->cpu); + free(counts); + } } -static void perf_counts__reset(struct perf_counts *counts, int ncpus) +static void perf_counts__reset(struct perf_counts *counts) { - memset(counts, 0, (sizeof(*counts) + - (ncpus * sizeof(struct perf_counts_values)))); + xyarray__reset(counts->cpu); } -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus) +void perf_evsel__reset_counts(struct perf_evsel *evsel) { - perf_counts__reset(evsel->counts, ncpus); + perf_counts__reset(evsel->counts); } int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5e43348836a6..6d07612545e0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -3,6 +3,7 @@ #include #include +#include "xyarray.h" struct stats { @@ -45,13 +46,13 @@ struct perf_counts_values { struct perf_counts { s8 scaled; struct perf_counts_values aggr; - struct perf_counts_values cpu[]; + struct xyarray *cpu; }; static inline struct perf_counts_values* perf_counts(struct perf_counts *counts, int cpu) { - return &counts->cpu[cpu]; + return xyarray__entry(counts->cpu, cpu, 0); } void update_stats(struct stats *stats, u64 val); @@ -88,7 +89,7 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, struct perf_counts *perf_counts__new(int ncpus); void perf_counts__delete(struct perf_counts *counts); -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus); +void perf_evsel__reset_counts(struct perf_evsel *evsel); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); void perf_evsel__free_counts(struct perf_evsel *evsel); #endif From a6fa003855d38d53d90c1a8a5827102e62702334 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:11 +0200 Subject: [PATCH 322/839] perf stat: Make stats work over the thread dimension Now that we have space for thread dimension counts, let's store it. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 32 ++++++++++++---------- tools/perf/tests/openat-syscall-all-cpus.c | 6 ++-- tools/perf/tests/openat-syscall.c | 4 +-- tools/perf/util/evsel.c | 12 ++++---- tools/perf/util/evsel.h | 2 +- tools/perf/util/stat.c | 8 +++--- tools/perf/util/stat.h | 8 +++--- 7 files changed, 38 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 055ce83dd6f2..983bcbbe8e0a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -166,11 +166,12 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) zfree(&evsel->priv); } -static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) { struct perf_counts *counts; - counts = perf_counts__new(perf_evsel__nr_cpus(evsel)); + counts = perf_counts__new(ncpus, nthreads); if (counts) evsel->prev_raw_counts = counts; @@ -197,11 +198,14 @@ static void perf_evlist__free_stats(struct perf_evlist *evlist) static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) { struct perf_evsel *evsel; + int nthreads = thread_map__nr(evsel_list->threads); evlist__for_each(evlist, evsel) { + int ncpus = perf_evsel__nr_cpus(evsel); + if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) goto out_free; } @@ -294,7 +298,7 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) return 0; } -static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, +static int read_cb(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; @@ -314,9 +318,9 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, count); + perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - *perf_counts(evsel->counts, cpu) = *count; + *perf_counts(evsel->counts, cpu, thread) = *count; if (aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; @@ -352,7 +356,7 @@ static int read_counter_aggr(struct perf_evsel *counter) return -1; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, aggr); + perf_evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -805,9 +809,9 @@ static void print_aggr(char *prefix) s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; - val += perf_counts(counter->counts, cpu)->val; - ena += perf_counts(counter->counts, cpu)->ena; - run += perf_counts(counter->counts, cpu)->run; + val += perf_counts(counter->counts, cpu, 0)->val; + ena += perf_counts(counter->counts, cpu, 0)->ena; + run += perf_counts(counter->counts, cpu, 0)->run; nr++; } if (prefix) @@ -915,9 +919,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = perf_counts(counter->counts, cpu)->val; - ena = perf_counts(counter->counts, cpu)->ena; - run = perf_counts(counter->counts, cpu)->run; + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; if (prefix) fprintf(output, "%s", prefix); diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index e8d944fe1bd0..a572f87e9c8d 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -78,7 +78,7 @@ int test__openat_syscall_event_on_all_cpus(void) * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ - if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { + if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); goto out_close_fd; } @@ -98,9 +98,9 @@ int test__openat_syscall_event_on_all_cpus(void) } expected = nr_openat_calls + cpu; - if (perf_counts(evsel->counts, cpu)->val != expected) { + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu)->val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index bd882f09ebbc..c9a37bc6b33a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -44,9 +44,9 @@ int test__openat_syscall_event(void) goto out_close_fd; } - if (perf_counts(evsel->counts, 0)->val != nr_openat_calls) { + if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", - nr_openat_calls, perf_counts(evsel->counts, 0)->val); + nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8401b042b9d4..cd6ce7066f85 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -898,7 +898,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -910,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu); - *perf_counts(evsel->prev_raw_counts, cpu) = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } count->val = count->val - tmp.val; @@ -964,15 +964,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, &count); + perf_evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); - *perf_counts(evsel->counts, cpu) = count; + *perf_counts(evsel->counts, cpu, thread) = count; return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b420f8f5fc5d..020f7e13634a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -112,7 +112,7 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 453480aa7650..7bcc19b62dd1 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -95,14 +95,14 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -struct perf_counts *perf_counts__new(int ncpus) +struct perf_counts *perf_counts__new(int ncpus, int nthreads) { struct perf_counts *counts = zalloc(sizeof(*counts)); if (counts) { struct xyarray *cpu; - cpu = xyarray__new(ncpus, 1, sizeof(struct perf_counts_values)); + cpu = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); if (!cpu) { free(counts); return NULL; @@ -132,9 +132,9 @@ void perf_evsel__reset_counts(struct perf_evsel *evsel) perf_counts__reset(evsel->counts); } -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) { - evsel->counts = perf_counts__new(ncpus); + evsel->counts = perf_counts__new(ncpus, nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 6d07612545e0..e0b8dc50fbb6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -50,9 +50,9 @@ struct perf_counts { }; static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu) +perf_counts(struct perf_counts *counts, int cpu, int thread) { - return xyarray__entry(counts->cpu, cpu, 0); + return xyarray__entry(counts->cpu, cpu, thread); } void update_stats(struct stats *stats, u64 val); @@ -86,10 +86,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -struct perf_counts *perf_counts__new(int ncpus); +struct perf_counts *perf_counts__new(int ncpus, int nthreads); void perf_counts__delete(struct perf_counts *counts); void perf_evsel__reset_counts(struct perf_evsel *evsel); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_counts(struct perf_evsel *evsel); #endif From 57b289159ab4fe1f7b2b531464cf67f65e48dd00 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:12 +0200 Subject: [PATCH 323/839] perf stat: Rename struct perf_counts::cpu member to values Renaming 'struct xyarray *cpu' pointer to more fitting/generic values, because now we store both cpu and thread values. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 12 ++++++------ tools/perf/util/stat.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 7bcc19b62dd1..197a2db5f2c4 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -100,15 +100,15 @@ struct perf_counts *perf_counts__new(int ncpus, int nthreads) struct perf_counts *counts = zalloc(sizeof(*counts)); if (counts) { - struct xyarray *cpu; + struct xyarray *values; - cpu = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); - if (!cpu) { + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { free(counts); return NULL; } - counts->cpu = cpu; + counts->values = values; } return counts; @@ -117,14 +117,14 @@ struct perf_counts *perf_counts__new(int ncpus, int nthreads) void perf_counts__delete(struct perf_counts *counts) { if (counts) { - xyarray__delete(counts->cpu); + xyarray__delete(counts->values); free(counts); } } static void perf_counts__reset(struct perf_counts *counts) { - xyarray__reset(counts->cpu); + xyarray__reset(counts->values); } void perf_evsel__reset_counts(struct perf_evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index e0b8dc50fbb6..295d1e29d3d6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -46,13 +46,13 @@ struct perf_counts_values { struct perf_counts { s8 scaled; struct perf_counts_values aggr; - struct xyarray *cpu; + struct xyarray *values; }; static inline struct perf_counts_values* perf_counts(struct perf_counts *counts, int cpu, int thread) { - return xyarray__entry(counts->cpu, cpu, thread); + return xyarray__entry(counts->values, cpu, thread); } void update_stats(struct stats *stats, u64 val); From 254ecbc7474dfa08155c5595e90cd4a0fa9d14ce Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:13 +0200 Subject: [PATCH 324/839] perf stat: Introduce perf_evlist__reset_stats To fit in with the rest of the helpers (alloc and free). Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 983bcbbe8e0a..188cd98a4ab3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -216,7 +216,7 @@ out_free: return -1; } -static void perf_stat__reset_stats(struct perf_evlist *evlist) +static void perf_evlist__reset_stats(struct perf_evlist *evlist) { struct perf_evsel *evsel; @@ -224,7 +224,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_stat_priv(evsel); perf_evsel__reset_counts(evsel); } +} +static void perf_stat__reset_stats(void) +{ + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); } @@ -1473,7 +1477,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); if (forever && status != -1) { print_stat(argc, argv); - perf_stat__reset_stats(evsel_list); + perf_stat__reset_stats(); } } From 9689edfabc1547155a7cdd1304fd294107054291 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:14 +0200 Subject: [PATCH 325/839] perf stat: Move perf_evsel__(alloc|free|reset)_stat_priv into stat object Moving perf_evsel__(alloc|free|reset)_stat_priv into stat object, so it could be used outside stat command in following patches. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 25 ------------------------- tools/perf/util/stat.c | 25 +++++++++++++++++++++++++ tools/perf/util/stat.h | 4 ++++ 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 188cd98a4ab3..8a7fd0c60ab1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -141,31 +141,6 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) -{ - int i; - struct perf_stat *ps = evsel->priv; - - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); - - perf_stat_evsel_id_init(evsel); -} - -static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) -{ - evsel->priv = zalloc(sizeof(struct perf_stat)); - if (evsel->priv == NULL) - return -ENOMEM; - perf_evsel__reset_stat_priv(evsel); - return 0; -} - -static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) -{ - zfree(&evsel->priv); -} - static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, int ncpus, int nthreads) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 197a2db5f2c4..b8c329f41f13 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -143,3 +143,28 @@ void perf_evsel__free_counts(struct perf_evsel *evsel) perf_counts__delete(evsel->counts); evsel->counts = NULL; } + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +{ + int i; + struct perf_stat *ps = evsel->priv; + + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + + perf_stat_evsel_id_init(evsel); +} + +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +{ + evsel->priv = zalloc(sizeof(struct perf_stat)); + if (evsel->priv == NULL) + return -ENOMEM; + perf_evsel__reset_stat_priv(evsel); + return 0; +} + +void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +{ + zfree(&evsel->priv); +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 295d1e29d3d6..c441cb312565 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -92,4 +92,8 @@ void perf_counts__delete(struct perf_counts *counts); void perf_evsel__reset_counts(struct perf_evsel *evsel); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_counts(struct perf_evsel *evsel); + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); +void perf_evsel__free_stat_priv(struct perf_evsel *evsel); #endif From a939512d10a5583a32ae85c1bff6c4ae63a8a517 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:15 +0200 Subject: [PATCH 326/839] perf stat: Move perf_evsel__(alloc|free)_prev_raw_counts into stat object Moving perf_evsel__(alloc|free)_prev_raw_counts into stat object, so it could be used in following patches. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 18 ------------------ tools/perf/util/stat.c | 18 ++++++++++++++++++ tools/perf/util/stat.h | 4 ++++ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8a7fd0c60ab1..0ccba5c6b61d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -141,24 +141,6 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - struct perf_counts *counts; - - counts = perf_counts__new(ncpus, nthreads); - if (counts) - evsel->prev_raw_counts = counts; - - return counts ? 0 : -ENOMEM; -} - -static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) -{ - perf_counts__delete(evsel->prev_raw_counts); - evsel->prev_raw_counts = NULL; -} - static void perf_evlist__free_stats(struct perf_evlist *evlist) { struct perf_evsel *evsel; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index b8c329f41f13..6221c3898397 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -168,3 +168,21 @@ void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { zfree(&evsel->priv); } + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) +{ + struct perf_counts *counts; + + counts = perf_counts__new(ncpus, nthreads); + if (counts) + evsel->prev_raw_counts = counts; + + return counts ? 0 : -ENOMEM; +} + +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->prev_raw_counts); + evsel->prev_raw_counts = NULL; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c441cb312565..454b26875aef 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -96,4 +96,8 @@ void perf_evsel__free_counts(struct perf_evsel *evsel); void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); void perf_evsel__free_stat_priv(struct perf_evsel *evsel); + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads); +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); #endif From 24e34f68e21a47a582b3cc2eccdb835b80b1830a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:16 +0200 Subject: [PATCH 327/839] perf stat: Move perf_evlist__(alloc|free|reset)_stats into stat object Moving perf_evlist__(alloc|free|reset)_stats into stat object, so it could be used in following patches. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-12-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 42 ------------------------------------- tools/perf/util/evlist.h | 1 - tools/perf/util/stat.c | 44 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 6 ++++++ 4 files changed, 50 insertions(+), 43 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0ccba5c6b61d..3e385f9f12ee 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -141,48 +141,6 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static void perf_evlist__free_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__free_stat_priv(evsel); - perf_evsel__free_counts(evsel); - perf_evsel__free_prev_raw_counts(evsel); - } -} - -static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) -{ - struct perf_evsel *evsel; - int nthreads = thread_map__nr(evsel_list->threads); - - evlist__for_each(evlist, evsel) { - int ncpus = perf_evsel__nr_cpus(evsel); - - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) - goto out_free; - } - - return 0; - -out_free: - perf_evlist__free_stats(evlist); - return -1; -} - -static void perf_evlist__reset_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel); - } -} - static void perf_stat__reset_stats(void) { perf_evlist__reset_stats(evsel_list); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8489b9d2812..037633c1da9d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -289,5 +289,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 6221c3898397..818cb022fcb6 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,6 +1,8 @@ #include #include "stat.h" +#include "evlist.h" #include "evsel.h" +#include "thread_map.h" void update_stats(struct stats *stats, u64 val) { @@ -186,3 +188,45 @@ void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) +{ + struct perf_evsel *evsel; + int nthreads = thread_map__nr(evlist->threads); + + evlist__for_each(evlist, evsel) { + int ncpus = perf_evsel__nr_cpus(evsel); + + if (perf_evsel__alloc_stat_priv(evsel) < 0 || + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + goto out_free; + } + + return 0; + +out_free: + perf_evlist__free_stats(evlist); + return -1; +} + +void perf_evlist__free_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__free_stat_priv(evsel); + perf_evsel__free_counts(evsel); + perf_evsel__free_prev_raw_counts(evsel); + } +} + +void perf_evlist__reset_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__reset_stat_priv(evsel); + perf_evsel__reset_counts(evsel); + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 454b26875aef..5f62db2472c7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -70,6 +70,8 @@ static inline void init_stats(struct stats *stats) } struct perf_evsel; +struct perf_evlist; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -100,4 +102,8 @@ void perf_evsel__free_stat_priv(struct perf_evsel *evsel); int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); +void perf_evlist__free_stats(struct perf_evlist *evlist); +void perf_evlist__reset_stats(struct perf_evlist *evlist); #endif From a7d0a102e4ae46b75b70a9500979e7ed3cdf183f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:17 +0200 Subject: [PATCH 328/839] perf stat: Introduce perf_evsel__alloc_stats function Move all stat allocation logic related to stat object under single function. This way we can use it separately for stat object out of evlist object. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 20 ++++++++++++++------ tools/perf/util/stat.h | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 818cb022fcb6..f2a0d1521e26 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -189,17 +189,25 @@ void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) evsel->prev_raw_counts = NULL; } +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +{ + int ncpus = perf_evsel__nr_cpus(evsel); + int nthreads = thread_map__nr(evsel->threads); + + if (perf_evsel__alloc_stat_priv(evsel) < 0 || + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + return -ENOMEM; + + return 0; +} + int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) { struct perf_evsel *evsel; - int nthreads = thread_map__nr(evlist->threads); evlist__for_each(evlist, evsel) { - int ncpus = perf_evsel__nr_cpus(evsel); - - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + if (perf_evsel__alloc_stats(evsel, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5f62db2472c7..9f05c571befe 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -103,6 +103,8 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); + int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); From f99f4719b841a9745d6a7652eef3956aaf2db66a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:18 +0200 Subject: [PATCH 329/839] perf stat: Introduce perf_evsel__read function Adding simple read function that reads/store data into given struct perf_counts_values *count object. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-14-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 14 ++++++++++++++ tools/perf/util/evsel.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cd6ce7066f85..2e0a4e064f44 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -955,6 +955,20 @@ int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, return cb(evsel, cpu, thread, &count); } +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) +{ + memset(count, 0, sizeof(*count)); + + if (FD(evsel, cpu, thread) < 0) + return -EINVAL; + + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) + return -errno; + + return 0; +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 020f7e13634a..a79944a21e8f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -236,6 +236,9 @@ typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, perf_evsel__read_cb_t cb); +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); From 106a94a0f8c207ef4113ce7e32f34a00b3b174e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:19 +0200 Subject: [PATCH 330/839] perf stat: Introduce read_counters function Moving read counters logic into single read_counters function, which will be called for both interval and overall processing legs. The reason is to split reading and processing (following patches) counters code, so we could read counters from other sources (like perf.data) and process them in the same way as 'perf stat' command does. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-15-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 49 ++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e385f9f12ee..158859e622d3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -323,27 +323,35 @@ static int read_counter(struct perf_evsel *counter) return 0; } +static void read_counters(bool close) +{ + struct perf_evsel *counter; + struct perf_stat *ps; + + evlist__for_each(evsel_list, counter) { + ps = counter->priv; + memset(ps->res_stats, 0, sizeof(ps->res_stats)); + + if (aggr_mode == AGGR_GLOBAL) + read_counter_aggr(counter); + else + read_counter(counter); + + if (close) { + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), + thread_map__nr(evsel_list->threads)); + } + } +} + static void print_interval(void) { static int num_print_interval; struct perf_evsel *counter; - struct perf_stat *ps; struct timespec ts, rs; char prefix[64]; - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter_aggr(counter); - } - } else { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter(counter); - } - } + read_counters(false); clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); @@ -525,18 +533,7 @@ static int __run_perf_stat(int argc, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - read_counter_aggr(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } - } else { - evlist__for_each(evsel_list, counter) { - read_counter(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); - } - } + read_counters(true); return WEXITSTATUS(status); } From 3b3eb044519ac4c422dbc6084303c470b8d2dc61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:20 +0200 Subject: [PATCH 331/839] perf stat: Separate counters reading and processing Separating counters reading and processing so we could use the processing part in following patches. Using simple reading via perf_evsel__read function to read counters now, because part of the processing was in the read_cb callback. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-16-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 56 +++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 158859e622d3..74ac92baa2bd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -217,8 +217,9 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) return 0; } -static int read_cb(struct perf_evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) +static int +process_counter_values(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; @@ -239,7 +240,6 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread, if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - *perf_counts(evsel->counts, cpu, thread) = *count; if (aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; @@ -256,23 +256,41 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread, return 0; } -static int read_counter(struct perf_evsel *counter); +static int process_counter_maps(struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; -/* - * Read out the results of a single counter: - * aggregate counts across CPUs in system-wide mode - */ -static int read_counter_aggr(struct perf_evsel *counter) + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +static int process_counter(struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; u64 *count = counter->counts->aggr.values; - int i; + int i, ret; aggr->val = aggr->ena = aggr->run = 0; - if (read_counter(counter)) - return -1; + ret = process_counter_maps(counter); + if (ret) + return ret; + + if (aggr_mode != AGGR_GLOBAL) + return 0; if (!counter->snapshot) perf_evsel__compute_deltas(counter, -1, -1, aggr); @@ -315,7 +333,10 @@ static int read_counter(struct perf_evsel *counter) for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) + struct perf_counts_values *count; + + count = perf_counts(counter->counts, cpu, thread); + if (perf_evsel__read(counter, cpu, thread, count)) return -1; } } @@ -332,10 +353,11 @@ static void read_counters(bool close) ps = counter->priv; memset(ps->res_stats, 0, sizeof(ps->res_stats)); - if (aggr_mode == AGGR_GLOBAL) - read_counter_aggr(counter); - else - read_counter(counter); + if (read_counter(counter)) + pr_warning("failed to read counter %s\n", counter->name); + + if (process_counter(counter)) + pr_warning("failed to process counter %s\n", counter->name); if (close) { perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), From 32ce0a47aaa401a7dd27dfe65210f3e3aa809682 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:21 +0200 Subject: [PATCH 332/839] perf stat: Move zero_per_pkg into counter process code Moving zero_per_pkg into counter process code, to make the reading path free of processing logic. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-17-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 74ac92baa2bd..b7c184bd559c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -285,6 +285,9 @@ static int process_counter(struct perf_evsel *counter) aggr->val = aggr->ena = aggr->run = 0; + if (counter->per_pkg) + zero_per_pkg(counter); + ret = process_counter_maps(counter); if (ret) return ret; @@ -328,9 +331,6 @@ static int read_counter(struct perf_evsel *counter) if (counter->system_wide) nthreads = 1; - if (counter->per_pkg) - zero_per_pkg(counter); - for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { struct perf_counts_values *count; From 7ddb1b6bbf1e614e6700f226cb0a559c5c635336 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:22 +0200 Subject: [PATCH 333/839] perf stat: Move perf_stat initialization counter process code Moving perf_stat initialization counter process code, to make the reading path free of processing logic. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-18-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b7c184bd559c..526f67753664 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -284,6 +284,7 @@ static int process_counter(struct perf_evsel *counter) int i, ret; aggr->val = aggr->ena = aggr->run = 0; + memset(ps->res_stats, 0, sizeof(ps->res_stats)); if (counter->per_pkg) zero_per_pkg(counter); @@ -347,12 +348,8 @@ static int read_counter(struct perf_evsel *counter) static void read_counters(bool close) { struct perf_evsel *counter; - struct perf_stat *ps; evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); From d8ee3b54d0458cbed0a31a410458f4bfd38fc147 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:23 +0200 Subject: [PATCH 334/839] perf stat: Remove perf_evsel__read_cb function It's no longer used, the stat command uses perf_evsel__read now. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-19-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 16 ---------------- tools/perf/util/evsel.h | 7 ------- 2 files changed, 23 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2e0a4e064f44..2936b3080722 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -939,22 +939,6 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb) -{ - struct perf_counts_values count; - - memset(&count, 0, sizeof(count)); - - if (FD(evsel, cpu, thread) < 0) - return -EINVAL; - - if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) - return -errno; - - return cb(evsel, cpu, thread, &count); -} - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a79944a21e8f..4a7ed5656cf0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -229,13 +229,6 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) -typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count); - -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb); - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); From ba411a954eae3e88c02667a5670cac97fb9c3f58 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:24 +0200 Subject: [PATCH 335/839] perf stat: Rename print_interval to process_interval It suits better, because the function also reads counter's data. Also the 'print_interval' name will be used in following generalization of counters display. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-20-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 526f67753664..aa706fca410e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -363,7 +363,7 @@ static void read_counters(bool close) } } -static void print_interval(void) +static void process_interval(void) { static int num_print_interval; struct perf_evsel *counter; @@ -526,7 +526,7 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); - print_interval(); + process_interval(); } } wait(&status); @@ -544,7 +544,7 @@ static int __run_perf_stat(int argc, const char **argv) while (!done) { nanosleep(&ts, NULL); if (interval) - print_interval(); + process_interval(); } } From 5835e2286583e4fa6c2a609446e1320e7da2b161 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:25 +0200 Subject: [PATCH 336/839] perf stat: Using init_stats instead of memset The init_stats function is meant to init 'struct stats'. Reported-by: David Ahern Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-21-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index aa706fca410e..39a97ade2cf3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -284,7 +284,7 @@ static int process_counter(struct perf_evsel *counter) int i, ret; aggr->val = aggr->ena = aggr->run = 0; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); + init_stats(ps->res_stats); if (counter->per_pkg) zero_per_pkg(counter); From d4f63a4741a808c0bf25d92884713008706fca16 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:26 +0200 Subject: [PATCH 337/839] perf stat: Introduce print_counters function Centralize counters print code into single print_counters function. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-22-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 127 +++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 39a97ade2cf3..56dc8881cb05 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,10 +67,7 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" -static void print_stat(int argc, const char **argv); -static void print_counter_aggr(struct perf_evsel *counter, char *prefix); -static void print_counter(struct perf_evsel *counter, char *prefix); -static void print_aggr(char *prefix); +static void print_counters(struct timespec *ts, int argc, const char **argv); /* Default events used for perf stat -T */ static const char *transaction_attrs = { @@ -365,53 +362,14 @@ static void read_counters(bool close) static void process_interval(void) { - static int num_print_interval; - struct perf_evsel *counter; struct timespec ts, rs; - char prefix[64]; read_counters(false); clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); - sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); - } - } - - if (++num_print_interval == 25) - num_print_interval = 0; - - switch (aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(prefix); - break; - case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); - break; - case AGGR_GLOBAL: - default: - evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, prefix); - } - - fflush(output); + print_counters(&rs, 0, NULL); } static void handle_initial_delay(void) @@ -901,9 +859,35 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } -static void print_stat(int argc, const char **argv) +static void print_interval(char *prefix, struct timespec *ts) +{ + static int num_print_interval; + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + + if (num_print_interval == 0 && !csv_output) { + switch (aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + } + } + + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(int argc, const char **argv) { - struct perf_evsel *counter; int i; fflush(stdout); @@ -929,36 +913,53 @@ static void print_stat(int argc, const char **argv) fprintf(output, " (%d runs)", run_count); fprintf(output, ":\n\n"); } +} + +static void print_footer(void) +{ + if (!null_run) + fprintf(output, "\n"); + fprintf(output, " %17.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(output, " "); + print_noise_pct(stddev_stats(&walltime_nsecs_stats), + avg_stats(&walltime_nsecs_stats)); + } + fprintf(output, "\n\n"); +} + +static void print_counters(struct timespec *ts, int argc, const char **argv) +{ + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(prefix = buf, ts); + else + print_header(argc, argv); switch (aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(NULL); + print_aggr(prefix); break; case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, NULL); + print_counter_aggr(counter, prefix); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) - print_counter(counter, NULL); + print_counter(counter, prefix); break; default: break; } - if (!csv_output) { - if (!null_run) - fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); - } - fprintf(output, "\n\n"); - } + if (!interval && !csv_output) + print_footer(); + + fflush(output); } static volatile int signr = -1; @@ -1407,13 +1408,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); if (forever && status != -1) { - print_stat(argc, argv); + print_counters(NULL, argc, argv); perf_stat__reset_stats(); } } if (!forever && status != -1 && !interval) - print_stat(argc, argv); + print_counters(NULL, argc, argv); perf_evlist__free_stats(evsel_list); out: From 32b8af82e3708bc19af75c31fa773a98449f0edc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Jun 2015 11:29:27 +0200 Subject: [PATCH 338/839] perf stat: Introduce --per-thread option Currently all the -p option PID arguments tasks values get aggregated and printed as single values. Adding --per-tasks option to print values per task. $ perf stat -e cycles,instructions --per-thread -p 30190,30242 ^C Performance counter stats for process id '30190,30242': cat-30190 0 cycles yes-30242 3,842,525,421 cycles cat-30190 0 instructions yes-30242 10,370,817,010 instructions 1.143155657 seconds time elapsed Also works under interval mode: $ perf stat -e cycles,instructions --per-thread -p 30190,30242 -I 1000 # time comm-pid counts unit events 1.000073435 cat-30190 89,058 cycles 1.000073435 yes-30242 3,360,786,902 cycles (100.00%) 1.000073435 cat-30190 14,066 instructions 1.000073435 yes-30242 9,069,937,462 instructions 2.000204830 cat-30190 0 cycles 2.000204830 yes-30242 3,351,667,626 cycles 2.000204830 cat-30190 0 instructions 2.000204830 yes-30242 9,045,796,885 instructions ^C 2.771286639 cat-30190 0 cycles 2.771286639 yes-30242 2,593,884,166 cycles 2.771286639 cat-30190 0 instructions 2.771286639 yes-30242 7,001,171,191 instructions It works only with -t and -p options, otherwise following error is printed: $ perf stat -e cycles --per-thread -I 1000 ls The --per-thread option is only available when monitoring via -p -t options. -p, --pid stat events on existing process id -t, --tid stat events on existing thread id Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1435310967-14570-23-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++ tools/perf/builtin-stat.c | 76 +++++++++++++++++++++++++- tools/perf/util/stat.h | 1 + 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod use --per-core in addition to -a. (system-wide). The output includes the core number and the number of online logical processors on that physical processor. +--per-thread:: +Aggregate counts per monitored threads, when monitoring threads (-t option) +or processes (-p option). + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring. This is useful to diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 56dc8881cb05..37e301a32f43 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, count = &zero; switch (aggr_mode) { + case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: @@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; + case AGGR_THREAD: + fprintf(output, "%*s-%*d%s", + csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + csv_output ? 0 : -8, + thread_map__pid(evsel->threads, id), + csv_sep); + break; case AGGR_GLOBAL: default: break; @@ -750,6 +759,40 @@ static void print_aggr(char *prefix) } } +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int cpu, thread; + double uval; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + + if (nsec_counter(counter)) + nsec_printout(thread, 0, counter, uval); + else + abs_printout(thread, 0, counter, uval); + + if (!csv_output) + print_noise(counter, 1.0); + + print_running(run, ena); + fputc('\n', output); + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts) case AGGR_NONE: fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + break; case AGGR_GLOBAL: default: fprintf(output, "# time counts %*s events\n", unit_width, "unit"); @@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_SOCKET: print_aggr(prefix); break; + case AGGR_THREAD: + evlist__for_each(evsel_list, counter) + print_aggr_thread(counter, prefix); + break; case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); @@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) break; case AGGR_NONE: case AGGR_GLOBAL: + case AGGR_THREAD: default: break; } @@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-thread", &aggr_mode, + "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), OPT_END() @@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - /* no_aggr, cgroup are for system-wide only */ - if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && + if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + fprintf(stderr, "The --per-thread option is only available " + "when monitoring via -p -t options.\n"); + parse_options_usage(NULL, options, "p", 1); + parse_options_usage(NULL, options, "t", 1); + goto out; + } + + /* + * no_aggr, cgroup are for system-wide only + * --per-thread is aggregated per thread, we dont mix it with cpu mode + */ + if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1375,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } goto out; } + + /* + * Initialize thread_map with comm names, + * so we could print it out on output. + */ + if (aggr_mode == AGGR_THREAD) + thread_map__read_comms(evsel_list->threads); + if (interval && interval < 100) { pr_err("print interval must be >= 100ms\n"); parse_options_usage(stat_usage, options, "I", 1); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9f05c571befe..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -30,6 +30,7 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, + AGGR_THREAD, }; struct perf_counts_values { From 36c8bb56a9f718a9a5f35d1834ca9dcec95deb4a Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Fri, 19 Jun 2015 16:57:33 +0800 Subject: [PATCH 339/839] perf symbols: Check access permission when reading symbol files There 2 problems when reading symbols files: * It doesn't report any errors even if when users specify symbol files which don't exist with --kallsyms or --vmlinux. The result just shows the address without symbols, which is not what is expected. So it's better to report errors and exit the program. * When using command perf report --kallsyms=/proc/kallsyms with a non-root user, symbols are resolved. Then select one symbol and annotate it, it reports the error as the following: Can't annotate __clear_user: No vmlinux file with build id xxx was found. The problem is caused by reading /proc/kcore without access permission. /proc/kcore requires CAP_SYS_RAWIO capability to access, so it needs to change access permission to allow a specific user to read /proc/kcore or use root to execute the perf command. This patch is to report errors when symbol files specified by users don't exist. And check access permission of /proc/kcore when reading it. Signed-off-by: Li Zhang Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1434704253-2632-1-git-send-email-zhlcindy@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 11 +++++++++++ tools/perf/util/symbol.c | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 32626ea3e227..348bed4a2abf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -742,6 +742,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, report_usage, 0); + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + if (report.use_stdio) use_browser = 0; else if (report.use_tui) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 504f2d73b7ee..48b588c6951a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1132,8 +1132,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); - if (fd < 0) + if (fd < 0) { + pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; + } /* Read new maps into temporary lists */ err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, From 2341c0234731f5a6ea0b403f3dc14d1841b8e247 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 25 Jun 2015 10:19:50 +0530 Subject: [PATCH 340/839] arm64/hw_breakpoint.c: remove unnecessary header Header is not needed for arm64/hw_breakpoint.c, Removing the same. Signed-off-by: Maninder Singh Reviewed-by: Vaneet Narang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/hw_breakpoint.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index e7d934d3afe0..7a1a5da6c8c1 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include From 865a7ecb2131a3ba26cc1d49daf18275375109f0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 25 Jun 2015 18:19:32 +0800 Subject: [PATCH 341/839] nfs: provide pnfs_report_layoutstat when NFS42 is disabled kbuild test robot reported: fs/built-in.o: In function `pnfs_report_layoutstat': >> (.text+0x151a1c): undefined reference to `nfs42_proc_layoutstats_generic' Reported-by: kbuild test robot Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ fs/nfs/pnfs.h | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b5224341bcd..40bacebb5b97 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2252,6 +2252,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) return thp; } +#if IS_ENABLED(CONFIG_NFS_V4_2) int pnfs_report_layoutstat(struct inode *inode) { @@ -2306,3 +2307,4 @@ out_put: goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0a4723945db2..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -292,8 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); -int pnfs_report_layoutstat(struct inode *inode); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -692,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ From 9bbd9bb40cfae5ff35710e88866fcadb0e8f91ed Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 26 Jun 2015 09:45:49 +0800 Subject: [PATCH 342/839] pnfs/flexfiles: protect ktime manipulation with mirror lock It looks as if xchg() and cmpxchg() are not available for 64-bit integers on sparc32: > New breakage seen in linux-next today: > > ERROR: "__xchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > ERROR: "__cmpxchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > make[2]: *** [__modpost] Error 1 > make[1]: *** [modules] Error 2 Given that mirror ktime manipulation is already under mirror->lock, let's make use of the fact. Reported-by: Paul Gortmaker Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 31 ++++++++++---------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d3b3b6236711..85b4234b8090 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -417,21 +417,9 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) static void nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) { - ktime_t old, new; - - /* - * Note: careful here! - * If the counter is zero, then we must not increment it until after - * we've set the start_time. - * If we were instead to use atomic_inc_return(), then another - * request might come in, bump, and then call end_busy_timer() - * before we've set the timer->start_time. - */ - old = timer->start_time; - if (atomic_inc_not_zero(&timer->n_ops) == 0) { - new = ktime_get(); - cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); - atomic_inc(&timer->n_ops); + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); } } @@ -440,9 +428,12 @@ nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) { ktime_t start, now; + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + now = ktime_get(); - start.tv64 = xchg(&timer->start_time.tv64, now.tv64); - atomic_dec(&timer->n_ops); + start = timer->start_time; + timer->start_time = now; return ktime_sub(now, start); } @@ -460,8 +451,10 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); - cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= FF_LAYOUTSTATS_REPORT_INTERVAL) { mirror->last_report_time = now; From c0f5f5059f6c9c9efb6ca688f4d6a800c11f6b58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jun 2015 14:51:32 -0400 Subject: [PATCH 343/839] pNFS/flexfiles: Turn off layoutcommit for servers that don't need it This patch ensures that we record the value of 'ffl_flags' from the layout, and then checks for the presence of the FF_FLAGS_NO_LAYOUTCOMMIT flag before deciding whether or not to call pnfs_set_layoutcommit(). The effect is that servers now can decide whether or not they want the client to call layoutcommit before returning a writeable layout. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 16 +++++++++++++++- fs/nfs/flexfilelayout/flexfilelayout.h | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 85b4234b8090..594ab35eacfb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -343,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->gid); } + p = xdr_inline_decode(&stream, 4); + if (p) + fls->flags = be32_to_cpup(p); + ff_layout_sort_mirrors(fls); rc = ff_layout_check_layout(lgr); if (rc) @@ -1018,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; } +static bool +ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg) +{ + return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT); +} + /* * We reference the rpc_cred of the first WRITE that triggers the need for * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. @@ -1030,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { + if (!ff_layout_need_layoutcommit(hdr->lseg)) + return; + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, @@ -1221,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return -EAGAIN; } - if (data->verf.committed == NFS_UNSTABLE) + if (data->verf.committed == NFS_UNSTABLE + && ff_layout_need_layoutcommit(data->lseg)) pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 6fcd8d5e8e3d..f92f9a0a856b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -9,6 +9,8 @@ #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H #define FS_NFS_NFS4FLEXFILELAYOUT_H +#define FF_FLAGS_NO_LAYOUTCOMMIT 1 + #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom @@ -85,6 +87,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; + u32 flags; u32 mirror_array_cnt; struct nfs4_ff_layout_mirror **mirror_array; }; From d620876990f02788d5a663075df007ffb91bdfad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jun 2015 15:37:58 -0400 Subject: [PATCH 344/839] pNFS/flexfiles: Fix the reset of struct pgio_header when resending hdr->good_bytes needs to be set to the length of the request, not zero. Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 594ab35eacfb..c12951b9551e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -774,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } From 0d44b410a01a75daff60ce4cb78313135cdb3649 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 26 Jun 2015 21:21:16 +0200 Subject: [PATCH 345/839] tc1100-wmi: Delete an unnecessary check before the function call "kfree" The kfree() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Darren Hart --- drivers/platform/x86/tc1100-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c index e36542564131..89aa976f0ab2 100644 --- a/drivers/platform/x86/tc1100-wmi.c +++ b/drivers/platform/x86/tc1100-wmi.c @@ -82,7 +82,7 @@ static int get_state(u32 *out, u8 instance) tmp = 0; } - if (result.length > 0 && result.pointer) + if (result.length > 0) kfree(result.pointer); switch (instance) { From da2e8127510525eb4bce0fe34aff06192e042c8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:30:57 -0400 Subject: [PATCH 346/839] NFSv4.2: Fix up a decoding error in layoutstats According to the spec, the server is only returning the status, which we decode in the op header. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9aae0205ef11..a6bd27da6286 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -241,23 +241,7 @@ out_overflow: static int decode_layoutstats(struct xdr_stream *xdr, struct nfs42_layoutstat_res *res) { - int status; - __be32 *p; - - status = decode_op_hdr(xdr, OP_LAYOUTSTATS); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - - res->rpc_status = be32_to_cpup(p++); - return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; + return decode_op_hdr(xdr, OP_LAYOUTSTATS); } /* From 6c5a0d891543873aefc3aaf846c1e7afe0982ff9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:45:46 -0400 Subject: [PATCH 347/839] NFSv4.2: LAYOUTSTATS is optional to implement Make it so, by checking the return value for NFS4ERR_MOTSUPP and caching the information as a server capability. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 10 ++++++++-- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/pnfs.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 06c74cd93875..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -189,9 +189,15 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) if (!nfs4_sequence_done(task, &data->res.seq_res)) return; - /* well, we don't care about errors at all! */ - if (task->tk_status) + switch (task->tk_status) { + case 0: + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; + default: dprintk("%s server returns %d\n", __func__, task->tk_status); + } } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 643ce3a91b22..8eab42407d39 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8635,7 +8635,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE - | NFS_CAP_SEEK, + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 40bacebb5b97..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2266,6 +2266,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) + goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) goto out; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif From 398f74569cebbf06bc6b069442bcd0e9616ca465 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Jun 2015 21:37:56 +0100 Subject: [PATCH 348/839] ARM: 8393/1: smp: Fix suspicious RCU usage with ipi tracepoints John Stultz reports an RCU splat on boot with ARM ipi trace events enabled. =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-00033-gb5bed2f #153 Not tainted ------------------------------- include/trace/events/ipi.h:68 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.0-rc7-00033-gb5bed2f #153 Hardware name: Qualcomm (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (handle_IPI+0x428/0x604) [] (handle_IPI) from [] (gic_handle_irq+0x54/0x5c) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x7c) Exception stack(0xc09f3f48 to 0xc09f3f90) 3f40: 00000001 00000001 00000000 c09f73b8 c09f4528 c0a5de9c 3f60: c076b4f0 00000000 00000000 c09ef108 c0a5cec1 00000001 00000000 c09f3f90 3f80: c026bf60 c0210ab8 20000113 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x20/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x2c0/0x5dc) [] (cpu_startup_entry) from [] (start_kernel+0x358/0x3c4) [] (start_kernel) from [<8020807c>] (0x8020807c) At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Fixes: 365ec7b17327 ("ARM: add IPI tracepoints") Reported-by: John Stultz Tested-by: John Stultz Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b8758185..f11d82527076 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -576,7 +576,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -635,7 +635,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } From 3de1f52a3ae823265299409e276f56cdadddc310 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Jun 2015 01:04:20 +0100 Subject: [PATCH 349/839] ARM: 8394/1: update memblock limit after mapping lowmem The memblock limit is currently used in find_limits to find the bounds for ZONE_NORMAL. The memblock limit may need to be rounded down a PMD size to ensure allocations are fully mapped though. This has the side effect of reducing the amount of memory in ZONE_NORMAL. Once all lowmem is mapped, it's safe to change the memblock limit back to include the unaligned section. Adjust the memblock limit after lowmem mapping is complete. Before: # cat /proc/zoneinfo | grep managed managed 62907 managed 424 After: # cat /proc/zoneinfo | grep managed managed 63331 Signed-off-by: Laura Abbott Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7186382672b5..904d1532e6d0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1519,6 +1519,7 @@ void __init paging_init(const struct machine_desc *mdesc) build_mem_type_table(); prepare_page_table(); map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); From e48866647b486f31ff7c3927b48de8bbb1c6a4c0 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Fri, 26 Jun 2015 17:13:03 +0100 Subject: [PATCH 350/839] ARM: 8396/1: use phys_addr_t in pfn_to_kaddr() This patch fixes pfn_to_kaddr() to use phys_addr_t. Without this, this macro is broken on LPAE systems. For physical addresses above first 4GB result of shifting pfn with PAGE_SHIFT may be truncated. Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Acked-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0e1652..063ef314cca9 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -291,7 +291,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); From 8b572a4200828b4e75cc22ed2f494b58d5372d65 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 28 Jun 2015 14:18:16 +0100 Subject: [PATCH 351/839] agp/intel: Fix typo in needs_ilk_vtd_wa() In needs_ilk_vtd_wa(), we pass in the GPU device but compared it against the ids for the mobile GPU and the mobile host bridge. That latter is impossible and so likely was just a typo for the desktop GPU device id (which is also buggy). Fixes commit da88a5f7f7d434e2cde1b3e19d952e6d84533662 Author: Chris Wilson Date: Wed Feb 13 09:31:53 2013 +0000 drm/i915: Disable WC PTE updates to w/a buggy IOMMU on ILK Reported-by: Ting-Wei Lan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91127 References: https://bugzilla.freedesktop.org/show_bug.cgi?id=60391 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/char/agp/intel-gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 0b4188b9af7c..c6dea3f6917b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void) /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || + if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && intel_iommu_gfx_mapped) return 1; From 2059ac3b1304cb6a82f9d90762dea9f556831627 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 26 Jun 2015 14:18:56 +0300 Subject: [PATCH 352/839] drm/i915: fix backlight after resume on 855gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some 855gm models (at least ThinkPad X40) regressed because of commit b0cd324faed23d10d66ba6ade66579c681feef6f Author: Jani Nikula Date: Wed Nov 12 16:25:43 2014 +0200 drm/i915: don't save/restore backlight hist ctl registers which tried to make our driver more robust by not blindly saving and restoring registers, but it failed to take into account commit 0eb96d6ed38430b72897adde58f5477a6b71757a Author: Jesse Barnes Date: Wed Oct 14 12:33:41 2009 -0700 drm/i915: save/restore BLC histogram control reg across suspend/resume Fix the regression by enabling hist ctl on gen2. v2: Improved the comment. v3: Improved the comment, again. Reported-and-tested-by: Philipp Gesang References: http://mid.gmane.org/20150623222648.GD12335@acheron Fixes: b0cd324faed2 ("drm/i915: don't save/restore backlight hist ctl registers") Cc: Ville Syrjälä Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_panel.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6d3fead3a358..47bb110a5e36 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3491,6 +3491,7 @@ enum skl_disp_power_wells { #define BLM_POLARITY_PNV (1 << 0) /* pnv only */ #define BLC_HIST_CTL (dev_priv->info.display_mmio_offset + 0x61260) +#define BLM_HISTOGRAM_ENABLE (1 << 31) /* New registers for PCH-split platforms. Safe where new bits show up, the * register layout machtes with gen4 BLC_PWM_CTL[12]. */ diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 7d83527f95f7..55aad2322e10 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -907,6 +907,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector) /* XXX: combine this into above write? */ intel_panel_actually_set_backlight(connector, panel->backlight.level); + + /* + * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is + * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 + * that has backlight. + */ + if (IS_GEN2(dev)) + I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } static void i965_enable_backlight(struct intel_connector *connector) From 39f897fdbd46873defc8501102124f862e8f4d80 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Jun 2015 14:28:54 +1000 Subject: [PATCH 353/839] NFSv4: When returning a delegation, don't reclaim an incompatible open mode. It is possible to have an active open with one mode, and a delegation for the same file with a different mode. In particular, a WR_ONLY open and an RD_ONLY delegation. This happens if a WR_ONLY open is followed by a RD_ONLY open which provides a delegation, but is then close. When returning the delegation, we currently try to claim opens for every open type (n_rdwr, n_rdonly, n_wronly). As there is no harm in claiming an open for a mode that we already have, this is often simplest. However if the delegation only provides a subset of the modes that we currently have open, this will produce an error from the server. So when claiming open modes prior to returning a delegation, skip the open request if the mode is not covered by the delegation - the open_stateid must already cover that mode, so there is nothing to do. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8eab42407d39..605c203de556 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1547,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod struct nfs4_state *newstate; int ret; + if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || + opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && + (opendata->o_arg.u.delegation_type & fmode) != fmode) + /* This mode can't have been delegated, so we must have + * a valid open_stateid to cover it - not need to reclaim. + */ + return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; opendata->o_arg.share_access = nfs4_map_atomic_open_share( From e6ae32c343981a50aa07c34767f2a967cc920edc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:34:38 +0100 Subject: [PATCH 354/839] ARM: fix DEBUG_SET_MODULE_RONX build dependencies randconfig testing reveals that DEBUG_SET_MODULE_RONX needs to depend on MMU otherwise these build errors are observed: kernel/built-in.o: In function `set_section_ro_nx': kernel/module.c:1738: undefined reference to `set_memory_nx' kernel/built-in.o: In function `set_page_attributes': kernel/module.c:1709: undefined reference to `set_memory_ro' This is because the pageattr functions are not built for !MMU configs as they don't have page tables. Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 0c12ffb155a2..dfa21cb58f36 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1590,7 +1590,7 @@ config PID_IN_CONTEXTIDR config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES + depends on MODULES && MMU ---help--- This option helps catch unintended modifications to loadable kernel module's text and read-only data. It also prevents execution From b4d103d1a45fed0da2f31c905eb5e053c84a41c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:49:45 +0100 Subject: [PATCH 355/839] ARM: add help text for HIGHPTE configuration entry Add some help text for the HIGHPTE configuration entry. This comes from the x86 entry, but reworded to be more a more accurate description of what this option does. Signed-off-by: Russell King --- arch/arm/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..cc0ea0c6cfc9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1661,6 +1661,12 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM + help + The VM uses one page of physical memory for each page table. + For systems with a lot of processes, this can use a lot of + precious low memory, eventually leading to low memory being + consumed by page tables. Setting this option will allow + user-space 2nd level page tables to reside in high memory. config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" From eeb3fee8f6cca5e7bf1647d9e327c7b40e384578 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:52:58 +0100 Subject: [PATCH 356/839] ARM: add helpful message when truncating physical memory Add a nmessage to suggest that HIGHMEM is enabled when physical memory is truncated due to lack of virtual address space to map it in the low memory mapping. Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 904d1532e6d0..79de062c6077 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1072,6 +1072,7 @@ void __init sanity_check_meminfo(void) int highmem = 0; phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; struct memblock_region *reg; + bool should_use_highmem = false; for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; @@ -1090,6 +1091,7 @@ void __init sanity_check_meminfo(void) pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", &block_start, &block_end); memblock_remove(reg->base, reg->size); + should_use_highmem = true; continue; } @@ -1100,6 +1102,7 @@ void __init sanity_check_meminfo(void) &block_start, &block_end, &vmalloc_limit); memblock_remove(vmalloc_limit, overlap_size); block_end = vmalloc_limit; + should_use_highmem = true; } } @@ -1134,6 +1137,9 @@ void __init sanity_check_meminfo(void) } } + if (should_use_highmem) + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + high_memory = __va(arm_lowmem_limit - 1) + 1; /* From e922cfb1c410d8cb200a08f1b2a527d92d95480a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Jun 2015 11:26:03 -0400 Subject: [PATCH 357/839] drm/amdgpu: fix hpd range check in dce_v8_0_hpd_irq() Spotted by Dan Carpenter. This is a slight variant of his fix. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 72c27ac915f2..aaca8d663f2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask, int_control, tmp; unsigned hpd; - if (entry->src_data > 6) { + if (entry->src_data >= adev->mode_info.num_hpd) { DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); return 0; } From 69ee2410238e6ea6fd5d0a91b04510bba8abc7b5 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Fri, 19 Jun 2015 09:35:23 +0530 Subject: [PATCH 358/839] drm/radeon: use kzalloc for allocating one thing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use kzalloc for allocating one thing rather than kcalloc(1... The semantic patch that makes this change is as follows: // @@ @@ - kcalloc(1, + kzalloc( ...) // Signed-off-by: Maninder Singh Reviewed-by: Vaneet Narang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index edafd3c2b170..06ac59fe332a 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; From cd06bf687b89f7532d19b0e4484299372e1bf70c Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Wed, 24 Jun 2015 22:39:21 +0300 Subject: [PATCH 359/839] drm/amdgpu: Initialize compute sdma and memory from kgd v2: add missing MTYPE_NONCACHED enum Signed-off-by: Ben Goz Acked-by: Oded Gabbay Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cikd.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 42 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 47 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 28 +++++++++++++++ 4 files changed, 123 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 220865a44814..d19085a97064 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -552,4 +552,10 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ +enum { + MTYPE_CACHED = 0, + MTYPE_NONCACHED = 3 +}; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index cb7907447b81..2c188fb9fd22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2009,6 +2009,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gmc_v7_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + cik_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + /** * gfx_v7_0_gpu_init - setup the 3D engine * @@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v7_0_init_compute_vmid(adev); + WREG32(mmSX_DEBUG_1, 0x20); WREG32(mmTA_CNTL_AUX, 0x00010000); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 14242bd33363..e4aeb74d2599 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gmc_v8_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + SH_MEM_CONFIG__PRIVATE_ATC_MASK; + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + vi_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v8_0_init_compute_vmid(adev); + mutex_lock(&adev->grbm_idx_mutex); /* * making sure that the following register writes will be broadcasted diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e3c1fde75363..7bb37b93993f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -438,6 +438,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) /* XXX todo */ } +/** + * sdma_v3_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VI). + */ +static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + for (i = 0; i < SDMA_MAX_INSTANCE; i++) { + f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); + if (enable) + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 1); + else + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 0); + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); + } +} + /** * sdma_v3_0_enable - stop the async dma engines * @@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) /* unhalt the MEs */ sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); @@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); return 0; From 78ad5cdd21f0d614983fc397338944e797ec70b9 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Fri, 12 Jun 2015 11:08:58 +0300 Subject: [PATCH 360/839] drm/amdgpu: Configure doorbell to maximum slots Signed-off-by: Ben Goz Acked-by: Oded Gabbay Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e4aeb74d2599..7683d7fd6463 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3128,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); + 0x7FFFF << 2); } tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, From 161569deaa03cf3c00ed63352006193f250b0648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 19 Jun 2015 10:32:15 -0400 Subject: [PATCH 361/839] drm/radeon: compute ring fix hibernation (CI GPU family) v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order for hibernation to reliably work we need to cleanup more thoroughly the compute ring. Hibernation is different from suspend resume as when we resume from hibernation the hardware is first fully initialize by regular kernel then freeze callback happens (which correspond to a suspend inside the radeon kernel driver) and turn off each of the block. It turns out we were not cleanly shutting down the compute ring. This patch fix that. Hibernation and suspend to ram were tested (several times) on : Bonaire Hawaii Mullins Kaveri Kabini Changed since v1: - Factor the ring stop logic into a function taking ring as arg. Cc: stable@vger.kernel.org Signed-off-by: Jérôme Glisse Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b0688b0c8908..4ecf5caa8c6d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, WDOORBELL32(ring->doorbell_index, ring->wptr); } +static void cik_compute_stop(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + u32 j, tmp; + + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); + /* Disable wptr polling. */ + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); + tmp &= ~WPTR_POLL_EN; + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); + /* Disable HQD. */ + if (RREG32(CP_HQD_ACTIVE) & 1) { + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < rdev->usec_timeout; j++) { + if (!(RREG32(CP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(CP_HQD_DEQUEUE_REQUEST, 0); + WREG32(CP_HQD_PQ_RPTR, 0); + WREG32(CP_HQD_PQ_WPTR, 0); + } + cik_srbm_select(rdev, 0, 0, 0, 0); +} + /** * cik_cp_compute_enable - enable/disable the compute CP MEs * @@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) if (enable) WREG32(CP_MEC_CNTL, 0); else { + /* + * To make hibernation reliable we need to clear compute ring + * configuration before halting the compute ring. + */ + mutex_lock(&rdev->srbm_mutex); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + mutex_unlock(&rdev->srbm_mutex); + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; From 2ba8d1bb8f6b589037f7db1f01144fc80750e8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 19 Jun 2015 10:32:16 -0400 Subject: [PATCH 362/839] drm/radeon: SDMA fix hibernation (CI GPU family). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order for hibernation to reliably work we need to properly turn off the SDMA block, sadly after numerous attemps i haven't not found proper sequence for clean and full shutdown. So simply reset both SDMA block, this makes hibernation works reliably on sea island GPU family (CI) Hibernation and suspend to ram were tested (several times) on : Bonaire Hawaii Mullins Kaveri Kabini Cc: stable@vger.kernel.org Signed-off-by: Jérôme Glisse Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik_sdma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index f86eb54e7763..d16f2eebd95e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) } rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; + + /* FIXME use something else than big hammer but after few days can not + * seem to find good combination so reset SDMA blocks as it seems we + * do not shut them down properly. This fix hibernation and does not + * affect suspend to ram. + */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + (void)RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + (void)RREG32(SRBM_SOFT_RESET); } /** From 344c19f9558d69faec75869b33d558258429bcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Jun 2015 15:47:16 +0200 Subject: [PATCH 363/839] drm/amdgpu: simplify fence debugfs output a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5c9918d01bf9..8f0fc22f6810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -1023,7 +1023,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); - seq_printf(m, "--- ring %d ---\n", i); + seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%016llx\n", @@ -1031,7 +1031,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { struct amdgpu_ring *other = adev->rings[j]; - if (i != j && other && other->fence_drv.initialized) + if (i != j && other && other->fence_drv.initialized && + ring->fence_drv.sync_seq[j]) seq_printf(m, "Last sync to ring %d 0x%016llx\n", j, ring->fence_drv.sync_seq[j]); } From 93e3e4385b69d8885e0dd64d1c5b8c351a151e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 9 Jun 2015 16:58:33 +0200 Subject: [PATCH 364/839] drm/amdgpu: add BO map/unmap trace point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 48 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++ 2 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index b56dd64bd4ea..9fe4f3ccd319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -61,6 +61,54 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) ); +TRACE_EVENT(amdgpu_vm_bo_map, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + +TRACE_EVENT(amdgpu_vm_bo_unmap, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + TRACE_EVENT(amdgpu_vm_bo_update, TP_PROTO(struct amdgpu_bo_va_mapping *mapping), TP_ARGS(mapping), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 407882b233c7..9a4e3b63f1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->mappings); interval_tree_insert(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_map(bo_va, mapping); bo_va->addr = 0; @@ -1058,6 +1059,7 @@ error_free: mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); error_unlock: @@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) { /* clear the old address */ @@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) { list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) list_add(&mapping->list, &vm->freed); else From ec74407ac047c150080758f2a4cec47393a6c0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 10 Jun 2015 14:45:21 +0200 Subject: [PATCH 365/839] drm/amdgpu: add amdgpu_bo_list_set trace point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 36d34e0afbc3..f82a2dd83874 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -30,6 +30,7 @@ #include #include "amdgpu.h" +#include "amdgpu_trace.h" static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, struct amdgpu_bo_list **result, @@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, gws_obj = entry->robj; if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; + + trace_amdgpu_bo_list_set(list, entry->robj); } for (i = 0; i < list->num_entries; ++i) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 9fe4f3ccd319..ed20f44b924d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -169,6 +169,21 @@ TRACE_EVENT(amdgpu_vm_flush, __entry->pd_addr, __entry->ring, __entry->id) ); +TRACE_EVENT(amdgpu_bo_list_set, + TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), + TP_ARGS(list, bo), + TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, list) + __field(struct amdgpu_bo *, bo) + ), + + TP_fast_assign( + __entry->list = list; + __entry->bo = bo; + ), + TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) +); + DECLARE_EVENT_CLASS(amdgpu_fence_request, TP_PROTO(struct drm_device *dev, int ring, u32 seqno), From e30590e6e36992a6db28f03409286c164868939f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 10 Jun 2015 19:21:14 +0200 Subject: [PATCH 366/839] drm/amdgpu: print the bo_list in the CS trace point as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index ed20f44b924d..961d7265c286 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs, TP_PROTO(struct amdgpu_cs_parser *p, int i), TP_ARGS(p, i), TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) __field(u32, dw) __field(u32, fences) ), TP_fast_assign( + __entry->bo_list = p->bo_list; __entry->ring = p->ibs[i].ring->idx; __entry->dw = p->ibs[i].length_dw; __entry->fences = amdgpu_fence_count_emitted( p->ibs[i].ring); ), - TP_printk("ring=%u, dw=%u, fences=%u", - __entry->ring, __entry->dw, + TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", + __entry->bo_list, __entry->ring, __entry->dw, __entry->fences) ); From 332300b97e700e000739c8db782406b6fd8f092d Mon Sep 17 00:00:00 2001 From: "monk.liu" Date: Mon, 8 Jun 2015 14:48:15 +0800 Subject: [PATCH 367/839] drm/amdgpu: fix wrong type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: monk.liu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8f0fc22f6810..f2d885c1da8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -522,7 +522,8 @@ long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq, { uint64_t last_seq[AMDGPU_MAX_RINGS]; bool signaled; - int i, r; + int i; + long r; if (timeout == 0) { return amdgpu_fence_any_seq_signaled(adev, target_seq); From 68fdd3df79ee4bff4d63dab920b01d9ed5731115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Jun 2015 14:50:02 +0200 Subject: [PATCH 368/839] drm/amdgpu: silence invalid error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0ec222295fee..c3ea3635e1fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -496,7 +496,7 @@ error_unreserve: error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } From f1689ec1b0b1256d0e69653cd4aaeee44aafdf5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 11 Jun 2015 20:56:18 +0200 Subject: [PATCH 369/839] drm/amdgpu: check VCE relocation buffer range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit port of radeon commit 2fc5703abda201f138faf63bdca743d04dbf4b1a. Signed-off-by: Christian König Reviewed-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 119 ++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 - 3 files changed, 92 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 22866d1c3d69..963c4ba5dcba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1622,6 +1622,7 @@ struct amdgpu_vce { unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; + uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; struct delayed_work idle_work; const struct firmware *fw; /* VCE firmware */ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1127a504f118..cb1bff742550 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -464,10 +464,12 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * @p: parser context * @lo: address of lower dword * @hi: address of higher dword + * @size: minimum size * * Patch relocation inside command stream with real buffer address */ -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi) +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, + int lo, int hi, unsigned size) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ib *ib = &p->ibs[ib_idx]; @@ -484,6 +486,13 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int return -EINVAL; } + if ((addr + (uint64_t)size) > + ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", + addr, lo, hi); + return -EINVAL; + } + addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); @@ -493,6 +502,39 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int return 0; } +/** + * amdgpu_vce_validate_handle - validate stream handle + * + * @p: parser context + * @handle: handle to validate + * + * Validates the handle and return the found session index or -EINVAL + * we we don't have another free session index. + */ +static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, + uint32_t handle) +{ + unsigned i; + + /* validate the handle */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->adev->vce.handles[i]) == handle) + return i; + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { + p->adev->vce.filp[i] = p->filp; + p->adev->vce.img_size[i] = 0; + return i; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + /** * amdgpu_vce_cs_parse - parse and validate the command stream * @@ -501,10 +543,12 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { - uint32_t handle = 0; - bool destroy = false; - int i, r, idx = 0; struct amdgpu_ib *ib = &p->ibs[ib_idx]; + int session_idx = -1; + bool destroyed = false; + uint32_t tmp, handle = 0; + uint32_t *size = &tmp; + int i, r, idx = 0; amdgpu_vce_note_usage(p->adev); @@ -517,13 +561,29 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) return -EINVAL; } + if (destroyed) { + DRM_ERROR("No other command allowed after destroy!\n"); + return -EINVAL; + } + switch (cmd) { case 0x00000001: // session handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + session_idx = amdgpu_vce_validate_handle(p, handle); + if (session_idx < 0) + return session_idx; + size = &p->adev->vce.img_size[session_idx]; break; case 0x00000002: // task info + break; + case 0x01000001: // create + *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * + amdgpu_get_ib_value(p, ib_idx, idx + 10) * + 8 * 3 / 2; + break; + case 0x04000001: // config extension case 0x04000002: // pic control case 0x04000005: // rate control @@ -534,23 +594,39 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: // encode - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + *size); if (r) return r; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + *size / 3); if (r) return r; break; case 0x02000001: // destroy - destroy = true; + destroyed = true; break; case 0x05000001: // context buffer + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + *size * 2); + if (r) + return r; + break; + case 0x05000004: // video bitstream buffer + tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp); + if (r) + return r; + break; + case 0x05000005: // feedback buffer - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + 4096); if (r) return r; break; @@ -560,34 +636,21 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) return -EINVAL; } + if (session_idx == -1) { + DRM_ERROR("no session command at start of IB\n"); + return -EINVAL; + } + idx += len / 4; } - if (destroy) { + if (destroyed) { /* IB contains a destroy msg, free the handle */ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); - - return 0; } - /* create or encode, validate the handle */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (atomic_read(&p->adev->vce.handles[i]) == handle) - return 0; - } - - /* handle not found try to alloc a new one */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { - p->adev->vce.filp[i] = p->filp; - return 0; - } - } - - DRM_ERROR("No more free VCE handles!\n"); - - return -EINVAL; + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index b6a9d0956c60..7ccdb5927da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore, From 2f4b936869e6432a3361217da7e071d4c0c662f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 11 Jun 2015 21:33:55 +0200 Subject: [PATCH 370/839] drm/amdgpu: make VCE handle check more strict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port of radeon commit 29c63fe22a17c64e54016040cd882481bd45ee5a. Signed-off-by: Christian König Reviewed-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 64 ++++++++++++++++++------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index cb1bff742550..70181c1da91b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -507,19 +507,27 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, * * @p: parser context * @handle: handle to validate + * @allocated: allocated a new handle? * * Validates the handle and return the found session index or -EINVAL * we we don't have another free session index. */ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, - uint32_t handle) + uint32_t handle, bool *allocated) { unsigned i; + *allocated = false; + /* validate the handle */ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (atomic_read(&p->adev->vce.handles[i]) == handle) + if (atomic_read(&p->adev->vce.handles[i]) == handle) { + if (p->adev->vce.filp[i] != p->filp) { + DRM_ERROR("VCE handle collision detected!\n"); + return -EINVAL; + } return i; + } } /* handle not found try to alloc a new one */ @@ -527,6 +535,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { p->adev->vce.filp[i] = p->filp; p->adev->vce.img_size[i] = 0; + *allocated = true; return i; } } @@ -546,9 +555,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) struct amdgpu_ib *ib = &p->ibs[ib_idx]; int session_idx = -1; bool destroyed = false; + bool created = false; + bool allocated = false; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r, idx = 0; + int i, r = 0, idx = 0; amdgpu_vce_note_usage(p->adev); @@ -558,18 +569,21 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); - return -EINVAL; + r = -EINVAL; + goto out; } if (destroyed) { DRM_ERROR("No other command allowed after destroy!\n"); - return -EINVAL; + r = -EINVAL; + goto out; } switch (cmd) { case 0x00000001: // session handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); - session_idx = amdgpu_vce_validate_handle(p, handle); + session_idx = amdgpu_vce_validate_handle(p, handle, + &allocated); if (session_idx < 0) return session_idx; size = &p->adev->vce.img_size[session_idx]; @@ -579,6 +593,13 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x01000001: // create + created = true; + if (!allocated) { + DRM_ERROR("Handle already in use!\n"); + r = -EINVAL; + goto out; + } + *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * amdgpu_get_ib_value(p, ib_idx, idx + 10) * 8 * 3 / 2; @@ -597,12 +618,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, *size); if (r) - return r; + goto out; r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, *size / 3); if (r) - return r; + goto out; break; case 0x02000001: // destroy @@ -613,7 +634,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, *size * 2); if (r) - return r; + goto out; break; case 0x05000004: // video bitstream buffer @@ -621,36 +642,47 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, tmp); if (r) - return r; + goto out; break; case 0x05000005: // feedback buffer r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, 4096); if (r) - return r; + goto out; break; default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); - return -EINVAL; + r = -EINVAL; + goto out; } if (session_idx == -1) { DRM_ERROR("no session command at start of IB\n"); - return -EINVAL; + r = -EINVAL; + goto out; } idx += len / 4; } - if (destroyed) { - /* IB contains a destroy msg, free the handle */ + if (allocated && !created) { + DRM_ERROR("New session without create command!\n"); + r = -ENOENT; + } + +out: + if ((!r && destroyed) || (r && allocated)) { + /* + * IB contains a destroy msg or we have allocated an + * handle and got an error, anyway free the handle + */ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); } - return 0; + return r; } /** From dc78330a8e07285a26f165242c3a86744ef3ad20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 12 Jun 2015 14:16:20 +0200 Subject: [PATCH 371/839] drm/amdgpu: check VCE feedback and bitstream index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 70181c1da91b..d3ca73090e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -469,20 +469,24 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * Patch relocation inside command stream with real buffer address */ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, - int lo, int hi, unsigned size) + int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_bo *bo; uint64_t addr; + if (index == 0xffffffff) + index = 0; + addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr += ((uint64_t)size) * ((uint64_t)index); mapping = amdgpu_cs_find_mapping(p, addr, &bo); if (mapping == NULL) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n", - addr, lo, hi); + DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + addr, lo, hi, size, index); return -EINVAL; } @@ -495,6 +499,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); + addr -= ((uint64_t)size) * ((uint64_t)index); ib->ptr[lo] = addr & 0xFFFFFFFF; ib->ptr[hi] = addr >> 32; @@ -553,6 +558,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { struct amdgpu_ib *ib = &p->ibs[ib_idx]; + unsigned fb_idx = 0, bs_idx = 0; int session_idx = -1; bool destroyed = false; bool created = false; @@ -590,6 +596,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x00000002: // task info + fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); + bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); break; case 0x01000001: // create @@ -616,12 +624,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) case 0x03000001: // encode r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, - *size); + *size, 0); if (r) goto out; r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, - *size / 3); + *size / 3, 0); if (r) goto out; break; @@ -632,7 +640,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) case 0x05000001: // context buffer r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, - *size * 2); + *size * 2, 0); if (r) goto out; break; @@ -640,14 +648,14 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) case 0x05000004: // video bitstream buffer tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, - tmp); + tmp, bs_idx); if (r) goto out; break; case 0x05000005: // feedback buffer r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, - 4096); + 4096, fb_idx); if (r) goto out; break; From 845253e78a7871ae87cdc5c135a914c58d6d4f64 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Tue, 23 Jun 2015 11:59:55 -0400 Subject: [PATCH 372/839] drm/amdgpu: reset wptr at cp compute resume (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is to resolve compute hang at resume time. v2: (agd5f) squash in second fix Signed-off-by: Sonny Jiang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7683d7fd6463..7b683fb2173c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3144,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = ring->wptr; + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); From b8682ac253bdab6c15678f926bdc028b2e5dc0d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 22 Jun 2015 14:54:32 +0200 Subject: [PATCH 373/839] drm/amdgpu: fix crash on invalid CS IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f09b2cba40ca..7cd567b268fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -445,8 +445,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); + if (parser->ibs) + for (i = 0; i < parser->num_ibs; i++) + amdgpu_ib_free(parser->adev, &parser->ibs[i]); kfree(parser->ibs); if (parser->uf.bo) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); From 9a5e8fb1c8372ea4b58a76bbb82cb97683bb204b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 23 Jun 2015 17:07:03 +0200 Subject: [PATCH 374/839] drm/amdgpu: add chunk id validity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7cd567b268fd..86b78c799176 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } p->chunks[i].chunk_id = user_chunk.chunk_id; p->chunks[i].length_dw = user_chunk.length_dw; - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) - p->num_ibs++; size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; @@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto out; } - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) { + switch (p->chunks[i].chunk_id) { + case AMDGPU_CHUNK_ID_IB: + p->num_ibs++; + break; + + case AMDGPU_CHUNK_ID_FENCE: size = sizeof(struct drm_amdgpu_cs_chunk_fence); if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) { uint32_t handle; @@ -221,6 +224,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) r = -EINVAL; goto out; } + break; + + default: + r = -EINVAL; + goto out; } } From b13e22aeba06aea2acdae657e988c93c22080858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 26 Jun 2015 12:31:29 +0200 Subject: [PATCH 375/839] drm/radeon: fix adding all VAs to the freed list on remove v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only should do so when the BO_VA was actually mapped. Otherwise we get a nice error message on the next CS. v2: It actually doesn't matter if it was invalidated or not, if it was mapped we need to clear the area where it was mapped. Signed-off-by: Christian König Tested-by: Michel Dänzer (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 9739ded91b7a..618ab5ec8e62 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1122,12 +1122,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, interval_tree_remove(&bo_va->it, &vm->va); spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { + list_del(&bo_va->vm_status); + if (bo_va->it.start || bo_va->it.last) { bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { radeon_fence_unref(&bo_va->last_pt_update); - list_del(&bo_va->vm_status); kfree(bo_va); } spin_unlock(&vm->status_lock); From 5f0b34cc72aa627fd28230be3de469321e0af03e Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Fri, 26 Jun 2015 13:28:50 +0530 Subject: [PATCH 376/839] drm/amdgpu: use kzalloc for allocating one thing Use kzalloc rather than kcalloc(1.. for allocating one thing. Signed-off-by: Maninder Singh Reviewed-by: Vaneet Narang Reviewed-by: Christian Konig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d3706a498293..dd3415d2e45d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; From a08c1d516fb3eb302ff14a10eedda844e75c625f Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Fri, 26 Jun 2015 13:26:57 +0530 Subject: [PATCH 377/839] drm/amdgpu: remove unnecessary check before kfree kfree(NULL) is safe and this check is probably not required Signed-off-by: Maninder Singh Reviewed-by: Vaneet Narang Reviewed-by: Christian Konig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fec487d1c870..a85cd08901a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1575,8 +1575,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); - if (adev->ip_block_enabled) - kfree(adev->ip_block_enabled); + kfree(adev->ip_block_enabled); adev->ip_block_enabled = NULL; adev->accel_working = false; /* free i2c buses */ From d8d090b711bc56e095e59f9b536ed3c73c47b36d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 26 Jun 2015 13:02:57 -0400 Subject: [PATCH 378/839] drm/amdgpu: allocate ip_block_enabled memory in common code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove duplication across asic families and make it symmetric with the freeing of the code in amdgpu_device.c Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- drivers/gpu/drm/amd/amdgpu/cik.c | 4 ---- drivers/gpu/drm/amd/amdgpu/vi.c | 4 ---- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a85cd08901a7..e19097554093 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } - + adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); + if (adev->ip_block_enabled == NULL) + return -ENOMEM; if (adev->ip_blocks == NULL) { DRM_ERROR("No IP blocks found!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5dab578d6462..341c56681841 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 90fc93c2c1d0..fa5a4448531d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } From 966c62fb40610e3cbda7bcd9a0193d8cbb3b77e3 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Fri, 26 Jun 2015 12:26:45 -0400 Subject: [PATCH 379/839] drm/amdgpu: correct define SMU_EnabledFeatureScoreboard_SclkDpmOn Signed-off-by: Sonny Jiang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cz_dpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h index 782a74107664..99e1afc89629 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h @@ -46,7 +46,7 @@ /* Do not change the following, it is also defined in SMU8.h */ #define SMU_EnabledFeatureScoreboard_AcpDpmOn 0x00000001 -#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00100000 +#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00200000 #define SMU_EnabledFeatureScoreboard_UvdDpmOn 0x00800000 #define SMU_EnabledFeatureScoreboard_VceDpmOn 0x01000000 From 9dcabece15d16da48bb7c7071306bf6158b42ba3 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Fri, 26 Jun 2015 12:31:46 -0400 Subject: [PATCH 380/839] drm/amdgpu: disable enable_nb_ps_policy temporarily Fixes a hang on resume. Signed-off-by: Sonny Jiang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index e4936a452bc6..f75a31df30bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev) pi->mgcg_cgtt_local1 = 0x0; pi->clock_slow_down_step = 25000; pi->skip_clock_slow_down = 1; - pi->enable_nb_ps_policy = 1; + pi->enable_nb_ps_policy = 0; pi->caps_power_containment = true; pi->caps_cac = true; pi->didt_enabled = false; From 06d7137e5c566e1e8a4acd4a30e7e12170a57b58 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 29 Jun 2015 12:07:04 -0400 Subject: [PATCH 381/839] namei: make set_root_rcu() return void The only caller that cares about its return value can just as easily pick it from nd->root_seq itself. We used to just calculate it and return to caller, but these days we are storing it in nd->root_seq in all cases. Signed-off-by: Al Viro --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2dad0eaf91d3..ae4e4c18b2ac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -792,7 +792,7 @@ static void set_root(struct nameidata *nd) get_fs_root(current->fs, &nd->root); } -static unsigned set_root_rcu(struct nameidata *nd) +static void set_root_rcu(struct nameidata *nd) { struct fs_struct *fs = current->fs; unsigned seq; @@ -802,7 +802,6 @@ static unsigned set_root_rcu(struct nameidata *nd) nd->root = fs->root; nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); - return nd->root_seq; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -1998,7 +1997,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) if (*s == '/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); - nd->seq = set_root_rcu(nd); + set_root_rcu(nd); + nd->seq = nd->root_seq; } else { set_root(nd); path_get(&nd->root); From c2cfa19400979dc1a14bba75f83b451b0cd9507a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Thu, 25 Jun 2015 20:32:34 +0300 Subject: [PATCH 382/839] libceph: Fix ceph_tcp_sendpage()'s more boolean usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From struct ceph_msg_data_cursor in include/linux/ceph/messenger.h: bool last_piece; /* current is last piece */ In ceph_msg_data_next(): *last_piece = cursor->last_piece; A call to ceph_msg_data_next() is followed by: ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, last_piece); while ceph_tcp_sendpage() is: static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) The logic is inverted: correct it. Signed-off-by: Benoît Canet Reviewed-by: Alex Elder Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 38f06a4c3c9e..1441aeff8bd7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1544,7 +1544,7 @@ static int write_partial_message_data(struct ceph_connection *con) page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, &last_piece); ret = ceph_tcp_sendpage(con->sock, page, page_offset, - length, last_piece); + length, !last_piece); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); From ac9134906b3f5c2b45dc80dab0fee792bd516d52 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 29 Jun 2015 11:09:11 -0400 Subject: [PATCH 383/839] Revert "drm/radeon: dont switch vt on suspend" This reverts commit b9729b17a414f99c61f4db9ac9f9ed987fa0cbfe. This seems to break the cursor on resume for lots of systems. Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index aeb676708e60..634793ea8418 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { From 7cebc728174424d67df91dfb14f8b6dc13bed993 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Sat, 27 Jun 2015 13:16:05 +0600 Subject: [PATCH 384/839] gpu/drm/amdgpu: Fix build when CONFIG_DEBUG_FS is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the CONFIG_DEBUG_FS is not selected, compilation of the drivers/gpu/drm/amd/amdgpu/amdgpu_device.c provides two warnings that amdgpu_debugfs_regs_init and amdgpu_debugfs_regs_cleanup are used but never defined. And as result: ERROR: "amdgpu_debugfs_regs_cleanup" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! ERROR: "amdgpu_debugfs_regs_init" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! ^ Reviewed-by: Christian König Signed-off-by: Alexander Kuleshov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e19097554093..ba46be361c9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2001,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor) void amdgpu_debugfs_cleanup(struct drm_minor *minor) { } +#else +static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + return 0; +} +static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } #endif From 03507c4f2f63d8d98c2455cf4d192589fac553c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Jun 2015 17:00:19 +0200 Subject: [PATCH 385/839] drm/amdgpu: recreate fence from user seq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And use common fence infrastructure for the wait. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 37 +++++++++++++++++++++-- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 963c4ba5dcba..01657830b470 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, struct amdgpu_fence **fence); +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible); int amdgpu_fence_wait_any(struct amdgpu_device *adev, struct amdgpu_fence **fences, bool intr); -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, - u64 *target_seq, bool intr, - long timeout); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); void amdgpu_fence_unref(struct amdgpu_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 86b78c799176..84ba1d1bf327 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -739,9 +739,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - uint64_t seq[AMDGPU_MAX_RINGS] = {0}; - struct amdgpu_ring *ring = NULL; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); + struct amdgpu_fence *fence = NULL; + struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; long r; @@ -754,9 +754,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (r) return r; - seq[ring->idx] = wait->in.handle; + r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); + if (r) + return r; - r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout); + r = fence_wait_timeout(&fence->base, true, timeout); + amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index f2d885c1da8f..a7189a1fa6a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -135,6 +135,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, return 0; } +/** + * amdgpu_fence_recreate - recreate a fence from an user fence + * + * @ring: ring the fence is associated with + * @owner: creator of the fence + * @seq: user fence sequence number + * @fence: resulting amdgpu fence object + * + * Recreates a fence command from the user fence sequence number (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (seq > ring->fence_drv.sync_seq[ring->idx]) + return -EINVAL; + + *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + if ((*fence) == NULL) + return -ENOMEM; + + (*fence)->seq = seq; + (*fence)->ring = ring; + (*fence)->owner = owner; + fence_init(&(*fence)->base, &amdgpu_fence_ops, + &adev->fence_queue.lock, adev->fence_context + ring->idx, + (*fence)->seq); + return 0; +} + /** * amdgpu_fence_check_signaled - callback from fence_queue * @@ -517,8 +549,9 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq) * the wait timeout, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected. */ -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq, - bool intr, long timeout) +static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, + u64 *target_seq, bool intr, + long timeout) { uint64_t last_seq[AMDGPU_MAX_RINGS]; bool signaled; From 2b48d323b26c37555df3447e11ab9e962eccdc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Jun 2015 17:31:29 +0200 Subject: [PATCH 386/839] drm/amdgpu: add optional dependencies to the CS IOCTL v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: remove unrelated whitespace change, fix C comment Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 59 +++++++++++++++++++++++++- include/uapi/drm/amdgpu_drm.h | 9 ++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 84ba1d1bf327..d63135bf29c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -226,6 +226,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: + break; + default: r = -EINVAL; goto out; @@ -663,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return 0; } +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + struct amdgpu_ib *ib; + int i, j, r; + + if (!p->num_ibs) + return 0; + + /* Add dependencies to first IB */ + ib = &p->ibs[0]; + for (i = 0; i < p->nchunks; ++i) { + struct drm_amdgpu_cs_chunk_dep *deps; + struct amdgpu_cs_chunk *chunk; + unsigned num_deps; + + chunk = &p->chunks[i]; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) + continue; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (j = 0; j < num_deps; ++j) { + struct amdgpu_fence *fence; + struct amdgpu_ring *ring; + + r = amdgpu_cs_get_ring(adev, deps[j].ip_type, + deps[j].ip_instance, + deps[j].ring, &ring); + if (r) + return r; + + r = amdgpu_fence_recreate(ring, p->filp, + deps[j].handle, + &fence); + if (r) + return r; + + amdgpu_sync_fence(&ib->sync, fence); + amdgpu_fence_unref(&fence); + } + } + + return 0; +} + int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; @@ -697,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) else DRM_ERROR("Failed to process the buffer list %d!\n", r); } - } else { + } + + if (!r) { reserved_buffers = true; r = amdgpu_cs_ib_fill(adev, &parser); } + if (!r) + r = amdgpu_cs_dependencies(adev, &parser); + if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); up_read(&adev->exclusive_lock); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d3f4832db289..8edf10420361 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -348,6 +348,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { uint32_t chunk_id; @@ -399,6 +400,14 @@ struct drm_amdgpu_cs_chunk_ib { uint32_t ring; }; +struct drm_amdgpu_cs_chunk_dep { + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint32_t ctx_id; + uint64_t handle; +}; + struct drm_amdgpu_cs_chunk_fence { uint32_t handle; uint32_t offset; From fc220f6580be80c36dfba9964b2fd71ceb1d3d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 29 Jun 2015 17:12:20 +0200 Subject: [PATCH 387/839] drm/amdgpu: add flag to delay VM updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 +++--- include/uapi/drm/amdgpu_drm.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index c3ea3635e1fc..975edb1000a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); if ((args->flags & invalid_flags)) { dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", args->flags, invalid_flags); @@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } - if (!r) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) amdgpu_gem_va_update_vm(adev, bo_va); drm_gem_object_unreference_unlocked(gobj); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 8edf10420361..b6fce900a833 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + /* Mapping flags */ /* readable mapping */ #define AMDGPU_VM_PAGE_READABLE (1 << 1) From 11ddba28325a66c2d38dbf987e2d5da790c6d684 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 29 Jun 2015 11:56:27 -0700 Subject: [PATCH 388/839] Input: wdt87xx_i2c - fix format warning This fixes the following warning: drivers/input/touchscreen/wdt87xx_i2c.c: In function 'wdt87xx_validate_firmware': >> drivers/input/touchscreen/wdt87xx_i2c.c:472:4: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'size_t' [-Wformat=] size, fw->size); Reported-by: kbuild test robot Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wdt87xx_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index c073ea9345e2..43c3bee4b2e9 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -468,7 +468,7 @@ static int wdt87xx_validate_firmware(struct wdt87xx_data *wdt, size = get_unaligned_le32(fw->data + FW_SIZE_OFFSET); if (size != fw->size) { dev_err(&wdt->client->dev, - "fw size mismatch: expected %d, actual %zd\n", + "fw size mismatch: expected %d, actual %zu\n", size, fw->size); return -EINVAL; } From 580a64bf2b712e43ea04bdded17d7049e7b06ce6 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 29 Jun 2015 12:13:05 -0700 Subject: [PATCH 389/839] Input: arc_ps2 - add HAS_IOMEM dependency Fix this compile error: drivers/built-in.o: In function `arc_ps2_probe': /mnt/linux/drivers/input/serio/arc_ps2.c:206: undefined reference to `devm_ioremap_resource' Signed-off-by: Sebastian Ott Signed-off-by: Dmitry Torokhov --- drivers/input/serio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 77833d7a004b..200841b77edb 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -244,6 +244,7 @@ config SERIO_PS2MULT config SERIO_ARC_PS2 tristate "ARC PS/2 support" + depends on HAS_IOMEM help Say Y here if you have an ARC FPGA platform with a PS/2 controller in it. From 0a8b83530b6f67b9a50bd7937d57a5deea187b5b Mon Sep 17 00:00:00 2001 From: "qipeng.zha" Date: Sat, 27 Jun 2015 00:32:15 +0800 Subject: [PATCH 390/839] intel_pmc_ipc: Add Intel Apollo Lake PMC IPC driver This driver provides support for PMC control on Apollo Lake platforms. The PMC is an ARC processor which defines some IPC commands for communication with other entities in the CPU. Signed-off-by: qipeng.zha [fengguang.wu@intel.com: Fix Sparse and Cocinelle warnings] Signed-off-by: Fengguang Wu Signed-off-by: Darren Hart --- MAINTAINERS | 7 + arch/x86/include/asm/intel_pmc_ipc.h | 82 +++ drivers/platform/x86/Kconfig | 7 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/intel_pmc_ipc.c | 767 +++++++++++++++++++++++++++ 5 files changed, 864 insertions(+) create mode 100644 arch/x86/include/asm/intel_pmc_ipc.h create mode 100644 drivers/platform/x86/intel_pmc_ipc.c diff --git a/MAINTAINERS b/MAINTAINERS index 4188e84bd257..3dbdd6d6d3ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5264,6 +5264,13 @@ F: include/uapi/linux/mei.h F: drivers/misc/mei/* F: Documentation/misc-devices/mei/* +INTEL PMC IPC DRIVER +M: Zha Qipeng +L: platform-driver-x86@vger.kernel.org +S: Maintained +F: drivers/platform/x86/intel_pmc_ipc.c +F: arch/x86/include/asm/intel_pmc_ipc.h + IOC3 ETHERNET DRIVER M: Ralf Baechle L: linux-mips@linux-mips.org diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h new file mode 100644 index 000000000000..200ec2e7821d --- /dev/null +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -0,0 +1,82 @@ +#ifndef _ASM_X86_INTEL_PMC_IPC_H_ +#define _ASM_X86_INTEL_PMC_IPC_H_ + +/* Commands */ +#define PMC_IPC_PMIC_ACCESS 0xFF +#define PMC_IPC_PMIC_ACCESS_READ 0x0 +#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 +#define PMC_IPC_USB_PWR_CTRL 0xF0 +#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF +#define PMC_IPC_PHY_CONFIG 0xEE +#define PMC_IPC_NORTHPEAK_CTRL 0xED +#define PMC_IPC_PM_DEBUG 0xEC +#define PMC_IPC_PMC_TELEMTRY 0xEB +#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA + +/* IPC return code */ +#define IPC_ERR_NONE 0 +#define IPC_ERR_CMD_NOT_SUPPORTED 1 +#define IPC_ERR_CMD_NOT_SERVICED 2 +#define IPC_ERR_UNABLE_TO_SERVICE 3 +#define IPC_ERR_CMD_INVALID 4 +#define IPC_ERR_CMD_FAILED 5 +#define IPC_ERR_EMSECURITY 6 +#define IPC_ERR_UNSIGNEDKERNEL 7 + +#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) + +/* + * intel_pmc_ipc_simple_command + * @cmd: command + * @sub: sub type + */ +int intel_pmc_ipc_simple_command(int cmd, int sub); + +/* + * intel_pmc_ipc_raw_cmd + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + * @sptr: data writing to SPTR register + * @dptr: data writing to DPTR register + */ +int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen, u32 dptr, u32 sptr); + +/* + * intel_pmc_ipc_command + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + */ +int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen); + +#else + +static inline int intel_pmc_ipc_simple_command(int cmd, int sub) +{ + return -EINVAL; +} + +static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen, u32 dptr, u32 sptr) +{ + return -EINVAL; +} + +static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen) +{ + return -EINVAL; +} + +#endif /*CONFIG_INTEL_PMC_IPC*/ + +#endif diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index bca0aee22978..b92f592a4b81 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -901,4 +901,11 @@ config PVPANIC a paravirtualized device provided by QEMU; it lets a virtual machine (guest) communicate panic events to the host. +config INTEL_PMC_IPC + tristate "Intel PMC IPC Driver" + ---help--- + This driver provides support for PMC control on some Intel platforms. + The PMC is an ARC processor which defines IPC commands for communication + with other entities in the CPU. + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index b3e54ed863c3..dda95a985321 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -59,3 +59,4 @@ obj-$(CONFIG_INTEL_SMARTCONNECT) += intel-smartconnect.o obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_ALIENWARE_WMI) += alienware-wmi.o +obj-$(CONFIG_INTEL_PMC_IPC) += intel_pmc_ipc.o diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c new file mode 100644 index 000000000000..d734763dab69 --- /dev/null +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -0,0 +1,767 @@ +/* + * intel_pmc_ipc.c: Driver for the Intel PMC IPC mechanism + * + * (C) Copyright 2014-2015 Intel Corporation + * + * This driver is based on Intel SCU IPC driver(intel_scu_opc.c) by + * Sreedhara DS + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * PMC running in ARC processor communicates with other entity running in IA + * core through IPC mechanism which in turn messaging between IA core ad PMC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * IPC registers + * The IA write to IPC_CMD command register triggers an interrupt to the ARC, + * The ARC handles the interrupt and services it, writing optional data to + * the IPC1 registers, updates the IPC_STS response register with the status. + */ +#define IPC_CMD 0x0 +#define IPC_CMD_MSI 0x100 +#define IPC_CMD_SIZE 16 +#define IPC_CMD_SUBCMD 12 +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ 0x4 +#define IPC_STATUS_ERR 0x2 +#define IPC_STATUS_BUSY 0x1 +#define IPC_SPTR 0x08 +#define IPC_DPTR 0x0C +#define IPC_WRITE_BUFFER 0x80 +#define IPC_READ_BUFFER 0x90 + +/* + * 16-byte buffer for sending data associated with IPC command. + */ +#define IPC_DATA_BUFFER_SIZE 16 + +#define IPC_LOOP_CNT 3000000 +#define IPC_MAX_SEC 3 + +#define IPC_TRIGGER_MODE_IRQ true + +/* exported resources from IFWI */ +#define PLAT_RESOURCE_IPC_INDEX 0 +#define PLAT_RESOURCE_IPC_SIZE 0x1000 +#define PLAT_RESOURCE_GCR_SIZE 0x1000 +#define PLAT_RESOURCE_PUNIT_DATA_INDEX 1 +#define PLAT_RESOURCE_PUNIT_INTER_INDEX 2 +#define PLAT_RESOURCE_ACPI_IO_INDEX 0 + +/* + * BIOS does not create an ACPI device for each PMC function, + * but exports multiple resources from one ACPI device(IPC) for + * multiple functions. This driver is responsible to create a + * platform device and to export resources for those functions. + */ +#define TCO_DEVICE_NAME "iTCO_wdt" +#define SMI_EN_OFFSET 0x30 +#define SMI_EN_SIZE 4 +#define TCO_BASE_OFFSET 0x60 +#define TCO_REGS_SIZE 16 +#define PUNIT_DEVICE_NAME "intel_punit_ipc" + +static const int iTCO_version = 3; + +static struct intel_pmc_ipc_dev { + struct device *dev; + void __iomem *ipc_base; + bool irq_mode; + int irq; + int cmd; + struct completion cmd_complete; + + /* The following PMC BARs share the same ACPI device with the IPC */ + void *acpi_io_base; + int acpi_io_size; + struct platform_device *tco_dev; + + /* gcr */ + void *gcr_base; + int gcr_size; + + /* punit */ + void *punit_base; + int punit_size; + void *punit_base2; + int punit_size2; + struct platform_device *punit_dev; +} ipcdev; + +static char *ipc_err_sources[] = { + [IPC_ERR_NONE] = + "no error", + [IPC_ERR_CMD_NOT_SUPPORTED] = + "command not supported", + [IPC_ERR_CMD_NOT_SERVICED] = + "command not serviced", + [IPC_ERR_UNABLE_TO_SERVICE] = + "unable to service", + [IPC_ERR_CMD_INVALID] = + "command invalid", + [IPC_ERR_CMD_FAILED] = + "command failed", + [IPC_ERR_EMSECURITY] = + "Invalid Battery", + [IPC_ERR_UNSIGNEDKERNEL] = + "Unsigned kernel", +}; + +/* Prevent concurrent calls to the PMC */ +static DEFINE_MUTEX(ipclock); + +static inline void ipc_send_command(u32 cmd) +{ + ipcdev.cmd = cmd; + if (ipcdev.irq_mode) { + reinit_completion(&ipcdev.cmd_complete); + cmd |= IPC_CMD_MSI; + } + writel(cmd, ipcdev.ipc_base + IPC_CMD); +} + +static inline u32 ipc_read_status(void) +{ + return readl(ipcdev.ipc_base + IPC_STATUS); +} + +static inline void ipc_data_writel(u32 data, u32 offset) +{ + writel(data, ipcdev.ipc_base + IPC_WRITE_BUFFER + offset); +} + +static inline u8 ipc_data_readb(u32 offset) +{ + return readb(ipcdev.ipc_base + IPC_READ_BUFFER + offset); +} + +static inline u32 ipc_data_readl(u32 offset) +{ + return readl(ipcdev.ipc_base + IPC_READ_BUFFER + offset); +} + +static int intel_pmc_ipc_check_status(void) +{ + int status; + int ret = 0; + + if (ipcdev.irq_mode) { + if (0 == wait_for_completion_timeout( + &ipcdev.cmd_complete, IPC_MAX_SEC * HZ)) + ret = -ETIMEDOUT; + } else { + int loop_count = IPC_LOOP_CNT; + + while ((ipc_read_status() & IPC_STATUS_BUSY) && --loop_count) + udelay(1); + if (loop_count == 0) + ret = -ETIMEDOUT; + } + + status = ipc_read_status(); + if (ret == -ETIMEDOUT) { + dev_err(ipcdev.dev, + "IPC timed out, TS=0x%x, CMD=0x%x\n", + status, ipcdev.cmd); + return ret; + } + + if (status & IPC_STATUS_ERR) { + int i; + + ret = -EIO; + i = (status >> IPC_CMD_SIZE) & 0xFF; + if (i < ARRAY_SIZE(ipc_err_sources)) + dev_err(ipcdev.dev, + "IPC failed: %s, STS=0x%x, CMD=0x%x\n", + ipc_err_sources[i], status, ipcdev.cmd); + else + dev_err(ipcdev.dev, + "IPC failed: unknown, STS=0x%x, CMD=0x%x\n", + status, ipcdev.cmd); + if ((i == IPC_ERR_UNSIGNEDKERNEL) || (i == IPC_ERR_EMSECURITY)) + ret = -EACCES; + } + + return ret; +} + +/* + * intel_pmc_ipc_simple_command + * @cmd: command + * @sub: sub type + */ +int intel_pmc_ipc_simple_command(int cmd, int sub) +{ + int ret; + + mutex_lock(&ipclock); + if (ipcdev.dev == NULL) { + mutex_unlock(&ipclock); + return -ENODEV; + } + ipc_send_command(sub << IPC_CMD_SUBCMD | cmd); + ret = intel_pmc_ipc_check_status(); + mutex_unlock(&ipclock); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_simple_command); + +/* + * intel_pmc_ipc_raw_cmd + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + * @sptr: data writing to SPTR register + * @dptr: data writing to DPTR register + */ +int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, + u32 outlen, u32 dptr, u32 sptr) +{ + u32 wbuf[4] = { 0 }; + int ret; + int i; + + if (inlen > IPC_DATA_BUFFER_SIZE || outlen > IPC_DATA_BUFFER_SIZE / 4) + return -EINVAL; + + mutex_lock(&ipclock); + if (ipcdev.dev == NULL) { + mutex_unlock(&ipclock); + return -ENODEV; + } + memcpy(wbuf, in, inlen); + writel(dptr, ipcdev.ipc_base + IPC_DPTR); + writel(sptr, ipcdev.ipc_base + IPC_SPTR); + /* The input data register is 32bit register and inlen is in Byte */ + for (i = 0; i < ((inlen + 3) / 4); i++) + ipc_data_writel(wbuf[i], 4 * i); + ipc_send_command((inlen << IPC_CMD_SIZE) | + (sub << IPC_CMD_SUBCMD) | cmd); + ret = intel_pmc_ipc_check_status(); + if (!ret) { + /* out is read from 32bit register and outlen is in 32bit */ + for (i = 0; i < outlen; i++) + *out++ = ipc_data_readl(4 * i); + } + mutex_unlock(&ipclock); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_raw_cmd); + +/* + * intel_pmc_ipc_command + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + */ +int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen) +{ + return intel_pmc_ipc_raw_cmd(cmd, sub, in, inlen, out, outlen, 0, 0); +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_command); + +static irqreturn_t ioc(int irq, void *dev_id) +{ + int status; + + if (ipcdev.irq_mode) { + status = ipc_read_status(); + writel(status | IPC_STATUS_IRQ, ipcdev.ipc_base + IPC_STATUS); + } + complete(&ipcdev.cmd_complete); + + return IRQ_HANDLED; +} + +static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + resource_size_t pci_resource; + int ret; + int len; + + ipcdev.dev = &pci_dev_get(pdev)->dev; + ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + ret = pci_request_regions(pdev, "intel_pmc_ipc"); + if (ret) + return ret; + + pci_resource = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + if (!pci_resource || !len) { + dev_err(&pdev->dev, "Failed to get resource\n"); + return -ENOMEM; + } + + init_completion(&ipcdev.cmd_complete); + + if (request_irq(pdev->irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + dev_err(&pdev->dev, "Failed to request irq\n"); + return -EBUSY; + } + + ipcdev.ipc_base = ioremap_nocache(pci_resource, len); + if (!ipcdev.ipc_base) { + dev_err(&pdev->dev, "Failed to ioremap ipc base\n"); + free_irq(pdev->irq, &ipcdev); + ret = -ENOMEM; + } + + return ret; +} + +static void ipc_pci_remove(struct pci_dev *pdev) +{ + free_irq(pdev->irq, &ipcdev); + pci_release_regions(pdev); + pci_dev_put(pdev); + iounmap(ipcdev.ipc_base); + ipcdev.dev = NULL; +} + +static const struct pci_device_id ipc_pci_ids[] = { + {PCI_VDEVICE(INTEL, 0x0a94), 0}, + {PCI_VDEVICE(INTEL, 0x1a94), 0}, + { 0,} +}; +MODULE_DEVICE_TABLE(pci, ipc_pci_ids); + +static struct pci_driver ipc_pci_driver = { + .name = "intel_pmc_ipc", + .id_table = ipc_pci_ids, + .probe = ipc_pci_probe, + .remove = ipc_pci_remove, +}; + +static ssize_t intel_pmc_ipc_simple_cmd_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int subcmd; + int cmd; + int ret; + + ret = sscanf(buf, "%d %d", &cmd, &subcmd); + if (ret != 2) { + dev_err(dev, "Error args\n"); + return -EINVAL; + } + + ret = intel_pmc_ipc_simple_command(cmd, subcmd); + if (ret) { + dev_err(dev, "command %d error with %d\n", cmd, ret); + return ret; + } + return (ssize_t)count; +} + +static ssize_t intel_pmc_ipc_northpeak_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int subcmd; + int ret; + + if (kstrtoul(buf, 0, &val)) + return -EINVAL; + + if (val) + subcmd = 1; + else + subcmd = 0; + ret = intel_pmc_ipc_simple_command(PMC_IPC_NORTHPEAK_CTRL, subcmd); + if (ret) { + dev_err(dev, "command north %d error with %d\n", subcmd, ret); + return ret; + } + return (ssize_t)count; +} + +static DEVICE_ATTR(simplecmd, S_IWUSR, + NULL, intel_pmc_ipc_simple_cmd_store); +static DEVICE_ATTR(northpeak, S_IWUSR, + NULL, intel_pmc_ipc_northpeak_store); + +static struct attribute *intel_ipc_attrs[] = { + &dev_attr_northpeak.attr, + &dev_attr_simplecmd.attr, + NULL +}; + +static const struct attribute_group intel_ipc_group = { + .attrs = intel_ipc_attrs, +}; + +#define PUNIT_RESOURCE_INTER 1 +static struct resource punit_res[] = { + /* Punit */ + { + .flags = IORESOURCE_MEM, + }, + { + .flags = IORESOURCE_MEM, + }, +}; + +#define TCO_RESOURCE_ACPI_IO 0 +#define TCO_RESOURCE_SMI_EN_IO 1 +#define TCO_RESOURCE_GCR_MEM 2 +static struct resource tco_res[] = { + /* ACPI - TCO */ + { + .flags = IORESOURCE_IO, + }, + /* ACPI - SMI */ + { + .flags = IORESOURCE_IO, + }, + /* GCS */ + { + .flags = IORESOURCE_MEM, + }, +}; + +static struct lpc_ich_info tco_info = { + .name = "Apollo Lake SoC", + .iTCO_version = 3, +}; + +static int ipc_create_punit_device(void) +{ + struct platform_device *pdev; + struct resource *res; + int ret; + + pdev = platform_device_alloc(PUNIT_DEVICE_NAME, -1); + if (!pdev) { + dev_err(ipcdev.dev, "Failed to alloc punit platform device\n"); + return -ENOMEM; + } + + pdev->dev.parent = ipcdev.dev; + + res = punit_res; + res->start = (resource_size_t)ipcdev.punit_base; + res->end = res->start + ipcdev.punit_size - 1; + + res = punit_res + PUNIT_RESOURCE_INTER; + res->start = (resource_size_t)ipcdev.punit_base2; + res->end = res->start + ipcdev.punit_size2 - 1; + + ret = platform_device_add_resources(pdev, punit_res, + ARRAY_SIZE(punit_res)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add platform punit resources\n"); + goto err; + } + + ret = platform_device_add(pdev); + if (ret) { + dev_err(ipcdev.dev, "Failed to add punit platform device\n"); + goto err; + } + ipcdev.punit_dev = pdev; + + return 0; +err: + platform_device_put(pdev); + return ret; +} + +static int ipc_create_tco_device(void) +{ + struct platform_device *pdev; + struct resource *res; + int ret; + + pdev = platform_device_alloc(TCO_DEVICE_NAME, -1); + if (!pdev) { + dev_err(ipcdev.dev, "Failed to alloc tco platform device\n"); + return -ENOMEM; + } + + pdev->dev.parent = ipcdev.dev; + + res = tco_res + TCO_RESOURCE_ACPI_IO; + res->start = (resource_size_t)ipcdev.acpi_io_base + TCO_BASE_OFFSET; + res->end = res->start + TCO_REGS_SIZE - 1; + + res = tco_res + TCO_RESOURCE_SMI_EN_IO; + res->start = (resource_size_t)ipcdev.acpi_io_base + SMI_EN_OFFSET; + res->end = res->start + SMI_EN_SIZE - 1; + + res = tco_res + TCO_RESOURCE_GCR_MEM; + res->start = (resource_size_t)ipcdev.gcr_base; + res->end = res->start + ipcdev.gcr_size - 1; + + ret = platform_device_add_resources(pdev, tco_res, ARRAY_SIZE(tco_res)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform resources\n"); + goto err; + } + + ret = platform_device_add_data(pdev, &tco_info, + sizeof(struct lpc_ich_info)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform data\n"); + goto err; + } + + ret = platform_device_add(pdev); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform device\n"); + goto err; + } + ipcdev.tco_dev = pdev; + + return 0; +err: + platform_device_put(pdev); + return ret; +} + +static int ipc_create_pmc_devices(void) +{ + int ret; + + ret = ipc_create_tco_device(); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform device\n"); + return ret; + } + ret = ipc_create_punit_device(); + if (ret) { + dev_err(ipcdev.dev, "Failed to add punit platform device\n"); + platform_device_unregister(ipcdev.tco_dev); + } + return ret; +} + +static int ipc_plat_get_res(struct platform_device *pdev) +{ + struct resource *res; + void __iomem *addr; + int size; + + res = platform_get_resource(pdev, IORESOURCE_IO, + PLAT_RESOURCE_ACPI_IO_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get io resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.acpi_io_base = (void *)res->start; + ipcdev.acpi_io_size = size; + dev_info(&pdev->dev, "io res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_PUNIT_DATA_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get punit resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.punit_base = (void *)res->start; + ipcdev.punit_size = size; + dev_info(&pdev->dev, "punit data res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_PUNIT_INTER_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get punit inter resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.punit_base2 = (void *)res->start; + ipcdev.punit_size2 = size; + dev_info(&pdev->dev, "punit interface res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get ipc resource\n"); + return -ENXIO; + } + size = PLAT_RESOURCE_IPC_SIZE; + if (!request_mem_region(res->start, size, pdev->name)) { + dev_err(&pdev->dev, "Failed to request ipc resource\n"); + return -EBUSY; + } + addr = ioremap_nocache(res->start, size); + if (!addr) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + release_mem_region(res->start, size); + return -ENOMEM; + } + ipcdev.ipc_base = addr; + + ipcdev.gcr_base = (void *)(res->start + size); + ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE; + dev_info(&pdev->dev, "ipc res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + return 0; +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id ipc_acpi_ids[] = { + { "INT34D2", 0}, + { } +}; +MODULE_DEVICE_TABLE(acpi, ipc_acpi_ids); +#endif + +static int ipc_plat_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret; + + ipcdev.dev = &pdev->dev; + ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + init_completion(&ipcdev.cmd_complete); + + ipcdev.irq = platform_get_irq(pdev, 0); + if (ipcdev.irq < 0) { + dev_err(&pdev->dev, "Failed to get irq\n"); + return -EINVAL; + } + + ret = ipc_plat_get_res(pdev); + if (ret) { + dev_err(&pdev->dev, "Failed to request resource\n"); + return ret; + } + + ret = ipc_create_pmc_devices(); + if (ret) { + dev_err(&pdev->dev, "Failed to create pmc devices\n"); + goto err_device; + } + + if (request_irq(ipcdev.irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + dev_err(&pdev->dev, "Failed to request irq\n"); + ret = -EBUSY; + goto err_irq; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &intel_ipc_group); + if (ret) { + dev_err(&pdev->dev, "Failed to create sysfs group %d\n", + ret); + goto err_sys; + } + + return 0; +err_sys: + free_irq(ipcdev.irq, &ipcdev); +err_irq: + platform_device_unregister(ipcdev.tco_dev); + platform_device_unregister(ipcdev.punit_dev); +err_device: + iounmap(ipcdev.ipc_base); + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (res) + release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + return ret; +} + +static int ipc_plat_remove(struct platform_device *pdev) +{ + struct resource *res; + + sysfs_remove_group(&pdev->dev.kobj, &intel_ipc_group); + free_irq(ipcdev.irq, &ipcdev); + platform_device_unregister(ipcdev.tco_dev); + platform_device_unregister(ipcdev.punit_dev); + iounmap(ipcdev.ipc_base); + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (res) + release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + ipcdev.dev = NULL; + return 0; +} + +static struct platform_driver ipc_plat_driver = { + .remove = ipc_plat_remove, + .probe = ipc_plat_probe, + .driver = { + .name = "pmc-ipc-plat", + .acpi_match_table = ACPI_PTR(ipc_acpi_ids), + }, +}; + +static int __init intel_pmc_ipc_init(void) +{ + int ret; + + ret = platform_driver_register(&ipc_plat_driver); + if (ret) { + pr_err("Failed to register PMC ipc platform driver\n"); + return ret; + } + ret = pci_register_driver(&ipc_pci_driver); + if (ret) { + pr_err("Failed to register PMC ipc pci driver\n"); + platform_driver_unregister(&ipc_plat_driver); + return ret; + } + return ret; +} + +static void __exit intel_pmc_ipc_exit(void) +{ + pci_unregister_driver(&ipc_pci_driver); + platform_driver_unregister(&ipc_plat_driver); +} + +MODULE_AUTHOR("Zha Qipeng "); +MODULE_DESCRIPTION("Intel PMC IPC driver"); +MODULE_LICENSE("GPL"); + +/* Some modules are dependent on this, so init earlier */ +fs_initcall(intel_pmc_ipc_init); +module_exit(intel_pmc_ipc_exit); From db52ef74b35dcb91fd154fa52c618bdd1b90e28e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2015 10:25:14 +0200 Subject: [PATCH 391/839] x86/fpu: Fix FPU related boot regression when CPUID masking BIOS feature is enabled Mike Galbraith reported: " My i7-4790 box is having one hell of a time with this merge window, dead in the water. BIOS setting "Limit CPUID Maximum" upsets new fpu code mightily. " It turns out that Linux does a double workaround here, as per: 066941bd4eeb ("x86: unmask CPUID levels on Intel CPUs") it undoes the BIOS workaround - but as a side effect the CPUID state is not completely constant during early init anymore, and the new FPU init code did not take this into account. So what happened is that the xstate init code did not have full CPUID available, which broke subsequent attempts to use xstate features. Fix this by ordering the early FPU init code to after we've stabilized the CPUID state. Reported-bisected-and-tested-by: Mike Galbraith Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150627082514.GA10894@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9fc5e3d9d9c8..922c5e0cea4c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -742,7 +742,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu__init_system(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -754,6 +753,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); setup_force_cpu_cap(X86_FEATURE_ALWAYS); + fpu__init_system(c); } void __init early_cpu_init(void) From 93472aff802fd7b61f2209335207e9bd793012f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2015 16:47:50 +0200 Subject: [PATCH 392/839] perf/x86: Fix 'active_events' imbalance Commit 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT") conditionally increments active_events in x86_add_exclusive() but unconditionally decrements in x86_del_exclusive(). These extra decrements can lead to the situation where active_events is zero and thus the PMI handler is 'disabled' while we have active events on the PMU generating PMIs. This leads to a truckload of: Uhhuh. NMI received for unknown reason 21 on CPU 28. Do you have a strange power saving mode enabled? Dazed and confused, but trying to continue messages and generally messes up perf. Remove the condition on the increment, double increment balanced by a double decrement is perfectly fine. Restructure the code a little bit to make the unconditional inc a bit more natural. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: alexander.shishkin@linux.intel.com Cc: brgerst@gmail.com Cc: dvlasenk@redhat.com Cc: luto@amacapital.net Cc: oleg@redhat.com Fixes: 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT") Link: http://lkml.kernel.org/r/20150624144750.GJ18673@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 36 ++++++++++++-------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5801a14f7524..3658de47900f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -357,34 +357,24 @@ void x86_release_hardware(void) */ int x86_add_exclusive(unsigned int what) { - int ret = -EBUSY, i; + int i; - if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) - return 0; - - mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { - if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) - goto out; + if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { + mutex_lock(&pmc_reserve_mutex); + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { + if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) + goto fail_unlock; + } + atomic_inc(&x86_pmu.lbr_exclusive[what]); + mutex_unlock(&pmc_reserve_mutex); } - atomic_inc(&x86_pmu.lbr_exclusive[what]); - ret = 0; + atomic_inc(&active_events); + return 0; -out: +fail_unlock: mutex_unlock(&pmc_reserve_mutex); - - /* - * Assuming that all exclusive events will share the PMI handler - * (which checks active_events for whether there is work to do), - * we can bump active_events counter right here, except for - * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init() - * path, which already bumps active_events for them. - */ - if (!ret && what != x86_lbr_exclusive_lbr) - atomic_inc(&active_events); - - return ret; + return -EBUSY; } void x86_del_exclusive(unsigned int what) From 479e9a95120aaae0bf0d3e0b5b26b36ac4a347b6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Jun 2015 09:30:01 -0400 Subject: [PATCH 393/839] drm/radeon: only check the sink type on DP connectors Avoids a crash on pre-DP asics that support HDMI. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_audio.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index c89215275053..fa719c53449b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector, dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - struct radeon_connector *radeon_connector; - int sink_type; - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { radeon_encoder->audio = NULL; return; } - radeon_connector = to_radeon_connector(connector); - sink_type = radeon_dp_getsinktype(radeon_connector); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) - radeon_encoder->audio = rdev->audio.dp_funcs; - else + if (radeon_dp_getsinktype(radeon_connector) == + CONNECTOR_OBJECT_ID_DISPLAYPORT) + radeon_encoder->audio = rdev->audio.dp_funcs; + else + radeon_encoder->audio = rdev->audio.hdmi_funcs; + } else { radeon_encoder->audio = rdev->audio.hdmi_funcs; + } dig->afmt->pin = radeon_audio_get_pin(connector->encoder); radeon_audio_enable(rdev, dig->afmt->pin, 0xf); From ee46f3c7d79c334e8bdc9947503e329dc27c0b47 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Tue, 30 Jun 2015 16:10:38 +0300 Subject: [PATCH 394/839] drm/i915: Clear pipe's pll hw state in hsw_dp_set_ddi_pll_sel() Similarly to what is done for SKL, clear the dpll_hw_state of the pipe config in hsw_dp_set_ddi_pll_sel(), since it main contain stale values. That can happen if a crtc that was previously driving an HDMI connector switches to a DP connector. In that case, the wrpll field was left with its old value, leading to warnings like the one below: [drm:check_crtc_state [i915]] *ERROR* mismatch in dpll_hw_state.wrpll (expected 0xb035061f, found 0x00000000) ------------[ cut here ]------------ WARNING: CPU: 1 PID: 767 at drivers/gpu/drm/i915/intel_display.c:12324 check_crtc_state+0x975/0x10b0 [i915]() pipe state doesn't match! This regression was indroduced in commit dd3cd74acf12723045a64f1f2c6298ac7b34a5d5 Author: Ander Conselvan de Oliveira Date: Fri May 15 13:34:29 2015 +0300 drm/i915: Don't overwrite (e)DP PLL selection on SKL Reported-by: Linus Torvalds Signed-off-by: Ander Conselvan de Oliveira Tested-by: Jani Nikula Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 280c282da9bd..16c86ce942ea 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1141,6 +1141,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock) static void hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config, int link_bw) { + memset(&pipe_config->dpll_hw_state, 0, + sizeof(pipe_config->dpll_hw_state)); + switch (link_bw) { case DP_LINK_BW_1_62: pipe_config->ddi_pll_sel = PORT_CLK_SEL_LCPLL_810; From 31f02455455d405320e2f749696bef4e02903b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 12:07:17 -0400 Subject: [PATCH 395/839] sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..d8fbd500330e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); From 1e43ba9cd867f05f3e85579c370b939e1ce585e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 30 Jun 2015 18:04:49 +0200 Subject: [PATCH 396/839] arm64: fix incorrect use of pgprot_t variable This fixes a build failure under STRICT_MM_TYPECHECKS, by adding a missing pgprot_val() around a pgport_t reference. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 82d3435bf14f..a4ede4e2ddd1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -117,7 +117,7 @@ void split_pud(pud_t *old_pud, pmd_t *pmd) int i = 0; do { - set_pmd(pmd, __pmd(addr | prot)); + set_pmd(pmd, __pmd(addr | pgprot_val(prot))); addr += PMD_SIZE; } while (pmd++, i++, i < PTRS_PER_PMD); } From 18a11b5e79697ddc61e30181737c91ce21eaa859 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 3 Jun 2015 21:53:37 +0100 Subject: [PATCH 397/839] arm64: perf: Don't use of_node after putting it It's possible, albeit unlikely, that using the of_node here will reference freed memory. Call of_node_put() after printing the name to be safe. Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 702591f6180a..8af7784a8e35 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1340,12 +1340,13 @@ static int armpmu_device_probe(struct platform_device *pdev) if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) break; - of_node_put(dn); if (cpu >= nr_cpu_ids) { pr_warn("Failed to find logical CPU for %s\n", dn->name); + of_node_put(dn); break; } + of_node_put(dn); irqs[i] = cpu; } From b265da5a45ce60bd3d7505cc0eaa6cfba50946a1 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 29 Jun 2015 09:02:40 +0100 Subject: [PATCH 398/839] arm64: perf: fix unassigned cpu_pmu->plat_device when probing PMU PPIs Commit d795ef9aa831 ("arm64: perf: don't warn about missing interrupt-affinity property for PPIs") added a check for PPIs so that we avoid parsing the interrupt-affinity property for these naturally affine interrupts. Unfortunately, this check can trigger an early (successful) return and we will not assign the value of cpu_pmu->plat_device. This patch fixes the issue. Signed-off-by: Shannon Zhao Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8af7784a8e35..b31e9a4b6275 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1318,7 +1318,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1356,6 +1356,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } From af834d457d9ed69e14836b63d0da198fdd2ec706 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 14:10:09 -0400 Subject: [PATCH 399/839] libnvdimm: smatch cleanups in __nd_ioctl Drop use of access_ok() since we are already using copy_{to|from}_user() which do their own access_ok(). Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8eb22c0ca7ce..73442bd824a7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -535,8 +535,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -ENXIO; } - if (!access_ok(VERIFY_READ, p + in_len, in_size)) - return -EFAULT; if (in_len < sizeof(in_env)) copy = min_t(u32, sizeof(in_env) - in_len, in_size); else @@ -557,8 +555,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -EFAULT; } - if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) - return -EFAULT; if (out_len < sizeof(out_env)) copy = min_t(u32, sizeof(out_env) - out_len, out_size); else @@ -570,9 +566,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } buf_len = out_len + in_len; - if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) - return -EFAULT; - if (buf_len > ND_IOCTL_MAX_BUFLEN) { dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dimm_name, cmd_name, buf_len, From daa1dee405d7d3d3e816b84a692e838a5647a02a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 28 Jun 2015 17:00:57 +0800 Subject: [PATCH 400/839] nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails Return proper error if class_create() fails. Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 73442bd824a7..7e2c43f701bc 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -699,8 +699,10 @@ int __init nvdimm_bus_init(void) nvdimm_major = rc; nd_class = class_create(THIS_MODULE, "nd"); - if (IS_ERR(nd_class)) + if (IS_ERR(nd_class)) { + rc = PTR_ERR(nd_class); goto err_class; + } return 0; From 193ccca43850d2355e7690a93ab9d7d78d38f905 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 16:09:39 -0400 Subject: [PATCH 401/839] nfit: fix smatch "use after null check" report drivers/acpi/nfit.c:1224 acpi_nfit_blk_region_enable() error: we previously assumed 'nfit_mem' could be null (see line 1223) drivers/acpi/nfit.c 1222 nfit_mem = nvdimm_provider_data(nvdimm); 1223 if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { ^^^^^^^^ Check. 1224 dev_dbg(dev, "%s: missing%s%s%s\n", __func__, 1225 nfit_mem ? "" : " nfit_mem", 1226 nfit_mem->dcr ? "" : " dcr", ^^^^^^^^^^^^^ Unchecked dereference. Reported-by: Dan Carpenter Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2161fa178c8d..a20b7c883ca0 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1223,8 +1223,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { dev_dbg(dev, "%s: missing%s%s%s\n", __func__, nfit_mem ? "" : " nfit_mem", - nfit_mem->dcr ? "" : " dcr", - nfit_mem->bdw ? "" : " bdw"); + (nfit_mem && nfit_mem->dcr) ? "" : " dcr", + (nfit_mem && nfit_mem->bdw) ? "" : " bdw"); return -ENXIO; } From fe7599079b03d521d376da88920cc7b87f71ae25 Mon Sep 17 00:00:00 2001 From: Yang Dongsheng Date: Wed, 3 Jun 2015 14:57:32 +0800 Subject: [PATCH 402/839] btrfs: qgroup: allow user to clear the limitation on qgroup Currently, we can only set a limitation on a qgroup, but we can not clear it. This patch provide a choice to user to clear a limitation on qgroup by passing a value of CLEAR_VALUE(-1) to kernel. Reported-by: Tsutomu Itoh Signed-off-by: Dongsheng Yang Tested-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 49 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d5f1f033b7a0..e9ace099162c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; + /* Sometimes we would want to clear the limit on this qgroup. + * To meet this requirement, we treat the -1 as a special value + * which tell kernel to clear the limit on this qgroup. + */ + const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; @@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, } spin_lock(&fs_info->qgroup_lock); - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) - qgroup->max_rfer = limit->max_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) - qgroup->max_excl = limit->max_excl; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) - qgroup->rsv_rfer = limit->rsv_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) - qgroup->rsv_excl = limit->rsv_excl; + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { + if (limit->max_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + qgroup->max_rfer = 0; + } else { + qgroup->max_rfer = limit->max_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { + if (limit->max_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + qgroup->max_excl = 0; + } else { + qgroup->max_excl = limit->max_excl; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { + if (limit->rsv_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + qgroup->rsv_rfer = 0; + } else { + qgroup->rsv_rfer = limit->rsv_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { + if (limit->rsv_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + qgroup->rsv_excl = 0; + } else { + qgroup->rsv_excl = limit->rsv_excl; + } + } qgroup->lim_flags |= limit->flags; spin_unlock(&fs_info->qgroup_lock); From 65f5333875d7bbfc13436e224a181d10a80d1ada Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Mon, 8 Jun 2015 20:05:50 +0800 Subject: [PATCH 403/839] btrfs: cleanup noused initialization of dev in btrfs_end_bio() It is introduced by: c404e0dc2c843b154f9a36c3aec10d0a715d88eb Btrfs: fix use-after-free in the finishing procedure of the device replace But seems no relationship with that bug, this patch revirt these code block for cleanup. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 83966996aacb..d4cd4059bded 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5715,7 +5715,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; - struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5723,6 +5722,7 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; + struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; From e82afc52abff07a4acbc90f899598ebafb662831 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Fri, 12 Jun 2015 20:36:58 +0800 Subject: [PATCH 404/839] btrfs: add error handling for scrub_workers_get() Although it is a rare case, we'd better free previous allocated memory on error. Signed-off-by: Zhao Lei Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9f2feabe99f2..94db0fa5225a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, int is_dev_replace) { - int ret = 0; unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; int max_active = fs_info->thread_pool_size; @@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, fs_info->scrub_workers = btrfs_alloc_workqueue("btrfs-scrub", flags, max_active, 4); - if (!fs_info->scrub_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_workers) + goto fail_scrub_workers; + fs_info->scrub_wr_completion_workers = btrfs_alloc_workqueue("btrfs-scrubwrc", flags, max_active, 2); - if (!fs_info->scrub_wr_completion_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_wr_completion_workers) + goto fail_scrub_wr_completion_workers; + fs_info->scrub_nocow_workers = btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_nocow_workers) + goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); - if (!fs_info->scrub_parity_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_parity_workers) + goto fail_scrub_parity_workers; } ++fs_info->scrub_workers_refcnt; -out: - return ret; + return 0; + +fail_scrub_parity_workers: + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); +fail_scrub_nocow_workers: + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); +fail_scrub_wr_completion_workers: + btrfs_destroy_workqueue(fs_info->scrub_workers); +fail_scrub_workers: + return -ENOMEM; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) From 67c5e7d464bc466471b05e027abe8a6b29687ebd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Jun 2015 00:58:53 +0100 Subject: [PATCH 405/839] Btrfs: fix race between balance and unused block group deletion We have a race between deleting an unused block group and balancing the same block group that leads to an assertion failure/BUG(), producing the following trace: [181631.208236] BTRFS: assertion failed: 0, file: fs/btrfs/volumes.c, line: 2622 [181631.220591] ------------[ cut here ]------------ [181631.222959] kernel BUG at fs/btrfs/ctree.h:4062! [181631.223932] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [181631.224566] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse acpi_cpufreq parpor$ [181631.224566] CPU: 8 PID: 17451 Comm: btrfs Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [181631.224566] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [181631.224566] task: ffff880127e09590 ti: ffff8800b5824000 task.ti: ffff8800b5824000 [181631.224566] RIP: 0010:[] [] assfail.constprop.50+0x1e/0x20 [btrfs] [181631.224566] RSP: 0018:ffff8800b5827ae8 EFLAGS: 00010246 [181631.224566] RAX: 0000000000000040 RBX: ffff8800109fc218 RCX: ffffffff81095dce [181631.224566] RDX: 0000000000005124 RSI: ffffffff81464819 RDI: 00000000ffffffff [181631.224566] RBP: ffff8800b5827ae8 R08: 0000000000000001 R09: 0000000000000000 [181631.224566] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800109fc200 [181631.224566] R13: ffff880020095000 R14: ffff8800b1a13f38 R15: ffff880020095000 [181631.224566] FS: 00007f70ca0b0c80(0000) GS:ffff88013ec00000(0000) knlGS:0000000000000000 [181631.224566] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181631.224566] CR2: 00007f2872ab6e68 CR3: 00000000a717c000 CR4: 00000000000006e0 [181631.224566] Stack: [181631.224566] ffff8800b5827ba8 ffffffffa03f3916 ffff8800b5827b38 ffffffffa03d080e [181631.224566] ffffffffa03d1423 ffff880020095000 ffff88001233c000 0000000000000001 [181631.224566] ffff880020095000 ffff8800b1a13f38 0000000a69c00000 0000000000000000 [181631.224566] Call Trace: [181631.224566] [] btrfs_remove_chunk+0xa4/0x6bb [btrfs] [181631.224566] [] ? join_transaction.isra.8+0xb9/0x3ba [btrfs] [181631.224566] [] ? wait_current_trans.isra.13+0x22/0xfc [btrfs] [181631.224566] [] btrfs_relocate_chunk.isra.29+0x8f/0xa7 [btrfs] [181631.224566] [] btrfs_balance+0xaa4/0xc52 [btrfs] [181631.224566] [] btrfs_ioctl_balance+0x23f/0x2b0 [btrfs] [181631.224566] [] ? trace_hardirqs_on+0xd/0xf [181631.224566] [] btrfs_ioctl+0xfe2/0x2220 [btrfs] [181631.224566] [] ? __this_cpu_preempt_check+0x13/0x15 [181631.224566] [] ? arch_local_irq_save+0x9/0xc [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? __do_page_fault+0x211/0x424 [181631.224566] [] do_vfs_ioctl+0x3c6/0x479 (...) The sequence of steps leading to this are: CPU 0 CPU 1 btrfs_balance() btrfs_relocate_chunk() btrfs_relocate_block_group(bg X) btrfs_lookup_block_group(bg X) cleaner_kthread locks fs_info->cleaner_mutex btrfs_delete_unused_bgs() finds bg X, which became unused in the previous transaction checks bg X ->ro == 0, so it proceeds sets bg X ->ro to 1 (btrfs_set_block_group_ro(bg X)) blocks on fs_info->cleaner_mutex btrfs_remove_chunk(bg X) unlocks fs_info->cleaner_mutex acquires fs_info->cleaner_mutex relocate_block_group() --> does nothing, no extents found in the extent tree from bg X unlocks fs_info->cleaner_mutex btrfs_relocate_block_group(bg X) returns btrfs_remove_chunk(bg X) extent map not found --> ASSERT(0) Fix this by using a new mutex to make sure these 2 operations, block group relocation and removal, are serialized. This issue is reproducible by running fstests generic/038 (which stresses chunk allocation and automatic removal of unused block groups) together with the following balance loop: while true; do btrfs balance start -dusage=0 ; done Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 12 ++++++++++- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/volumes.c | 48 +++++++++++++++++++++++++++++++++++++----- 4 files changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80a9aefb0c46..aac314e14188 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1778,6 +1778,7 @@ struct btrfs_fs_info { spinlock_t unused_bgs_lock; struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; + struct mutex delete_unused_bgs_mutex; /* For btrfs to record security options */ struct security_mnt_opts security_opts; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b977fc8d8201..b59deb2c63f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1772,7 +1772,6 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); - btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -1781,6 +1780,16 @@ static int cleaner_kthread(void *arg) * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); + + /* + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * with relocation (btrfs_relocate_chunk) and relocation + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) + * after acquiring fs_info->delete_unused_bgs_mutex. So we + * can't hold, nor need to, fs_info->cleaner_mutex when deleting + * unused block groups. + */ + btrfs_delete_unused_bgs(root->fs_info); sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); @@ -2492,6 +2501,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); + mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 38b76cc02f48..1c2bd1723e40 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_unlock(&fs_info->unused_bgs_lock); + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); @@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d4cd4059bded..9b95503ddd00 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; + /* + * Prevent races with automatic removal of unused block groups. + * After we relocate and before we remove the chunk with offset + * chunk_offset, automatic removal of the block group can kick in, + * resulting in a failure when calling btrfs_remove_chunk() below. + * + * Make sure to acquire this mutex before doing a tree search (dev + * or chunk trees) to find chunks. Otherwise the cleaner kthread might + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after + * we release the path used to search the chunk/dev tree and before + * the current task acquires this mutex and calls us. + */ + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); + ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) return -ENOSPC; @@ -2814,13 +2828,18 @@ again: key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto error; + } BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto error; if (ret > 0) @@ -2843,6 +2862,7 @@ again: else BUG_ON(ret); } + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (found_key.offset == 0) break; @@ -3299,9 +3319,12 @@ again: goto error; } + mutex_lock(&fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } /* * this shouldn't happen, it means the last relocate @@ -3313,6 +3336,7 @@ again: ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); ret = 0; break; } @@ -3321,8 +3345,10 @@ again: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != key.objectid) + if (found_key.objectid != key.objectid) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); break; + } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -3335,10 +3361,13 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); btrfs_release_path(path); - if (!ret) + if (!ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; + } if (counting) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); @@ -3348,6 +3377,7 @@ again: ret = btrfs_relocate_chunk(chunk_root, found_key.objectid, found_key.offset); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto error; if (ret == -ENOSPC) { @@ -4087,11 +4117,16 @@ again: key.type = BTRFS_DEV_EXTENT_KEY; do { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto done; + } ret = btrfs_previous_item(root, path, 0, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto done; if (ret) { @@ -4105,6 +4140,7 @@ again: btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != device->devid) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4113,6 +4149,7 @@ again: length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4122,6 +4159,7 @@ again: btrfs_release_path(path); ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto done; if (ret == -ENOSPC) From c3f4a1685bb87e59c886ee68f7967eae07d4dffa Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:56 +0100 Subject: [PATCH 406/839] Btrfs: use kmem_cache_free when freeing entry in inode cache The free space entries are allocated using kmem_cache_zalloc(), through __btrfs_add_free_space(), therefore we should use kmem_cache_free() and not kfree() to avoid any confusion and any potential problem. Looking at the kfree() definition at mm/slab.c it has the following comment: /* * (...) * * Don't free memory not originally allocated by kmalloc() * or you will run into trouble. */ So better be safe and use kmem_cache_free(). Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a637..218df701e607 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) __btrfs_add_free_space(ctl, info->offset, count); free: rb_erase(&info->offset_index, rbroot); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } } From ae9d8f17118551bedd797406a6768b87c2146234 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:57 +0100 Subject: [PATCH 407/839] Btrfs: fix race between caching kthread and returning inode to inode cache While the inode cache caching kthread is calling btrfs_unpin_free_ino(), we could have a concurrent call to btrfs_return_ino() that adds a new entry to the root's free space cache of pinned inodes. This concurrent call does not acquire the fs_info->commit_root_sem before adding a new entry if the caching state is BTRFS_CACHE_FINISHED, which is a problem because the caching kthread calls btrfs_unpin_free_ino() after setting the caching state to BTRFS_CACHE_FINISHED and therefore races with the task calling btrfs_return_ino(), which is adding a new entry, while the former (caching kthread) is navigating the cache's rbtree, removing and freeing nodes from the cache's rbtree without acquiring the spinlock that protects the rbtree. This race resulted in memory corruption due to double free of struct btrfs_free_space objects because both tasks can end up doing freeing the same objects. Note that adding a new entry can result in merging it with other entries in the cache, in which case those entries are freed. This is particularly important as btrfs_free_space structures are also used for the block group free space caches. This memory corruption can be detected by a debugging kernel, which reports it with the following trace: [132408.501148] slab error in verify_redzone_free(): cache `btrfs_free_space': double free detected [132408.505075] CPU: 15 PID: 12248 Comm: btrfs-ino-cache Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [132408.505075] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [132408.505075] ffff880023e7d320 ffff880163d73cd8 ffffffff8145eec7 ffffffff81095dce [132408.505075] ffff880009735d40 ffff880163d73ce8 ffffffff81154e1e ffff880163d73d68 [132408.505075] ffffffff81155733 ffffffffa054a95a ffff8801b6099f00 ffffffffa0505b5f [132408.505075] Call Trace: [132408.505075] [] dump_stack+0x4f/0x7b [132408.505075] [] ? console_unlock+0x356/0x3a2 [132408.505075] [] __slab_error.isra.28+0x25/0x36 [132408.505075] [] __cache_free+0xe2/0x4b6 [132408.505075] [] ? __btrfs_add_free_space+0x2f0/0x343 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] ? time_hardirqs_off+0x15/0x28 [132408.505075] [] ? trace_hardirqs_off+0xd/0xf [132408.505075] [] ? kfree+0xb6/0x14e [132408.505075] [] kfree+0xe5/0x14e [132408.505075] [] btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] caching_kthread+0x29e/0x2d9 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x99/0x99 [btrfs] [132408.505075] [] kthread+0xef/0xf7 [132408.505075] [] ? time_hardirqs_on+0x15/0x28 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] [] ret_from_fork+0x42/0x70 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] ffff880023e7d320: redzone 1:0x9f911029d74e35b, redzone 2:0x9f911029d74e35b. [132409.501654] slab: double free detected in cache 'btrfs_free_space', objp ffff880023e7d320 [132409.503355] ------------[ cut here ]------------ [132409.504241] kernel BUG at mm/slab.c:2571! Therefore fix this by having btrfs_unpin_free_ino() acquire the lock that protects the rbtree while doing the searches and removing entries. Fixes: 1c70d8fb4dfa ("Btrfs: fix inode caching vs tree log") Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 218df701e607..d4a582ac3f73 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,23 +255,29 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); } } From da288d280d16f4d7e4ada331cb33d381b408b10c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:55:31 +0100 Subject: [PATCH 408/839] Btrfs: fix crash on close_ctree() if cleaner starts new transaction Often when running fstests btrfs/079 I was running into the following trace during umount on one of my qemu/kvm test vms: [ 8245.682441] WARNING: CPU: 8 PID: 25064 at fs/btrfs/extent-tree.c:138 btrfs_put_block_group+0x51/0x69 [btrfs]() [ 8245.685039] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.693860] CPU: 8 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.695081] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.697583] 0000000000000009 ffff88020d047ce8 ffffffff8145eec7 ffffffff81095dce [ 8245.699234] 0000000000000000 ffff88020d047d28 ffffffff8104b399 0000000000000028 [ 8245.700995] ffffffffa04db07b ffff8801c6036c00 ffff8801c6036d68 ffff880202eb40b0 [ 8245.702510] Call Trace: [ 8245.703006] [] dump_stack+0x4f/0x7b [ 8245.705393] [] ? console_unlock+0x356/0x3a2 [ 8245.706569] [] warn_slowpath_common+0xa1/0xbb [ 8245.707747] [] ? btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.709101] [] warn_slowpath_null+0x1a/0x1c [ 8245.710274] [] btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.711823] [] btrfs_free_block_groups+0x145/0x322 [btrfs] [ 8245.713251] [] close_ctree+0x1ef/0x325 [btrfs] [ 8245.714448] [] ? evict_inodes+0xdc/0xeb [ 8245.715539] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.716835] [] generic_shutdown_super+0x73/0xef [ 8245.718015] [] kill_anon_super+0x13/0x1e [ 8245.719101] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.720316] [] deactivate_locked_super+0x3b/0x68 [ 8245.721517] [] deactivate_super+0x3f/0x43 [ 8245.722581] [] cleanup_mnt+0x59/0x78 [ 8245.723538] [] __cleanup_mnt+0x12/0x14 [ 8245.724572] [] task_work_run+0x8f/0xbc [ 8245.725598] [] do_notify_resume+0x45/0x53 [ 8245.726892] [] int_signal+0x12/0x17 [ 8245.737887] ---[ end trace a01d038397e99b92 ]--- [ 8245.769363] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 8245.770737] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.772641] CPU: 2 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.772641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.772641] task: ffff880013005810 ti: ffff88020d044000 task.ti: ffff88020d044000 [ 8245.772641] RIP: 0010:[] [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP: 0018:ffff88020d0478b8 EFLAGS: 00010202 [ 8245.772641] RAX: 0000000000000004 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffffa0581488 [ 8245.772641] RDX: 0000000000000000 RSI: ffff880194b7bf48 RDI: ffff880144b6a7a0 [ 8245.772641] RBP: ffff88020d0478d8 R08: 0000000000000000 R09: 000000000000ffff [ 8245.772641] R10: 0000000000000004 R11: 0000000000000005 R12: ffff880194b7bf48 [ 8245.772641] R13: ffff880194b7bf48 R14: 0000000000000410 R15: 0000000000000000 [ 8245.772641] FS: 00007f991e77d840(0000) GS:ffff88023e280000(0000) knlGS:0000000000000000 [ 8245.772641] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 8245.772641] CR2: 00007fbbd325ee68 CR3: 000000021de8e000 CR4: 00000000000006e0 [ 8245.772641] Stack: [ 8245.772641] ffff880194b7bf00 ffff880202eb4000 ffff880194b7bf48 0000000000000410 [ 8245.772641] ffff88020d047958 ffffffffa04ec6d5 ffff8801629b2ee8 0000000082987570 [ 8245.772641] 0000000000a5813f 0000000000000001 ffff880013006100 0000000000000002 [ 8245.772641] Call Trace: [ 8245.772641] [] btrfs_wq_submit_bio+0xe1/0x17b [btrfs] [ 8245.772641] [] ? check_irq_usage+0x76/0x87 [ 8245.772641] [] btree_submit_bio_hook+0xb6/0xd9 [btrfs] [ 8245.772641] [] ? btree_csum_one_bio+0xad/0xad [btrfs] [ 8245.772641] [] ? btree_io_failed_hook+0x5e/0x5e [btrfs] [ 8245.772641] [] submit_one_bio+0x8c/0xc7 [btrfs] [ 8245.772641] [] submit_extent_page.isra.18+0x9d/0x186 [btrfs] [ 8245.772641] [] write_one_eb+0x117/0x1ae [btrfs] [ 8245.772641] [] ? end_extent_buffer_writeback+0x21/0x21 [btrfs] [ 8245.772641] [] btree_write_cache_pages+0x2ab/0x385 [btrfs] [ 8245.772641] [] btree_writepages+0x23/0x5c [btrfs] [ 8245.772641] [] do_writepages+0x23/0x2c [ 8245.772641] [] __writeback_single_inode+0xda/0x5bd [ 8245.772641] [] ? writeback_single_inode+0x2b/0x173 [ 8245.772641] [] writeback_single_inode+0xc8/0x173 [ 8245.772641] [] write_inode_now+0x8a/0x95 [ 8245.772641] [] ? _atomic_dec_and_lock+0x30/0x4e [ 8245.772641] [] iput+0x17d/0x26a [ 8245.772641] [] close_ctree+0x22a/0x325 [btrfs] [ 8245.772641] [] ? evict_inodes+0xdc/0xeb [ 8245.772641] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.772641] [] generic_shutdown_super+0x73/0xef [ 8245.772641] [] kill_anon_super+0x13/0x1e [ 8245.772641] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.772641] [] deactivate_locked_super+0x3b/0x68 [ 8245.772641] [] deactivate_super+0x3f/0x43 [ 8245.772641] [] cleanup_mnt+0x59/0x78 [ 8245.772641] [] __cleanup_mnt+0x12/0x14 [ 8245.772641] [] task_work_run+0x8f/0xbc [ 8245.772641] [] do_notify_resume+0x45/0x53 [ 8245.772641] [] int_signal+0x12/0x17 [ 8245.772641] Code: 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 89 f4 48 8b 46 70 a8 04 74 09 48 8b 5f 08 48 85 db 75 03 48 8b 1f 49 89 5c 24 68 <83> 7b 5c ff 74 04 f0 ff 43 50 49 83 7c 24 08 00 74 2c 4c 8d 6b [ 8245.772641] RIP [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP [ 8245.845040] ---[ end trace a01d038397e99b93 ]--- For logical reasons such as the phase of the moon, this happened more often with "-o inode_cache" than without any mount options. After some debugging it turned out to be simple to understand what was happening: 1) close_ctree() is called; 2) It then stops the transaction kthread, which commits the current transaction; 3) It asks the cleaner kthread to stop, which is currently running btrfs_delete_unused_bgs(); 4) btrfs_delete_unused_bgs() finds an unused block group, starts a new transaction, deletes the block group, which implies COWing some tree nodes and leafs and dirtying their respective pages, and then finally it ends the transaction it started, without committing it; 5) The cleaner kthread stops; 6) close_ctree() releases (from memory) the block group objects, which produces the warning in the trace pasted above; 7) Then it invalidates all pages of the btree inode, by calling invalidate_inode_pages2(), which waits for any pages under writeback, and releases any non-dirty pages; 8) All work queues are destroyed (waiting first for their current tasks to finish execution); 9) A final iput() is called against the btree inode; 10) This iput triggers a writeback of the btree inode because it still has dirty pages; 11) This starts the whole chain of callbacks for the btree inode until it eventually reaches btrfs_wq_submit_bio() where it leads to a NULL pointer dereference because the work queues were already destroyed. Fix this by making the cleaner commit any transaction that it started after the transaction kthread was stopped. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b59deb2c63f4..e5aad7f535aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; int again; + struct btrfs_trans_handle *trans; do { again = 0; @@ -1798,6 +1799,34 @@ sleep: __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); + + /* + * Transaction kthread is stopped before us and wakes us up. + * However we might have started a new transaction and COWed some + * tree blocks when deleting unused block groups for example. So + * make sure we commit the transaction we started to have a clean + * shutdown when evicting the btree inode - if it has dirty pages + * when we do the final iput() on it, eviction will trigger a + * writeback for it which will fail with null pointer dereferences + * since work queues and other resources were already released and + * destroyed by the time the iput/eviction/writeback is made. + */ + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + btrfs_err(root->fs_info, + "cleaner transaction attach returned %ld", + PTR_ERR(trans)); + } else { + int ret; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + btrfs_err(root->fs_info, + "cleaner open transaction commit returned %d", + ret); + } + return 0; } From e4545de5b035c7debb73d260c78377dbb69cbfb5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Jun 2015 12:49:23 +0100 Subject: [PATCH 409/839] Btrfs: fix fsync data loss after append write If we do an append write to a file (which increases its inode's i_size) that does not have the flag BTRFS_INODE_NEEDS_FULL_SYNC set in its inode, and the previous transaction added a new hard link to the file, which sets the flag BTRFS_INODE_COPY_EVERYTHING in the file's inode, and then fsync the file, the inode's new i_size isn't logged. This has the consequence that after the fsync log is replayed, the file size remains what it was before the append write operation, which means users/applications will not be able to read the data that was successsfully fsync'ed before. This happens because neither the inode item nor the delayed inode get their i_size updated when the append write is made - doing so would require starting a transaction in the buffered write path, something that we do not do intentionally for performance reasons. Fix this by making sure that when the flag BTRFS_INODE_COPY_EVERYTHING is set the inode is logged with its current i_size (log the in-memory inode into the log tree). This issue is not a recent regression and is easy to reproduce with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs generic _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_metadata_journaling $SCRATCH_DEV _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and then fsync it. # The fsync here is only needed to trigger the issue in btrfs, as it causes the # the flag BTRFS_INODE_NEEDS_FULL_SYNC to be removed from the btrfs inode. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io sync # Add a hard link to our file. # On btrfs this sets the flag BTRFS_INODE_COPY_EVERYTHING on the btrfs inode, # which is a necessary condition to trigger the issue. ln $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now append more data to our file, increasing its size, and fsync the file. # In btrfs because the inode flag BTRFS_INODE_COPY_EVERYTHING was set and the # write path did not update the inode item in the btree nor the delayed inode # item (in memory struture) in the current transaction (created by the fsync # handler), the fsync did not record the inode's new i_size in the fsync # log/journal. This made the data unavailable after the fsync log/journal is # replayed. $XFS_IO_PROG -c "pwrite -S 0xbb 32K 32K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io echo "File content after fsync and before crash:" od -t x1 $SCRATCH_MNT/foo _crash_and_mount echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo status=0 exit The expected file output before and after the crash/power failure expects the appended data to be available, which is: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0100000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0200000 Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..76c4f9d1b80a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4155,6 +4155,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4263,11 +4264,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4290,6 +4286,9 @@ again: if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4360,6 +4359,11 @@ next_slot: log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed From 36283bf777d963fac099213297e155d071096994 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 20 Jun 2015 00:44:51 +0100 Subject: [PATCH 410/839] Btrfs: fix fsync xattr loss in the fast fsync path After commit 4f764e515361 ("Btrfs: remove deleted xattrs on fsync log replay"), we can end up in a situation where during log replay we end up deleting xattrs that were never deleted when their file was last fsynced. This happens in the fast fsync path (flag BTRFS_INODE_NEEDS_FULL_SYNC is not set in the inode) if the inode has the flag BTRFS_INODE_COPY_EVERYTHING set, the xattr was added in a past transaction and the leaf where the xattr is located was not updated (COWed or created) in the current transaction. In this scenario the xattr item never ends up in the log tree and therefore at log replay time, which makes the replay code delete the xattr from the fs/subvol tree as it thinks that xattr was deleted prior to the last fsync. Fix this by always logging all xattrs, which is the simplest and most reliable way to detect deleted xattrs and replay the deletes at log replay time. This issue is reproducible with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey . ./common/attr # real QA test starts here # We create a lot of xattrs for a single file. Only btrfs and xfs are currently # able to store such a large mount of xattrs per file, other filesystems such # as ext3/4 and f2fs for example, fail with ENOSPC even if we attempt to add # less than 1000 xattrs with very small values. _supported_fs btrfs xfs _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_attrs _require_metadata_journaling $SCRATCH_DEV rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and make sure everything is # durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" $SCRATCH_MNT/foo | _filter_xfs_io sync # Add many small xattrs to our file. # We create such a large amount because it's needed to trigger the issue found # in btrfs - we need to have an amount that causes the fs to have at least 3 # btree leafs with xattrs stored in them, and it must work on any leaf size # (maximum leaf/node size is 64Kb). num_xattrs=2000 for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" $SETFATTR_PROG -n $name -v "val_$(printf "%04d" $i)" $SCRATCH_MNT/foo done # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now update our file's data and fsync the file. # After a successful fsync, if the fsync log/journal is replayed we expect to # see all the xattrs we added before with the same values (and the updated file # data of course). Btrfs used to delete some of these xattrs when it replayed # its fsync log/journal. $XFS_IO_PROG -c "pwrite -S 0xbb 8K 16K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo echo "File xattrs after crash and log replay:" for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" echo -n "$name=" $GETFATTR_PROG --absolute-names -n $name --only-values $SCRATCH_MNT/foo echo done status=0 exit The golden output expects all xattrs to be available, and with the correct values, after the fsync log is replayed. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 76c4f9d1b80a..66f87156882f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4117,6 +4117,86 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, return 0; } +/* + * At the moment we always log all xattrs. This is to figure out at log replay + * time which xattrs must have their deletion replayed. If a xattr is missing + * in the log tree and exists in the fs/subvol tree, we delete it. This is + * because if a xattr is deleted, the inode is fsynced and a power failure + * happens, causing the log to be replayed the next time the fs is mounted, + * we want the xattr to not exist anymore (same behaviour as other filesystems + * with a journal, ext3/4, xfs, f2fs, etc). + */ +static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + int ret; + struct btrfs_key key; + const u64 ino = btrfs_ino(inode); + int ins_nr = 0; + int start_slot = 0; + + key.objectid = ino; + key.type = BTRFS_XATTR_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + + while (true) { + int slot = path->slots[0]; + struct extent_buffer *leaf = path->nodes[0]; + int nritems = btrfs_header_nritems(leaf); + + if (slot >= nritems) { + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) + break; + + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + cond_resched(); + } + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + } + + return 0; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4289,6 +4369,25 @@ again: if (min_key.type == BTRFS_INODE_ITEM_KEY) need_log_inode_item = false; + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ + if (min_key.type == BTRFS_XATTR_ITEM_KEY) { + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + if (ret) { + btrfs_release_path(path); + continue; + } + goto next_slot; + } + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4356,6 +4455,11 @@ next_slot: ins_nr = 0; } + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + if (err) + goto out_unlock; log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); From 82cd003a77173c91b9acad8033fb7931dac8d751 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 29 Jun 2015 19:30:23 +0300 Subject: [PATCH 411/839] crush: fix a bug in tree bucket decode struct crush_bucket_tree::num_nodes is u8, so ceph_decode_8_safe() should be used. -Wconversion catches this, but I guess it went unnoticed in all the noise it spews. The actual problem (at least for common crushmaps) isn't the u32 -> u8 truncation though - it's the advancement by 4 bytes instead of 1 in the crushmap buffer. Fixes: http://tracker.ceph.com/issues/2759 Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- net/ceph/osdmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 15796696d64e..4a3125836b64 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end, { int j; dout("crush_decode_tree_bucket %p to %p\n", *p, end); - ceph_decode_32_safe(p, end, b->num_nodes, bad); + ceph_decode_8_safe(p, end, b->num_nodes, bad); b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); if (b->node_weights == NULL) return -ENOMEM; From 5a60e87603c4c533492c515b7f62578189b03c9c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 24 Jun 2015 17:24:33 +0300 Subject: [PATCH 412/839] rbd: use GFP_NOIO in rbd_obj_request_create() rbd_obj_request_create() is called on the main I/O path, so we need to use GFP_NOIO to make sure allocation doesn't blow back on us. Not all callers need this, but I'm still hardcoding the flag inside rather than making it a parameter because a) this is going to stable, and b) those callers shouldn't really use rbd_obj_request_create() and will be fixed in the future. More memory allocation fixes will follow. Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b316ee48a30b..d94529d5c8e9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2022,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - name = kmalloc(size, GFP_KERNEL); + name = kmalloc(size, GFP_NOIO); if (!name) return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) { kfree(name); return NULL; From 2adc376c551943a07170cbe70f43e6d6065f8906 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 25 Jun 2015 12:25:58 -0300 Subject: [PATCH 413/839] vfs: avoid creation of inode number 0 in get_next_ino currently, get_next_ino() is able to create inodes with inode number = 0. This have a bad impact in the filesystems relying in this function to generate inode numbers. While there is no problem at all in having inodes with number 0, userspace tools which handle file management tasks can have problems handling these files, like for example, the impossiblity of users to delete these files, since glibc will ignore them. So, I believe the best way is kernel to avoid creating them. This problem has been raised previously, but the old thread didn't have any other update for a year+, and I've seen too many users hitting the same issue regarding the impossibility to delete files while using filesystems relying on this function. So, I'm starting the thread again, with the same patch that I believe is enough to address this problem. Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 0401d2c6d087..648e71ce6ec2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -840,7 +840,11 @@ unsigned int get_next_ino(void) } #endif - *p = ++res; + res++; + /* get_next_ino should not provide a 0 inode number */ + if (unlikely(!res)) + res++; + *p = res; put_cpu_var(last_ino); return res; } From 1af95de6f0119d5bde02d3a811a9f3a3661e954e Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Tue, 23 Jun 2015 18:54:45 +0800 Subject: [PATCH 414/839] fs:super:get_anon_bdev: fix race condition could cause dev exceed its upper limitation Execution of get_anon_bdev concurrently and preemptive kernel all could bring race condition, it isn't enough to check dev against its upper limitation with equality operator only. This patch fix it. Signed-off-by: Wang YanQing Signed-off-by: Al Viro --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index 928c20f47af9..b61372354f2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -842,7 +842,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if (dev == (1 << MINORBITS)) { + if (dev >= (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) From 8a81252b774b53e628a8a0fe18e2b8fc236d92cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2015 15:54:08 +0200 Subject: [PATCH 415/839] fs/file.c: don't acquire files->file_lock in fd_install() Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet Reported-by: Mateusz Guzik Acked-by: Mateusz Guzik Tested-by: Mateusz Guzik Signed-off-by: Al Viro --- Documentation/filesystems/porting | 4 ++ fs/file.c | 67 ++++++++++++++++++++++--------- include/linux/fdtable.h | 3 ++ 3 files changed, 55 insertions(+), 19 deletions(-) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 3eae250254d5..ec5456113072 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -500,3 +500,7 @@ in your dentry operations instead. dentry, it does not get nameidata at all and it gets called only when cookie is non-NULL. Note that link body isn't available anymore, so if you need it, store it as cookie. +-- +[mandatory] + __fd_install() & fd_install() can now sleep. Callers should not + hold a spinlock or other resources that do not allow a schedule. diff --git a/fs/file.c b/fs/file.c index 93c5f89c248b..3d2eb4c542a4 100644 --- a/fs/file.c +++ b/fs/file.c @@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr) spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); + + /* make sure all __fd_install() have seen resize_in_progress + * or have finished their rcu_read_lock_sched() section. + */ + if (atomic_read(&files->count) > 1) + synchronize_sched(); + spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; @@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr) __free_fdtable(new_fdt); return -EMFILE; } - /* - * Check again since another task may have expanded the fd table while - * we dropped the lock - */ cur_fdt = files_fdtable(files); - if (nr >= cur_fdt->max_fds) { - /* Continue as planned */ - copy_fdtable(new_fdt, cur_fdt); - rcu_assign_pointer(files->fdt, new_fdt); - if (cur_fdt != &files->fdtab) - call_rcu(&cur_fdt->rcu, free_fdtable_rcu); - } else { - /* Somebody else expanded, so undo our attempt */ - __free_fdtable(new_fdt); - } + BUG_ON(nr < cur_fdt->max_fds); + copy_fdtable(new_fdt, cur_fdt); + rcu_assign_pointer(files->fdt, new_fdt); + if (cur_fdt != &files->fdtab) + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); + /* coupled with smp_rmb() in __fd_install() */ + smp_wmb(); return 1; } @@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr) * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) { struct fdtable *fdt; + int expanded = 0; +repeat: fdt = files_fdtable(files); /* Do we need to expand? */ if (nr < fdt->max_fds) - return 0; + return expanded; /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; + if (unlikely(files->resize_in_progress)) { + spin_unlock(&files->file_lock); + expanded = 1; + wait_event(files->resize_wait, !files->resize_in_progress); + spin_lock(&files->file_lock); + goto repeat; + } + /* All good, so we try */ - return expand_fdtable(files, nr); + files->resize_in_progress = true; + expanded = expand_fdtable(files, nr); + files->resize_in_progress = false; + + wake_up_all(&files->resize_wait); + return expanded; } static inline void __set_close_on_exec(int fd, struct fdtable *fdt) @@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->resize_in_progress = false; + init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; @@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) { struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); + + might_sleep(); + rcu_read_lock_sched(); + + while (unlikely(files->resize_in_progress)) { + rcu_read_unlock_sched(); + wait_event(files->resize_wait, !files->resize_in_progress); + rcu_read_lock_sched(); + } + /* coupled with smp_wmb() in expand_fdtable() */ + smp_rmb(); + fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + rcu_read_unlock_sched(); } void fd_install(unsigned int fd, struct file *file) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct { * read mostly part */ atomic_t count; + bool resize_in_progress; + wait_queue_head_t resize_wait; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* From 5ba97d2832f87943c43bb69cb1ef86dbc59df5bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jun 2015 17:10:30 +0200 Subject: [PATCH 416/839] fs/file.c: __fget() and dup2() atomicity rules __fget() does lockless fetch of pointer from the descriptor table, attempts to grab a reference and treats "it was already zero" as "it's already gone from the table, we just hadn't seen the store, let's fail". Unfortunately, that breaks the atomicity of dup2() - __fget() might see the old pointer, notice that it's been already dropped and treat that as "it's closed". What we should be getting is either the old file or new one, depending whether we come before or after dup2(). Dmitry had following test failing sometimes : int fd; void *Thread(void *x) { char buf; int n = read(fd, &buf, 1); if (n != 1) exit(printf("read failed: n=%d errno=%d\n", n, errno)); return 0; } int main() { fd = open("/dev/urandom", O_RDONLY); int fd2 = open("/dev/urandom", O_RDONLY); if (fd == -1 || fd2 == -1) exit(printf("open failed\n")); pthread_t th; pthread_create(&th, 0, Thread, 0); if (dup2(fd2, fd) == -1) exit(printf("dup2 failed\n")); pthread_join(th, 0); if (close(fd) == -1) exit(printf("close failed\n")); if (close(fd2) == -1) exit(printf("close failed\n")); printf("DONE\n"); return 0; } Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: Al Viro --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index 3d2eb4c542a4..6c672ad329e9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -664,11 +664,17 @@ static struct file *__fget(unsigned int fd, fmode_t mask) struct file *file; rcu_read_lock(); +loop: file = fcheck_files(files, fd); if (file) { - /* File object ref couldn't be taken */ - if ((file->f_mode & mask) || !get_file_rcu(file)) + /* File object ref couldn't be taken. + * dup2() atomicity guarantee is the reason + * we loop to catch the new file (or NULL pointer) + */ + if (file->f_mode & mask) file = NULL; + else if (!get_file_rcu(file)) + goto loop; } rcu_read_unlock(); From fb39f98d14d2c92531e79bcda5a0b802e2aaf202 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2015 10:19:11 +0200 Subject: [PATCH 417/839] printk: Increase maximum CONFIG_LOG_BUF_SHIFT from 21 to 25 So I tried to some kernel debugging that produced a ton of kernel messages on a big box, and wanted to save them all: but CONFIG_LOG_BUF_SHIFT maxes out at 21 (2 MB). Increase it to 25 (32 MB). This does not affect any existing config or defaults. Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index b999fa381bf9..db615e720d28 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -820,7 +820,7 @@ config IKCONFIG_PROC config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - range 12 21 + range 12 25 default 17 depends on PRINTK help From bd5717a4632cdecafe82d03de7dcb3b1876e2828 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 26 Jun 2015 14:47:21 -0700 Subject: [PATCH 418/839] hwspinlock: qcom: Correct msb in regmap_field msb of the regmap_field was mistakenly given the value 32, to set all bits in the regmap update mask; although incorrect this worked until 921cc294, where the mask calculation was corrected. Signed-off-by: Bjorn Andersson Signed-off-by: Ohad Ben-Cohen --- drivers/hwspinlock/qcom_hwspinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c index 93b62e055ff6..c752447fbac7 100644 --- a/drivers/hwspinlock/qcom_hwspinlock.c +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -123,7 +123,7 @@ static int qcom_hwspinlock_probe(struct platform_device *pdev) for (i = 0; i < QCOM_MUTEX_NUM_LOCKS; i++) { field.reg = base + i * stride; field.lsb = 0; - field.msb = 32; + field.msb = 31; bank->lock[i].priv = devm_regmap_field_alloc(&pdev->dev, regmap, field); From fd28f5d439fca77348c129d5b73043a56f8a0296 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 1 Jul 2015 14:08:31 +0200 Subject: [PATCH 419/839] arm64: Don't report clear pmds and puds as huge The current pmd_huge() and pud_huge() functions simply check if the table bit is not set and reports the entries as huge in that case. This is counter-intuitive as a clear pmd/pud cannot also be a huge pmd/pud, and it is inconsistent with at least arm and x86. To prevent others from making the same mistake as me in looking at code that calls these functions and to fix an issue with KVM on arm64 that causes memory corruption due to incorrect page reference counting resulting from this mistake, let's change the behavior. Signed-off-by: Christoffer Dall Reviewed-by: Steve Capper Acked-by: Marc Zyngier Fixes: 084bd29810a5 ("ARM64: mm: HugeTLB support.") Cc: # 3.11+ Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2de9d2e59d96..0eeb4f0930a0 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,13 +40,13 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) int pmd_huge(pmd_t pmd) { - return !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } int pud_huge(pud_t pud) { #ifndef __PAGETABLE_PMD_FOLDED - return !(pud_val(pud) & PUD_TABLE_BIT); + return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); #else return 0; #endif From 0ad0b3255a08020eaf50e34ef0d6df5bdf5e09ed Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:55 +0200 Subject: [PATCH 420/839] fuse: initialize fc->release before calling it fc->release is called from fuse_conn_put() which was used in the error cleanup before fc->release was initialized. [Jeremiah Mahler : assign fc->release after calling fuse_conn_init(fc) instead of before.] Signed-off-by: Miklos Szeredi Fixes: a325f9b92273 ("fuse: update fuse_conn_init() and separate out fuse_conn_kill()") Cc: #v2.6.31+ --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f39..cec8abbe2c8c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1026,6 +1026,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; fc->dev = sb->s_dev; fc->sb = sb; @@ -1040,7 +1041,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; From 42dc6211c54aa93e6a483469d07bd06f91713af7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:56 +0200 Subject: [PATCH 421/839] fuse: fix background request if not connected request_end() expects fc->num_background and fc->active_background to have been incremented, which is not the case in fuse_request_send_nowait() failure path. So instead just call the ->end() callback (which is actually set by all callers). Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c8b68ab2e574..6aa4803510e7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -604,13 +604,16 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { + BUG_ON(!req->end); spin_lock(&fc->lock); if (fc->connected) { fuse_request_send_nowait_locked(fc, req); spin_unlock(&fc->lock); } else { + spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; - request_end(fc, req); + req->end(fc, req); + fuse_put_request(fc, req); } } From 73e0e738441b26a2dfc1ccdf1462cd1dc13c8cea Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:56 +0200 Subject: [PATCH 422/839] fuse: reset waiting Reset req->waiting in fuse_put_request(). This is needed for correct accounting in fc->num_waiting for reserved requests. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6aa4803510e7..24f1d77b87a4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -287,8 +287,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } - if (req->waiting) + if (req->waiting) { atomic_dec(&fc->num_waiting); + req->waiting = 0; + } if (req->stolen_file) put_reserved_req(fc, req); From 5437f2417225dc89c785867f4790012668006abc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:56 +0200 Subject: [PATCH 423/839] fuse: account as waiting before queuing for background Move accounting of fc->num_waiting to the point where the request actually starts waiting. This is earlier than the current queue_request() for background requests, since they might be waiting on the fc->bg_queue before being queued on fc->pending. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 24f1d77b87a4..9e0ed3e714cb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -327,10 +327,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fc->pending); req->state = FUSE_REQ_PENDING; - if (!req->waiting) { - req->waiting = 1; - atomic_inc(&fc->num_waiting); - } wake_up(&fc->waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); } @@ -519,6 +515,10 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; + if (!req->waiting) { + req->waiting = 1; + atomic_inc(&fc->num_waiting); + } __fuse_request_send(fc, req); } EXPORT_SYMBOL_GPL(fuse_request_send); @@ -592,6 +592,10 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) { BUG_ON(!req->background); + if (!req->waiting) { + req->waiting = 1; + atomic_inc(&fc->num_waiting); + } fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; From de15522646b9822d82b613d84cfeb4482370db3d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:57 +0200 Subject: [PATCH 424/839] fuse: check conn_error earlier fc->conn_error is set once in FUSE_INIT reply and never cleared. Check it in request allocation, there's no sense in doing all the preparation if sending will surely fail. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9e0ed3e714cb..155610fef4a7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -168,6 +168,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, if (!fc->connected) goto out; + err = -ECONNREFUSED; + if (fc->conn_error) + goto out; + req = fuse_request_alloc(npages); err = -ENOMEM; if (!req) { @@ -498,8 +502,6 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; - else if (fc->conn_error) - req->out.h.error = -ECONNREFUSED; else { req->in.h.unique = fuse_get_unique(fc); queue_request(fc, req); From f0139aa819dfc8e80f664bd08800fc48233cb94e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:57 +0200 Subject: [PATCH 425/839] fuse: fold fuse_request_send_nowait() into single caller And the same with fuse_request_send_nowait_locked(). Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 155610fef4a7..de110de881f7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -590,14 +590,20 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) return ret; } -static void fuse_request_send_nowait_locked(struct fuse_conn *fc, - struct fuse_req *req) +/* + * Called under fc->lock + * + * fc->connected must have been checked previously + */ +void fuse_request_send_background_locked(struct fuse_conn *fc, + struct fuse_req *req) { BUG_ON(!req->background); if (!req->waiting) { req->waiting = 1; atomic_inc(&fc->num_waiting); } + req->isreply = 1; fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; @@ -610,12 +616,12 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, flush_bg_queue(fc); } -static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { BUG_ON(!req->end); spin_lock(&fc->lock); if (fc->connected) { - fuse_request_send_nowait_locked(fc, req); + fuse_request_send_background_locked(fc, req); spin_unlock(&fc->lock); } else { spin_unlock(&fc->lock); @@ -624,12 +630,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) fuse_put_request(fc, req); } } - -void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait(fc, req); -} EXPORT_SYMBOL_GPL(fuse_request_send_background); static int fuse_request_send_notify_reply(struct fuse_conn *fc, @@ -649,18 +649,6 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, return err; } -/* - * Called under fc->lock - * - * fc->connected must have been checked previously - */ -void fuse_request_send_background_locked(struct fuse_conn *fc, - struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait_locked(fc, req); -} - void fuse_force_forget(struct file *file, u64 nodeid) { struct inode *inode = file_inode(file); From ccd0a0bd16ff01bb27bacbeb2e0e4983dc299502 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:57 +0200 Subject: [PATCH 426/839] fuse: call fuse_abort_conn() in dev release fuse_abort_conn() does all the work done by fuse_dev_release() and more. "More" consists of: end_io_requests(fc); wake_up_all(&fc->waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); All of which should be no-op (WARN_ON's added). Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index de110de881f7..e5901bf8d600 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2199,14 +2199,9 @@ int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = fuse_get_conn(file); if (fc) { - spin_lock(&fc->lock); - fc->connected = 0; - fc->blocked = 0; - fuse_set_initialized(fc); - end_queued_requests(fc); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); - spin_unlock(&fc->lock); + WARN_ON(!list_empty(&fc->io)); + WARN_ON(fc->fasync != NULL); + fuse_abort_conn(fc); fuse_conn_put(fc); } From 0d8e84b0432beb6d11a1c82deeb9dc1a7bee02c0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:58 +0200 Subject: [PATCH 427/839] fuse: simplify request abort - don't end the request while req->locked is true - make unlock_request() return an error if the connection was aborted Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 119 +++++++++++++++++++------------------------------- 1 file changed, 46 insertions(+), 73 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e5901bf8d600..3b979abb7b54 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -382,8 +382,8 @@ __releases(fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; - list_del(&req->list); - list_del(&req->intr_entry); + list_del_init(&req->list); + list_del_init(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { req->background = 0; @@ -439,8 +439,6 @@ __acquires(fc->lock) /* Any signal may interrupt this */ wait_answer_interruptible(fc, req); - if (req->aborted) - goto aborted; if (req->state == FUSE_REQ_FINISHED) return; @@ -457,8 +455,6 @@ __acquires(fc->lock) wait_answer_interruptible(fc, req); restore_sigs(&oldset); - if (req->aborted) - goto aborted; if (req->state == FUSE_REQ_FINISHED) return; @@ -478,22 +474,6 @@ __acquires(fc->lock) spin_unlock(&fc->lock); wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); spin_lock(&fc->lock); - - if (!req->aborted) - return; - - aborted: - BUG_ON(req->state != FUSE_REQ_FINISHED); - if (req->locked) { - /* This is uninterruptible sleep, because data is - being copied to/from the buffers of req. During - locked state, there mustn't be any filesystem - operation (e.g. page fault), since that could lead - to deadlock */ - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - spin_lock(&fc->lock); - } } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) @@ -690,19 +670,21 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) } /* - * Unlock request. If it was aborted during being locked, the - * requester thread is currently waiting for it to be unlocked, so - * wake it up. + * Unlock request. If it was aborted while locked, caller is responsible + * for unlocking and ending the request. */ -static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) +static int unlock_request(struct fuse_conn *fc, struct fuse_req *req) { + int err = 0; if (req) { spin_lock(&fc->lock); - req->locked = 0; if (req->aborted) - wake_up(&req->waitq); + err = -ENOENT; + else + req->locked = 0; spin_unlock(&fc->lock); } + return err; } struct fuse_copy_state { @@ -759,7 +741,10 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) struct page *page; int err; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->fc, cs->req); + if (err) + return err; + fuse_copy_finish(cs); if (cs->pipebufs) { struct pipe_buffer *buf = cs->pipebufs; @@ -859,7 +844,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->fc, cs->req); + if (err) + return err; + fuse_copy_finish(cs); err = buf->ops->confirm(cs->pipe, buf); @@ -949,11 +937,15 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, unsigned offset, unsigned count) { struct pipe_buffer *buf; + int err; if (cs->nr_segs == cs->pipe->buffers) return -EIO; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->fc, cs->req); + if (err) + return err; + fuse_copy_finish(cs); buf = cs->pipebufs; @@ -1318,7 +1310,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, fuse_copy_finish(cs); spin_lock(&fc->lock); req->locked = 0; - if (req->aborted) { + if (!fc->connected) { request_end(fc, req); return -ENODEV; } @@ -1910,13 +1902,6 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (!req) goto err_unlock; - if (req->aborted) { - spin_unlock(&fc->lock); - fuse_copy_finish(cs); - spin_lock(&fc->lock); - request_end(fc, req); - return -ENOENT; - } /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { err = -EINVAL; @@ -1947,10 +1932,9 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, spin_lock(&fc->lock); req->locked = 0; - if (!err) { - if (req->aborted) - err = -ENOENT; - } else if (!req->aborted) + if (!fc->connected) + err = -ENOENT; + else if (err) req->out.h.error = -EIO; request_end(fc, req); @@ -2097,37 +2081,31 @@ __acquires(fc->lock) /* * Abort requests under I/O * - * The requests are set to aborted and finished, and the request - * waiter is woken up. This will make request_wait_answer() wait - * until the request is unlocked and then return. + * Separate out unlocked requests, they should be finished off immediately. + * Locked requests will be finished after unlock; see unlock_request(). * - * If the request is asynchronous, then the end function needs to be - * called after waiting for the request to be unlocked (if it was - * locked). + * Next finish off the unlocked requests. It is possible that some request will + * finish before we can. This is OK, the request will in that case be removed + * from the list before we touch it. */ static void end_io_requests(struct fuse_conn *fc) __releases(fc->lock) __acquires(fc->lock) { - while (!list_empty(&fc->io)) { - struct fuse_req *req = - list_entry(fc->io.next, struct fuse_req, list); - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + struct fuse_req *req, *next; + LIST_HEAD(to_end); - req->aborted = 1; + list_for_each_entry_safe(req, next, &fc->io, list) { req->out.h.error = -ECONNABORTED; - req->state = FUSE_REQ_FINISHED; - list_del_init(&req->list); - wake_up(&req->waitq); - if (end) { - req->end = NULL; - __fuse_get_request(req); - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - end(fc, req); - fuse_put_request(fc, req); - spin_lock(&fc->lock); - } + req->aborted = 1; + if (!req->locked) + list_move(&req->list, &to_end); + } + while (!list_empty(&to_end)) { + req = list_first_entry(&to_end, struct fuse_req, list); + __fuse_get_request(req); + request_end(fc, req); + spin_lock(&fc->lock); } } @@ -2169,13 +2147,8 @@ static void end_polls(struct fuse_conn *fc) * is the combination of an asynchronous request and the tricky * deadlock (see Documentation/filesystems/fuse.txt). * - * During the aborting, progression of requests from the pending and - * processing lists onto the io list, and progression of new requests - * onto the pending list is prevented by req->connected being false. - * - * Progression of requests under I/O to the processing list is - * prevented by the req->aborted flag being true for these requests. - * For this reason requests on the io list must be aborted first. + * Request progression from one list to the next is prevented by + * fc->connected being false. */ void fuse_abort_conn(struct fuse_conn *fc) { From 825d6d3395e88a616e4c953984d77eeacbad4310 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:58 +0200 Subject: [PATCH 428/839] fuse: req use bitops Finer grained locking will mean there's no single lock to protect modification of bitfileds in fuse_req. So move to using bitops. Can use the non-atomic variants for those which happen while the request definitely has only one reference. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 71 ++++++++++++++++++++++++------------------------ fs/fuse/file.c | 17 ++++++------ fs/fuse/fuse_i.h | 49 ++++++++++++++++----------------- fs/fuse/inode.c | 6 ++-- 4 files changed, 71 insertions(+), 72 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3b979abb7b54..dcfef5475ada 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -181,8 +181,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, } fuse_req_init_context(req); - req->waiting = 1; - req->background = for_background; + __set_bit(FR_WAITING, &req->flags); + if (for_background) + __set_bit(FR_BACKGROUND, &req->flags); + return req; out: @@ -272,15 +274,15 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, req = get_reserved_req(fc, file); fuse_req_init_context(req); - req->waiting = 1; - req->background = 0; + __set_bit(FR_WAITING, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); return req; } void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (atomic_dec_and_test(&req->count)) { - if (unlikely(req->background)) { + if (test_bit(FR_BACKGROUND, &req->flags)) { /* * We get here in the unlikely case that a background * request was allocated but not sent @@ -291,9 +293,9 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } - if (req->waiting) { + if (test_bit(FR_WAITING, &req->flags)) { + __clear_bit(FR_WAITING, &req->flags); atomic_dec(&fc->num_waiting); - req->waiting = 0; } if (req->stolen_file) @@ -385,9 +387,8 @@ __releases(fc->lock) list_del_init(&req->list); list_del_init(&req->intr_entry); req->state = FUSE_REQ_FINISHED; - if (req->background) { - req->background = 0; - + if (test_bit(FR_BACKGROUND, &req->flags)) { + clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) fc->blocked = 0; @@ -442,12 +443,12 @@ __acquires(fc->lock) if (req->state == FUSE_REQ_FINISHED) return; - req->interrupted = 1; + set_bit(FR_INTERRUPTED, &req->flags); if (req->state == FUSE_REQ_SENT) queue_interrupt(fc, req); } - if (!req->force) { + if (!test_bit(FR_FORCE, &req->flags)) { sigset_t oldset; /* Only fatal signals may interrupt this */ @@ -478,7 +479,7 @@ __acquires(fc->lock) static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - BUG_ON(req->background); + BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; @@ -496,9 +497,9 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - req->isreply = 1; - if (!req->waiting) { - req->waiting = 1; + __set_bit(FR_ISREPLY, &req->flags); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); atomic_inc(&fc->num_waiting); } __fuse_request_send(fc, req); @@ -578,12 +579,12 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) void fuse_request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) { - BUG_ON(!req->background); - if (!req->waiting) { - req->waiting = 1; + BUG_ON(!test_bit(FR_BACKGROUND, &req->flags)); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); atomic_inc(&fc->num_waiting); } - req->isreply = 1; + __set_bit(FR_ISREPLY, &req->flags); fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; @@ -617,7 +618,7 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, { int err = -ENODEV; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; spin_lock(&fc->lock); if (fc->connected) { @@ -644,7 +645,7 @@ void fuse_force_forget(struct file *file, u64 nodeid) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); __fuse_request_send(fc, req); /* ignore errors */ fuse_put_request(fc, req); @@ -660,10 +661,10 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) int err = 0; if (req) { spin_lock(&fc->lock); - if (req->aborted) + if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else - req->locked = 1; + set_bit(FR_LOCKED, &req->flags); spin_unlock(&fc->lock); } return err; @@ -678,10 +679,10 @@ static int unlock_request(struct fuse_conn *fc, struct fuse_req *req) int err = 0; if (req) { spin_lock(&fc->lock); - if (req->aborted) + if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else - req->locked = 0; + clear_bit(FR_LOCKED, &req->flags); spin_unlock(&fc->lock); } return err; @@ -902,7 +903,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) err = 0; spin_lock(&cs->fc->lock); - if (cs->req->aborted) + if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else *pagep = newpage; @@ -1309,7 +1310,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, (struct fuse_arg *) in->args, 0); fuse_copy_finish(cs); spin_lock(&fc->lock); - req->locked = 0; + clear_bit(FR_LOCKED, &req->flags); if (!fc->connected) { request_end(fc, req); return -ENODEV; @@ -1319,12 +1320,12 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, request_end(fc, req); return err; } - if (!req->isreply) + if (!test_bit(FR_ISREPLY, &req->flags)) { request_end(fc, req); - else { + } else { req->state = FUSE_REQ_SENT; list_move_tail(&req->list, &fc->processing); - if (req->interrupted) + if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fc, req); spin_unlock(&fc->lock); } @@ -1921,7 +1922,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, req->state = FUSE_REQ_WRITING; list_move(&req->list, &fc->io); req->out.h = oh; - req->locked = 1; + set_bit(FR_LOCKED, &req->flags); cs->req = req; if (!req->out.page_replace) cs->move_pages = 0; @@ -1931,7 +1932,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, fuse_copy_finish(cs); spin_lock(&fc->lock); - req->locked = 0; + clear_bit(FR_LOCKED, &req->flags); if (!fc->connected) err = -ENOENT; else if (err) @@ -2097,8 +2098,8 @@ __acquires(fc->lock) list_for_each_entry_safe(req, next, &fc->io, list) { req->out.h.error = -ECONNABORTED; - req->aborted = 1; - if (!req->locked) + set_bit(FR_ABORTED, &req->flags); + if (!test_bit(FR_LOCKED, &req->flags)) list_move(&req->list, &to_end); } while (!list_empty(&to_end)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ef05b5c4cff..bf272263c1a2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -96,17 +96,17 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) * Drop the release request when client does not * implement 'open' */ - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); fuse_request_send_background(ff->fc, req); } kfree(ff); @@ -299,8 +299,8 @@ void fuse_sync_release(struct fuse_file *ff, int flags) { WARN_ON(atomic_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - ff->reserved_req->background = 0; + __set_bit(FR_FORCE, &ff->reserved_req->flags); + __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags); fuse_request_send(ff->fc, ff->reserved_req); fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); @@ -426,7 +426,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->force = 1; + __set_bit(FR_FORCE, &req->flags); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -1611,7 +1611,8 @@ static int fuse_writepage_locked(struct page *page) if (!req) goto err; - req->background = 1; /* writeback always goes to bg_queue */ + /* writeback always goes to bg_queue */ + __set_bit(FR_BACKGROUND, &req->flags); tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; @@ -1830,7 +1831,7 @@ static int fuse_writepages_fill(struct page *page, req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->misc.write.next = NULL; req->in.argpages = 1; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); req->num_pages = 0; req->end = fuse_writepage_end; req->inode = inode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7354dc142a50..4503e995c7b2 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -266,6 +266,27 @@ struct fuse_io_priv { struct completion *done; }; +/** + * Request flags + * + * FR_ISREPLY: set if the request has reply + * FR_FORCE: force sending of the request even if interrupted + * FR_BACKGROUND: request is sent in the background + * FR_WAITING: request is counted as "waiting" + * FR_ABORTED: the request was aborted + * FR_INTERRUPTED: the request has been interrupted + * FR_LOCKED: data is being copied to/from the request + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, +}; + /** * A request to the client */ @@ -283,32 +304,8 @@ struct fuse_req { /** Unique ID for the interrupt request */ u64 intr_unique; - /* - * The following bitfields are either set once before the - * request is queued or setting/clearing them is protected by - * fuse_conn->lock - */ - - /** True if the request has reply */ - unsigned isreply:1; - - /** Force sending of the request even if interrupted */ - unsigned force:1; - - /** The request was aborted */ - unsigned aborted:1; - - /** Request is sent in the background */ - unsigned background:1; - - /** The request has been interrupted */ - unsigned interrupted:1; - - /** Data is being copied to/from the request */ - unsigned locked:1; - - /** Request is counted as "waiting" */ - unsigned waiting:1; + /* Request flags, updated with test/set/clear_bit() */ + unsigned long flags; /** State of the request */ enum fuse_req_state state; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cec8abbe2c8c..f7c9b7225ec5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -362,8 +362,8 @@ static void fuse_send_destroy(struct fuse_conn *fc) if (req && fc->conn_init) { fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; - req->force = 1; - req->background = 0; + __set_bit(FR_FORCE, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(fc, req); fuse_put_request(fc, req); } @@ -1060,7 +1060,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; - init_req->background = 1; + __set_bit(FR_BACKGROUND, &init_req->flags); if (is_bdev) { fc->destroy_req = fuse_request_alloc(0); From dc00809a53edd15369906b90407a2d5b976289f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:58 +0200 Subject: [PATCH 429/839] fuse: use per req lock for lock/unlock_request() Reuse req->waitq.lock for protecting FR_ABORTED and FR_LOCKED flags. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 42 ++++++++++++++++++++---------------------- fs/fuse/fuse_i.h | 4 ++++ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dcfef5475ada..92c7691df429 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -656,16 +656,16 @@ void fuse_force_forget(struct file *file, u64 nodeid) * anything that could cause a page-fault. If the request was already * aborted bail out. */ -static int lock_request(struct fuse_conn *fc, struct fuse_req *req) +static int lock_request(struct fuse_req *req) { int err = 0; if (req) { - spin_lock(&fc->lock); + spin_lock(&req->waitq.lock); if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else set_bit(FR_LOCKED, &req->flags); - spin_unlock(&fc->lock); + spin_unlock(&req->waitq.lock); } return err; } @@ -674,22 +674,21 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) * Unlock request. If it was aborted while locked, caller is responsible * for unlocking and ending the request. */ -static int unlock_request(struct fuse_conn *fc, struct fuse_req *req) +static int unlock_request(struct fuse_req *req) { int err = 0; if (req) { - spin_lock(&fc->lock); + spin_lock(&req->waitq.lock); if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else clear_bit(FR_LOCKED, &req->flags); - spin_unlock(&fc->lock); + spin_unlock(&req->waitq.lock); } return err; } struct fuse_copy_state { - struct fuse_conn *fc; int write; struct fuse_req *req; struct iov_iter *iter; @@ -703,13 +702,10 @@ struct fuse_copy_state { unsigned move_pages:1; }; -static void fuse_copy_init(struct fuse_copy_state *cs, - struct fuse_conn *fc, - int write, +static void fuse_copy_init(struct fuse_copy_state *cs, int write, struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); - cs->fc = fc; cs->write = write; cs->iter = iter; } @@ -742,7 +738,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) struct page *page; int err; - err = unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); if (err) return err; @@ -794,7 +790,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) iov_iter_advance(cs->iter, err); } - return lock_request(cs->fc, cs->req); + return lock_request(cs->req); } /* Do as much copy to/from userspace buffer as we can */ @@ -845,7 +841,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; - err = unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); if (err) return err; @@ -902,12 +898,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) lru_cache_add_file(newpage); err = 0; - spin_lock(&cs->fc->lock); + spin_lock(&cs->req->waitq.lock); if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else *pagep = newpage; - spin_unlock(&cs->fc->lock); + spin_unlock(&cs->req->waitq.lock); if (err) { unlock_page(newpage); @@ -927,7 +923,7 @@ out_fallback: cs->pg = buf->page; cs->offset = buf->offset; - err = lock_request(cs->fc, cs->req); + err = lock_request(cs->req); if (err) return err; @@ -943,7 +939,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, if (cs->nr_segs == cs->pipe->buffers) return -EIO; - err = unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); if (err) return err; @@ -1358,7 +1354,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) if (!iter_is_iovec(to)) return -EINVAL; - fuse_copy_init(&cs, fc, 1, to); + fuse_copy_init(&cs, 1, to); return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to)); } @@ -1380,7 +1376,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (!bufs) return -ENOMEM; - fuse_copy_init(&cs, fc, 1, NULL); + fuse_copy_init(&cs, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; ret = fuse_dev_do_read(fc, in, &cs, len); @@ -1958,7 +1954,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) if (!iter_is_iovec(from)) return -EINVAL; - fuse_copy_init(&cs, fc, 0, from); + fuse_copy_init(&cs, 0, from); return fuse_dev_do_write(fc, &cs, iov_iter_count(from)); } @@ -2023,7 +2019,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, } pipe_unlock(pipe); - fuse_copy_init(&cs, fc, 0, NULL); + fuse_copy_init(&cs, 0, NULL); cs.pipebufs = bufs; cs.nr_segs = nbuf; cs.pipe = pipe; @@ -2098,9 +2094,11 @@ __acquires(fc->lock) list_for_each_entry_safe(req, next, &fc->io, list) { req->out.h.error = -ECONNABORTED; + spin_lock(&req->waitq.lock); set_bit(FR_ABORTED, &req->flags); if (!test_bit(FR_LOCKED, &req->flags)) list_move(&req->list, &to_end); + spin_unlock(&req->waitq.lock); } while (!list_empty(&to_end)) { req = list_first_entry(&to_end, struct fuse_req, list); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4503e995c7b2..7257adba7ecd 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -289,6 +289,10 @@ enum fuse_req_flag { /** * A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fc->lock, tested under both) */ struct fuse_req { /** This can be on either pending processing or io lists in From b716d425385ed392adc8e619020c1d77ae5ec1cb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:59 +0200 Subject: [PATCH 430/839] fuse: fold helpers into abort Fold end_io_requests() and end_queued_requests() into fuse_abort_conn(). Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 93 +++++++++++++++++++++------------------------------ 1 file changed, 38 insertions(+), 55 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 92c7691df429..fc3268b65bf8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2075,51 +2075,6 @@ __acquires(fc->lock) } } -/* - * Abort requests under I/O - * - * Separate out unlocked requests, they should be finished off immediately. - * Locked requests will be finished after unlock; see unlock_request(). - * - * Next finish off the unlocked requests. It is possible that some request will - * finish before we can. This is OK, the request will in that case be removed - * from the list before we touch it. - */ -static void end_io_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - struct fuse_req *req, *next; - LIST_HEAD(to_end); - - list_for_each_entry_safe(req, next, &fc->io, list) { - req->out.h.error = -ECONNABORTED; - spin_lock(&req->waitq.lock); - set_bit(FR_ABORTED, &req->flags); - if (!test_bit(FR_LOCKED, &req->flags)) - list_move(&req->list, &to_end); - spin_unlock(&req->waitq.lock); - } - while (!list_empty(&to_end)) { - req = list_first_entry(&to_end, struct fuse_req, list); - __fuse_get_request(req); - request_end(fc, req); - spin_lock(&fc->lock); - } -} - -static void end_queued_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); - while (forget_pending(fc)) - kfree(dequeue_forget(fc, 1, NULL)); -} - static void end_polls(struct fuse_conn *fc) { struct rb_node *p; @@ -2138,26 +2093,54 @@ static void end_polls(struct fuse_conn *fc) /* * Abort all requests. * - * Emergency exit in case of a malicious or accidental deadlock, or - * just a hung filesystem. + * Emergency exit in case of a malicious or accidental deadlock, or just a hung + * filesystem. * - * The same effect is usually achievable through killing the - * filesystem daemon and all users of the filesystem. The exception - * is the combination of an asynchronous request and the tricky - * deadlock (see Documentation/filesystems/fuse.txt). + * The same effect is usually achievable through killing the filesystem daemon + * and all users of the filesystem. The exception is the combination of an + * asynchronous request and the tricky deadlock (see + * Documentation/filesystems/fuse.txt). * - * Request progression from one list to the next is prevented by - * fc->connected being false. + * Request progression from one list to the next is prevented by fc->connected + * being false. + * + * Aborting requests under I/O goes as follows: 1: Separate out unlocked + * requests, they should be finished off immediately. Locked requests will be + * finished after unlock; see unlock_request(). 2: Finish off the unlocked + * requests. It is possible that some request will finish before we can. This + * is OK, the request will in that case be removed from the list before we touch + * it. */ void fuse_abort_conn(struct fuse_conn *fc) { spin_lock(&fc->lock); if (fc->connected) { + struct fuse_req *req, *next; + LIST_HEAD(to_end); + fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); - end_io_requests(fc); - end_queued_requests(fc); + list_for_each_entry_safe(req, next, &fc->io, list) { + req->out.h.error = -ECONNABORTED; + spin_lock(&req->waitq.lock); + set_bit(FR_ABORTED, &req->flags); + if (!test_bit(FR_LOCKED, &req->flags)) + list_move(&req->list, &to_end); + spin_unlock(&req->waitq.lock); + } + while (!list_empty(&to_end)) { + req = list_first_entry(&to_end, struct fuse_req, list); + __fuse_get_request(req); + request_end(fc, req); + spin_lock(&fc->lock); + } + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); + while (forget_pending(fc)) + kfree(dequeue_forget(fc, 1, NULL)); end_polls(fc); wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); From 41f982747e8175a4eb5e8d1939bdbb10f435b7f6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:59 +0200 Subject: [PATCH 431/839] fuse: rework abort Splice fc->pending and fc->processing lists into a common kill list while holding fc->lock. By the time we release fc->lock, pending and processing lists are empty and the io list contains only locked requests. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fc3268b65bf8..6cb0b0bc9029 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2101,9 +2101,6 @@ static void end_polls(struct fuse_conn *fc) * asynchronous request and the tricky deadlock (see * Documentation/filesystems/fuse.txt). * - * Request progression from one list to the next is prevented by fc->connected - * being false. - * * Aborting requests under I/O goes as follows: 1: Separate out unlocked * requests, they should be finished off immediately. Locked requests will be * finished after unlock; see unlock_request(). 2: Finish off the unlocked @@ -2116,7 +2113,8 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fc->lock); if (fc->connected) { struct fuse_req *req, *next; - LIST_HEAD(to_end); + LIST_HEAD(to_end1); + LIST_HEAD(to_end2); fc->connected = 0; fc->blocked = 0; @@ -2126,19 +2124,20 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&req->waitq.lock); set_bit(FR_ABORTED, &req->flags); if (!test_bit(FR_LOCKED, &req->flags)) - list_move(&req->list, &to_end); + list_move(&req->list, &to_end1); spin_unlock(&req->waitq.lock); } - while (!list_empty(&to_end)) { - req = list_first_entry(&to_end, struct fuse_req, list); + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + list_splice_init(&fc->pending, &to_end2); + list_splice_init(&fc->processing, &to_end2); + while (!list_empty(&to_end1)) { + req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); request_end(fc, req); spin_lock(&fc->lock); } - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); + end_requests(fc, &to_end2); while (forget_pending(fc)) kfree(dequeue_forget(fc, 1, NULL)); end_polls(fc); From 7d2e0a099c7685a7355c27a2c3dc76ea7cfc8283 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:00 +0200 Subject: [PATCH 432/839] fuse: simplify unique ctr Since it's a 64bit counter, it's never gonna wrap around. Remove code dealing with that possibility. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 7 +------ fs/fuse/inode.c | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6cb0b0bc9029..2014cee76036 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -319,12 +319,7 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) static u64 fuse_get_unique(struct fuse_conn *fc) { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - - return fc->reqctr; + return ++fc->reqctr; } static void queue_request(struct fuse_conn *fc, struct fuse_req *req) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f7c9b7225ec5..2f902b8edcf2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -588,7 +588,6 @@ void fuse_conn_init(struct fuse_conn *fc) fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->reqctr = 0; fc->blocked = 0; fc->initialized = 0; fc->attr_version = 1; From c47752673acb130e5132db0e52363e15be260ca4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:00 +0200 Subject: [PATCH 433/839] fuse: don't hold lock over request_wait_answer() Only hold fc->lock over sections of request_wait_answer() that actually need it. If wait_event_interruptible() returns zero, it means that the request finished. Need to add memory barriers, though, to make sure that all relevant data in the request is synchronized. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2014cee76036..638aafde6e22 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -381,6 +381,7 @@ __releases(fc->lock) req->end = NULL; list_del_init(&req->list); list_del_init(&req->intr_entry); + smp_wmb(); req->state = FUSE_REQ_FINISHED; if (test_bit(FR_BACKGROUND, &req->flags)) { clear_bit(FR_BACKGROUND, &req->flags); @@ -407,19 +408,6 @@ __releases(fc->lock) fuse_put_request(fc, req); } -static void wait_answer_interruptible(struct fuse_conn *fc, - struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) -{ - if (signal_pending(current)) - return; - - spin_unlock(&fc->lock); - wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); -} - static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) { list_add_tail(&req->intr_entry, &fc->interrupts); @@ -428,19 +416,21 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) { + int err; + if (!fc->no_interrupt) { /* Any signal may interrupt this */ - wait_answer_interruptible(fc, req); - - if (req->state == FUSE_REQ_FINISHED) + err = wait_event_interruptible(req->waitq, + req->state == FUSE_REQ_FINISHED); + if (!err) return; + spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); if (req->state == FUSE_REQ_SENT) queue_interrupt(fc, req); + spin_unlock(&fc->lock); } if (!test_bit(FR_FORCE, &req->flags)) { @@ -448,46 +438,51 @@ __acquires(fc->lock) /* Only fatal signals may interrupt this */ block_sigs(&oldset); - wait_answer_interruptible(fc, req); + err = wait_event_interruptible(req->waitq, + req->state == FUSE_REQ_FINISHED); restore_sigs(&oldset); - if (req->state == FUSE_REQ_FINISHED) + if (!err) return; + spin_lock(&fc->lock); /* Request is not yet in userspace, bail out */ if (req->state == FUSE_REQ_PENDING) { list_del(&req->list); + spin_unlock(&fc->lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } + spin_unlock(&fc->lock); } /* * Either request is already in userspace, or it was forced. * Wait it out. */ - spin_unlock(&fc->lock); wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); spin_lock(&fc->lock); - if (!fc->connected) + if (!fc->connected) { + spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; - else { + } else { req->in.h.unique = fuse_get_unique(fc); queue_request(fc, req); /* acquire extra reference, since request is still needed after request_end() */ __fuse_get_request(req); + spin_unlock(&fc->lock); request_wait_answer(fc, req); + /* Pairs with smp_wmb() in request_end() */ + smp_rmb(); } - spin_unlock(&fc->lock); } void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) From 7a3b2c754749c73b4a255b2a1070c24dba589098 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:00 +0200 Subject: [PATCH 434/839] fuse: simplify req states FUSE_REQ_INIT is actually the same state as FUSE_REQ_PENDING and FUSE_REQ_READING and FUSE_REQ_WRITING can be merged into a common FUSE_REQ_IO state. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 5 ++--- fs/fuse/file.c | 3 +-- fs/fuse/fuse_i.h | 6 ++---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 638aafde6e22..e5c541b7c7af 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -327,7 +327,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fc->pending); - req->state = FUSE_REQ_PENDING; wake_up(&fc->waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); } @@ -1274,7 +1273,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, } req = list_entry(fc->pending.next, struct fuse_req, list); - req->state = FUSE_REQ_READING; + req->state = FUSE_REQ_IO; list_move(&req->list, &fc->io); in = &req->in; @@ -1905,7 +1904,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, return nbytes; } - req->state = FUSE_REQ_WRITING; + req->state = FUSE_REQ_IO; list_move(&req->list, &fc->io); req->out.h = oh; set_bit(FR_LOCKED, &req->flags); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bf272263c1a2..d0c23d075427 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1743,8 +1743,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, } } - if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || - old_req->state == FUSE_REQ_PENDING)) { + if (old_req->num_pages == 1 && old_req->state == FUSE_REQ_PENDING) { struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); copy_highpage(old_req->pages[0], page); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7257adba7ecd..3fd65f613515 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -243,11 +243,9 @@ struct fuse_args { /** The request state */ enum fuse_req_state { - FUSE_REQ_INIT = 0, - FUSE_REQ_PENDING, - FUSE_REQ_READING, + FUSE_REQ_PENDING = 0, + FUSE_REQ_IO, FUSE_REQ_SENT, - FUSE_REQ_WRITING, FUSE_REQ_FINISHED }; From 33e14b4dfdc477344efbcd9b4218f2b350f0f893 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:01 +0200 Subject: [PATCH 435/839] fuse: req state use flags Use flags for representing the state in fuse_req. This is needed since req->list will be protected by different locks in different states, hence we'll want the state itself to be split into distinct bits, each protected with the relevant lock in that state. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 23 ++++++++++++++--------- fs/fuse/file.c | 2 +- fs/fuse/fuse_i.h | 17 ++++++----------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e5c541b7c7af..f1e2efd6b186 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -48,6 +48,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, req->pages = pages; req->page_descs = page_descs; req->max_pages = npages; + __set_bit(FR_PENDING, &req->flags); } static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) @@ -380,8 +381,10 @@ __releases(fc->lock) req->end = NULL; list_del_init(&req->list); list_del_init(&req->intr_entry); + WARN_ON(test_bit(FR_PENDING, &req->flags)); + WARN_ON(test_bit(FR_SENT, &req->flags)); smp_wmb(); - req->state = FUSE_REQ_FINISHED; + set_bit(FR_FINISHED, &req->flags); if (test_bit(FR_BACKGROUND, &req->flags)) { clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) @@ -421,13 +424,13 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (!fc->no_interrupt) { /* Any signal may interrupt this */ err = wait_event_interruptible(req->waitq, - req->state == FUSE_REQ_FINISHED); + test_bit(FR_FINISHED, &req->flags)); if (!err) return; spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); - if (req->state == FUSE_REQ_SENT) + if (test_bit(FR_SENT, &req->flags)) queue_interrupt(fc, req); spin_unlock(&fc->lock); } @@ -438,7 +441,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) /* Only fatal signals may interrupt this */ block_sigs(&oldset); err = wait_event_interruptible(req->waitq, - req->state == FUSE_REQ_FINISHED); + test_bit(FR_FINISHED, &req->flags)); restore_sigs(&oldset); if (!err) @@ -446,7 +449,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) spin_lock(&fc->lock); /* Request is not yet in userspace, bail out */ - if (req->state == FUSE_REQ_PENDING) { + if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); spin_unlock(&fc->lock); __fuse_put_request(req); @@ -460,7 +463,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) * Either request is already in userspace, or it was forced. * Wait it out. */ - wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) @@ -1273,7 +1276,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, } req = list_entry(fc->pending.next, struct fuse_req, list); - req->state = FUSE_REQ_IO; + clear_bit(FR_PENDING, &req->flags); list_move(&req->list, &fc->io); in = &req->in; @@ -1308,7 +1311,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (!test_bit(FR_ISREPLY, &req->flags)) { request_end(fc, req); } else { - req->state = FUSE_REQ_SENT; + set_bit(FR_SENT, &req->flags); list_move_tail(&req->list, &fc->processing); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fc, req); @@ -1904,7 +1907,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, return nbytes; } - req->state = FUSE_REQ_IO; + clear_bit(FR_SENT, &req->flags); list_move(&req->list, &fc->io); req->out.h = oh; set_bit(FR_LOCKED, &req->flags); @@ -2059,6 +2062,8 @@ __acquires(fc->lock) struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; + clear_bit(FR_PENDING, &req->flags); + clear_bit(FR_SENT, &req->flags); request_end(fc, req); spin_lock(&fc->lock); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d0c23d075427..64835cf58936 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1743,7 +1743,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, } } - if (old_req->num_pages == 1 && old_req->state == FUSE_REQ_PENDING) { + if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) { struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); copy_highpage(old_req->pages[0], page); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3fd65f613515..8eaf3b0de033 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -241,14 +241,6 @@ struct fuse_args { #define FUSE_ARGS(args) struct fuse_args args = {} -/** The request state */ -enum fuse_req_state { - FUSE_REQ_PENDING = 0, - FUSE_REQ_IO, - FUSE_REQ_SENT, - FUSE_REQ_FINISHED -}; - /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { int async; @@ -274,6 +266,9 @@ struct fuse_io_priv { * FR_ABORTED: the request was aborted * FR_INTERRUPTED: the request has been interrupted * FR_LOCKED: data is being copied to/from the request + * FR_PENDING: request is not yet in userspace + * FR_SENT: request is in userspace, waiting for an answer + * FR_FINISHED: request is finished */ enum fuse_req_flag { FR_ISREPLY, @@ -283,6 +278,9 @@ enum fuse_req_flag { FR_ABORTED, FR_INTERRUPTED, FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, }; /** @@ -309,9 +307,6 @@ struct fuse_req { /* Request flags, updated with test/set/clear_bit() */ unsigned long flags; - /** State of the request */ - enum fuse_req_state state; - /** The request input */ struct fuse_in in; From f88996a93324483ff3ec027312bbacacf97a555b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:01 +0200 Subject: [PATCH 436/839] fuse: separate out input queue The input queue contains normal requests (fc->pending), forgets (fc->forget_*) and interrupts (fc->interrupts). There's also fc->waitq and fc->fasync for waking up the readers of the fuse device when a request is available. The fc->reqctr is also moved to the input queue (assigned to the request when the request is added to the input queue. This patch just rearranges the fields, no functional change. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 134 ++++++++++++++++++++++++++--------------------- fs/fuse/fuse_i.h | 47 +++++++++-------- fs/fuse/inode.c | 14 +++-- 3 files changed, 111 insertions(+), 84 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f1e2efd6b186..24407e21fb82 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -318,32 +318,34 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } -static u64 fuse_get_unique(struct fuse_conn *fc) +static u64 fuse_get_unique(struct fuse_iqueue *fiq) { - return ++fc->reqctr; + return ++fiq->reqctr; } -static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) { req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); - list_add_tail(&req->list, &fc->pending); - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + list_add_tail(&req->list, &fiq->pending); + wake_up(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup) { + struct fuse_iqueue *fiq = &fc->iq; + forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); if (fc->connected) { - fc->forget_list_tail->next = forget; - fc->forget_list_tail = forget; - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + fiq->forget_list_tail->next = forget; + fiq->forget_list_tail = forget; + wake_up(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } else { kfree(forget); } @@ -355,12 +357,13 @@ static void flush_bg_queue(struct fuse_conn *fc) while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; + struct fuse_iqueue *fiq = &fc->iq; req = list_entry(fc->bg_queue.next, struct fuse_req, list); list_del(&req->list); fc->active_background++; - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); } } @@ -410,11 +413,11 @@ __releases(fc->lock) fuse_put_request(fc, req); } -static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) +static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { - list_add_tail(&req->intr_entry, &fc->interrupts); - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + list_add_tail(&req->intr_entry, &fiq->interrupts); + wake_up(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) @@ -431,7 +434,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); if (test_bit(FR_SENT, &req->flags)) - queue_interrupt(fc, req); + queue_interrupt(&fc->iq, req); spin_unlock(&fc->lock); } @@ -474,8 +477,10 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; } else { - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + struct fuse_iqueue *fiq = &fc->iq; + + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); /* acquire extra reference, since request is still needed after request_end() */ __fuse_get_request(req); @@ -609,12 +614,13 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, struct fuse_req *req, u64 unique) { int err = -ENODEV; + struct fuse_iqueue *fiq = &fc->iq; __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; spin_lock(&fc->lock); if (fc->connected) { - queue_request(fc, req); + queue_request(fiq, req); err = 0; } spin_unlock(&fc->lock); @@ -1045,15 +1051,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } -static int forget_pending(struct fuse_conn *fc) +static int forget_pending(struct fuse_iqueue *fiq) { - return fc->forget_list_head.next != NULL; + return fiq->forget_list_head.next != NULL; } -static int request_pending(struct fuse_conn *fc) +static int request_pending(struct fuse_iqueue *fiq) { - return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) || - forget_pending(fc); + return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) || + forget_pending(fiq); } /* Wait until a request is available on the pending list */ @@ -1061,10 +1067,11 @@ static void request_wait(struct fuse_conn *fc) __releases(fc->lock) __acquires(fc->lock) { + struct fuse_iqueue *fiq = &fc->iq; DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->connected && !request_pending(fc)) { + add_wait_queue_exclusive(&fiq->waitq, &wait); + while (fc->connected && !request_pending(fiq)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) break; @@ -1074,7 +1081,7 @@ __acquires(fc->lock) spin_lock(&fc->lock); } set_current_state(TASK_RUNNING); - remove_wait_queue(&fc->waitq, &wait); + remove_wait_queue(&fiq->waitq, &wait); } /* @@ -1095,7 +1102,7 @@ __releases(fc->lock) int err; list_del_init(&req->intr_entry); - req->intr_unique = fuse_get_unique(fc); + req->intr_unique = fuse_get_unique(&fc->iq); memset(&ih, 0, sizeof(ih)); memset(&arg, 0, sizeof(arg)); ih.len = reqsize; @@ -1115,21 +1122,21 @@ __releases(fc->lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, +static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, unsigned max, unsigned *countp) { - struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link *head = fiq->forget_list_head.next; struct fuse_forget_link **newhead = &head; unsigned count; for (count = 0; *newhead != NULL && count < max; count++) newhead = &(*newhead)->next; - fc->forget_list_head.next = *newhead; + fiq->forget_list_head.next = *newhead; *newhead = NULL; - if (fc->forget_list_head.next == NULL) - fc->forget_list_tail = &fc->forget_list_head; + if (fiq->forget_list_head.next == NULL) + fiq->forget_list_tail = &fiq->forget_list_head; if (countp != NULL) *countp = count; @@ -1143,14 +1150,15 @@ static int fuse_read_single_forget(struct fuse_conn *fc, __releases(fc->lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); struct fuse_forget_in arg = { .nlookup = forget->forget_one.nlookup, }; struct fuse_in_header ih = { .opcode = FUSE_FORGET, .nodeid = forget->forget_one.nodeid, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; @@ -1178,10 +1186,11 @@ __releases(fc->lock) unsigned max_forgets; unsigned count; struct fuse_forget_link *head; + struct fuse_iqueue *fiq = &fc->iq; struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_in_header ih = { .opcode = FUSE_BATCH_FORGET, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; @@ -1191,7 +1200,7 @@ __releases(fc->lock) } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); - head = dequeue_forget(fc, max_forgets, &count); + head = dequeue_forget(fiq, max_forgets, &count); spin_unlock(&fc->lock); arg.count = count; @@ -1223,7 +1232,9 @@ static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes) __releases(fc->lock) { - if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) + struct fuse_iqueue *fiq = &fc->iq; + + if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) return fuse_read_single_forget(fc, cs, nbytes); else return fuse_read_batch_forget(fc, cs, nbytes); @@ -1242,6 +1253,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { int err; + struct fuse_iqueue *fiq = &fc->iq; struct fuse_req *req; struct fuse_in *in; unsigned reqsize; @@ -1250,7 +1262,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, spin_lock(&fc->lock); err = -EAGAIN; if ((file->f_flags & O_NONBLOCK) && fc->connected && - !request_pending(fc)) + !request_pending(fiq)) goto err_unlock; request_wait(fc); @@ -1258,24 +1270,24 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (!fc->connected) goto err_unlock; err = -ERESTARTSYS; - if (!request_pending(fc)) + if (!request_pending(fiq)) goto err_unlock; - if (!list_empty(&fc->interrupts)) { - req = list_entry(fc->interrupts.next, struct fuse_req, + if (!list_empty(&fiq->interrupts)) { + req = list_entry(fiq->interrupts.next, struct fuse_req, intr_entry); return fuse_read_interrupt(fc, cs, nbytes, req); } - if (forget_pending(fc)) { - if (list_empty(&fc->pending) || fc->forget_batch-- > 0) + if (forget_pending(fiq)) { + if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) return fuse_read_forget(fc, cs, nbytes); - if (fc->forget_batch <= -8) - fc->forget_batch = 16; + if (fiq->forget_batch <= -8) + fiq->forget_batch = 16; } - req = list_entry(fc->pending.next, struct fuse_req, list); + req = list_entry(fiq->pending.next, struct fuse_req, list); clear_bit(FR_PENDING, &req->flags); list_move(&req->list, &fc->io); @@ -1314,7 +1326,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, set_bit(FR_SENT, &req->flags); list_move_tail(&req->list, &fc->processing); if (test_bit(FR_INTERRUPTED, &req->flags)) - queue_interrupt(fc, req); + queue_interrupt(fiq, req); spin_unlock(&fc->lock); } return reqsize; @@ -1900,7 +1912,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) - queue_interrupt(fc, req); + queue_interrupt(&fc->iq, req); spin_unlock(&fc->lock); fuse_copy_finish(cs); @@ -2033,16 +2045,18 @@ out: static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; + struct fuse_iqueue *fiq; struct fuse_conn *fc = fuse_get_conn(file); if (!fc) return POLLERR; - poll_wait(file, &fc->waitq, wait); + fiq = &fc->iq; + poll_wait(file, &fiq->waitq, wait); spin_lock(&fc->lock); if (!fc->connected) mask = POLLERR; - else if (request_pending(fc)) + else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; spin_unlock(&fc->lock); @@ -2104,6 +2118,8 @@ static void end_polls(struct fuse_conn *fc) */ void fuse_abort_conn(struct fuse_conn *fc) { + struct fuse_iqueue *fiq = &fc->iq; + spin_lock(&fc->lock); if (fc->connected) { struct fuse_req *req, *next; @@ -2123,7 +2139,7 @@ void fuse_abort_conn(struct fuse_conn *fc) } fc->max_background = UINT_MAX; flush_bg_queue(fc); - list_splice_init(&fc->pending, &to_end2); + list_splice_init(&fiq->pending, &to_end2); list_splice_init(&fc->processing, &to_end2); while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); @@ -2132,12 +2148,12 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fc->lock); } end_requests(fc, &to_end2); - while (forget_pending(fc)) - kfree(dequeue_forget(fc, 1, NULL)); + while (forget_pending(fiq)) + kfree(dequeue_forget(fiq, 1, NULL)); end_polls(fc); - wake_up_all(&fc->waitq); + wake_up_all(&fiq->waitq); wake_up_all(&fc->blocked_waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } spin_unlock(&fc->lock); } @@ -2148,7 +2164,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) struct fuse_conn *fc = fuse_get_conn(file); if (fc) { WARN_ON(!list_empty(&fc->io)); - WARN_ON(fc->fasync != NULL); + WARN_ON(fc->iq.fasync != NULL); fuse_abort_conn(fc); fuse_conn_put(fc); } @@ -2164,7 +2180,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) return -EPERM; /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fc->fasync); + return fasync_helper(fd, file, on, &fc->iq.fasync); } const struct file_operations fuse_dev_operations = { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 8eaf3b0de033..3d075417042a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -374,6 +374,30 @@ struct fuse_req { struct file *stolen_file; }; +struct fuse_iqueue { + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; +}; + /** * A Fuse connection. * @@ -405,11 +429,8 @@ struct fuse_conn { /** Maximum write size */ unsigned max_write; - /** Readers of the connection are waiting on this */ - wait_queue_head_t waitq; - - /** The list of pending requests */ - struct list_head pending; + /** Input queue */ + struct fuse_iqueue iq; /** The list of requests being processed */ struct list_head processing; @@ -438,16 +459,6 @@ struct fuse_conn { /** The list of background requests set aside for later queuing */ struct list_head bg_queue; - /** Pending interrupts */ - struct list_head interrupts; - - /** Queue of pending forgets */ - struct fuse_forget_link forget_list_head; - struct fuse_forget_link *forget_list_tail; - - /** Batching of FORGET requests (positive indicates FORGET batch) */ - int forget_batch; - /** Flag indicating that INIT reply has been received. Allocating * any fuse request will be suspended until the flag is set */ int initialized; @@ -463,9 +474,6 @@ struct fuse_conn { /** waitq for reserved requests */ wait_queue_head_t reserved_req_waitq; - /** The next unique request id */ - u64 reqctr; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -588,9 +596,6 @@ struct fuse_conn { /** number of dentries used in the above array */ int ctl_ndents; - /** O_ASYNC requests */ - struct fasync_struct *fasync; - /** Key for lock owner ID scrambling */ u32 scramble_key[4]; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2f902b8edcf2..0890428bbad0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -567,22 +567,28 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void fuse_iqueue_init(struct fuse_iqueue *fiq) +{ + memset(fiq, 0, sizeof(struct fuse_iqueue)); + init_waitqueue_head(&fiq->waitq); + INIT_LIST_HEAD(&fiq->pending); + INIT_LIST_HEAD(&fiq->interrupts); + fiq->forget_list_tail = &fiq->forget_list_head; +} + void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); - init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); - INIT_LIST_HEAD(&fc->pending); + fuse_iqueue_init(&fc->iq); INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->interrupts); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); - fc->forget_list_tail = &fc->forget_list_head; atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; From e16714d8756dc1237a66994e139b61feebcf707a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:01 +0200 Subject: [PATCH 437/839] fuse: duplicate ->connected in iqueue This will allow checking ->connected just with the input queue lock. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/cuse.c | 1 - fs/fuse/dev.c | 19 ++++++++++--------- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 3 ++- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e5bbf748b698..0993d9a266d3 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -502,7 +502,6 @@ static int cuse_channel_open(struct inode *inode, struct file *file) INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; - cc->fc.connected = 1; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 24407e21fb82..a24ead993650 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -341,7 +341,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); - if (fc->connected) { + if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; wake_up(&fiq->waitq); @@ -471,14 +471,14 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { + struct fuse_iqueue *fiq = &fc->iq; + BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); spin_lock(&fc->lock); - if (!fc->connected) { + if (!fiq->connected) { spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; } else { - struct fuse_iqueue *fiq = &fc->iq; - req->in.h.unique = fuse_get_unique(fiq); queue_request(fiq, req); /* acquire extra reference, since request is still needed @@ -619,7 +619,7 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; spin_lock(&fc->lock); - if (fc->connected) { + if (fiq->connected) { queue_request(fiq, req); err = 0; } @@ -1071,7 +1071,7 @@ __acquires(fc->lock) DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&fiq->waitq, &wait); - while (fc->connected && !request_pending(fiq)) { + while (fiq->connected && !request_pending(fiq)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) break; @@ -1261,13 +1261,13 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, restart: spin_lock(&fc->lock); err = -EAGAIN; - if ((file->f_flags & O_NONBLOCK) && fc->connected && + if ((file->f_flags & O_NONBLOCK) && fiq->connected && !request_pending(fiq)) goto err_unlock; request_wait(fc); err = -ENODEV; - if (!fc->connected) + if (!fiq->connected) goto err_unlock; err = -ERESTARTSYS; if (!request_pending(fiq)) @@ -2054,7 +2054,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) poll_wait(file, &fiq->waitq, wait); spin_lock(&fc->lock); - if (!fc->connected) + if (!fiq->connected) mask = POLLERR; else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; @@ -2127,6 +2127,7 @@ void fuse_abort_conn(struct fuse_conn *fc) LIST_HEAD(to_end2); fc->connected = 0; + fiq->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); list_for_each_entry_safe(req, next, &fc->io, list) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3d075417042a..e8be4611fb8e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -375,6 +375,9 @@ struct fuse_req { }; struct fuse_iqueue { + /** Connection established */ + unsigned connected; + /** Readers of the connection are waiting on this */ wait_queue_head_t waitq; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 0890428bbad0..caf77d5a6e8d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -574,6 +574,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq) INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->interrupts); fiq->forget_list_tail = &fiq->forget_list_head; + fiq->connected = 1; } void fuse_conn_init(struct fuse_conn *fc) @@ -596,6 +597,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->polled_files = RB_ROOT; fc->blocked = 0; fc->initialized = 0; + fc->connected = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } @@ -1084,7 +1086,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - fc->connected = 1; file->private_data = fuse_conn_get(fc); mutex_unlock(&fuse_mutex); /* From 8c91189a2a8f5e69457bea9f48350c48310cec5b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:02 +0200 Subject: [PATCH 438/839] fuse: abort: group iqueue accesses Rearrange fuse_abort_conn() so that input queue accesses are grouped together. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a24ead993650..deafbdf278c6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2127,7 +2127,6 @@ void fuse_abort_conn(struct fuse_conn *fc) LIST_HEAD(to_end2); fc->connected = 0; - fiq->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); list_for_each_entry_safe(req, next, &fc->io, list) { @@ -2140,7 +2139,14 @@ void fuse_abort_conn(struct fuse_conn *fc) } fc->max_background = UINT_MAX; flush_bg_queue(fc); + + fiq->connected = 0; list_splice_init(&fiq->pending, &to_end2); + while (forget_pending(fiq)) + kfree(dequeue_forget(fiq, 1, NULL)); + wake_up_all(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + list_splice_init(&fc->processing, &to_end2); while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); @@ -2149,12 +2155,8 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fc->lock); } end_requests(fc, &to_end2); - while (forget_pending(fiq)) - kfree(dequeue_forget(fiq, 1, NULL)); end_polls(fc); - wake_up_all(&fiq->waitq); wake_up_all(&fc->blocked_waitq); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } spin_unlock(&fc->lock); } From ef759258869c63e8df9b886ebaf9451c4bbe6cea Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:02 +0200 Subject: [PATCH 439/839] fuse: dev read: split list_move Different lists will need different locks. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index deafbdf278c6..a450940df45f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1289,7 +1289,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, req = list_entry(fiq->pending.next, struct fuse_req, list); clear_bit(FR_PENDING, &req->flags); - list_move(&req->list, &fc->io); + list_del_init(&req->list); + list_add(&req->list, &fc->io); in = &req->in; reqsize = in->h.len; From 4ce6081260ea4c1b5bfa8ecca5cbb93eea279ad4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:02 +0200 Subject: [PATCH 440/839] fuse: iqueue locking Use fiq->waitq.lock for protecting members of struct fuse_iqueue and FR_PENDING request flag, previously protected by fc->lock. Following patches will remove fc->lock protection from these members. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a450940df45f..65ad9b1e055d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -328,7 +328,7 @@ static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fiq->pending); - wake_up(&fiq->waitq); + wake_up_locked(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } @@ -341,14 +341,16 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; - wake_up(&fiq->waitq); + wake_up_locked(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } else { kfree(forget); } + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); } @@ -362,8 +364,10 @@ static void flush_bg_queue(struct fuse_conn *fc) req = list_entry(fc->bg_queue.next, struct fuse_req, list); list_del(&req->list); fc->active_background++; + spin_lock(&fiq->waitq.lock); req->in.h.unique = fuse_get_unique(fiq); queue_request(fiq, req); + spin_unlock(&fiq->waitq.lock); } } @@ -380,10 +384,13 @@ static void flush_bg_queue(struct fuse_conn *fc) static void request_end(struct fuse_conn *fc, struct fuse_req *req) __releases(fc->lock) { + struct fuse_iqueue *fiq = &fc->iq; void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; list_del_init(&req->list); + spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); + spin_unlock(&fiq->waitq.lock); WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); smp_wmb(); @@ -415,13 +422,16 @@ __releases(fc->lock) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { + spin_lock(&fiq->waitq.lock); list_add_tail(&req->intr_entry, &fiq->interrupts); - wake_up(&fiq->waitq); + wake_up_locked(&fiq->waitq); + spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) { + struct fuse_iqueue *fiq = &fc->iq; int err; if (!fc->no_interrupt) { @@ -434,7 +444,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); if (test_bit(FR_SENT, &req->flags)) - queue_interrupt(&fc->iq, req); + queue_interrupt(fiq, req); spin_unlock(&fc->lock); } @@ -451,14 +461,17 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) return; spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); /* Request is not yet in userspace, bail out */ if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); } @@ -475,8 +488,10 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); if (!fiq->connected) { spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); req->out.h.error = -ENOTCONN; } else { req->in.h.unique = fuse_get_unique(fiq); @@ -484,6 +499,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) /* acquire extra reference, since request is still needed after request_end() */ __fuse_get_request(req); + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); request_wait_answer(fc, req); @@ -619,10 +635,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); if (fiq->connected) { queue_request(fiq, req); err = 0; } + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); return err; @@ -1064,8 +1082,10 @@ static int request_pending(struct fuse_iqueue *fiq) /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) +__releases(fc->iq.waitq.lock) __releases(fc->lock) __acquires(fc->lock) +__acquires(fc->iq.waitq.lock) { struct fuse_iqueue *fiq = &fc->iq; DECLARE_WAITQUEUE(wait, current); @@ -1076,9 +1096,11 @@ __acquires(fc->lock) if (signal_pending(current)) break; + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); schedule(); spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); } set_current_state(TASK_RUNNING); remove_wait_queue(&fiq->waitq, &wait); @@ -1094,15 +1116,17 @@ __acquires(fc->lock) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) +__releases(fc->iq.waitq.lock) __releases(fc->lock) { + struct fuse_iqueue *fiq = &fc->iq; struct fuse_in_header ih; struct fuse_interrupt_in arg; unsigned reqsize = sizeof(ih) + sizeof(arg); int err; list_del_init(&req->intr_entry); - req->intr_unique = fuse_get_unique(&fc->iq); + req->intr_unique = fuse_get_unique(fiq); memset(&ih, 0, sizeof(ih)); memset(&arg, 0, sizeof(arg)); ih.len = reqsize; @@ -1110,6 +1134,7 @@ __releases(fc->lock) ih.unique = req->intr_unique; arg.unique = req->in.h.unique; + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); if (nbytes < reqsize) return -EINVAL; @@ -1147,6 +1172,7 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, static int fuse_read_single_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->iq.waitq.lock) __releases(fc->lock) { int err; @@ -1162,6 +1188,7 @@ __releases(fc->lock) .len = sizeof(ih) + sizeof(arg), }; + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); kfree(forget); if (nbytes < ih.len) @@ -1180,6 +1207,7 @@ __releases(fc->lock) static int fuse_read_batch_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->iq.waitq.lock) __releases(fc->lock) { int err; @@ -1195,12 +1223,14 @@ __releases(fc->lock) }; if (nbytes < ih.len) { + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); return -EINVAL; } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); head = dequeue_forget(fiq, max_forgets, &count); + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); arg.count = count; @@ -1230,6 +1260,7 @@ __releases(fc->lock) static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->iq.waitq.lock) __releases(fc->lock) { struct fuse_iqueue *fiq = &fc->iq; @@ -1260,6 +1291,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, restart: spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); err = -EAGAIN; if ((file->f_flags & O_NONBLOCK) && fiq->connected && !request_pending(fiq)) @@ -1290,6 +1322,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, req = list_entry(fiq->pending.next, struct fuse_req, list); clear_bit(FR_PENDING, &req->flags); list_del_init(&req->list); + spin_unlock(&fiq->waitq.lock); + list_add(&req->list, &fc->io); in = &req->in; @@ -1333,6 +1367,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return reqsize; err_unlock: + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); return err; } @@ -2055,10 +2090,12 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) poll_wait(file, &fiq->waitq, wait); spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); if (!fiq->connected) mask = POLLERR; else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; + spin_unlock(&fiq->waitq.lock); spin_unlock(&fc->lock); return mask; @@ -2141,11 +2178,13 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->max_background = UINT_MAX; flush_bg_queue(fc); + spin_lock(&fiq->waitq.lock); fiq->connected = 0; list_splice_init(&fiq->pending, &to_end2); while (forget_pending(fiq)) kfree(dequeue_forget(fiq, 1, NULL)); - wake_up_all(&fiq->waitq); + wake_up_all_locked(&fiq->waitq); + spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); list_splice_init(&fc->processing, &to_end2); From 8f7bb368dbdda76f5e98e05ee49ae2dc138fd42f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:03 +0200 Subject: [PATCH 441/839] fuse: allow interrupt queuing without fc->lock Interrupt is only queued after the request has been sent to userspace. This is either done in request_wait_answer() or fuse_dev_do_read() depending on which state the request is in at the time of the interrupt. If it's not yet sent, then queuing the interrupt is postponed until the request is read. Otherwise (the request has already been read and is waiting for an answer) the interrupt is queued immedidately. We want to call queue_interrupt() without fc->lock protection, in which case there can be a race between the two functions: - neither of them queue the interrupt (thinking the other one has already done it). - both of them queue the interrupt The first one is prevented by adding memory barriers, the second is prevented by checking (under fiq->waitq.lock) if the interrupt has already been queued. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 65ad9b1e055d..c7f1a633239f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -423,8 +423,10 @@ __releases(fc->lock) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); - list_add_tail(&req->intr_entry, &fiq->interrupts); - wake_up_locked(&fiq->waitq); + if (list_empty(&req->intr_entry)) { + list_add_tail(&req->intr_entry, &fiq->interrupts); + wake_up_locked(&fiq->waitq); + } spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } @@ -443,6 +445,8 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); + /* matches barrier in fuse_dev_do_read() */ + smp_mb__after_atomic(); if (test_bit(FR_SENT, &req->flags)) queue_interrupt(fiq, req); spin_unlock(&fc->lock); @@ -1358,8 +1362,10 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (!test_bit(FR_ISREPLY, &req->flags)) { request_end(fc, req); } else { - set_bit(FR_SENT, &req->flags); list_move_tail(&req->list, &fc->processing); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fiq, req); spin_unlock(&fc->lock); From fd22d62ed0c36e260ac3e13167bc073f28407c70 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:03 +0200 Subject: [PATCH 442/839] fuse: no fc->lock for iqueue parts Remove fc->lock protection from input queue members, now protected by fiq->waitq.lock. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 71 +++++++++++++++------------------------------------ 1 file changed, 20 insertions(+), 51 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c7f1a633239f..35453f229ef3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -340,7 +340,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); if (fiq->connected) { fiq->forget_list_tail->next = forget; @@ -351,7 +350,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, kfree(forget); } spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); } static void flush_bg_queue(struct fuse_conn *fc) @@ -443,13 +441,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (!err) return; - spin_lock(&fc->lock); set_bit(FR_INTERRUPTED, &req->flags); /* matches barrier in fuse_dev_do_read() */ smp_mb__after_atomic(); if (test_bit(FR_SENT, &req->flags)) queue_interrupt(fiq, req); - spin_unlock(&fc->lock); } if (!test_bit(FR_FORCE, &req->flags)) { @@ -464,19 +460,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (!err) return; - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); /* Request is not yet in userspace, bail out */ if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); } /* @@ -491,10 +484,8 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); if (!fiq->connected) { - spin_unlock(&fc->lock); spin_unlock(&fiq->waitq.lock); req->out.h.error = -ENOTCONN; } else { @@ -504,7 +495,6 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) after request_end() */ __fuse_get_request(req); spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); request_wait_answer(fc, req); /* Pairs with smp_wmb() in request_end() */ @@ -638,14 +628,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); if (fiq->connected) { queue_request(fiq, req); err = 0; } spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); return err; } @@ -1085,13 +1073,10 @@ static int request_pending(struct fuse_iqueue *fiq) } /* Wait until a request is available on the pending list */ -static void request_wait(struct fuse_conn *fc) -__releases(fc->iq.waitq.lock) -__releases(fc->lock) -__acquires(fc->lock) -__acquires(fc->iq.waitq.lock) +static void request_wait(struct fuse_iqueue *fiq) +__releases(fiq->waitq.lock) +__acquires(fiq->waitq.lock) { - struct fuse_iqueue *fiq = &fc->iq; DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&fiq->waitq, &wait); @@ -1101,9 +1086,7 @@ __acquires(fc->iq.waitq.lock) break; spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); schedule(); - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); } set_current_state(TASK_RUNNING); @@ -1116,14 +1099,13 @@ __acquires(fc->iq.waitq.lock) * Unlike other requests this is assembled on demand, without a need * to allocate a separate fuse_req structure. * - * Called with fc->lock held, releases it + * Called with fiq->waitq.lock held, releases it */ -static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_interrupt(struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(fc->iq.waitq.lock) -__releases(fc->lock) +__releases(fiq->waitq.lock) { - struct fuse_iqueue *fiq = &fc->iq; struct fuse_in_header ih; struct fuse_interrupt_in arg; unsigned reqsize = sizeof(ih) + sizeof(arg); @@ -1139,7 +1121,6 @@ __releases(fc->lock) arg.unique = req->in.h.unique; spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); if (nbytes < reqsize) return -EINVAL; @@ -1173,14 +1154,12 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, return head; } -static int fuse_read_single_forget(struct fuse_conn *fc, +static int fuse_read_single_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->iq.waitq.lock) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; - struct fuse_iqueue *fiq = &fc->iq; struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); struct fuse_forget_in arg = { .nlookup = forget->forget_one.nlookup, @@ -1193,7 +1172,6 @@ __releases(fc->lock) }; spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); kfree(forget); if (nbytes < ih.len) return -EINVAL; @@ -1209,16 +1187,14 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_batch_forget(struct fuse_conn *fc, +static int fuse_read_batch_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->iq.waitq.lock) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; unsigned max_forgets; unsigned count; struct fuse_forget_link *head; - struct fuse_iqueue *fiq = &fc->iq; struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_in_header ih = { .opcode = FUSE_BATCH_FORGET, @@ -1228,14 +1204,12 @@ __releases(fc->lock) if (nbytes < ih.len) { spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); return -EINVAL; } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); head = dequeue_forget(fiq, max_forgets, &count); spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); arg.count = count; ih.len += count * sizeof(struct fuse_forget_one); @@ -1262,17 +1236,15 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->iq.waitq.lock) -__releases(fc->lock) +__releases(fiq->waitq.lock) { - struct fuse_iqueue *fiq = &fc->iq; - if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) - return fuse_read_single_forget(fc, cs, nbytes); + return fuse_read_single_forget(fiq, cs, nbytes); else - return fuse_read_batch_forget(fc, cs, nbytes); + return fuse_read_batch_forget(fiq, cs, nbytes); } /* @@ -1294,14 +1266,13 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, unsigned reqsize; restart: - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); err = -EAGAIN; if ((file->f_flags & O_NONBLOCK) && fiq->connected && !request_pending(fiq)) goto err_unlock; - request_wait(fc); + request_wait(fiq); err = -ENODEV; if (!fiq->connected) goto err_unlock; @@ -1312,12 +1283,12 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (!list_empty(&fiq->interrupts)) { req = list_entry(fiq->interrupts.next, struct fuse_req, intr_entry); - return fuse_read_interrupt(fc, cs, nbytes, req); + return fuse_read_interrupt(fiq, cs, nbytes, req); } if (forget_pending(fiq)) { if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) - return fuse_read_forget(fc, cs, nbytes); + return fuse_read_forget(fc, fiq, cs, nbytes); if (fiq->forget_batch <= -8) fiq->forget_batch = 16; @@ -1328,6 +1299,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, list_del_init(&req->list); spin_unlock(&fiq->waitq.lock); + spin_lock(&fc->lock); list_add(&req->list, &fc->io); in = &req->in; @@ -1374,7 +1346,6 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, err_unlock: spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); return err; } @@ -2095,14 +2066,12 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) fiq = &fc->iq; poll_wait(file, &fiq->waitq, wait); - spin_lock(&fc->lock); spin_lock(&fiq->waitq.lock); if (!fiq->connected) mask = POLLERR; else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; spin_unlock(&fiq->waitq.lock); - spin_unlock(&fc->lock); return mask; } From 5250921bb0b25e7fc77ba732e569224410743a05 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:03 +0200 Subject: [PATCH 443/839] fuse: simplify request_wait() wait_event_interruptible_exclusive_locked() will do everything request_wait() does, so replace it. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 35453f229ef3..b6f901a0dbdd 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1072,27 +1072,6 @@ static int request_pending(struct fuse_iqueue *fiq) forget_pending(fiq); } -/* Wait until a request is available on the pending list */ -static void request_wait(struct fuse_iqueue *fiq) -__releases(fiq->waitq.lock) -__acquires(fiq->waitq.lock) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&fiq->waitq, &wait); - while (fiq->connected && !request_pending(fiq)) { - set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) - break; - - spin_unlock(&fiq->waitq.lock); - schedule(); - spin_lock(&fiq->waitq.lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&fiq->waitq, &wait); -} - /* * Transfer an interrupt request to userspace * @@ -1272,13 +1251,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, !request_pending(fiq)) goto err_unlock; - request_wait(fiq); + err = wait_event_interruptible_exclusive_locked(fiq->waitq, + !fiq->connected || request_pending(fiq)); + if (err) + goto err_unlock; + err = -ENODEV; if (!fiq->connected) goto err_unlock; - err = -ERESTARTSYS; - if (!request_pending(fiq)) - goto err_unlock; if (!list_empty(&fiq->interrupts)) { req = list_entry(fiq->interrupts.next, struct fuse_req, From 3a2b5b9cd9610f789f5e5f91a010d9fa3ca78632 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:04 +0200 Subject: [PATCH 444/839] fuse: separate out processing queue This is just two fields: fc->io and fc->processing. This patch just rearranges the fields, no functional change. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 21 ++++++++++++--------- fs/fuse/fuse_i.h | 15 ++++++++++----- fs/fuse/inode.c | 10 ++++++++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b6f901a0dbdd..0fae2a7ded77 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1240,6 +1240,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, { int err; struct fuse_iqueue *fiq = &fc->iq; + struct fuse_pqueue *fpq = &fc->pq; struct fuse_req *req; struct fuse_in *in; unsigned reqsize; @@ -1280,7 +1281,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, spin_unlock(&fiq->waitq.lock); spin_lock(&fc->lock); - list_add(&req->list, &fc->io); + list_add(&req->list, &fpq->io); in = &req->in; reqsize = in->h.len; @@ -1314,7 +1315,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (!test_bit(FR_ISREPLY, &req->flags)) { request_end(fc, req); } else { - list_move_tail(&req->list, &fc->processing); + list_move_tail(&req->list, &fpq->processing); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); @@ -1815,11 +1816,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, } /* Look up request on processing list by unique ID */ -static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) +static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) { struct fuse_req *req; - list_for_each_entry(req, &fc->processing, list) { + list_for_each_entry(req, &fpq->processing, list) { if (req->in.h.unique == unique || req->intr_unique == unique) return req; } @@ -1860,6 +1861,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes) { int err; + struct fuse_pqueue *fpq = &fc->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -1892,7 +1894,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (!fc->connected) goto err_unlock; - req = request_find(fc, oh.unique); + req = request_find(fpq, oh.unique); if (!req) goto err_unlock; @@ -1913,7 +1915,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, } clear_bit(FR_SENT, &req->flags); - list_move(&req->list, &fc->io); + list_move(&req->list, &fpq->io); req->out.h = oh; set_bit(FR_LOCKED, &req->flags); cs->req = req; @@ -2112,6 +2114,7 @@ static void end_polls(struct fuse_conn *fc) void fuse_abort_conn(struct fuse_conn *fc) { struct fuse_iqueue *fiq = &fc->iq; + struct fuse_pqueue *fpq = &fc->pq; spin_lock(&fc->lock); if (fc->connected) { @@ -2122,7 +2125,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); - list_for_each_entry_safe(req, next, &fc->io, list) { + list_for_each_entry_safe(req, next, &fpq->io, list) { req->out.h.error = -ECONNABORTED; spin_lock(&req->waitq.lock); set_bit(FR_ABORTED, &req->flags); @@ -2142,7 +2145,7 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - list_splice_init(&fc->processing, &to_end2); + list_splice_init(&fpq->processing, &to_end2); while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); @@ -2161,7 +2164,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = fuse_get_conn(file); if (fc) { - WARN_ON(!list_empty(&fc->io)); + WARN_ON(!list_empty(&fc->pq.io)); WARN_ON(fc->iq.fasync != NULL); fuse_abort_conn(fc); fuse_conn_put(fc); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e8be4611fb8e..3620eec018a4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -401,6 +401,14 @@ struct fuse_iqueue { struct fasync_struct *fasync; }; +struct fuse_pqueue { + /** The list of requests being processed */ + struct list_head processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + /** * A Fuse connection. * @@ -435,11 +443,8 @@ struct fuse_conn { /** Input queue */ struct fuse_iqueue iq; - /** The list of requests being processed */ - struct list_head processing; - - /** The list of requests under I/O */ - struct list_head io; + /** Processing queue */ + struct fuse_pqueue pq; /** The next unique kernel file handle */ u64 khctr; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index caf77d5a6e8d..7b146bb68dd5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -577,6 +577,13 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq) fiq->connected = 1; } +static void fuse_pqueue_init(struct fuse_pqueue *fpq) +{ + memset(fpq, 0, sizeof(struct fuse_pqueue)); + INIT_LIST_HEAD(&fpq->processing); + INIT_LIST_HEAD(&fpq->io); +} + void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); @@ -586,8 +593,7 @@ void fuse_conn_init(struct fuse_conn *fc) init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); fuse_iqueue_init(&fc->iq); - INIT_LIST_HEAD(&fc->processing); - INIT_LIST_HEAD(&fc->io); + fuse_pqueue_init(&fc->pq); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); From e96edd94d0887707fc41c5d21d5b488edcd31689 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:04 +0200 Subject: [PATCH 445/839] fuse: duplicate ->connected in pqueue This will allow checking ->connected just with the processing queue lock. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 7 ++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0fae2a7ded77..6321d761c3c3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1303,7 +1303,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, fuse_copy_finish(cs); spin_lock(&fc->lock); clear_bit(FR_LOCKED, &req->flags); - if (!fc->connected) { + if (!fpq->connected) { request_end(fc, req); return -ENODEV; } @@ -1891,7 +1891,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, spin_lock(&fc->lock); err = -ENOENT; - if (!fc->connected) + if (!fpq->connected) goto err_unlock; req = request_find(fpq, oh.unique); @@ -1928,7 +1928,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, spin_lock(&fc->lock); clear_bit(FR_LOCKED, &req->flags); - if (!fc->connected) + if (!fpq->connected) err = -ENOENT; else if (err) req->out.h.error = -EIO; @@ -2125,6 +2125,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); + fpq->connected = 0; list_for_each_entry_safe(req, next, &fpq->io, list) { req->out.h.error = -ECONNABORTED; spin_lock(&req->waitq.lock); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3620eec018a4..5897d89ea2ba 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -402,6 +402,9 @@ struct fuse_iqueue { }; struct fuse_pqueue { + /** Connection established */ + unsigned connected; + /** The list of requests being processed */ struct list_head processing; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7b146bb68dd5..88b9ca401623 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -582,6 +582,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq) memset(fpq, 0, sizeof(struct fuse_pqueue)); INIT_LIST_HEAD(&fpq->processing); INIT_LIST_HEAD(&fpq->io); + fpq->connected = 1; } void fuse_conn_init(struct fuse_conn *fc) From f377cb799e4f667d743886ee025f9987cbb6cd12 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:04 +0200 Subject: [PATCH 446/839] fuse: move list_del_init() from request_end() into callers Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6321d761c3c3..98568bd2d81b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -385,7 +385,6 @@ __releases(fc->lock) struct fuse_iqueue *fiq = &fc->iq; void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; - list_del_init(&req->list); spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); spin_unlock(&fiq->waitq.lock); @@ -1291,6 +1290,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, /* SETXATTR is special, since it may contain too large data */ if (in->h.opcode == FUSE_SETXATTR) req->out.h.error = -E2BIG; + list_del_init(&req->list); request_end(fc, req); goto restart; } @@ -1304,15 +1304,18 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, spin_lock(&fc->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { + list_del_init(&req->list); request_end(fc, req); return -ENODEV; } if (err) { req->out.h.error = -EIO; + list_del_init(&req->list); request_end(fc, req); return err; } if (!test_bit(FR_ISREPLY, &req->flags)) { + list_del_init(&req->list); request_end(fc, req); } else { list_move_tail(&req->list, &fpq->processing); @@ -1932,6 +1935,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, err = -ENOENT; else if (err) req->out.h.error = -EIO; + list_del_init(&req->list); request_end(fc, req); return err ? err : nbytes; @@ -2073,6 +2077,7 @@ __acquires(fc->lock) req->out.h.error = -ECONNABORTED; clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); request_end(fc, req); spin_lock(&fc->lock); } @@ -2150,6 +2155,7 @@ void fuse_abort_conn(struct fuse_conn *fc) while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); + list_del_init(&req->list); request_end(fc, req); spin_lock(&fc->lock); } From 82cbdcd320852ca68c8d43e8aacce2c07e7c1d4e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:05 +0200 Subject: [PATCH 447/839] fuse: cleanup fuse_dev_do_read() - locked list_add() + list_del_init() cancel out - common handling of case when request is ended here in the read phase Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 98568bd2d81b..1ad75e4ceba5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1237,7 +1237,7 @@ __releases(fiq->waitq.lock) static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { - int err; + ssize_t err; struct fuse_iqueue *fiq = &fc->iq; struct fuse_pqueue *fpq = &fc->pq; struct fuse_req *req; @@ -1280,8 +1280,6 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, spin_unlock(&fiq->waitq.lock); spin_lock(&fc->lock); - list_add(&req->list, &fpq->io); - in = &req->in; reqsize = in->h.len; /* If request is too large, reply with an error and restart the read */ @@ -1290,10 +1288,10 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, /* SETXATTR is special, since it may contain too large data */ if (in->h.opcode == FUSE_SETXATTR) req->out.h.error = -E2BIG; - list_del_init(&req->list); request_end(fc, req); goto restart; } + list_add(&req->list, &fpq->io); spin_unlock(&fc->lock); cs->req = req; err = fuse_copy_one(cs, &in->h, sizeof(in->h)); @@ -1304,30 +1302,32 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, spin_lock(&fc->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { - list_del_init(&req->list); - request_end(fc, req); - return -ENODEV; + err = -ENODEV; + goto out_end; } if (err) { req->out.h.error = -EIO; - list_del_init(&req->list); - request_end(fc, req); - return err; + goto out_end; } if (!test_bit(FR_ISREPLY, &req->flags)) { - list_del_init(&req->list); - request_end(fc, req); - } else { - list_move_tail(&req->list, &fpq->processing); - set_bit(FR_SENT, &req->flags); - /* matches barrier in request_wait_answer() */ - smp_mb__after_atomic(); - if (test_bit(FR_INTERRUPTED, &req->flags)) - queue_interrupt(fiq, req); - spin_unlock(&fc->lock); + err = reqsize; + goto out_end; } + list_move_tail(&req->list, &fpq->processing); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + if (test_bit(FR_INTERRUPTED, &req->flags)) + queue_interrupt(fiq, req); + spin_unlock(&fc->lock); + return reqsize; +out_end: + list_del_init(&req->list); + request_end(fc, req); + return err; + err_unlock: spin_unlock(&fiq->waitq.lock); return err; From 24b4d33d46e9c4c671a43f2640d80fe1159f488c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:05 +0200 Subject: [PATCH 448/839] fuse: abort: group pqueue accesses Rearrange fuse_abort_conn() so that processing queue accesses are grouped together. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1ad75e4ceba5..3e8430074070 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2139,6 +2139,7 @@ void fuse_abort_conn(struct fuse_conn *fc) list_move(&req->list, &to_end1); spin_unlock(&req->waitq.lock); } + list_splice_init(&fpq->processing, &to_end2); fc->max_background = UINT_MAX; flush_bg_queue(fc); @@ -2151,7 +2152,6 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - list_splice_init(&fpq->processing, &to_end2); while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); From 45a91cb1a4fd9bb0e53c95e3dc9185dd5b5ba245 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:06 +0200 Subject: [PATCH 449/839] fuse: pqueue locking Add a fpq->lock for protecting members of struct fuse_pqueue and FR_LOCKED request flag. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 19 +++++++++++++++++-- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3e8430074070..32e0e74e8f4d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1291,7 +1291,9 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, request_end(fc, req); goto restart; } + spin_lock(&fpq->lock); list_add(&req->list, &fpq->io); + spin_unlock(&fpq->lock); spin_unlock(&fc->lock); cs->req = req; err = fuse_copy_one(cs, &in->h, sizeof(in->h)); @@ -1300,6 +1302,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, (struct fuse_arg *) in->args, 0); fuse_copy_finish(cs); spin_lock(&fc->lock); + spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { err = -ENODEV; @@ -1314,6 +1317,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, goto out_end; } list_move_tail(&req->list, &fpq->processing); + spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); @@ -1325,6 +1329,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, out_end: list_del_init(&req->list); + spin_unlock(&fpq->lock); request_end(fc, req); return err; @@ -1893,16 +1898,19 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, goto err_finish; spin_lock(&fc->lock); + spin_lock(&fpq->lock); err = -ENOENT; if (!fpq->connected) - goto err_unlock; + goto err_unlock_pq; req = request_find(fpq, oh.unique); if (!req) - goto err_unlock; + goto err_unlock_pq; /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + spin_unlock(&fpq->lock); + err = -EINVAL; if (nbytes != sizeof(struct fuse_out_header)) goto err_unlock; @@ -1921,6 +1929,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, list_move(&req->list, &fpq->io); req->out.h = oh; set_bit(FR_LOCKED, &req->flags); + spin_unlock(&fpq->lock); cs->req = req; if (!req->out.page_replace) cs->move_pages = 0; @@ -1930,16 +1939,20 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, fuse_copy_finish(cs); spin_lock(&fc->lock); + spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) err = -ENOENT; else if (err) req->out.h.error = -EIO; list_del_init(&req->list); + spin_unlock(&fpq->lock); request_end(fc, req); return err ? err : nbytes; + err_unlock_pq: + spin_unlock(&fpq->lock); err_unlock: spin_unlock(&fc->lock); err_finish: @@ -2130,6 +2143,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); + spin_lock(&fpq->lock); fpq->connected = 0; list_for_each_entry_safe(req, next, &fpq->io, list) { req->out.h.error = -ECONNABORTED; @@ -2140,6 +2154,7 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_unlock(&req->waitq.lock); } list_splice_init(&fpq->processing, &to_end2); + spin_unlock(&fpq->lock); fc->max_background = UINT_MAX; flush_bg_queue(fc); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5897d89ea2ba..ad3799e57efd 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -405,6 +405,9 @@ struct fuse_pqueue { /** Connection established */ unsigned connected; + /** Lock protecting accessess to members of this structure */ + spinlock_t lock; + /** The list of requests being processed */ struct list_head processing; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 88b9ca401623..8373f59dc2a8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -580,6 +580,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq) static void fuse_pqueue_init(struct fuse_pqueue *fpq) { memset(fpq, 0, sizeof(struct fuse_pqueue)); + spin_lock_init(&fpq->lock); INIT_LIST_HEAD(&fpq->processing); INIT_LIST_HEAD(&fpq->io); fpq->connected = 1; From 77cd9d488b32d19be852ad4d310ef13701557d61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:06 +0200 Subject: [PATCH 450/839] fuse: add req flag for private list When an unlocked request is aborted, it is moved from fpq->io to a private list. Then, after unlocking fpq->lock, the private list is processed and the requests are finished off. To protect the private list, we need to mark the request with a flag, so if in the meantime the request is unlocked the list is not corrupted. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 10 +++++++--- fs/fuse/fuse_i.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 32e0e74e8f4d..7f37e55edc0e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1328,7 +1328,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return reqsize; out_end: - list_del_init(&req->list); + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); spin_unlock(&fpq->lock); request_end(fc, req); return err; @@ -1945,7 +1946,8 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, err = -ENOENT; else if (err) req->out.h.error = -EIO; - list_del_init(&req->list); + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); spin_unlock(&fpq->lock); request_end(fc, req); @@ -2149,8 +2151,10 @@ void fuse_abort_conn(struct fuse_conn *fc) req->out.h.error = -ECONNABORTED; spin_lock(&req->waitq.lock); set_bit(FR_ABORTED, &req->flags); - if (!test_bit(FR_LOCKED, &req->flags)) + if (!test_bit(FR_LOCKED, &req->flags)) { + set_bit(FR_PRIVATE, &req->flags); list_move(&req->list, &to_end1); + } spin_unlock(&req->waitq.lock); } list_splice_init(&fpq->processing, &to_end2); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ad3799e57efd..a9507fd97d5e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -269,6 +269,7 @@ struct fuse_io_priv { * FR_PENDING: request is not yet in userspace * FR_SENT: request is in userspace, waiting for an answer * FR_FINISHED: request is finished + * FR_PRIVATE: request is on private list */ enum fuse_req_flag { FR_ISREPLY, @@ -281,6 +282,7 @@ enum fuse_req_flag { FR_PENDING, FR_SENT, FR_FINISHED, + FR_PRIVATE, }; /** From 365ae710df91edc97d24817e3271e01bffaaee32 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:06 +0200 Subject: [PATCH 451/839] fuse: request_end(): do once When the connection is aborted it is possible that request_end() will be called twice. Use atomic test and set to do the actual ending only once. test_and_set_bit() also provides the necessary barrier semantics so no explicit smp_wmb() is necessary. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 7f37e55edc0e..cd242fc6a92b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -384,14 +384,18 @@ __releases(fc->lock) { struct fuse_iqueue *fiq = &fc->iq; void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + + if (test_and_set_bit(FR_FINISHED, &req->flags)) { + spin_unlock(&fc->lock); + return; + } + req->end = NULL; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); spin_unlock(&fiq->waitq.lock); WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); - smp_wmb(); - set_bit(FR_FINISHED, &req->flags); if (test_bit(FR_BACKGROUND, &req->flags)) { clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) From 1e6881c36ebbfd47298c42fa82b544c4988933fa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:07 +0200 Subject: [PATCH 452/839] fuse: cleanup request_end() Now that we atomically test having already done everything we no longer need other protection. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cd242fc6a92b..4e1144a38438 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -383,14 +383,12 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) __releases(fc->lock) { struct fuse_iqueue *fiq = &fc->iq; - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; if (test_and_set_bit(FR_FINISHED, &req->flags)) { spin_unlock(&fc->lock); return; } - req->end = NULL; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); spin_unlock(&fiq->waitq.lock); @@ -416,8 +414,8 @@ __releases(fc->lock) } spin_unlock(&fc->lock); wake_up(&req->waitq); - if (end) - end(fc, req); + if (req->end) + req->end(fc, req); fuse_put_request(fc, req); } From efe2800facb4505696e602ec4ed1fc947bf114dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:07 +0200 Subject: [PATCH 453/839] fuse: no fc->lock in request_end() No longer need to call request_end() with the connection lock held. We still protect the background counters and queue with fc->lock, so acquire it if necessary. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4e1144a38438..f2c7dd90ebfc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -376,18 +376,13 @@ static void flush_bg_queue(struct fuse_conn *fc) * was closed. The requester thread is woken up (if still waiting), * the 'end' callback is called if given, else the reference to the * request is released - * - * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) { struct fuse_iqueue *fiq = &fc->iq; - if (test_and_set_bit(FR_FINISHED, &req->flags)) { - spin_unlock(&fc->lock); + if (test_and_set_bit(FR_FINISHED, &req->flags)) return; - } spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); @@ -395,6 +390,7 @@ __releases(fc->lock) WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); if (test_bit(FR_BACKGROUND, &req->flags)) { + spin_lock(&fc->lock); clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) fc->blocked = 0; @@ -411,8 +407,8 @@ __releases(fc->lock) fc->num_background--; fc->active_background--; flush_bg_queue(fc); + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); wake_up(&req->waitq); if (req->end) req->end(fc, req); @@ -1290,6 +1286,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, /* SETXATTR is special, since it may contain too large data */ if (in->h.opcode == FUSE_SETXATTR) req->out.h.error = -E2BIG; + spin_unlock(&fc->lock); request_end(fc, req); goto restart; } @@ -1333,6 +1330,7 @@ out_end: if (!test_bit(FR_PRIVATE, &req->flags)) list_del_init(&req->list); spin_unlock(&fpq->lock); + spin_unlock(&fc->lock); request_end(fc, req); return err; @@ -1951,6 +1949,7 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (!test_bit(FR_PRIVATE, &req->flags)) list_del_init(&req->list); spin_unlock(&fpq->lock); + spin_unlock(&fc->lock); request_end(fc, req); return err ? err : nbytes; @@ -2095,6 +2094,7 @@ __acquires(fc->lock) clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); + spin_unlock(&fc->lock); request_end(fc, req); spin_lock(&fc->lock); } @@ -2177,6 +2177,7 @@ void fuse_abort_conn(struct fuse_conn *fc) req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); list_del_init(&req->list); + spin_unlock(&fc->lock); request_end(fc, req); spin_lock(&fc->lock); } From 46c34a348b0ac74fc9646de8ef0c5b8b29f09da9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:07 +0200 Subject: [PATCH 454/839] fuse: no fc->lock for pqueue parts Remove fc->lock protection from processing queue members, now protected by fpq->lock. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f2c7dd90ebfc..fa53e5e9261a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1277,7 +1277,6 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, list_del_init(&req->list); spin_unlock(&fiq->waitq.lock); - spin_lock(&fc->lock); in = &req->in; reqsize = in->h.len; /* If request is too large, reply with an error and restart the read */ @@ -1286,21 +1285,18 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, /* SETXATTR is special, since it may contain too large data */ if (in->h.opcode == FUSE_SETXATTR) req->out.h.error = -E2BIG; - spin_unlock(&fc->lock); request_end(fc, req); goto restart; } spin_lock(&fpq->lock); list_add(&req->list, &fpq->io); spin_unlock(&fpq->lock); - spin_unlock(&fc->lock); cs->req = req; err = fuse_copy_one(cs, &in->h, sizeof(in->h)); if (!err) err = fuse_copy_args(cs, in->numargs, in->argpages, (struct fuse_arg *) in->args, 0); fuse_copy_finish(cs); - spin_lock(&fc->lock); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { @@ -1322,7 +1318,6 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fiq, req); - spin_unlock(&fc->lock); return reqsize; @@ -1330,7 +1325,6 @@ out_end: if (!test_bit(FR_PRIVATE, &req->flags)) list_del_init(&req->list); spin_unlock(&fpq->lock); - spin_unlock(&fc->lock); request_end(fc, req); return err; @@ -1898,7 +1892,6 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (oh.error <= -1000 || oh.error > 0) goto err_finish; - spin_lock(&fc->lock); spin_lock(&fpq->lock); err = -ENOENT; if (!fpq->connected) @@ -1914,14 +1907,13 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, err = -EINVAL; if (nbytes != sizeof(struct fuse_out_header)) - goto err_unlock; + goto err_finish; if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) queue_interrupt(&fc->iq, req); - spin_unlock(&fc->lock); fuse_copy_finish(cs); return nbytes; } @@ -1934,12 +1926,10 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, cs->req = req; if (!req->out.page_replace) cs->move_pages = 0; - spin_unlock(&fc->lock); err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); - spin_lock(&fc->lock); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) @@ -1949,15 +1939,13 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (!test_bit(FR_PRIVATE, &req->flags)) list_del_init(&req->list); spin_unlock(&fpq->lock); - spin_unlock(&fc->lock); + request_end(fc, req); return err ? err : nbytes; err_unlock_pq: spin_unlock(&fpq->lock); - err_unlock: - spin_unlock(&fc->lock); err_finish: fuse_copy_finish(cs); return err; From ee314a870e402f4e63b8a651bc96c740ed69cb31 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:08 +0200 Subject: [PATCH 455/839] fuse: abort: no fc->lock needed for request ending In fuse_abort_conn() when all requests are on private lists we no longer need fc->lock protection. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/dev.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fa53e5e9261a..dcfc87172a47 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2072,8 +2072,6 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) -__releases(fc->lock) -__acquires(fc->lock) { while (!list_empty(head)) { struct fuse_req *req; @@ -2082,9 +2080,7 @@ __acquires(fc->lock) clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - spin_unlock(&fc->lock); request_end(fc, req); - spin_lock(&fc->lock); } } @@ -2160,20 +2156,20 @@ void fuse_abort_conn(struct fuse_conn *fc) wake_up_all_locked(&fiq->waitq); spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + end_polls(fc); + wake_up_all(&fc->blocked_waitq); + spin_unlock(&fc->lock); while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); __fuse_get_request(req); list_del_init(&req->list); - spin_unlock(&fc->lock); request_end(fc, req); - spin_lock(&fc->lock); } end_requests(fc, &to_end2); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); + } else { + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); } EXPORT_SYMBOL_GPL(fuse_abort_conn); From 00c570f4ba43ae73b41fa0a2269c3b0ac20386ef Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:08 +0200 Subject: [PATCH 456/839] fuse: device fd clone Allow an open fuse device to be "cloned". Userspace can create a clone by: newfd = open("/dev/fuse", O_RDWR) ioctl(newfd, FUSE_DEV_IOC_CLONE, &oldfd); At this point newfd will refer to the same fuse connection as oldfd. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- Documentation/ioctl/ioctl-number.txt | 1 + fs/fuse/dev.c | 40 ++++++++++++++++++++++++++++ include/uapi/linux/fuse.h | 3 +++ 3 files changed, 44 insertions(+) diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 51f4221657bf..611c52267d24 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -321,6 +321,7 @@ Code Seq#(hex) Include File Comments 0xDB 00-0F drivers/char/mwave/mwavepub.h 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ +0xE5 00-3F linux/fuse.h 0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver 0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dcfc87172a47..d405c1fa4618 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2197,6 +2197,44 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &fc->iq.fasync); } +static int fuse_device_clone(struct fuse_conn *fc, struct file *new) +{ + if (new->private_data) + return -EINVAL; + + new->private_data = fuse_conn_get(fc); + + return 0; +} + +static long fuse_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = -ENOTTY; + + if (cmd == FUSE_DEV_IOC_CLONE) { + int oldfd; + + err = -EFAULT; + if (!get_user(oldfd, (__u32 __user *) arg)) { + struct file *old = fget(oldfd); + + err = -EINVAL; + if (old) { + struct fuse_conn *fc = fuse_get_conn(old); + + if (fc) { + mutex_lock(&fuse_mutex); + err = fuse_device_clone(fc, file); + mutex_unlock(&fuse_mutex); + } + fput(old); + } + } + } + return err; +} + const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .open = fuse_dev_open, @@ -2208,6 +2246,8 @@ const struct file_operations fuse_dev_operations = { .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, + .unlocked_ioctl = fuse_dev_ioctl, + .compat_ioctl = fuse_dev_ioctl, }; EXPORT_SYMBOL_GPL(fuse_dev_operations); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 25084a052a1e..c9aca042e61d 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -755,4 +755,7 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +/* Device ioctls: */ +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) + #endif /* _LINUX_FUSE_H */ From cc080e9e9be16ccf26135d366d7d2b65209f1d56 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:08 +0200 Subject: [PATCH 457/839] fuse: introduce per-instance fuse_dev structure Allow fuse device clones to refer to be distinguished. This patch just adds the infrastructure by associating a separate "struct fuse_dev" with each clone. Signed-off-by: Miklos Szeredi Reviewed-by: Ashish Samant --- fs/fuse/cuse.c | 14 +++++++--- fs/fuse/dev.c | 70 +++++++++++++++++++++++++++++------------------- fs/fuse/fuse_i.h | 17 ++++++++++++ fs/fuse/inode.c | 47 +++++++++++++++++++++++++++++--- 4 files changed, 114 insertions(+), 34 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 0993d9a266d3..eae2c11268bc 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -489,6 +489,7 @@ static void cuse_fc_release(struct fuse_conn *fc) */ static int cuse_channel_open(struct inode *inode, struct file *file) { + struct fuse_dev *fud; struct cuse_conn *cc; int rc; @@ -499,16 +500,22 @@ static int cuse_channel_open(struct inode *inode, struct file *file) fuse_conn_init(&cc->fc); + fud = fuse_dev_alloc(&cc->fc); + if (!fud) { + kfree(cc); + return -ENOMEM; + } + INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { - fuse_conn_put(&cc->fc); + fuse_dev_free(fud); return rc; } - file->private_data = &cc->fc; /* channel owns base reference to cc */ + file->private_data = fud; return 0; } @@ -526,7 +533,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file) */ static int cuse_channel_release(struct inode *inode, struct file *file) { - struct cuse_conn *cc = fc_to_cc(file->private_data); + struct fuse_dev *fud = file->private_data; + struct cuse_conn *cc = fc_to_cc(fud->fc); int rc; /* remove from the conntbl, no more access from this point on */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d405c1fa4618..99e94584f17f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -25,13 +25,13 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; -static struct fuse_conn *fuse_get_conn(struct file *file) +static struct fuse_dev *fuse_get_dev(struct file *file) { /* * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return file->private_data; + return ACCESS_ONCE(file->private_data); } static void fuse_request_init(struct fuse_req *req, struct page **pages, @@ -1348,8 +1348,9 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; if (!iter_is_iovec(to)) @@ -1357,7 +1358,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) fuse_copy_init(&cs, 1, to); - return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to)); + return fuse_dev_do_read(fud->fc, file, &cs, iov_iter_count(to)); } static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, @@ -1369,8 +1370,9 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, int do_wakeup = 0; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(in); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(in); + + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); @@ -1380,7 +1382,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, fuse_copy_init(&cs, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; - ret = fuse_dev_do_read(fc, in, &cs, len); + ret = fuse_dev_do_read(fud->fc, in, &cs, len); if (ret < 0) goto out; @@ -1954,8 +1956,9 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp); + + if (!fud) return -EPERM; if (!iter_is_iovec(from)) @@ -1963,7 +1966,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) fuse_copy_init(&cs, 0, from); - return fuse_dev_do_write(fc, &cs, iov_iter_count(from)); + return fuse_dev_do_write(fud->fc, &cs, iov_iter_count(from)); } static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, @@ -1974,12 +1977,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, unsigned idx; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc; + struct fuse_dev *fud; size_t rem; ssize_t ret; - fc = fuse_get_conn(out); - if (!fc) + fud = fuse_get_dev(out); + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); @@ -2034,7 +2037,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (flags & SPLICE_F_MOVE) cs.move_pages = 1; - ret = fuse_dev_do_write(fc, &cs, len); + ret = fuse_dev_do_write(fud->fc, &cs, len); for (idx = 0; idx < nbuf; idx++) { struct pipe_buffer *buf = &bufs[idx]; @@ -2049,11 +2052,12 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; struct fuse_iqueue *fiq; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return POLLERR; - fiq = &fc->iq; + fiq = &fud->fc->iq; poll_wait(file, &fiq->waitq, wait); spin_lock(&fiq->waitq.lock); @@ -2175,12 +2179,15 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn); int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_conn *fc = fuse_get_conn(file); - if (fc) { + struct fuse_dev *fud = fuse_get_dev(file); + + if (fud) { + struct fuse_conn *fc = fud->fc; + WARN_ON(!list_empty(&fc->pq.io)); WARN_ON(fc->iq.fasync != NULL); fuse_abort_conn(fc); - fuse_conn_put(fc); + fuse_dev_free(fud); } return 0; @@ -2189,20 +2196,27 @@ EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fc->iq.fasync); + return fasync_helper(fd, file, on, &fud->fc->iq.fasync); } static int fuse_device_clone(struct fuse_conn *fc, struct file *new) { + struct fuse_dev *fud; + if (new->private_data) return -EINVAL; - new->private_data = fuse_conn_get(fc); + fud = fuse_dev_alloc(fc); + if (!fud) + return -ENOMEM; + + new->private_data = fud; return 0; } @@ -2221,11 +2235,11 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, err = -EINVAL; if (old) { - struct fuse_conn *fc = fuse_get_conn(old); + struct fuse_dev *fud = fuse_get_dev(old); - if (fc) { + if (fud) { mutex_lock(&fuse_mutex); - err = fuse_device_clone(fc, file); + err = fuse_device_clone(fud->fc, file); mutex_unlock(&fuse_mutex); } fput(old); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a9507fd97d5e..42d59cbd47e7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -417,6 +417,17 @@ struct fuse_pqueue { struct list_head io; }; +/** + * Fuse device instance + */ +struct fuse_dev { + /** Fuse connection for this device */ + struct fuse_conn *fc; + + /** list entry on fc->devices */ + struct list_head entry; +}; + /** * A Fuse connection. * @@ -629,6 +640,9 @@ struct fuse_conn { /** Read/write semaphore to hold when accessing sb. */ struct rw_semaphore killsb; + + /** List of device instances belonging to this connection */ + struct list_head devices; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -841,6 +855,9 @@ void fuse_conn_init(struct fuse_conn *fc); */ void fuse_conn_put(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc); +void fuse_dev_free(struct fuse_dev *fud); + /** * Add connection to control filesystem */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8373f59dc2a8..e399383d87c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -598,6 +598,7 @@ void fuse_conn_init(struct fuse_conn *fc) fuse_pqueue_init(&fc->pq); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); + INIT_LIST_HEAD(&fc->devices); atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; @@ -945,6 +946,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { + WARN_ON(!list_empty(&fc->devices)); kfree_rcu(fc, rcu); } @@ -990,8 +992,41 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) +{ + struct fuse_dev *fud; + + fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); + if (fud) { + fud->fc = fuse_conn_get(fc); + + spin_lock(&fc->lock); + list_add_tail(&fud->entry, &fc->devices); + spin_unlock(&fc->lock); + } + + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc); + +void fuse_dev_free(struct fuse_dev *fud) +{ + struct fuse_conn *fc = fud->fc; + + if (fc) { + spin_lock(&fc->lock); + list_del(&fud->entry); + spin_unlock(&fc->lock); + + fuse_conn_put(fc); + } + kfree(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_free); + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { + struct fuse_dev *fud; struct fuse_conn *fc; struct inode *root; struct fuse_mount_data d; @@ -1043,11 +1078,15 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->release = fuse_free_conn; + fud = fuse_dev_alloc(fc); + if (!fud) + goto err_put_conn; + fc->dev = sb->s_dev; fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) - goto err_put_conn; + goto err_dev_free; sb->s_bdi = &fc->bdi; @@ -1068,7 +1107,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) root = fuse_get_root_inode(sb, d.rootmode); root_dentry = d_make_root(root); if (!root_dentry) - goto err_put_conn; + goto err_dev_free; /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; @@ -1094,7 +1133,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - file->private_data = fuse_conn_get(fc); + file->private_data = fud; mutex_unlock(&fuse_mutex); /* * atomic_dec_and_test() in fput() provides the necessary @@ -1113,6 +1152,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_request_free(init_req); err_put_root: dput(root_dentry); + err_dev_free: + fuse_dev_free(fud); err_put_conn: fuse_bdi_destroy(fc); fuse_conn_put(fc); From c3696046beb3a4479715b48f67f6a8a3aef4b3bb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:26:09 +0200 Subject: [PATCH 458/839] fuse: separate pqueue for clones Make each fuse device clone refer to a separate processing queue. The only constraint on userspace code is that the request answer must be written to the same device clone as it was read off. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 61 ++++++++++++++++++++++++++++-------------------- fs/fuse/fuse_i.h | 9 ++++--- fs/fuse/inode.c | 3 ++- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 99e94584f17f..80cc1b35d460 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1232,12 +1232,13 @@ __releases(fiq->waitq.lock) * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, +static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { ssize_t err; + struct fuse_conn *fc = fud->fc; struct fuse_iqueue *fiq = &fc->iq; - struct fuse_pqueue *fpq = &fc->pq; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_in *in; unsigned reqsize; @@ -1358,7 +1359,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) fuse_copy_init(&cs, 1, to); - return fuse_dev_do_read(fud->fc, file, &cs, iov_iter_count(to)); + return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to)); } static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, @@ -1382,7 +1383,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, fuse_copy_init(&cs, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; - ret = fuse_dev_do_read(fud->fc, in, &cs, len); + ret = fuse_dev_do_read(fud, in, &cs, len); if (ret < 0) goto out; @@ -1862,11 +1863,12 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * it from the list and copy the rest of the buffer to the request. * The request is finished by calling request_end() */ -static ssize_t fuse_dev_do_write(struct fuse_conn *fc, +static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) { int err; - struct fuse_pqueue *fpq = &fc->pq; + struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -1966,7 +1968,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) fuse_copy_init(&cs, 0, from); - return fuse_dev_do_write(fud->fc, &cs, iov_iter_count(from)); + return fuse_dev_do_write(fud, &cs, iov_iter_count(from)); } static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, @@ -2037,7 +2039,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (flags & SPLICE_F_MOVE) cs.move_pages = 1; - ret = fuse_dev_do_write(fud->fc, &cs, len); + ret = fuse_dev_do_write(fud, &cs, len); for (idx = 0; idx < nbuf; idx++) { struct pipe_buffer *buf = &bufs[idx]; @@ -2124,10 +2126,10 @@ static void end_polls(struct fuse_conn *fc) void fuse_abort_conn(struct fuse_conn *fc) { struct fuse_iqueue *fiq = &fc->iq; - struct fuse_pqueue *fpq = &fc->pq; spin_lock(&fc->lock); if (fc->connected) { + struct fuse_dev *fud; struct fuse_req *req, *next; LIST_HEAD(to_end1); LIST_HEAD(to_end2); @@ -2135,20 +2137,24 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); - spin_lock(&fpq->lock); - fpq->connected = 0; - list_for_each_entry_safe(req, next, &fpq->io, list) { - req->out.h.error = -ECONNABORTED; - spin_lock(&req->waitq.lock); - set_bit(FR_ABORTED, &req->flags); - if (!test_bit(FR_LOCKED, &req->flags)) { - set_bit(FR_PRIVATE, &req->flags); - list_move(&req->list, &to_end1); + list_for_each_entry(fud, &fc->devices, entry) { + struct fuse_pqueue *fpq = &fud->pq; + + spin_lock(&fpq->lock); + fpq->connected = 0; + list_for_each_entry_safe(req, next, &fpq->io, list) { + req->out.h.error = -ECONNABORTED; + spin_lock(&req->waitq.lock); + set_bit(FR_ABORTED, &req->flags); + if (!test_bit(FR_LOCKED, &req->flags)) { + set_bit(FR_PRIVATE, &req->flags); + list_move(&req->list, &to_end1); + } + spin_unlock(&req->waitq.lock); } - spin_unlock(&req->waitq.lock); + list_splice_init(&fpq->processing, &to_end2); + spin_unlock(&fpq->lock); } - list_splice_init(&fpq->processing, &to_end2); - spin_unlock(&fpq->lock); fc->max_background = UINT_MAX; flush_bg_queue(fc); @@ -2183,13 +2189,17 @@ int fuse_dev_release(struct inode *inode, struct file *file) if (fud) { struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; - WARN_ON(!list_empty(&fc->pq.io)); - WARN_ON(fc->iq.fasync != NULL); - fuse_abort_conn(fc); + WARN_ON(!list_empty(&fpq->io)); + end_requests(fc, &fpq->processing); + /* Are we the last open device? */ + if (atomic_dec_and_test(&fc->dev_count)) { + WARN_ON(fc->iq.fasync != NULL); + fuse_abort_conn(fc); + } fuse_dev_free(fud); } - return 0; } EXPORT_SYMBOL_GPL(fuse_dev_release); @@ -2217,6 +2227,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) return -ENOMEM; new->private_data = fud; + atomic_inc(&fc->dev_count); return 0; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 42d59cbd47e7..405113101db8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -424,6 +424,9 @@ struct fuse_dev { /** Fuse connection for this device */ struct fuse_conn *fc; + /** Processing queue */ + struct fuse_pqueue pq; + /** list entry on fc->devices */ struct list_head entry; }; @@ -442,6 +445,9 @@ struct fuse_conn { /** Refcount */ atomic_t count; + /** Number of fuse_dev's */ + atomic_t dev_count; + struct rcu_head rcu; /** The user id for this mount */ @@ -462,9 +468,6 @@ struct fuse_conn { /** Input queue */ struct fuse_iqueue iq; - /** Processing queue */ - struct fuse_pqueue pq; - /** The next unique kernel file handle */ u64 khctr; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e399383d87c8..ac81f48ab2f4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -592,10 +592,10 @@ void fuse_conn_init(struct fuse_conn *fc) spin_lock_init(&fc->lock); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); + atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); fuse_iqueue_init(&fc->iq); - fuse_pqueue_init(&fc->pq); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); INIT_LIST_HEAD(&fc->devices); @@ -999,6 +999,7 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); if (fud) { fud->fc = fuse_conn_get(fc); + fuse_pqueue_init(&fud->pq); spin_lock(&fc->lock); list_add_tail(&fud->entry, &fc->devices); From 0a30f612d6cfd936235b41b090dbe0119c9039d1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 17:10:36 +0200 Subject: [PATCH 459/839] fuse: update MAINTAINERS entry Signed-off-by: Miklos Szeredi --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d8afd2953678..903d43d680be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4276,9 +4276,11 @@ FUSE: FILESYSTEM IN USERSPACE M: Miklos Szeredi L: fuse-devel@lists.sourceforge.net W: http://fuse.sourceforge.net/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git S: Maintained F: fs/fuse/ F: include/uapi/linux/fuse.h +F: Documentation/filesystems/fuse.txt FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) M: Rik Faith From db2efec0caba4f81a22d95a34da640b86c313c8e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 30 Jun 2015 14:12:30 -0400 Subject: [PATCH 460/839] nfs: take extra reference to fl->fl_file when running a LOCKU operation Jean reported another crash, similar to the one fixed by feaff8e5b2cf: BUG: unable to handle kernel NULL pointer dereference at 0000000000000148 IP: [] locks_get_lock_context+0xf/0xa0 PGD 0 Oops: 0000 [#1] SMP Modules linked in: nfsv3 nfs_layout_flexfiles rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache vmw_vsock_vmci_transport vsock cfg80211 rfkill coretemp crct10dif_pclmul ppdev vmw_balloon crc32_pclmul crc32c_intel ghash_clmulni_intel pcspkr vmxnet3 parport_pc i2c_piix4 microcode serio_raw parport nfsd floppy vmw_vmci acpi_cpufreq auth_rpcgss shpchp nfs_acl lockd grace sunrpc vmwgfx drm_kms_helper ttm drm mptspi scsi_transport_spi mptscsih ata_generic mptbase i2c_core pata_acpi CPU: 0 PID: 329 Comm: kworker/0:1H Not tainted 4.1.0-rc7+ #2 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/30/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] 30ec000 RIP: 0010:[] [] locks_get_lock_context+0xf/0xa0 RSP: 0018:ffff8802330efc08 EFLAGS: 00010296 RAX: ffff8802330efc58 RBX: ffff880097187c80 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff8802330efc18 R08: ffff88023fc173d8 R09: 3038b7bf00000000 R10: 00002f1a02000000 R11: 3038b7bf00000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8802337a2300 R15: 0000000000000020 FS: 0000000000000000(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000148 CR3: 000000003680f000 CR4: 00000000001407f0 Stack: ffff880097187c80 ffff880097187cd8 ffff8802330efc98 ffffffff81250281 ffff8802330efc68 ffffffffa013e7df ffff8802330efc98 0000000000000246 ffff8801f6901c00 ffff880233d2b8d8 ffff8802330efc58 ffff8802330efc58 Call Trace: [] __posix_lock_file+0x31/0x5e0 [] ? rpc_wake_up_task_queue_locked.part.35+0xcf/0x240 [sunrpc] [] posix_lock_file_wait+0x3b/0xd0 [] ? nfs41_wake_and_assign_slot+0x32/0x40 [nfsv4] [] ? nfs41_sequence_done+0xd8/0x300 [nfsv4] [] do_vfs_lock+0x35/0x40 [nfsv4] [] nfs4_locku_done+0x81/0x120 [nfsv4] [] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc] [] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc] [] rpc_exit_task+0x2c/0x90 [sunrpc] [] ? call_refreshresult+0x170/0x170 [sunrpc] [] __rpc_execute+0x84/0x410 [sunrpc] [] rpc_async_schedule+0x15/0x20 [sunrpc] [] process_one_work+0x147/0x400 [] worker_thread+0x11b/0x460 [] ? rescuer_thread+0x2f0/0x2f0 [] kthread+0xc9/0xe0 [] ? perf_trace_xen_mmu_set_pmd+0xa0/0x160 [] ? kthread_create_on_node+0x170/0x170 [] ret_from_fork+0x42/0x70 [] ? kthread_create_on_node+0x170/0x170 Code: a5 81 e8 85 75 e4 ff c6 05 31 ee aa 00 01 eb 98 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 54 49 89 fc 53 <48> 8b 9f 48 01 00 00 48 85 db 74 08 48 89 d8 5b 41 5c 5d c3 83 RIP [] locks_get_lock_context+0xf/0xa0 RSP CR2: 0000000000000148 ---[ end trace 64484f16250de7ef ]--- The problem is almost exactly the same as the one fixed by feaff8e5b2cf. We must take a reference to the struct file when running the LOCKU compound to prevent the final fput from running until the operation is complete. Reported-by: Jean Spector Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 605c203de556..5e7638d8e31b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5484,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5495,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); + fput(calldata->fl.fl_file); kfree(calldata); } From 6b55970b0f8057a40f964d3dcfa4a687f675b5e2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:54:53 +0800 Subject: [PATCH 461/839] nfs: Fix a memory leak when meeting an unsupported state protect Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e7638d8e31b..75a874fd1591 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6972,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* unsupported! */ WARN_ON_ONCE(1); status = -EINVAL; - goto out_server_scope; + goto out_impl_id; } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); @@ -7000,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; + res.impl_id = NULL; if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, @@ -7013,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (clp->cl_serverscope == NULL) { clp->cl_serverscope = res.server_scope; - goto out; + res.server_scope = NULL; } - } else - kfree(res.impl_id); + } -out_server_owner: - kfree(res.server_owner); +out_impl_id: + kfree(res.impl_id); out_server_scope: kfree(res.server_scope); +out_server_owner: + kfree(res.server_owner); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " From 71f81e51ee0dc6d523a5229096c32eca2ab58bbb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:55:50 +0800 Subject: [PATCH 462/839] nfs: Remove unused argument in nfs_server_set_fsinfo() Commit e38eb6506f "NFS: set_pnfs_layoutdriver() from nfs4_proc_fsinfo()" have remove the using of mntfh from nfs_server_set_fsinfo(). Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1e37491904ee..7b3e5c367f5b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -825,7 +825,6 @@ error: * Load up the server record from information gained in an fsinfo record */ static void nfs_server_set_fsinfo(struct nfs_server *server, - struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo) { unsigned long max_rpc_payload; @@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs if (error < 0) goto out_error; - nfs_server_set_fsinfo(server, mntfh, &fsinfo); + nfs_server_set_fsinfo(server, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { From 6a062a3687c20b80362b04e1ae7c9772d511977d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 23:00:29 +0800 Subject: [PATCH 463/839] nfs: Use remove_proc_subtree() instead remove_proc_entry() Thanks for Al Viro's comments of killing proc_fs_nfs completely. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7b3e5c367f5b..ecebb406cc1a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1192,8 +1192,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_fs_nfs; - static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); @@ -1435,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net) */ int __init nfs_fs_proc_init(void) { - struct proc_dir_entry *p; - - proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); - if (!proc_fs_nfs) + if (!proc_mkdir("fs/nfsfs", NULL)) goto error_0; /* a file of servers with which we're dealing */ - p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); - if (!p) + if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers")) goto error_1; /* a file of volumes that we have mounted */ - p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); - if (!p) - goto error_2; - return 0; + if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes")) + goto error_1; -error_2: - remove_proc_entry("servers", proc_fs_nfs); + return 0; error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1465,9 +1456,7 @@ error_0: */ void nfs_fs_proc_exit(void) { - remove_proc_entry("volumes", proc_fs_nfs); - remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ From 2785110d2e5c5bad54c8364391e02e3dd21eccf3 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:57:49 +0800 Subject: [PATCH 464/839] nfs: Don't setting FILE_CREATED flags always Commit 5bc2afc2b5 "NFSv4: Honour the 'opened' parameter in the atomic_open() filesystem method" have support the opened arguments now. Also, Commit 03da633aa7 "atomic_open: take care of EEXIST in no-open case with O_CREAT|O_EXCL in fs/namei.c" have change vfs's logical. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..21457bb0edd6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; - if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; - err = finish_open(file, dentry, do_open, opened); if (err) goto out; From cd738ee985ccc1162e7ba2b4ecd41b6072a5befc Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:58:31 +0800 Subject: [PATCH 465/839] nfs: Remove unneeded micro checking of CONFIG_PROC_FS Have checking CONFIG_PROC_FS in include/linux/sunrpc/stats.h. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 14b4709b9242..b77b328a06d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2010,17 +2010,15 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); -#endif - if ((err = register_nfs_fs()) != 0) + + err = register_nfs_fs(); + if (err) goto out0; return 0; out0: -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2051,9 +2049,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); From bc4da2a2bee2aaf5ed62b8bb676a15d010af0fc0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:59:04 +0800 Subject: [PATCH 466/839] nfs: Drop bad comment in nfs41_walk_client_list() Commit 7b1f1fd184 "NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list" have change the logical of the list_for_each_entry(). Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18..3aa6a9ba5113 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -676,7 +676,6 @@ found: break; } - /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); nfs_put_client(prev); From d356a7d1e75003063fd5f499366b1043a0f79f6c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:59:42 +0800 Subject: [PATCH 467/839] nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh NFS_ATTR_FATTR_V4_REFERRAL is only set in nfs4_proc_lookup_common. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4getroot.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index c0b3a16b4a00..039b3eb6d834 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p goto out; } - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); out: nfs_free_fattr(fsinfo.fattr); From b4839ebe21fc5d543b933d83644981ea73e9ba36 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 12:00:13 +0800 Subject: [PATCH 468/839] nfs: Remove invalid tk_pid from debug message Before rpc_run_task(), tk_pid is uninitiated as 0 always. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- fs/nfs/pagelist.c | 3 +-- fs/nfs/write.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75a874fd1591..6f228b5af819 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8150,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) struct rpc_task *task; int status = 0; - dprintk("NFS: %4d initiating layoutcommit call. sync %d " - "lbw: %llu inode %lu\n", - data->task.tk_pid, sync, + dprintk("NFS: initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", sync, data->args.lastbytewritten, data->args.inode->i_ino); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d70228a81056..1da68d3b1eda 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); - dprintk("NFS: %5u initiated pgio call " + dprintk("NFS: initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), hdr->args.count, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 84ba7268833d..359e9ad596c9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1551,7 +1551,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg); - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + dprintk("NFS: initiated commit call\n"); nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); From ceeb0e5d39fcdf4dca2c997bf225c7fc49200b37 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 08:10:09 -0600 Subject: [PATCH 469/839] vfs: Ignore unlocked mounts in fs_fully_visible Limit the mounts fs_fully_visible considers to locked mounts. Unlocked can always be unmounted so considering them adds hassle but no security benefit. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 63b9806235e6..8c7b8e0941b9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3205,11 +3205,15 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; if (!S_ISDIR(inode->i_mode)) goto next; if (inode->i_nlink > 2) From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 15:54:49 -0500 Subject: [PATCH 470/839] fs: Add helper functions for permanently empty directories. To ensure it is safe to mount proc and sysfs I need to check if filesystems that are mounted on top of them are mounted on truly empty directories. Given that some directories can gain entries over time, knowing that a directory is empty right now is insufficient. Therefore add supporting infrastructure for permantently empty directories that proc and sysfs can use when they create mount points for filesystems and fs_fully_visible can use to test for permanently empty directories to ensure that nothing will be gained by mounting a fresh copy of proc or sysfs. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/libfs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 + 2 files changed, 98 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index cb1fb4b9b637..02813592e121 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); + + +/* + * Operations for a permanently empty directory. + */ +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = d_inode(dentry); + generic_fillattr(inode, stat); + return 0; +} + +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) +{ + return -EPERM; +} + +static int empty_dir_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + return -EOPNOTSUPP; +} + +static int empty_dir_removexattr(struct dentry *dentry, const char *name) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations empty_dir_inode_operations = { + .lookup = empty_dir_lookup, + .permission = generic_permission, + .setattr = empty_dir_setattr, + .getattr = empty_dir_getattr, + .setxattr = empty_dir_setxattr, + .getxattr = empty_dir_getxattr, + .removexattr = empty_dir_removexattr, + .listxattr = empty_dir_listxattr, +}; + +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) +{ + /* An empty directory has two entries . and .. at offsets 0 and 1 */ + return generic_file_llseek_size(file, offset, whence, 2, 2); +} + +static int empty_dir_readdir(struct file *file, struct dir_context *ctx) +{ + dir_emit_dots(file, ctx); + return 0; +} + +static const struct file_operations empty_dir_operations = { + .llseek = empty_dir_llseek, + .read = generic_read_dir, + .iterate = empty_dir_readdir, + .fsync = noop_fsync, +}; + + +void make_empty_dir_inode(struct inode *inode) +{ + set_nlink(inode, 2); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_rdev = 0; + inode->i_size = 2; + inode->i_blkbits = PAGE_SHIFT; + inode->i_blocks = 0; + + inode->i_op = &empty_dir_inode_operations; + inode->i_fop = &empty_dir_operations; +} + +bool is_empty_dir_inode(struct inode *inode) +{ + return (inode->i_fop == &empty_dir_operations) && + (inode->i_op == &empty_dir_inode_operations); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 2d24eeb8e59c..571aab91bfc0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 22:09:14 -0500 Subject: [PATCH 471/839] sysctl: Allow creating permanently empty directories that serve as mountpoints. Add a magic sysctl table sysctl_mount_point that when used to create a directory forces that directory to be permanently empty. Update the code to use make_empty_dir_inode when accessing permanently empty directories. Update the code to not allow adding to permanently empty directories. Update /proc/sys/fs/binfmt_misc to be a permanently empty directory. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/proc/proc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 3 +++ kernel/sysctl.c | 8 +------- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fea2561d773b..fdda62e6115e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* Support for permanently empty directories */ + +struct ctl_table sysctl_mount_point[] = { + { } +}; + +static bool is_empty_dir(struct ctl_table_header *head) +{ + return head->ctl_table[0].child == sysctl_mount_point; +} + +static void set_empty_dir(struct ctl_dir *dir) +{ + dir->header.ctl_table[0].child = sysctl_mount_point; +} + +static void clear_empty_dir(struct ctl_dir *dir) + +{ + dir->header.ctl_table[0].child = NULL; +} + void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) @@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) struct ctl_table *entry; int err; + /* Is this a permanently empty directory? */ + if (is_empty_dir(&dir->header)) + return -EROFS; + + /* Am I creating a permanently empty directory? */ + if (header->ctl_table == sysctl_mount_point) { + if (!RB_EMPTY_ROOT(&dir->root)) + return -EINVAL; + set_empty_dir(dir); + } + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -202,6 +235,8 @@ fail: erase_header(header); put_links(header); fail_links: + if (header->ctl_table == sysctl_mount_point) + clear_empty_dir(dir); header->parent = NULL; drop_sysctl_table(&dir->header); return err; @@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; + if (is_empty_dir(head)) + make_empty_dir_inode(inode); } out: return inode; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea5697..fa7bc29925c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2082b1a88fb9..c3eee4c6d6c1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1531,12 +1531,6 @@ static struct ctl_table vm_table[] = { { } }; -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static struct ctl_table binfmt_misc_table[] = { - { } -}; -#endif - static struct ctl_table fs_table[] = { { .procname = "inode-nr", @@ -1690,7 +1684,7 @@ static struct ctl_table fs_table[] = { { .procname = "binfmt_misc", .mode = 0555, - .child = binfmt_misc_table, + .child = sysctl_mount_point, }, #endif { From eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 May 2015 16:44:25 -0500 Subject: [PATCH 472/839] proc: Allow creating permanently empty directories that serve as mount points Add a new function proc_create_mount_point that when used to creates a directory that can not be added to. Add a new function is_empty_pde to test if a function is a mount point. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/proc/generic.c | 23 +++++++++++++++++++++++ fs/proc/inode.c | 4 ++++ fs/proc/internal.h | 6 ++++++ fs/proc/root.c | 4 ++-- 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index df6327a2b865..e5dee5c3188e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } + if (is_empty_pde(*parent)) { + WARN(1, "attempt to add to permanently empty directory"); + return NULL; + } ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); if (!ent) @@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, } EXPORT_SYMBOL(proc_mkdir); +struct proc_dir_entry *proc_create_mount_point(const char *name) +{ + umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, name, mode, 2); + if (ent) { + ent->data = NULL; + ent->proc_fops = NULL; + ent->proc_iops = NULL; + if (proc_register(parent, ent) < 0) { + kfree(ent); + parent->nlink--; + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8272aaba1bb0..e3eb5524639f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c835b94c0cd3..aa2781095bd1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } extern void pde_put(struct proc_dir_entry *); +static inline bool is_empty_pde(const struct proc_dir_entry *pde) +{ + return S_ISDIR(pde->mode) && !pde->proc_iops; +} +struct proc_dir_entry *proc_create_mount_point(const char *name); + /* * inode.c */ diff --git a/fs/proc/root.c b/fs/proc/root.c index 64e1ab64bde6..68feb0f70e63 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -179,10 +179,10 @@ void __init proc_root_init(void) #endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); - proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ + proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ - proc_mkdir("openprom", NULL); + proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:09:29 -0500 Subject: [PATCH 473/839] kernfs: Add support for always empty directories. Add a new function kernfs_create_empty_dir that can be used to create directory that can not be modified. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/kernfs/dir.c | 38 +++++++++++++++++++++++++++++++++++++- fs/kernfs/inode.c | 2 ++ include/linux/kernfs.h | 3 +++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index f131fc23ffc4..47dc636d80ed 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -585,6 +585,9 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; + if (parent->flags & KERNFS_EMPTY_DIR) + goto out_unlock; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; @@ -776,6 +779,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, return ERR_PTR(rc); } +/** + * kernfs_create_empty_dir - create an always empty directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * + * Returns the created node on success, ERR_PTR() value on failure. + */ +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name) +{ + struct kernfs_node *kn; + int rc; + + /* allocate */ + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + if (!kn) + return ERR_PTR(-ENOMEM); + + kn->flags |= KERNFS_EMPTY_DIR; + kn->dir.root = parent->dir.root; + kn->ns = NULL; + kn->priv = NULL; + + /* link in */ + rc = kernfs_add_one(kn); + if (!rc) + return kn; + + kernfs_put(kn); + return ERR_PTR(rc); +} + static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1247,7 +1282,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) + if (!kernfs_active(kn) || !kernfs_active(new_parent) || + (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 2da8493a380b..756dd56aaf60 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) case KERNFS_DIR: inode->i_op = &kernfs_dir_iops; inode->i_fop = &kernfs_dir_fops; + if (kn->flags & KERNFS_EMPTY_DIR) + make_empty_dir_inode(inode); break; case KERNFS_FILE: inode->i_size = kn->attr.size; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..29d1896c3ba5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:31:40 -0500 Subject: [PATCH 474/839] sysfs: Add support for permanently empty directories to serve as mount points. Add two functions sysfs_create_mount_point and sysfs_remove_mount_point that hang a permanently empty directory off of a kobject or remove a permanently emptpy directory hanging from a kobject. Export these new functions so modular filesystems can use them. Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" --- fs/sysfs/dir.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 15 +++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0b45ff42f374..94374e435025 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } + +/** + * sysfs_create_mount_point - create an always empty directory + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to add + */ +int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *kn, *parent = parent_kobj->sd; + + kn = kernfs_create_empty_dir(parent, name); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_create_mount_point); + +/** + * sysfs_remove_mount_point - remove an always empty directory. + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to remove + * + */ +void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *parent = parent_kobj->sd; + + kernfs_remove_by_name_ns(parent, name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_mount_point); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17e..9f65758311a4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) From f9bb48825a6b5d02f4cabcc78967c75db903dcdc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 17:35:41 -0500 Subject: [PATCH 475/839] sysfs: Create mountpoints with sysfs_create_mount_point This allows for better documentation in the code and it allows for a simpler and fully correct version of fs_fully_visible to be written. The mount points converted and their filesystems are: /sys/hypervisor/s390/ s390_hypfs /sys/kernel/config/ configfs /sys/kernel/debug/ debugfs /sys/firmware/efi/efivars/ efivarfs /sys/fs/fuse/connections/ fusectl /sys/fs/pstore/ pstore /sys/kernel/tracing/ tracefs /sys/fs/cgroup/ cgroup /sys/kernel/security/ securityfs /sys/fs/selinux/ selinuxfs /sys/fs/smackfs/ smackfs Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" --- arch/s390/hypfs/inode.c | 12 ++++-------- drivers/firmware/efi/efi.c | 6 ++---- fs/configfs/mount.c | 10 ++++------ fs/debugfs/inode.c | 11 ++++------- fs/fuse/inode.c | 9 +++------ fs/pstore/inode.c | 12 ++++-------- fs/tracefs/inode.c | 6 ++---- kernel/cgroup.c | 10 ++++------ security/inode.c | 10 ++++------ security/selinux/selinuxfs.c | 11 +++++------ security/smack/smackfs.c | 8 ++++---- 11 files changed, 40 insertions(+), 65 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b98..2eeb0a0f506d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static struct kobject *s390_kobj; - static int __init hypfs_init(void) { int rc; @@ -481,18 +479,16 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_sprp_exit; } - s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); - if (!s390_kobj) { - rc = -ENOMEM; + rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) goto fail_hypfs_diag0c_exit; - } rc = register_filesystem(&hypfs_type); if (rc) goto fail_filesystem; return 0; fail_filesystem: - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: @@ -510,7 +506,7 @@ fail_dbfs_exit: static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3061bb8629dc..e14363d12690 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char *str) early_param("efi", parse_efi_cmdline); static struct kobject *efi_kobj; -static struct kobject *efivars_kobj; /* * Let's not leave out systab information that snuck into @@ -212,10 +211,9 @@ static int __init efisubsys_init(void) goto err_remove_group; /* and the standard mountpoint for efivarfs */ - efivars_kobj = kobject_create_and_add("efivars", efi_kobj); - if (!efivars_kobj) { + error = sysfs_create_mount_point(efi_kobj, "efivars"); + if (error) { pr_err("efivars: Subsystem registration failed.\n"); - error = -ENOMEM; goto err_remove_group; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index da94e41bdbf6..bca58da65e2b 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -129,8 +129,6 @@ void configfs_release_fs(void) } -static struct kobject *config_kobj; - static int __init configfs_init(void) { int err = -ENOMEM; @@ -141,8 +139,8 @@ static int __init configfs_init(void) if (!configfs_dir_cachep) goto out; - config_kobj = kobject_create_and_add("config", kernel_kobj); - if (!config_kobj) + err = sysfs_create_mount_point(kernel_kobj, "config"); + if (err) goto out2; err = register_filesystem(&configfs_fs_type); @@ -152,7 +150,7 @@ static int __init configfs_init(void) return 0; out3: pr_err("Unable to register filesystem!\n"); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; @@ -163,7 +161,7 @@ out: static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c1e7ffb0dab6..12756040ca20 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -716,20 +716,17 @@ bool debugfs_initialized(void) } EXPORT_SYMBOL_GPL(debugfs_initialized); - -static struct kobject *debug_kobj; - static int __init debugfs_init(void) { int retval; - debug_kobj = kobject_create_and_add("debug", kernel_kobj); - if (!debug_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "debug"); + if (retval) + return retval; retval = register_filesystem(&debug_fs_type); if (retval) - kobject_put(debug_kobj); + sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f39..18dacf9ed8ff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void) } static struct kobject *fuse_kobj; -static struct kobject *connections_kobj; static int fuse_sysfs_init(void) { @@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void) goto out_err; } - connections_kobj = kobject_create_and_add("connections", fuse_kobj); - if (!connections_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fuse_kobj, "connections"); + if (err) goto out_fuse_unregister; - } return 0; @@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void) static void fuse_sysfs_cleanup(void) { - kobject_put(connections_kobj); + sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc43b5f29305..3adcc4669fac 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static struct kobject *pstore_kobj; - static int __init init_pstore_fs(void) { - int err = 0; + int err; /* Create a convenient mount point for people to access pstore */ - pstore_kobj = kobject_create_and_add("pstore", fs_kobj); - if (!pstore_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) goto out; - } err = register_filesystem(&pstore_fs_type); if (err < 0) - kobject_put(pstore_kobj); + sysfs_remove_mount_point(fs_kobj, "pstore"); out: return err; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a..a43df11a163f 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -631,14 +631,12 @@ bool tracefs_initialized(void) return tracefs_registered; } -static struct kobject *trace_kobj; - static int __init tracefs_init(void) { int retval; - trace_kobj = kobject_create_and_add("tracing", kernel_kobj); - if (!trace_kobj) + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); + if (retval) return -EINVAL; retval = register_filesystem(&trace_fs_type); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 469dd547770c..e8a5491be756 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; -static struct kobject *cgroup_kobj; - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5044,13 +5042,13 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } - cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); - if (!cgroup_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "cgroup"); + if (err) + return err; err = register_filesystem(&cgroup_fs_type); if (err < 0) { - kobject_put(cgroup_kobj); + sysfs_remove_mount_point(fs_kobj, "cgroup"); return err; } diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8..0e37e4fba8fa 100644 --- a/security/inode.c +++ b/security/inode.c @@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(securityfs_remove); -static struct kobject *security_kobj; - static int __init securityfs_init(void) { int retval; - security_kobj = kobject_create_and_add("security", kernel_kobj); - if (!security_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "security"); + if (retval) + return retval; retval = register_filesystem(&fs_type); if (retval) - kobject_put(security_kobj); + sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d2787cca1fcb..3d2201413028 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { }; struct vfsmount *selinuxfs_mount; -static struct kobject *selinuxfs_kobj; static int __init init_sel_fs(void) { @@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) if (!selinux_enabled) return 0; - selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); - if (!selinuxfs_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "selinux"); + if (err) + return err; err = register_filesystem(&sel_fs_type); if (err) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); return err; } @@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); #ifdef CONFIG_SECURITY_SELINUX_DISABLE void exit_sel_fs(void) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); kern_unmount(selinuxfs_mount); unregister_filesystem(&sel_fs_type); } diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index d9682985349e..ac4cac7c661a 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2241,16 +2241,16 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; -static struct kset *smackfs_kset; /** * smk_init_sysfs - initialize /sys/fs/smackfs * */ static int smk_init_sysfs(void) { - smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); - if (!smackfs_kset) - return -ENOMEM; + int err; + err = sysfs_create_mount_point(fs_kobj, "smackfs"); + if (err) + return err; return 0; } From 7236c85e1be51a9e25ba0f6e087a66ca89605a49 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 20:51:09 -0500 Subject: [PATCH 476/839] mnt: Update fs_fully_visible to test for permanently empty directories fs_fully_visible attempts to make fresh mounts of proc and sysfs give the mounter no more access to proc and sysfs than if they could have by creating a bind mount. One aspect of proc and sysfs that makes this particularly tricky is that there are other filesystems that typically mount on top of proc and sysfs. As those filesystems are mounted on empty directories in practice it is safe to ignore them. However testing to ensure filesystems are mounted on empty directories has not been something the in kernel data structures have supported so the current test for an empty directory which checks to see if nlink <= 2 is a bit lacking. proc and sysfs have recently been modified to use the new empty_dir infrastructure to create all of their dedicated mount points. Instead of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a directory is empty, test for is_empty_dir_inode(inode). That small change guaranteess mounts found on proc and sysfs really are safe to ignore, because the directories are not only empty but nothing can ever be added to them. This guarantees there is nothing to worry about when mounting proc and sysfs. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8c7b8e0941b9..02c6875dd945 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3214,9 +3214,8 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) /* Only worry about locked mounts */ if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) continue; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Is the directory permanetly empty? */ + if (!is_empty_dir_inode(inode)) goto next; } /* Preserve the locked attributes */ From 93e3bce6287e1fb3e60d3324ed08555b5bbafa89 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 May 2015 09:25:00 -0500 Subject: [PATCH 477/839] vfs: Remove incorrect debugging WARN in prepend_path The warning message in prepend_path is unclear and outdated. It was added as a warning that the mechanism for generating names of pseudo files had been removed from prepend_path and d_dname should be used instead. Unfortunately the warning reads like a general warning, making it unclear what to do with it. Remove the warning. The transition it was added to warn about is long over, and I added code several years ago which in rare cases causes the warning to fire on legitimate code, and the warning is now firing and scaring people for no good reason. Cc: stable@vger.kernel.org Reported-by: Ivan Delalande Reported-by: Omar Sandoval Fixes: f48cfddc6729e ("vfs: In d_path don't call d_dname on a mount point") Signed-off-by: "Eric W. Biederman" --- fs/dcache.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 656ce522a218..615dfc2aa752 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2927,17 +2927,6 @@ restart: vfsmnt = &mnt->mnt; continue; } - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || - dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, - dentry->d_name.name); - } if (!error) error = is_mounted(vfsmnt) ? 1 : 2; break; From 57ff96e0f08bb16c49f66250c9d90452dbbf1293 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 30 Jun 2015 11:28:43 -0700 Subject: [PATCH 478/839] Input: wdt87xx_i2c - remove stray newline in diagnostic message There is no reason to have a newline between plat_id and xml_id1. Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wdt87xx_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 43c3bee4b2e9..9eb8e8cec17e 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -442,7 +442,7 @@ static int wdt87xx_get_sysparam(struct i2c_client *client, param->fw_id = get_unaligned_le16(&buf[1]); dev_info(&client->dev, - "fw_id: 0x%x, plat_id: 0x%x\nxml_id1: %04x, xml_id2: %04x\n", + "fw_id: 0x%x, plat_id: 0x%x, xml_id1: %04x, xml_id2: %04x\n", param->fw_id, param->plat_id, param->xmls_id1, param->xmls_id2); From d55d0b56b9c3a0d94ca6cc1643063610c138dde9 Mon Sep 17 00:00:00 2001 From: HungNien Chen Date: Wed, 1 Jul 2015 10:06:49 -0700 Subject: [PATCH 479/839] Input: wdt87xx_i2c - add a scaling factor for TOUCH_MAJOR event Get the scaling factor when it reads the sys params. The width value will multiple the factor and report the value in the TOUCH_MAJOR event. Signed-off-by: HungNien Chen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wdt87xx_i2c.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 9eb8e8cec17e..fb92ae1c5fae 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -23,7 +23,7 @@ #include #define WDT87XX_NAME "wdt87xx_i2c" -#define WDT87XX_DRV_VER "0.9.5" +#define WDT87XX_DRV_VER "0.9.6" #define WDT87XX_FW_NAME "wdt87xx_fw.bin" #define WDT87XX_CFG_NAME "wdt87xx_cfg.bin" @@ -83,6 +83,7 @@ #define CTL_PARAM_OFFSET_PHY_Y1 18 #define CTL_PARAM_OFFSET_PHY_W 22 #define CTL_PARAM_OFFSET_PHY_H 24 +#define CTL_PARAM_OFFSET_FACTOR 32 /* Communication commands */ #define PACKET_SIZE 56 @@ -161,6 +162,7 @@ struct wdt87xx_sys_param { u16 phy_ch_y; u16 phy_w; u16 phy_h; + u16 scaling_factor; u32 max_x; u32 max_y; }; @@ -401,7 +403,7 @@ static int wdt87xx_get_sysparam(struct i2c_client *client, u8 buf[PKT_READ_SIZE]; int error; - error = wdt87xx_get_string(client, STRIDX_PARAMETERS, buf, 32); + error = wdt87xx_get_string(client, STRIDX_PARAMETERS, buf, 34); if (error) { dev_err(&client->dev, "failed to get parameters\n"); return error; @@ -414,6 +416,10 @@ static int wdt87xx_get_sysparam(struct i2c_client *client, param->phy_w = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_W) / 10; param->phy_h = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_H) / 10; + /* Get the scaling factor of pixel to logical coordinate */ + param->scaling_factor = + get_unaligned_le16(buf + CTL_PARAM_OFFSET_FACTOR); + param->max_x = MAX_UNIT_AXIS; param->max_y = DIV_ROUND_CLOSEST(MAX_UNIT_AXIS * param->phy_h, param->phy_w); @@ -905,8 +911,8 @@ static void wdt87xx_report_contact(struct input_dev *input, u8 *buf) { int finger_id; - u32 x, y; - u8 w, p; + u32 x, y, w; + u8 p; finger_id = (buf[FINGER_EV_V1_OFFSET_ID] >> 3) - 1; if (finger_id < 0) @@ -917,6 +923,8 @@ static void wdt87xx_report_contact(struct input_dev *input, return; w = buf[FINGER_EV_V1_OFFSET_W]; + w *= param->scaling_factor; + p = buf[FINGER_EV_V1_OFFSET_P]; x = get_unaligned_le16(buf + FINGER_EV_V1_OFFSET_X); @@ -995,7 +1003,8 @@ static int wdt87xx_ts_create_input_device(struct wdt87xx_data *wdt) input_abs_set_res(input, ABS_MT_POSITION_X, res); input_abs_set_res(input, ABS_MT_POSITION_Y, res); - input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0xFF, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, + 0, wdt->param.max_x, 0, 0); input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xFF, 0, 0); input_mt_init_slots(input, WDT_MAX_FINGER, From 498f09bce4dc0a2803df7fc25befbdb249074b85 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 2 Jun 2015 12:12:58 -0700 Subject: [PATCH 480/839] soc: qcom: spm: Fix idle on THUMB2 kernels The ifc6410 firmware always enters the kernel in ARM state from deep idle. Use the cpu_resume_arm() wrapper instead of cpu_resume() to property switch into the THUMB2 state when we wake up from idle. This fixes a problem reported by Kevin Hilman on next-20150601 where the ifc6410 fails to boot a THUMB2 kernel because the platform's firmware always enters the kernel in ARM mode from deep idle states. Reported-by: Kevin Hilman Cc: Ard Biesheuvel Cc: Lina Iyer Signed-off-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/soc/qcom/spm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c index b562af816c0a..b04b05a0904e 100644 --- a/drivers/soc/qcom/spm.c +++ b/drivers/soc/qcom/spm.c @@ -260,7 +260,7 @@ static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu) /* We have atleast one power down mode */ cpumask_clear(&mask); cpumask_set_cpu(cpu, &mask); - qcom_scm_set_warm_boot_addr(cpu_resume, &mask); + qcom_scm_set_warm_boot_addr(cpu_resume_arm, &mask); } per_cpu(qcom_idle_ops, cpu) = fns; From ea21feb37e753213a093e1f77b2c05ce57997ccd Mon Sep 17 00:00:00 2001 From: Y Vo Date: Tue, 16 Jun 2015 16:49:44 +0700 Subject: [PATCH 481/839] arm64: dts: Add APM X-Gene standby GPIO controller DTS entries Add standby domain gpio controller for APM X-Gene SoC platform. Signed-off-by: Y Vo Acked-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/apm/apm-storm.dtsi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index c8d3e0e86678..6bbab95307ae 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -607,6 +607,19 @@ phy-names = "sata-phy"; }; + sbgpio: sbgpio@17001000{ + compatible = "apm,xgene-gpio-sb"; + reg = <0x0 0x17001000 0x0 0x400>; + #gpio-cells = <2>; + gpio-controller; + interrupts = <0x0 0x28 0x1>, + <0x0 0x29 0x1>, + <0x0 0x2a 0x1>, + <0x0 0x2b 0x1>, + <0x0 0x2c 0x1>, + <0x0 0x2d 0x1>; + }; + rtc: rtc@10510000 { compatible = "apm,xgene-rtc"; reg = <0x0 0x10510000 0x0 0x400>; From 50f0a44991516b5b9744ecb2c080c2ec6ad21b25 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 17 Jun 2015 10:59:04 +0200 Subject: [PATCH 482/839] ARM: at91/dt: trivial: fix USB udc compatible string To please checkpatch and the tiresome reader, add the "atmel," prefix to the USB udc compatible string. Signed-off-by: Nicolas Ferre Cc: #4.0+ Signed-off-by: Kevin Hilman --- Documentation/devicetree/bindings/usb/atmel-usb.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt index 1be8d7a26c15..5883b73ea1b5 100644 --- a/Documentation/devicetree/bindings/usb/atmel-usb.txt +++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt @@ -79,9 +79,9 @@ Atmel High-Speed USB device controller Required properties: - compatible: Should be one of the following - "at91sam9rl-udc" - "at91sam9g45-udc" - "sama5d3-udc" + "atmel,at91sam9rl-udc" + "atmel,at91sam9g45-udc" + "atmel,sama5d3-udc" - reg: Address and length of the register set for the device - interrupts: Should contain usba interrupt - clocks: Should reference the peripheral and host clocks From 6540165cf41655810ee67b78f01537af022a636a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Jun 2015 10:59:05 +0200 Subject: [PATCH 483/839] ARM: at91/dt: update udc compatible strings at91sam9g45, at91sam9x5 and sama5 SoCs should not use "atmel,at91sam9rl-udc" for their USB device compatible property since this compatible is attached to a specific hardware bug fix. Signed-off-by: Boris Brezillon Acked-by: Alexandre Belloni Tested-by: Bo Shen Acked-by: Nicolas Ferre Cc: #4.0+ Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/at91sam9g45.dtsi | 2 +- arch/arm/boot/dts/at91sam9x5.dtsi | 2 +- arch/arm/boot/dts/sama5d3.dtsi | 2 +- arch/arm/boot/dts/sama5d4.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index d260ba779ae5..18177f5a7464 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1148,7 +1148,7 @@ usb2: gadget@fff78000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00600000 0x80000 0xfff78000 0x400>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 7521bdf17ef2..b6c8df8d380e 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1108,7 +1108,7 @@ usb2: gadget@f803c000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 5ab7548e04e1..9e2444b07bce 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1321,7 +1321,7 @@ usb0: gadget@00500000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00500000 0x100000 0xf8030000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 653a1f851f2b..3ee22ee13c5a 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -127,7 +127,7 @@ usb0: gadget@00400000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00400000 0x100000 0xfc02c000 0x4000>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>; From d80d942bcc8e1555a76774d20be9800cfef2d415 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 27 Jun 2015 09:25:43 -0700 Subject: [PATCH 484/839] ARM: BCM: Do not select CONFIG_MTD_NAND_BRCMNAND This reverts 7dc95b40f599293aedf30432749ad25b51549041 ("ARM: BCM: Enable NAND support for iProc SoCs") since it creates an unmet dependency for MTD_NAND_BRCMNAND which depends on MTD and MTD_NAND, this results in the following build failure for brcmnand: LD init/built-in.o drivers/built-in.o: In function `brcmnand_remove': /home/fainelli/dev/linux/drivers/mtd/nand/brcmnand/brcmnand.c:2234: undefined reference to `nand_release' drivers/built-in.o: In function `brcmnand_init_cs': /home/fainelli/dev/linux/drivers/mtd/nand/brcmnand/brcmnand.c:1933: undefined reference to `nand_scan_ident' /home/fainelli/dev/linux/drivers/mtd/nand/brcmnand/brcmnand.c:1958: undefined reference to `nand_scan_tail' Makefile:931: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Instead, select this driver an all dependencies on the multi_v7_defconfig. Signed-off-by: Florian Fainelli Signed-off-by: Kevin Hilman --- arch/arm/mach-bcm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index e9184feffc4e..0ac9e4b3b265 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -19,7 +19,6 @@ config ARCH_BCM_IPROC select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select PINCTRL - select MTD_NAND_BRCMNAND help This enables support for systems based on Broadcom IPROC architected SoCs. The IPROC complex contains one or more ARM CPUs along with common From ae41a0b7ed7740009b9ec87c7dc05b23f01d361a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 27 Jun 2015 09:25:44 -0700 Subject: [PATCH 485/839] ARM: multi_v7_defconfig: Enable BRCMNAND driver The Broadcom NAND driver is used by brcmstb, bcm63xx, bcm5301x and Cygnus/iProc under mach-bcm, this is enough critical mass to enable it. Signed-off-by: Florian Fainelli Signed-off-by: Kevin Hilman --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fd6a6d23bc20..6d83a1bf0c74 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -169,6 +169,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_NAND_BRCMNAND=y CONFIG_MTD_NAND_DAVINCI=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y From ceb92913078e41e2305250754e0ea144fc3e9b28 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Jun 2015 13:27:45 +0200 Subject: [PATCH 486/839] perf tools: Add missing break for PERF_RECORD_ITRACE_START Missing switch break since introduction of new event: c4937a91ea56 perf tools: handle PERF_RECORD_LOST_SAMPLES Also removing unneeded break for PERF_RECORD_LOST_SAMPLES. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150629112745.GA21507@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 4744673aff1b..7ff682770fdb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1448,10 +1448,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, case PERF_RECORD_AUX: ret = machine__process_aux_event(machine, event); break; case PERF_RECORD_ITRACE_START: - ret = machine__process_itrace_start_event(machine, event); + ret = machine__process_itrace_start_event(machine, event); break; case PERF_RECORD_LOST_SAMPLES: ret = machine__process_lost_samples_event(machine, event, sample); break; - break; default: ret = -1; break; From 9fedfb0c5b05ae6c315de722a0548bb1f1328bf5 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 30 Jun 2015 17:15:20 +0900 Subject: [PATCH 487/839] perf inject: Fill in the missing session freeing after an error occurs When an error occur an error value is just returned without freeing the session. So allocating and freeing session have to be matched as a pair even if an error occurs. Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1435652124-22414-2-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 52ec66b23607..01b06492bd6a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -630,12 +630,13 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; - if (symbol__init(&inject.session->header.env) < 0) - return -1; + ret = symbol__init(&inject.session->header.env); + if (ret < 0) + goto out_delete; ret = __cmd_inject(&inject); +out_delete: perf_session__delete(inject.session); - return ret; } From 249ca1a86067e6a4198f7b2b7e19b505e2f41864 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 30 Jun 2015 17:15:21 +0900 Subject: [PATCH 488/839] perf kmem: Fill in the missing session freeing after an error occurs When an error occurs an error value is just returned without freeing the session. So allocating and freeing session have to be matched as a pair even if an error occurs. Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1435652124-22414-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 950f296dfcf7..23b1faaaa4cc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1916,7 +1916,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) if (!perf_evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) { pr_err(errmsg, "slab", "slab"); - return -1; + goto out_delete; } } @@ -1927,7 +1927,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) "kmem:mm_page_alloc"); if (evsel == NULL) { pr_err(errmsg, "page", "page"); - return -1; + goto out_delete; } kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); From 07a716fff25b826461baa2a07faa2df8c171f220 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 30 Jun 2015 17:15:24 +0900 Subject: [PATCH 489/839] perf report: Fill in the missing session freeing after an error occurs When an error occurs an error value is just returned without freeing the session. So allocating and freeing session have to be matched as a pair even if an error occurs. Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1435652124-22414-6-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 348bed4a2abf..95a47719aec3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -839,8 +839,10 @@ repeat: if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); - if (report.header_only) - return 0; + if (report.header_only) { + ret = 0; + goto error; + } } else if (use_browser == 0) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); From 41b983609a1618e9fa70023d83232ed959056c8a Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Wed, 1 Jul 2015 00:18:44 +0900 Subject: [PATCH 490/839] perf kvm: Fill in the missing session freeing after an error occurs When an error occurs an error value is just returned without freeing the session. So allocating and freeing session have to be matched as a pair even if an error occurs. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1435677525-28055-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 74878cd75078..fc1cffb1b7a2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1061,8 +1061,10 @@ static int read_events(struct perf_kvm_stat *kvm) symbol__init(&kvm->session->header.env); - if (!perf_session__has_traces(kvm->session, "kvm record")) - return -EINVAL; + if (!perf_session__has_traces(kvm->session, "kvm record")) { + ret = -EINVAL; + goto out_delete; + } /* * Do not use 'isa' recorded in kvm_exit tracepoint since it is not @@ -1070,9 +1072,13 @@ static int read_events(struct perf_kvm_stat *kvm) */ ret = cpu_isa_config(kvm); if (ret < 0) - return ret; + goto out_delete; - return perf_session__process_events(kvm->session); + ret = perf_session__process_events(kvm->session); + +out_delete: + perf_session__delete(kvm->session); + return ret; } static int parse_target_str(struct perf_kvm_stat *kvm) From 1df9fade87ea4831c024de9806f88427f9f33e62 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Wed, 1 Jul 2015 21:08:19 +0900 Subject: [PATCH 491/839] perf mem: Fill in the missing session freeing after an error occurs When an error occurs an error value is just returned without freeing the session. So allocating and freeing session have to be matched as a pair even if an error occurs. Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1435752499-11752-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index da2ec06f0742..80170aace5d4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -124,7 +124,6 @@ static int report_raw_events(struct perf_mem *mem) .mode = PERF_DATA_MODE_READ, .force = mem->force, }; - int err = -EINVAL; int ret; struct perf_session *session = perf_session__new(&file, false, &mem->tool); @@ -135,24 +134,21 @@ static int report_raw_events(struct perf_mem *mem) if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); - if (ret) + if (ret < 0) goto out_delete; } - if (symbol__init(&session->header.env) < 0) - return -1; + ret = symbol__init(&session->header.env); + if (ret < 0) + goto out_delete; printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session); - if (err) - return err; - - return 0; + ret = perf_session__process_events(session); out_delete: perf_session__delete(session); - return err; + return ret; } static int report_events(int argc, const char **argv, struct perf_mem *mem) From 642273795fa81da11290ffa90bce6ff242f2a7bb Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 1 Jul 2015 14:54:42 +0300 Subject: [PATCH 492/839] perf tools: Create config.detected into OUTPUT directory Create config.detected into OUTPUT directory instead of source directory. This fixes parallel builds that share the same source directory. Signed-off-by: Aaro Koskinen Acked-by: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1435751683-18500-1-git-send-email-aaro.koskinen@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.build | 2 +- tools/perf/Makefile.perf | 2 +- tools/perf/config/Makefile | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index a51244a8022f..faca2bf6a430 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -25,7 +25,7 @@ build-dir := $(srctree)/tools/build include $(build-dir)/Build.include # do not force detected configuration --include .config-detected +-include $(OUTPUT).config-detected # Init all relevant variables used in build files so # 1) they have correct type diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1af0cfeb7a57..0e0938afbbb1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -545,7 +545,7 @@ config-clean: clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - $(Q)$(RM) .config-detected + $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 317001c94660..094ddaee104c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,9 +11,9 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -$(shell echo -n > .config-detected) -detected = $(shell echo "$(1)=y" >> .config-detected) -detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) +$(shell echo -n > $(OUTPUT).config-detected) +detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) From 5ef7bbb09f7b91ef06524c72e1ab1fc48e0d6682 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 1 Jul 2015 14:54:43 +0300 Subject: [PATCH 493/839] perf tools: Allow to specify custom linker command Allow to specify custom linker command. This fixes MIPS64 builds for 64-bit userspace as it will allow to pass a linker using the correct linker flags for 64-bit ABI (by default GNU binutils ld will assume N32). Signed-off-by: Aaro Koskinen Acked-by: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1435751683-18500-2-git-send-email-aaro.koskinen@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0e0938afbbb1..7a4b549214e3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -110,7 +110,7 @@ $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld +LD ?= $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config From 9026cae1c6d6c288dff0dc1d8350a949432819d8 Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Sat, 27 Jun 2015 15:22:05 +0200 Subject: [PATCH 494/839] hwmon: (dell-smm-hwmon) Use a valid name attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per Documentation/hwmon/sysfs-interface, hwmon name attributes must not include '-', so replace 'dell-smm' with 'dell_smm'. Fixes: 039ae58503f3 ("hwmon: Allow to compile dell-smm-hwmon driver without /proc/i8k") Signed-off-by: Gabriele Mazzotta Acked-by: Pali Rohár Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 2a808822af21..37c16afe007a 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -777,7 +777,7 @@ static int __init i8k_init_hwmon(void) if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2; - i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell-smm", + i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm", NULL, i8k_groups); if (IS_ERR(i8k_hwmon_dev)) { err = PTR_ERR(i8k_hwmon_dev); From 56172d81a9bc37a69b95dd627b8d48135c9c7b31 Mon Sep 17 00:00:00 2001 From: Constantine Shulyupin Date: Fri, 26 Jun 2015 17:47:44 +0300 Subject: [PATCH 495/839] hwmon: (nct7802) fix visibility of temp3 Excerpt from datasheet: 7.2.32 Mode Selection Register RTD3_MD : 00=Closed , 01=Reserved , 10=Thermistor mode , 11=Voltage sense Show temp3 only in Thermistor mode Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Constantine Shulyupin Signed-off-by: Guenter Roeck --- drivers/hwmon/nct7802.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 55765790907b..28fcb2e246d5 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 9 && index < 18 && (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08) /* RD2 */ return 0; - if (index >= 18 && index < 27 && (reg & 0x30) != 0x10) /* RD3 */ + if (index >= 18 && index < 27 && (reg & 0x30) != 0x20) /* RD3 */ return 0; if (index >= 27 && index < 35) /* local */ return attr->mode; From 347d7e45bd09ce09cbc30d5cea9de377eb22f55c Mon Sep 17 00:00:00 2001 From: "Stevens, Nick" Date: Wed, 1 Jul 2015 16:07:41 +0000 Subject: [PATCH 496/839] hwmon: (mcp3021) Fix broken output scaling The mcp3021 scaling code is dividing the VDD (full-scale) value in millivolts by the A2D resolution to obtain the scaling factor. When VDD is 3300mV (the standard value) and the resolution is 12-bit (4096 divisions), the result is a scale factor of 3300/4096, which is always one. Effectively, the raw A2D reading is always being returned because no scaling is applied. This patch fixes the issue and simplifies the register-to-volts calculation, removing the unneeded "output_scale" struct member. Signed-off-by: Nick Stevens Cc: stable@vger.kernel.org # v3.10+ [Guenter Roeck: Dropped unnecessary value check] Signed-off-by: Guenter Roeck --- drivers/hwmon/mcp3021.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c index d219c06a857b..972444a14cca 100644 --- a/drivers/hwmon/mcp3021.c +++ b/drivers/hwmon/mcp3021.c @@ -31,14 +31,11 @@ /* output format */ #define MCP3021_SAR_SHIFT 2 #define MCP3021_SAR_MASK 0x3ff - #define MCP3021_OUTPUT_RES 10 /* 10-bit resolution */ -#define MCP3021_OUTPUT_SCALE 4 #define MCP3221_SAR_SHIFT 0 #define MCP3221_SAR_MASK 0xfff #define MCP3221_OUTPUT_RES 12 /* 12-bit resolution */ -#define MCP3221_OUTPUT_SCALE 1 enum chips { mcp3021, @@ -54,7 +51,6 @@ struct mcp3021_data { u16 sar_shift; u16 sar_mask; u8 output_res; - u8 output_scale; }; static int mcp3021_read16(struct i2c_client *client) @@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client) static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val) { - if (val == 0) - return 0; - - val = val * data->output_scale - data->output_scale / 2; - - return val * DIV_ROUND_CLOSEST(data->vdd, - (1 << data->output_res) * data->output_scale); + return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res); } static ssize_t show_in_input(struct device *dev, struct device_attribute *attr, @@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client, data->sar_shift = MCP3021_SAR_SHIFT; data->sar_mask = MCP3021_SAR_MASK; data->output_res = MCP3021_OUTPUT_RES; - data->output_scale = MCP3021_OUTPUT_SCALE; break; case mcp3221: data->sar_shift = MCP3221_SAR_SHIFT; data->sar_mask = MCP3221_SAR_MASK; data->output_res = MCP3221_OUTPUT_RES; - data->output_scale = MCP3221_OUTPUT_SCALE; break; } From aca2a5d3a8fce1879627bc4ec9d180b74ae512e2 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:04 +0800 Subject: [PATCH 497/839] ACPICA: Hardware: Enable 64-bit firmware waking vector for selected FACS ACPICA commit 7aa598d711644ab0de5f70ad88f1e2de253115e4 The following commit is reported to have broken s2ram on some platforms: Commit: 0249ed2444d65d65fc3f3f64f398f1ad0b7e54cd ACPICA: Add option to favor 32-bit FADT addresses. The platform reports 2 FACS tables (which is not allowed by ACPI specification) and the new 32-bit address favor rule forces OSPMs to use the FACS table reported via FADT's X_FIRMWARE_CTRL field. The root cause of the reported bug might be one of the followings: 1. BIOS may favor the 64-bit firmware waking vector address when the version of the FACS is greater than 0 and Linux currently only supports resuming from the real mode, so the 64-bit firmware waking vector has never been set and might be invalid to BIOS while the commit enables higher version FACS. 2. BIOS may favor the FACS reported via the "FIRMWARE_CTRL" field in the FADT while the commit doesn't set the firmware waking vector address of the FACS reported by "FIRMWARE_CTRL", it only sets the firware waking vector address of the FACS reported by "X_FIRMWARE_CTRL". This patch excludes the cases that can trigger the bugs caused by the root cause 1. ACPI specification says: A. 32-bit FACS address (FIRMWARE_CTRL field in FADT): Physical memory address of the FACS, where OSPM and firmware exchange control information. If the X_FIRMWARE_CTRL field contains a non zero value then this field must be zero. A zero value indicates that no FACS is specified by this field. B. 64-bit FACS address (X_FIRMWARE_CTRL field in FADT): 64bit physical memory address of the FACS. This field is used when the physical address of the FACS is above 4GB. If the FIRMWARE_CTRL field contains a non zero value then this field must be zero. A zero value indicates that no FACS is specified by this field. Thus the 32bit and 64bit firmware waking vector should indicate completely different resuming environment - real mode (1MB addressable) and non real mode (4GB+ addressable) and currently Linux only supports resuming from real mode. This patch enables 64-bit firmware waking vector for selected FACS via new acpi_set_firmware_waking_vectors() API so that it's up to OSPMs to determine which resuming mode should be used by BIOS and ACPICA changes won't trigger the bugs caused by the root cause 1. Lv Zheng. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/7aa598d7 Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/hwxfsleep.c | 71 ++++++++++++++++++++++++--------- include/acpi/acpixf.h | 8 +++- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 82e310b94ae4..25cd9783b640 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -72,6 +72,7 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { /* * These functions are removed for the ACPI_REDUCED_HARDWARE case: + * acpi_set_firmware_waking_vectors * acpi_set_firmware_waking_vector * acpi_set_firmware_waking_vector64 * acpi_enter_sleep_state_s4bios @@ -80,20 +81,24 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { #if (!ACPI_REDUCED_HARDWARE) /******************************************************************************* * - * FUNCTION: acpi_set_firmware_waking_vector + * FUNCTION: acpi_set_firmware_waking_vectors * * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. * * RETURN: Status * - * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS * ******************************************************************************/ -acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +acpi_status +acpi_set_firmware_waking_vectors(acpi_physical_address physical_address, + acpi_physical_address physical_address64) { - ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vectors); /* @@ -106,17 +111,51 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address) /* Set the 32-bit vector */ - acpi_gbl_FACS->firmware_waking_vector = physical_address; + acpi_gbl_FACS->firmware_waking_vector = (u32)physical_address; - /* Clear the 64-bit vector if it exists */ + if (acpi_gbl_FACS->length > 32) { + if (acpi_gbl_FACS->version >= 1) { - if ((acpi_gbl_FACS->length > 32) && (acpi_gbl_FACS->version >= 1)) { - acpi_gbl_FACS->xfirmware_waking_vector = 0; + /* Set the 64-bit vector */ + + acpi_gbl_FACS->xfirmware_waking_vector = + physical_address64; + } else { + /* Clear the 64-bit vector if it exists */ + + acpi_gbl_FACS->xfirmware_waking_vector = 0; + } } return_ACPI_STATUS(AE_OK); } +ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vectors) + +/******************************************************************************* + * + * FUNCTION: acpi_set_firmware_waking_vector + * + * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * + ******************************************************************************/ +acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + + status = acpi_set_firmware_waking_vectors((acpi_physical_address) + physical_address, 0); + + return_ACPI_STATUS(status); +} + ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) #if ACPI_MACHINE_WIDTH == 64 @@ -136,19 +175,15 @@ ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) ******************************************************************************/ acpi_status acpi_set_firmware_waking_vector64(u64 physical_address) { + acpi_status status; + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - /* Determine if the 64-bit vector actually exists */ + status = acpi_set_firmware_waking_vectors(0, + (acpi_physical_address) + physical_address); - if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) { - return_ACPI_STATUS(AE_NOT_EXIST); - } - - /* Clear 32-bit vector, set the 64-bit X_ vector */ - - acpi_gbl_FACS->firmware_waking_vector = 0; - acpi_gbl_FACS->xfirmware_waking_vector = physical_address; - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(status); } ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d68f1cd39c49..821095ce5af7 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -814,8 +814,12 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_set_firmware_waking_vector(u32 - physical_address)) + acpi_set_firmware_waking_vectors + (acpi_physical_address physical_address, + acpi_physical_address physical_address64)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_set_firmware_waking_vector(u32 + physical_address)) #if ACPI_MACHINE_WIDTH == 64 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_set_firmware_waking_vector64(u64 From c04e1fb4396d27f18296db0f914760fa7fe8223a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:11 +0800 Subject: [PATCH 498/839] ACPICA: Tables: Enable both 32-bit and 64-bit FACS ACPICA commit f7b86f35416e3d1f71c3d816ff5075ddd33ed486 The following commit is reported to have broken s2ram on some platforms: Commit: 0249ed2444d65d65fc3f3f64f398f1ad0b7e54cd ACPICA: Add option to favor 32-bit FADT addresses. The platform reports 2 FACS tables (which is not allowed by ACPI specification) and the new 32-bit address favor rule forces OSPMs to use the FACS table reported via FADT's X_FIRMWARE_CTRL field. The root cause of the reported bug might be one of the followings: 1. BIOS may favor the 64-bit firmware waking vector address when the version of the FACS is greater than 0 and Linux currently only supports resuming from the real mode, so the 64-bit firmware waking vector has never been set and might be invalid to BIOS while the commit enables higher version FACS. 2. BIOS may favor the FACS reported via the "FIRMWARE_CTRL" field in the FADT while the commit doesn't set the firmware waking vector address of the FACS reported by "FIRMWARE_CTRL", it only sets the firware waking vector address of the FACS reported by "X_FIRMWARE_CTRL". This patch excludes the cases that can trigger the bugs caused by the root cause 2. There is no handshaking mechanism can be used by OSPM to tell BIOS which FACS is currently used. Thus the FACS reported by "FIRMWARE_CTRL" may still be used by BIOS and the 0 value of the 32-bit firmware waking vector might trigger such failure. This patch tries to favor 32bit FACS address in another way where both the FACS reported by "FIRMWARE_CTRL" and the FACS reported by "X_FIRMWARE_CTRL" are loaded so that further commit can set firmware waking vector in the both tables to ensure we can exclude the cases that trigger the bugs caused by the root cause 2. The exclusion is split into 2 commits as this commit is also useful for dumping more ACPI tables, it won't get reverted when such exclusion is no longer necessary. Lv Zheng. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/f7b86f35 Cc: 3.14.1+ # 3.14.1+ Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/aclocal.h | 1 + drivers/acpi/acpica/tbfadt.c | 21 +++++++++++++-------- drivers/acpi/acpica/tbutils.c | 34 +++++++++++++++++++++++----------- drivers/acpi/acpica/tbxfload.c | 3 ++- include/acpi/acpixf.h | 9 +++++++++ 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index ffdb956391f6..bc600969c6a1 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -213,6 +213,7 @@ struct acpi_table_list { #define ACPI_TABLE_INDEX_DSDT (0) #define ACPI_TABLE_INDEX_FACS (1) +#define ACPI_TABLE_INDEX_X_FACS (2) struct acpi_find_context { char *search_for; diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 7d2486005e3f..05be59c772c7 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index) /* If Hardware Reduced flag is set, there is no FACS */ if (!acpi_gbl_reduced_hardware) { - acpi_tb_install_fixed_table((acpi_physical_address) - acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS, - ACPI_TABLE_INDEX_FACS); + if (acpi_gbl_FADT.facs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.facs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_FACS); + } + if (acpi_gbl_FADT.Xfacs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.Xfacs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_X_FACS); + } } } @@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); /* - * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. + * Expand the 32-bit DSDT addresses to 64-bit as necessary. * Later ACPICA code will always use the X 64-bit field. */ - acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS", - acpi_gbl_FADT.facs, - acpi_gbl_FADT.Xfacs); - acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT", acpi_gbl_FADT.dsdt, acpi_gbl_FADT.Xdsdt); diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 6559a58439c5..2fb1afaacc6d 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,7 +68,8 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - acpi_status status; + struct acpi_table_facs *facs32; + struct acpi_table_facs *facs64; /* If Hardware Reduced flag is set, there is no FACS */ @@ -77,11 +78,22 @@ acpi_status acpi_tb_initialize_facs(void) return (AE_OK); } - status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, - ACPI_CAST_INDIRECT_PTR(struct - acpi_table_header, - &acpi_gbl_FACS)); - return (status); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs32)); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs64)); + + if (acpi_gbl_use32_bit_facs_addresses) { + acpi_gbl_FACS = facs32 ? facs32 : facs64; + } else { + acpi_gbl_FACS = facs64 ? facs64 : facs32; + } + + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ @@ -101,7 +113,7 @@ acpi_status acpi_tb_initialize_facs(void) u8 acpi_tb_tables_loaded(void) { - if (acpi_gbl_root_table_list.current_table_count >= 3) { + if (acpi_gbl_root_table_list.current_table_count >= 4) { return (TRUE); } @@ -357,11 +369,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address) table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header)); /* - * First two entries in the table array are reserved for the DSDT - * and FACS, which are not actually present in the RSDT/XSDT - they - * come from the FADT + * First three entries in the table array are reserved for the DSDT + * and 32bit/64bit FACS, which are not actually present in the + * RSDT/XSDT - they come from the FADT */ - acpi_gbl_root_table_list.current_table_count = 2; + acpi_gbl_root_table_list.current_table_count = 3; /* Initialize the root table array from the RSDT/XSDT */ diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index aadb3002a2dd..b63e35d6d1bf 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -166,7 +166,8 @@ static acpi_status acpi_tb_load_namespace(void) (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if ((!ACPI_COMPARE_NAME + if (!acpi_gbl_root_table_list.tables[i].address || + (!ACPI_COMPARE_NAME (&(acpi_gbl_root_table_list.tables[i].signature), ACPI_SIG_SSDT) && diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 821095ce5af7..445fd1e1688c 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -199,6 +199,15 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); */ ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +/* + * Optionally use 32-bit FACS table addresses. + * It is reported that some platforms fail to resume from system suspending + * if 64-bit FACS table address is selected: + * https://bugzilla.kernel.org/show_bug.cgi?id=74021 + * Default is TRUE, favor the 32-bit addresses. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE); + /* * Optionally truncate I/O addresses to 16 bits. Provides compatibility * with other ACPI implementations. NOTE: During ACPICA initialization, From f06147f9fbf134c0258bd53aebeb4ed5efea5a25 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:18 +0800 Subject: [PATCH 499/839] ACPICA: Hardware: Enable firmware waking vector for both 32-bit and 64-bit FACS ACPICA commit 368eb60778b27b6ae94d3658ddc902ca1342a963 ACPICA commit 70f62a80d65515e1285fdeeb50d94ee6f07df4bd ACPICA commit a04dbfa308a48ab0b2d10519c54a6c533c5c8949 ACPICA commit ebd544ed24c5a4faba11f265e228b7a821a729f5 The following commit is reported to have broken s2ram on some platforms: Commit: 0249ed2444d65d65fc3f3f64f398f1ad0b7e54cd ACPICA: Add option to favor 32-bit FADT addresses. The platform reports 2 FACS tables (which is not allowed by ACPI specification) and the new 32-bit address favor rule forces OSPMs to use the FACS table reported via FADT's X_FIRMWARE_CTRL field. The root cause of the reported bug might be one of the followings: 1. BIOS may favor the 64-bit firmware waking vector address when the version of the FACS is greater than 0 and Linux currently only supports resuming from the real mode, so the 64-bit firmware waking vector has never been set and might be invalid to BIOS while the commit enables higher version FACS. 2. BIOS may favor the FACS reported via the "FIRMWARE_CTRL" field in the FADT while the commit doesn't set the firmware waking vector address of the FACS reported by "FIRMWARE_CTRL", it only sets the firware waking vector address of the FACS reported by "X_FIRMWARE_CTRL". This patch excludes the cases that can trigger the bugs caused by the root cause 2. There is no handshaking mechanism can be used by OSPM to tell BIOS which FACS is currently used. Thus the FACS reported by "FIRMWARE_CTRL" may still be used by BIOS and the 0 value of the 32-bit firmware waking vector might trigger such failure. This patch enables the firmware waking vectors for both 32bit/64bit FACS tables in order to ensure we can exclude the cases that trigger the bugs caused by the root cause 2. The exclusion is split into 2 commits so that if it turns out not to be necessary, this single commit can be reverted without affecting the useful one. Lv Zheng, Bob Moore. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/368eb607 Link: https://github.com/acpica/acpica/commit/70f62a80 Link: https://github.com/acpica/acpica/commit/a04dbfa3 Link: https://github.com/acpica/acpica/commit/ebd544ed Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acglobal.h | 2 + drivers/acpi/acpica/hwxfsleep.c | 96 +++++++++++++++++++++++++-------- drivers/acpi/acpica/tbutils.c | 17 +++--- 3 files changed, 84 insertions(+), 31 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index a0c478784314..53f96a370762 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -61,6 +61,8 @@ ACPI_GLOBAL(struct acpi_table_header, acpi_gbl_original_dsdt_header); #if (!ACPI_REDUCED_HARDWARE) ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_FACS); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs32); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs64); #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 25cd9783b640..52dfd0d050fa 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -50,6 +50,13 @@ ACPI_MODULE_NAME("hwxfsleep") /* Local prototypes */ +#if (!ACPI_REDUCED_HARDWARE) +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64); +#endif + static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id); /* @@ -79,6 +86,58 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { */ #if (!ACPI_REDUCED_HARDWARE) +/******************************************************************************* + * + * FUNCTION: acpi_hw_set_firmware_waking_vectors + * + * PARAMETERS: facs - Pointer to FACS table + * physical_address - 32-bit physical address of ACPI real mode + * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS + * + ******************************************************************************/ + +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64) +{ + ACPI_FUNCTION_TRACE(acpi_hw_set_firmware_waking_vectors); + + + /* + * According to the ACPI specification 2.0c and later, the 64-bit + * waking vector should be cleared and the 32-bit waking vector should + * be used, unless we want the wake-up code to be called by the BIOS in + * Protected Mode. Some systems (for example HP dv5-1004nr) are known + * to fail to resume if the 64-bit vector is used. + */ + + /* Set the 32-bit vector */ + + facs->firmware_waking_vector = (u32)physical_address; + + if (facs->length > 32) { + if (facs->version >= 1) { + + /* Set the 64-bit vector */ + + facs->xfirmware_waking_vector = physical_address64; + } else { + /* Clear the 64-bit vector if it exists */ + + facs->xfirmware_waking_vector = 0; + } + } + + return_ACPI_STATUS(AE_OK); +} + /******************************************************************************* * * FUNCTION: acpi_set_firmware_waking_vectors @@ -98,33 +157,24 @@ acpi_status acpi_set_firmware_waking_vectors(acpi_physical_address physical_address, acpi_physical_address physical_address64) { + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vectors); + /* If Hardware Reduced flag is set, there is no FACS */ - /* - * According to the ACPI specification 2.0c and later, the 64-bit - * waking vector should be cleared and the 32-bit waking vector should - * be used, unless we want the wake-up code to be called by the BIOS in - * Protected Mode. Some systems (for example HP dv5-1004nr) are known - * to fail to resume if the 64-bit vector is used. - */ + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS (AE_OK); + } - /* Set the 32-bit vector */ - - acpi_gbl_FACS->firmware_waking_vector = (u32)physical_address; - - if (acpi_gbl_FACS->length > 32) { - if (acpi_gbl_FACS->version >= 1) { - - /* Set the 64-bit vector */ - - acpi_gbl_FACS->xfirmware_waking_vector = - physical_address64; - } else { - /* Clear the 64-bit vector if it exists */ - - acpi_gbl_FACS->xfirmware_waking_vector = 0; - } + if (acpi_gbl_facs32) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs32, + physical_address, + physical_address64); + } + if (acpi_gbl_facs64) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs64, + physical_address, + physical_address64); } return_ACPI_STATUS(AE_OK); diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 2fb1afaacc6d..2bb6a117c840 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,8 +68,6 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - struct acpi_table_facs *facs32; - struct acpi_table_facs *facs64; /* If Hardware Reduced flag is set, there is no FACS */ @@ -81,18 +79,21 @@ acpi_status acpi_tb_initialize_facs(void) (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, ACPI_CAST_INDIRECT_PTR(struct acpi_table_header, - &facs32)); + &acpi_gbl_facs32)); (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, ACPI_CAST_INDIRECT_PTR(struct acpi_table_header, - &facs64)); + &acpi_gbl_facs64)); - if (acpi_gbl_use32_bit_facs_addresses) { - acpi_gbl_FACS = facs32 ? facs32 : facs64; - } else { - acpi_gbl_FACS = facs64 ? facs64 : facs32; + if (acpi_gbl_facs64 + && (!acpi_gbl_facs32 || !acpi_gbl_use32_bit_facs_addresses)) { + acpi_gbl_FACS = acpi_gbl_facs64; + } else if (acpi_gbl_facs32) { + acpi_gbl_FACS = acpi_gbl_facs32; } + /* If there is no FACS, just continue. There was already an error msg */ + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ From c04be18448355441a0c424362df65b6422e27bda Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:26 +0800 Subject: [PATCH 500/839] ACPICA: Tables: Fix an issue that FACS initialization is performed twice ACPICA commit 90f5332a15e9d9ba83831ca700b2b9f708274658 This patch adds a new FACS initialization flag for acpi_tb_initialize(). acpi_enable_subsystem() might be invoked several times in OS bootup process, and we don't want FACS initialization to be invoked twice. Lv Zheng. Link: https://github.com/acpica/acpica/commit/90f5332a Cc: All applicable # All applicable Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utxfinit.c | 10 ++++++---- include/acpi/actypes.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 083a76891889..42a32a66ef22 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags) * Obtain a permanent mapping for the FACS. This is required for the * Global Lock and the Firmware Waking Vector */ - status = acpi_tb_initialize_facs(); - if (ACPI_FAILURE(status)) { - ACPI_WARNING((AE_INFO, "Could not map the FACS table")); - return_ACPI_STATUS(status); + if (!(flags & ACPI_NO_FACS_INIT)) { + status = acpi_tb_initialize_facs(); + if (ACPI_FAILURE(status)) { + ACPI_WARNING((AE_INFO, "Could not map the FACS table")); + return_ACPI_STATUS(status); + } } #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 63fd7f5e9fb3..ff0b53e128ee 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -568,6 +568,7 @@ typedef u64 acpi_integer; #define ACPI_NO_ACPI_ENABLE 0x10 #define ACPI_NO_DEVICE_INIT 0x20 #define ACPI_NO_OBJECT_INIT 0x40 +#define ACPI_NO_FACS_INIT 0x80 /* * Initialization state From 0ea61381788a37d864f9841b0fe97d40f7058f3b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:34 +0800 Subject: [PATCH 501/839] ACPICA: Tables: Enable default 64-bit FADT addresses favor ACPICA commit 4da56eeae0749dfe8491285c1e1fad48f6efafd8 The following commit temporarily disables correct 64-bit FADT addresses favor during the period the root cause of the bug is not fixed: Commit: 85dbd5801f62b66e2aa7826aaefcaebead44c8a6 ACPICA: Tables: Restore old behavor to favor 32-bit FADT addresses. With enough protections, this patch re-enables 64-bit FADT addresses by default. If regressions are reported against such change, this patch should be bisected and reverted. Note that 64-bit FACS favor and 64-bit firmware waking vector favor are excluded by this commit in order not to break OSPMs. Lv Zheng. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/4da56eea Cc: 3.15.1+ # 3.15.1+ Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 445fd1e1688c..213ed7ecfd8d 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -195,9 +195,9 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); * address. Although ACPICA adheres to the ACPI specification which * requires the use of the corresponding 64-bit address if it is non-zero, * some machines have been found to have a corrupted non-zero 64-bit - * address. Default is TRUE, favor the 32-bit addresses. + * address. Default is FALSE, do not favor the 32-bit addresses. */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE); /* * Optionally use 32-bit FACS table addresses. From 7b09d8fdede65ea0ff4dc79654d10856f0aff0b8 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:43 +0800 Subject: [PATCH 502/839] ACPICA: MSVC6: Fix build issue for variable argument macros ACPICA commit 72f5a358f28c5d154ed613c142c7dca03192c5ee This patch intoduces generic variable macro detection support and fixes build breakage issue with macros using __VA_ARGS__ feature defined in C99. This patch fixes this build issue. Lv Zheng. This patch doesn't affect Linux kernel. Link: https://github.com/acpica/acpica/commit/72f5a358 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acoutput.h | 13 +++++++++++-- include/acpi/platform/acgcc.h | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index a8f344363e77..f56de8c5d844 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -294,8 +294,12 @@ /* DEBUG_PRINT functions */ -#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist -#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist +#ifndef COMPILER_VA_MACRO + +#define ACPI_DEBUG_PRINT(plist) acpi_debug_print plist +#define ACPI_DEBUG_PRINT_RAW(plist) acpi_debug_print_raw plist + +#else /* Helper macros for DEBUG_PRINT */ @@ -315,6 +319,11 @@ ACPI_DO_DEBUG_PRINT (acpi_debug_print_raw, level, line, \ filename, modulename, component, __VA_ARGS__) +#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist +#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist + +#endif + /* * Function entry tracing * diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index f54de0a63558..5457a06cb528 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -75,4 +75,8 @@ #undef strchr #endif +/* GCC supports __VA_ARGS__ in macros */ + +#define COMPILER_VA_MACRO 1 + #endif /* __ACGCC_H__ */ From 07cf6ce595882c71cc5bd6c551820398b9e7f613 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:50 +0800 Subject: [PATCH 503/839] ACPICA: EFI: Add EFI interface definitions to eliminate dependency of GNU EFI ACPICA commit 5d00e67a74542d030f0a55e7a947a020ef0d9693 This patch copies EFI interface definitions to the ACPICA code base so that the EFI utility support can be ported to other EFI implementation. Known issues: 1. MS Builds of uefi_call_wrapper() The uefi_call_wrapper() in GNU EFI is implemented in a the way to work around the ABI difference between Unix and MS. While I don't have environment to test the MS builds. In order to port the ACPICA utilities to other EFI implementation, all that need to be done is to impelement the 64-bit division support and the program entry point where the efi_main() is invoked. Code to impelement these is platform specific, and ACPICA currently choose to hide such platform specific code within the specific EFI impelementation. Lv Zheng. This patch doesn't affect Linux kernel. Link: https://github.com/acpica/acpica/commit/5d00e67a Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acenvex.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h index 14dc6f68ca18..0a7dc8e583b1 100644 --- a/include/acpi/platform/acenvex.h +++ b/include/acpi/platform/acenvex.h @@ -56,6 +56,12 @@ #if defined(_LINUX) || defined(__linux__) #include +#elif defined(_AED_EFI) +#include "acefiex.h" + +#elif defined(_GNU_EFI) +#include "acefiex.h" + #elif defined(__DragonFly__) #include "acdragonflyex.h" From cbc823405a48c93a6e98bb97ccdcb23ae034ab93 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 1 Jul 2015 14:43:57 +0800 Subject: [PATCH 504/839] ACPICA: Add dragon_fly support to unix file mapping file ACPICA commit 795b215d6fd062386f0a1c23dff9ffa244683c4f ACPICA BZ 1130 This patch doesn't affect Linux kernel. Link: https://bugs.acpica.org/show_bug.cgi?id=1130 Link: https://github.com/acpica/acpica/commit/795b215d Signed-off-by: Sascha Wildner Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/os_specific/service_layers/osunixmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 0b1fa290245a..44ad4889d468 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif -#ifdef _free_BSD +#if defined(_dragon_fly) || defined(_free_BSD) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE From f65358e5724890ef473d28762f6de0f3284b146e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 1 Jul 2015 14:44:04 +0800 Subject: [PATCH 505/839] ACPICA: Utilities: Add _CLS processing ACPICA commit 9a2b638acb3a7215209432e070c6bd0312374229 ACPI Device object often contains a _CLS object to supply PCI-defined class code for the device. This patch introduces logic to process the _CLS object. Suravee Suthikulpanit, Lv Zheng. Link: https://github.com/acpica/acpica/commit/9a2b638a Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acinterp.h | 2 + drivers/acpi/acpica/acutils.h | 4 ++ drivers/acpi/acpica/exutils.c | 32 ++++++++++++ drivers/acpi/acpica/nsxfname.c | 23 +++++++-- drivers/acpi/acpica/utids.c | 91 +++++++++++++++++++++++++++++++++- include/acpi/acnames.h | 1 + include/acpi/actypes.h | 24 +++++---- 7 files changed, 164 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index 1886bde54b5d..7ac98000b46b 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -468,6 +468,8 @@ void acpi_ex_eisa_id_to_string(char *dest, u64 compressed_id); void acpi_ex_integer_to_string(char *dest, u64 value); +void acpi_ex_pci_cls_to_string(char *dest, u8 class_code[3]); + u8 acpi_is_valid_space_id(u8 space_id); /* diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index d49f5c7a20d9..6391c9706e84 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -430,6 +430,10 @@ acpi_status acpi_ut_execute_CID(struct acpi_namespace_node *device_node, struct acpi_pnp_device_id_list ** return_cid_list); +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id); + /* * utlock - reader/writer locks */ diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 3f4225e95d93..30c3f464fda5 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -378,6 +378,38 @@ void acpi_ex_integer_to_string(char *out_string, u64 value) } } +/******************************************************************************* + * + * FUNCTION: acpi_ex_pci_cls_to_string + * + * PARAMETERS: out_string - Where to put the converted string (7 bytes) + * PARAMETERS: class_code - PCI class code to be converted (3 bytes) + * + * RETURN: None + * + * DESCRIPTION: Convert 3-bytes PCI class code to string representation. + * Return buffer must be large enough to hold the string. The + * string returned is always exactly of length + * ACPI_PCICLS_STRING_SIZE (includes null terminator). + * + ******************************************************************************/ + +void acpi_ex_pci_cls_to_string(char *out_string, u8 class_code[3]) +{ + + ACPI_FUNCTION_ENTRY(); + + /* All 3 bytes are hexadecimal */ + + out_string[0] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 4); + out_string[1] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 0); + out_string[2] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 4); + out_string[3] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 0); + out_string[4] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 4); + out_string[5] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 0); + out_string[6] = 0; +} + /******************************************************************************* * * FUNCTION: acpi_is_valid_space_id diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index d66c326485d8..dc0836a9cf17 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -260,7 +260,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, * control methods (Such as in the case of a device.) * * For Device and Processor objects, run the Device _HID, _UID, _CID, _SUB, - * _STA, _ADR, _sx_w, and _sx_d methods. + * _CLS, _STA, _ADR, _sx_w, and _sx_d methods. * * Note: Allocates the return buffer, must be freed by the caller. * @@ -276,11 +276,12 @@ acpi_get_object_info(acpi_handle handle, struct acpi_pnp_device_id *hid = NULL; struct acpi_pnp_device_id *uid = NULL; struct acpi_pnp_device_id *sub = NULL; + struct acpi_pnp_device_id *cls = NULL; char *next_id_string; acpi_object_type type; acpi_name name; u8 param_count = 0; - u8 valid = 0; + u16 valid = 0; u32 info_size; u32 i; acpi_status status; @@ -320,7 +321,7 @@ acpi_get_object_info(acpi_handle handle, if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* * Get extra info for ACPI Device/Processor objects only: - * Run the Device _HID, _UID, _SUB, and _CID methods. + * Run the Device _HID, _UID, _SUB, _CID, and _CLS methods. * * Note: none of these methods are required, so they may or may * not be present for this device. The Info->Valid bitfield is used @@ -363,6 +364,14 @@ acpi_get_object_info(acpi_handle handle, sizeof(struct acpi_pnp_device_id_list)); valid |= ACPI_VALID_CID; } + + /* Execute the Device._CLS method */ + + status = acpi_ut_execute_CLS(node, &cls); + if (ACPI_SUCCESS(status)) { + info_size += cls->length; + valid |= ACPI_VALID_CLS; + } } /* @@ -486,6 +495,11 @@ acpi_get_object_info(acpi_handle handle, } } + if (cls) { + next_id_string = acpi_ns_copy_device_id(&info->class_code, + cls, next_id_string); + } + /* Copy the fixed-length data */ info->info_size = info_size; @@ -510,6 +524,9 @@ cleanup: if (cid_list) { ACPI_FREE(cid_list); } + if (cls) { + ACPI_FREE(cls); + } return (status); } diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c index 27431cfc1c44..3afe07f19b9f 100644 --- a/drivers/acpi/acpica/utids.c +++ b/drivers/acpi/acpica/utids.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Module Name: utids - support for device Ids - HID, UID, CID + * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS * *****************************************************************************/ @@ -416,3 +416,92 @@ cleanup: acpi_ut_remove_reference(obj_desc); return_ACPI_STATUS(status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CLS + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the _CLS is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _CLS control method that returns PCI-defined + * class code of the device. The _CLS value is always a package + * containing PCI class information as a list of integers. + * The returned string has format "BBSSPP", where: + * BB = Base-class code + * SS = Sub-class code + * PP = Programming Interface code + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + union acpi_operand_object **cls_objects; + u32 count; + struct acpi_pnp_device_id *cls; + u32 length; + acpi_status status; + u8 class_code[3] = { 0, 0, 0 }; + + ACPI_FUNCTION_TRACE(ut_execute_CLS); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CLS, + ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + length = ACPI_PCICLS_STRING_SIZE; + cls_objects = obj_desc->package.elements; + count = obj_desc->package.count; + + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + if (count > 0 + && cls_objects[0]->common.type == ACPI_TYPE_INTEGER) { + class_code[0] = (u8)cls_objects[0]->integer.value; + } + if (count > 1 + && cls_objects[1]->common.type == ACPI_TYPE_INTEGER) { + class_code[1] = (u8)cls_objects[1]->integer.value; + } + if (count > 2 + && cls_objects[2]->common.type == ACPI_TYPE_INTEGER) { + class_code[2] = (u8)cls_objects[2]->integer.value; + } + } + + /* Allocate a buffer for the CLS */ + + cls = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) + + (acpi_size) length); + if (!cls) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after PNP_DEVICE_ID struct */ + + cls->string = + ACPI_ADD_PTR(char, cls, sizeof(struct acpi_pnp_device_id)); + + /* Simply copy existing string */ + + acpi_ex_pci_cls_to_string(cls->string, class_code); + cls->length = length; + *return_id = cls; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index 273de709495c..b52c0dc4b492 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -51,6 +51,7 @@ #define METHOD_NAME__BBN "_BBN" #define METHOD_NAME__CBA "_CBA" #define METHOD_NAME__CID "_CID" +#define METHOD_NAME__CLS "_CLS" #define METHOD_NAME__CRS "_CRS" #define METHOD_NAME__DDN "_DDN" #define METHOD_NAME__HID "_HID" diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index ff0b53e128ee..d791b986002a 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1141,6 +1141,10 @@ u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported); #define ACPI_UUID_LENGTH 16 +/* Length of 3-byte PCI class code values when converted back to a string */ + +#define ACPI_PCICLS_STRING_SIZE 7 /* Includes null terminator */ + /* Structures used for device/processor HID, UID, CID, and SUB */ struct acpi_pnp_device_id { @@ -1163,7 +1167,7 @@ struct acpi_device_info { u32 name; /* ACPI object Name */ acpi_object_type type; /* ACPI object Type */ u8 param_count; /* If a method, required parameter count */ - u8 valid; /* Indicates which optional fields are valid */ + u16 valid; /* Indicates which optional fields are valid */ u8 flags; /* Miscellaneous info */ u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ @@ -1172,6 +1176,7 @@ struct acpi_device_info { struct acpi_pnp_device_id hardware_id; /* _HID value */ struct acpi_pnp_device_id unique_id; /* _UID value */ struct acpi_pnp_device_id subsystem_id; /* _SUB value */ + struct acpi_pnp_device_id class_code; /* _CLS value */ struct acpi_pnp_device_id_list compatible_id_list; /* _CID list */ }; @@ -1181,14 +1186,15 @@ struct acpi_device_info { /* Flags for Valid field above (acpi_get_object_info) */ -#define ACPI_VALID_STA 0x01 -#define ACPI_VALID_ADR 0x02 -#define ACPI_VALID_HID 0x04 -#define ACPI_VALID_UID 0x08 -#define ACPI_VALID_SUB 0x10 -#define ACPI_VALID_CID 0x20 -#define ACPI_VALID_SXDS 0x40 -#define ACPI_VALID_SXWS 0x80 +#define ACPI_VALID_STA 0x0001 +#define ACPI_VALID_ADR 0x0002 +#define ACPI_VALID_HID 0x0004 +#define ACPI_VALID_UID 0x0008 +#define ACPI_VALID_SUB 0x0010 +#define ACPI_VALID_CID 0x0020 +#define ACPI_VALID_CLS 0x0040 +#define ACPI_VALID_SXDS 0x0100 +#define ACPI_VALID_SXWS 0x0200 /* Flags for _STA return value (current_status above) */ From a8bd0f07eb594df51845d33f272fb1952fec1a6f Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 1 Jul 2015 14:44:10 +0800 Subject: [PATCH 506/839] ACPICA: ACPI 6.0: Add values for MADT GIC version field ACPICA commit 4b100dc43e8baee8c8b4891b23bc7ad03eba6a28 Support for the new version field in the generic distributor subtable. Hanjun Guo Link: https://github.com/acpica/acpica/commit/4b100dc4 Signed-off-by: Hanjun Guo Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 06b61f01ea59..fcd570999f35 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -835,6 +835,17 @@ struct acpi_madt_generic_distributor { u8 reserved2[3]; /* reserved - must be zero */ }; +/* Values for Version field above */ + +enum acpi_madt_gic_version { + ACPI_MADT_GIC_VERSION_NONE = 0, + ACPI_MADT_GIC_VERSION_V1 = 1, + ACPI_MADT_GIC_VERSION_V2 = 2, + ACPI_MADT_GIC_VERSION_V3 = 3, + ACPI_MADT_GIC_VERSION_V4 = 4, + ACPI_MADT_GIC_VERSION_RESERVED = 5 /* 5 and greater are reserved */ +}; + /* 13: Generic MSI Frame (ACPI 5.1) */ struct acpi_madt_generic_msi_frame { From 8f6f03610483962a9ebfaf38592909a3f0bcfa53 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:44:17 +0800 Subject: [PATCH 507/839] ACPICA: Namespace: Add support to allow overriding objects ACPICA commit 6084e34e44565c6293f446c0202b5e59b055e351 This patch adds an "NamespaceOverride" flag in struct acpi_walk_state, and allows namespace objects to be overridden when this flag is set. Lv Zheng. Link: https://github.com/acpica/acpica/commit/6084e34e Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acnamesp.h | 1 + drivers/acpi/acpica/acstruct.h | 1 + drivers/acpi/acpica/dswload.c | 17 +++++++++++++---- drivers/acpi/acpica/nsaccess.c | 10 +++++++++- drivers/acpi/acpica/nssearch.c | 22 ++++++++++++++++++++-- 5 files changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 952fbe0b7231..0dd088290d80 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -66,6 +66,7 @@ #define ACPI_NS_PREFIX_IS_SCOPE 0x10 #define ACPI_NS_EXTERNAL 0x20 #define ACPI_NS_TEMPORARY 0x40 +#define ACPI_NS_OVERRIDE_IF_FOUND 0x80 /* Flags for acpi_ns_walk_namespace */ diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h index 87c7860b3394..44997ca02ae2 100644 --- a/drivers/acpi/acpica/acstruct.h +++ b/drivers/acpi/acpica/acstruct.h @@ -82,6 +82,7 @@ struct acpi_walk_state { u8 return_used; u8 scope_depth; u8 pass_number; /* Parse pass during table load */ + u8 namespace_override; /* Override existing objects */ u8 result_size; /* Total elements for the result stack */ u8 result_count; /* Current number of occupied elements of result stack */ u32 aml_offset; diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 843942fb4be5..845ff44919c3 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -315,10 +315,19 @@ acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state, flags = ACPI_NS_NO_UPSEARCH; if ((walk_state->opcode != AML_SCOPE_OP) && (!(walk_state->parse_flags & ACPI_PARSE_DEFERRED_OP))) { - flags |= ACPI_NS_ERROR_IF_FOUND; - ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, - "[%s] Cannot already exist\n", - acpi_ut_get_type_name(object_type))); + if (walk_state->namespace_override) { + flags |= ACPI_NS_OVERRIDE_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Override allowed\n", + acpi_ut_get_type_name + (object_type))); + } else { + flags |= ACPI_NS_ERROR_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Cannot already exist\n", + acpi_ut_get_type_name + (object_type))); + } } else { ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "[%s] Both Find or Create allowed\n", diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 24fa19a76d70..19ad0b5993d5 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -304,7 +304,9 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, return_ACPI_STATUS(AE_BAD_PARAMETER); } - local_flags = flags & ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_SEARCH_PARENT); + local_flags = flags & + ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_OVERRIDE_IF_FOUND | + ACPI_NS_SEARCH_PARENT); *return_node = ACPI_ENTRY_NOT_FOUND; acpi_gbl_ns_lookup_count++; @@ -547,6 +549,12 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, if (flags & ACPI_NS_ERROR_IF_FOUND) { local_flags |= ACPI_NS_ERROR_IF_FOUND; } + + /* Set override flag according to caller */ + + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + local_flags |= ACPI_NS_OVERRIDE_IF_FOUND; + } } /* Extract one ACPI name from the front of the pathname */ diff --git a/drivers/acpi/acpica/nssearch.c b/drivers/acpi/acpica/nssearch.c index 4a9d4a66016e..47fbe5b33524 100644 --- a/drivers/acpi/acpica/nssearch.c +++ b/drivers/acpi/acpica/nssearch.c @@ -325,8 +325,26 @@ acpi_ns_search_and_enter(u32 target_name, * If we found it AND the request specifies that a find is an error, * return the error */ - if ((status == AE_OK) && (flags & ACPI_NS_ERROR_IF_FOUND)) { - status = AE_ALREADY_EXISTS; + if (status == AE_OK) { + + /* The node was found in the namespace */ + + /* + * If the namespace override feature is enabled for this node, + * delete any existing node. This can only happen during the + * boot stage, thus it is safe to remove the node here. + */ + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + acpi_ns_delete_children(*return_node); + acpi_ns_remove_node(*return_node); + *return_node = ACPI_ENTRY_NOT_FOUND; + } + + /* Return an error if we don't expect to find the object */ + + else if (flags & ACPI_NS_ERROR_IF_FOUND) { + status = AE_ALREADY_EXISTS; + } } #ifdef ACPI_ASL_COMPILER if (*return_node && (*return_node)->type == ACPI_TYPE_ANY) { From fe536995f2fe26fff111a9a06bf3f71e179c92cf Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:44:23 +0800 Subject: [PATCH 508/839] ACPICA: Namespace: Add support of OSDT table ACPICA commit 27415c82fcecf467446f66d1007a0691cc5f3709 This patch adds OSDT (Override System Definition Table) support. When OSDT is loaded, conflict namespace objects will be overridden by the AML interpreter. Bob Moore, Lv Zheng. Link: https://github.com/acpica/acpica/commit/27415c82 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsparse.c | 7 +++++++ drivers/acpi/acpica/tbxfload.c | 6 +++++- drivers/acpi/acpica/utmisc.c | 3 ++- include/acpi/actbl.h | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index c95a119767b5..57a4cfe547e4 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -117,6 +117,13 @@ acpi_ns_one_complete_parse(u32 pass_number, (u8) pass_number); } + /* Found OSDT table, enable the namespace override feature */ + + if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) && + pass_number == ACPI_IMODE_LOAD_PASS1) { + walk_state->namespace_override = TRUE; + } + if (ACPI_FAILURE(status)) { acpi_ds_delete_walk_state(walk_state); goto cleanup; diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index b63e35d6d1bf..960bd999a6b3 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -173,7 +173,11 @@ static acpi_status acpi_tb_load_namespace(void) && !ACPI_COMPARE_NAME(& (acpi_gbl_root_table_list.tables[i]. - signature), ACPI_SIG_PSDT)) + signature), ACPI_SIG_PSDT) + && + !ACPI_COMPARE_NAME(& + (acpi_gbl_root_table_list.tables[i]. + signature), ACPI_SIG_OSDT)) || ACPI_FAILURE(acpi_tb_validate_table (&acpi_gbl_root_table_list.tables[i]))) { diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index cbb7034d28d8..28099e230008 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -97,7 +97,8 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) || ACPI_COMPARE_NAME(table->signature, ACPI_SIG_PSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) { + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT)) { return (TRUE); } diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index cb8a6b97ceda..2d5faf508cad 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -65,6 +65,7 @@ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ #define ACPI_SIG_FACS "FACS" /* Firmware ACPI Control Structure */ +#define ACPI_SIG_OSDT "OSDT" /* Override System Description Table */ #define ACPI_SIG_PSDT "PSDT" /* Persistent System Description Table */ #define ACPI_SIG_RSDP "RSD PTR " /* Root System Description Pointer */ #define ACPI_SIG_RSDT "RSDT" /* Root System Description Table */ From 8ea98655775e904ea406d74a887d0b2254651b5a Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:44:31 +0800 Subject: [PATCH 509/839] ACPICA: Namespace: Change namespace override to avoid node deletion ACPICA commit c0ce529e1fbb8ec47d2522a3aa10f3ab77e16e41 There is no reference counting implemented for struct acpi_namespace_node, so it is currently not removable during runtime. This patch changes the namespace override code to keep the old struct acpi_namespace_node undeleted so that the override mechanism can happen during runtime. Bob Moore. Link: https://github.com/acpica/acpica/commit/c0ce529e Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nssearch.c | 23 +++++++++++++++++++---- include/acpi/acpixf.h | 5 +++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/nssearch.c b/drivers/acpi/acpica/nssearch.c index 47fbe5b33524..d73904013830 100644 --- a/drivers/acpi/acpica/nssearch.c +++ b/drivers/acpi/acpica/nssearch.c @@ -331,13 +331,28 @@ acpi_ns_search_and_enter(u32 target_name, /* * If the namespace override feature is enabled for this node, - * delete any existing node. This can only happen during the - * boot stage, thus it is safe to remove the node here. + * delete any existing attached sub-object and make the node + * look like a new node that is owned by the override table. */ if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + ACPI_DEBUG_PRINT((ACPI_DB_NAMES, + "Namespace override: %4.4s pass %u type %X Owner %X\n", + ACPI_CAST_PTR(char, + &target_name), + interpreter_mode, + (*return_node)->type, + walk_state->owner_id)); + acpi_ns_delete_children(*return_node); - acpi_ns_remove_node(*return_node); - *return_node = ACPI_ENTRY_NOT_FOUND; + if (acpi_gbl_runtime_namespace_override) { + acpi_ut_remove_reference((*return_node)->object); + (*return_node)->object = NULL; + (*return_node)->owner_id = + walk_state->owner_id; + } else { + acpi_ns_remove_node(*return_node); + *return_node = ACPI_ENTRY_NOT_FOUND; + } } /* Return an error if we don't expect to find the object */ diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 213ed7ecfd8d..f6d807170346 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -228,6 +228,11 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_auto_repair, FALSE); */ ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_ssdt_table_install, FALSE); +/* + * Optionally enable runtime namespace override. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_runtime_namespace_override, TRUE); + /* * We keep track of the latest version of Windows that has been requested by * the BIOS. ACPI 5.0. From dc00203b6c6fb1b0ce99f37118d9a3e7a6807d7b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 1 Jul 2015 14:44:37 +0800 Subject: [PATCH 510/839] ACPICA: Update for acpi_install_table memory types ACPICA commit 3f78b7fb3f98f35d62f532c1891deb748ad196c9 Physical/virtual address flags were reversed. Link: https://github.com/acpica/acpica/commit/3f78b7fb Signed-off-by: Zhang Rui Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbxfload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index 960bd999a6b3..cf56e18cdc64 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -224,9 +224,9 @@ acpi_install_table(acpi_physical_address address, u8 physical) ACPI_FUNCTION_TRACE(acpi_install_table); if (physical) { - flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; - } else { flags = ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL; + } else { + flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; } status = acpi_tb_install_standard_table(address, flags, From fde175e385fc28c31a50be2608113689449a4b75 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:44:44 +0800 Subject: [PATCH 511/839] ACPICA: Cleanup output for the ASL Debug object ACPICA commit d4a53a396fe5d384425251b0257f8d125bbed617 Especially for use of the Index operator. For buffers and strings, only output the actual byte pointed to by the index. For packages, only print the package element decoded by the index. Link: https://github.com/acpica/acpica/commit/d4a53a39 Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acobject.h | 1 + drivers/acpi/acpica/exdebug.c | 42 ++++++++++++++++++++++++++++++++-- drivers/acpi/acpica/exdump.c | 9 ++++---- drivers/acpi/acpica/exoparg2.c | 4 ++++ 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 3e9720e1f34f..c81d98d09cac 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -335,6 +335,7 @@ struct acpi_object_reference { void *object; /* name_op=>HANDLE to obj, index_op=>union acpi_operand_object */ struct acpi_namespace_node *node; /* ref_of or Namepath */ union acpi_operand_object **where; /* Target of Index */ + u8 *index_pointer; /* Used for Buffers and Strings */ u32 value; /* Used for Local/Arg/Index/ddb_handle */ }; diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c index e67d0aca3fe6..815442bbd051 100644 --- a/drivers/acpi/acpica/exdebug.c +++ b/drivers/acpi/acpica/exdebug.c @@ -76,6 +76,8 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, { u32 i; u32 timer; + union acpi_operand_object *object_desc; + u32 value; ACPI_FUNCTION_TRACE_PTR(ex_do_debug_object, source_desc); @@ -254,8 +256,44 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, object)->object, level + 4, 0); } else { - acpi_ex_do_debug_object(source_desc->reference. - object, level + 4, 0); + object_desc = source_desc->reference.object; + value = source_desc->reference.value; + + switch (object_desc->common.type) { + case ACPI_TYPE_BUFFER: + + acpi_os_printf("Buffer[%u] = 0x%2.2X\n", + value, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_STRING: + + acpi_os_printf + ("String[%u] = \"%c\" (0x%2.2X)\n", + value, + *source_desc->reference. + index_pointer, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_PACKAGE: + + acpi_os_printf("Package[%u] = ", value); + acpi_ex_do_debug_object(*source_desc-> + reference.where, + level + 4, 0); + break; + + default: + + acpi_os_printf + ("Unknown Reference object type %X\n", + object_desc->common.type); + break; + } } } break; diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index 1da52bef632e..401e7edcd419 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -224,7 +224,7 @@ static struct acpi_exdump_info acpi_ex_dump_index_field[5] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(index_field.data_obj), "Data Object"} }; -static struct acpi_exdump_info acpi_ex_dump_reference[8] = { +static struct acpi_exdump_info acpi_ex_dump_reference[9] = { {ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_reference), NULL}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.class), "Class"}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.target_type), "Target Type"}, @@ -232,6 +232,8 @@ static struct acpi_exdump_info acpi_ex_dump_reference[8] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.object), "Object Desc"}, {ACPI_EXD_NODE, ACPI_EXD_OFFSET(reference.node), "Node"}, {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.where), "Where"}, + {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.index_pointer), + "Index Pointer"}, {ACPI_EXD_REFERENCE, 0, NULL} }; @@ -1005,14 +1007,13 @@ static void acpi_ex_dump_reference_obj(union acpi_operand_object *obj_desc) } else if (obj_desc->reference.object) { if (ACPI_GET_DESCRIPTOR_TYPE(obj_desc) == ACPI_DESC_TYPE_OPERAND) { - acpi_os_printf(" Target: %p", + acpi_os_printf("%22s %p", "Target :", obj_desc->reference.object); if (obj_desc->reference.class == ACPI_REFCLASS_TABLE) { acpi_os_printf(" Table Index: %X\n", obj_desc->reference.value); } else { - acpi_os_printf(" Target: %p [%s]\n", - obj_desc->reference.object, + acpi_os_printf(" [%s]\n", acpi_ut_get_type_name(((union acpi_operand_object *) diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c index fcc618aa2061..6fac5e0a698a 100644 --- a/drivers/acpi/acpica/exoparg2.c +++ b/drivers/acpi/acpica/exoparg2.c @@ -380,6 +380,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_BUFFER: @@ -391,6 +393,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_PACKAGE: From 428394dfdfc4596110269873cf5a2e7d82b2422e Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:44:52 +0800 Subject: [PATCH 512/839] ACPICA: acpidump: Allow customized tables to be dumped without accessing /dev/mem ACPICA commit ab29013cfa2424140446aff196a70b211ab343a9 The /dev/mem can be configured out, in which case, acpidump should still work with "-c" option as tables can be found in /sys/firmware/acpi/tables. This patch allows acpidump to work without /dev/mem. This patch has been tested with "acpidump -c" and "acpidump -c -n FADT". And it worked as expected. Lv Zheng. Link: https://github.com/acpica/acpica/commit/ab29013c Reported-by: Al Stone Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- .../os_specific/service_layers/oslinuxtbl.c | 117 +++++++++--------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index db15c9d2049e..3aff9a2c7088 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -582,65 +582,68 @@ static acpi_status osl_table_initialize(void) return (AE_OK); } - /* Get RSDP from memory */ - - status = osl_load_rsdp(); - if (ACPI_FAILURE(status)) { - return (status); - } - - /* Get XSDT from memory */ - - if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { - if (gbl_xsdt) { - free(gbl_xsdt); - gbl_xsdt = NULL; - } - - gbl_revision = 2; - status = osl_get_bios_table(ACPI_SIG_XSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_xsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); - } - } - - /* Get RSDT from memory */ - - if (gbl_rsdp.rsdt_physical_address) { - if (gbl_rsdt) { - free(gbl_rsdt); - gbl_rsdt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_RSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_rsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); - } - } - - /* Get FADT from memory */ - - if (gbl_fadt) { - free(gbl_fadt); - gbl_fadt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_FADT, 0, - ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_fadt), - &gbl_fadt_address); - if (ACPI_FAILURE(status)) { - return (status); - } - if (!gbl_dump_customized_tables) { + /* Get RSDP from memory */ + + status = osl_load_rsdp(); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Get XSDT from memory */ + + if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { + if (gbl_xsdt) { + free(gbl_xsdt); + gbl_xsdt = NULL; + } + + gbl_revision = 2; + status = osl_get_bios_table(ACPI_SIG_XSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_xsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } + } + + /* Get RSDT from memory */ + + if (gbl_rsdp.rsdt_physical_address) { + if (gbl_rsdt) { + free(gbl_rsdt); + gbl_rsdt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_rsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } + } + + /* Get FADT from memory */ + + if (gbl_fadt) { + free(gbl_fadt); + gbl_fadt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_FADT, 0, + ACPI_CAST_PTR(struct + acpi_table_header *, + &gbl_fadt), + &gbl_fadt_address); + if (ACPI_FAILURE(status)) { + return (status); + } + /* Add mandatory tables to global table list first */ status = osl_add_table_to_list(ACPI_RSDP_NAME, 0); From 4fb80c3769b707524b0f2f6843633c6bd2131afc Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:44:58 +0800 Subject: [PATCH 513/839] ACPICA: acpidump: Convert the default behavior to dump from /sys/firmware/acpi/tables ACPICA commit 04c3bd7e9d6aeb2b3edebe99c90dc271ae4e6353 In order to work without any additional option to dump tables when /dev/mem doesn't exist, this patch switches the default behavior of acpidump to dump from /sys/firmware/acpi/tables. Reported by Al Stone, Fixed by Lv Zheng. Link: https://github.com/acpica/acpica/commit/04c3bd7e Reported-by: Al Stone Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/tools/acpidump/acpidump.h | 2 +- tools/power/acpi/tools/acpidump/apmain.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 84bdef0136cb..eed534481434 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -66,7 +66,7 @@ EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE); -EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE); +EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE); EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE); EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL); EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL); diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index d0ba6535f5af..57620f66ae6c 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS "?a:bcf:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" /****************************************************************************** * @@ -96,7 +96,6 @@ static void ap_display_usage(void) ACPI_USAGE_HEADER("acpidump [options]"); ACPI_OPTION("-b", "Dump tables to binary files"); - ACPI_OPTION("-c", "Dump customized tables"); ACPI_OPTION("-h -?", "This help message"); ACPI_OPTION("-o ", "Redirect output to file"); ACPI_OPTION("-r
", "Dump tables from specified RSDP"); @@ -107,6 +106,7 @@ static void ap_display_usage(void) ACPI_USAGE_TEXT("\nTable Options:\n"); ACPI_OPTION("-a
", "Get table via a physical address"); + ACPI_OPTION("-c ", "Turning on/off customized table dumping"); ACPI_OPTION("-f ", "Get table via a binary file"); ACPI_OPTION("-n ", "Get table via a name/signature"); ACPI_OPTION("-x", "Do not use but dump XSDT"); @@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv) case 'c': /* Dump customized tables */ - gbl_dump_customized_tables = TRUE; + if (!strcmp(acpi_gbl_optarg, "on")) { + gbl_dump_customized_tables = TRUE; + } else if (!strcmp(acpi_gbl_optarg, "off")) { + gbl_dump_customized_tables = FALSE; + } else { + acpi_log_error + ("%s: Cannot handle this switch, please use on|off\n", + acpi_gbl_optarg); + return (-1); + } continue; case 'h': From 63c43812ee99efe7903955bae8cd928e9582477a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:45:05 +0800 Subject: [PATCH 514/839] ACPI / acpidump: Update acpidump manual This patch updates acpidump manual according to the recent changes. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/man/acpidump.8 | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8 index 38f095d86b52..79e2d1d435d1 100644 --- a/tools/power/acpi/man/acpidump.8 +++ b/tools/power/acpi/man/acpidump.8 @@ -22,9 +22,6 @@ acpidump options are as follow: .B \-b Dump tables to binary files .TP -.B \-c -Dump customized tables -.TP .B \-h \-? This help message .TP @@ -48,15 +45,25 @@ Verbose mode .B \-a
Get table via a physical address .TP +.B \-c +Turning on/off customized table dumping +.TP .B \-f Get table via a binary file .TP .B \-n Get table via a name/signature .TP -Invocation without parameters dumps all available tables +.B \-x +Do not use but dump XSDT .TP -Multiple mixed instances of -a, -f, and -n are supported +.B \-x \-x +Do not use or dump XSDT +.TP +.fi +Invocation without parameters dumps all available tables. +.TP +Multiple mixed instances of -a, -f, and -n are supported. .SH EXAMPLES From 4fa4616e279df89baeb36287bbee83ab272edaed Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:45:11 +0800 Subject: [PATCH 515/839] ACPICA: De-macroize calls to standard C library functions ACPICA commit 3b1026e0bdd3c32eb6d5d313f3ba0b1fee7597b4 ACPICA commit 00f0dc83f5cfca53b27a3213ae0d7719b88c2d6b ACPICA commit 47d22a738d0e19fd241ffe4e3e9d4e198e4afc69 Across all of ACPICA. Replace C library macros such as ACPI_STRLEN with the standard names such as strlen. The original purpose for these macros is long since obsolete. Also cast various invocations as necessary. Bob Moore, Jung-uk Kim, Lv Zheng. Link: https://github.com/acpica/acpica/commit/3b1026e0 Link: https://github.com/acpica/acpica/commit/00f0dc83 Link: https://github.com/acpica/acpica/commit/47d22a73 Signed-off-by: Bob Moore Signed-off-by: Jung-uk Kim Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acutils.h | 46 ++++++++-------- drivers/acpi/acpica/dsfield.c | 2 +- drivers/acpi/acpica/dsinit.c | 2 +- drivers/acpi/acpica/dsobject.c | 7 ++- drivers/acpi/acpica/dsutils.c | 4 +- drivers/acpi/acpica/evgpeinit.c | 2 +- drivers/acpi/acpica/exconfig.c | 2 +- drivers/acpi/acpica/exconvrt.c | 9 ++-- drivers/acpi/acpica/exfield.c | 2 +- drivers/acpi/acpica/exfldio.c | 52 +++++++++---------- drivers/acpi/acpica/exmisc.c | 36 ++++++------- drivers/acpi/acpica/exnames.c | 2 +- drivers/acpi/acpica/exoparg2.c | 4 +- drivers/acpi/acpica/exoparg3.c | 4 +- drivers/acpi/acpica/exregion.c | 9 ++-- drivers/acpi/acpica/exstorob.c | 18 +++---- drivers/acpi/acpica/nsaccess.c | 6 +-- drivers/acpi/acpica/nsconvert.c | 10 ++-- drivers/acpi/acpica/nsdump.c | 2 +- drivers/acpi/acpica/nseval.c | 2 +- drivers/acpi/acpica/nsinit.c | 4 +- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/acpica/nsutils.c | 3 +- drivers/acpi/acpica/nsxfeval.c | 5 +- drivers/acpi/acpica/nsxfname.c | 6 +-- drivers/acpi/acpica/psutils.c | 7 ++- drivers/acpi/acpica/rscreate.c | 6 +-- drivers/acpi/acpica/rsmisc.c | 8 +-- drivers/acpi/acpica/rsutils.c | 11 ++-- drivers/acpi/acpica/rsxface.c | 8 +-- drivers/acpi/acpica/tbdata.c | 8 +-- drivers/acpi/acpica/tbfadt.c | 6 +-- drivers/acpi/acpica/tbfind.c | 21 ++++---- drivers/acpi/acpica/tbinstal.c | 7 ++- drivers/acpi/acpica/tbprint.c | 10 ++-- drivers/acpi/acpica/tbutils.c | 2 +- drivers/acpi/acpica/tbxface.c | 16 +++--- drivers/acpi/acpica/tbxfload.c | 4 +- drivers/acpi/acpica/utalloc.c | 6 +-- drivers/acpi/acpica/utbuffer.c | 4 +- drivers/acpi/acpica/utcache.c | 6 +-- drivers/acpi/acpica/utcopy.c | 42 +++++++-------- drivers/acpi/acpica/utids.c | 9 ++-- drivers/acpi/acpica/utmisc.c | 6 +-- drivers/acpi/acpica/utosi.c | 9 ++-- drivers/acpi/acpica/utpredef.c | 4 +- drivers/acpi/acpica/utprint.c | 6 +-- drivers/acpi/acpica/utstring.c | 33 ++++++------ drivers/acpi/acpica/uttrack.c | 8 +-- drivers/acpi/acpica/utxface.c | 8 +-- include/acpi/actypes.h | 8 +-- include/acpi/platform/acenv.h | 38 -------------- tools/power/acpi/common/getopt.c | 4 +- .../os_specific/service_layers/oslinuxtbl.c | 6 +-- tools/power/acpi/tools/acpidump/apdump.c | 8 +-- tools/power/acpi/tools/acpidump/apfiles.c | 12 ++--- 56 files changed, 260 insertions(+), 312 deletions(-) diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 6391c9706e84..ef1e51d0f038 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -210,37 +210,35 @@ void acpi_ut_subsystem_shutdown(void); */ #ifndef ACPI_USE_SYSTEM_CLIBRARY -acpi_size acpi_ut_strlen(const char *string); +acpi_size strlen(const char *string); -char *acpi_ut_strchr(const char *string, int ch); +char *strchr(const char *string, int ch); -char *acpi_ut_strcpy(char *dst_string, const char *src_string); +char *strcpy(char *dst_string, const char *src_string); -char *acpi_ut_strncpy(char *dst_string, - const char *src_string, acpi_size count); +char *strncpy(char *dst_string, const char *src_string, acpi_size count); -int acpi_ut_memcmp(const char *buffer1, const char *buffer2, acpi_size count); +int strncmp(const char *string1, const char *string2, acpi_size count); -int acpi_ut_strncmp(const char *string1, const char *string2, acpi_size count); +int strcmp(const char *string1, const char *string2); -int acpi_ut_strcmp(const char *string1, const char *string2); +char *strcat(char *dst_string, const char *src_string); -char *acpi_ut_strcat(char *dst_string, const char *src_string); +char *strncat(char *dst_string, const char *src_string, acpi_size count); -char *acpi_ut_strncat(char *dst_string, - const char *src_string, acpi_size count); +u32 strtoul(const char *string, char **terminator, u32 base); -u32 acpi_ut_strtoul(const char *string, char **terminator, u32 base); +char *strstr(char *string1, char *string2); -char *acpi_ut_strstr(char *string1, char *string2); +int memcmp(void *buffer1, void *buffer2, acpi_size count); -void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count); +void *memcpy(void *dest, const void *src, acpi_size count); -void *acpi_ut_memset(void *dest, u8 value, acpi_size count); +void *memset(void *dest, int value, acpi_size count); -int acpi_ut_to_upper(int c); +int toupper(int c); -int acpi_ut_to_lower(int c); +int tolower(int c); extern const u8 _acpi_ctype[]; @@ -255,13 +253,13 @@ extern const u8 _acpi_ctype[]; #define _ACPI_UP 0x01 /* 'A'-'Z' */ #define _ACPI_XD 0x80 /* '0'-'9', 'A'-'F', 'a'-'f' */ -#define ACPI_IS_DIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI)) -#define ACPI_IS_SPACE(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP)) -#define ACPI_IS_XDIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD)) -#define ACPI_IS_UPPER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP)) -#define ACPI_IS_LOWER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO)) -#define ACPI_IS_PRINT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU)) -#define ACPI_IS_ALPHA(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP)) +#define isdigit(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI)) +#define isspace(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP)) +#define isxdigit(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD)) +#define isupper(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP)) +#define islower(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO)) +#define isprint(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU)) +#define isalpha(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP)) #endif /* !ACPI_USE_SYSTEM_CLIBRARY */ diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 43b40de90484..20de148594fd 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -502,7 +502,7 @@ acpi_ds_create_field(union acpi_parse_object *op, } } - ACPI_MEMSET(&info, 0, sizeof(struct acpi_create_field_info)); + memset(&info, 0, sizeof(struct acpi_create_field_info)); /* Second arg is the field flags */ diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index bbe74bcebbae..95779e8ec3bb 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -207,7 +207,7 @@ acpi_ds_initialize_objects(u32 table_index, /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); info.owner_id = owner_id; info.table_index = table_index; diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index 4a4b2f343713..2beb7fd674ae 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -339,8 +339,8 @@ acpi_ds_build_internal_buffer_obj(struct acpi_walk_state *walk_state, /* Initialize buffer from the byte_list (if present) */ if (byte_list) { - ACPI_MEMCPY(obj_desc->buffer.pointer, - byte_list->named.data, byte_list_length); + memcpy(obj_desc->buffer.pointer, byte_list->named.data, + byte_list_length); } } @@ -750,8 +750,7 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state, case ACPI_TYPE_STRING: obj_desc->string.pointer = op->common.value.string; - obj_desc->string.length = - (u32)ACPI_STRLEN(op->common.value.string); + obj_desc->string.length = (u32)strlen(op->common.value.string); /* * The string is contained in the ACPI table, don't ever try diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c index deeddd6d2f05..ebc577baeaf9 100644 --- a/drivers/acpi/acpica/dsutils.c +++ b/drivers/acpi/acpica/dsutils.c @@ -572,8 +572,8 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state, obj_desc = acpi_ut_create_string_object((acpi_size) name_length); - ACPI_STRNCPY(obj_desc->string.pointer, - name_string, name_length); + strncpy(obj_desc->string.pointer, + name_string, name_length); status = AE_OK; } else { /* diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c index 8840296d5b20..ea4c0d3fca2d 100644 --- a/drivers/acpi/acpica/evgpeinit.c +++ b/drivers/acpi/acpica/evgpeinit.c @@ -377,7 +377,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle, /* 4) The last two characters of the name are the hex GPE Number */ - gpe_number = ACPI_STRTOUL(&name[2], NULL, 16); + gpe_number = strtoul(&name[2], NULL, 16); if (gpe_number == ACPI_UINT32_MAX) { /* Conversion failed; invalid method, just ignore it */ diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 6e0df2b9d5a4..24a4c5c2b124 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -470,7 +470,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_NO_MEMORY); } - ACPI_MEMCPY(table, table_header, length); + memcpy(table, table_header, length); break; default: diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c index 89a976b4ccf2..075d654c837f 100644 --- a/drivers/acpi/acpica/exconvrt.c +++ b/drivers/acpi/acpica/exconvrt.c @@ -227,9 +227,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the integer to the buffer, LSB first */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, - &obj_desc->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, + &obj_desc->integer.value, acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -252,8 +251,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the string to the buffer */ new_buf = return_desc->buffer.pointer; - ACPI_STRNCPY((char *)new_buf, (char *)obj_desc->string.pointer, - obj_desc->string.length); + strncpy((char *)new_buf, (char *)obj_desc->string.pointer, + obj_desc->string.length); break; default: diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index c161dd974f74..61fd9c7b88bc 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -428,7 +428,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); + memcpy(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 725a3746a2df..70b7bbbb860b 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -416,22 +416,22 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc, * Copy the data from the source buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY(value, - (obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, - obj_desc->common_field.access_byte_width); + memcpy(value, + (obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, + obj_desc->common_field.access_byte_width); } else { /* * Copy the data to the target buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY((obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, value, - obj_desc->common_field.access_byte_width); + memcpy((obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, value, + obj_desc->common_field.access_byte_width); } status = AE_OK; @@ -703,7 +703,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_BUFFER_OVERFLOW); } - ACPI_MEMSET(buffer, 0, buffer_length); + memset(buffer, 0, buffer_length); access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width); /* Handle the simple case here */ @@ -720,7 +720,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, status = acpi_ex_field_datum_io(obj_desc, 0, &raw_datum, ACPI_READ); - ACPI_MEMCPY(buffer, &raw_datum, buffer_length); + memcpy(buffer, &raw_datum, buffer_length); } return_ACPI_STATUS(status); @@ -793,9 +793,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write merged datum to target buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); buffer_offset += obj_desc->common_field.access_byte_width; merged_datum = @@ -811,9 +811,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write the last datum to the buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); return_ACPI_STATUS(AE_OK); } @@ -878,7 +878,7 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, * at Byte zero. All unused (upper) bytes of the * buffer will be 0. */ - ACPI_MEMCPY((char *)new_buffer, (char *)buffer, buffer_length); + memcpy((char *)new_buffer, (char *)buffer, buffer_length); buffer = new_buffer; buffer_length = required_length; } @@ -918,9 +918,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get initial Datum from the input buffer */ - ACPI_MEMCPY(&raw_datum, buffer, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, buffer, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum = raw_datum << obj_desc->common_field.start_field_bit_offset; @@ -970,9 +970,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get the next input datum from the buffer */ buffer_offset += obj_desc->common_field.access_byte_width; - ACPI_MEMCPY(&raw_datum, ((char *)buffer) + buffer_offset, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, ((char *)buffer) + buffer_offset, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum |= raw_datum << obj_desc->common_field.start_field_bit_offset; diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c index b56fc9d6f48e..d02afece0f10 100644 --- a/drivers/acpi/acpica/exmisc.c +++ b/drivers/acpi/acpica/exmisc.c @@ -209,8 +209,8 @@ acpi_ex_concat_template(union acpi_operand_object *operand0, * end_tag descriptor is copied from Operand1. */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, length0); - ACPI_MEMCPY(new_buf + length0, operand1->buffer.pointer, length1); + memcpy(new_buf, operand0->buffer.pointer, length0); + memcpy(new_buf + length0, operand1->buffer.pointer, length1); /* Insert end_tag and set the checksum to zero, means "ignore checksum" */ @@ -318,14 +318,14 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Copy the first integer, LSB first */ - ACPI_MEMCPY(new_buf, &operand0->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, &operand0->integer.value, + acpi_gbl_integer_byte_width); /* Copy the second integer (LSB first) after the first */ - ACPI_MEMCPY(new_buf + acpi_gbl_integer_byte_width, - &local_operand1->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf + acpi_gbl_integer_byte_width, + &local_operand1->integer.value, + acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -346,9 +346,9 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the strings */ - ACPI_STRCPY(new_buf, operand0->string.pointer); - ACPI_STRCPY(new_buf + operand0->string.length, - local_operand1->string.pointer); + strcpy(new_buf, operand0->string.pointer); + strcpy(new_buf + operand0->string.length, + local_operand1->string.pointer); break; case ACPI_TYPE_BUFFER: @@ -369,11 +369,11 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the buffers */ - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, - operand0->buffer.length); - ACPI_MEMCPY(new_buf + operand0->buffer.length, - local_operand1->buffer.pointer, - local_operand1->buffer.length); + memcpy(new_buf, operand0->buffer.pointer, + operand0->buffer.length); + memcpy(new_buf + operand0->buffer.length, + local_operand1->buffer.pointer, + local_operand1->buffer.length); break; default: @@ -660,9 +660,9 @@ acpi_ex_do_logical_op(u16 opcode, /* Lexicographic compare: compare the data bytes */ - compare = ACPI_MEMCMP(operand0->buffer.pointer, - local_operand1->buffer.pointer, - (length0 > length1) ? length1 : length0); + compare = memcmp(operand0->buffer.pointer, + local_operand1->buffer.pointer, + (length0 > length1) ? length1 : length0); switch (opcode) { case AML_LEQUAL_OP: /* LEqual (Operand0, Operand1) */ diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c index 453b00c30177..20e87813c7d7 100644 --- a/drivers/acpi/acpica/exnames.c +++ b/drivers/acpi/acpica/exnames.c @@ -192,7 +192,7 @@ static acpi_status acpi_ex_name_segment(u8 ** in_aml_address, char *name_string) char_buf[4] = '\0'; if (name_string) { - ACPI_STRCAT(name_string, char_buf); + strcat(name_string, char_buf); ACPI_DEBUG_PRINT((ACPI_DB_NAMES, "Appended to - %s\n", name_string)); } else { diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c index 6fac5e0a698a..b8944ebb1081 100644 --- a/drivers/acpi/acpica/exoparg2.c +++ b/drivers/acpi/acpica/exoparg2.c @@ -337,8 +337,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) * Copy the raw buffer data with no transform. * (NULL terminated already) */ - ACPI_MEMCPY(return_desc->string.pointer, - operand[0]->buffer.pointer, length); + memcpy(return_desc->string.pointer, + operand[0]->buffer.pointer, length); break; case AML_CONCAT_RES_OP: diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c index 1c64a988cbee..fa100b3b92ee 100644 --- a/drivers/acpi/acpica/exoparg3.c +++ b/drivers/acpi/acpica/exoparg3.c @@ -237,8 +237,8 @@ acpi_status acpi_ex_opcode_3A_1T_1R(struct acpi_walk_state *walk_state) /* We have a buffer, copy the portion requested */ - ACPI_MEMCPY(buffer, operand[0]->string.pointer + index, - length); + memcpy(buffer, operand[0]->string.pointer + index, + length); } /* Set the length of the new String/Buffer */ diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index f6c2f5499935..b4a5e44c00dd 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -517,15 +517,14 @@ acpi_ex_data_table_space_handler(u32 function, switch (function) { case ACPI_READ: - ACPI_MEMCPY(ACPI_CAST_PTR(char, value), - ACPI_PHYSADDR_TO_PTR(address), - ACPI_DIV_8(bit_width)); + memcpy(ACPI_CAST_PTR(char, value), + ACPI_PHYSADDR_TO_PTR(address), ACPI_DIV_8(bit_width)); break; case ACPI_WRITE: - ACPI_MEMCPY(ACPI_PHYSADDR_TO_PTR(address), - ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); + memcpy(ACPI_PHYSADDR_TO_PTR(address), + ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); break; default: diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c index 6fa3c8d8fc5f..e1d4f4d51b97 100644 --- a/drivers/acpi/acpica/exstorob.c +++ b/drivers/acpi/acpica/exstorob.c @@ -100,9 +100,9 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, /* Clear existing buffer and copy in the new one */ - ACPI_MEMSET(target_desc->buffer.pointer, 0, - target_desc->buffer.length); - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, length); + memset(target_desc->buffer.pointer, 0, + target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, length); #ifdef ACPI_OBSOLETE_BEHAVIOR /* @@ -129,8 +129,8 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, } else { /* Truncate the source, copy only what will fit */ - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, - target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, + target_desc->buffer.length); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Truncating source buffer from %X to %X\n", @@ -187,9 +187,9 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, * String will fit in existing non-static buffer. * Clear old string and copy in the new one */ - ACPI_MEMSET(target_desc->string.pointer, 0, - (acpi_size) target_desc->string.length + 1); - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memset(target_desc->string.pointer, 0, + (acpi_size) target_desc->string.length + 1); + memcpy(target_desc->string.pointer, buffer, length); } else { /* * Free the current buffer, then allocate a new buffer @@ -210,7 +210,7 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, } target_desc->common.flags &= ~AOPOBJ_STATIC_POINTER; - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memcpy(target_desc->string.pointer, buffer, length); } /* Set the new target length */ diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 19ad0b5993d5..c687b9979fb2 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -102,7 +102,7 @@ acpi_status acpi_ns_root_initialize(void) /* _OSI is optional for now, will be permanent later */ - if (!ACPI_STRCMP(init_val->name, "_OSI") + if (!strcmp(init_val->name, "_OSI") && !acpi_gbl_create_osi_method) { continue; } @@ -180,7 +180,7 @@ acpi_status acpi_ns_root_initialize(void) /* Build an object around the static string */ - obj_desc->string.length = (u32)ACPI_STRLEN(val); + obj_desc->string.length = (u32)strlen(val); obj_desc->string.pointer = val; obj_desc->common.flags |= AOPOBJ_STATIC_POINTER; break; @@ -203,7 +203,7 @@ acpi_status acpi_ns_root_initialize(void) /* Special case for ACPI Global Lock */ - if (ACPI_STRCMP(init_val->name, "_GL_") == 0) { + if (strcmp(init_val->name, "_GL_") == 0) { acpi_gbl_global_lock_mutex = obj_desc; /* Create additional counting semaphore for global lock */ diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c index 1a8b39c8d969..da55a1c60da1 100644 --- a/drivers/acpi/acpica/nsconvert.c +++ b/drivers/acpi/acpica/nsconvert.c @@ -187,8 +187,8 @@ acpi_ns_convert_to_string(union acpi_operand_object *original_object, * Copy the raw buffer data with no transform. String is already NULL * terminated at Length+1. */ - ACPI_MEMCPY(new_object->string.pointer, - original_object->buffer.pointer, length); + memcpy(new_object->string.pointer, + original_object->buffer.pointer, length); break; default: @@ -251,9 +251,9 @@ acpi_ns_convert_to_buffer(union acpi_operand_object *original_object, return (AE_NO_MEMORY); } - ACPI_MEMCPY(new_object->buffer.pointer, - original_object->string.pointer, - original_object->string.length); + memcpy(new_object->buffer.pointer, + original_object->string.pointer, + original_object->string.length); break; case ACPI_TYPE_PACKAGE: diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index d259393505fa..0f1daba640e7 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -101,7 +101,7 @@ void acpi_ns_print_pathname(u32 num_segments, char *pathname) while (num_segments) { for (i = 0; i < 4; i++) { - ACPI_IS_PRINT(pathname[i]) ? + isprint((int)pathname[i]) ? acpi_os_printf("%c", pathname[i]) : acpi_os_printf("?"); } diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c index 7bcc68f57afa..a725d88b036b 100644 --- a/drivers/acpi/acpica/nseval.c +++ b/drivers/acpi/acpica/nseval.c @@ -440,7 +440,7 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, /* Initialize the evaluation information block */ - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = parent_node; /* diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 4a85c4517988..b744a53618eb 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -90,7 +90,7 @@ acpi_status acpi_ns_initialize_objects(void) /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); /* Walk entire namespace from the supplied root */ @@ -566,7 +566,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname (ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI)); - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = device_node; info->relative_pathname = METHOD_NAME__INI; info->parameters = NULL; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index c30672d23878..0515a70f42a4 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -580,7 +580,7 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, * # is a hex digit. */ for (dest = new_string->string.pointer; *source; dest++, source++) { - *dest = (char)ACPI_TOUPPER(*source); + *dest = (char)toupper((int)*source); } acpi_ut_remove_reference(return_object); diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 6ad02008c0c2..8d8104b8bd28 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -292,8 +292,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) } else { /* Convert the character to uppercase and save it */ - result[i] = - (char)ACPI_TOUPPER((int)*external_name); + result[i] = (char)toupper((int)*external_name); external_name++; } } diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index b6030a2deee1..6ee1e52b903d 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -696,7 +696,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - no_match = ACPI_STRCMP(hid->string, info->hid); + no_match = strcmp(hid->string, info->hid); ACPI_FREE(hid); if (no_match) { @@ -715,8 +715,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = FALSE; for (i = 0; i < cid->count; i++) { - if (ACPI_STRCMP(cid->ids[i].string, info->hid) - == 0) { + if (strcmp(cid->ids[i].string, info->hid) == 0) { /* Found a matching CID */ diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index dc0836a9cf17..9ff643b9553f 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -114,7 +114,7 @@ acpi_get_handle(acpi_handle parent, /* Special case for root-only, since we can't search for it */ - if (!ACPI_STRCMP(pathname, ACPI_NS_ROOT_PATH)) { + if (!strcmp(pathname, ACPI_NS_ROOT_PATH)) { *ret_handle = ACPI_CAST_PTR(acpi_handle, acpi_gbl_root_node); return (AE_OK); @@ -242,7 +242,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, /* Copy actual string and return a pointer to the next string area */ - ACPI_MEMCPY(string_area, source->string, source->length); + memcpy(string_area, source->string, source->length); return (string_area + source->length); } @@ -637,7 +637,7 @@ acpi_status acpi_install_method(u8 *buffer) /* Copy the method AML to the local buffer */ - ACPI_MEMCPY(aml_buffer, aml_start, aml_length); + memcpy(aml_buffer, aml_start, aml_length); /* Initialize the method object with the new method's information */ diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index 960505ab409a..32440912023a 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -93,10 +93,9 @@ void acpi_ps_init_op(union acpi_parse_object *op, u16 opcode) op->common.descriptor_type = ACPI_DESC_TYPE_PARSER; op->common.aml_opcode = opcode; - ACPI_DISASM_ONLY_MEMBERS(ACPI_STRNCPY(op->common.aml_op_name, - (acpi_ps_get_opcode_info - (opcode))->name, - sizeof(op->common.aml_op_name))); + ACPI_DISASM_ONLY_MEMBERS(strncpy(op->common.aml_op_name, + (acpi_ps_get_opcode_info(opcode))-> + name, sizeof(op->common.aml_op_name))); } /******************************************************************************* diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c index f30f35e6ff2a..3fa829e96c2a 100644 --- a/drivers/acpi/acpica/rscreate.c +++ b/drivers/acpi/acpica/rscreate.c @@ -353,13 +353,13 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* +1 to include null terminator */ user_prt->length += - (u32)ACPI_STRLEN(user_prt->source) + 1; + (u32)strlen(user_prt->source) + 1; break; case ACPI_TYPE_STRING: - ACPI_STRCPY(user_prt->source, - obj_desc->string.pointer); + strcpy(user_prt->source, + obj_desc->string.pointer); /* * Add to the Length field the length of the string diff --git a/drivers/acpi/acpica/rsmisc.c b/drivers/acpi/acpica/rsmisc.c index 1fe49d223663..ac37852e0821 100644 --- a/drivers/acpi/acpica/rsmisc.c +++ b/drivers/acpi/acpica/rsmisc.c @@ -119,7 +119,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, /* * Get the resource type and the initial (minimum) length */ - ACPI_MEMSET(resource, 0, INIT_RESOURCE_LENGTH(info)); + memset(resource, 0, INIT_RESOURCE_LENGTH(info)); resource->type = INIT_RESOURCE_TYPE(info); resource->length = INIT_RESOURCE_LENGTH(info); break; @@ -324,13 +324,13 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, case ACPI_RSC_SET8: - ACPI_MEMSET(destination, info->aml_offset, info->value); + memset(destination, info->aml_offset, info->value); break; case ACPI_RSC_DATA8: target = ACPI_ADD_PTR(char, resource, info->value); - ACPI_MEMCPY(destination, source, ACPI_GET16(target)); + memcpy(destination, source, ACPI_GET16(target)); break; case ACPI_RSC_ADDRESS: @@ -502,7 +502,7 @@ acpi_rs_convert_resource_to_aml(struct acpi_resource *resource, switch (info->opcode) { case ACPI_RSC_INITSET: - ACPI_MEMSET(aml, 0, INIT_RESOURCE_LENGTH(info)); + memset(aml, 0, INIT_RESOURCE_LENGTH(info)); aml_length = INIT_RESOURCE_LENGTH(info); acpi_rs_set_resource_header(INIT_RESOURCE_TYPE(info), aml_length, aml); diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index 90417de38dab..52b024df0052 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -148,7 +148,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) case ACPI_RSC_MOVE_SERIAL_VEN: case ACPI_RSC_MOVE_SERIAL_RES: - ACPI_MEMCPY(destination, source, item_count); + memcpy(destination, source, item_count); return; /* @@ -364,12 +364,11 @@ acpi_rs_get_resource_source(acpi_rs_length resource_length, * Zero the entire area of the buffer. */ total_length = - (u32) - ACPI_STRLEN(ACPI_CAST_PTR(char, &aml_resource_source[1])) + + (u32)strlen(ACPI_CAST_PTR(char, &aml_resource_source[1])) + 1; total_length = (u32)ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); - ACPI_MEMSET(resource_source->string_ptr, 0, total_length); + memset(resource_source->string_ptr, 0, total_length); /* Copy the resource_source string to the destination */ @@ -432,8 +431,8 @@ acpi_rs_set_resource_source(union aml_resource * aml, /* Copy the resource_source string */ - ACPI_STRCPY(ACPI_CAST_PTR(char, &aml_resource_source[1]), - resource_source->string_ptr); + strcpy(ACPI_CAST_PTR(char, &aml_resource_source[1]), + resource_source->string_ptr); /* * Add the length of the string (+ 1 for null terminator) to the diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c index 8e6276df0226..de51f836ef68 100644 --- a/drivers/acpi/acpica/rsxface.c +++ b/drivers/acpi/acpica/rsxface.c @@ -398,8 +398,8 @@ acpi_resource_to_address64(struct acpi_resource *resource, /* Simple copy for 64 bit source */ - ACPI_MEMCPY(out, &resource->data, - sizeof(struct acpi_resource_address64)); + memcpy(out, &resource->data, + sizeof(struct acpi_resource_address64)); break; default: @@ -499,7 +499,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) */ if ((vendor->byte_length < (ACPI_UUID_LENGTH + 1)) || (vendor->uuid_subtype != info->uuid->subtype) || - (ACPI_MEMCMP(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { + (memcmp(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { return (AE_OK); } @@ -513,7 +513,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) /* Found the correct resource, copy and return it */ - ACPI_MEMCPY(buffer->pointer, resource, resource->length); + memcpy(buffer->pointer, resource, resource->length); buffer->length = resource->length; /* Found the desired descriptor, terminate resource walk */ diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c index d7f8386455bd..5c9d5abf1588 100644 --- a/drivers/acpi/acpica/tbdata.c +++ b/drivers/acpi/acpica/tbdata.c @@ -73,7 +73,7 @@ acpi_tb_init_table_descriptor(struct acpi_table_desc *table_desc, * Initialize the table descriptor. Set the pointer to NULL, since the * table is not fully mapped at this time. */ - ACPI_MEMSET(table_desc, 0, sizeof(struct acpi_table_desc)); + memset(table_desc, 0, sizeof(struct acpi_table_desc)); table_desc->address = address; table_desc->length = table->length; table_desc->flags = flags; @@ -465,9 +465,9 @@ acpi_status acpi_tb_resize_root_table_list(void) /* Copy and free the previous table array */ if (acpi_gbl_root_table_list.tables) { - ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, - (acpi_size) table_count * - sizeof(struct acpi_table_desc)); + memcpy(tables, acpi_gbl_root_table_list.tables, + (acpi_size) table_count * + sizeof(struct acpi_table_desc)); if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) { ACPI_FREE(acpi_gbl_root_table_list.tables); diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 05be59c772c7..6253001b6375 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -398,12 +398,12 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length) /* Clear the entire local FADT */ - ACPI_MEMSET(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); + memset(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); /* Copy the original FADT, up to sizeof (struct acpi_table_fadt) */ - ACPI_MEMCPY(&acpi_gbl_FADT, table, - ACPI_MIN(length, sizeof(struct acpi_table_fadt))); + memcpy(&acpi_gbl_FADT, table, + ACPI_MIN(length, sizeof(struct acpi_table_fadt))); /* Take a copy of the Hardware Reduced flag */ diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c index 0b879fcfef67..119c84ad9833 100644 --- a/drivers/acpi/acpica/tbfind.c +++ b/drivers/acpi/acpica/tbfind.c @@ -76,16 +76,16 @@ acpi_tb_find_table(char *signature, /* Normalize the input strings */ - ACPI_MEMSET(&header, 0, sizeof(struct acpi_table_header)); + memset(&header, 0, sizeof(struct acpi_table_header)); ACPI_MOVE_NAME(header.signature, signature); - ACPI_STRNCPY(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); - ACPI_STRNCPY(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); + strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); + strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); /* Search for the table */ for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if (ACPI_MEMCMP(&(acpi_gbl_root_table_list.tables[i].signature), - header.signature, ACPI_NAME_SIZE)) { + if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature), + header.signature, ACPI_NAME_SIZE)) { /* Not the requested table */ @@ -112,21 +112,20 @@ acpi_tb_find_table(char *signature, /* Check for table match on all IDs */ - if (!ACPI_MEMCMP + if (!memcmp (acpi_gbl_root_table_list.tables[i].pointer->signature, header.signature, ACPI_NAME_SIZE) && (!oem_id[0] || - !ACPI_MEMCMP + !memcmp (acpi_gbl_root_table_list. tables[i].pointer-> oem_id, header.oem_id, ACPI_OEM_ID_SIZE)) && (!oem_table_id[0] - || !ACPI_MEMCMP(acpi_gbl_root_table_list.tables[i]. - pointer->oem_table_id, - header.oem_table_id, - ACPI_OEM_TABLE_ID_SIZE))) { + || !memcmp(acpi_gbl_root_table_list.tables[i].pointer-> + oem_table_id, header.oem_table_id, + ACPI_OEM_TABLE_ID_SIZE))) { *table_index = i; ACPI_DEBUG_PRINT((ACPI_DB_TABLES, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 008a251780f4..15ea98e0068d 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -87,8 +87,8 @@ acpi_tb_compare_tables(struct acpi_table_desc *table_desc, u32 table_index) * not just the header. */ is_identical = (u8)((table_desc->length != table_length || - ACPI_MEMCMP(table_desc->pointer, table, - table_length)) ? FALSE : TRUE); + memcmp(table_desc->pointer, table, table_length)) ? + FALSE : TRUE); /* Release the acquired table */ @@ -289,8 +289,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, if ((new_table_desc.signature.ascii[0] != 0x00) && (!ACPI_COMPARE_NAME (&new_table_desc.signature, ACPI_SIG_SSDT)) - && (ACPI_STRNCMP(new_table_desc.signature.ascii, "OEM", 3))) - { + && (strncmp(new_table_desc.signature.ascii, "OEM", 3))) { ACPI_BIOS_ERROR((AE_INFO, "Table has invalid signature [%4.4s] (0x%8.8X), " "must be SSDT or OEMx", diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 77ba5c71c6e7..709d5112fc16 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -73,7 +73,7 @@ static void acpi_tb_fix_string(char *string, acpi_size length) { while (length && *string) { - if (!ACPI_IS_PRINT(*string)) { + if (!isprint((int)*string)) { *string = '?'; } string++; @@ -100,7 +100,7 @@ acpi_tb_cleanup_table_header(struct acpi_table_header *out_header, struct acpi_table_header *header) { - ACPI_MEMCPY(out_header, header, sizeof(struct acpi_table_header)); + memcpy(out_header, header, sizeof(struct acpi_table_header)); acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE); acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE); @@ -138,9 +138,9 @@ acpi_tb_print_table_header(acpi_physical_address address, /* RSDP has no common fields */ - ACPI_MEMCPY(local_header.oem_id, - ACPI_CAST_PTR(struct acpi_table_rsdp, - header)->oem_id, ACPI_OEM_ID_SIZE); + memcpy(local_header.oem_id, + ACPI_CAST_PTR(struct acpi_table_rsdp, header)->oem_id, + ACPI_OEM_ID_SIZE); acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 2bb6a117c840..568ac0e4a3c6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -188,7 +188,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index) return (NULL); } - ACPI_MEMCPY(new_table, table_desc->pointer, table_desc->length); + memcpy(new_table, table_desc->pointer, table_desc->length); acpi_tb_uninstall_table(table_desc); acpi_tb_init_table_descriptor(&acpi_gbl_root_table_list. diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index 54b9f7957ff1..5559e2c70b15 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -119,9 +119,9 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array, } else { /* Root Table Array has been statically allocated by the host */ - ACPI_MEMSET(initial_table_array, 0, - (acpi_size) initial_table_count * - sizeof(struct acpi_table_desc)); + memset(initial_table_array, 0, + (acpi_size) initial_table_count * + sizeof(struct acpi_table_desc)); acpi_gbl_root_table_list.tables = initial_table_array; acpi_gbl_root_table_list.max_table_count = initial_table_count; @@ -243,8 +243,8 @@ acpi_get_table_header(char *signature, return (AE_NO_MEMORY); } - ACPI_MEMCPY(out_table_header, header, - sizeof(struct acpi_table_header)); + memcpy(out_table_header, header, + sizeof(struct acpi_table_header)); acpi_os_unmap_memory(header, sizeof(struct acpi_table_header)); @@ -252,9 +252,9 @@ acpi_get_table_header(char *signature, return (AE_NOT_FOUND); } } else { - ACPI_MEMCPY(out_table_header, - acpi_gbl_root_table_list.tables[i].pointer, - sizeof(struct acpi_table_header)); + memcpy(out_table_header, + acpi_gbl_root_table_list.tables[i].pointer, + sizeof(struct acpi_table_header)); } return (AE_OK); } diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index cf56e18cdc64..9682d40ca6ff 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -150,8 +150,8 @@ static acpi_status acpi_tb_load_namespace(void) * Save the original DSDT header for detection of table corruption * and/or replacement of the DSDT from outside the OS. */ - ACPI_MEMCPY(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, - sizeof(struct acpi_table_header)); + memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, + sizeof(struct acpi_table_header)); (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c index 61d8f6d186d1..7a4101f0685e 100644 --- a/drivers/acpi/acpica/utalloc.c +++ b/drivers/acpi/acpica/utalloc.c @@ -73,7 +73,7 @@ void *acpi_os_allocate_zeroed(acpi_size size) /* Clear the memory block */ - ACPI_MEMSET(allocation, 0, size); + memset(allocation, 0, size); } return (allocation); @@ -181,7 +181,7 @@ acpi_status acpi_ut_delete_caches(void) char buffer[7]; if (acpi_gbl_display_final_mem_stats) { - ACPI_STRCPY(buffer, "MEMORY"); + strcpy(buffer, "MEMORY"); (void)acpi_db_display_statistics(buffer); } #endif @@ -337,6 +337,6 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer, /* Have a valid buffer, clear it */ - ACPI_MEMSET(buffer->pointer, 0, required_length); + memset(buffer->pointer, 0, required_length); return (AE_OK); } diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c index a8c39643e618..01c8709ca586 100644 --- a/drivers/acpi/acpica/utbuffer.c +++ b/drivers/acpi/acpica/utbuffer.c @@ -159,7 +159,7 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset) } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_os_printf("%c", buf_char); } else { acpi_os_printf("."); @@ -319,7 +319,7 @@ acpi_ut_dump_buffer_to_file(ACPI_FILE file, } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_ut_file_printf(file, "%c", buf_char); } else { acpi_ut_file_printf(file, "."); diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c index eacc5eee362e..0d21fbd99363 100644 --- a/drivers/acpi/acpica/utcache.c +++ b/drivers/acpi/acpica/utcache.c @@ -84,7 +84,7 @@ acpi_os_create_cache(char *cache_name, /* Populate the cache object and return it */ - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = cache_name; cache->object_size = object_size; cache->max_depth = max_depth; @@ -212,7 +212,7 @@ acpi_os_release_object(struct acpi_memory_list * cache, void *object) /* Mark the object as cached */ - ACPI_MEMSET(object, 0xCA, cache->object_size); + memset(object, 0xCA, cache->object_size); ACPI_SET_DESCRIPTOR_TYPE(object, ACPI_DESC_TYPE_CACHED); /* Put the object at the head of the cache list */ @@ -281,7 +281,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache) /* Clear (zero) the previously used Object */ - ACPI_MEMSET(object, 0, cache->object_size); + memset(object, 0, cache->object_size); } else { /* The cache is empty, create a new object */ diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c index c37ec5035f4c..257221d452c8 100644 --- a/drivers/acpi/acpica/utcopy.c +++ b/drivers/acpi/acpica/utcopy.c @@ -129,7 +129,7 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, /* Always clear the external object */ - ACPI_MEMSET(external_object, 0, sizeof(union acpi_object)); + memset(external_object, 0, sizeof(union acpi_object)); /* * In general, the external object will be the same type as @@ -149,9 +149,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, string. length + 1); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->string.pointer, - (acpi_size) internal_object->string.length + 1); + memcpy((void *)data_space, + (void *)internal_object->string.pointer, + (acpi_size) internal_object->string.length + 1); break; case ACPI_TYPE_BUFFER: @@ -162,9 +162,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, ACPI_ROUND_UP_TO_NATIVE_WORD(internal_object->string. length); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->buffer.pointer, - internal_object->buffer.length); + memcpy((void *)data_space, + (void *)internal_object->buffer.pointer, + internal_object->buffer.length); break; case ACPI_TYPE_INTEGER: @@ -502,9 +502,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->string.pointer, - external_object->string.pointer, - external_object->string.length); + memcpy(internal_object->string.pointer, + external_object->string.pointer, + external_object->string.length); internal_object->string.length = external_object->string.length; break; @@ -517,9 +517,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->buffer.pointer, - external_object->buffer.pointer, - external_object->buffer.length); + memcpy(internal_object->buffer.pointer, + external_object->buffer.pointer, + external_object->buffer.length); internal_object->buffer.length = external_object->buffer.length; @@ -694,8 +694,8 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, copy_size = sizeof(struct acpi_namespace_node); } - ACPI_MEMCPY(ACPI_CAST_PTR(char, dest_desc), - ACPI_CAST_PTR(char, source_desc), copy_size); + memcpy(ACPI_CAST_PTR(char, dest_desc), + ACPI_CAST_PTR(char, source_desc), copy_size); /* Restore the saved fields */ @@ -725,9 +725,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual buffer data */ - ACPI_MEMCPY(dest_desc->buffer.pointer, - source_desc->buffer.pointer, - source_desc->buffer.length); + memcpy(dest_desc->buffer.pointer, + source_desc->buffer.pointer, + source_desc->buffer.length); } break; @@ -747,9 +747,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual string data */ - ACPI_MEMCPY(dest_desc->string.pointer, - source_desc->string.pointer, - (acpi_size) source_desc->string.length + 1); + memcpy(dest_desc->string.pointer, + source_desc->string.pointer, + (acpi_size) source_desc->string.length + 1); } break; diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c index 3afe07f19b9f..7956df1e263c 100644 --- a/drivers/acpi/acpica/utids.c +++ b/drivers/acpi/acpica/utids.c @@ -111,7 +111,7 @@ acpi_ut_execute_HID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(hid->string, obj_desc->string.pointer); + strcpy(hid->string, obj_desc->string.pointer); } hid->length = length; @@ -180,7 +180,7 @@ acpi_ut_execute_SUB(struct acpi_namespace_node *device_node, /* Simply copy existing string */ - ACPI_STRCPY(sub->string, obj_desc->string.pointer); + strcpy(sub->string, obj_desc->string.pointer); sub->length = length; *return_id = sub; @@ -256,7 +256,7 @@ acpi_ut_execute_UID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(uid->string, obj_desc->string.pointer); + strcpy(uid->string, obj_desc->string.pointer); } uid->length = length; @@ -393,8 +393,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node *device_node, /* Copy the String CID from the returned object */ - ACPI_STRCPY(next_id_string, - cid_objects[i]->string.pointer); + strcpy(next_id_string, cid_objects[i]->string.pointer); length = cid_objects[i]->string.length + 1; } diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index 28099e230008..71b66537f826 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -66,9 +66,9 @@ u8 acpi_ut_is_pci_root_bridge(char *id) * Check if this is a PCI root bridge. * ACPI 3.0+: check for a PCI Express root also. */ - if (!(ACPI_STRCMP(id, - PCI_ROOT_HID_STRING)) || - !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + if (!(strcmp(id, + PCI_ROOT_HID_STRING)) || + !(strcmp(id, PCI_EXPRESS_ROOT_HID_STRING))) { return (TRUE); } diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c index 44035abdbf29..8f3d203aed79 100644 --- a/drivers/acpi/acpica/utosi.c +++ b/drivers/acpi/acpica/utosi.c @@ -232,8 +232,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) return (AE_NO_MEMORY); } - interface_info->name = - ACPI_ALLOCATE_ZEROED(ACPI_STRLEN(interface_name) + 1); + interface_info->name = ACPI_ALLOCATE_ZEROED(strlen(interface_name) + 1); if (!interface_info->name) { ACPI_FREE(interface_info); return (AE_NO_MEMORY); @@ -241,7 +240,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) /* Initialize new info and insert at the head of the global list */ - ACPI_STRCPY(interface_info->name, interface_name); + strcpy(interface_info->name, interface_name); interface_info->flags = ACPI_OSI_DYNAMIC; interface_info->next = acpi_gbl_supported_interfaces; @@ -269,7 +268,7 @@ acpi_status acpi_ut_remove_interface(acpi_string interface_name) previous_interface = next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { /* Found: name is in either the static list or was added at runtime */ @@ -373,7 +372,7 @@ struct acpi_interface_info *acpi_ut_get_interface(acpi_string interface_name) next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { return (next_interface); } diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c index 29e449935a82..97898ed71b4b 100644 --- a/drivers/acpi/acpica/utpredef.c +++ b/drivers/acpi/acpica/utpredef.c @@ -148,7 +148,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) u32 j; if (!expected_btypes) { - ACPI_STRCPY(buffer, "NONE"); + strcpy(buffer, "NONE"); return; } @@ -161,7 +161,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) /* If one of the expected types, concatenate the name of this type */ if (expected_btypes & this_rtype) { - ACPI_STRCAT(buffer, &ut_rtype_names[i][j]); + strcat(buffer, &ut_rtype_names[i][j]); j = 0; /* Use name separator from now on */ } diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c index 2be6bd4bdc09..b26297c5de49 100644 --- a/drivers/acpi/acpica/utprint.c +++ b/drivers/acpi/acpica/utprint.c @@ -180,7 +180,7 @@ const char *acpi_ut_scan_number(const char *string, u64 *number_ptr) { u64 number = 0; - while (ACPI_IS_DIGIT(*string)) { + while (isdigit((int)*string)) { number *= 10; number += *(string++) - '0'; } @@ -405,7 +405,7 @@ acpi_ut_vsnprintf(char *string, /* Process width */ width = -1; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); width = (s32) number; } else if (*format == '*') { @@ -422,7 +422,7 @@ acpi_ut_vsnprintf(char *string, precision = -1; if (*format == '.') { ++format; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); precision = (s32) number; } else if (*format == '*') { diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 83b6c52490dc..8f3c883dfe0e 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -79,7 +79,7 @@ void acpi_ut_strlwr(char *src_string) /* Walk entire string, lowercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOLOWER(*string); + *string = (char)tolower((int)*string); } return; @@ -145,7 +145,7 @@ void acpi_ut_strupr(char *src_string) /* Walk entire string, uppercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOUPPER(*string); + *string = (char)toupper((int)*string); } return; @@ -202,7 +202,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Skip over any white space in the buffer */ - while ((*string) && (ACPI_IS_SPACE(*string) || *string == '\t')) { + while ((*string) && (isspace((int)*string) || *string == '\t')) { string++; } @@ -211,7 +211,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'. * We need to determine if it is decimal or hexadecimal. */ - if ((*string == '0') && (ACPI_TOLOWER(*(string + 1)) == 'x')) { + if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) { sign_of0x = 1; base = 16; @@ -224,7 +224,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Any string left? Check that '0x' is not followed by white space. */ - if (!(*string) || ACPI_IS_SPACE(*string) || *string == '\t') { + if (!(*string) || isspace((int)*string) || *string == '\t') { if (to_integer_op) { goto error_exit; } else { @@ -241,7 +241,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Main loop: convert the string to a 32- or 64-bit integer */ while (*string) { - if (ACPI_IS_DIGIT(*string)) { + if (isdigit((int)*string)) { /* Convert ASCII 0-9 to Decimal value */ @@ -252,8 +252,8 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) term = 1; } else { - this_digit = (u8)ACPI_TOUPPER(*string); - if (ACPI_IS_XDIGIT((char)this_digit)) { + this_digit = (u8)toupper((int)*string); + if (isxdigit((int)this_digit)) { /* Convert ASCII Hex char to value */ @@ -404,7 +404,7 @@ void acpi_ut_print_string(char *string, u16 max_length) /* Check for printable character or hex escape */ - if (ACPI_IS_PRINT(string[i])) { + if (isprint((int)string[i])) { /* This is a normal character */ acpi_os_printf("%c", (int)string[i]); @@ -609,22 +609,22 @@ void ut_convert_backslashes(char *pathname) u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) { - if (ACPI_STRLEN(source) >= dest_size) { + if (strlen(source) >= dest_size) { return (TRUE); } - ACPI_STRCPY(dest, source); + strcpy(dest, source); return (FALSE); } u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) { - if ((ACPI_STRLEN(dest) + ACPI_STRLEN(source)) >= dest_size) { + if ((strlen(dest) + strlen(source)) >= dest_size) { return (TRUE); } - ACPI_STRCAT(dest, source); + strcat(dest, source); return (FALSE); } @@ -635,14 +635,13 @@ acpi_ut_safe_strncat(char *dest, { acpi_size actual_transfer_length; - actual_transfer_length = - ACPI_MIN(max_transfer_length, ACPI_STRLEN(source)); + actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); - if ((ACPI_STRLEN(dest) + actual_transfer_length) >= dest_size) { + if ((strlen(dest) + actual_transfer_length) >= dest_size) { return (TRUE); } - ACPI_STRNCAT(dest, source, max_transfer_length); + strncat(dest, source, max_transfer_length); return (FALSE); } #endif diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index 130dd9f96f0f..9a7dc8196a5d 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -100,7 +100,7 @@ acpi_ut_create_list(char *list_name, return (AE_NO_MEMORY); } - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = list_name; cache->object_size = object_size; @@ -402,7 +402,7 @@ acpi_ut_track_allocation(struct acpi_debug_mem_block *allocation, allocation->component = component; allocation->line = line; - ACPI_STRNCPY(allocation->module, module, ACPI_MAX_MODULE_NAME); + strncpy(allocation->module, module, ACPI_MAX_MODULE_NAME); allocation->module[ACPI_MAX_MODULE_NAME - 1] = 0; if (!element) { @@ -497,7 +497,7 @@ acpi_ut_remove_allocation(struct acpi_debug_mem_block *allocation, /* Mark the segment as deleted */ - ACPI_MEMSET(&allocation->user_space, 0xEA, allocation->size); + memset(&allocation->user_space, 0xEA, allocation->size); status = acpi_ut_release_mutex(ACPI_MTX_MEMORY); return (status); @@ -595,7 +595,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module) while (element) { if ((element->component & component) && ((module == NULL) - || (0 == ACPI_STRCMP(module, element->module)))) { + || (0 == strcmp(module, element->module)))) { descriptor = ACPI_CAST_PTR(union acpi_descriptor, &element->user_space); diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c index 0929187bdce0..51cf52d52243 100644 --- a/drivers/acpi/acpica/utxface.c +++ b/drivers/acpi/acpica/utxface.c @@ -234,8 +234,8 @@ acpi_status acpi_get_statistics(struct acpi_statistics *stats) stats->sci_count = acpi_sci_count; stats->gpe_count = acpi_gpe_count; - ACPI_MEMCPY(stats->fixed_event_count, acpi_fixed_event_count, - sizeof(acpi_fixed_event_count)); + memcpy(stats->fixed_event_count, acpi_fixed_event_count, + sizeof(acpi_fixed_event_count)); /* Other counters */ @@ -322,7 +322,7 @@ acpi_status acpi_install_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } @@ -374,7 +374,7 @@ acpi_status acpi_remove_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index d791b986002a..c2a41d223162 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -542,14 +542,14 @@ typedef u64 acpi_integer; #define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) #define ACPI_MOVE_NAME(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else -#define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) -#define ACPI_MOVE_NAME(dest,src) (ACPI_STRNCPY (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) +#define ACPI_COMPARE_NAME(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) +#define ACPI_MOVE_NAME(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ -#define ACPI_VALIDATE_RSDP_SIG(a) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) -#define ACPI_MAKE_RSDP_SIG(dest) (ACPI_MEMCPY (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) +#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) +#define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /******************************************************************************* * diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 1e84e62b448d..3cedd43943f4 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -346,28 +346,6 @@ /* We will be linking to the standard Clib functions */ -#define ACPI_STRSTR(s1,s2) strstr((s1), (s2)) -#define ACPI_STRCHR(s1,c) strchr((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) strlen((s)) -#define ACPI_STRCPY(d,s) (void) strcpy((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) strncpy((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) strncmp((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) strcmp((d), (s)) -#define ACPI_STRCAT(d,s) (void) strcat((d), (s)) -#define ACPI_STRNCAT(d,s,n) strncat((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) strtoul((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) memcpy((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,s,n) (void) memset((d), (s), (acpi_size)(n)) -#define ACPI_TOUPPER(i) toupper((int) (i)) -#define ACPI_TOLOWER(i) tolower((int) (i)) -#define ACPI_IS_XDIGIT(i) isxdigit((int) (i)) -#define ACPI_IS_DIGIT(i) isdigit((int) (i)) -#define ACPI_IS_SPACE(i) isspace((int) (i)) -#define ACPI_IS_UPPER(i) isupper((int) (i)) -#define ACPI_IS_PRINT(i) isprint((int) (i)) -#define ACPI_IS_ALPHA(i) isalpha((int) (i)) - #else /****************************************************************************** @@ -405,22 +383,6 @@ typedef char *va_list; /* Use the local (ACPICA) definitions of the clib functions */ -#define ACPI_STRSTR(s1,s2) acpi_ut_strstr ((s1), (s2)) -#define ACPI_STRCHR(s1,c) acpi_ut_strchr ((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) acpi_ut_strlen ((s)) -#define ACPI_STRCPY(d,s) (void) acpi_ut_strcpy ((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) acpi_ut_strncpy ((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) acpi_ut_strncmp ((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) acpi_ut_strcmp ((d), (s)) -#define ACPI_STRCAT(d,s) (void) acpi_ut_strcat ((d), (s)) -#define ACPI_STRNCAT(d,s,n) acpi_ut_strncat ((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) acpi_ut_strtoul ((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) acpi_ut_memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) acpi_ut_memcpy ((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,v,n) (void) acpi_ut_memset ((d), (v), (acpi_size)(n)) -#define ACPI_TOUPPER(c) acpi_ut_to_upper ((int) (c)) -#define ACPI_TOLOWER(c) acpi_ut_to_lower ((int) (c)) - #endif /* ACPI_USE_SYSTEM_CLIBRARY */ #ifndef ACPI_FILE diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 5da129e10aa2..326e826a5d20 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts) argv[acpi_gbl_optind][0] != '-' || argv[acpi_gbl_optind][1] == '\0') { return (ACPI_OPT_END); - } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) { + } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) { acpi_gbl_optind++; return (ACPI_OPT_END); } @@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts) /* Make sure that the option is legal */ if (current_char == ':' || - (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) { + (opts_ptr = strchr(opts, current_char)) == NULL) { ACPI_OPTION_ERROR("Illegal option: -", current_char); if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') { diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 3aff9a2c7088..dd5008b0617a 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); exit: osl_unmap_table(mapped_table); @@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void) gbl_rsdp_address = rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address); - ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); + memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); acpi_os_unmap_memory(rsdp_address, rsdp_size); return (AE_OK); @@ -964,7 +964,7 @@ osl_get_bios_table(char *signature, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); *address = table_address; *table = local_table; diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index c736adf5fb55..61d0de804b70 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature) acpi_status status; int table_status; - if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAME_SIZE) { acpi_log_error ("Invalid table signature [%s]: must be exactly 4 characters\n", signature); @@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature) /* Table signatures are expected to be uppercase */ - ACPI_STRCPY(local_signature, signature); + strcpy(local_signature, signature); acpi_ut_strupr(local_signature); /* To be friendly, handle tables whose signatures do not match the name */ if (ACPI_COMPARE_NAME(local_signature, "FADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_FADT); + strcpy(local_signature, ACPI_SIG_FADT); } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_MADT); + strcpy(local_signature, ACPI_SIG_MADT); } /* Dump all instances of this signature (to handle multiple SSDTs) */ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8f2fe168228e..a37f9702b2a9 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) } else { ACPI_MOVE_NAME(filename, table->signature); } - filename[0] = (char)ACPI_TOLOWER(filename[0]); - filename[1] = (char)ACPI_TOLOWER(filename[1]); - filename[2] = (char)ACPI_TOLOWER(filename[2]); - filename[3] = (char)ACPI_TOLOWER(filename[3]); + filename[0] = (char)tolower((int)filename[0]); + filename[1] = (char)tolower((int)filename[1]); + filename[2] = (char)tolower((int)filename[2]); + filename[3] = (char)tolower((int)filename[3]); filename[ACPI_NAME_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ @@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) if (instance > 0) { acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u", instance); - ACPI_STRCAT(filename, instance_str); + strcat(filename, instance_str); } - ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX); + strcat(filename, ACPI_TABLE_FILE_SUFFIX); if (gbl_verbose_mode) { acpi_log_error From 747ef1b1ce32b824a25ce1e70934b58733dfa6f9 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:45:18 +0800 Subject: [PATCH 516/839] ACPICA: Split C library prototypes to new header ACPICA commit f51bf8497889a94046820639537165bbd7ccdee6 Adds acclib.h This patch doesn't affect the Linux kernel. Link: https://github.com/acpica/acpica/commit/f51bf849 Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/accommon.h | 3 ++ drivers/acpi/acpica/acutils.h | 58 ---------------------------------- 2 files changed, 3 insertions(+), 58 deletions(-) diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h index 853aa2dbdb61..a8d8092ee391 100644 --- a/drivers/acpi/acpica/accommon.h +++ b/drivers/acpi/acpica/accommon.h @@ -59,5 +59,8 @@ #include "acglobal.h" /* All global variables */ #include "achware.h" /* Hardware defines and interfaces */ #include "acutils.h" /* Utility interfaces */ +#ifndef ACPI_USE_SYSTEM_CLIBRARY +#include "acclib.h" /* C library interfaces */ +#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ #endif /* __ACCOMMON_H__ */ diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index ef1e51d0f038..6de0d3573037 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -205,64 +205,6 @@ acpi_status acpi_ut_hardware_initialize(void); void acpi_ut_subsystem_shutdown(void); -/* - * utclib - Local implementations of C library functions - */ -#ifndef ACPI_USE_SYSTEM_CLIBRARY - -acpi_size strlen(const char *string); - -char *strchr(const char *string, int ch); - -char *strcpy(char *dst_string, const char *src_string); - -char *strncpy(char *dst_string, const char *src_string, acpi_size count); - -int strncmp(const char *string1, const char *string2, acpi_size count); - -int strcmp(const char *string1, const char *string2); - -char *strcat(char *dst_string, const char *src_string); - -char *strncat(char *dst_string, const char *src_string, acpi_size count); - -u32 strtoul(const char *string, char **terminator, u32 base); - -char *strstr(char *string1, char *string2); - -int memcmp(void *buffer1, void *buffer2, acpi_size count); - -void *memcpy(void *dest, const void *src, acpi_size count); - -void *memset(void *dest, int value, acpi_size count); - -int toupper(int c); - -int tolower(int c); - -extern const u8 _acpi_ctype[]; - -#define _ACPI_XA 0x00 /* extra alphabetic - not supported */ -#define _ACPI_XS 0x40 /* extra space */ -#define _ACPI_BB 0x00 /* BEL, BS, etc. - not supported */ -#define _ACPI_CN 0x20 /* CR, FF, HT, NL, VT */ -#define _ACPI_DI 0x04 /* '0'-'9' */ -#define _ACPI_LO 0x02 /* 'a'-'z' */ -#define _ACPI_PU 0x10 /* punctuation */ -#define _ACPI_SP 0x08 /* space, tab, CR, LF, VT, FF */ -#define _ACPI_UP 0x01 /* 'A'-'Z' */ -#define _ACPI_XD 0x80 /* '0'-'9', 'A'-'F', 'a'-'f' */ - -#define isdigit(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI)) -#define isspace(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP)) -#define isxdigit(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD)) -#define isupper(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP)) -#define islower(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO)) -#define isprint(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU)) -#define isalpha(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP)) - -#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ - #define ACPI_IS_ASCII(c) ((c) < 0x80) /* From 9005694e4043be76299916356723d5dc9c772ab0 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:45:25 +0800 Subject: [PATCH 517/839] ACPICA: Update definitions for the TCPA and TPM2 ACPI tables ACPICA commit 33140b4498666337dd33a00cf3c4797a53981a7b Changes for a new version of the 3rd party spec for these tables, the "TCG ACPI Specification", December 14, 2014. Also, moved the definition of TPM2 to actbl2.h, next to TCPA, since both should be together. Update the table compiler/disassembler code for the tables. However, the "Server" TCPA table is not supported at this time. Link: https://github.com/acpica/acpica/commit/33140b44 Reported-by: Jarkko Sakkinen Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 88 +++++++++++++++++++++++++++++++++++++++---- include/acpi/actbl3.h | 34 +---------------- 2 files changed, 82 insertions(+), 40 deletions(-) diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 370d69d871a0..2a9dcc15d292 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -51,8 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are defined by third-party specifications, and are - * not defined directly by the ACPI specification itself. + * Generally, the tables in this file are defined by third-party specifications, + * and are not defined directly by the ACPI specification itself. * ******************************************************************************/ @@ -80,6 +80,7 @@ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ #define ACPI_SIG_VRTC "VRTC" /* Virtual Real Time Clock Table */ #define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ @@ -1179,20 +1180,91 @@ enum acpi_spmi_interface_types { /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table - * Version 1 + * Version 2 * - * Conforms to "TCG PC Specific Implementation Specification", - * Version 1.1, August 18, 2003 + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 + * + * NOTE: There are two versions of the table with the same signature -- + * the client version and the server version. * ******************************************************************************/ -struct acpi_table_tcpa { +struct acpi_table_tcpa_client { struct acpi_table_header header; /* Common ACPI table header */ - u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ + u16 platform_class; + u32 minimum_log_length; /* Minimum length for the event log area */ u64 log_address; /* Address of the event log area */ }; +struct acpi_table_tcpa_server { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u16 reserved; + u64 minimum_log_length; /* Minimum length for the event log area */ + u64 log_address; /* Address of the event log area */ + u16 spec_revision; + u8 device_flags; + u8 interrupt_flags; + u8 gpe_number; + u8 reserved2[3]; + u32 global_interrupt; + struct acpi_generic_address address; + u32 reserved3; + struct acpi_generic_address config_address; + u8 group; + u8 bus; /* PCI Bus/Segment/Function numbers */ + u8 device; + u8 function; +}; + +/* Values for device_flags above */ + +#define ACPI_TCPA_PCI_DEVICE (1) +#define ACPI_TCPA_BUS_PNP (1<<1) +#define ACPI_TCPA_ADDRESS_VALID (1<<2) + +/* Values for interrupt_flags above */ + +#define ACPI_TCPA_INTERRUPT_MODE (1) +#define ACPI_TCPA_INTERRUPT_POLARITY (1<<1) +#define ACPI_TCPA_SCI_VIA_GPE (1<<2) +#define ACPI_TCPA_GLOBAL_INTERRUPT (1<<3) + +/******************************************************************************* + * + * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table + * Version 4 + * + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 + * + ******************************************************************************/ + +struct acpi_table_tpm2 { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u16 reserved; + u64 control_address; + u32 start_method; + + /* Platform-specific data follows */ +}; + +/* Control area structure (not part of table, pointed to by control_address) */ + +struct acpi_tpm2_control { + u32 reserved; + u32 error; + u32 cancel; + u32 start; + u64 interrupt_control; + u32 command_size; + u64 command_address; + u32 response_size; + u64 response_address; +}; + /******************************************************************************* * * UEFI - UEFI Boot optimization Table diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 4018986d2a2e..1df891660f43 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -51,7 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are fully defined within the ACPI specification. + * In general, the tables in this file are fully defined within the ACPI + * specification. * ******************************************************************************/ @@ -69,7 +70,6 @@ #define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */ #define ACPI_SIG_RASF "RASF" /* RAS Feature table */ #define ACPI_SIG_STAO "STAO" /* Status Override table */ -#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ #define ACPI_SIG_XENV "XENV" /* Xen Environment table */ @@ -720,36 +720,6 @@ struct acpi_table_stao { u8 ignore_uart; }; -/******************************************************************************* - * - * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table - * Version 3 - * - * Conforms to "TPM 2.0 Hardware Interface Table (TPM2)" 29 November 2011 - * - ******************************************************************************/ - -struct acpi_table_tpm2 { - struct acpi_table_header header; /* Common ACPI table header */ - u32 flags; - u64 control_address; - u32 start_method; -}; - -/* Control area structure (not part of table, pointed to by control_address) */ - -struct acpi_tpm2_control { - u32 reserved; - u32 error; - u32 cancel; - u32 start; - u64 interrupt_control; - u32 command_size; - u64 command_address; - u32 response_size; - u64 response_address; -}; - /******************************************************************************* * * WPBT - Windows Platform Environment Table (ACPI 6.0) From 413d4a6defe0467cd22d97bfc5c274f8e3220124 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:45:32 +0800 Subject: [PATCH 518/839] ACPICA: Update TPM2 ACPI table ACPICA commit 254bf77e7ca01cb27e026fa5737f7df8dae03f2c - Add constans for the start_method. - Remove the control structure, not part of ACPI, not defined in the current TCG spec. Link: https://github.com/acpica/acpica/commit/254bf77e Reported-by: Jarkko Sakkinen Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 2a9dcc15d292..a948fc586b9b 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1251,19 +1251,13 @@ struct acpi_table_tpm2 { /* Platform-specific data follows */ }; -/* Control area structure (not part of table, pointed to by control_address) */ +/* Values for start_method above */ -struct acpi_tpm2_control { - u32 reserved; - u32 error; - u32 cancel; - u32 start; - u64 interrupt_control; - u32 command_size; - u64 command_address; - u32 response_size; - u64 response_address; -}; +#define ACPI_TPM2_NOT_ALLOWED 0 +#define ACPI_TPM2_START_METHOD 2 +#define ACPI_TPM2_MEMORY_MAPPED 6 +#define ACPI_TPM2_COMMAND_BUFFER 7 +#define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD 8 /******************************************************************************* * From 3d2967b5b5b29a863227273639b84cfd39bba7f3 Mon Sep 17 00:00:00 2001 From: gongzg Date: Wed, 1 Jul 2015 14:45:38 +0800 Subject: [PATCH 519/839] ACPICA: Comment update, no functional change ACPICA commit 1a8ec7b83d55c7b957247d685bd1c73f6a012f1e Remove redundant comment in nseval.c Link: https://github.com/acpica/acpica/commit/1a8ec7b8 Signed-off-by: gongzg Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nseval.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c index a725d88b036b..80670cb32b5a 100644 --- a/drivers/acpi/acpica/nseval.c +++ b/drivers/acpi/acpica/nseval.c @@ -59,15 +59,14 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, * * FUNCTION: acpi_ns_evaluate * - * PARAMETERS: info - Evaluation info block, contains: + * PARAMETERS: info - Evaluation info block, contains these fields + * and more: * prefix_node - Prefix or Method/Object Node to execute * relative_path - Name of method to execute, If NULL, the * Node is the object to execute * parameters - List of parameters to pass to the method, * terminated by NULL. Params itself may be * NULL if no parameters are being passed. - * return_object - Where to put method's return value (if - * any). If NULL, no value is returned. * parameter_type - Type of Parameter list * return_object - Where to put method's return value (if * any). If NULL, no value is returned. From ce55d01e9b4daee5040777d023d52e6b42d3ad8f Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 1 Jul 2015 14:45:45 +0800 Subject: [PATCH 520/839] ACPICA: Update version to 20150619 ACPICA commit 2fcf4f4c95e6a4875f39a929f8f92ef50cc53bb5 ACPICA commit d7a940bb308d001b5d2b196174fee36c7daa61d6 Version 20150619. Link: https://github.com/acpica/acpica/commit/2fcf4f4c Link: https://github.com/acpica/acpica/commit/d7a940bb Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f6d807170346..e8ec18a4a634 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20150515 +#define ACPI_CA_VERSION 0x20150619 #include #include From a89ca6f24ffe435edad57de02eaabd37a2c6bff6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jun 2015 04:17:46 +0100 Subject: [PATCH 521/839] Btrfs: fix fsync after truncate when no_holes feature is enabled When we have the no_holes feature enabled, if a we truncate a file to a smaller size, truncate it again but to a size greater than or equals to its original size and fsync it, the log tree will not have any information about the hole covering the range [truncate_1_offset, new_file_size[. Which means if the fsync log is replayed, the file will remain with the state it had before both truncate operations. Without the no_holes feature this does not happen, since when the inode is logged (full sync flag is set) it will find in the fs/subvol tree a leaf with a generation matching the current transaction id that has an explicit extent item representing the hole. Fix this by adding an explicit extent item representing a hole between the last extent and the inode's i_size if we are doing a full sync. The issue is easy to reproduce with the following test case for fstests: . ./common/rc . ./common/filter . ./common/dmflakey _need_to_be_root _supported_fs generic _supported_os Linux _require_scratch _require_dm_flakey # This test was motivated by an issue found in btrfs when the btrfs # no-holes feature is enabled (introduced in kernel 3.14). So enable # the feature if the fs being tested is btrfs. if [ $FSTYP == "btrfs" ]; then _require_btrfs_fs_feature "no_holes" _require_btrfs_mkfs_feature "no-holes" MKFS_OPTIONS="$MKFS_OPTIONS -O no-holes" fi rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _init_flakey _mount_flakey # Create our test files and make sure everything is durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 64K" \ -c "pwrite -S 0xbb 64K 61K" \ $SCRATCH_MNT/foo | _filter_xfs_io $XFS_IO_PROG -f -c "pwrite -S 0xee 0 64K" \ -c "pwrite -S 0xff 64K 61K" \ $SCRATCH_MNT/bar | _filter_xfs_io sync # Now truncate our file foo to a smaller size (64Kb) and then truncate # it to the size it had before the shrinking truncate (125Kb). Then # fsync our file. If a power failure happens after the fsync, we expect # our file to have a size of 125Kb, with the first 64Kb of data having # the value 0xaa and the second 61Kb of data having the value 0x00. $XFS_IO_PROG -c "truncate 64K" \ -c "truncate 125K" \ -c "fsync" \ $SCRATCH_MNT/foo # Do something similar to our file bar, but the first truncation sets # the file size to 0 and the second truncation expands the size to the # double of what it was initially. $XFS_IO_PROG -c "truncate 0" \ -c "truncate 253K" \ -c "fsync" \ $SCRATCH_MNT/bar _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again, mount to trigger log replay and validate file # contents. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey # We expect foo to have a size of 125Kb, the first 64Kb of data all # having the value 0xaa and the remaining 61Kb to be a hole (all bytes # with value 0x00). echo "File foo content after log replay:" od -t x1 $SCRATCH_MNT/foo # We expect bar to have a size of 253Kb and no extents (any byte read # from bar has the value 0x00). echo "File bar content after log replay:" od -t x1 $SCRATCH_MNT/bar status=0 exit The expected file contents in the golden output are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0372000 File bar content after log replay: 0000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 Without this fix, their contents are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0372000 File bar content after log replay: 0000000 ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee * 0200000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0372000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 A test case submission for fstests follows soon. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 66f87156882f..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4197,6 +4197,107 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, return 0; } +/* + * If the no holes feature is enabled we need to make sure any hole between the + * last extent and the i_size of our inode is explicitly marked in the log. This + * is to make sure that doing something like: + * + * 1) create file with 128Kb of data + * 2) truncate file to 64Kb + * 3) truncate file to 256Kb + * 4) fsync file + * 5) + * 6) mount fs and trigger log replay + * + * Will give us a file with a size of 256Kb, the first 64Kb of data match what + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the + * file correspond to a hole. The presence of explicit holes in a log tree is + * what guarantees that log replay will remove/adjust file extent items in the + * fs/subvol tree. + * + * Here we do not need to care about holes between extents, that is already done + * by copy_items(). We also only need to do this in the full sync path, where we + * lookup for extents from the fs/subvol tree only. In the fast path case, we + * lookup the list of modified extent maps and if any represents a hole, we + * insert a corresponding extent representing a hole in the log tree. + */ +static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + u64 hole_start; + u64 hole_size; + struct extent_buffer *leaf; + struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(inode); + + if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ASSERT(ret != 0); + if (ret < 0) + return ret; + + ASSERT(path->slots[0] > 0); + path->slots[0]--; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { + /* inode does not have any extents */ + hole_start = 0; + hole_size = i_size; + } else { + struct btrfs_file_extent_item *extent; + u64 len; + + /* + * If there's an extent beyond i_size, an explicit hole was + * already inserted by copy_items(). + */ + if (key.offset >= i_size) + return 0; + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, extent) == + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_inline_len(leaf, + path->slots[0], + extent); + ASSERT(len == i_size); + return 0; + } + + len = btrfs_file_extent_num_bytes(leaf, extent); + /* Last extent goes beyond i_size, no need to log a hole. */ + if (key.offset + len > i_size) + return 0; + hole_start = key.offset + len; + hole_size = i_size - hole_start; + } + btrfs_release_path(path); + + /* Last extent ends at i_size. */ + if (hole_size == 0) + return 0; + + hole_size = ALIGN(hole_size, root->sectorsize); + ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, + hole_size, 0, hole_size, 0, 0, 0); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4460,6 +4561,13 @@ next_slot: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_trailing_hole(trans, root, inode, path); + if (err) + goto out_unlock; + } log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); From 207910ddeeda38fd54544d94f8c8ca5a9632cc25 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:04 -0700 Subject: [PATCH 522/839] btrfs: pass unaligned length to btrfs_cmp_data() In the case that we dedupe the tail of a file, we might expand the dedupe len out to the end of our last block. We don't want to compare data past i_size however, so pass the original length to btrfs_cmp_data(). Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c86b835da7a8..55504338491d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2943,7 +2943,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } - ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); From f441460202cb787c49963bcc1f54cb48c52f7512 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:05 -0700 Subject: [PATCH 523/839] btrfs: fix deadlock with extent-same and readpage ->readpage() does page_lock() before extent_lock(), we do the opposite in extent-same. We want to reverse the order in btrfs_extent_same() but it's not quite straightforward since the page locks are taken inside btrfs_cmp_data(). So I split btrfs_cmp_data() into 3 parts with a small context structure that is passed between them. The first, btrfs_cmp_data_prepare() gathers up the pages needed (taking page lock as required) and puts them on our context structure. At this point, we are safe to lock the extent range. Afterwards, we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() to clean up our context. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 148 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 117 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55504338491d..9ebe2dd31f2a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2765,14 +2765,11 @@ out: return ret; } -static struct page *extent_same_get_page(struct inode *inode, u64 off) +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - pgoff_t index; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - index = off >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, index); if (!page) return NULL; @@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) return page; } +static int gather_extent_pages(struct inode *inode, struct page **pages, + int num_pages, u64 off) +{ + int i; + pgoff_t index = off >> PAGE_CACHE_SHIFT; + + for (i = 0; i < num_pages; i++) { + pages[i] = extent_same_get_page(inode, index + i); + if (!pages[i]) + return -ENOMEM; + } + return 0; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) { - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); - mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode2->i_mutex); } -static void btrfs_double_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 < inode2) + swap(inode1, inode2); + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + if (inode1 != inode2) + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); +} + +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) { if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); lock_extent_range(inode1, loff1, len); - if (inode1 != inode2) { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + if (inode1 != inode2) lock_extent_range(inode2, loff2, len); +} + +struct cmp_pages { + int num_pages; + struct page **src_pages; + struct page **dst_pages; +}; + +static void btrfs_cmp_data_free(struct cmp_pages *cmp) +{ + int i; + struct page *pg; + + for (i = 0; i < cmp->num_pages; i++) { + pg = cmp->src_pages[i]; + if (pg) + page_cache_release(pg); + pg = cmp->dst_pages[i]; + if (pg) + page_cache_release(pg); } + kfree(cmp->src_pages); + kfree(cmp->dst_pages); +} + +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, + struct inode *dst, u64 dst_loff, + u64 len, struct cmp_pages *cmp) +{ + int ret; + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + struct page **src_pgarr, **dst_pgarr; + + /* + * We must gather up all the pages before we initiate our + * extent locking. We use an array for the page pointers. Size + * of the array is bounded by len, which is in turn bounded by + * BTRFS_MAX_DEDUPE_LEN. + */ + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + if (!src_pgarr || !dst_pgarr) { + kfree(src_pgarr); + kfree(dst_pgarr); + return -ENOMEM; + } + cmp->num_pages = num_pages; + cmp->src_pages = src_pgarr; + cmp->dst_pages = dst_pgarr; + + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + if (ret) + goto out; + + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + +out: + if (ret) + btrfs_cmp_data_free(cmp); + return 0; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len) + u64 dst_loff, u64 len, struct cmp_pages *cmp) { int ret = 0; + int i; struct page *src_page, *dst_page; unsigned int cmp_len = PAGE_CACHE_SIZE; void *addr, *dst_addr; + i = 0; while (len) { if (len < PAGE_CACHE_SIZE) cmp_len = len; - src_page = extent_same_get_page(src, loff); - if (!src_page) - return -EINVAL; - dst_page = extent_same_get_page(dst, dst_loff); - if (!dst_page) { - page_cache_release(src_page); - return -EINVAL; - } + BUG_ON(i >= cmp->num_pages); + + src_page = cmp->src_pages[i]; + dst_page = cmp->dst_pages[i]; + addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, kunmap_atomic(addr); kunmap_atomic(dst_addr); - page_cache_release(src_page); - page_cache_release(dst_page); if (ret) break; - loff += cmp_len; - dst_loff += cmp_len; len -= cmp_len; + i++; } return ret; @@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 len = olen; + struct cmp_pages cmp; /* * btrfs_clone() can't handle extents in the same file @@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (len == 0) return 0; - btrfs_double_lock(src, loff, dst, dst_loff, len); + btrfs_double_inode_lock(src, dst); ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) @@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); + if (ret) + goto out_unlock; + + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + + btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_unlock(src, loff, dst, dst_loff, len); + btrfs_double_inode_unlock(src, dst); return ret; } From 0efa9f48c7e6c15e75946dd2b1c82d3d19e13545 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:07 -0700 Subject: [PATCH 524/839] btrfs: allow dedupe of same inode clone() supports cloning within an inode so extent-same can do the same now. This patch fixes up the locking in extent-same to know about the single-inode case. In addition to that, we add a check for overlapping ranges, which clone does not allow. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ebe2dd31f2a..af064946c9b2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2991,27 +2991,61 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, int ret; u64 len = olen; struct cmp_pages cmp; + int same_inode = 0; + u64 same_lock_start = 0; + u64 same_lock_len = 0; - /* - * btrfs_clone() can't handle extents in the same file - * yet. Once that works, we can drop this check and replace it - * with a check for the same inode, but overlapping extents. - */ if (src == dst) - return -EINVAL; + same_inode = 1; if (len == 0) return 0; - btrfs_double_inode_lock(src, dst); + if (same_inode) { + mutex_lock(&src->i_mutex); - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); - if (ret) - goto out_unlock; + /* + * Single inode case wants the same checks, except we + * don't want our length pushed out past i_size as + * comparing that data range makes no sense. + * + * extent_same_check_offsets() will do this for an + * unaligned length at i_size, so catch it here and + * reject the request. + * + * This effectively means we require aligned extents + * for the single-inode case, whereas the other cases + * allow an unaligned length so long as it ends at + * i_size. + */ + if (len != olen) { + ret = -EINVAL; + goto out_unlock; + } + + /* Check for overlapping ranges */ + if (dst_loff + len > loff && dst_loff < loff + len) { + ret = -EINVAL; + goto out_unlock; + } + + same_lock_start = min_t(u64, loff, dst_loff); + same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + btrfs_double_inode_lock(src, dst); + + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); + if (ret) + goto out_unlock; + } /* don't make the dst file partly checksummed */ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != @@ -3024,18 +3058,28 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (ret) goto out_unlock; - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + if (same_inode) + lock_extent_range(src, same_lock_start, same_lock_len); + else + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + if (same_inode) + unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, + same_lock_start + same_lock_len - 1); + else + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_inode_unlock(src, dst); + if (same_inode) + mutex_unlock(&src->i_mutex); + else + btrfs_double_inode_unlock(src, dst); return ret; } From 1c919a5e13702caffbe2d2c7c305f9d0d2925160 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:08 -0700 Subject: [PATCH 525/839] btrfs: don't update mtime/ctime on deduped inodes One issue users have reported is that dedupe changes mtime on files, resulting in tools like rsync thinking that their contents have changed when in fact the data is exactly the same. We also skip the ctime update as no user-visible metadata changes here and we want dedupe to be transparent to the user. Clone still wants time changes, so we special case this in the code. This was tested with the btrfs-extent-same tool. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index af064946c9b2..5d91776e12a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 { static int btrfs_clone(struct inode *src, struct inode *inode, - u64 off, u64 olen, u64 olen_aligned, u64 destoff); + u64 off, u64 olen, u64 olen_aligned, u64 destoff, + int no_time_update); /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3066,7 +3067,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); if (same_inode) unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, @@ -3231,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, const u64 destoff, - const u64 olen) + const u64 olen, + int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (!no_time_update) + inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3322,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode, * @inode: Inode to clone to * @off: Offset within source to start clone from * @olen: Original length, passed by user, of range to clone - * @olen_aligned: Block-aligned value of olen, extent_same uses - * identical values here + * @olen_aligned: Block-aligned value of olen * @destoff: Offset within @inode to start clone + * @no_time_update: Whether to update mtime/ctime on the target inode */ static int btrfs_clone(struct inode *src, struct inode *inode, const u64 off, const u64 olen, const u64 olen_aligned, - const u64 destoff) + const u64 destoff, int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path = NULL; @@ -3652,7 +3655,8 @@ process_slot: root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, - destoff, olen); + destoff, olen, + no_time_update); if (ret) goto out; if (new_key.offset + datal >= destoff + len) @@ -3690,7 +3694,7 @@ process_slot: clone_update_extent_map(inode, trans, NULL, last_dest_end, destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, - destoff, olen); + destoff, olen, no_time_update); } out: @@ -3827,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, lock_extent_range(inode, destoff, len); } - ret = btrfs_clone(src, inode, off, olen, len, destoff); + ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); if (same_inode) { u64 lock_start = min_t(u64, off, destoff); From 61de718fceb6bc028dafe4d06a1f87a9e0998303 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Jul 2015 12:13:10 +0100 Subject: [PATCH 526/839] Btrfs: fix memory corruption on failure to submit bio for direct IO If we fail to submit a bio for a direct IO request, we were grabbing the corresponding ordered extent and decrementing its reference count twice, once for our lookup reference and once for the ordered tree reference. This was a problem because it caused the ordered extent to be freed without removing it from the ordered tree and any lists it might be attached to, leaving dangling pointers to the ordered extent around. Example trace with CONFIG_DEBUG_PAGEALLOC=y: [161779.858707] BUG: unable to handle kernel paging request at 0000000087654330 [161779.859983] IP: [] rb_prev+0x22/0x3b [161779.860636] PGD 34d818067 PUD 0 [161779.860636] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC (...) [161779.860636] Call Trace: [161779.860636] [] __tree_search+0xd9/0xf9 [btrfs] [161779.860636] [] tree_search+0x42/0x63 [btrfs] [161779.860636] [] ? btrfs_lookup_ordered_range+0x2d/0xa5 [btrfs] [161779.860636] [] btrfs_lookup_ordered_range+0x38/0xa5 [btrfs] [161779.860636] [] btrfs_get_blocks_direct+0x11b/0x615 [btrfs] [161779.860636] [] do_blockdev_direct_IO+0x5ff/0xb43 [161779.860636] [] ? btrfs_page_exists_in_range+0x1ad/0x1ad [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] __blockdev_direct_IO+0x32/0x34 [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] btrfs_direct_IO+0x198/0x21f [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] generic_file_direct_write+0xb3/0x128 [161779.860636] [] ? btrfs_file_write_iter+0x15f/0x3e0 [btrfs] [161779.860636] [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] (...) We were also not freeing the btrfs_dio_private we allocated previously, which kmemleak reported with the following trace in its sysfs file: unreferenced object 0xffff8803f553bf80 (size 96): comm "xfs_io", pid 4501, jiffies 4295039588 (age 173.936s) hex dump (first 32 bytes): 88 6c 9b f5 02 88 ff ff 00 00 00 00 00 00 00 00 .l.............. 00 00 00 00 00 00 00 00 00 00 c4 00 00 00 00 00 ................ backtrace: [] create_object+0x172/0x29a [] kmemleak_alloc+0x25/0x41 [] kmemleak_alloc_recursive.constprop.40+0x16/0x18 [] kmem_cache_alloc_trace+0xfb/0x148 [] btrfs_submit_direct+0x65/0x16a [btrfs] [] dio_bio_submit+0x62/0x8f [] do_blockdev_direct_IO+0x97e/0xb43 [] __blockdev_direct_IO+0x32/0x34 [] btrfs_direct_IO+0x198/0x21f [btrfs] [] generic_file_direct_write+0xb3/0x128 [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] [] __vfs_write+0x7c/0xa5 [] vfs_write+0xa0/0xe4 [] SyS_pwrite64+0x64/0x82 [] system_call_fastpath+0x12/0x6f [] 0xffffffffffffffff For read requests we weren't doing any cleanup either (none of the work done by btrfs_endio_direct_read()), so a failure submitting a bio for a read request would leave a range in the inode's io_tree locked forever, blocking any future operations (both reads and writes) against that range. So fix this by making sure we do the same cleanup that we do for the case where the bio submission succeeds. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 65 +++++++++++++++++++++++++++++------------ fs/btrfs/ordered-data.c | 5 ++++ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 855935f6671a..c0b2b6b51b2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8163,9 +8163,8 @@ out_err: static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct inode *inode, loff_t file_offset) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_dio_private *dip; - struct bio *io_bio; + struct btrfs_dio_private *dip = NULL; + struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; int write = rw & REQ_WRITE; @@ -8182,7 +8181,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip = kzalloc(sizeof(*dip), GFP_NOFS); if (!dip) { ret = -ENOMEM; - goto free_io_bio; + goto free_ordered; } dip->private = dio_bio->bi_private; @@ -8210,25 +8209,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, if (btrfs_bio->end_io) btrfs_bio->end_io(btrfs_bio, ret); -free_io_bio: - bio_put(io_bio); free_ordered: /* - * If this is a write, we need to clean up the reserved space and kill - * the ordered extent. + * If we arrived here it means either we failed to submit the dip + * or we either failed to clone the dio_bio or failed to allocate the + * dip. If we cloned the dio_bio and allocated the dip, we can just + * call bio_endio against our io_bio so that we get proper resource + * cleanup if we fail to submit the dip, otherwise, we must do the + * same as btrfs_endio_direct_[write|read] because we can't call these + * callbacks - they require an allocated dip and a clone of dio_bio. */ - if (write) { - struct btrfs_ordered_extent *ordered; - ordered = btrfs_lookup_ordered_extent(inode, file_offset); - if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) - btrfs_free_reserved_extent(root, ordered->start, - ordered->disk_len, 1); - btrfs_put_ordered_extent(ordered); - btrfs_put_ordered_extent(ordered); + if (io_bio && dip) { + bio_endio(io_bio, ret); + /* + * The end io callbacks free our dip, do the final put on io_bio + * and all the cleanup and final put for dio_bio (through + * dio_end_io()). + */ + dip = NULL; + io_bio = NULL; + } else { + if (write) { + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(inode, + file_offset); + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + /* + * Decrements our ref on the ordered extent and removes + * the ordered extent from the inode's ordered tree, + * doing all the proper resource cleanup such as for the + * reserved space and waking up any waiters for this + * ordered extent (through btrfs_remove_ordered_extent). + */ + btrfs_finish_ordered_io(ordered); + } else { + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, + file_offset + dio_bio->bi_iter.bi_size - 1); + } + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + /* + * Releases and cleans up our dio_bio, no need to bio_put() + * nor bio_endio()/bio_io_error() against dio_bio. + */ + dio_end_io(dio_bio, ret); } - bio_endio(dio_bio, ret); + if (io_bio) + bio_put(io_bio); + kfree(dip); } static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 89656d799ff6..52170cf1757e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { + ASSERT(list_empty(&entry->log_list)); + ASSERT(list_empty(&entry->trans_list)); + ASSERT(list_empty(&entry->root_extent_list)); + ASSERT(RB_EMPTY_NODE(&entry->rb_node)); if (entry->inode) btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { @@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); + RB_CLEAR_NODE(node); if (tree->last == node) tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); From 9c6429d96daec64f6b5b10a1c6b02c7264541ea1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 10 Jun 2015 12:55:41 +0100 Subject: [PATCH 527/839] Btrfs: fix a comment in inode.c:evict_inode_truncate_pages() The comment was not correct about the part where it says the endio callback of the bio might have not yet been called - update it to mention that by that time the endio callback execution might still be in progress only. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0b2b6b51b2a..53afda0ef4e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode) /* * Keep looping until we have no more ranges in the io tree. * We can have ongoing bios started by readpages (called from readahead) - * that didn't get their end io callbacks called yet or they are still - * in progress ((extent_io.c:end_bio_extent_readpage()). This means some + * that have their endio callback (extent_io.c:end_bio_extent_readpage) + * still in progress (unlocked the pages in the bio but did not yet + * unlocked the ranges in the io tree). Therefore this means some * ranges can still be locked and eviction started because before * submitting those bios, which are executed by a separate task (work * queue kthread), inode references (inode->i_count) were not taken From ad9ee2053f3f2babebc09ebc4970daa66c56c7ee Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:57 +0800 Subject: [PATCH 528/839] Btrfs: fix hang when failing to submit bio of directIO The hang is uncoverd by generic/019. btrfs_endio_direct_write() skips the "finish_ordered_fn" part when it hits an error, thus those added ordered extents will never get processed, which block processes that waiting for them via btrfs_start_ordered_extent(). This fixes the above, and meanwhile finish_ordered_fn will do the space accounting work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 53afda0ef4e3..0b9fb81ccf8a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7872,8 +7872,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct bio *dio_bio; int ret; - if (err) - goto out_done; again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, @@ -7896,7 +7894,6 @@ out_test: ordered = NULL; goto again; } -out_done: dio_bio = dip->dio_bio; kfree(dip); From ddba1bfc2369cd0566bcfdab47599834a32d1c19 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:58 +0800 Subject: [PATCH 529/839] Btrfs: fix warning of bytes_may_use While running generic/019, dmesg got several warnings from btrfs_free_reserved_data_space(). Test generic/019 produces some disk failures so sumbit dio will get errors, in which case, btrfs_direct_IO() goes to the error handling and free bytes_may_use, but the problem is that bytes_may_use has been free'd during get_block(). This adds a runtime flag to show if we've gone through get_block(), if so, don't do the cleanup work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/inode.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae2..81220b2203c6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,8 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 +/* DIO is ready to submit */ +#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0b9fb81ccf8a..b33c0cf02668 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7547,6 +7547,7 @@ unlock: current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); + set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); } /* @@ -8357,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; - if (ret < 0 && ret != -EIOCBQUEUED) - btrfs_delalloc_release_space(inode, count); - else if (ret >= 0 && (size_t)ret < count) + if (ret < 0 && ret != -EIOCBQUEUED) { + /* + * If the error comes from submitting stage, + * btrfs_get_blocsk_direct() has free'd data space, + * and metadata space will be handled by + * finish_ordered_fn, don't do that again to make + * sure bytes_may_use is correct. + */ + if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, + &BTRFS_I(inode)->runtime_flags)) + btrfs_delalloc_release_space(inode, count); + } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); } From 9689457b5b0a2b69874c421a489d3fb50ca76b7b Mon Sep 17 00:00:00 2001 From: Shilong Wang Date: Sun, 12 Apr 2015 14:35:20 +0800 Subject: [PATCH 530/839] Btrfs: fix wrong check for btrfs_force_chunk_alloc() btrfs_force_chunk_alloc() return 1 for allocation chunk successfully. This problem exists since commit c87f08ca4. With this patch, we might fix some enospc problems for balances. Signed-off-by: Wang Shilong Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 827951fbf7fc..88cbb5995667 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4049,7 +4049,7 @@ restart: if (trans && progress && err == -ENOSPC) { ret = btrfs_force_chunk_alloc(trans, rc->extent_root, rc->block_group->flags); - if (ret == 0) { + if (ret == 1) { err = 0; progress = 0; goto restart; From 0f0ff9a9f3fa2ec6f427603fd521d5f3a0b076d1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Jul 2015 23:37:46 -0400 Subject: [PATCH 531/839] ext4: fix fencepost error in lazytime optimization Commit 8f4d8558391: "ext4: fix lazytime optimization" was not a complete fix. In the case where the inode number is a multiple of 16, and we could still end up updating an inode with dirty timestamps written to the wrong inode on disk. Oops. This can be easily reproduced by using generic/005 with a file system with metadata_csum and lazytime enabled. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e057c6fcc227..4ad73d3c1003 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4348,7 +4348,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; From bd7ade3cd9b0850264306f5c2b79024a417b6396 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: [PATCH 532/839] bufferhead: Add _gfp version for sb_getblk() sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { From c45653c341f5c8a0ce19c8f0ad4678640849cb86 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:34:07 -0400 Subject: [PATCH 533/839] ext4: avoid deadlocks in the writeback path by using sb_getblk_gfp Switch ext4 to using sb_getblk_gfp with GFP_NOFS added to fix possible deadlocks in the page writeback path. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d86d2622f826..69af30e0d75d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); From 75a15a7864c9e281c74a1670b10b69d1d7ff1c82 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 1 Jul 2015 13:38:52 +0300 Subject: [PATCH 534/839] EDAC, octeon: Fix broken build due to model helper renames Commit debe6a623d3c ("MIPS: OCTEON: Update octeon-model.h code for new SoCs.") renamed some SoC model helper functions, but forgot to update the EDAC drivers resulting in build failures. Fix that. Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Aaro Koskinen Acked-by: David Daney Cc: Mauro Carvalho Chehab Cc: Ralf Baechle Cc: linux-edac Cc: linux-mips@linux-mips.org Link: http://lkml.kernel.org/r/1435747132-10954-1-git-send-email-aaro.koskinen@nokia.com Signed-off-by: Borislav Petkov --- drivers/edac/octeon_edac-l2c.c | 2 +- drivers/edac/octeon_edac-lmc.c | 2 +- drivers/edac/octeon_edac-pc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 7e98084d3645..afea7fc625cc 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index bb19e0732681..cda6dab5067a 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1f..2ab6cf24c959 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -73,7 +73,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); From 864d5bb957138d1ed9641a5eff7849a56ddadc16 Mon Sep 17 00:00:00 2001 From: James C Boyd Date: Wed, 1 Jul 2015 15:13:26 -0500 Subject: [PATCH 535/839] x86/asm/entry/64: Update path names The paths mentioned in this file weren't updated through some file rename commits. Fix them to refer to the correct path. Signed-off-by: James C Boyd Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1435781606-3037-1-git-send-email-jcboyd.dev@gmail.com Signed-off-by: Ingo Molnar --- Documentation/x86/entry_64.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt index 33884d156125..c1df8eba9dfd 100644 --- a/Documentation/x86/entry_64.txt +++ b/Documentation/x86/entry_64.txt @@ -1,14 +1,14 @@ This file documents some of the kernel entries in -arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from +arch/x86/entry/entry_64.S. A lot of this explanation is adapted from an email from Ingo Molnar: http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu> The x86 architecture has quite a few different ways to jump into kernel code. Most of these entry points are registered in -arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S -for 64-bit, arch/x86/kernel/entry_32.S for 32-bit and finally -arch/x86/ia32/ia32entry.S which implements the 32-bit compatibility +arch/x86/kernel/traps.c and implemented in arch/x86/entry/entry_64.S +for 64-bit, arch/x86/entry/entry_32.S for 32-bit and finally +arch/x86/entry/entry_64_compat.S which implements the 32-bit compatibility syscall entry points and thus provides for 32-bit processes the ability to execute syscalls when running on 64-bit kernels. From 0438ec90b17963662ef619b71c77c5c135b07014 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 30 Jun 2015 12:23:45 +0300 Subject: [PATCH 536/839] OMAPDSS: fix probing if rfbi device is enabled After the commit 736e60ddc215b85e73bbf7da26e1cde84cc9500f ("OMAPDSS: componentize omapdss") the dss core device will wait until all the subdevices have been successfully probed. However, we don't have a working driver for RFBI, so if RFBI device exists, omapdss will never get probed. All the .dtsi files set RFBI as disabled, except am4372.dtsi. This causes omapdss probe to not finish on AM4 devices. This patch makes omapdss driver skip adding rfbi device as a subcomponent, solving the issue. This should be reverted when we have a working RFBI driver. Signed-off-by: Tomi Valkeinen Reported-by: Felipe Balbi --- drivers/video/fbdev/omap2/dss/dss.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/video/fbdev/omap2/dss/dss.c b/drivers/video/fbdev/omap2/dss/dss.c index 612b093831d5..9200a8668b49 100644 --- a/drivers/video/fbdev/omap2/dss/dss.c +++ b/drivers/video/fbdev/omap2/dss/dss.c @@ -1225,6 +1225,15 @@ static int dss_add_child_component(struct device *dev, void *data) { struct component_match **match = data; + /* + * HACK + * We don't have a working driver for rfbi, so skip it here always. + * Otherwise dss will never get probed successfully, as it will wait + * for rfbi to get probed. + */ + if (strstr(dev_name(dev), "rfbi")) + return 0; + component_match_add(dev->parent, match, dss_component_compare, dev); return 0; From c423bc85097327fba71f9a831280d09b64bb62b6 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 16 Jun 2015 14:54:51 +0300 Subject: [PATCH 537/839] drm/omap: check that plane is inside crtc DRM allows planes to be partially off-screen, but DSS hardware does not. This patch adds the necessary check to reject plane configs if the plane is not fully inside the crtc. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_plane.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index cfa8276c4deb..098904696a5c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -17,6 +17,7 @@ * this program. If not, see . */ +#include #include #include @@ -153,9 +154,34 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, dispc_ovl_enable(omap_plane->id, false); } +static int omap_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + + if (!state->crtc) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (state->crtc_x < 0 || state->crtc_y < 0) + return -EINVAL; + + if (state->crtc_x + state->crtc_w > crtc_state->adjusted_mode.hdisplay) + return -EINVAL; + + if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) + return -EINVAL; + + return 0; +} + static const struct drm_plane_helper_funcs omap_plane_helper_funcs = { .prepare_fb = omap_plane_prepare_fb, .cleanup_fb = omap_plane_cleanup_fb, + .atomic_check = omap_plane_atomic_check, .atomic_update = omap_plane_atomic_update, .atomic_disable = omap_plane_atomic_disable, }; From 96cbd142310cc7281f94739b27d8e4c308d588c5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:32 +0300 Subject: [PATCH 538/839] drm/omap: increase DMM transaction timeout The DMM driver uses a timeout of 1 ms to wait for DMM transaction to finish. While DMM should always finish the operation within that time, the timeout is rather strict. Small misbehavior of the system (e.g. an irq taking too long) could trigger the timeout. As the DMM is a critical piece of code for display memory management, let's increase the timeout to 100 ms so that we are less likely to fail a memory allocation in case of system misbehaviors. 100 ms is just a guess of a reasonably large timeout. The HW should accomplish the task in less than 1 ms. Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f2daad8c3d96..7841970de48d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -285,7 +285,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) if (wait) { if (!wait_for_completion_timeout(&engine->compl, - msecs_to_jiffies(1))) { + msecs_to_jiffies(100))) { dev_err(dmm->dev, "timed out waiting for done\n"); ret = -ETIMEDOUT; } From 9c368506c96793f17934a61a0f06412afb7d2f8d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:35 +0300 Subject: [PATCH 539/839] drm/omap: fix omap_framebuffer_unpin() error handling omap_framebuffer_unpin() check the return value of omap_gem_put_paddr() and return immediately if omap_gem_put_paddr() fails. This patch removes the check for the return value, and also removes the return value of omap_framebuffer_unpin(), because: * Nothing checks the return value of omap_framebuffer_unpin(), and even something did check it, there's nothing the caller can do to handle the error. * If a omap_gem_put_paddr() fails, the framebuffer's other planes will be left unreleased. So it's better to call omap_gem_put_paddr() for all the planes, even if one would fail. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_fb.c | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index ae2df41f216f..2ef89c0c3006 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -177,7 +177,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p); int omap_framebuffer_pin(struct drm_framebuffer *fb); -int omap_framebuffer_unpin(struct drm_framebuffer *fb); +void omap_framebuffer_unpin(struct drm_framebuffer *fb); void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, struct omap_drm_window *win, struct omap_overlay_info *info); struct drm_connector *omap_framebuffer_get_next_connector( diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 0b967e76df1a..51b1219af87f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -287,10 +287,10 @@ fail: } /* unpin, no longer being scanned out: */ -int omap_framebuffer_unpin(struct drm_framebuffer *fb) +void omap_framebuffer_unpin(struct drm_framebuffer *fb) { struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); - int ret, i, n = drm_format_num_planes(fb->pixel_format); + int i, n = drm_format_num_planes(fb->pixel_format); mutex_lock(&omap_fb->lock); @@ -298,24 +298,16 @@ int omap_framebuffer_unpin(struct drm_framebuffer *fb) if (omap_fb->pin_count > 0) { mutex_unlock(&omap_fb->lock); - return 0; + return; } for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; - ret = omap_gem_put_paddr(plane->bo); - if (ret) - goto fail; + omap_gem_put_paddr(plane->bo); plane->paddr = 0; } mutex_unlock(&omap_fb->lock); - - return 0; - -fail: - mutex_unlock(&omap_fb->lock); - return ret; } struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p) From 393a949f51994a67e8e33b2f79c6f2020959a7e2 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:36 +0300 Subject: [PATCH 540/839] drm/omap: fix omap_gem_put_paddr() error handling If tiler_unpin() call in omap_gem_put_paddr() fails, omap_gem_put_paddr() will immediately stop processing and return an error. This patch remoes that error checking, and also removes omap_gem_put_paddr()'s return value, because: * The caller of omap_gem_put_paddr() can do nothing if an error happens, so it's pointless to return an error value * If tiler_unpin() fails, the GEM object will possibly be left in an undefined state, where the DMM mapping may have been removed, but the GEM object still thinks everything is as it should be, leading to crashes later. * There's no point in returning an error from a "free" call, as the caller can do nothing about it. So it's better to clean up as much as possible. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_gem.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 2ef89c0c3006..1ae8477e4289 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -211,7 +211,7 @@ void omap_gem_dma_sync(struct drm_gem_object *obj, enum dma_data_direction dir); int omap_gem_get_paddr(struct drm_gem_object *obj, dma_addr_t *paddr, bool remap); -int omap_gem_put_paddr(struct drm_gem_object *obj); +void omap_gem_put_paddr(struct drm_gem_object *obj); int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages, bool remap); int omap_gem_put_pages(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 874eb6dcf94b..7ed08fdc4c42 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -808,10 +808,10 @@ fail: /* Release physical address, when DMA is no longer being performed.. this * could potentially unpin and unmap buffers from TILER */ -int omap_gem_put_paddr(struct drm_gem_object *obj) +void omap_gem_put_paddr(struct drm_gem_object *obj) { struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; + int ret; mutex_lock(&obj->dev->struct_mutex); if (omap_obj->paddr_cnt > 0) { @@ -821,7 +821,6 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) if (ret) { dev_err(obj->dev->dev, "could not unpin pages: %d\n", ret); - goto fail; } ret = tiler_release(omap_obj->block); if (ret) { @@ -832,9 +831,8 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) omap_obj->block = NULL; } } -fail: + mutex_unlock(&obj->dev->struct_mutex); - return ret; } /* Get rotated scanout address (only valid if already pinned), at the From d642d3acd89454006fd6ca9cc4dd57f26959f9d1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 May 2015 13:32:31 +0300 Subject: [PATCH 541/839] drm/omap: fix align_pitch() for 24 bits per pixel align_pitch() uses ALIGN() to ensure the pitch is aligned to SGX's requirement of 8 pixels. However, ALIGN() expects the alignment value to be a power of two, which is not the case for 24 bits per pixels. Use roundup() instead, which works for all alignments. This fixes the error seen with 24 bits per pixel modes: "buffer pitch (2176 bytes) is not a multiple of pixel size (3 bytes)" Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 1ae8477e4289..12081e61d45a 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -236,7 +236,7 @@ static inline int align_pitch(int pitch, int width, int bpp) /* PVR needs alignment to 8 pixels.. right now that is the most * restrictive stride requirement.. */ - return ALIGN(pitch, 8 * bytespp); + return roundup(pitch, 8 * bytespp); } /* map crtc to vblank mask */ From 743c16719f671c206923d23dae4ac57edfd9483c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 1 Jul 2014 20:52:50 +0200 Subject: [PATCH 542/839] drm/omap: replace ALIGN(PAGE_SIZE) by PAGE_ALIGN use mm.h definition Cc: David Airlie Cc: Tomi Valkeinen Cc: dri-devel@lists.freedesktop.org Signed-off-by: Fabian Frederick Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 23b5a84389e3..720d16bce7e8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -135,7 +135,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, fbdev->ywrap_enabled = priv->has_dmm && ywrap_enabled; if (fbdev->ywrap_enabled) { /* need to align pitch to page size if using DMM scrolling */ - mode_cmd.pitches[0] = ALIGN(mode_cmd.pitches[0], PAGE_SIZE); + mode_cmd.pitches[0] = PAGE_ALIGN(mode_cmd.pitches[0]); } /* allocate backing bo */ From f307170d6e591a48529425b1ed6ca835790995a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 17:23:37 -0500 Subject: [PATCH 543/839] netfilter: nf_queue: Don't recompute the hook_list head If someone sends packets from one of the netdevice ingress hooks to the a userspace queue, and then userspace later accepts the packet, the netfilter code can enter an infinite loop as the list head will never be found. Pass in the saved list_head to avoid this. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index cd60d397fe05..8a8b2abc35ff 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + verdict = nf_iterate(entry->state.hook_list, skb, &entry->state, &elem); } From a1bc1b356a9d21bf29bc7c873718b5cacdf119b4 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 20 Jun 2015 00:17:50 +0200 Subject: [PATCH 544/839] netfilter: bridge: fix CONFIG_NF_DEFRAG_IPV4/6 related warnings/errors br_nf_ip_fragment() is not needed when neither CONFIG_NF_DEFRAG_IPV4 nor CONFIG_NF_DEFRAG_IPV6 is set. struct brnf_frag_data must be available if either CONFIG_NF_DEFRAG_IPV4 or CONFIG_NF_DEFRAG_IPV6 is set. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d89f4fac0bc5..8a394bd3af83 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) { @@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { From ec110bc7cc48d7806c9b65094e6afb19452d458f Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 7 May 2015 06:45:21 -0400 Subject: [PATCH 545/839] NTB: Move files in preparation for NTB abstraction This patch only moves files to their new locations, before applying the next two patches adding the NTB Abstraction layer. Splitting this patch from the next is intended make distinct which code is changed only due to moving the files, versus which are substantial code changes in adding the NTB Abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/net/Kconfig | 4 +- drivers/net/ntb_netdev.c | 6 +- drivers/ntb/Makefile | 4 +- .../ntb/{ntb_hw.c => hw/intel/ntb_hw_intel.c} | 3 +- .../ntb/{ntb_hw.h => hw/intel/ntb_hw_intel.h} | 131 ++++++++++++- drivers/ntb/ntb_regs.h | 177 ------------------ drivers/ntb/ntb_transport.c | 14 +- include/linux/{ntb.h => ntb_transport.h} | 4 +- 8 files changed, 147 insertions(+), 196 deletions(-) rename drivers/ntb/{ntb_hw.c => hw/intel/ntb_hw_intel.c} (99%) rename drivers/ntb/{ntb_hw.h => hw/intel/ntb_hw_intel.h} (65%) delete mode 100644 drivers/ntb/ntb_regs.h rename include/linux/{ntb.h => ntb_transport.h} (96%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d6025a90..bda3cde62bb3 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -203,8 +203,8 @@ config NET_POLL_CONTROLLER def_bool NETPOLL config NTB_NETDEV - tristate "Virtual Ethernet over NTB" - depends on NTB + tristate "Virtual Ethernet over NTB Transport" + depends on NTB_TRANSPORT config RIONET tristate "RapidIO Ethernet over messaging driver support" diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 5a7e6397440a..6d3bfa62f5ec 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #define NTB_NETDEV_VER "0.7" @@ -410,13 +410,13 @@ static int __init ntb_netdev_init_module(void) rc = ntb_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_register_client(&ntb_netdev_client); + return ntb_transport_register_client(&ntb_netdev_client); } module_init(ntb_netdev_init_module); static void __exit ntb_netdev_exit_module(void) { - ntb_unregister_client(&ntb_netdev_client); + ntb_transport_unregister_client(&ntb_netdev_client); ntb_unregister_client_dev(KBUILD_MODNAME); } module_exit(ntb_netdev_exit_module); diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 15cb59fd354e..545b10a131af 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_NTB) += ntb.o +obj-$(CONFIG_NTB) += ntb_hw_intel.o -ntb-objs := ntb_hw.o ntb_transport.o +ntb_hw_intel-objs := hw/intel/ntb_hw_intel.o ntb_transport.o diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/hw/intel/ntb_hw_intel.c similarity index 99% rename from drivers/ntb/ntb_hw.c rename to drivers/ntb/hw/intel/ntb_hw_intel.c index 3f6738612f45..044534a995f1 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -53,8 +53,7 @@ #include #include #include -#include "ntb_hw.h" -#include "ntb_regs.h" +#include "ntb_hw_intel.h" #define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver" #define NTB_VER "1.0" diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/hw/intel/ntb_hw_intel.h similarity index 65% rename from drivers/ntb/ntb_hw.h rename to drivers/ntb/hw/intel/ntb_hw_intel.h index 96de5fc95f90..935610454f70 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -45,8 +45,137 @@ * Contact Information: * Jon Mason */ -#include +#include +#define NTB_LINK_STATUS_ACTIVE 0x2000 +#define NTB_LINK_SPEED_MASK 0x000f +#define NTB_LINK_WIDTH_MASK 0x03f0 + +#define SNB_MSIX_CNT 4 +#define SNB_MAX_B2B_SPADS 16 +#define SNB_MAX_COMPAT_SPADS 16 +/* Reserve the uppermost bit for link interrupt */ +#define SNB_MAX_DB_BITS 15 +#define SNB_LINK_DB 15 +#define SNB_DB_BITS_PER_VEC 5 +#define HSX_SPLITBAR_MAX_MW 3 +#define SNB_MAX_MW 2 +#define SNB_ERRATA_MAX_MW 1 + +#define SNB_DB_HW_LINK 0x8000 + +#define SNB_UNCERRSTS_OFFSET 0x014C +#define SNB_CORERRSTS_OFFSET 0x0158 +#define SNB_LINK_STATUS_OFFSET 0x01A2 +#define SNB_PCICMD_OFFSET 0x0504 +#define SNB_DEVCTRL_OFFSET 0x0598 +#define SNB_DEVSTS_OFFSET 0x059A +#define SNB_SLINK_STATUS_OFFSET 0x05A2 + +#define SNB_PBAR2LMT_OFFSET 0x0000 +#define SNB_PBAR4LMT_OFFSET 0x0008 +#define SNB_PBAR5LMT_OFFSET 0x000C +#define SNB_PBAR2XLAT_OFFSET 0x0010 +#define SNB_PBAR4XLAT_OFFSET 0x0018 +#define SNB_PBAR5XLAT_OFFSET 0x001C +#define SNB_SBAR2LMT_OFFSET 0x0020 +#define SNB_SBAR4LMT_OFFSET 0x0028 +#define SNB_SBAR5LMT_OFFSET 0x002C +#define SNB_SBAR2XLAT_OFFSET 0x0030 +#define SNB_SBAR4XLAT_OFFSET 0x0038 +#define SNB_SBAR5XLAT_OFFSET 0x003C +#define SNB_SBAR0BASE_OFFSET 0x0040 +#define SNB_SBAR2BASE_OFFSET 0x0048 +#define SNB_SBAR4BASE_OFFSET 0x0050 +#define SNB_SBAR5BASE_OFFSET 0x0054 +#define SNB_NTBCNTL_OFFSET 0x0058 +#define SNB_SBDF_OFFSET 0x005C +#define SNB_PDOORBELL_OFFSET 0x0060 +#define SNB_PDBMSK_OFFSET 0x0062 +#define SNB_SDOORBELL_OFFSET 0x0064 +#define SNB_SDBMSK_OFFSET 0x0066 +#define SNB_USMEMMISS_OFFSET 0x0070 +#define SNB_SPAD_OFFSET 0x0080 +#define SNB_SPADSEMA4_OFFSET 0x00c0 +#define SNB_WCCNTRL_OFFSET 0x00e0 +#define SNB_B2B_SPAD_OFFSET 0x0100 +#define SNB_B2B_DOORBELL_OFFSET 0x0140 +#define SNB_B2B_XLAT_OFFSETL 0x0144 +#define SNB_B2B_XLAT_OFFSETU 0x0148 + +/* + * The addresses are setup so the 32bit BARs can function. Thus + * the addresses are all in 32bit space + */ +#define SNB_MBAR01_USD_ADDR 0x000000002100000CULL +#define SNB_MBAR23_USD_ADDR 0x000000004100000CULL +#define SNB_MBAR4_USD_ADDR 0x000000008100000CULL +#define SNB_MBAR5_USD_ADDR 0x00000000A100000CULL +#define SNB_MBAR01_DSD_ADDR 0x000000002000000CULL +#define SNB_MBAR23_DSD_ADDR 0x000000004000000CULL +#define SNB_MBAR4_DSD_ADDR 0x000000008000000CULL +#define SNB_MBAR5_DSD_ADDR 0x00000000A000000CULL + +#define BWD_MSIX_CNT 34 +#define BWD_MAX_SPADS 16 +#define BWD_MAX_DB_BITS 34 +#define BWD_DB_BITS_PER_VEC 1 +#define BWD_MAX_MW 2 + +#define BWD_PCICMD_OFFSET 0xb004 +#define BWD_MBAR23_OFFSET 0xb018 +#define BWD_MBAR45_OFFSET 0xb020 +#define BWD_DEVCTRL_OFFSET 0xb048 +#define BWD_LINK_STATUS_OFFSET 0xb052 +#define BWD_ERRCORSTS_OFFSET 0xb110 + +#define BWD_SBAR2XLAT_OFFSET 0x0008 +#define BWD_SBAR4XLAT_OFFSET 0x0010 +#define BWD_PDOORBELL_OFFSET 0x0020 +#define BWD_PDBMSK_OFFSET 0x0028 +#define BWD_NTBCNTL_OFFSET 0x0060 +#define BWD_EBDF_OFFSET 0x0064 +#define BWD_SPAD_OFFSET 0x0080 +#define BWD_SPADSEMA_OFFSET 0x00c0 +#define BWD_STKYSPAD_OFFSET 0x00c4 +#define BWD_PBAR2XLAT_OFFSET 0x8008 +#define BWD_PBAR4XLAT_OFFSET 0x8010 +#define BWD_B2B_DOORBELL_OFFSET 0x8020 +#define BWD_B2B_SPAD_OFFSET 0x8080 +#define BWD_B2B_SPADSEMA_OFFSET 0x80c0 +#define BWD_B2B_STKYSPAD_OFFSET 0x80c4 + +#define BWD_MODPHY_PCSREG4 0x1c004 +#define BWD_MODPHY_PCSREG6 0x1c006 + +#define BWD_IP_BASE 0xC000 +#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) +#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) +#define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040) +#define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324) + +#define BWD_DESKEWSTS_DBERR (1 << 15) +#define BWD_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) +#define BWD_LTSSMSTATEJMP_FORCEDETECT (1 << 2) +#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF + +#define NTB_CNTL_CFG_LOCK (1 << 0) +#define NTB_CNTL_LINK_DISABLE (1 << 1) +#define NTB_CNTL_S2P_BAR23_SNOOP (1 << 2) +#define NTB_CNTL_P2S_BAR23_SNOOP (1 << 4) +#define NTB_CNTL_S2P_BAR4_SNOOP (1 << 6) +#define NTB_CNTL_P2S_BAR4_SNOOP (1 << 8) +#define NTB_CNTL_S2P_BAR5_SNOOP (1 << 12) +#define NTB_CNTL_P2S_BAR5_SNOOP (1 << 14) +#define BWD_CNTL_LINK_DOWN (1 << 16) + +#define NTB_PPD_OFFSET 0x00D4 +#define SNB_PPD_CONN_TYPE 0x0003 +#define SNB_PPD_DEV_TYPE 0x0010 +#define SNB_PPD_SPLIT_BAR (1 << 6) +#define BWD_PPD_INIT_LINK 0x0008 +#define BWD_PPD_CONN_TYPE 0x0300 +#define BWD_PPD_DEV_TYPE 0x1000 #define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 #define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 #define PCI_DEVICE_ID_INTEL_NTB_SS_JSF 0x3727 diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h deleted file mode 100644 index f028ff81fd77..000000000000 --- a/drivers/ntb/ntb_regs.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason - */ - -#define NTB_LINK_STATUS_ACTIVE 0x2000 -#define NTB_LINK_SPEED_MASK 0x000f -#define NTB_LINK_WIDTH_MASK 0x03f0 - -#define SNB_MSIX_CNT 4 -#define SNB_MAX_B2B_SPADS 16 -#define SNB_MAX_COMPAT_SPADS 16 -/* Reserve the uppermost bit for link interrupt */ -#define SNB_MAX_DB_BITS 15 -#define SNB_LINK_DB 15 -#define SNB_DB_BITS_PER_VEC 5 -#define HSX_SPLITBAR_MAX_MW 3 -#define SNB_MAX_MW 2 -#define SNB_ERRATA_MAX_MW 1 - -#define SNB_DB_HW_LINK 0x8000 - -#define SNB_UNCERRSTS_OFFSET 0x014C -#define SNB_CORERRSTS_OFFSET 0x0158 -#define SNB_LINK_STATUS_OFFSET 0x01A2 -#define SNB_PCICMD_OFFSET 0x0504 -#define SNB_DEVCTRL_OFFSET 0x0598 -#define SNB_DEVSTS_OFFSET 0x059A -#define SNB_SLINK_STATUS_OFFSET 0x05A2 - -#define SNB_PBAR2LMT_OFFSET 0x0000 -#define SNB_PBAR4LMT_OFFSET 0x0008 -#define SNB_PBAR5LMT_OFFSET 0x000C -#define SNB_PBAR2XLAT_OFFSET 0x0010 -#define SNB_PBAR4XLAT_OFFSET 0x0018 -#define SNB_PBAR5XLAT_OFFSET 0x001C -#define SNB_SBAR2LMT_OFFSET 0x0020 -#define SNB_SBAR4LMT_OFFSET 0x0028 -#define SNB_SBAR5LMT_OFFSET 0x002C -#define SNB_SBAR2XLAT_OFFSET 0x0030 -#define SNB_SBAR4XLAT_OFFSET 0x0038 -#define SNB_SBAR5XLAT_OFFSET 0x003C -#define SNB_SBAR0BASE_OFFSET 0x0040 -#define SNB_SBAR2BASE_OFFSET 0x0048 -#define SNB_SBAR4BASE_OFFSET 0x0050 -#define SNB_SBAR5BASE_OFFSET 0x0054 -#define SNB_NTBCNTL_OFFSET 0x0058 -#define SNB_SBDF_OFFSET 0x005C -#define SNB_PDOORBELL_OFFSET 0x0060 -#define SNB_PDBMSK_OFFSET 0x0062 -#define SNB_SDOORBELL_OFFSET 0x0064 -#define SNB_SDBMSK_OFFSET 0x0066 -#define SNB_USMEMMISS_OFFSET 0x0070 -#define SNB_SPAD_OFFSET 0x0080 -#define SNB_SPADSEMA4_OFFSET 0x00c0 -#define SNB_WCCNTRL_OFFSET 0x00e0 -#define SNB_B2B_SPAD_OFFSET 0x0100 -#define SNB_B2B_DOORBELL_OFFSET 0x0140 -#define SNB_B2B_XLAT_OFFSETL 0x0144 -#define SNB_B2B_XLAT_OFFSETU 0x0148 - -/* - * The addresses are setup so the 32bit BARs can function. Thus - * the addresses are all in 32bit space - */ -#define SNB_MBAR01_USD_ADDR 0x000000002100000CULL -#define SNB_MBAR23_USD_ADDR 0x000000004100000CULL -#define SNB_MBAR4_USD_ADDR 0x000000008100000CULL -#define SNB_MBAR5_USD_ADDR 0x00000000A100000CULL -#define SNB_MBAR01_DSD_ADDR 0x000000002000000CULL -#define SNB_MBAR23_DSD_ADDR 0x000000004000000CULL -#define SNB_MBAR4_DSD_ADDR 0x000000008000000CULL -#define SNB_MBAR5_DSD_ADDR 0x00000000A000000CULL - -#define BWD_MSIX_CNT 34 -#define BWD_MAX_SPADS 16 -#define BWD_MAX_DB_BITS 34 -#define BWD_DB_BITS_PER_VEC 1 -#define BWD_MAX_MW 2 - -#define BWD_PCICMD_OFFSET 0xb004 -#define BWD_MBAR23_OFFSET 0xb018 -#define BWD_MBAR45_OFFSET 0xb020 -#define BWD_DEVCTRL_OFFSET 0xb048 -#define BWD_LINK_STATUS_OFFSET 0xb052 -#define BWD_ERRCORSTS_OFFSET 0xb110 - -#define BWD_SBAR2XLAT_OFFSET 0x0008 -#define BWD_SBAR4XLAT_OFFSET 0x0010 -#define BWD_PDOORBELL_OFFSET 0x0020 -#define BWD_PDBMSK_OFFSET 0x0028 -#define BWD_NTBCNTL_OFFSET 0x0060 -#define BWD_EBDF_OFFSET 0x0064 -#define BWD_SPAD_OFFSET 0x0080 -#define BWD_SPADSEMA_OFFSET 0x00c0 -#define BWD_STKYSPAD_OFFSET 0x00c4 -#define BWD_PBAR2XLAT_OFFSET 0x8008 -#define BWD_PBAR4XLAT_OFFSET 0x8010 -#define BWD_B2B_DOORBELL_OFFSET 0x8020 -#define BWD_B2B_SPAD_OFFSET 0x8080 -#define BWD_B2B_SPADSEMA_OFFSET 0x80c0 -#define BWD_B2B_STKYSPAD_OFFSET 0x80c4 - -#define BWD_MODPHY_PCSREG4 0x1c004 -#define BWD_MODPHY_PCSREG6 0x1c006 - -#define BWD_IP_BASE 0xC000 -#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) -#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) -#define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040) -#define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324) - -#define BWD_DESKEWSTS_DBERR (1 << 15) -#define BWD_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) -#define BWD_LTSSMSTATEJMP_FORCEDETECT (1 << 2) -#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF - -#define NTB_CNTL_CFG_LOCK (1 << 0) -#define NTB_CNTL_LINK_DISABLE (1 << 1) -#define NTB_CNTL_S2P_BAR23_SNOOP (1 << 2) -#define NTB_CNTL_P2S_BAR23_SNOOP (1 << 4) -#define NTB_CNTL_S2P_BAR4_SNOOP (1 << 6) -#define NTB_CNTL_P2S_BAR4_SNOOP (1 << 8) -#define NTB_CNTL_S2P_BAR5_SNOOP (1 << 12) -#define NTB_CNTL_P2S_BAR5_SNOOP (1 << 14) -#define BWD_CNTL_LINK_DOWN (1 << 16) - -#define NTB_PPD_OFFSET 0x00D4 -#define SNB_PPD_CONN_TYPE 0x0003 -#define SNB_PPD_DEV_TYPE 0x0010 -#define SNB_PPD_SPLIT_BAR (1 << 6) -#define BWD_PPD_INIT_LINK 0x0008 -#define BWD_PPD_CONN_TYPE 0x0300 -#define BWD_PPD_DEV_TYPE 0x1000 diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index e9bf2f47b61a..c5f26cda9f97 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -56,7 +56,7 @@ #include #include #include -#include "ntb_hw.h" +#include "hw/intel/ntb_hw_intel.h" #define NTB_TRANSPORT_VERSION 3 @@ -360,14 +360,14 @@ err: EXPORT_SYMBOL_GPL(ntb_register_client_dev); /** - * ntb_register_client - Register NTB client driver + * ntb_transport_register_client - Register NTB client driver * @drv: NTB client driver to be registered * * Register an NTB client driver with the NTB transport layer * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -int ntb_register_client(struct ntb_client *drv) +int ntb_transport_register_client(struct ntb_client *drv) { drv->driver.bus = &ntb_bus_type; @@ -376,21 +376,21 @@ int ntb_register_client(struct ntb_client *drv) return driver_register(&drv->driver); } -EXPORT_SYMBOL_GPL(ntb_register_client); +EXPORT_SYMBOL_GPL(ntb_transport_register_client); /** - * ntb_unregister_client - Unregister NTB client driver + * ntb_transport_unregister_client - Unregister NTB client driver * @drv: NTB client driver to be unregistered * * Unregister an NTB client driver with the NTB transport layer * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -void ntb_unregister_client(struct ntb_client *drv) +void ntb_transport_unregister_client(struct ntb_client *drv) { driver_unregister(&drv->driver); } -EXPORT_SYMBOL_GPL(ntb_unregister_client); +EXPORT_SYMBOL_GPL(ntb_transport_unregister_client); static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) diff --git a/include/linux/ntb.h b/include/linux/ntb_transport.h similarity index 96% rename from include/linux/ntb.h rename to include/linux/ntb_transport.h index 9ac1a62fc6f5..f78b64cc09d5 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb_transport.h @@ -59,8 +59,8 @@ enum { NTB_LINK_UP, }; -int ntb_register_client(struct ntb_client *drvr); -void ntb_unregister_client(struct ntb_client *drvr); +int ntb_transport_register_client(struct ntb_client *drvr); +void ntb_transport_unregister_client(struct ntb_client *drvr); int ntb_register_client_dev(char *device_name); void ntb_unregister_client_dev(char *device_name); From a13f35e8714009145e32ebe2bf25b84e1376e314 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jul 2015 08:44:34 -0600 Subject: [PATCH 546/839] writeback: don't embed root bdi_writeback_congested in bdi_writeback 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi. When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete. To fix the bug, bdi destruction will be updated to not wait for cong's to drain, which naturally means that cong's may outlive the associated bdi. This is fine for non-root cong's but is problematic for the root cong's which are embedded in their bdi's as they may end up getting dereferenced after the containing bdi's are freed. This patch makes root cong's behave the same as non-root cong's. They are no longer embedded in their bdi's but allocated separately during bdi initialization, indexed and reference counted the same way. * As cong handling is the same for all wb's, wb->congested initialization is moved into wb_init(). * When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting. bdi->wb_congested is now a pointer pointing to the root cong allocated during bdi init and minimal refcnting operations are implemented. * The above makes root wb init paths diverge depending on CONFIG_CGROUP_WRITEBACK. root wb init is moved to cgwb_bdi_init(). This patch in itself shouldn't cause any consequential behavior differences but prepares for the actual fix. Signed-off-by: Tejun Heo Reported-by: Jon Christopherson Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681 Tested-by: Jon Christopherson Added include to backing-dev.h for kfree() definition. Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 5 +- include/linux/backing-dev.h | 6 ++- mm/backing-dev.c | 87 +++++++++++++++++--------------- 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a48d90e3bcbb..a23209b43842 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -50,10 +50,10 @@ enum wb_stat_item { */ struct bdi_writeback_congested { unsigned long state; /* WB_[a]sync_congested flags */ + atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK struct backing_dev_info *bdi; /* the associated bdi */ - atomic_t refcnt; /* nr of attached wb's and blkg */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif @@ -150,11 +150,12 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ - struct bdi_writeback_congested wb_congested; /* its congested state */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ +#else + struct bdi_writeback_congested *wb_congested; #endif wait_queue_head_t wb_waitq; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0e6d4828a77a..0fe9df983ab7 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -15,6 +15,7 @@ #include #include #include +#include int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode) static inline struct bdi_writeback_congested * wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) { - return bdi->wb.congested; + atomic_inc(&bdi->wb_congested->refcnt); + return bdi->wb_congested; } static inline void wb_congested_put(struct bdi_writeback_congested *congested) { + if (atomic_dec_and_test(&congested->refcnt)) + kfree(congested); } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7756da31b02b..51cc461e7256 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -287,7 +287,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) #define INIT_BW (100 << (20 - PAGE_SHIFT)) static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, - gfp_t gfp) + int blkcg_id, gfp_t gfp) { int i, err; @@ -311,21 +311,29 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); + if (!wb->congested) + return -ENOMEM; + err = fprop_local_init_percpu(&wb->completions, gfp); if (err) - return err; + goto out_put_cong; for (i = 0; i < NR_WB_STAT_ITEMS; i++) { err = percpu_counter_init(&wb->stat[i], 0, gfp); - if (err) { - while (--i) - percpu_counter_destroy(&wb->stat[i]); - fprop_local_destroy_percpu(&wb->completions); - return err; - } + if (err) + goto out_destroy_stat; } return 0; + +out_destroy_stat: + while (--i) + percpu_counter_destroy(&wb->stat[i]); + fprop_local_destroy_percpu(&wb->completions); +out_put_cong: + wb_congested_put(wb->congested); + return err; } /* @@ -361,6 +369,7 @@ static void wb_exit(struct bdi_writeback *wb) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); + wb_congested_put(wb->congested); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -392,9 +401,6 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) struct bdi_writeback_congested *new_congested = NULL, *congested; struct rb_node **node, *parent; unsigned long flags; - - if (blkcg_id == 1) - return &bdi->wb_congested; retry: spin_lock_irqsave(&cgwb_lock, flags); @@ -453,9 +459,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested) struct backing_dev_info *bdi = congested->bdi; unsigned long flags; - if (congested->blkcg_id == 1) - return; - local_irq_save(flags); if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { local_irq_restore(flags); @@ -480,7 +483,6 @@ static void cgwb_release_workfn(struct work_struct *work) css_put(wb->memcg_css); css_put(wb->blkcg_css); - wb_congested_put(wb->congested); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -541,7 +543,7 @@ static int cgwb_create(struct backing_dev_info *bdi, if (!wb) return -ENOMEM; - ret = wb_init(wb, bdi, gfp); + ret = wb_init(wb, bdi, blkcg_css->id, gfp); if (ret) goto err_free; @@ -553,12 +555,6 @@ static int cgwb_create(struct backing_dev_info *bdi, if (ret) goto err_ref_exit; - wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp); - if (!wb->congested) { - ret = -ENOMEM; - goto err_fprop_exit; - } - wb->memcg_css = memcg_css; wb->blkcg_css = blkcg_css; INIT_WORK(&wb->release_work, cgwb_release_workfn); @@ -588,12 +584,10 @@ static int cgwb_create(struct backing_dev_info *bdi, if (ret) { if (ret == -EEXIST) ret = 0; - goto err_put_congested; + goto err_fprop_exit; } goto out_put; -err_put_congested: - wb_congested_put(wb->congested); err_fprop_exit: fprop_local_destroy_percpu(&wb->memcg_completions); err_ref_exit: @@ -662,14 +656,20 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, return wb; } -static void cgwb_bdi_init(struct backing_dev_info *bdi) +static int cgwb_bdi_init(struct backing_dev_info *bdi) { - bdi->wb.memcg_css = mem_cgroup_root_css; - bdi->wb.blkcg_css = blkcg_root_css; - bdi->wb_congested.blkcg_id = 1; + int ret; + INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; atomic_set(&bdi->usage_cnt, 1); + + ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); + if (!ret) { + bdi->wb.memcg_css = mem_cgroup_root_css; + bdi->wb.blkcg_css = blkcg_root_css; + } + return ret; } static void cgwb_bdi_destroy(struct backing_dev_info *bdi) @@ -732,15 +732,28 @@ void wb_blkcg_offline(struct blkcg *blkcg) #else /* CONFIG_CGROUP_WRITEBACK */ -static void cgwb_bdi_init(struct backing_dev_info *bdi) { } +static int cgwb_bdi_init(struct backing_dev_info *bdi) +{ + int err; + + bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL); + if (!bdi->wb_congested) + return -ENOMEM; + + err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); + if (err) { + kfree(bdi->wb_congested); + return err; + } + return 0; +} + static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } #endif /* CONFIG_CGROUP_WRITEBACK */ int bdi_init(struct backing_dev_info *bdi) { - int err; - bdi->dev = NULL; bdi->min_ratio = 0; @@ -749,15 +762,7 @@ int bdi_init(struct backing_dev_info *bdi) INIT_LIST_HEAD(&bdi->bdi_list); init_waitqueue_head(&bdi->wb_waitq); - err = wb_init(&bdi->wb, bdi, GFP_KERNEL); - if (err) - return err; - - bdi->wb_congested.state = 0; - bdi->wb.congested = &bdi->wb_congested; - - cgwb_bdi_init(bdi); - return 0; + return cgwb_bdi_init(bdi); } EXPORT_SYMBOL(bdi_init); From a20135ffbc44545596f9b99c970de097fb497bdd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Jul 2015 20:53:37 -0400 Subject: [PATCH 547/839] writeback: don't drain bdi_writeback_congested on bdi destruction 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi. When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete. This patch fixes the bug by updating bdi destruction to not wait for cong's to drain. A cong is unlinked from bdi->cgwb_congested_tree on bdi destuction regardless of its reference count as the bdi may go away any point after destruction. wb_congested_put() checks whether the cong is already unlinked on release. Signed-off-by: Tejun Heo Reported-by: Jon Christopherson Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681 Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Tested-by: Jon Christopherson Signed-off-by: Jens Axboe --- mm/backing-dev.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 51cc461e7256..dac5bf59309d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -425,7 +425,6 @@ retry: new_congested = NULL; rb_link_node(&congested->rb_node, parent, node); rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree); - atomic_inc(&bdi->usage_cnt); goto found; } @@ -456,7 +455,6 @@ found: */ void wb_congested_put(struct bdi_writeback_congested *congested) { - struct backing_dev_info *bdi = congested->bdi; unsigned long flags; local_irq_save(flags); @@ -465,12 +463,15 @@ void wb_congested_put(struct bdi_writeback_congested *congested) return; } - rb_erase(&congested->rb_node, &congested->bdi->cgwb_congested_tree); + /* bdi might already have been destroyed leaving @congested unlinked */ + if (congested->bdi) { + rb_erase(&congested->rb_node, + &congested->bdi->cgwb_congested_tree); + congested->bdi = NULL; + } + spin_unlock_irqrestore(&cgwb_lock, flags); kfree(congested); - - if (atomic_dec_and_test(&bdi->usage_cnt)) - wake_up_all(&cgwb_release_wait); } static void cgwb_release_workfn(struct work_struct *work) @@ -675,13 +676,22 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; + struct bdi_writeback_congested *congested, *congested_n; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); + radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); + + rbtree_postorder_for_each_entry_safe(congested, congested_n, + &bdi->cgwb_congested_tree, rb_node) { + rb_erase(&congested->rb_node, &bdi->cgwb_congested_tree); + congested->bdi = NULL; /* mark @congested unlinked */ + } + spin_unlock_irq(&cgwb_lock); /* From 758dd7fdffd60507624edce34fff122a63163b3f Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 30 Jun 2015 11:22:52 -0600 Subject: [PATCH 548/839] NVMe: Fix irq freeing when queue_request_irq fails Fixes an issue when queue_reuest_irq fails in nvme_setup_io_queues. This patch initializes all vectors to -1 and resets the vector to -1 in the case of a failure in queue_request_irq. This avoids the free_irq in nvme_suspend_queue if the queue did not get an irq. Signed-off-by: Jon Derrick Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 34338d7438f5..d1d6141920d3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1474,6 +1474,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; + nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; /* make sure queue descriptor is set before queue count, for kthread */ @@ -1726,8 +1727,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq->cq_vector = 0; result = queue_request_irq(dev, nvmeq, nvmeq->irqname); - if (result) + if (result) { + nvmeq->cq_vector = -1; goto free_nvmeq; + } return result; @@ -2213,8 +2216,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->max_qid = nr_io_queues; result = queue_request_irq(dev, adminq, adminq->irqname); - if (result) + if (result) { + adminq->cq_vector = -1; goto free_queues; + } /* Free previously allocated queues that are no longer usable */ nvme_free_queues(dev, nr_io_queues + 1); From 3bd229976f64bea64c60803f9fc8d9f0059ba2f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:21:00 +0200 Subject: [PATCH 549/839] netfilter: arptables: use percpu jumpstack commit 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Unlike ip and ip6tables, arp tables were never converted to use the percpu jump stack. It still uses the rule blob to store return address, which isn't safe anymore since we now share this blob among all processors. Because there is no TEE support for arptables, we don't need to cope with reentrancy, so we can use loocal variable to hold stack offset. Fixes: 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95c9b6eece25..92305a1a021a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - struct arpt_entry *e, *back; + struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; + unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + cpu = smp_processor_id(); /* * Ensure we load private-> members after we've fetched the base * pointer. */ smp_read_barrier_depends(); table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; e = get_entry(table_base, private->hook_entry[hook]); - back = get_entry(table_base, private->underflow[hook]); acpar.in = state->in; acpar.out = state->out; @@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } continue; } if (table_base + v != arpt_next_entry(e)) { - /* Save old back ptr in next entry */ - struct arpt_entry *next = arpt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (stackidx >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); From dd302b59bde0149c20df7278c0d36c765e66afbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:27:51 +0200 Subject: [PATCH 550/839] netfilter: bridge: don't leak skb in error paths br_nf_dev_queue_xmit must free skb in its error path. NF_DROP is misleading -- its an okfn, not a netfilter hook. Fixes: 462fb2af9788a ("bridge : Sanitize skb before it enters the IP stack") Fixes: efb6de9b4ba00 ("netfilter: bridge: forward IPv6 fragmented packets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8a394bd3af83..c8b9bcfe997e 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -744,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv4(skb)) - return NF_DROP; + goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv6(skb)) - return NF_DROP; + goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -784,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (v6ops) return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; + + kfree_skb(skb); + return -EMSGSIZE; } #endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + drop: + kfree_skb(skb); + return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ From 6742b9e310bcf511b876532846e5302b07b7fedc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Jul 2015 16:14:25 +0200 Subject: [PATCH 551/839] netfilter: nfnetlink: keep going batch handling on missing modules After a fresh boot with no modules in place at all and a large rulesets, the existing nfnetlink_rcv_batch() funcion can take long time to commit the ruleset due to the many abort path. This is specifically a problem for the existing client of this code, ie. nf_tables, since it results in several synchronize_rcu() call in a row. This patch changes the policy to keep full batch processing on missing modules errors so we abort only once. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c90ecd7..0c0e8ecf02ab 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) } } +enum { + NFNL_BATCH_FAILURE = (1 << 0), + NFNL_BATCH_DONE = (1 << 1), + NFNL_BATCH_REPLAY = (1 << 2), +}; + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - bool success = true, done = false; static LIST_HEAD(err_list); + u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: + status = 0; + skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); @@ -336,10 +344,10 @@ replay: if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { - done = true; + status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; @@ -382,11 +390,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - nfnl_err_reset(&err_list); - ss->abort(oskb); - nfnl_unlock(subsys_id); - kfree_skb(skb); - goto replay; + status |= NFNL_BATCH_REPLAY; + goto next; } } ack: @@ -402,7 +407,7 @@ ack: */ nfnl_err_reset(&err_list); netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, @@ -410,19 +415,26 @@ ack: * triggers. */ if (err) - success = false; + status |= NFNL_BATCH_FAILURE; } - +next: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: - if (success && done) - ss->commit(oskb); - else + if (status & NFNL_BATCH_REPLAY) { ss->abort(oskb); + nfnl_err_reset(&err_list); + nfnl_unlock(subsys_id); + kfree_skb(skb); + goto replay; + } else if (status == NFNL_BATCH_DONE) { + ss->commit(oskb); + } else { + ss->abort(oskb); + } nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); From ea389daa7fd91f2fed8b06c01d4460e861c27023 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 2 Jul 2015 09:33:07 +0800 Subject: [PATCH 552/839] arm64: cpuidle: add __init section marker to arm_cpuidle_init It is not needed after booting, this patch moves the arm_cpuidle_init() function to the __init section. Signed-off-by: Jisheng Zhang Reviewed-by: Krzysztof Kozlowski Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 7ce589ca54a4..9047cab68fd3 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -15,7 +15,7 @@ #include #include -int arm_cpuidle_init(unsigned int cpu) +int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; From 45a481c2176f6acca2efb6477c6018b2c3e3c60f Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:09 -0700 Subject: [PATCH 553/839] clk: iproc: fix memory leak from clock name of_property_read_string_index takes array of pointers and assign them to strings read from device tree property. No additional memory allocation is needed prior to calling of_property_read_string_index. In fact, since the array of pointers will be re-assigned to other strings, any memory that it points to prior to calling of_property_read_string_index will be leaked Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-asiu.c | 6 +----- drivers/clk/bcm/clk-iproc-pll.c | 8 +------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index e19c09cd9645..f630e1bbdcfe 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -222,10 +222,6 @@ void __init iproc_asiu_setup(struct device_node *node, struct iproc_asiu_clk *asiu_clk; const char *clk_name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -259,7 +255,7 @@ void __init iproc_asiu_setup(struct device_node *node, err_clk_register: for (i = 0; i < num_clks; i++) - kfree(asiu->clks[i].name); + clk_unregister(asiu->clk_data.clks[i]); iounmap(asiu->gate_base); err_iomap_gate: diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 46fb84bc2674..a8d971b820b0 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -655,10 +655,6 @@ void __init iproc_pll_clk_setup(struct device_node *node, memset(&init, 0, sizeof(init)); parent_name = node->name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -690,10 +686,8 @@ void __init iproc_pll_clk_setup(struct device_node *node, return; err_clk_register: - for (i = 0; i < num_clks; i++) { - kfree(pll->clks[i].name); + for (i = 0; i < num_clks; i++) clk_unregister(pll->clk_data.clks[i]); - } err_pll_register: if (pll->asiu_base) From 69916d96094e1e16567c5f25515a13ed2896c730 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:10 -0700 Subject: [PATCH 554/839] clk: iproc: fix bit manipulation arithmetic A 32-bit variable should be type casted to 64-bit before arithmetic operation and assigning it to a 64-bit variable Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-pll.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index a8d971b820b0..2dda4e8295a9 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -366,7 +366,7 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, val = readl(pll->pll_base + ctrl->ndiv_int.offset); ndiv_int = (val >> ctrl->ndiv_int.shift) & bit_mask(ctrl->ndiv_int.width); - ndiv = ndiv_int << ctrl->ndiv_int.shift; + ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift; if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) { val = readl(pll->pll_base + ctrl->ndiv_frac.offset); @@ -374,7 +374,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, bit_mask(ctrl->ndiv_frac.width); if (ndiv_frac != 0) - ndiv = (ndiv_int << ctrl->ndiv_int.shift) | ndiv_frac; + ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) | + ndiv_frac; } val = readl(pll->pll_base + ctrl->pdiv.offset); From 15ab38273d21a45487116ad4c428593427954848 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Sun, 28 Jun 2015 10:55:32 +0100 Subject: [PATCH 555/839] clk: stm32: Fix out-by-one error path in the index lookup If stm32f4_rcc_lookup() is called with primary == 0 and secondary == 192 then it will read beyond the end of the table array due to an out-by-one error in the range check. In addition to the fixing the inequality we also modify the r.h.s. to make it even more explicit that we are comparing against the size of table in bits. Reported-by: Dan Carpenter Signed-off-by: Daniel Thompson Acked-by: Maxime Coquelin Fixes: 358bdf892f6b ("clk: stm32: Add clock driver for STM32F4[23]xxx devices") Signed-off-by: Stephen Boyd --- drivers/clk/clk-stm32f4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index b9b12a742970..3f6f7ad39490 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -268,7 +268,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary) memcpy(table, stm32f42xx_gate_map, sizeof(table)); /* only bits set in table can be used as indices */ - if (WARN_ON(secondary > 8 * sizeof(table) || + if (WARN_ON(secondary >= BITS_PER_BYTE * sizeof(table) || 0 == (table[BIT_ULL_WORD(secondary)] & BIT_ULL_MASK(secondary)))) return -EINVAL; From c76a024e82bdb83a0f7d57e006f8e7f8ddf983e5 Mon Sep 17 00:00:00 2001 From: David Dueck Date: Fri, 26 Jun 2015 15:30:22 +0200 Subject: [PATCH 556/839] clk: at91: do not leak resources Do not leak memory and free irqs in case of an error. Acked-by: Boris Brezillon Signed-off-by: David Dueck Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-h32mx.c | 4 +++- drivers/clk/at91/clk-main.c | 4 +++- drivers/clk/at91/clk-master.c | 8 ++++++-- drivers/clk/at91/clk-pll.c | 8 ++++++-- drivers/clk/at91/clk-system.c | 8 ++++++-- drivers/clk/at91/clk-utmi.c | 8 ++++++-- 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c index 152dcb3f7b5f..61566bcefa53 100644 --- a/drivers/clk/at91/clk-h32mx.c +++ b/drivers/clk/at91/clk-h32mx.c @@ -116,8 +116,10 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, h32mxclk->pmc = pmc; clk = clk_register(NULL, &h32mxclk->hw); - if (!clk) + if (!clk) { + kfree(h32mxclk); return; + } of_clk_add_provider(np, of_clk_src_simple_get, clk); } diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index c2400456a044..27dfa965cfed 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -171,8 +171,10 @@ at91_clk_register_main_osc(struct at91_pmc *pmc, irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); ret = request_irq(osc->irq, clk_main_osc_irq_handler, IRQF_TRIGGER_HIGH, name, osc); - if (ret) + if (ret) { + kfree(osc); return ERR_PTR(ret); + } if (bypass) pmc_write(pmc, AT91_CKGR_MOR, diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index f98eafe9b12d..5b3ded5205a2 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -165,12 +165,16 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(master->irq, IRQ_NOAUTOEN); ret = request_irq(master->irq, clk_master_irq_handler, IRQF_TRIGGER_HIGH, "clk-master", master); - if (ret) + if (ret) { + kfree(master); return ERR_PTR(ret); + } clk = clk_register(NULL, &master->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(master->irq, master); kfree(master); + } return clk; } diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index 6ec79dbc0840..23163be24b6f 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -338,12 +338,16 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, irq_set_status_flags(pll->irq, IRQ_NOAUTOEN); ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH, id ? "clk-pllb" : "clk-plla", pll); - if (ret) + if (ret) { + kfree(pll); return ERR_PTR(ret); + } clk = clk_register(NULL, &pll->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(pll->irq, pll); kfree(pll); + } return clk; } diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index a76d03fd577b..58008b3e8bc1 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -130,13 +130,17 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name, irq_set_status_flags(sys->irq, IRQ_NOAUTOEN); ret = request_irq(sys->irq, clk_system_irq_handler, IRQF_TRIGGER_HIGH, name, sys); - if (ret) + if (ret) { + kfree(sys); return ERR_PTR(ret); + } } clk = clk_register(NULL, &sys->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(sys->irq, sys); kfree(sys); + } return clk; } diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index ae3263bc1476..30dd697b1668 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -118,12 +118,16 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN); ret = request_irq(utmi->irq, clk_utmi_irq_handler, IRQF_TRIGGER_HIGH, "clk-utmi", utmi); - if (ret) + if (ret) { + kfree(utmi); return ERR_PTR(ret); + } clk = clk_register(NULL, &utmi->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(utmi->irq, utmi); kfree(utmi); + } return clk; } From 91e20b5040c67c51aad88cf87db4305c5bd7f79d Mon Sep 17 00:00:00 2001 From: Joel Porquet Date: Thu, 2 Jul 2015 15:32:00 -0400 Subject: [PATCH 557/839] irqchip: Move IRQCHIP_DECLARE macro to include/linux/irqchip.h At the moment the IRQCHIP_DECLARE macro is only declared locally in drivers/irqchip/irqchip.h. It prevents from using it directly in arch/* directories whenever irqchip drivers only exist there, which happens in a few cases (e.g. arc, arm, microblaze and mips). This patch makes the macro to be globally defined, i.e. in include/linux/irqchip.h, and thus usable for arch-specific declarations of irqchip drivers. In this way, it is very similar to what clocksource does (ie CLOCKSOURCE_OF_DECLARE is defined in include/linux/clocksource.h). For now, this patch only moves the declaration of the macro IRQCHIP_DECLARE to the global header 'include/linux/irqchip.h' and make 'drivers/irqchip/irqchip.h' include 'include/linux/irqchip.h'. Later, other patches will get rid of 'drivers/irqchip/irqchip.h' and modify all the impacted irqchip drivers. Signed-off-by: Joel Porquet Cc: Jason Cooper Link: http://lkml.kernel.org/r/1435865565-14114-1-git-send-email-joel@porquet.org Signed-off-by: Thomas Gleixner --- drivers/irqchip/irqchip.h | 19 +------------------ include/linux/irqchip.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irqchip.h b/drivers/irqchip/irqchip.h index 0f6486d4f1b0..0f67ae32464f 100644 --- a/drivers/irqchip/irqchip.h +++ b/drivers/irqchip/irqchip.h @@ -8,21 +8,4 @@ * warranty of any kind, whether express or implied. */ -#ifndef _IRQCHIP_H -#define _IRQCHIP_H - -#include - -/* - * This macro must be used by the different irqchip drivers to declare - * the association between their DT compatible string and their - * initialization function. - * - * @name: name that must be unique accross all IRQCHIP_DECLARE of the - * same file. - * @compstr: compatible string of the irqchip driver - * @fn: initialization function - */ -#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) - -#endif +#include diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 14d79131f53d..638887376e58 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,20 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include + +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their DT compatible string and their + * initialization function. + * + * @name: name that must be unique accross all IRQCHIP_DECLARE of the + * same file. + * @compstr: compatible string of the irqchip driver + * @fn: initialization function + */ +#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else From 43c518d197e7758bc64e0485050de347797faab6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 2 Jul 2015 12:12:53 -0700 Subject: [PATCH 558/839] [IA64] Drop debug test/printk that some special pages are marked reserved In commit 92923ca3aace "mm: meminit: only set page reserved in the memblock region" we dropped setting the reserved bits for all pages. This results in some warnings on ia64: put_kernel_page: page at 0xe000000005588000 not in reserved memory put_kernel_page: page at 0xe000000005588000 not in reserved memory put_kernel_page: page at 0xe000000005580000 not in reserved memory put_kernel_page: page at 0xe000000005580000 not in reserved memory put_kernel_page: page at 0xe000000005580000 not in reserved memory put_kernel_page: page at 0xe000000005580000 not in reserved memory the two different pages match up with two objects from the loaded kernel that get mapped by arch/ia64/mm/init.c:setup_gate() a000000101588000 D __start_gate_section a000000101580000 D empty_zero_page In a discussion with Mel Gorman: http://lkml.kernel.org/r/20150526102219.GB13750%40suse.de he suggested that while the preferred approach might be to set the reserved bit for these pages, it would also be OK to just drop the test: "as it's a debugging check that is ia-64 specific" After hunting around a bit and failin to find a good place to mark these pages as reserved - I decided to just delete the test. Signed-off-by: Tony Luck --- arch/ia64/mm/init.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7f3028965064..97e48b0eefc7 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -215,10 +215,6 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) pmd_t *pmd; pte_t *pte; - if (!PageReserved(page)) - printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", - page_address(page)); - pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ { From 18d78b64fddc11eb336f01e46ad3303a3f55d039 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Jul 2015 01:06:00 +0200 Subject: [PATCH 559/839] ACPI / init: Make it possible to override _REV The platform firmware on some systems expects Linux to return "5" as the supported ACPI revision which makes it expose system configuration information in a special way. For example, based on what ACPI exports as the supported revision, Dell XPS 13 (2015) configures its audio device to either work in HDA mode or in I2S mode, where the former is supposed to be used on Linux until the latter is fully supported (in the kernel as well as in user space). Since ACPI 6 mandates that _REV should return "2" if ACPI 2 or later is supported by the OS, a subsequent change will make that happen, so make it possible to override that on systems where "5" is expected to be returned for Linux to work correctly one them (such as the Dell machine mentioned above). Original-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 6 ++++++ drivers/acpi/Kconfig | 20 ++++++++++++++++++++ drivers/acpi/blacklist.c | 26 ++++++++++++++++++++++++++ drivers/acpi/internal.h | 1 + drivers/acpi/osl.c | 18 ++++++++++++++++++ 5 files changed, 71 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab1628a057..180102480fc0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -274,6 +274,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS Format: To spoof as Windows 98: ="Microsoft Windows" + acpi_rev_override [ACPI] Override the _REV object to return 5 (instead + of 2 which is mandated by ACPI 6) as the supported ACPI + specification revision (when using this switch, it may + be necessary to carry out a cold reboot _twice_ in a + row to make it take effect on the platform firmware). + acpi_osi= [HW,ACPI] Modify list of supported OS interface strings acpi_osi="string1" # add string1 acpi_osi="!string2" # remove string2 diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index ab2cbb51c6aa..2b652f1d4edb 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -77,6 +77,26 @@ config ACPI_PROCFS_POWER Say N to delete power /proc/acpi/ directories that have moved to /sys/ +config ACPI_REV_OVERRIDE_POSSIBLE + bool "Allow supported ACPI revision to be overriden" + depends on X86 + default y + help + The platform firmware on some systems expects Linux to return "5" as + the supported ACPI revision which makes it expose system configuration + information in a special way. + + For example, based on what ACPI exports as the supported revision, + Dell XPS 13 (2015) configures its audio device to either work in HDA + mode or in I2S mode, where the former is supposed to be used on Linux + until the latter is fully supported (in the kernel as well as in user + space). + + This option enables a DMI-based quirk for the above Dell machine (so + that HDA audio is exposed by the platform firmware to the kernel) and + makes it possible to force the kernel to return "5" as the supported + ACPI revision via the "acpi_rev_override" command line switch. + config ACPI_EC_DEBUGFS tristate "EC read/write access through /sys/kernel/debug/ec" default n diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 1d1791935c31..278dc4be992a 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -162,6 +162,15 @@ static int __init dmi_disable_osi_win8(const struct dmi_system_id *d) acpi_osi_setup("!Windows 2012"); return 0; } +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static int __init dmi_enable_rev_override(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE PREFIX "DMI detected: %s (force ACPI _REV to 5)\n", + d->ident); + acpi_rev_override_setup(NULL); + return 0; +} +#endif static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { { @@ -325,6 +334,23 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"), }, }, + +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE + /* + * DELL XPS 13 (2015) switches sound between HDA and I2S + * depending on the ACPI _REV callback. If userspace supports + * I2S sufficiently (or if you do not care about sound), you + * can safely disable this quirk. + */ + { + .callback = dmi_enable_rev_override, + .ident = "DELL XPS 13 (2015)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"), + }, + }, +#endif {} }; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index ba4a61e964be..42f304288829 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -58,6 +58,7 @@ void acpi_cmos_rtc_init(void); #else static inline void acpi_cmos_rtc_init(void) {} #endif +int acpi_rev_override_setup(char *str); extern bool acpi_force_hot_remove; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 39748bb3a543..37e592798cd5 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -537,6 +537,19 @@ acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) } #endif +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static bool acpi_rev_override; + +int __init acpi_rev_override_setup(char *str) +{ + acpi_rev_override = true; + return 1; +} +__setup("acpi_rev_override", acpi_rev_override_setup); +#else +#define acpi_rev_override false +#endif + #define ACPI_MAX_OVERRIDE_LEN 100 static char acpi_os_name[ACPI_MAX_OVERRIDE_LEN]; @@ -555,6 +568,11 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, *new_val = acpi_os_name; } + if (!memcmp(init_val->name, "_REV", 4) && acpi_rev_override) { + printk(KERN_INFO PREFIX "Overriding _REV return value to 5\n"); + *new_val = (char *)5; + } + return AE_OK; } From ea7d521569a70418aa9f6309a1d1916709818b62 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Jul 2015 23:24:05 +0200 Subject: [PATCH 560/839] Revert 'Revert "ACPICA: Permanently set _REV to the value '2'."' Revert commit ff284f37fc0e (Revert "ACPICA: Permanently set _REV to the value '2'.) as the regression introduced by commit b1ef29725865 reverted by it is now addressed via a blacklist entry. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utglobal.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 5e8df9177da4..a72685c1e819 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -102,12 +102,19 @@ const struct acpi_predefined_names acpi_gbl_pre_defined_names[] = { {"_SB_", ACPI_TYPE_DEVICE, NULL}, {"_SI_", ACPI_TYPE_LOCAL_SCOPE, NULL}, {"_TZ_", ACPI_TYPE_DEVICE, NULL}, - {"_REV", ACPI_TYPE_INTEGER, (char *)ACPI_CA_SUPPORT_LEVEL}, + /* + * March, 2015: + * The _REV object is in the process of being deprecated, because + * other ACPI implementations permanently return 2. Thus, it + * has little or no value. Return 2 for compatibility with + * other ACPI implementations. + */ + {"_REV", ACPI_TYPE_INTEGER, ACPI_CAST_PTR(char, 2)}, {"_OS_", ACPI_TYPE_STRING, ACPI_OS_NAME}, - {"_GL_", ACPI_TYPE_MUTEX, (char *)1}, + {"_GL_", ACPI_TYPE_MUTEX, ACPI_CAST_PTR(char, 1)}, #if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY) - {"_OSI", ACPI_TYPE_METHOD, (char *)1}, + {"_OSI", ACPI_TYPE_METHOD, ACPI_CAST_PTR(char, 1)}, #endif /* Table terminator */ From 7df9ab845ce5f473c84184873bf5be08bbe8fda5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Jul 2015 16:42:13 -0700 Subject: [PATCH 561/839] make certificate list change message more useful It's a bug in our Makefile rules, make it show what the changing certificate list was, and make it a warning so that people actually see it. Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 60c302cfb4d3..43c4c920f30a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -137,7 +137,7 @@ endif ifneq ($(wildcard $(obj)/.x509.list),) ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES)) -$(info X.509 certificate list changed) +$(warning X.509 certificate list changed to "$(X509_CERTIFICATES)" from "$(shell cat $(obj)/.x509.list)") $(shell rm $(obj)/.x509.list) endif endif From 13ee9fdba96577eb1583dcd7b15767ef623fae12 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 1 Jul 2015 23:08:10 +0100 Subject: [PATCH 562/839] ARM: 8397/1: fix vdsomunge not to depend on glibc specific error.h If the host toolchain is not glibc based then the arm kernel build fails with arch/arm/vdso/vdsomunge.c:53:19: fatal error: error.h: No such file or directory error.h is a glibc only header (ie not available in musl, newlib and bsd libcs). Changed the error reporting to standard conforming code to avoid depending on specific C implementations. Signed-off-by: Szabolcs Nagy Acked-by: Will Deacon Fixes: 8512287a8165 ("ARM: 8330/1: add VDSO user-space code") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Russell King --- arch/arm/vdso/vdsomunge.c | 56 +++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8..aedec81d1198 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,11 @@ * it does. */ -#define _GNU_SOURCE - #include #include #include -#include #include +#include #include #include #include @@ -82,11 +80,25 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) +{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } @@ -119,68 +131,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +205,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } From f9058929f2acbb273ec83104ebeeab0593595e15 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 3 Jul 2015 15:29:06 +0800 Subject: [PATCH 563/839] ARM64 / SMP: Switch pr_err() to pr_debug() for disabled GICC entry It is normal that firmware presents GICC entry or entries (processors) with disabled flag in ACPI MADT, taking a system of 16 cpus for example, ACPI firmware may present 8 ebabled first with another 8 cpus disabled in MADT, the disabled cpus can be hot-added later. Firmware may also present more cpus than the hardware actually has, but disabled the unused ones, and easily enable it when the hardware has such cpus to make the firmware code scalable. So that's not an error for disabled cpus in MADT, we can switch pr_err() to pr_debug() to make the boot a little quieter by default. Since hwid for disabled cpus often are invalid, and we check invalid hwid first in the code, for use case that hot add cpus later will be filtered out and will not be counted in possible cups, so move this check before the hwid one to prepare the code to count for disabeld cpus when cpu hot-plug is introduced. Signed-off-by: Hanjun Guo Reviewed-by: Al Stone Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a1883bfdd9d6..695801a54ca5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -396,13 +396,13 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) { - pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid); + if (!(processor->flags & ACPI_MADT_ENABLED)) { + pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } - if (!(processor->flags & ACPI_MADT_ENABLED)) { - pr_err("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); + if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) { + pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid); return; } From 11b8b25ce4f8acfd3b438683c0c9ade27756c6e8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 12:42:36 +0100 Subject: [PATCH 564/839] ARM: fix lockdep unannotated irqs-off warning Wolfram Sang reported an unannotated irqs-off warning from lockdep: WARNING: CPU: 0 PID: 282 at kernel/locking/lockdep.c:3557 check_flags+0x84/0x1f4() DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) CPU: 0 PID: 282 Comm: rcS Tainted: G W 4.1.0-00002-g5b076054611833 #179 Hardware name: Generic Emma Mobile EV2 (Flattened Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:c02dcc67 r5:00000009 r4:00000000 r3:00400000 [] (show_stack) from [] (dump_stack+0x20/0x28) [] (dump_stack) from [] (warn_slowpath_common+0x8c/0xb4) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x38/0x40) r8:c780f470 r7:00000000 r6:00000000 r5:c03b0570 r4:c0b7ec04 [] (warn_slowpath_fmt) from [] (check_flags+0x84/0x1f4) r3:c02e13d8 r2:c02dceaa [] (check_flags) from [] (lock_acquire+0x4c/0xbc) r5:00000000 r4:60000193 [] (lock_acquire) from [] (_raw_spin_lock+0x34/0x44) r9:000a8d5c r8:00000001 r7:c7806000 r6:c780f460 r5:c03b06a0 r4:c780f460 [] (_raw_spin_lock) from [] (handle_fasteoi_irq+0x20/0x11c) r4:c780f400 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r6:00000000 r5:c03b038c r4:00000012 r3:c005a8ac [] (generic_handle_irq) from [] (__handle_domain_irq+0x88/0xa8) r4:00000000 r3:00000026 [] (__handle_domain_irq) from [] (gic_handle_irq+0x40/0x58) r8:10c5347d r7:10c5347d r6:c35b1fb0 r5:c03a6304 r4:c8802000 r3:c35b1fb0 [] (gic_handle_irq) from [] (__irq_usr+0x48/0x60) Exception stack(0xc35b1fb0 to 0xc35b1ff8) 1fa0: 00000061 00000000 000ab736 00000066 1fc0: 00000061 000aa1f0 000a8d54 000a8d54 000a8d88 000a8d5c 000a8cc8 000a8d68 1fe0: 72727272 bef8a528 000398c0 00031334 20000010 ffffffff r6:ffffffff r5:20000010 r4:00031334 r3:00000061 ---[ end trace cb88537fdc8fa202 ]--- possible reason: unannotated irqs-off. irq event stamp: 769 hardirqs last enabled at (769): [] ret_fast_syscall+0x2c/0x54 hardirqs last disabled at (768): [] ret_fast_syscall+0xc/0x54 softirqs last enabled at (0): [] copy_process.part.65+0x2e8/0x11dc softirqs last disabled at (0): [< (null)>] (null) His kernel configuration had: CONFIG_PROVE_LOCKING=y CONFIG_TRACE_IRQFLAGS=y but no IRQSOFF_TRACER, which means entry from userspace can result in the kernel seeing IRQs off without being notified of that change of state. Change the IRQSOFF ifdef in the usr_entry macro to TRACE_IRQFLAGS instead. Tested-by: Wolfram Sang Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..dba6cf65c9e4 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -408,7 +408,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 From f617f7310cfdca5245a10629979d85682323f1a4 Mon Sep 17 00:00:00 2001 From: Roger Lucas Date: Tue, 12 May 2015 22:01:37 +0100 Subject: [PATCH 565/839] hwmon: (w83792d) Additional PWM outputs support Add pwm[4-7] and the associated pwm[4-7]_mode attributes. Signed-off-by: Roger Lucas Signed-off-by: Jean Delvare --- Documentation/hwmon/w83792d | 18 +++++++++++++----- drivers/hwmon/w83792d.c | 27 ++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/Documentation/hwmon/w83792d b/Documentation/hwmon/w83792d index 53f7b6866fec..f2ffc402ea45 100644 --- a/Documentation/hwmon/w83792d +++ b/Documentation/hwmon/w83792d @@ -8,6 +8,7 @@ Supported chips: Datasheet: http://www.winbond.com.tw Author: Shane Huang (Winbond) +Updated: Roger Lucas Module Parameters @@ -38,9 +39,16 @@ parameter; this will put it into a more well-behaved state first. The driver implements three temperature sensors, seven fan rotation speed sensors, nine voltage sensors, and two automatic fan regulation strategies called: Smart Fan I (Thermal Cruise mode) and Smart Fan II. -Automatic fan control mode is possible only for fan1-fan3. Fan4-fan7 can run -synchronized with selected fan (fan1-fan3). This functionality and manual PWM -control for fan4-fan7 is not yet implemented. + +The driver also implements up to seven fan control outputs: pwm1-7. Pwm1-7 +can be configured to PWM output or Analogue DC output via their associated +pwmX_mode. Outputs pwm4 through pwm7 may or may not be present depending on +how the W83792AD/D was configured by the BIOS. + +Automatic fan control mode is possible only for fan1-fan3. + +For all pwmX outputs, a value of 0 means minimum fan speed and a value of +255 means maximum fan speed. Temperatures are measured in degrees Celsius and measurement resolution is 1 degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when @@ -157,14 +165,14 @@ for each fan. /sys files ---------- -pwm[1-3] - this file stores PWM duty cycle or DC value (fan speed) in range: +pwm[1-7] - this file stores PWM duty cycle or DC value (fan speed) in range: 0 (stop) to 255 (full) pwm[1-3]_enable - this file controls mode of fan/temperature control: * 0 Disabled * 1 Manual mode * 2 Smart Fan II * 3 Thermal Cruise -pwm[1-3]_mode - Select PWM of DC mode +pwm[1-7]_mode - Select PWM or DC mode * 0 DC * 1 PWM thermal_cruise[1-3] - Selects the desired temperature for cruise (degC) diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 4068db4d9580..0a8bce726b4b 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -289,10 +289,7 @@ struct w83792d_data { u8 temp1[3]; /* current, over, thyst */ u8 temp_add[2][6]; /* Register value */ u8 fan_div[7]; /* Register encoding, shifted right */ - u8 pwm[7]; /* - * We only consider the first 3 set of pwm, - * although 792 chip has 7 set of pwm. - */ + u8 pwm[7]; /* The 7 PWM outputs */ u8 pwmenable[3]; u32 alarms; /* realtime status register encoding,combined */ u8 chassis; /* Chassis status */ @@ -1075,6 +1072,10 @@ static DEVICE_ATTR(intrusion0_alarm, S_IRUGO | S_IWUSR, static SENSOR_DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 0); static SENSOR_DEVICE_ATTR(pwm2, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 1); static SENSOR_DEVICE_ATTR(pwm3, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 2); +static SENSOR_DEVICE_ATTR(pwm4, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 3); +static SENSOR_DEVICE_ATTR(pwm5, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 4); +static SENSOR_DEVICE_ATTR(pwm6, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 5); +static SENSOR_DEVICE_ATTR(pwm7, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 6); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO, show_pwmenable, store_pwmenable, 1); static SENSOR_DEVICE_ATTR(pwm2_enable, S_IWUSR | S_IRUGO, @@ -1087,6 +1088,14 @@ static SENSOR_DEVICE_ATTR(pwm2_mode, S_IWUSR | S_IRUGO, show_pwm_mode, store_pwm_mode, 1); static SENSOR_DEVICE_ATTR(pwm3_mode, S_IWUSR | S_IRUGO, show_pwm_mode, store_pwm_mode, 2); +static SENSOR_DEVICE_ATTR(pwm4_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 3); +static SENSOR_DEVICE_ATTR(pwm5_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 4); +static SENSOR_DEVICE_ATTR(pwm6_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 5); +static SENSOR_DEVICE_ATTR(pwm7_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 6); static SENSOR_DEVICE_ATTR(tolerance1, S_IWUSR | S_IRUGO, show_tolerance, store_tolerance, 1); static SENSOR_DEVICE_ATTR(tolerance2, S_IWUSR | S_IRUGO, @@ -1177,30 +1186,38 @@ static SENSOR_DEVICE_ATTR(fan6_div, S_IWUSR | S_IRUGO, static SENSOR_DEVICE_ATTR(fan7_div, S_IWUSR | S_IRUGO, show_fan_div, store_fan_div, 7); -static struct attribute *w83792d_attributes_fan[4][5] = { +static struct attribute *w83792d_attributes_fan[4][7] = { { &sensor_dev_attr_fan4_input.dev_attr.attr, &sensor_dev_attr_fan4_min.dev_attr.attr, &sensor_dev_attr_fan4_div.dev_attr.attr, &sensor_dev_attr_fan4_alarm.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm4_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan5_input.dev_attr.attr, &sensor_dev_attr_fan5_min.dev_attr.attr, &sensor_dev_attr_fan5_div.dev_attr.attr, &sensor_dev_attr_fan5_alarm.dev_attr.attr, + &sensor_dev_attr_pwm5.dev_attr.attr, + &sensor_dev_attr_pwm5_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan6_input.dev_attr.attr, &sensor_dev_attr_fan6_min.dev_attr.attr, &sensor_dev_attr_fan6_div.dev_attr.attr, &sensor_dev_attr_fan6_alarm.dev_attr.attr, + &sensor_dev_attr_pwm6.dev_attr.attr, + &sensor_dev_attr_pwm6_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan7_input.dev_attr.attr, &sensor_dev_attr_fan7_min.dev_attr.attr, &sensor_dev_attr_fan7_div.dev_attr.attr, &sensor_dev_attr_fan7_alarm.dev_attr.attr, + &sensor_dev_attr_pwm7.dev_attr.attr, + &sensor_dev_attr_pwm7_mode.dev_attr.attr, NULL } }; From 97607f9882a0e0d3553382a4c7996d3a37d08268 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 3 Jul 2015 14:39:05 +0200 Subject: [PATCH 566/839] hwmon: Document which I2C addresses can be probed Add an item to the checklist when submitting a new hwmon driver: only some I2C addresses can be probed, others should not for safety reasons. Signed-off-by: Jean Delvare Reviewed-by: Guenter Roeck --- Documentation/hwmon/submitting-patches | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches index 3d1bac399a22..d201828d202f 100644 --- a/Documentation/hwmon/submitting-patches +++ b/Documentation/hwmon/submitting-patches @@ -81,6 +81,13 @@ increase the chances of your change being accepted. * Provide a detect function if and only if a chip can be detected reliably. +* Only the following I2C addresses shall be probed: 0x18-0x1f, 0x28-0x2f, + 0x48-0x4f, 0x58, 0x5c, 0x73 and 0x77. Probing other addresses is strongly + discouraged as it is known to cause trouble with other (non-hwmon) I2C + chips. If your chip lives at an address which can't be probed then the + device will have to be instantiated explicitly (which is always better + anyway.) + * Avoid writing to chip registers in the detect function. If you have to write, only do it after you have already gathered enough data to be certain that the detection is going to be successful. From ffe3df535320acc337d69c60549d68ebfa863642 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 3 Jul 2015 14:39:06 +0200 Subject: [PATCH 567/839] hwmon: (w83627ehf) Use swap() in w82627ehf_swap_tempreg() Use kernel.h macro definition. Thanks to Julia Lawall for Coccinelle scripting support. Signed-off-by: Fabian Frederick Signed-off-by: Jean Delvare --- drivers/hwmon/w83627ehf.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index b10353b31806..697007afb99c 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1937,27 +1937,11 @@ static inline void w83627ehf_init_device(struct w83627ehf_data *data, static void w82627ehf_swap_tempreg(struct w83627ehf_data *data, int r1, int r2) { - u16 tmp; - - tmp = data->temp_src[r1]; - data->temp_src[r1] = data->temp_src[r2]; - data->temp_src[r2] = tmp; - - tmp = data->reg_temp[r1]; - data->reg_temp[r1] = data->reg_temp[r2]; - data->reg_temp[r2] = tmp; - - tmp = data->reg_temp_over[r1]; - data->reg_temp_over[r1] = data->reg_temp_over[r2]; - data->reg_temp_over[r2] = tmp; - - tmp = data->reg_temp_hyst[r1]; - data->reg_temp_hyst[r1] = data->reg_temp_hyst[r2]; - data->reg_temp_hyst[r2] = tmp; - - tmp = data->reg_temp_config[r1]; - data->reg_temp_config[r1] = data->reg_temp_config[r2]; - data->reg_temp_config[r2] = tmp; + swap(data->temp_src[r1], data->temp_src[r2]); + swap(data->reg_temp[r1], data->reg_temp[r2]); + swap(data->reg_temp_over[r1], data->reg_temp_over[r2]); + swap(data->reg_temp_hyst[r1], data->reg_temp_hyst[r2]); + swap(data->reg_temp_config[r1], data->reg_temp_config[r2]); } static void From ac5e2f170f033e48cfcdc2c4f74b27083eabffa5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:02:39 +0100 Subject: [PATCH 568/839] ARM: io: document ARM specific behaviour of ioremap*() implementations Add documentation of the ARM specific behaviour of the mappings setup by the ioremap() series of macros. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 42 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1c3938f26beb..ae10717f61d4 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -348,11 +348,47 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #endif /* readl */ /* - * ioremap and friends. + * ioremap() and friends. * - * ioremap takes a PCI memory address, as specified in - * Documentation/io-mapping.txt. + * ioremap() takes a resource address, and size. Due to the ARM memory + * types, it is important to use the correct ioremap() function as each + * mapping has specific properties. * + * Function Memory type Cacheability Cache hint + * ioremap() Device n/a n/a + * ioremap_nocache() Device n/a n/a + * ioremap_cache() Normal Writeback Read allocate + * ioremap_wc() Normal Non-cacheable n/a + * ioremap_wt() Normal Non-cacheable n/a + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" + * - writes may be delayed before they hit the endpoint device + * + * ioremap_nocache() is the same as ioremap() as there are too many device + * drivers using this for device registers, and documentation which tells + * people to use it for such for this to be any different. This is not a + * safe fallback for memory-like mappings, or memory regions where the + * compiler may generate unaligned accesses - eg, via inlining its own + * memcpy. + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * - ordering is not guaranteed without explicit dependencies or barrier + * instructions + * - writes may be delayed before they hit the endpoint memory + * + * The cache hint is only a performance hint: CPUs may alias these hints. + * Eg, a CPU not implementing read allocate but implementing write allocate + * will provide a write allocate mapping instead. */ #define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) From 1e2c727f6c022778d4562147433ca4c0b101f0ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:17:55 +0100 Subject: [PATCH 569/839] ARM: io: fix ioremap_wt() implementation ioremap_wt() was added by aliasing it to ioremap_nocache(), which is a device mapping. Device mappings do not allow unaligned accesses, but it appears that GCC is able to inline its own memcpy() implementation which may use such accesses. The only user of this is pmem, which uses memcpy() on the region. Therefore, this is unsafe. We must implement ioremap_wt() correctly for ARM, or not at all. This patch adds a more correct implementation by re-using ioremap_wc() to provide a normal-memory non-cacheable mapping. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index ae10717f61d4..f1083ebf214d 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -394,7 +394,7 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) +#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) #define iounmap __arm_iounmap /* From f871d26807078cf4cc0a64a97ee2c6bb513a4397 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Fri, 3 Jul 2015 15:08:08 +0100 Subject: [PATCH 570/839] arm64: Fix show_unhandled_signal_ratelimited usage Commit 86dca36e6ba introduced ratelimited usage for 'unhandled_signal' messages. The commit checks the ratelimit irrespective of whether the signal is handled or not, which is wrong and leads to false reports like the below in dmesg : __do_user_fault: 127 callbacks suppressed Do the ratelimit check only if the signal is unhandled. Fixes: 86dca36e6ba0 ("arm64: use private ratelimit state along with show_unhandled_signals") Cc: Vladimir Murzin Signed-off-by: Suzuki K. Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a12251c074a8..566bc4c35040 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -335,7 +335,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - if (show_unhandled_signals_ratelimited() && unhandled_signal(current, SIGILL)) { + if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", current->comm, task_pid_nr(current), pc); dump_instr(KERN_INFO, regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 66bd92ab6f7b..ffa36e2d18e6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -115,7 +115,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, { struct siginfo si; - if (show_unhandled_signals_ratelimited() && unhandled_signal(tsk, sig)) { + if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), fault_name(esr), sig, addr, esr); From 20a1080dff2f1be8933baa0d910c41882c7279ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:06:32 +0100 Subject: [PATCH 571/839] ARM: io: convert ioremap*() to functions Convert the ioremap*() preprocessor macros to real functions, moving them out of line. This allows us to kill off __arm_ioremap(), and __arm_iounmap() helpers, and remove __arm_ioremap_pfn_caller() from global view. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 24 ++++++++++--------- arch/arm/mm/ioremap.c | 33 ++++++++++++++++++-------- arch/arm/mm/nommu.c | 49 +++++++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 39 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index f1083ebf214d..a5ed237c87d9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -140,16 +140,11 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) * The _caller variety takes a __builtin_return_address(0) value for * /proc/vmalloc to use - and should only be used in non-inline functions. */ -extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, - size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); - extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); -extern void __arm_iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); @@ -390,12 +385,19 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, * Eg, a CPU not implementing read allocate but implementing write allocate * will provide a write allocate mapping instead. */ -#define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define iounmap __arm_iounmap +void __iomem *ioremap(resource_size_t res_cookie, size_t size); +#define ioremap ioremap +#define ioremap_nocache ioremap + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size); +#define ioremap_cache ioremap_cache + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); +#define ioremap_wc ioremap_wc +#define ioremap_wt ioremap_wc + +void iounmap(volatile void __iomem *iomem_cookie); +#define iounmap iounmap /* * io{read,write}{16,32}be() macros diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d1e5ad7ab3bc..0c81056c1dd7 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif -void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; @@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, - __builtin_return_address(0)); + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap_pfn); @@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; -void __iomem * -__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +void __iomem *ioremap(resource_size_t res_cookie, size_t size) { - return arch_ioremap_caller(phys_addr, size, mtype, - __builtin_return_address(0)); + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); /* * Remap an arbitrary physical address space into the kernel virtual @@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr) void (*arch_iounmap)(volatile void __iomem *) = __iounmap; -void __arm_iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *cookie) { - arch_iounmap(io_addr); + arch_iounmap(cookie); } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); #ifdef CONFIG_PCI static int pci_ioremap_mem_type = MT_DEVICE; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index afd7e05d95f1..1dd10936d68d 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -351,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, - size_t size, unsigned int mtype, void *caller) -{ - return __arm_ioremap_pfn(pfn, offset, size, mtype); -} - -void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, - unsigned int mtype) -{ - return (void __iomem *)phys_addr; -} -EXPORT_SYMBOL(__arm_ioremap); - -void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); - void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - return __arm_ioremap(phys_addr, size, mtype); + return (void __iomem *)phys_addr; } +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iounmap(volatile void __iomem *addr) +{ +} +EXPORT_SYMBOL(__iounmap); + void (*arch_iounmap)(volatile void __iomem *); -void __arm_iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); From 9ab79bb22cc77adcca5ebbadea6caec6a478f283 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 15:23:10 +0100 Subject: [PATCH 572/839] ARM: pgtable: document mapping types Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 31 ++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index bfd662e49a25..aeddd28b3595 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -129,7 +129,36 @@ /* * These are the memory types, defined to be compatible with - * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB + * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B + * ARMv6+ without TEX remapping, they are a table index. + * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B + * + * MT type Pre-ARMv6 ARMv6+ type / cacheable status + * UNCACHED Uncached Strongly ordered + * BUFFERABLE Bufferable Normal memory / non-cacheable + * WRITETHROUGH Writethrough Normal memory / write through + * WRITEBACK Writeback Normal memory / write back, read alloc + * MINICACHE Minicache N/A + * WRITEALLOC Writeback Normal memory / write back, write alloc + * DEV_SHARED Uncached Device memory (shared) + * DEV_NONSHARED Uncached Device memory (non-shared) + * DEV_WC Bufferable Normal memory / non-cacheable + * DEV_CACHED Writeback Normal memory / write back, read alloc + * VECTORS Variable Normal memory / variable + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" */ #define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ #define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ From 2ecd9d29abb171d6e97a4f3eb29d7456a11401b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Jul 2015 18:53:58 +0200 Subject: [PATCH 573/839] sched, preempt_notifier: separate notifier registration from static_key inc/dec Commit 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") had two problems. First, the preempt-notifier API needs to sleep with the addition of the static_key, we do however need to hold off preemption while modifying the preempt notifier list, otherwise a preemption could observe an inconsistent list state. KVM correctly registers and unregisters preempt notifiers with preemption disabled, so the sleep caused dmesg splats. Second, KVM registers and unregisters preemption notifiers very often (in vcpu_load/vcpu_put). With a single uniprocessor guest the static key would move between 0 and 1 continuously, hitting the slow path on every userspace exit. To fix this, wrap the static_key inc/dec in a new API, and call it from KVM. Fixes: 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") Reported-by: Pontus Fuchs Reported-by: Takashi Iwai Tested-by: Takashi Iwai Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- include/linux/preempt.h | 2 ++ kernel/sched/core.c | 17 +++++++++++++++-- virt/kvm/kvm_main.c | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 0f1534acaf60..84991f185173 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -293,6 +293,8 @@ struct preempt_notifier { struct preempt_ops *ops; }; +void preempt_notifier_inc(void); +void preempt_notifier_dec(void); void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b803e1b8ab0c..552710ab19e0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2320,13 +2320,27 @@ void wake_up_new_task(struct task_struct *p) static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE; +void preempt_notifier_inc(void) +{ + static_key_slow_inc(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_inc); + +void preempt_notifier_dec(void) +{ + static_key_slow_dec(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_dec); + /** * preempt_notifier_register - tell me when current is being preempted & rescheduled * @notifier: notifier struct to register */ void preempt_notifier_register(struct preempt_notifier *notifier) { - static_key_slow_inc(&preempt_notifier_key); + if (!static_key_false(&preempt_notifier_key)) + WARN(1, "registering preempt_notifier while notifiers disabled\n"); + hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); } EXPORT_SYMBOL_GPL(preempt_notifier_register); @@ -2340,7 +2354,6 @@ EXPORT_SYMBOL_GPL(preempt_notifier_register); void preempt_notifier_unregister(struct preempt_notifier *notifier) { hlist_del(¬ifier->link); - static_key_slow_dec(&preempt_notifier_key); } EXPORT_SYMBOL_GPL(preempt_notifier_unregister); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 848af90b8091..8b8a44453670 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -553,6 +553,8 @@ static struct kvm *kvm_create_vm(unsigned long type) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); + preempt_notifier_inc(); + return kvm; out_err: @@ -620,6 +622,7 @@ static void kvm_destroy_vm(struct kvm *kvm) cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); + preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); } From 42720138b06301cc8a7ee8a495a6d021c4b6a9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 1 Jul 2015 15:31:49 +0200 Subject: [PATCH 574/839] KVM: x86: make vapics_in_nmi_mode atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Writes were a bit racy, but hard to turn into a bug at the same time. (Particularly because modern Linux doesn't use this feature anymore.) Signed-off-by: Radim Krčmář [Actually the next patch makes it much, much easier to trigger the race so I'm including this one for stable@ as well. - Paolo] Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c7fa57b529d2..2a7f5d782c33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,7 +607,7 @@ struct kvm_arch { struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - int vapics_in_nmi_mode; + atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4dce6f8b6129..f90952f64e79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,7 +305,7 @@ static void pit_do_work(struct kthread_work *work) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. */ - if (kvm->arch.vapics_in_nmi_mode > 0) + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36e9de1b4127..607a56b35327 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1263,10 +1263,10 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (!nmi_wd_enabled) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) From db1385624c686fe99fe2d1b61a36e1537b915d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 30 Jun 2015 22:19:17 +0200 Subject: [PATCH 575/839] KVM: x86: properly restore LVT0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Legacy NMI watchdog didn't work after migration/resume, because vapics_in_nmi_mode was left at 0. Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 607a56b35327..e0f721bfabcb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1822,6 +1822,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); apic_update_lvtt(apic); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; From 59fd132340b3e37b83179d2fcb673980035edf62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 30 Jun 2015 22:19:16 +0200 Subject: [PATCH 576/839] KVM: x86: keep track of LVT0 changes under APICv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory-mapped LVT0 register already contains the new value when APICv traps so we can't directly detect a change. Memorize a bit we are interested in to enable legacy NMI watchdog. Suggested-by: Yoshida Nobuo Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 14 ++++++++------ arch/x86/kvm/lapic.h | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e0f721bfabcb..954e98a8c2e3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1257,16 +1257,17 @@ static void start_apic_timer(struct kvm_lapic *apic) static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { - int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); + bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); - if (apic_lvt_nmi_mode(lvt0_val)) { - if (!nmi_wd_enabled) { + if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { + apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; + if (lvt0_in_nmi_mode) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); - } - } else if (nmi_wd_enabled) - atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + } else + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + } } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) @@ -1597,6 +1598,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_spiv(apic, 0xff); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f2f4e10ab772..71952748222a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -26,6 +26,7 @@ struct kvm_lapic { struct kvm_vcpu *vcpu; bool sw_enabled; bool irr_pending; + bool lvt0_in_nmi_mode; /* Number of bits set in ISR. */ s16 isr_count; /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */ From 431dae778aea4eed31bd12e5ee82edc571cd4d70 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 29 Jun 2015 16:44:01 +0200 Subject: [PATCH 577/839] KVM: s390: virtio-ccw: don't overwrite config space values Eric noticed problems with vhost-scsi and virtio-ccw: vhost-scsi complained about overwriting values in the config space, which was triggered by a broken implementation of virtio-ccw's config get/set routines. It was probably sheer luck that we did not hit this before. When writing a value to the config space, the WRITE_CONF ccw will always write from the beginning of the config space up to and including the value to be set. If the config space up to the value has not yet been retrieved from the device, however, we'll end up overwriting values. Keep track of the known config space and update if needed to avoid this. Moreover, READ_CONF will only read the number of bytes it has been instructed to retrieve, so we must not copy more than that to the buffer, or we might overwrite trailing values. Reported-by: Eric Farman Signed-off-by: Cornelia Huck Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Christian Borntraeger Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- drivers/s390/kvm/virtio_ccw.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 6f1fa1773e76..f8d8fdb26b72 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -65,6 +65,7 @@ struct virtio_ccw_device { bool is_thinint; bool going_away; bool device_lost; + unsigned int config_ready; void *airq_info; }; @@ -833,8 +834,11 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; - memcpy(vcdev->config, config_area, sizeof(vcdev->config)); - memcpy(buf, &vcdev->config[offset], len); + memcpy(vcdev->config, config_area, offset + len); + if (buf) + memcpy(buf, &vcdev->config[offset], len); + if (vcdev->config_ready < offset + len) + vcdev->config_ready = offset + len; out_free: kfree(config_area); @@ -857,6 +861,9 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, if (!config_area) goto out_free; + /* Make sure we don't overwrite fields. */ + if (vcdev->config_ready < offset) + virtio_ccw_get_config(vdev, 0, NULL, offset); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); From b0996ae48285364710bce812e70ce67771ea6ef7 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 29 Jun 2015 18:39:23 +0800 Subject: [PATCH 578/839] KVM: x86: remove data variable from kvm_get_msr_common Commit 609e36d372ad ("KVM: x86: pass host_initiated to functions that read MSRs") modified kvm_get_msr_common function to use msr_info->data instead of data but missed one occurrence. Replace it and remove the unused local variable. Fixes: 609e36d372ad ("KVM: x86: pass host_initiated to functions that read MSRs") Signed-off-by: Nicolas Iooss Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac165c2fb8e5..bbaf44e8f0d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2379,8 +2379,6 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - u64 data; - switch (msr_info->index) { case MSR_IA32_PLATFORM_ID: case MSR_IA32_EBL_CR_POWERON: @@ -2453,7 +2451,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* TSC increment by tick */ msr_info->data = 1000ULL; /* CPU multiplier */ - data |= (((uint64_t)4ULL) << 40); + msr_info->data |= (((uint64_t)4ULL) << 40); break; case MSR_EFER: msr_info->data = vcpu->arch.efer; From a88464a8b0ffb2f8dfb69d3ab982169578b50f22 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Thu, 2 Jul 2015 19:07:46 +0300 Subject: [PATCH 579/839] kvm: add hyper-v crash msrs values Added Hyper-V crash msrs values - HV_X64_MSR_CRASH*. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index ce6068dbcfbc..8fba544e9cc4 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -199,6 +199,17 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_X64_MSR_CRASH_CTL_NOTIFY (1ULL << 63) +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ From 5a8011cb896ed9822ca9086cf9c4d4c5db16be64 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 3 Jul 2015 13:49:37 +1000 Subject: [PATCH 580/839] crypto: marvell/cesa - another fix up for of_get_named_gen_pool() rename Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- drivers/crypto/mv_cesa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 5bcd575fa96f..e6b658faef63 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1034,8 +1034,8 @@ static int mv_cesa_get_sram(struct platform_device *pdev, &sram_size); cp->sram_size = sram_size; - cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node, - "marvell,crypto-srams", 0); + cp->sram_pool = of_gen_pool_get(pdev->dev.of_node, + "marvell,crypto-srams", 0); if (cp->sram_pool) { cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size, &cp->sram_dma); From 1bd46782d08b01b73df0085b51ea1021b19b44fd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 15:22:54 +0100 Subject: [PATCH 581/839] ARM: avoid unwanted GCC memset()/memcpy() optimisations for IO variants We don't want GCC optimising our memset_io(), memcpy_fromio() or memcpy_toio() variants, so we must not call one of the standard functions. Provide a separate name for our assembly memcpy() and memset() functions, and use that instead, thereby bypassing GCC's ability to optimise these operations. GCCs optimisation may introduce unaligned accesses which are invalid for device mappings. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 9 ++++++--- arch/arm/kernel/armksyms.c | 6 ++++++ arch/arm/lib/memcpy.S | 2 ++ arch/arm/lib/memset.S | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index a5ed237c87d9..485982084fe9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -316,21 +316,24 @@ extern void _memset_io(volatile void __iomem *, int, size_t); static inline void memset_io(volatile void __iomem *dst, unsigned c, size_t count) { - memset((void __force *)dst, c, count); + extern void mmioset(void *, unsigned int, size_t); + mmioset((void __force *)dst, c, count); } #define memset_io(dst,c,count) memset_io(dst,c,count) static inline void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - memcpy(to, (const void __force *)from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy(to, (const void __force *)from, count); } #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) static inline void memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - memcpy((void __force *)to, from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy((void __force *)to, from, count); } #define memcpy_toio(to,from,count) memcpy_toio(to,from,count) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7797e81e40e0..64111bd4440b 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -61,8 +61,10 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(mmiocpy) ENTRY(memcpy) #include "copy_template.S" ENDPROC(memcpy) +ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index a4ee97b5a2bf..3c65e3bd790f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -16,6 +16,7 @@ .text .align 5 +ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? @@ -133,3 +134,4 @@ UNWIND( .fnstart ) b 1b UNWIND( .fnend ) ENDPROC(memset) +ENDPROC(mmioset) From e24ff467e12e1560de753313976c46e84fa6306a Mon Sep 17 00:00:00 2001 From: Shixin Zeng Date: Fri, 3 Jul 2015 08:46:50 +0200 Subject: [PATCH 582/839] drm/crtc: Fix edid length computation The length of each EDID block is EDID_LENGTH, and number of blocks is (1 + edid->extensions) - we need to multiply not add them. This causes wrong EDID to be passed on, and is a regression introduced by d2ed34362a52 (drm: Introduce helper for replacing blob properties) Signed-off-by: Shixin Zeng Cc: Daniel Stone Cc: Maarten Lankhorst Reviewed-by: Daniel Stone [danvet: Add Cc: and fix commit summary.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index b69ed97d447c..b9ba06176eb1 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4732,7 +4732,7 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, return 0; if (edid) - size = EDID_LENGTH + (1 + edid->extensions); + size = EDID_LENGTH * (1 + edid->extensions); ret = drm_property_replace_global_blob(dev, &connector->edid_blob_ptr, From 9705acd63b125dee8b15c705216d7186daea4625 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 3 Jul 2015 21:13:55 -0400 Subject: [PATCH 583/839] ext4: fix reservation release on invalidatepage for delalloc fs On delalloc enabled file system on invalidatepage operation in ext4_da_page_release_reservation() we want to clear the delayed buffer and remove the extent covering the delayed buffer from the extent status tree. However currently there is a bug where on the systems with page size > block size we will always remove extents from the start of the page regardless where the actual delayed buffers are positioned in the page. This leads to the errors like this: EXT4-fs warning (device loop0): ext4_da_release_space:1225: ext4_da_release_space: ino 13, to_free 1 with only 0 reserved data blocks This however can cause data loss on writeback time if the file system is in ENOSPC condition because we're releasing reservation for someones else delayed buffer. Fix this by only removing extents that corresponds to the part of the page we want to invalidate. This problem is reproducible by the following fio receipt (however I was only able to reproduce it with fio-2.1 or older. [global] bs=8k iodepth=1024 iodepth_batch=60 randrepeat=1 size=1m directory=/mnt/test numjobs=20 [job1] ioengine=sync bs=1k direct=1 rw=randread filename=file1:file2 [job2] ioengine=libaio rw=randwrite direct=1 filename=file1:file2 [job3] bs=1k ioengine=posixaio rw=randwrite direct=1 filename=file1:file2 [job5] bs=1k ioengine=sync rw=randread filename=file1:file2 [job7] ioengine=libaio rw=randwrite filename=file1:file2 [job8] ioengine=posixaio rw=randwrite filename=file1:file2 [job10] ioengine=mmap rw=randwrite bs=1k filename=file1:file2 [job11] ioengine=mmap rw=randwrite direct=1 filename=file1:file2 Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4ad73d3c1003..2334e86d7447 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1330,7 +1330,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1351,14 +1351,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we From d6f123a9297496ad0b6335fe881504c4b5b2a5e5 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 3 Jul 2015 23:56:50 -0400 Subject: [PATCH 584/839] ext4: be more strict when migrating to non-extent based file Currently the check in ext4_ind_migrate() is not enough before doing the real conversion: a) delayed allocated extents could bypass the check on eh->eh_entries and eh->eh_depth This can be demonstrated by this script xfs_io -fc "pwrite 0 4k" -c "pwrite 8k 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile where testfile has two extents but still be converted to non-extent based file format. b) only extent length is checked but not the offset, which would result in data lose (delalloc) or fs corruption (nodelalloc), because non-extent based file only supports at most (12 + 2^10 + 2^20 + 2^30) blocks This can be demostrated by xfs_io -fc "pwrite 5T 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile sync If delalloc is enabled, dmesg prints EXT4-fs warning (device dm-4): ext4_block_to_path:105: block 1342177280 > max in inode 53 EXT4-fs (dm-4): Delayed block allocation failed for inode 53 at logical offset 1342177280 with max blocks 1 with error 5 EXT4-fs (dm-4): This should not happen!! Data will be lost If delalloc is disabled, e2fsck -nf shows corruption Inode 53, i_size is 5497558142976, should be 4096. Fix? no Fix the two issues by a) forcing all delayed allocation blocks to be allocated before checking eh->eh_depth and eh->eh_entries b) limiting the last logical block of the extent is within direct map Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e42102..6d8b0c917364 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -654,7 +663,8 @@ int ext4_ind_migrate(struct inode *inode) else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + end = le32_to_cpu(ex->ee_block) + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } From 8974fec7d72e3e02752fe0f27b4c3719c78d9a15 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sat, 4 Jul 2015 00:03:44 -0400 Subject: [PATCH 585/839] ext4: correctly migrate a file with a hole at the beginning Currently ext4_ind_migrate() doesn't correctly handle a file which contains a hole at the beginning of the file. This caused the migration to be done incorrectly, and then if there is a subsequent following delayed allocation write to the "hole", this would reclaim the same data blocks again and results in fs corruption. # assmuing 4k block size ext4, with delalloc enabled # skip the first block and write to the second block xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/ext4/testfile # converting to indirect-mapped file, which would move the data blocks # to the beginning of the file, but extent status cache still marks # that region as a hole chattr -e /mnt/ext4/testfile # delayed allocation writes to the "hole", reclaim the same data block # again, results in i_blocks corruption xfs_io -c "pwrite 0 4k" /mnt/ext4/testfile umount /mnt/ext4 e2fsck -nf /dev/sda6 ... Inode 53, i_blocks is 16, should be 8. Fix? no ... Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6d8b0c917364..6163ad21cb0e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,7 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; - ext4_lblk_t end; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -659,11 +659,12 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - end = le32_to_cpu(ex->ee_block) + len - 1; + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; @@ -672,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: From f6db8347993256b58bd4746b0c4c5b935c32210d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 25 Jun 2015 23:53:37 +0530 Subject: [PATCH 586/839] sched/stat: Simplify the sched_info accounting dependency Both CONFIG_SCHEDSTATS=y and CONFIG_TASK_DELAY_ACCT=y track task sched_info, which results in ugly #if clauses. Simplify the code by introducing a synthethic CONFIG_SCHED_INFO switch, selected by both. Signed-off-by: Naveen N. Rao Cc: Balbir Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: ricklind@us.ibm.com Link: http://lkml.kernel.org/r/8d19eef800811a94b0f91bcbeb27430a884d7433.1435255405.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- init/Kconfig | 1 + kernel/sched/core.c | 2 +- kernel/sched/stats.h | 4 ++-- lib/Kconfig.debug | 5 +++++ 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83e608a..9bf4bc0e3b8b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -849,7 +849,7 @@ extern struct user_struct root_user; struct backing_dev_info; struct reclaim_state; -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ @@ -859,7 +859,7 @@ struct sched_info { unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ }; -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ +#endif /* CONFIG_SCHED_INFO */ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { @@ -1408,7 +1408,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; #endif diff --git a/init/Kconfig b/init/Kconfig index b999fa381bf9..12cb556ad160 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -435,6 +435,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting" depends on TASKSTATS + select SCHED_INFO help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c86935a7f1f8..abb8785ed1ba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1975,7 +1975,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) set_task_cpu(p, cpu); raw_spin_unlock_irqrestore(&p->pi_lock, flags); -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 077ebbd5e10f..b0fbc7632de5 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) # define schedstat_set(var, val) do { } while (0) #endif -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO static inline void sched_info_reset_dequeued(struct task_struct *t) { t->sched_info.last_queued = 0; @@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq, #define sched_info_depart(rq, t) do { } while (0) #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) -#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ +#endif /* CONFIG_SCHED_INFO */ /* * The following are functions that support scheduler-internal time accounting. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b908048f8d6a..e2894b23efb6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -841,9 +841,14 @@ config SCHED_DEBUG that can help debug the scheduler. The runtime overhead of this option is minimal. +config SCHED_INFO + bool + default n + config SCHEDSTATS bool "Collect scheduler statistics" depends on DEBUG_KERNEL && PROC_FS + select SCHED_INFO help If you say Y here, additional code will be inserted into the scheduler and related routines to collect statistics about From 5968cecedd7a09f23e9fcb5f9fb4e893712f35ba Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 30 Jun 2015 14:36:03 +0530 Subject: [PATCH 587/839] sched/stat: Expose /proc/pid/schedstat if CONFIG_SCHED_INFO=y Expand /proc/pid/schedstat output: - enable it on CONFIG_TASK_DELAY_ACCT=y && !CONFIG_SCHEDSTATS kernels. - dump all zeroes on kernels that are booted with the 'nodelayacct' option, which boot option disables delay accounting on CONFIG_TASK_DELAY_ACCT=y kernels. Signed-off-by: Naveen N. Rao Cc: Balbir Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: ricklind@us.ibm.com Link: http://lkml.kernel.org/r/5ccbef17d4bc841084ea6e6421d4e4a23b7b806f.1435654789.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- fs/proc/base.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 286a422f440e..ad63fa3306df 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -304,14 +304,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, } #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - seq_printf(m, "%llu %llu %lu\n", + if (unlikely(!sched_info_on())) + seq_printf(m, "0 0 0\n"); + else + seq_printf(m, "%llu %llu %lu\n", (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); @@ -2600,7 +2603,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP @@ -2948,7 +2951,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP From 6b55c9654fccf69ae7ace23ca101dc37b903181b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 25 Jun 2015 22:51:41 +0530 Subject: [PATCH 588/839] sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h Currently print_cfs_rq() is declared in include/linux/sched.h. However it's not used outside kernel/sched. Hence move the declaration to kernel/sched/sched.h Also some functions are only available for CONFIG_SCHED_DEBUG=y. Hence move the declarations to within the #ifdef. Signed-off-by: Srikar Dronamraju Acked-by: Rik van Riel Cc: Iulia Manda Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435252903-1081-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- kernel/sched/sched.h | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 9bf4bc0e3b8b..3505352dd296 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -191,8 +191,6 @@ struct task_group; #ifdef CONFIG_SCHED_DEBUG extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #endif /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index aea7c1f393cb..7d5895258fe3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1668,9 +1668,14 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); + +#ifdef CONFIG_SCHED_DEBUG extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); +extern void +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); +#endif extern void init_cfs_rq(struct cfs_rq *cfs_rq); extern void init_rt_rq(struct rt_rq *rt_rq); From e3d24d0a6048a826de5562d75dedb664d3a2a1b2 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 25 Jun 2015 22:51:42 +0530 Subject: [PATCH 589/839] sched/numa: Show numa_group ID in /proc/sched_debug task listings Having the numa group ID in /proc/sched_debug helps to see how the numa groups have spread across the system. Signed-off-by: Srikar Dronamraju Acked-by: Rik van Riel Cc: Iulia Manda Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435252903-1081-3-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 315c68e015d9..f1dcd1d390c1 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 0LL, 0L); #endif #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d", task_node(p)); + SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED SEQ_printf(m, " %s", task_group_path(task_group(p))); From 397f2378f136128623fc237746157aa2564d1082 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 25 Jun 2015 22:51:43 +0530 Subject: [PATCH 590/839] sched/numa: Fix numa balancing stats in /proc/pid/sched Commit 44dba3d5d6a1 ("sched: Refactor task_struct to use numa_faults instead of numa_* pointers") modified the way tsk->numa_faults stats are accounted. However that commit never touched show_numa_stats() that is displayed in /proc/pid/sched and thus the numbers displayed in /proc/pid/sched don't match the actual numbers. Fix it by making sure that /proc/pid/sched reflects the task fault numbers. Also add group fault stats too. Also couple of more modifications are added here: 1. Format changes: - Previously we would list two entries per node, one for private and one for shared. Also the home node info was listed in each entry. - Now preferred node, total_faults and current node are displayed separately. - Now there is one entry per node, that lists private,shared task and group faults. 2. Unit changes: - p->numa_pages_migrated was getting reset after every read of /proc/pid/sched. It's more useful to have absolute numbers since differential migrations between two accesses can be more easily calculated. Signed-off-by: Srikar Dronamraju Acked-by: Rik van Riel Cc: Iulia Manda Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435252903-1081-4-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 38 +++++++++++++++++--------------------- kernel/sched/fair.c | 22 +++++++++++++++++++++- kernel/sched/sched.h | 10 +++++++++- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index f1dcd1d390c1..4222ec50ab88 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs); SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) +#ifdef CONFIG_NUMA_BALANCING +void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, + unsigned long tpf, unsigned long gsf, unsigned long gpf) +{ + SEQ_printf(m, "numa_faults node=%d ", node); + SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf); + SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf); +} +#endif + + static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING struct mempolicy *pol; - int node, i; if (p->mm) P(mm->numa_scan_seq); @@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) mpol_get(pol); task_unlock(p); - SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0)); - - for_each_online_node(node) { - for (i = 0; i < 2; i++) { - unsigned long nr_faults = -1; - int cpu_current, home_node; - - if (p->numa_faults) - nr_faults = p->numa_faults[2*node + i]; - - cpu_current = !i ? (task_node(p) == node) : - (pol && node_isset(node, pol->v.nodes)); - - home_node = (p->numa_preferred_nid == node); - - SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n", - i, node, cpu_current, home_node, nr_faults); - } - } - + P(numa_pages_migrated); + P(numa_preferred_nid); + P(total_numa_faults); + SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", + task_node(p), task_numa_group_id(p)); + show_numa_stats(p, m); mpol_put(pol); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40a7fcbf491e..7245039fc67b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8468,7 +8468,27 @@ void print_cfs_stats(struct seq_file *m, int cpu) print_cfs_rq(m, cpu, cfs_rq); rcu_read_unlock(); } -#endif + +#ifdef CONFIG_NUMA_BALANCING +void show_numa_stats(struct task_struct *p, struct seq_file *m) +{ + int node; + unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; + + for_each_online_node(node) { + if (p->numa_faults) { + tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; + tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; + } + if (p->numa_group) { + gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], + gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; + } + print_numa_stats(m, node, tsf, tpf, gsf, gpf); + } +} +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ __init void init_sched_fair_class(void) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7d5895258fe3..7ef596837dac 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1675,7 +1675,15 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); -#endif + +#ifdef CONFIG_NUMA_BALANCING +extern void +show_numa_stats(struct task_struct *p, struct seq_file *m); +extern void +print_numa_stats(struct seq_file *m, int node, unsigned long tsf, + unsigned long tpf, unsigned long gsf, unsigned long gpf); +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ extern void init_cfs_rq(struct cfs_rq *cfs_rq); extern void init_rt_rq(struct rt_rq *rt_rq); From b96fecbfa8c88b057e2bbf10021521c232bb3650 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Jul 2015 09:58:19 +0200 Subject: [PATCH 591/839] x86/fpu: Fix boot crash in the early FPU code Jan Kara and Thomas Gleixner reported boot crashes in the FPU code: general protection fault: 0000 [#1] SMP RIP: 0010:[] [] mxcsr_feature_mask_init+0x1c/0x40 2b:* 0f ae 85 00 fe ff ff fxsave -0x200(%rbp) and bisected it down to the following FPU commit: 91a8c2a5b43f ("x86/fpu: Clean up and fix MXCSR handling") The reason is that the on-stack FPU registers state variable, used by the FXSAVE instruction, did not have the required minimum alignment of 16 bytes, causing the general protection fault. This is most likely a GCC bug in older GCC versions, but the offending commit also added a bogus extra 32-byte alignment (which GCC ignored too). So fix this bug by making the variable static again, but also mark it __initdata this time, because fpu__init_system_mxcsr() is now an __init function. Reported-and-bisected-by: Jan Kara Reported-bisected-and-tested-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Jan Kara Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150704075819.GA9201@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index fc878fee6a51..32826791e675 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -95,11 +95,12 @@ static void __init fpu__init_system_mxcsr(void) unsigned int mask = 0; if (cpu_has_fxsr) { - struct fxregs_state fx_tmp __aligned(32) = { }; + /* Static because GCC does not get 16-byte stack alignment right: */ + static struct fxregs_state fxregs __initdata; - asm volatile("fxsave %0" : "+m" (fx_tmp)); + asm volatile("fxsave %0" : "+m" (fxregs)); - mask = fx_tmp.mxcsr_mask; + mask = fxregs.mxcsr_mask; /* * If zero then use the default features mask, From a1bd3baeb2f18b2b3d0f98ce5fdaa725149b950b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: [PATCH 592/839] NTB: Add NTB hardware abstraction layer Abstract the NTB device behind a programming interface, so that it can support different hardware and client drivers. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- Documentation/ntb.txt | 32 ++ MAINTAINERS | 4 +- drivers/ntb/Makefile | 1 + drivers/ntb/ntb.c | 251 +++++++++++ include/linux/ntb.h | 984 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1271 insertions(+), 1 deletion(-) create mode 100644 Documentation/ntb.txt create mode 100644 drivers/ntb/ntb.c create mode 100644 include/linux/ntb.h diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt new file mode 100644 index 000000000000..9d46dc9712a8 --- /dev/null +++ b/Documentation/ntb.txt @@ -0,0 +1,32 @@ +# NTB Drivers + +NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects +the separate memory systems of two computers to the same PCI-Express fabric. +Existing NTB hardware supports a common feature set, including scratchpad +registers, doorbell registers, and memory translation windows. Scratchpad +registers are read-and-writable registers that are accessible from either side +of the device, so that peers can exchange a small amount of information at a +fixed address. Doorbell registers provide a way for peers to send interrupt +events. Memory windows allow translated read and write access to the peer +memory. + +## NTB Core Driver (ntb) + +The NTB core driver defines an api wrapping the common feature set, and allows +clients interested in NTB features to discover NTB the devices supported by +hardware drivers. The term "client" is used here to mean an upper layer +component making use of the NTB api. The term "driver," or "hardware driver," +is used here to mean a driver for a specific vendor and model of NTB hardware. + +## NTB Client Drivers + +NTB client drivers should register with the NTB core driver. After +registering, the client probe and remove functions will be called appropriately +as ntb hardware, or hardware drivers, are inserted and removed. The +registration uses the Linux Device framework, so it should feel familiar to +anyone who has written a pci driver. + +## NTB Hardware Drivers + +NTB hardware drivers should register devices with the NTB core driver. After +registering, clients probe and remove functions will be called. diff --git a/MAINTAINERS b/MAINTAINERS index 663bc8ed1860..e2fc9eec2e16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6995,15 +6995,17 @@ F: drivers/power/bq27x00_battery.c F: drivers/power/isp1704_charger.c F: drivers/power/rx51_battery.c -NTB DRIVER +NTB DRIVER CORE M: Jon Mason M: Dave Jiang +M: Allen Hubbe S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git F: drivers/ntb/ F: drivers/net/ntb_netdev.c F: include/linux/ntb.h +F: include/linux/ntb_transport.h NTFS FILESYSTEM M: Anton Altaparmakov diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 545b10a131af..712e953a8fda 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,3 +1,4 @@ +obj-$(CONFIG_NTB) += ntb.o obj-$(CONFIG_NTB) += ntb_hw_intel.o ntb_hw_intel-objs := hw/intel/ntb_hw_intel.o ntb_transport.o diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c new file mode 100644 index 000000000000..23435f2a5486 --- /dev/null +++ b/drivers/ntb/ntb.c @@ -0,0 +1,251 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +#include +#include +#include + +#include +#include + +#define DRIVER_NAME "ntb" +#define DRIVER_DESCRIPTION "PCIe NTB Driver Framework" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "24 March 2015" +#define DRIVER_AUTHOR "Allen Hubbe " + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static struct bus_type ntb_bus; +static void ntb_dev_release(struct device *dev); + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name) +{ + if (!client) + return -EINVAL; + if (!ntb_client_ops_is_valid(&client->ops)) + return -EINVAL; + + memset(&client->drv, 0, sizeof(client->drv)); + client->drv.bus = &ntb_bus; + client->drv.name = mod_name; + client->drv.owner = mod; + + return driver_register(&client->drv); +} +EXPORT_SYMBOL(__ntb_register_client); + +void ntb_unregister_client(struct ntb_client *client) +{ + driver_unregister(&client->drv); +} +EXPORT_SYMBOL(ntb_unregister_client); + +int ntb_register_device(struct ntb_dev *ntb) +{ + if (!ntb) + return -EINVAL; + if (!ntb->pdev) + return -EINVAL; + if (!ntb->ops) + return -EINVAL; + if (!ntb_dev_ops_is_valid(ntb->ops)) + return -EINVAL; + + init_completion(&ntb->released); + + memset(&ntb->dev, 0, sizeof(ntb->dev)); + ntb->dev.bus = &ntb_bus; + ntb->dev.parent = &ntb->pdev->dev; + ntb->dev.release = ntb_dev_release; + dev_set_name(&ntb->dev, pci_name(ntb->pdev)); + + ntb->ctx = NULL; + ntb->ctx_ops = NULL; + spin_lock_init(&ntb->ctx_lock); + + return device_register(&ntb->dev); +} +EXPORT_SYMBOL(ntb_register_device); + +void ntb_unregister_device(struct ntb_dev *ntb) +{ + device_unregister(&ntb->dev); + wait_for_completion(&ntb->released); +} +EXPORT_SYMBOL(ntb_unregister_device); + +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops) +{ + unsigned long irqflags; + + if (!ntb_ctx_ops_is_valid(ctx_ops)) + return -EINVAL; + if (ntb->ctx_ops) + return -EINVAL; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + ntb->ctx = ctx; + ntb->ctx_ops = ctx_ops; + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); + + return 0; +} +EXPORT_SYMBOL(ntb_set_ctx); + +void ntb_clear_ctx(struct ntb_dev *ntb) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + ntb->ctx_ops = NULL; + ntb->ctx = NULL; + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_clear_ctx); + +void ntb_link_event(struct ntb_dev *ntb) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + if (ntb->ctx_ops && ntb->ctx_ops->link_event) + ntb->ctx_ops->link_event(ntb->ctx); + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_link_event); + +void ntb_db_event(struct ntb_dev *ntb, int vector) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + if (ntb->ctx_ops && ntb->ctx_ops->db_event) + ntb->ctx_ops->db_event(ntb->ctx, vector); + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_db_event); + +static int ntb_probe(struct device *dev) +{ + struct ntb_dev *ntb; + struct ntb_client *client; + int rc; + + get_device(dev); + ntb = dev_ntb(dev); + client = drv_ntb_client(dev->driver); + + rc = client->ops.probe(client, ntb); + if (rc) + put_device(dev); + + return rc; +} + +static int ntb_remove(struct device *dev) +{ + struct ntb_dev *ntb; + struct ntb_client *client; + + if (dev->driver) { + ntb = dev_ntb(dev); + client = drv_ntb_client(dev->driver); + + client->ops.remove(client, ntb); + put_device(dev); + } + + return 0; +} + +static void ntb_dev_release(struct device *dev) +{ + struct ntb_dev *ntb = dev_ntb(dev); + + complete(&ntb->released); +} + +static struct bus_type ntb_bus = { + .name = "ntb", + .probe = ntb_probe, + .remove = ntb_remove, +}; + +static int __init ntb_driver_init(void) +{ + return bus_register(&ntb_bus); +} +module_init(ntb_driver_init); + +static void __exit ntb_driver_exit(void) +{ + bus_unregister(&ntb_bus); +} +module_exit(ntb_driver_exit); + diff --git a/include/linux/ntb.h b/include/linux/ntb.h new file mode 100644 index 000000000000..b02f72bb8e32 --- /dev/null +++ b/include/linux/ntb.h @@ -0,0 +1,984 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +#ifndef _NTB_H_ +#define _NTB_H_ + +#include +#include + +struct ntb_client; +struct ntb_dev; +struct pci_dev; + +/** + * enum ntb_topo - NTB connection topology + * @NTB_TOPO_NONE: Topology is unknown or invalid. + * @NTB_TOPO_PRI: On primary side of local ntb. + * @NTB_TOPO_SEC: On secondary side of remote ntb. + * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. + * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. + */ +enum ntb_topo { + NTB_TOPO_NONE = -1, + NTB_TOPO_PRI, + NTB_TOPO_SEC, + NTB_TOPO_B2B_USD, + NTB_TOPO_B2B_DSD, +}; + +static inline int ntb_topo_is_b2b(enum ntb_topo topo) +{ + switch ((int)topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + return 1; + } + return 0; +} + +static inline char *ntb_topo_string(enum ntb_topo topo) +{ + switch (topo) { + case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; + case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; + case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; + case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; + case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + } + return "NTB_TOPO_INVALID"; +} + +/** + * enum ntb_speed - NTB link training speed + * @NTB_SPEED_AUTO: Request the max supported speed. + * @NTB_SPEED_NONE: Link is not trained to any speed. + * @NTB_SPEED_GEN1: Link is trained to gen1 speed. + * @NTB_SPEED_GEN2: Link is trained to gen2 speed. + * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + */ +enum ntb_speed { + NTB_SPEED_AUTO = -1, + NTB_SPEED_NONE = 0, + NTB_SPEED_GEN1 = 1, + NTB_SPEED_GEN2 = 2, + NTB_SPEED_GEN3 = 3, +}; + +/** + * enum ntb_width - NTB link training width + * @NTB_WIDTH_AUTO: Request the max supported width. + * @NTB_WIDTH_NONE: Link is not trained to any width. + * @NTB_WIDTH_1: Link is trained to 1 lane width. + * @NTB_WIDTH_2: Link is trained to 2 lane width. + * @NTB_WIDTH_4: Link is trained to 4 lane width. + * @NTB_WIDTH_8: Link is trained to 8 lane width. + * @NTB_WIDTH_12: Link is trained to 12 lane width. + * @NTB_WIDTH_16: Link is trained to 16 lane width. + * @NTB_WIDTH_32: Link is trained to 32 lane width. + */ +enum ntb_width { + NTB_WIDTH_AUTO = -1, + NTB_WIDTH_NONE = 0, + NTB_WIDTH_1 = 1, + NTB_WIDTH_2 = 2, + NTB_WIDTH_4 = 4, + NTB_WIDTH_8 = 8, + NTB_WIDTH_12 = 12, + NTB_WIDTH_16 = 16, + NTB_WIDTH_32 = 32, +}; + +/** + * struct ntb_client_ops - ntb client operations + * @probe: Notify client of a new device. + * @remove: Notify client to remove a device. + */ +struct ntb_client_ops { + int (*probe)(struct ntb_client *client, struct ntb_dev *ntb); + void (*remove)(struct ntb_client *client, struct ntb_dev *ntb); +}; + +static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->probe && + ops->remove && + 1; +} + +/** + * struct ntb_ctx_ops - ntb driver context operations + * @link_event: See ntb_link_event(). + * @db_event: See ntb_db_event(). + */ +struct ntb_ctx_ops { + void (*link_event)(void *ctx); + void (*db_event)(void *ctx, int db_vector); +}; + +static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) +{ + /* commented callbacks are not required: */ + return + /* ops->link_event && */ + /* ops->db_event && */ + 1; +} + +/** + * struct ntb_ctx_ops - ntb device operations + * @mw_count: See ntb_mw_count(). + * @mw_get_range: See ntb_mw_get_range(). + * @mw_set_trans: See ntb_mw_set_trans(). + * @mw_clear_trans: See ntb_mw_clear_trans(). + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). + * @db_is_unsafe: See ntb_db_is_unsafe(). + * @db_valid_mask: See ntb_db_valid_mask(). + * @db_vector_count: See ntb_db_vector_count(). + * @db_vector_mask: See ntb_db_vector_mask(). + * @db_read: See ntb_db_read(). + * @db_set: See ntb_db_set(). + * @db_clear: See ntb_db_clear(). + * @db_read_mask: See ntb_db_read_mask(). + * @db_set_mask: See ntb_db_set_mask(). + * @db_clear_mask: See ntb_db_clear_mask(). + * @peer_db_addr: See ntb_peer_db_addr(). + * @peer_db_read: See ntb_peer_db_read(). + * @peer_db_set: See ntb_peer_db_set(). + * @peer_db_clear: See ntb_peer_db_clear(). + * @peer_db_read_mask: See ntb_peer_db_read_mask(). + * @peer_db_set_mask: See ntb_peer_db_set_mask(). + * @peer_db_clear_mask: See ntb_peer_db_clear_mask(). + * @spad_is_unsafe: See ntb_spad_is_unsafe(). + * @spad_count: See ntb_spad_count(). + * @spad_read: See ntb_spad_read(). + * @spad_write: See ntb_spad_write(). + * @peer_spad_addr: See ntb_peer_spad_addr(). + * @peer_spad_read: See ntb_peer_spad_read(). + * @peer_spad_write: See ntb_peer_spad_write(). + */ +struct ntb_dev_ops { + int (*mw_count)(struct ntb_dev *ntb); + int (*mw_get_range)(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size); + int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size); + int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + + int (*db_is_unsafe)(struct ntb_dev *ntb); + u64 (*db_valid_mask)(struct ntb_dev *ntb); + int (*db_vector_count)(struct ntb_dev *ntb); + u64 (*db_vector_mask)(struct ntb_dev *ntb, int db_vector); + + u64 (*db_read)(struct ntb_dev *ntb); + int (*db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*db_read_mask)(struct ntb_dev *ntb); + int (*db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*peer_db_addr)(struct ntb_dev *ntb, + phys_addr_t *db_addr, resource_size_t *db_size); + u64 (*peer_db_read)(struct ntb_dev *ntb); + int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*peer_db_read_mask)(struct ntb_dev *ntb); + int (*peer_db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*spad_is_unsafe)(struct ntb_dev *ntb); + int (*spad_count)(struct ntb_dev *ntb); + + u32 (*spad_read)(struct ntb_dev *ntb, int idx); + int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + + int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); + int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); +}; + +static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->mw_count && + ops->mw_get_range && + ops->mw_set_trans && + /* ops->mw_clear_trans && */ + ops->link_is_up && + ops->link_enable && + ops->link_disable && + /* ops->db_is_unsafe && */ + ops->db_valid_mask && + + /* both set, or both unset */ + (!ops->db_vector_count == !ops->db_vector_mask) && + + ops->db_read && + /* ops->db_set && */ + ops->db_clear && + /* ops->db_read_mask && */ + ops->db_set_mask && + ops->db_clear_mask && + ops->peer_db_addr && + /* ops->peer_db_read && */ + ops->peer_db_set && + /* ops->peer_db_clear && */ + /* ops->peer_db_read_mask && */ + /* ops->peer_db_set_mask && */ + /* ops->peer_db_clear_mask && */ + /* ops->spad_is_unsafe && */ + ops->spad_count && + ops->spad_read && + ops->spad_write && + ops->peer_spad_addr && + /* ops->peer_spad_read && */ + ops->peer_spad_write && + 1; +} + +/** + * struct ntb_client - client interested in ntb devices + * @drv: Linux driver object. + * @ops: See &ntb_client_ops. + */ +struct ntb_client { + struct device_driver drv; + const struct ntb_client_ops ops; +}; + +#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) + +/** + * struct ntb_device - ntb device + * @dev: Linux device object. + * @pdev: Pci device entry of the ntb. + * @topo: Detected topology of the ntb. + * @ops: See &ntb_dev_ops. + * @ctx: See &ntb_ctx_ops. + * @ctx_ops: See &ntb_ctx_ops. + */ +struct ntb_dev { + struct device dev; + struct pci_dev *pdev; + enum ntb_topo topo; + const struct ntb_dev_ops *ops; + void *ctx; + const struct ntb_ctx_ops *ctx_ops; + + /* private: */ + + /* synchronize setting, clearing, and calling ctx_ops */ + spinlock_t ctx_lock; + /* block unregister until device is fully released */ + struct completion released; +}; + +#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) + +/** + * ntb_register_client() - register a client for interest in ntb devices + * @client: Client context. + * + * The client will be added to the list of clients interested in ntb devices. + * The client will be notified of any ntb devices that are not already + * associated with a client, or if ntb devices are registered later. + * + * Return: Zero if the client is registered, otherwise an error number. + */ +#define ntb_register_client(client) \ + __ntb_register_client((client), THIS_MODULE, KBUILD_MODNAME) + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name); + +/** + * ntb_unregister_client() - unregister a client for interest in ntb devices + * @client: Client context. + * + * The client will be removed from the list of clients interested in ntb + * devices. If any ntb devices are associated with the client, the client will + * be notified to remove those devices. + */ +void ntb_unregister_client(struct ntb_client *client); + +#define module_ntb_client(__ntb_client) \ + module_driver(__ntb_client, ntb_register_client, \ + ntb_unregister_client) + +/** + * ntb_register_device() - register a ntb device + * @ntb: NTB device context. + * + * The device will be added to the list of ntb devices. If any clients are + * interested in ntb devices, each client will be notified of the ntb device, + * until at most one client accepts the device. + * + * Return: Zero if the device is registered, otherwise an error number. + */ +int ntb_register_device(struct ntb_dev *ntb); + +/** + * ntb_register_device() - unregister a ntb device + * @ntb: NTB device context. + * + * The device will be removed from the list of ntb devices. If the ntb device + * is associated with a client, the client will be notified to remove the + * device. + */ +void ntb_unregister_device(struct ntb_dev *ntb); + +/** + * ntb_set_ctx() - associate a driver context with an ntb device + * @ntb: NTB device context. + * @ctx: Driver context. + * @ctx_ops: Driver context operations. + * + * Associate a driver context and operations with a ntb device. The context is + * provided by the client driver, and the driver may associate a different + * context with each ntb device. + * + * Return: Zero if the context is associated, otherwise an error number. + */ +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops); + +/** + * ntb_clear_ctx() - disassociate any driver context from an ntb device + * @ntb: NTB device context. + * + * Clear any association that may exist between a driver context and the ntb + * device. + */ +void ntb_clear_ctx(struct ntb_dev *ntb); + +/** + * ntb_link_event() - notify driver context of a change in link status + * @ntb: NTB device context. + * + * Notify the driver context that the link status may have changed. The driver + * should call ntb_link_is_up() to get the current status. + */ +void ntb_link_event(struct ntb_dev *ntb); + +/** + * ntb_db_event() - notify driver context of a doorbell event + * @ntb: NTB device context. + * @vector: Interrupt vector number. + * + * Notify the driver context of a doorbell event. If hardware supports + * multiple interrupt vectors for doorbells, the vector number indicates which + * vector received the interrupt. The vector number is relative to the first + * vector used for doorbells, starting at zero, and must be less than + ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * doorbell bits need service, and ntb_db_vector_mask() to determine which of + * those bits are associated with the vector number. + */ +void ntb_db_event(struct ntb_dev *ntb, int vector); + +/** + * ntb_mw_count() - get the number of memory windows + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * + * Return: the number of memory windows. + */ +static inline int ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->mw_count(ntb); +} + +/** + * ntb_mw_get_range() - get the range of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @base: OUT - the base address for mapping the memory window + * @size: OUT - the size for mapping the memory window + * @align: OUT - the base alignment for translating the memory window + * @align_size: OUT - the size alignment for translating the memory window + * + * Get the range of a memory window. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. The alignment and + * size may be used for translating the memory window, for the peer to access + * memory on the local system. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size) +{ + return ntb->ops->mw_get_range(ntb, idx, base, size, + align, align_size); +} + +/** + * ntb_mw_set_trans() - set the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @addr: The dma address local memory to expose to the peer. + * @size: The size of the local memory to expose to the peer. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + return ntb->ops->mw_set_trans(ntb, idx, addr, size); +} + +/** + * ntb_mw_clear_trans() - clear the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * + * Clear the translation of a memory window. The peer may no longer access + * local memory through the window. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +{ + if (!ntb->ops->mw_clear_trans) + return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, idx); +} + +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. + * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + +/** + * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using doorbells. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use doorbells, or One if it is not safe. + */ +static inline int ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_is_unsafe) + return 0; + + return ntb->ops->db_is_unsafe(ntb); +} + +/** + * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb + * @ntb: NTB device context. + * + * Hardware may support different number or arrangement of doorbell bits. + * + * Return: A mask of doorbell bits supported by the ntb. + */ +static inline u64 ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb->ops->db_valid_mask(ntb); +} + +/** + * ntb_db_vector_count() - get the number of doorbell interrupt vectors + * @ntb: NTB device context. + * + * Hardware may support different number of interrupt vectors. + * + * Return: The number of doorbell interrupt vectors. + */ +static inline int ntb_db_vector_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_vector_count) + return 1; + + return ntb->ops->db_vector_count(ntb); +} + +/** + * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector + * @ntb: NTB device context. + * @vector: Doorbell vector number. + * + * Each interrupt vector may have a different number or arrangement of bits. + * + * Return: A mask of doorbell bits serviced by a vector. + */ +static inline u64 ntb_db_vector_mask(struct ntb_dev *ntb, int vector) +{ + if (!ntb->ops->db_vector_mask) + return ntb_db_valid_mask(ntb); + + return ntb->ops->db_vector_mask(ntb, vector); +} + +/** + * ntb_db_read() - read the local doorbell register + * @ntb: NTB device context. + * + * Read the local doorbell register, and return the bits that are set. + * + * Return: The bits currently set in the local doorbell register. + */ +static inline u64 ntb_db_read(struct ntb_dev *ntb) +{ + return ntb->ops->db_read(ntb); +} + +/** + * ntb_db_set() - set bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the local doorbell register, which may generate a local doorbell + * interrupt. Bits that were already set must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set) + return -EINVAL; + + return ntb->ops->db_set(ntb, db_bits); +} + +/** + * ntb_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell register, arming the bits for the next + * doorbell. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear(ntb, db_bits); +} + +/** + * ntb_db_read_mask() - read the local doorbell mask + * @ntb: NTB device context. + * + * Read the local doorbell mask register, and return the bits that are set. + * + * This is unusual, though hardware is likely to support it. + * + * Return: The bits currently set in the local doorbell mask register. + */ +static inline u64 ntb_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->db_read_mask(ntb); +} + +/** + * ntb_db_set_mask() - set bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the local doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_set_mask(ntb, db_bits); +} + +/** + * ntb_db_clear_mask() - clear bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If a doorbell bit is already + * set at the time the mask is cleared, and the corresponding mask bit is + * changed from set to clear, then the ntb driver must ensure that + * ntb_db_event() is called. If the hardware does not generate the interrupt + * on clearing the mask bit, then the driver must call ntb_db_event() anyway. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_addr() - address and size of the peer doorbell register + * @ntb: NTB device context. + * @db_addr: OUT - The address of the peer doorbell register. + * @db_size: OUT - The number of bytes to write the peer doorbell register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * The drivers may wish to ring the peer doorbell at the completion of memory + * copy operations. For efficiency, and to simplify ordering of operations + * between the dma memory copies and the ringing doorbell, the driver may + * append one additional dma memory copy with the doorbell register as the + * destination, after the memory copy operations. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + return ntb->ops->peer_db_addr(ntb, db_addr, db_size); +} + +/** + * ntb_peer_db_read() - read the peer doorbell register + * @ntb: NTB device context. + * + * Read the peer doorbell register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell register. + */ +static inline u64 ntb_peer_db_read(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_db_read) + return 0; + + return ntb->ops->peer_db_read(ntb); +} + +/** + * ntb_peer_db_set() - set bits in the peer doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the peer doorbell register, which may generate a peer doorbell + * interrupt. Bits that were already set must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->peer_db_set(ntb, db_bits); +} + +/** + * ntb_peer_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell register, arming the bits for the next + * doorbell. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear) + return -EINVAL; + + return ntb->ops->peer_db_clear(ntb, db_bits); +} + +/** + * ntb_peer_db_read_mask() - read the peer doorbell mask + * @ntb: NTB device context. + * + * Read the peer doorbell mask register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell mask register. + */ +static inline u64 ntb_peer_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->peer_db_read_mask(ntb); +} + +/** + * ntb_peer_db_set_mask() - set bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the peer doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set_mask) + return -EINVAL; + + return ntb->ops->peer_db_set_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_clear_mask() - clear bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If the hardware does not + * generate the interrupt on clearing the mask bit, then the driver should not + * implement this function! + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear_mask) + return -EINVAL; + + return ntb->ops->peer_db_clear_mask(ntb, db_bits); +} + +/** + * ntb_spad_is_unsafe() - check if it is safe to use the hardware scratchpads + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using scratchpads. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use scratchpads, or One if it is not safe. + */ +static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->spad_is_unsafe) + return 0; + + return ntb->ops->spad_is_unsafe(ntb); +} + +/** + * ntb_mw_count() - get the number of scratchpads + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of scratchpads. + * + * Return: the number of scratchpads. + */ +static inline int ntb_spad_count(struct ntb_dev *ntb) +{ + return ntb->ops->spad_count(ntb); +} + +/** + * ntb_spad_read() - read the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the local scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->spad_read(ntb, idx); +} + +/** + * ntb_spad_write() - write the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the local scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->spad_write(ntb, idx, val); +} + +/** + * ntb_peer_spad_addr() - address of the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @spad_addr: OUT - The address of the peer scratchpad register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); +} + +/** + * ntb_peer_spad_read() - read the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the peer scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->peer_spad_read(ntb, idx); +} + +/** + * ntb_peer_spad_write() - write the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the peer scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->peer_spad_write(ntb, idx, val); +} + +#endif From e26a5843f7f5014ae4460030ca4de029a3ac35d3 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: [PATCH 593/839] NTB: Split ntb_hw_intel and ntb_transport drivers Change ntb_hw_intel to use the new NTB hardware abstraction layer. Split ntb_transport into its own driver. Change it to use the new NTB hardware abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- Documentation/ntb.txt | 26 + MAINTAINERS | 8 + drivers/net/ntb_netdev.c | 54 +- drivers/ntb/Kconfig | 35 +- drivers/ntb/Makefile | 6 +- drivers/ntb/hw/Kconfig | 1 + drivers/ntb/hw/Makefile | 1 + drivers/ntb/hw/intel/Kconfig | 7 + drivers/ntb/hw/intel/Makefile | 1 + drivers/ntb/hw/intel/ntb_hw_intel.c | 3817 ++++++++++++++------------- drivers/ntb/hw/intel/ntb_hw_intel.h | 601 ++--- drivers/ntb/ntb_transport.c | 940 ++++--- include/linux/ntb_transport.h | 25 +- 13 files changed, 2958 insertions(+), 2564 deletions(-) create mode 100644 drivers/ntb/hw/Kconfig create mode 100644 drivers/ntb/hw/Makefile create mode 100644 drivers/ntb/hw/intel/Kconfig create mode 100644 drivers/ntb/hw/intel/Makefile diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt index 9d46dc9712a8..725ba1e6c127 100644 --- a/Documentation/ntb.txt +++ b/Documentation/ntb.txt @@ -26,7 +26,33 @@ as ntb hardware, or hardware drivers, are inserted and removed. The registration uses the Linux Device framework, so it should feel familiar to anyone who has written a pci driver. +### NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev) + +The primary client for NTB is the Transport client, used in tandem with NTB +Netdev. These drivers function together to create a logical link to the peer, +across the ntb, to exchange packets of network data. The Transport client +establishes a logical link to the peer, and creates queue pairs to exchange +messages and data. The NTB Netdev then creates an ethernet device using a +Transport queue pair. Network data is copied between socket buffers and the +Transport queue pair buffer. The Transport client may be used for other things +besides Netdev, however no other applications have yet been written. + ## NTB Hardware Drivers NTB hardware drivers should register devices with the NTB core driver. After registering, clients probe and remove functions will be called. + +### NTB Intel Hardware Driver (ntb\_hw\_intel) + +The Intel hardware driver supports NTB on Xeon and Atom CPUs. + +Module Parameters: + +* b2b\_mw\_idx - If the peer ntb is to be accessed via a memory window, then use + this memory window to access the peer ntb. A value of zero or positive + starts from the first mw idx, and a negative value starts from the last + mw idx. Both sides MUST set the same value here! The default value is + `-1`. +* b2b\_mw\_share - If the peer ntb is to be accessed via a memory window, and if + the memory window is large enough, still allow the client to use the + second half of the memory window for address translation to the peer. diff --git a/MAINTAINERS b/MAINTAINERS index e2fc9eec2e16..a682805d6d56 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7007,6 +7007,14 @@ F: drivers/net/ntb_netdev.c F: include/linux/ntb.h F: include/linux/ntb_transport.h +NTB INTEL DRIVER +M: Jon Mason +M: Dave Jiang +S: Supported +W: https://github.com/jonmason/ntb/wiki +T: git git://github.com/jonmason/ntb.git +F: drivers/ntb/hw/intel/ + NTFS FILESYSTEM M: Anton Altaparmakov L: linux-ntfs-dev@lists.sourceforge.net diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 6d3bfa62f5ec..3cc316cb7e6b 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Network Linux driver + * PCIe NTB Network Linux driver * * Contact Information: * Jon Mason @@ -49,6 +51,7 @@ #include #include #include +#include #include #define NTB_NETDEV_VER "0.7" @@ -70,26 +73,19 @@ struct ntb_netdev { static LIST_HEAD(dev_list); -static void ntb_netdev_event_handler(void *data, int status) +static void ntb_netdev_event_handler(void *data, int link_is_up) { struct net_device *ndev = data; struct ntb_netdev *dev = netdev_priv(ndev); - netdev_dbg(ndev, "Event %x, Link %x\n", status, + netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up, ntb_transport_link_query(dev->qp)); - switch (status) { - case NTB_LINK_DOWN: + if (link_is_up) { + if (ntb_transport_link_query(dev->qp)) + netif_carrier_on(ndev); + } else { netif_carrier_off(ndev); - break; - case NTB_LINK_UP: - if (!ntb_transport_link_query(dev->qp)) - return; - - netif_carrier_on(ndev); - break; - default: - netdev_warn(ndev, "Unsupported event type %d\n", status); } } @@ -160,8 +156,6 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct ntb_netdev *dev = netdev_priv(ndev); int rc; - netdev_dbg(ndev, "%s: skb len %d\n", __func__, skb->len); - rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); if (rc) goto err; @@ -322,20 +316,26 @@ static const struct ntb_queue_handlers ntb_netdev_handlers = { .event_handler = ntb_netdev_event_handler, }; -static int ntb_netdev_probe(struct pci_dev *pdev) +static int ntb_netdev_probe(struct device *client_dev) { + struct ntb_dev *ntb; struct net_device *ndev; + struct pci_dev *pdev; struct ntb_netdev *dev; int rc; - ndev = alloc_etherdev(sizeof(struct ntb_netdev)); + ntb = dev_ntb(client_dev->parent); + pdev = ntb->pdev; + if (!pdev) + return -ENODEV; + + ndev = alloc_etherdev(sizeof(*dev)); if (!ndev) return -ENOMEM; dev = netdev_priv(ndev); dev->ndev = ndev; dev->pdev = pdev; - BUG_ON(!dev->pdev); ndev->features = NETIF_F_HIGHDMA; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -349,7 +349,8 @@ static int ntb_netdev_probe(struct pci_dev *pdev) ndev->netdev_ops = &ntb_netdev_ops; ndev->ethtool_ops = &ntb_ethtool_ops; - dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers); + dev->qp = ntb_transport_create_queue(ndev, client_dev, + &ntb_netdev_handlers); if (!dev->qp) { rc = -EIO; goto err; @@ -372,12 +373,17 @@ err: return rc; } -static void ntb_netdev_remove(struct pci_dev *pdev) +static void ntb_netdev_remove(struct device *client_dev) { + struct ntb_dev *ntb; struct net_device *ndev; + struct pci_dev *pdev; struct ntb_netdev *dev; bool found = false; + ntb = dev_ntb(client_dev->parent); + pdev = ntb->pdev; + list_for_each_entry(dev, &dev_list, list) { if (dev->pdev == pdev) { found = true; @@ -396,7 +402,7 @@ static void ntb_netdev_remove(struct pci_dev *pdev) free_netdev(ndev); } -static struct ntb_client ntb_netdev_client = { +static struct ntb_transport_client ntb_netdev_client = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .probe = ntb_netdev_probe, @@ -407,7 +413,7 @@ static int __init ntb_netdev_init_module(void) { int rc; - rc = ntb_register_client_dev(KBUILD_MODNAME); + rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; return ntb_transport_register_client(&ntb_netdev_client); @@ -417,6 +423,6 @@ module_init(ntb_netdev_init_module); static void __exit ntb_netdev_exit_module(void) { ntb_transport_unregister_client(&ntb_netdev_client); - ntb_unregister_client_dev(KBUILD_MODNAME); + ntb_transport_unregister_client_dev(KBUILD_MODNAME); } module_exit(ntb_netdev_exit_module); diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index f69df793dbe2..53b042429673 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -1,13 +1,26 @@ -config NTB - tristate "Intel Non-Transparent Bridge support" - depends on PCI - depends on X86 - help - The PCI-E Non-transparent bridge hardware is a point-to-point PCI-E bus - connecting 2 systems. When configured, writes to the device's PCI - mapped memory will be mirrored to a buffer on the remote system. The - ntb Linux driver uses this point-to-point communication as a method to - transfer data from one system to the other. +menuconfig NTB + tristate "Non-Transparent Bridge support" + depends on PCI + help + The PCI-E Non-transparent bridge hardware is a point-to-point PCI-E bus + connecting 2 systems. When configured, writes to the device's PCI + mapped memory will be mirrored to a buffer on the remote system. The + ntb Linux driver uses this point-to-point communication as a method to + transfer data from one system to the other. - If unsure, say N. + If unsure, say N. +if NTB + +source "drivers/ntb/hw/Kconfig" + +config NTB_TRANSPORT + tristate "NTB Transport Client" + help + This is a transport driver that enables connected systems to exchange + messages over the ntb hardware. The transport exposes a queue pair api + to client drivers. + + If unsure, say N. + +endif # NTB diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 712e953a8fda..b9fa663ecfec 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,4 +1,2 @@ -obj-$(CONFIG_NTB) += ntb.o -obj-$(CONFIG_NTB) += ntb_hw_intel.o - -ntb_hw_intel-objs := hw/intel/ntb_hw_intel.o ntb_transport.o +obj-$(CONFIG_NTB) += ntb.o hw/ +obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig new file mode 100644 index 000000000000..4d5535c4cddf --- /dev/null +++ b/drivers/ntb/hw/Kconfig @@ -0,0 +1 @@ +source "drivers/ntb/hw/intel/Kconfig" diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile new file mode 100644 index 000000000000..175d7c92a569 --- /dev/null +++ b/drivers/ntb/hw/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NTB_INTEL) += intel/ diff --git a/drivers/ntb/hw/intel/Kconfig b/drivers/ntb/hw/intel/Kconfig new file mode 100644 index 000000000000..91f995e33ac6 --- /dev/null +++ b/drivers/ntb/hw/intel/Kconfig @@ -0,0 +1,7 @@ +config NTB_INTEL + tristate "Intel Non-Transparent Bridge support" + depends on X86_64 + help + This driver supports Intel NTB on capable Xeon and Atom hardware. + + If unsure, say N. diff --git a/drivers/ntb/hw/intel/Makefile b/drivers/ntb/hw/intel/Makefile new file mode 100644 index 000000000000..1b434568d2ad --- /dev/null +++ b/drivers/ntb/hw/intel/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NTB_INTEL) += ntb_hw_intel.o diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 044534a995f1..686091756ba4 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -45,6 +47,7 @@ * Contact Information: * Jon Mason */ + #include #include #include @@ -53,40 +56,2022 @@ #include #include #include +#include + #include "ntb_hw_intel.h" -#define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver" -#define NTB_VER "1.0" +#define NTB_NAME "ntb_hw_intel" +#define NTB_DESC "Intel(R) PCI-E Non-Transparent Bridge Driver" +#define NTB_VER "2.0" -MODULE_DESCRIPTION(NTB_NAME); +MODULE_DESCRIPTION(NTB_DESC); MODULE_VERSION(NTB_VER); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); -enum { - NTB_CONN_TRANSPARENT = 0, - NTB_CONN_B2B, - NTB_CONN_RP, -}; +#define bar0_off(base, bar) ((base) + ((bar) << 2)) +#define bar2_off(base, bar) bar0_off(base, (bar) - 2) -enum { - NTB_DEV_USD = 0, - NTB_DEV_DSD, -}; +static int b2b_mw_idx = -1; +module_param(b2b_mw_idx, int, 0644); +MODULE_PARM_DESC(b2b_mw_idx, "Use this mw idx to access the peer ntb. A " + "value of zero or positive starts from first mw idx, and a " + "negative value starts from last mw idx. Both sides MUST " + "set the same value here!"); -enum { - SNB_HW = 0, - BWD_HW, -}; +static unsigned int b2b_mw_share; +module_param(b2b_mw_share, uint, 0644); +MODULE_PARM_DESC(b2b_mw_share, "If the b2b mw is large enough, configure the " + "ntb so that the peer ntb only occupies the first half of " + "the mw, so the second half can still be used as a mw. Both " + "sides MUST set the same value here!"); +static const struct intel_ntb_reg bwd_reg; +static const struct intel_ntb_alt_reg bwd_pri_reg; +static const struct intel_ntb_alt_reg bwd_sec_reg; +static const struct intel_ntb_alt_reg bwd_b2b_reg; +static const struct intel_ntb_xlat_reg bwd_pri_xlat; +static const struct intel_ntb_xlat_reg bwd_sec_xlat; +static const struct intel_ntb_reg snb_reg; +static const struct intel_ntb_alt_reg snb_pri_reg; +static const struct intel_ntb_alt_reg snb_sec_reg; +static const struct intel_ntb_alt_reg snb_b2b_reg; +static const struct intel_ntb_xlat_reg snb_pri_xlat; +static const struct intel_ntb_xlat_reg snb_sec_xlat; +static const struct intel_b2b_addr snb_b2b_usd_addr; +static const struct intel_b2b_addr snb_b2b_dsd_addr; + +static const struct ntb_dev_ops intel_ntb_ops; + +static const struct file_operations intel_ntb_debugfs_info; static struct dentry *debugfs_dir; -#define BWD_LINK_RECOVERY_TIME 500 +#ifndef ioread64 +#ifdef readq +#define ioread64 readq +#else +#define ioread64 _ioread64 +static inline u64 _ioread64(void __iomem *mmio) +{ + u64 low, high; -/* Translate memory window 0,1,2 to BAR 2,4,5 */ -#define MW_TO_BAR(mw) (mw == 0 ? 2 : (mw == 1 ? 4 : 5)) + low = ioread32(mmio); + high = ioread32(mmio + sizeof(u32)); + return low | (high << 32); +} +#endif +#endif -static const struct pci_device_id ntb_pci_tbl[] = { +#ifndef iowrite64 +#ifdef writeq +#define iowrite64 writeq +#else +#define iowrite64 _iowrite64 +static inline void _iowrite64(u64 val, void __iomem *mmio) +{ + iowrite32(val, mmio); + iowrite32(val >> 32, mmio + sizeof(u32)); +} +#endif +#endif + +static inline int pdev_is_bwd(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: + return 1; + } + return 0; +} + +static inline int pdev_is_snb(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + return 1; + } + return 0; +} + +static inline void ndev_reset_unsafe_flags(struct intel_ntb_dev *ndev) +{ + ndev->unsafe_flags = 0; + ndev->unsafe_flags_ignore = 0; + + /* Only B2B has a workaround to avoid SDOORBELL */ + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) + if (!ntb_topo_is_b2b(ndev->ntb.topo)) + ndev->unsafe_flags |= NTB_UNSAFE_DB; + + /* No low level workaround to avoid SB01BASE */ + if (ndev->hwerr_flags & NTB_HWERR_SB01BASE_LOCKUP) { + ndev->unsafe_flags |= NTB_UNSAFE_DB; + ndev->unsafe_flags |= NTB_UNSAFE_SPAD; + } +} + +static inline int ndev_is_unsafe(struct intel_ntb_dev *ndev, + unsigned long flag) +{ + return !!(flag & ndev->unsafe_flags & ~ndev->unsafe_flags_ignore); +} + +static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev, + unsigned long flag) +{ + flag &= ndev->unsafe_flags; + ndev->unsafe_flags_ignore |= flag; + + return !!flag; +} + +static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) +{ + if (idx < 0 || idx > ndev->mw_count) + return -EINVAL; + return ndev->reg->mw_bar[idx]; +} + +static inline int ndev_db_addr(struct intel_ntb_dev *ndev, + phys_addr_t *db_addr, resource_size_t *db_size, + phys_addr_t reg_addr, unsigned long reg) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + + if (db_addr) { + *db_addr = reg_addr + reg; + dev_dbg(ndev_dev(ndev), "Peer db addr %llx\n", *db_addr); + } + + if (db_size) { + *db_size = ndev->reg->db_size; + dev_dbg(ndev_dev(ndev), "Peer db size %llx\n", *db_size); + } + + return 0; +} + +static inline u64 ndev_db_read(struct intel_ntb_dev *ndev, + void __iomem *mmio) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + + return ndev->reg->db_ioread(mmio); +} + +static inline int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + ndev->reg->db_iowrite(db_bits, mmio); + + return 0; +} + +static inline int ndev_db_set_mask(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + unsigned long irqflags; + + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + spin_lock_irqsave(&ndev->db_mask_lock, irqflags); + { + ndev->db_mask |= db_bits; + ndev->reg->db_iowrite(ndev->db_mask, mmio); + } + spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags); + + return 0; +} + +static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + unsigned long irqflags; + + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + spin_lock_irqsave(&ndev->db_mask_lock, irqflags); + { + ndev->db_mask &= ~db_bits; + ndev->reg->db_iowrite(ndev->db_mask, mmio); + } + spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags); + + return 0; +} + +static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) +{ + u64 shift, mask; + + shift = ndev->db_vec_shift; + mask = BIT_ULL(shift) - 1; + + return mask << (shift * db_vector); +} + +static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx, + phys_addr_t *spad_addr, phys_addr_t reg_addr, + unsigned long reg) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + + if (idx < 0 || idx >= ndev->spad_count) + return -EINVAL; + + if (spad_addr) { + *spad_addr = reg_addr + reg + (idx << 2); + dev_dbg(ndev_dev(ndev), "Peer spad addr %llx\n", *spad_addr); + } + + return 0; +} + +static inline u32 ndev_spad_read(struct intel_ntb_dev *ndev, int idx, + void __iomem *mmio) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + + if (idx < 0 || idx >= ndev->spad_count) + return 0; + + return ioread32(mmio + (idx << 2)); +} + +static inline int ndev_spad_write(struct intel_ntb_dev *ndev, int idx, u32 val, + void __iomem *mmio) +{ + WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + + if (idx < 0 || idx >= ndev->spad_count) + return -EINVAL; + + iowrite32(val, mmio + (idx << 2)); + + return 0; +} + +static irqreturn_t ndev_interrupt(struct intel_ntb_dev *ndev, int vec) +{ + u64 vec_mask; + + vec_mask = ndev_vec_mask(ndev, vec); + + dev_dbg(ndev_dev(ndev), "vec %d vec_mask %llx\n", vec, vec_mask); + + ndev->last_ts = jiffies; + + if (vec_mask & ndev->db_link_mask) { + if (ndev->reg->poll_link(ndev)) + ntb_link_event(&ndev->ntb); + } + + if (vec_mask & ndev->db_valid_mask) + ntb_db_event(&ndev->ntb, vec); + + return IRQ_HANDLED; +} + +static irqreturn_t ndev_vec_isr(int irq, void *dev) +{ + struct intel_ntb_vec *nvec = dev; + + return ndev_interrupt(nvec->ndev, nvec->num); +} + +static irqreturn_t ndev_irq_isr(int irq, void *dev) +{ + struct intel_ntb_dev *ndev = dev; + + return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq); +} + +static int ndev_init_isr(struct intel_ntb_dev *ndev, + int msix_min, int msix_max, + int msix_shift, int total_shift) +{ + struct pci_dev *pdev; + int rc, i, msix_count; + + pdev = ndev_pdev(ndev); + + /* Mask all doorbell interrupts */ + ndev->db_mask = ndev->db_valid_mask; + ndev->reg->db_iowrite(ndev->db_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + /* Try to set up msix irq */ + + ndev->vec = kcalloc(msix_max, sizeof(*ndev->vec), GFP_KERNEL); + if (!ndev->vec) + goto err_msix_vec_alloc; + + ndev->msix = kcalloc(msix_max, sizeof(*ndev->msix), GFP_KERNEL); + if (!ndev->msix) + goto err_msix_alloc; + + for (i = 0; i < msix_max; ++i) + ndev->msix[i].entry = i; + + msix_count = pci_enable_msix_range(pdev, ndev->msix, + msix_min, msix_max); + if (msix_count < 0) + goto err_msix_enable; + + for (i = 0; i < msix_count; ++i) { + ndev->vec[i].ndev = ndev; + ndev->vec[i].num = i; + rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0, + "ndev_vec_isr", &ndev->vec[i]); + if (rc) + goto err_msix_request; + } + + dev_dbg(ndev_dev(ndev), "Using msix interrupts\n"); + ndev->db_vec_count = msix_count; + ndev->db_vec_shift = msix_shift; + return 0; + +err_msix_request: + while (i-- > 0) + free_irq(ndev->msix[i].vector, ndev); + pci_disable_msix(pdev); +err_msix_enable: + kfree(ndev->msix); +err_msix_alloc: + kfree(ndev->vec); +err_msix_vec_alloc: + ndev->msix = NULL; + ndev->vec = NULL; + + /* Try to set up msi irq */ + + rc = pci_enable_msi(pdev); + if (rc) + goto err_msi_enable; + + rc = request_irq(pdev->irq, ndev_irq_isr, 0, + "ndev_irq_isr", ndev); + if (rc) + goto err_msi_request; + + dev_dbg(ndev_dev(ndev), "Using msi interrupts\n"); + ndev->db_vec_count = 1; + ndev->db_vec_shift = total_shift; + return 0; + +err_msi_request: + pci_disable_msi(pdev); +err_msi_enable: + + /* Try to set up intx irq */ + + pci_intx(pdev, 1); + + rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED, + "ndev_irq_isr", ndev); + if (rc) + goto err_intx_request; + + dev_dbg(ndev_dev(ndev), "Using intx interrupts\n"); + ndev->db_vec_count = 1; + ndev->db_vec_shift = total_shift; + return 0; + +err_intx_request: + return rc; +} + +static void ndev_deinit_isr(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev; + int i; + + pdev = ndev_pdev(ndev); + + /* Mask all doorbell interrupts */ + ndev->db_mask = ndev->db_valid_mask; + ndev->reg->db_iowrite(ndev->db_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + if (ndev->msix) { + i = ndev->db_vec_count; + while (i--) + free_irq(ndev->msix[i].vector, &ndev->vec[i]); + pci_disable_msix(pdev); + kfree(ndev->msix); + kfree(ndev->vec); + } else { + free_irq(pdev->irq, ndev); + if (pci_dev_msi_enabled(pdev)) + pci_disable_msi(pdev); + } +} + +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct intel_ntb_dev *ndev; + void __iomem *mmio; + char *buf; + size_t buf_size; + ssize_t ret, off; + union { u64 v64; u32 v32; u16 v16; } u; + + ndev = filp->private_data; + mmio = ndev->self_mmio; + + buf_size = min(count, 0x800ul); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + off = 0; + + off += scnprintf(buf + off, buf_size - off, + "NTB Device Information:\n"); + + off += scnprintf(buf + off, buf_size - off, + "Connection Topology -\t%s\n", + ntb_topo_string(ndev->ntb.topo)); + + off += scnprintf(buf + off, buf_size - off, + "B2B Offset -\t\t%#lx\n", ndev->b2b_off); + off += scnprintf(buf + off, buf_size - off, + "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); + off += scnprintf(buf + off, buf_size - off, + "BAR4 Split -\t\t%s\n", + ndev->bar4_split ? "yes" : "no"); + + off += scnprintf(buf + off, buf_size - off, + "NTB CTL -\t\t%#06x\n", ndev->ntb_ctl); + off += scnprintf(buf + off, buf_size - off, + "LNK STA -\t\t%#06x\n", ndev->lnk_sta); + + if (!ndev->reg->link_is_up(ndev)) { + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tDown\n"); + } else { + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tUp\n"); + off += scnprintf(buf + off, buf_size - off, + "Link Speed -\t\tPCI-E Gen %u\n", + NTB_LNK_STA_SPEED(ndev->lnk_sta)); + off += scnprintf(buf + off, buf_size - off, + "Link Width -\t\tx%u\n", + NTB_LNK_STA_WIDTH(ndev->lnk_sta)); + } + + off += scnprintf(buf + off, buf_size - off, + "Memory Window Count -\t%u\n", ndev->mw_count); + off += scnprintf(buf + off, buf_size - off, + "Scratchpad Count -\t%u\n", ndev->spad_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Count -\t%u\n", ndev->db_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Count -\t%u\n", ndev->db_vec_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Shift -\t%u\n", ndev->db_vec_shift); + + off += scnprintf(buf + off, buf_size - off, + "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Link Mask -\t%#llx\n", ndev->db_link_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask Cached -\t%#llx\n", ndev->db_mask); + + u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask -\t\t%#llx\n", u.v64); + + u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_bell); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Bell -\t\t%#llx\n", u.v64); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Incoming XLAT:\n"); + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2)); + off += scnprintf(buf + off, buf_size - off, + "XLAT23 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); + off += scnprintf(buf + off, buf_size - off, + "XLAT45 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 2)); + off += scnprintf(buf + off, buf_size - off, + "LMT23 -\t\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); + off += scnprintf(buf + off, buf_size - off, + "LMT45 -\t\t\t%#018llx\n", u.v64); + + if (pdev_is_snb(ndev->ntb.pdev)) { + if (ntb_topo_is_b2b(ndev->ntb.topo)) { + off += scnprintf(buf + off, buf_size - off, + "\nNTB Outgoing B2B XLAT:\n"); + + u.v64 = ioread64(mmio + SNB_PBAR23XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT23 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + SNB_PBAR45XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT45 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + SNB_PBAR23LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT23 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + SNB_PBAR45LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT45 -\t\t%#018llx\n", u.v64); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Secondary BAR:\n"); + + u.v64 = ioread64(mmio + SNB_SBAR0BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR01 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + SNB_SBAR23BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR23 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + SNB_SBAR45BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR45 -\t\t%#018llx\n", u.v64); + } + + off += scnprintf(buf + off, buf_size - off, + "\nSNB NTB Statistics:\n"); + + u.v16 = ioread16(mmio + SNB_USMEMMISS_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "Upstream Memory Miss -\t%u\n", u.v16); + + off += scnprintf(buf + off, buf_size - off, + "\nSNB NTB Hardware Errors:\n"); + + if (!pci_read_config_word(ndev->ntb.pdev, + SNB_DEVSTS_OFFSET, &u.v16)) + off += scnprintf(buf + off, buf_size - off, + "DEVSTS -\t\t%#06x\n", u.v16); + + if (!pci_read_config_word(ndev->ntb.pdev, + SNB_LINK_STATUS_OFFSET, &u.v16)) + off += scnprintf(buf + off, buf_size - off, + "LNKSTS -\t\t%#06x\n", u.v16); + + if (!pci_read_config_dword(ndev->ntb.pdev, + SNB_UNCERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "UNCERRSTS -\t\t%#06x\n", u.v32); + + if (!pci_read_config_dword(ndev->ntb.pdev, + SNB_CORERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "CORERRSTS -\t\t%#06x\n", u.v32); + } + + ret = simple_read_from_buffer(ubuf, count, offp, buf, off); + kfree(buf); + return ret; +} + +static void ndev_init_debugfs(struct intel_ntb_dev *ndev) +{ + if (!debugfs_dir) { + ndev->debugfs_dir = NULL; + ndev->debugfs_info = NULL; + } else { + ndev->debugfs_dir = + debugfs_create_dir(ndev_name(ndev), debugfs_dir); + if (!ndev->debugfs_dir) + ndev->debugfs_info = NULL; + else + ndev->debugfs_info = + debugfs_create_file("info", S_IRUSR, + ndev->debugfs_dir, ndev, + &intel_ntb_debugfs_info); + } +} + +static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev) +{ + debugfs_remove_recursive(ndev->debugfs_dir); +} + +static int intel_ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb_ndev(ntb)->mw_count; +} + +static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, + resource_size_t *size, + resource_size_t *align, + resource_size_t *align_size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + int bar; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + if (base) + *base = pci_resource_start(ndev->ntb.pdev, bar) + + (idx == ndev->b2b_idx ? ndev->b2b_off : 0); + + if (size) + *size = pci_resource_len(ndev->ntb.pdev, bar) - + (idx == ndev->b2b_idx ? ndev->b2b_off : 0); + + if (align) + *align = pci_resource_len(ndev->ntb.pdev, bar); + + if (align_size) + *align_size = 1; + + return 0; +} + +static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + unsigned long base_reg, xlat_reg, limit_reg; + resource_size_t bar_size, mw_size; + void __iomem *mmio; + u64 base, limit, reg_val; + int bar; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + bar_size = pci_resource_len(ndev->ntb.pdev, bar); + + if (idx == ndev->b2b_idx) + mw_size = bar_size - ndev->b2b_off; + else + mw_size = bar_size; + + /* hardware requires that addr is aligned to bar size */ + if (addr & (bar_size - 1)) + return -EINVAL; + + /* make sure the range fits in the usable mw size */ + if (size > mw_size) + return -EINVAL; + + mmio = ndev->self_mmio; + base_reg = bar0_off(ndev->xlat_reg->bar0_base, bar); + xlat_reg = bar2_off(ndev->xlat_reg->bar2_xlat, bar); + limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar); + + if (bar < 4 || !ndev->bar4_split) { + base = ioread64(mmio + base_reg); + + /* Set the limit if supported, if size is not mw_size */ + if (limit_reg && size != mw_size) + limit = base + size; + else + limit = 0; + + /* set and verify setting the translation address */ + iowrite64(addr, mmio + xlat_reg); + reg_val = ioread64(mmio + xlat_reg); + if (reg_val != addr) { + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + + /* set and verify setting the limit */ + iowrite64(limit, mmio + limit_reg); + reg_val = ioread64(mmio + limit_reg); + if (reg_val != limit) { + iowrite64(base, mmio + limit_reg); + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + } else { + /* split bar addr range must all be 32 bit */ + if (addr & (~0ull << 32)) + return -EINVAL; + if ((addr + size) & (~0ull << 32)) + return -EINVAL; + + base = ioread32(mmio + base_reg); + + /* Set the limit if supported, if size is not mw_size */ + if (limit_reg && size != mw_size) + limit = base + size; + else + limit = 0; + + /* set and verify setting the translation address */ + iowrite32(addr, mmio + xlat_reg); + reg_val = ioread32(mmio + xlat_reg); + if (reg_val != addr) { + iowrite32(0, mmio + xlat_reg); + return -EIO; + } + + /* set and verify setting the limit */ + iowrite32(limit, mmio + limit_reg); + reg_val = ioread32(mmio + limit_reg); + if (reg_val != limit) { + iowrite32(base, mmio + limit_reg); + iowrite32(0, mmio + xlat_reg); + return -EIO; + } + } + + return 0; +} + +static int intel_ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, + enum ntb_width *width) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + if (ndev->reg->link_is_up(ndev)) { + if (speed) + *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta); + if (width) + *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta); + return 1; + } else { + /* TODO MAYBE: is it possible to observe the link speed and + * width while link is training? */ + if (speed) + *speed = NTB_SPEED_NONE; + if (width) + *width = NTB_WIDTH_NONE; + return 0; + } +} + +static int intel_ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + struct intel_ntb_dev *ndev; + u32 ntb_ctl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + if (ndev->ntb.topo == NTB_TOPO_SEC) + return -EINVAL; + + dev_dbg(ndev_dev(ndev), + "Enabling link with max_speed %d max_width %d\n", + max_speed, max_width); + if (max_speed != NTB_SPEED_AUTO) + dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed); + if (max_width != NTB_WIDTH_AUTO) + dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width); + + ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK); + ntb_ctl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP; + ntb_ctl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP; + if (ndev->bar4_split) + ntb_ctl |= NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP; + iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); + + return 0; +} + +static int intel_ntb_link_disable(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + u32 ntb_cntl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + if (ndev->ntb.topo == NTB_TOPO_SEC) + return -EINVAL; + + dev_dbg(ndev_dev(ndev), "Disabling link\n"); + + /* Bring NTB link down */ + ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_cntl &= ~(NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP); + ntb_cntl &= ~(NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP); + if (ndev->bar4_split) + ntb_cntl &= ~(NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP); + ntb_cntl |= NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK; + iowrite32(ntb_cntl, ndev->self_mmio + ndev->reg->ntb_ctl); + + return 0; +} + +static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB); +} + +static u64 intel_ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb_ndev(ntb)->db_valid_mask; +} + +static int intel_ntb_db_vector_count(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + return ndev->db_vec_count; +} + +static u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + if (db_vector < 0 || db_vector > ndev->db_vec_count) + return 0; + + return ndev->db_valid_mask & ndev_vec_mask(ndev, db_vector); +} + +static u64 intel_ntb_db_read(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_read(ndev, + ndev->self_mmio + + ndev->self_reg->db_bell); +} + +static int intel_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_write(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_bell); +} + +static int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_set_mask(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_mask); +} + +static int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_clear_mask(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_mask); +} + +static int intel_ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr, + ndev->peer_reg->db_bell); +} + +static int intel_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_write(ndev, db_bits, + ndev->peer_mmio + + ndev->peer_reg->db_bell); +} + +static int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_SPAD); +} + +static int intel_ntb_spad_count(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + return ndev->spad_count; +} + +static u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_read(ndev, idx, + ndev->self_mmio + + ndev->self_reg->spad); +} + +static int intel_ntb_spad_write(struct ntb_dev *ntb, + int idx, u32 val) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_write(ndev, idx, val, + ndev->self_mmio + + ndev->self_reg->spad); +} + +static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_addr(ndev, idx, spad_addr, ndev->peer_addr, + ndev->peer_reg->spad); +} + +static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_read(ndev, idx, + ndev->peer_mmio + + ndev->peer_reg->spad); +} + +static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, + int idx, u32 val) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_write(ndev, idx, val, + ndev->peer_mmio + + ndev->peer_reg->spad); +} + +/* BWD */ + +static u64 bwd_db_ioread(void __iomem *mmio) +{ + return ioread64(mmio); +} + +static void bwd_db_iowrite(u64 bits, void __iomem *mmio) +{ + iowrite64(bits, mmio); +} + +static int bwd_poll_link(struct intel_ntb_dev *ndev) +{ + u32 ntb_ctl; + + ntb_ctl = ioread32(ndev->self_mmio + BWD_NTBCNTL_OFFSET); + + if (ntb_ctl == ndev->ntb_ctl) + return 0; + + ndev->ntb_ctl = ntb_ctl; + + ndev->lnk_sta = ioread32(ndev->self_mmio + BWD_LINK_STATUS_OFFSET); + + return 1; +} + +static int bwd_link_is_up(struct intel_ntb_dev *ndev) +{ + return BWD_NTB_CTL_ACTIVE(ndev->ntb_ctl); +} + +static int bwd_link_is_err(struct intel_ntb_dev *ndev) +{ + if (ioread32(ndev->self_mmio + BWD_LTSSMSTATEJMP_OFFSET) + & BWD_LTSSMSTATEJMP_FORCEDETECT) + return 1; + + if (ioread32(ndev->self_mmio + BWD_IBSTERRRCRVSTS0_OFFSET) + & BWD_IBIST_ERR_OFLOW) + return 1; + + return 0; +} + +static inline enum ntb_topo bwd_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +{ + switch (ppd & BWD_PPD_TOPO_MASK) { + case BWD_PPD_TOPO_B2B_USD: + dev_dbg(ndev_dev(ndev), "PPD %d B2B USD\n", ppd); + return NTB_TOPO_B2B_USD; + + case BWD_PPD_TOPO_B2B_DSD: + dev_dbg(ndev_dev(ndev), "PPD %d B2B DSD\n", ppd); + return NTB_TOPO_B2B_DSD; + + case BWD_PPD_TOPO_PRI_USD: + case BWD_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + case BWD_PPD_TOPO_SEC_USD: + case BWD_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + dev_dbg(ndev_dev(ndev), "PPD %d non B2B disabled\n", ppd); + return NTB_TOPO_NONE; + } + + dev_dbg(ndev_dev(ndev), "PPD %d invalid\n", ppd); + return NTB_TOPO_NONE; +} + +static void bwd_link_hb(struct work_struct *work) +{ + struct intel_ntb_dev *ndev = hb_ndev(work); + unsigned long poll_ts; + void __iomem *mmio; + u32 status32; + + poll_ts = ndev->last_ts + BWD_LINK_HB_TIMEOUT; + + /* Delay polling the link status if an interrupt was received, + * unless the cached link status says the link is down. + */ + if (time_after(poll_ts, jiffies) && bwd_link_is_up(ndev)) { + schedule_delayed_work(&ndev->hb_timer, poll_ts - jiffies); + return; + } + + if (bwd_poll_link(ndev)) + ntb_link_event(&ndev->ntb); + + if (bwd_link_is_up(ndev) || !bwd_link_is_err(ndev)) { + schedule_delayed_work(&ndev->hb_timer, BWD_LINK_HB_TIMEOUT); + return; + } + + /* Link is down with error: recover the link! */ + + mmio = ndev->self_mmio; + + /* Driver resets the NTB ModPhy lanes - magic! */ + iowrite8(0xe0, mmio + BWD_MODPHY_PCSREG6); + iowrite8(0x40, mmio + BWD_MODPHY_PCSREG4); + iowrite8(0x60, mmio + BWD_MODPHY_PCSREG4); + iowrite8(0x60, mmio + BWD_MODPHY_PCSREG6); + + /* Driver waits 100ms to allow the NTB ModPhy to settle */ + msleep(100); + + /* Clear AER Errors, write to clear */ + status32 = ioread32(mmio + BWD_ERRCORSTS_OFFSET); + dev_dbg(ndev_dev(ndev), "ERRCORSTS = %x\n", status32); + status32 &= PCI_ERR_COR_REP_ROLL; + iowrite32(status32, mmio + BWD_ERRCORSTS_OFFSET); + + /* Clear unexpected electrical idle event in LTSSM, write to clear */ + status32 = ioread32(mmio + BWD_LTSSMERRSTS0_OFFSET); + dev_dbg(ndev_dev(ndev), "LTSSMERRSTS0 = %x\n", status32); + status32 |= BWD_LTSSMERRSTS0_UNEXPECTEDEI; + iowrite32(status32, mmio + BWD_LTSSMERRSTS0_OFFSET); + + /* Clear DeSkew Buffer error, write to clear */ + status32 = ioread32(mmio + BWD_DESKEWSTS_OFFSET); + dev_dbg(ndev_dev(ndev), "DESKEWSTS = %x\n", status32); + status32 |= BWD_DESKEWSTS_DBERR; + iowrite32(status32, mmio + BWD_DESKEWSTS_OFFSET); + + status32 = ioread32(mmio + BWD_IBSTERRRCRVSTS0_OFFSET); + dev_dbg(ndev_dev(ndev), "IBSTERRRCRVSTS0 = %x\n", status32); + status32 &= BWD_IBIST_ERR_OFLOW; + iowrite32(status32, mmio + BWD_IBSTERRRCRVSTS0_OFFSET); + + /* Releases the NTB state machine to allow the link to retrain */ + status32 = ioread32(mmio + BWD_LTSSMSTATEJMP_OFFSET); + dev_dbg(ndev_dev(ndev), "LTSSMSTATEJMP = %x\n", status32); + status32 &= ~BWD_LTSSMSTATEJMP_FORCEDETECT; + iowrite32(status32, mmio + BWD_LTSSMSTATEJMP_OFFSET); + + /* There is a potential race between the 2 NTB devices recovering at the + * same time. If the times are the same, the link will not recover and + * the driver will be stuck in this loop forever. Add a random interval + * to the recovery time to prevent this race. + */ + schedule_delayed_work(&ndev->hb_timer, BWD_LINK_RECOVERY_TIME + + prandom_u32() % BWD_LINK_RECOVERY_TIME); +} + +static int bwd_init_isr(struct intel_ntb_dev *ndev) +{ + int rc; + + rc = ndev_init_isr(ndev, 1, BWD_DB_MSIX_VECTOR_COUNT, + BWD_DB_MSIX_VECTOR_SHIFT, BWD_DB_TOTAL_SHIFT); + if (rc) + return rc; + + /* BWD doesn't have link status interrupt, poll on that platform */ + ndev->last_ts = jiffies; + INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_hb); + schedule_delayed_work(&ndev->hb_timer, BWD_LINK_HB_TIMEOUT); + + return 0; +} + +static void bwd_deinit_isr(struct intel_ntb_dev *ndev) +{ + cancel_delayed_work_sync(&ndev->hb_timer); + ndev_deinit_isr(ndev); +} + +static int bwd_init_ntb(struct intel_ntb_dev *ndev) +{ + ndev->mw_count = BWD_MW_COUNT; + ndev->spad_count = BWD_SPAD_COUNT; + ndev->db_count = BWD_DB_COUNT; + + switch (ndev->ntb.topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + ndev->self_reg = &bwd_pri_reg; + ndev->peer_reg = &bwd_b2b_reg; + ndev->xlat_reg = &bwd_sec_xlat; + + /* Enable Bus Master and Memory Space on the secondary side */ + iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, + ndev->self_mmio + BWD_SPCICMD_OFFSET); + + break; + + default: + return -EINVAL; + } + + ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + + return 0; +} + +static int bwd_init_dev(struct intel_ntb_dev *ndev) +{ + u32 ppd; + int rc; + + rc = pci_read_config_dword(ndev->ntb.pdev, BWD_PPD_OFFSET, &ppd); + if (rc) + return -EIO; + + ndev->ntb.topo = bwd_ppd_topo(ndev, ppd); + if (ndev->ntb.topo == NTB_TOPO_NONE) + return -EINVAL; + + rc = bwd_init_ntb(ndev); + if (rc) + return rc; + + rc = bwd_init_isr(ndev); + if (rc) + return rc; + + if (ndev->ntb.topo != NTB_TOPO_SEC) { + /* Initiate PCI-E link training */ + rc = pci_write_config_dword(ndev->ntb.pdev, BWD_PPD_OFFSET, + ppd | BWD_PPD_INIT_LINK); + if (rc) + return rc; + } + + return 0; +} + +static void bwd_deinit_dev(struct intel_ntb_dev *ndev) +{ + bwd_deinit_isr(ndev); +} + +/* SNB */ + +static u64 snb_db_ioread(void __iomem *mmio) +{ + return (u64)ioread16(mmio); +} + +static void snb_db_iowrite(u64 bits, void __iomem *mmio) +{ + iowrite16((u16)bits, mmio); +} + +static int snb_poll_link(struct intel_ntb_dev *ndev) +{ + u16 reg_val; + int rc; + + ndev->reg->db_iowrite(ndev->db_link_mask, + ndev->self_mmio + + ndev->self_reg->db_bell); + + rc = pci_read_config_word(ndev->ntb.pdev, + SNB_LINK_STATUS_OFFSET, ®_val); + if (rc) + return 0; + + if (reg_val == ndev->lnk_sta) + return 0; + + ndev->lnk_sta = reg_val; + + return 1; +} + +static int snb_link_is_up(struct intel_ntb_dev *ndev) +{ + return NTB_LNK_STA_ACTIVE(ndev->lnk_sta); +} + +static inline enum ntb_topo snb_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd) +{ + switch (ppd & SNB_PPD_TOPO_MASK) { + case SNB_PPD_TOPO_B2B_USD: + return NTB_TOPO_B2B_USD; + + case SNB_PPD_TOPO_B2B_DSD: + return NTB_TOPO_B2B_DSD; + + case SNB_PPD_TOPO_PRI_USD: + case SNB_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + return NTB_TOPO_PRI; + + case SNB_PPD_TOPO_SEC_USD: + case SNB_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + return NTB_TOPO_SEC; + } + + return NTB_TOPO_NONE; +} + +static inline int snb_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd) +{ + if (ppd & SNB_PPD_SPLIT_BAR_MASK) { + dev_dbg(ndev_dev(ndev), "PPD %d split bar\n", ppd); + return 1; + } + return 0; +} + +static int snb_init_isr(struct intel_ntb_dev *ndev) +{ + return ndev_init_isr(ndev, SNB_DB_MSIX_VECTOR_COUNT, + SNB_DB_MSIX_VECTOR_COUNT, + SNB_DB_MSIX_VECTOR_SHIFT, + SNB_DB_TOTAL_SHIFT); +} + +static void snb_deinit_isr(struct intel_ntb_dev *ndev) +{ + ndev_deinit_isr(ndev); +} + +static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, + const struct intel_b2b_addr *addr, + const struct intel_b2b_addr *peer_addr) +{ + struct pci_dev *pdev; + void __iomem *mmio; + resource_size_t bar_size; + phys_addr_t bar_addr; + int b2b_bar; + u8 bar_sz; + + pdev = ndev_pdev(ndev); + mmio = ndev->self_mmio; + + if (ndev->b2b_idx >= ndev->mw_count) { + dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); + b2b_bar = 0; + ndev->b2b_off = 0; + } else { + b2b_bar = ndev_mw_to_bar(ndev, ndev->b2b_idx); + if (b2b_bar < 0) + return -EIO; + + dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar); + + bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar); + + dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size); + + if (b2b_mw_share && SNB_B2B_MIN_SIZE <= bar_size >> 1) { + dev_dbg(ndev_dev(ndev), + "b2b using first half of bar\n"); + ndev->b2b_off = bar_size >> 1; + } else if (SNB_B2B_MIN_SIZE <= bar_size) { + dev_dbg(ndev_dev(ndev), + "b2b using whole bar\n"); + ndev->b2b_off = 0; + --ndev->mw_count; + } else { + dev_dbg(ndev_dev(ndev), + "b2b bar size is too small\n"); + return -EIO; + } + } + + /* Reset the secondary bar sizes to match the primary bar sizes, + * except disable or halve the size of the b2b secondary bar. + * + * Note: code for each specific bar size register, because the register + * offsets are not in a consistent order (bar5sz comes after ppd, odd). + */ + pci_read_config_byte(pdev, SNB_PBAR23SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR23SZ %#x\n", bar_sz); + if (b2b_bar == 2) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, SNB_SBAR23SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, SNB_SBAR23SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR23SZ %#x\n", bar_sz); + + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, SNB_PBAR45SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR45SZ %#x\n", bar_sz); + if (b2b_bar == 4) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, SNB_SBAR45SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, SNB_SBAR45SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR45SZ %#x\n", bar_sz); + } else { + pci_read_config_byte(pdev, SNB_PBAR4SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR4SZ %#x\n", bar_sz); + if (b2b_bar == 4) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, SNB_SBAR4SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, SNB_SBAR4SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR4SZ %#x\n", bar_sz); + + pci_read_config_byte(pdev, SNB_PBAR5SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR5SZ %#x\n", bar_sz); + if (b2b_bar == 5) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, SNB_SBAR5SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, SNB_SBAR5SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR5SZ %#x\n", bar_sz); + } + + /* SBAR01 hit by first part of the b2b bar */ + if (b2b_bar == 0) + bar_addr = addr->bar0_addr; + else if (b2b_bar == 2) + bar_addr = addr->bar2_addr64; + else if (b2b_bar == 4 && !ndev->bar4_split) + bar_addr = addr->bar4_addr64; + else if (b2b_bar == 4) + bar_addr = addr->bar4_addr32; + else if (b2b_bar == 5) + bar_addr = addr->bar5_addr32; + else + return -EIO; + + dev_dbg(ndev_dev(ndev), "SBAR01 %#018llx\n", bar_addr); + iowrite64(bar_addr, mmio + SNB_SBAR0BASE_OFFSET); + + /* Other SBAR are normally hit by the PBAR xlat, except for b2b bar. + * The b2b bar is either disabled above, or configured half-size, and + * it starts at the PBAR xlat + offset. + */ + + bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + SNB_SBAR23BASE_OFFSET); + bar_addr = ioread64(mmio + SNB_SBAR23BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR23 %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = addr->bar4_addr64 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + SNB_SBAR45BASE_OFFSET); + bar_addr = ioread64(mmio + SNB_SBAR45BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR45 %#018llx\n", bar_addr); + } else { + bar_addr = addr->bar4_addr32 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + SNB_SBAR4BASE_OFFSET); + bar_addr = ioread32(mmio + SNB_SBAR4BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR4 %#010llx\n", bar_addr); + + bar_addr = addr->bar5_addr32 + + (b2b_bar == 5 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + SNB_SBAR5BASE_OFFSET); + bar_addr = ioread32(mmio + SNB_SBAR5BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR5 %#010llx\n", bar_addr); + } + + /* setup incoming bar limits == base addrs (zero length windows) */ + + bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + SNB_SBAR23LMT_OFFSET); + bar_addr = ioread64(mmio + SNB_SBAR23LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR23LMT %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = addr->bar4_addr64 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + SNB_SBAR45LMT_OFFSET); + bar_addr = ioread64(mmio + SNB_SBAR45LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR45LMT %#018llx\n", bar_addr); + } else { + bar_addr = addr->bar4_addr32 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + SNB_SBAR4LMT_OFFSET); + bar_addr = ioread32(mmio + SNB_SBAR4LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR4LMT %#010llx\n", bar_addr); + + bar_addr = addr->bar5_addr32 + + (b2b_bar == 5 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + SNB_SBAR5LMT_OFFSET); + bar_addr = ioread32(mmio + SNB_SBAR5LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR5LMT %#05llx\n", bar_addr); + } + + /* zero incoming translation addrs */ + iowrite64(0, mmio + SNB_SBAR23XLAT_OFFSET); + + if (!ndev->bar4_split) { + iowrite64(0, mmio + SNB_SBAR45XLAT_OFFSET); + } else { + iowrite32(0, mmio + SNB_SBAR4XLAT_OFFSET); + iowrite32(0, mmio + SNB_SBAR5XLAT_OFFSET); + } + + /* zero outgoing translation limits (whole bar size windows) */ + iowrite64(0, mmio + SNB_PBAR23LMT_OFFSET); + if (!ndev->bar4_split) { + iowrite64(0, mmio + SNB_PBAR45LMT_OFFSET); + } else { + iowrite32(0, mmio + SNB_PBAR4LMT_OFFSET); + iowrite32(0, mmio + SNB_PBAR5LMT_OFFSET); + } + + /* set outgoing translation offsets */ + bar_addr = peer_addr->bar2_addr64; + iowrite64(bar_addr, mmio + SNB_PBAR23XLAT_OFFSET); + bar_addr = ioread64(mmio + SNB_PBAR23XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR23XLAT %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = peer_addr->bar4_addr64; + iowrite64(bar_addr, mmio + SNB_PBAR45XLAT_OFFSET); + bar_addr = ioread64(mmio + SNB_PBAR45XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR45XLAT %#018llx\n", bar_addr); + } else { + bar_addr = peer_addr->bar4_addr32; + iowrite32(bar_addr, mmio + SNB_PBAR4XLAT_OFFSET); + bar_addr = ioread32(mmio + SNB_PBAR4XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR4XLAT %#010llx\n", bar_addr); + + bar_addr = peer_addr->bar5_addr32; + iowrite32(bar_addr, mmio + SNB_PBAR5XLAT_OFFSET); + bar_addr = ioread32(mmio + SNB_PBAR5XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR5XLAT %#010llx\n", bar_addr); + } + + /* set the translation offset for b2b registers */ + if (b2b_bar == 0) + bar_addr = peer_addr->bar0_addr; + else if (b2b_bar == 2) + bar_addr = peer_addr->bar2_addr64; + else if (b2b_bar == 4 && !ndev->bar4_split) + bar_addr = peer_addr->bar4_addr64; + else if (b2b_bar == 4) + bar_addr = peer_addr->bar4_addr32; + else if (b2b_bar == 5) + bar_addr = peer_addr->bar5_addr32; + else + return -EIO; + + /* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */ + dev_dbg(ndev_dev(ndev), "B2BXLAT %#018llx\n", bar_addr); + iowrite32(bar_addr, mmio + SNB_B2B_XLAT_OFFSETL); + iowrite32(bar_addr >> 32, mmio + SNB_B2B_XLAT_OFFSETU); + + if (b2b_bar) { + /* map peer ntb mmio config space registers */ + ndev->peer_mmio = pci_iomap(pdev, b2b_bar, + SNB_B2B_MIN_SIZE); + if (!ndev->peer_mmio) + return -EIO; + } + + return 0; +} + +static int snb_init_ntb(struct intel_ntb_dev *ndev) +{ + int rc; + + if (ndev->bar4_split) + ndev->mw_count = HSX_SPLIT_BAR_MW_COUNT; + else + ndev->mw_count = SNB_MW_COUNT; + + ndev->spad_count = SNB_SPAD_COUNT; + ndev->db_count = SNB_DB_COUNT; + ndev->db_link_mask = SNB_DB_LINK_BIT; + + switch (ndev->ntb.topo) { + case NTB_TOPO_PRI: + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + dev_err(ndev_dev(ndev), "NTB Primary config disabled\n"); + return -EINVAL; + } + /* use half the spads for the peer */ + ndev->spad_count >>= 1; + ndev->self_reg = &snb_pri_reg; + ndev->peer_reg = &snb_sec_reg; + ndev->xlat_reg = &snb_sec_xlat; + break; + + case NTB_TOPO_SEC: + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + dev_err(ndev_dev(ndev), "NTB Secondary config disabled\n"); + return -EINVAL; + } + /* use half the spads for the peer */ + ndev->spad_count >>= 1; + ndev->self_reg = &snb_sec_reg; + ndev->peer_reg = &snb_pri_reg; + ndev->xlat_reg = &snb_pri_xlat; + break; + + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + ndev->self_reg = &snb_pri_reg; + ndev->peer_reg = &snb_b2b_reg; + ndev->xlat_reg = &snb_sec_xlat; + + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + ndev->peer_reg = &snb_pri_reg; + + if (b2b_mw_idx < 0) + ndev->b2b_idx = b2b_mw_idx + ndev->mw_count; + else + ndev->b2b_idx = b2b_mw_idx; + + dev_dbg(ndev_dev(ndev), + "setting up b2b mw idx %d means %d\n", + b2b_mw_idx, ndev->b2b_idx); + + } else if (ndev->hwerr_flags & NTB_HWERR_B2BDOORBELL_BIT14) { + dev_warn(ndev_dev(ndev), "Reduce doorbell count by 1\n"); + ndev->db_count -= 1; + } + + if (ndev->ntb.topo == NTB_TOPO_B2B_USD) { + rc = snb_setup_b2b_mw(ndev, + &snb_b2b_dsd_addr, + &snb_b2b_usd_addr); + } else { + rc = snb_setup_b2b_mw(ndev, + &snb_b2b_usd_addr, + &snb_b2b_dsd_addr); + } + if (rc) + return rc; + + /* Enable Bus Master and Memory Space on the secondary side */ + iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, + ndev->self_mmio + SNB_SPCICMD_OFFSET); + + break; + + default: + return -EINVAL; + } + + ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + + ndev->reg->db_iowrite(ndev->db_valid_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + return 0; +} + +static int snb_init_dev(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev; + u8 ppd; + int rc, mem; + + /* There is a Xeon hardware errata related to writes to SDOORBELL or + * B2BDOORBELL in conjunction with inbound access to NTB MMIO Space, + * which may hang the system. To workaround this use the second memory + * window to access the interrupt and scratch pad registers on the + * remote system. + */ + ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; + + /* There is a hardware errata related to accessing any register in + * SB01BASE in the presence of bidirectional traffic crossing the NTB. + */ + ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; + + /* HW Errata on bit 14 of b2bdoorbell register. Writes will not be + * mirrored to the remote system. Shrink the number of bits by one, + * since bit 14 is the last bit. + */ + ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; + + ndev->reg = &snb_reg; + + pdev = ndev_pdev(ndev); + + rc = pci_read_config_byte(pdev, SNB_PPD_OFFSET, &ppd); + if (rc) + return -EIO; + + ndev->ntb.topo = snb_ppd_topo(ndev, ppd); + dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd, + ntb_topo_string(ndev->ntb.topo)); + if (ndev->ntb.topo == NTB_TOPO_NONE) + return -EINVAL; + + if (ndev->ntb.topo != NTB_TOPO_SEC) { + ndev->bar4_split = snb_ppd_bar4_split(ndev, ppd); + dev_dbg(ndev_dev(ndev), "ppd %#x bar4_split %d\n", + ppd, ndev->bar4_split); + } else { + /* This is a way for transparent BAR to figure out if we are + * doing split BAR or not. There is no way for the hw on the + * transparent side to know and set the PPD. + */ + mem = pci_select_bars(pdev, IORESOURCE_MEM); + ndev->bar4_split = hweight32(mem) == + HSX_SPLIT_BAR_MW_COUNT + 1; + dev_dbg(ndev_dev(ndev), "mem %#x bar4_split %d\n", + mem, ndev->bar4_split); + } + + rc = snb_init_ntb(ndev); + if (rc) + return rc; + + return snb_init_isr(ndev); +} + +static void snb_deinit_dev(struct intel_ntb_dev *ndev) +{ + snb_deinit_isr(ndev); +} + +static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) +{ + int rc; + + pci_set_drvdata(pdev, ndev); + + rc = pci_enable_device(pdev); + if (rc) + goto err_pci_enable; + + rc = pci_request_regions(pdev, NTB_NAME); + if (rc) + goto err_pci_regions; + + pci_set_master(pdev); + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) + goto err_dma_mask; + dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n"); + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) + goto err_dma_mask; + dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n"); + } + + ndev->self_mmio = pci_iomap(pdev, 0, 0); + if (!ndev->self_mmio) { + rc = -EIO; + goto err_mmio; + } + ndev->peer_mmio = ndev->self_mmio; + + return 0; + +err_mmio: +err_dma_mask: + pci_clear_master(pdev); + pci_release_regions(pdev); +err_pci_regions: + pci_disable_device(pdev); +err_pci_enable: + pci_set_drvdata(pdev, NULL); + return rc; +} + +static void intel_ntb_deinit_pci(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev = ndev_pdev(ndev); + + if (ndev->peer_mmio && ndev->peer_mmio != ndev->self_mmio) + pci_iounmap(pdev, ndev->peer_mmio); + pci_iounmap(pdev, ndev->self_mmio); + + pci_clear_master(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static inline void ndev_init_struct(struct intel_ntb_dev *ndev, + struct pci_dev *pdev) +{ + ndev->ntb.pdev = pdev; + ndev->ntb.topo = NTB_TOPO_NONE; + ndev->ntb.ops = &intel_ntb_ops; + + ndev->b2b_off = 0; + ndev->b2b_idx = INT_MAX; + + ndev->bar4_split = 0; + + ndev->mw_count = 0; + ndev->spad_count = 0; + ndev->db_count = 0; + ndev->db_vec_count = 0; + ndev->db_vec_shift = 0; + + ndev->ntb_ctl = 0; + ndev->lnk_sta = 0; + + ndev->db_valid_mask = 0; + ndev->db_link_mask = 0; + ndev->db_mask = 0; + + spin_lock_init(&ndev->db_mask_lock); +} + +static int intel_ntb_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct intel_ntb_dev *ndev; + int rc; + + if (pdev_is_bwd(pdev)) { + ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); + if (!ndev) { + rc = -ENOMEM; + goto err_ndev; + } + + ndev_init_struct(ndev, pdev); + + rc = intel_ntb_init_pci(ndev, pdev); + if (rc) + goto err_init_pci; + + rc = bwd_init_dev(ndev); + if (rc) + goto err_init_dev; + + } else if (pdev_is_snb(pdev)) { + ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); + if (!ndev) { + rc = -ENOMEM; + goto err_ndev; + } + + ndev_init_struct(ndev, pdev); + + rc = intel_ntb_init_pci(ndev, pdev); + if (rc) + goto err_init_pci; + + rc = snb_init_dev(ndev); + if (rc) + goto err_init_dev; + + } else { + rc = -EINVAL; + goto err_ndev; + } + + ndev_reset_unsafe_flags(ndev); + + ndev->reg->poll_link(ndev); + + ndev_init_debugfs(ndev); + + rc = ntb_register_device(&ndev->ntb); + if (rc) + goto err_register; + + return 0; + +err_register: + ndev_deinit_debugfs(ndev); + if (pdev_is_bwd(pdev)) + bwd_deinit_dev(ndev); + else if (pdev_is_snb(pdev)) + snb_deinit_dev(ndev); +err_init_dev: + intel_ntb_deinit_pci(ndev); +err_init_pci: + kfree(ndev); +err_ndev: + return rc; +} + +static void intel_ntb_pci_remove(struct pci_dev *pdev) +{ + struct intel_ntb_dev *ndev = pci_get_drvdata(pdev); + + ntb_unregister_device(&ndev->ntb); + ndev_deinit_debugfs(ndev); + if (pdev_is_bwd(pdev)) + bwd_deinit_dev(ndev); + else if (pdev_is_snb(pdev)) + snb_deinit_dev(ndev); + intel_ntb_deinit_pci(ndev); + kfree(ndev); +} + +static const struct intel_ntb_reg bwd_reg = { + .poll_link = bwd_poll_link, + .link_is_up = bwd_link_is_up, + .db_ioread = bwd_db_ioread, + .db_iowrite = bwd_db_iowrite, + .db_size = sizeof(u64), + .ntb_ctl = BWD_NTBCNTL_OFFSET, + .mw_bar = {2, 4}, +}; + +static const struct intel_ntb_alt_reg bwd_pri_reg = { + .db_bell = BWD_PDOORBELL_OFFSET, + .db_mask = BWD_PDBMSK_OFFSET, + .spad = BWD_SPAD_OFFSET, +}; + +static const struct intel_ntb_alt_reg bwd_b2b_reg = { + .db_bell = BWD_B2B_DOORBELL_OFFSET, + .spad = BWD_B2B_SPAD_OFFSET, +}; + +static const struct intel_ntb_xlat_reg bwd_sec_xlat = { + /* FIXME : .bar0_base = BWD_SBAR0BASE_OFFSET, */ + /* FIXME : .bar2_limit = BWD_SBAR2LMT_OFFSET, */ + .bar2_xlat = BWD_SBAR2XLAT_OFFSET, +}; + +static const struct intel_ntb_reg snb_reg = { + .poll_link = snb_poll_link, + .link_is_up = snb_link_is_up, + .db_ioread = snb_db_ioread, + .db_iowrite = snb_db_iowrite, + .db_size = sizeof(u32), + .ntb_ctl = SNB_NTBCNTL_OFFSET, + .mw_bar = {2, 4, 5}, +}; + +static const struct intel_ntb_alt_reg snb_pri_reg = { + .db_bell = SNB_PDOORBELL_OFFSET, + .db_mask = SNB_PDBMSK_OFFSET, + .spad = SNB_SPAD_OFFSET, +}; + +static const struct intel_ntb_alt_reg snb_sec_reg = { + .db_bell = SNB_SDOORBELL_OFFSET, + .db_mask = SNB_SDBMSK_OFFSET, + /* second half of the scratchpads */ + .spad = SNB_SPAD_OFFSET + (SNB_SPAD_COUNT << 1), +}; + +static const struct intel_ntb_alt_reg snb_b2b_reg = { + .db_bell = SNB_B2B_DOORBELL_OFFSET, + .spad = SNB_B2B_SPAD_OFFSET, +}; + +static const struct intel_ntb_xlat_reg snb_pri_xlat = { + /* Note: no primary .bar0_base visible to the secondary side. + * + * The secondary side cannot get the base address stored in primary + * bars. The base address is necessary to set the limit register to + * any value other than zero, or unlimited. + * + * WITHOUT THE BASE ADDRESS, THE SECONDARY SIDE CANNOT DISABLE the + * window by setting the limit equal to base, nor can it limit the size + * of the memory window by setting the limit to base + size. + */ + .bar2_limit = SNB_PBAR23LMT_OFFSET, + .bar2_xlat = SNB_PBAR23XLAT_OFFSET, +}; + +static const struct intel_ntb_xlat_reg snb_sec_xlat = { + .bar0_base = SNB_SBAR0BASE_OFFSET, + .bar2_limit = SNB_SBAR23LMT_OFFSET, + .bar2_xlat = SNB_SBAR23XLAT_OFFSET, +}; + +static const struct intel_b2b_addr snb_b2b_usd_addr = { + .bar2_addr64 = SNB_B2B_BAR2_USD_ADDR64, + .bar4_addr64 = SNB_B2B_BAR4_USD_ADDR64, + .bar4_addr32 = SNB_B2B_BAR4_USD_ADDR32, + .bar5_addr32 = SNB_B2B_BAR5_USD_ADDR32, +}; + +static const struct intel_b2b_addr snb_b2b_dsd_addr = { + .bar2_addr64 = SNB_B2B_BAR2_DSD_ADDR64, + .bar4_addr64 = SNB_B2B_BAR4_DSD_ADDR64, + .bar4_addr32 = SNB_B2B_BAR4_DSD_ADDR32, + .bar5_addr32 = SNB_B2B_BAR5_DSD_ADDR32, +}; + +/* operations for primary side of local ntb */ +static const struct ntb_dev_ops intel_ntb_ops = { + .mw_count = intel_ntb_mw_count, + .mw_get_range = intel_ntb_mw_get_range, + .mw_set_trans = intel_ntb_mw_set_trans, + .link_is_up = intel_ntb_link_is_up, + .link_enable = intel_ntb_link_enable, + .link_disable = intel_ntb_link_disable, + .db_is_unsafe = intel_ntb_db_is_unsafe, + .db_valid_mask = intel_ntb_db_valid_mask, + .db_vector_count = intel_ntb_db_vector_count, + .db_vector_mask = intel_ntb_db_vector_mask, + .db_read = intel_ntb_db_read, + .db_clear = intel_ntb_db_clear, + .db_set_mask = intel_ntb_db_set_mask, + .db_clear_mask = intel_ntb_db_clear_mask, + .peer_db_addr = intel_ntb_peer_db_addr, + .peer_db_set = intel_ntb_peer_db_set, + .spad_is_unsafe = intel_ntb_spad_is_unsafe, + .spad_count = intel_ntb_spad_count, + .spad_read = intel_ntb_spad_read, + .spad_write = intel_ntb_spad_write, + .peer_spad_addr = intel_ntb_peer_spad_addr, + .peer_spad_read = intel_ntb_peer_spad_read, + .peer_spad_write = intel_ntb_peer_spad_write, +}; + +static const struct file_operations intel_ntb_debugfs_info = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ndev_debugfs_read, +}; + +static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, @@ -102,1793 +2087,29 @@ static const struct pci_device_id ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, {0} }; -MODULE_DEVICE_TABLE(pci, ntb_pci_tbl); +MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); -static int is_ntb_xeon(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: - case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: - case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: - case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: - return 1; - default: - return 0; - } - - return 0; -} - -static int is_ntb_atom(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: - return 1; - default: - return 0; - } - - return 0; -} - -static void ntb_set_errata_flags(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - /* - * this workaround applies to all platform up to IvyBridge - * Haswell has splitbar support and use a different workaround - */ - case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: - case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: - case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: - case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: - ndev->wa_flags |= WA_SNB_ERR; - break; - } -} - -/** - * ntb_register_event_callback() - register event callback - * @ndev: pointer to ntb_device instance - * @func: callback function to register - * - * This function registers a callback for any HW driver events such as link - * up/down, power management notices and etc. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_register_event_callback(struct ntb_device *ndev, - void (*func)(void *handle, - enum ntb_hw_event event)) -{ - if (ndev->event_cb) - return -EINVAL; - - ndev->event_cb = func; - - return 0; -} - -/** - * ntb_unregister_event_callback() - unregisters the event callback - * @ndev: pointer to ntb_device instance - * - * This function unregisters the existing callback from transport - */ -void ntb_unregister_event_callback(struct ntb_device *ndev) -{ - ndev->event_cb = NULL; -} - -static void ntb_irq_work(unsigned long data) -{ - struct ntb_db_cb *db_cb = (struct ntb_db_cb *)data; - int rc; - - rc = db_cb->callback(db_cb->data, db_cb->db_num); - if (rc) - tasklet_schedule(&db_cb->irq_work); - else { - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - mask = readw(ndev->reg_ofs.ldb_mask); - clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - } -} - -/** - * ntb_register_db_callback() - register a callback for doorbell interrupt - * @ndev: pointer to ntb_device instance - * @idx: doorbell index to register callback, zero based - * @data: pointer to be returned to caller with every callback - * @func: callback function to register - * - * This function registers a callback function for the doorbell interrupt - * on the primary side. The function will unmask the doorbell as well to - * allow interrupt. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx, - void *data, int (*func)(void *data, int db_num)) -{ - unsigned long mask; - - if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) { - dev_warn(&ndev->pdev->dev, "Invalid Index.\n"); - return -EINVAL; - } - - ndev->db_cb[idx].callback = func; - ndev->db_cb[idx].data = data; - ndev->db_cb[idx].ndev = ndev; - - tasklet_init(&ndev->db_cb[idx].irq_work, ntb_irq_work, - (unsigned long) &ndev->db_cb[idx]); - - /* unmask interrupt */ - mask = readw(ndev->reg_ofs.ldb_mask); - clear_bit(idx * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - return 0; -} - -/** - * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt - * @ndev: pointer to ntb_device instance - * @idx: doorbell index to register callback, zero based - * - * This function unregisters a callback function for the doorbell interrupt - * on the primary side. The function will also mask the said doorbell. - */ -void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx) -{ - unsigned long mask; - - if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback) - return; - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(idx * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_disable(&ndev->db_cb[idx].irq_work); - - ndev->db_cb[idx].callback = NULL; -} - -/** - * ntb_find_transport() - find the transport pointer - * @transport: pointer to pci device - * - * Given the pci device pointer, return the transport pointer passed in when - * the transport attached when it was inited. - * - * RETURNS: pointer to transport. - */ -void *ntb_find_transport(struct pci_dev *pdev) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - return ndev->ntb_transport; -} - -/** - * ntb_register_transport() - Register NTB transport with NTB HW driver - * @transport: transport identifier - * - * This function allows a transport to reserve the hardware driver for - * NTB usage. - * - * RETURNS: pointer to ntb_device, NULL on error. - */ -struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - - if (ndev->ntb_transport) - return NULL; - - ndev->ntb_transport = transport; - return ndev; -} - -/** - * ntb_unregister_transport() - Unregister the transport with the NTB HW driver - * @ndev - ntb_device of the transport to be freed - * - * This function unregisters the transport from the HW driver and performs any - * necessary cleanups. - */ -void ntb_unregister_transport(struct ntb_device *ndev) -{ - int i; - - if (!ndev->ntb_transport) - return; - - for (i = 0; i < ndev->max_cbs; i++) - ntb_unregister_db_callback(ndev, i); - - ntb_unregister_event_callback(ndev); - ndev->ntb_transport = NULL; -} - -/** - * ntb_write_local_spad() - write to the secondary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. This writes over the data mirrored to the local scratchpad register - * by the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n", - val, idx); - writel(val, ndev->reg_ofs.spad_read + idx * 4); - - return 0; -} - -/** - * ntb_read_local_spad() - read from the primary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. This allows the local system to read data - * written and mirrored to the scratchpad register by the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - *val = readl(ndev->reg_ofs.spad_write + idx * 4); - dev_dbg(&ndev->pdev->dev, - "Reading %x from local scratch pad index %d\n", *val, idx); - - return 0; -} - -/** - * ntb_write_remote_spad() - write to the secondary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. The register resides on the secondary (external) side. This allows - * the local system to write data to be mirrored to the remote systems - * scratchpad register. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n", - val, idx); - writel(val, ndev->reg_ofs.spad_write + idx * 4); - - return 0; -} - -/** - * ntb_read_remote_spad() - read from the primary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. This alloows the local system to read the data - * it wrote to be mirrored on the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - *val = readl(ndev->reg_ofs.spad_read + idx * 4); - dev_dbg(&ndev->pdev->dev, - "Reading %x from remote scratch pad index %d\n", *val, idx); - - return 0; -} - -/** - * ntb_get_mw_base() - get addr for the NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the base address of the memory window specified. - * - * RETURNS: address, or NULL on error. - */ -resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return 0; - - return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); -} - -/** - * ntb_get_mw_vbase() - get virtual addr for the NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the base virtual address of the memory window - * specified. - * - * RETURNS: pointer to virtual address, or NULL on error. - */ -void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return NULL; - - return ndev->mw[mw].vbase; -} - -/** - * ntb_get_mw_size() - return size of NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the physical size of the memory window specified - * - * RETURNS: the size of the memory window or zero on error - */ -u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return 0; - - return ndev->mw[mw].bar_sz; -} - -/** - * ntb_set_mw_addr - set the memory window address - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * @addr: base address for data - * - * This function sets the base physical address of the memory window. This - * memory address is where data from the remote system will be transfered into - * or out of depending on how the transport is configured. - */ -void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr) -{ - if (mw >= ntb_max_mw(ndev)) - return; - - dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr, - MW_TO_BAR(mw)); - - ndev->mw[mw].phys_addr = addr; - - switch (MW_TO_BAR(mw)) { - case NTB_BAR_23: - writeq(addr, ndev->reg_ofs.bar2_xlat); - break; - case NTB_BAR_4: - if (ndev->split_bar) - writel(addr, ndev->reg_ofs.bar4_xlat); - else - writeq(addr, ndev->reg_ofs.bar4_xlat); - break; - case NTB_BAR_5: - writel(addr, ndev->reg_ofs.bar5_xlat); - break; - } -} - -/** - * ntb_ring_doorbell() - Set the doorbell on the secondary/external side - * @ndev: pointer to ntb_device instance - * @db: doorbell to ring - * - * This function allows triggering of a doorbell on the secondary/external - * side that will initiate an interrupt on the remote host - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db) -{ - dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db); - - if (ndev->hw_type == BWD_HW) - writeq((u64) 1 << db, ndev->reg_ofs.rdb); - else - writew(((1 << ndev->bits_per_vector) - 1) << - (db * ndev->bits_per_vector), ndev->reg_ofs.rdb); -} - -static void bwd_recover_link(struct ntb_device *ndev) -{ - u32 status; - - /* Driver resets the NTB ModPhy lanes - magic! */ - writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6); - writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4); - writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4); - writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6); - - /* Driver waits 100ms to allow the NTB ModPhy to settle */ - msleep(100); - - /* Clear AER Errors, write to clear */ - status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET); - dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status); - status &= PCI_ERR_COR_REP_ROLL; - writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET); - - /* Clear unexpected electrical idle event in LTSSM, write to clear */ - status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET); - dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status); - status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI; - writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET); - - /* Clear DeSkew Buffer error, write to clear */ - status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET); - dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status); - status |= BWD_DESKEWSTS_DBERR; - writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET); - - status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status); - status &= BWD_IBIST_ERR_OFLOW; - writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - - /* Releases the NTB state machine to allow the link to retrain */ - status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); - dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status); - status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT; - writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); -} - -static void ntb_link_event(struct ntb_device *ndev, int link_state) -{ - unsigned int event; - - if (ndev->link_status == link_state) - return; - - if (link_state == NTB_LINK_UP) { - u16 status; - - dev_info(&ndev->pdev->dev, "Link Up\n"); - ndev->link_status = NTB_LINK_UP; - event = NTB_EVENT_HW_LINK_UP; - - if (is_ntb_atom(ndev) || - ndev->conn_type == NTB_CONN_TRANSPARENT) - status = readw(ndev->reg_ofs.lnk_stat); - else { - int rc = pci_read_config_word(ndev->pdev, - SNB_LINK_STATUS_OFFSET, - &status); - if (rc) - return; - } - - ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4; - ndev->link_speed = (status & NTB_LINK_SPEED_MASK); - dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n", - ndev->link_width, ndev->link_speed); - } else { - dev_info(&ndev->pdev->dev, "Link Down\n"); - ndev->link_status = NTB_LINK_DOWN; - event = NTB_EVENT_HW_LINK_DOWN; - /* Don't modify link width/speed, we need it in link recovery */ - } - - /* notify the upper layer if we have an event change */ - if (ndev->event_cb) - ndev->event_cb(ndev->ntb_transport, event); -} - -static int ntb_link_status(struct ntb_device *ndev) -{ - int link_state; - - if (is_ntb_atom(ndev)) { - u32 ntb_cntl; - - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - if (ntb_cntl & BWD_CNTL_LINK_DOWN) - link_state = NTB_LINK_DOWN; - else - link_state = NTB_LINK_UP; - } else { - u16 status; - int rc; - - rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET, - &status); - if (rc) - return rc; - - if (status & NTB_LINK_STATUS_ACTIVE) - link_state = NTB_LINK_UP; - else - link_state = NTB_LINK_DOWN; - } - - ntb_link_event(ndev, link_state); - - return 0; -} - -static void bwd_link_recovery(struct work_struct *work) -{ - struct ntb_device *ndev = container_of(work, struct ntb_device, - lr_timer.work); - u32 status32; - - bwd_recover_link(ndev); - /* There is a potential race between the 2 NTB devices recovering at the - * same time. If the times are the same, the link will not recover and - * the driver will be stuck in this loop forever. Add a random interval - * to the recovery time to prevent this race. - */ - msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME); - - status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); - if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) - goto retry; - - status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - if (status32 & BWD_IBIST_ERR_OFLOW) - goto retry; - - status32 = readl(ndev->reg_ofs.lnk_cntl); - if (!(status32 & BWD_CNTL_LINK_DOWN)) { - unsigned char speed, width; - u16 status16; - - status16 = readw(ndev->reg_ofs.lnk_stat); - width = (status16 & NTB_LINK_WIDTH_MASK) >> 4; - speed = (status16 & NTB_LINK_SPEED_MASK); - if (ndev->link_width != width || ndev->link_speed != speed) - goto retry; - } - - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); - return; - -retry: - schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT); -} - -/* BWD doesn't have link status interrupt, poll on that platform */ -static void bwd_link_poll(struct work_struct *work) -{ - struct ntb_device *ndev = container_of(work, struct ntb_device, - hb_timer.work); - unsigned long ts = jiffies; - - /* If we haven't gotten an interrupt in a while, check the BWD link - * status bit - */ - if (ts > ndev->last_ts + NTB_HB_TIMEOUT) { - int rc = ntb_link_status(ndev); - if (rc) - dev_err(&ndev->pdev->dev, - "Error determining link status\n"); - - /* Check to see if a link error is the cause of the link down */ - if (ndev->link_status == NTB_LINK_DOWN) { - u32 status32 = readl(ndev->reg_base + - BWD_LTSSMSTATEJMP_OFFSET); - if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) { - schedule_delayed_work(&ndev->lr_timer, 0); - return; - } - } - } - - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); -} - -static int ntb_xeon_setup(struct ntb_device *ndev) -{ - switch (ndev->conn_type) { - case NTB_CONN_B2B: - ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET; - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET; - if (ndev->split_bar) - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_SBAR5XLAT_OFFSET; - ndev->limits.max_spads = SNB_MAX_B2B_SPADS; - - /* There is a Xeon hardware errata related to writes to - * SDOORBELL or B2BDOORBELL in conjunction with inbound access - * to NTB MMIO Space, which may hang the system. To workaround - * this use the second memory window to access the interrupt and - * scratch pad registers on the remote system. - */ - if (ndev->wa_flags & WA_SNB_ERR) { - if (!ndev->mw[ndev->limits.max_mw - 1].bar_sz) - return -EINVAL; - - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - ndev->reg_ofs.spad_write = - ndev->mw[ndev->limits.max_mw - 1].vbase + - SNB_SPAD_OFFSET; - ndev->reg_ofs.rdb = - ndev->mw[ndev->limits.max_mw - 1].vbase + - SNB_PDOORBELL_OFFSET; - - /* Set the Limit register to 4k, the minimum size, to - * prevent an illegal access - */ - writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base + - SNB_PBAR4LMT_OFFSET); - /* HW errata on the Limit registers. They can only be - * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on - * the driver defaults, but write the Limit registers - * first just in case. - */ - - ndev->limits.max_mw = SNB_ERRATA_MAX_MW; - } else { - /* HW Errata on bit 14 of b2bdoorbell register. Writes - * will not be mirrored to the remote system. Shrink - * the number of bits by one, since bit 14 is the last - * bit. - */ - ndev->limits.max_db_bits = SNB_MAX_DB_BITS - 1; - ndev->reg_ofs.spad_write = ndev->reg_base + - SNB_B2B_SPAD_OFFSET; - ndev->reg_ofs.rdb = ndev->reg_base + - SNB_B2B_DOORBELL_OFFSET; - - /* Disable the Limit register, just incase it is set to - * something silly. A 64bit write should handle it - * regardless of whether it has a split BAR or not. - */ - writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET); - /* HW errata on the Limit registers. They can only be - * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on - * the driver defaults, but write the Limit registers - * first just in case. - */ - if (ndev->split_bar) - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - else - ndev->limits.max_mw = SNB_MAX_MW; - } - - /* The Xeon errata workaround requires setting SBAR Base - * addresses to known values, so that the PBAR XLAT can be - * pointed at SBAR0 of the remote system. - */ - if (ndev->dev_type == NTB_DEV_USD) { - writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base + - SNB_PBAR2XLAT_OFFSET); - if (ndev->wa_flags & WA_SNB_ERR) - writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - else { - if (ndev->split_bar) { - writel(SNB_MBAR4_DSD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - writel(SNB_MBAR5_DSD_ADDR, - ndev->reg_base + - SNB_PBAR5XLAT_OFFSET); - } else - writeq(SNB_MBAR4_DSD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - - /* B2B_XLAT_OFFSET is a 64bit register, but can - * only take 32bit writes - */ - writel(SNB_MBAR01_DSD_ADDR & 0xffffffff, - ndev->reg_base + SNB_B2B_XLAT_OFFSETL); - writel(SNB_MBAR01_DSD_ADDR >> 32, - ndev->reg_base + SNB_B2B_XLAT_OFFSETU); - } - - writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base + - SNB_SBAR0BASE_OFFSET); - writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base + - SNB_SBAR2BASE_OFFSET); - if (ndev->split_bar) { - writel(SNB_MBAR4_USD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - writel(SNB_MBAR5_USD_ADDR, ndev->reg_base + - SNB_SBAR5BASE_OFFSET); - } else - writeq(SNB_MBAR4_USD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - } else { - writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base + - SNB_PBAR2XLAT_OFFSET); - if (ndev->wa_flags & WA_SNB_ERR) - writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - else { - if (ndev->split_bar) { - writel(SNB_MBAR4_USD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - writel(SNB_MBAR5_USD_ADDR, - ndev->reg_base + - SNB_PBAR5XLAT_OFFSET); - } else - writeq(SNB_MBAR4_USD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - - /* - * B2B_XLAT_OFFSET is a 64bit register, but can - * only take 32bit writes - */ - writel(SNB_MBAR01_USD_ADDR & 0xffffffff, - ndev->reg_base + SNB_B2B_XLAT_OFFSETL); - writel(SNB_MBAR01_USD_ADDR >> 32, - ndev->reg_base + SNB_B2B_XLAT_OFFSETU); - } - writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base + - SNB_SBAR0BASE_OFFSET); - writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base + - SNB_SBAR2BASE_OFFSET); - if (ndev->split_bar) { - writel(SNB_MBAR4_DSD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - writel(SNB_MBAR5_DSD_ADDR, ndev->reg_base + - SNB_SBAR5BASE_OFFSET); - } else - writeq(SNB_MBAR4_DSD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - - } - break; - case NTB_CONN_RP: - if (ndev->wa_flags & WA_SNB_ERR) { - dev_err(&ndev->pdev->dev, - "NTB-RP disabled due to hardware errata.\n"); - return -EINVAL; - } - - /* Scratch pads need to have exclusive access from the primary - * or secondary side. Halve the num spads so that each side can - * have an equal amount. - */ - ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2; - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - /* Note: The SDOORBELL is the cause of the errata. You REALLY - * don't want to touch it. - */ - ndev->reg_ofs.rdb = ndev->reg_base + SNB_SDOORBELL_OFFSET; - ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET; - /* Offset the start of the spads to correspond to whether it is - * primary or secondary - */ - ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET + - ndev->limits.max_spads * 4; - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET; - if (ndev->split_bar) { - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_SBAR5XLAT_OFFSET; - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - } else - ndev->limits.max_mw = SNB_MAX_MW; - break; - case NTB_CONN_TRANSPARENT: - if (ndev->wa_flags & WA_SNB_ERR) { - dev_err(&ndev->pdev->dev, - "NTB-TRANSPARENT disabled due to hardware errata.\n"); - return -EINVAL; - } - - /* Scratch pads need to have exclusive access from the primary - * or secondary side. Halve the num spads so that each side can - * have an equal amount. - */ - ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2; - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - ndev->reg_ofs.rdb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb = ndev->reg_base + SNB_SDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_SDBMSK_OFFSET; - ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET; - /* Offset the start of the spads to correspond to whether it is - * primary or secondary - */ - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET + - ndev->limits.max_spads * 4; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_PBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_PBAR4XLAT_OFFSET; - - if (ndev->split_bar) { - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_PBAR5XLAT_OFFSET; - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - } else - ndev->limits.max_mw = SNB_MAX_MW; - break; - default: - /* - * we should never hit this. the detect function should've - * take cared of everything. - */ - return -EINVAL; - } - - ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET; - ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_SLINK_STATUS_OFFSET; - ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET; - - ndev->limits.msix_cnt = SNB_MSIX_CNT; - ndev->bits_per_vector = SNB_DB_BITS_PER_VEC; - - return 0; -} - -static int ntb_bwd_setup(struct ntb_device *ndev) -{ - int rc; - u32 val; - - ndev->hw_type = BWD_HW; - - rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val); - if (rc) - return rc; - - switch ((val & BWD_PPD_CONN_TYPE) >> 8) { - case NTB_CONN_B2B: - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - default: - dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n"); - return -EINVAL; - } - - if (val & BWD_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_DSD; - else - ndev->dev_type = NTB_DEV_USD; - - /* Initiate PCI-E link training */ - rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET, - val | BWD_PPD_INIT_LINK); - if (rc) - return rc; - - ndev->reg_ofs.ldb = ndev->reg_base + BWD_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET; - ndev->reg_ofs.rdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET; - ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET; - ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET; - ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET; - ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET; - ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET; - ndev->limits.max_mw = BWD_MAX_MW; - ndev->limits.max_spads = BWD_MAX_SPADS; - ndev->limits.max_db_bits = BWD_MAX_DB_BITS; - ndev->limits.msix_cnt = BWD_MSIX_CNT; - ndev->bits_per_vector = BWD_DB_BITS_PER_VEC; - - /* Since bwd doesn't have a link interrupt, setup a poll timer */ - INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll); - INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery); - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); - - return 0; -} - -static int ntb_device_setup(struct ntb_device *ndev) -{ - int rc; - - if (is_ntb_xeon(ndev)) - rc = ntb_xeon_setup(ndev); - else if (is_ntb_atom(ndev)) - rc = ntb_bwd_setup(ndev); - else - rc = -ENODEV; - - if (rc) - return rc; - - if (ndev->conn_type == NTB_CONN_B2B) - /* Enable Bus Master and Memory Space on the secondary side */ - writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, - ndev->reg_ofs.spci_cmd); - - return 0; -} - -static void ntb_device_free(struct ntb_device *ndev) -{ - if (is_ntb_atom(ndev)) { - cancel_delayed_work_sync(&ndev->hb_timer); - cancel_delayed_work_sync(&ndev->lr_timer); - } -} - -static irqreturn_t bwd_callback_msix_irq(int irq, void *data) -{ - struct ntb_db_cb *db_cb = data; - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq, - db_cb->db_num); - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_schedule(&db_cb->irq_work); - - /* No need to check for the specific HB irq, any interrupt means - * we're connected. - */ - ndev->last_ts = jiffies; - - writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -static irqreturn_t xeon_callback_msix_irq(int irq, void *data) -{ - struct ntb_db_cb *db_cb = data; - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq, - db_cb->db_num); - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_schedule(&db_cb->irq_work); - - /* On Sandybridge, there are 16 bits in the interrupt register - * but only 4 vectors. So, 5 bits are assigned to the first 3 - * vectors, with the 4th having a single bit for link - * interrupts. - */ - writew(((1 << ndev->bits_per_vector) - 1) << - (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -/* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */ -static irqreturn_t xeon_event_msix_irq(int irq, void *dev) -{ - struct ntb_device *ndev = dev; - int rc; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq); - - rc = ntb_link_status(ndev); - if (rc) - dev_err(&ndev->pdev->dev, "Error determining link status\n"); - - /* bit 15 is always the link bit */ - writew(1 << SNB_LINK_DB, ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -static irqreturn_t ntb_interrupt(int irq, void *dev) -{ - struct ntb_device *ndev = dev; - unsigned int i = 0; - - if (is_ntb_atom(ndev)) { - u64 ldb = readq(ndev->reg_ofs.ldb); - - dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %Lx\n", irq, ldb); - - while (ldb) { - i = __ffs(ldb); - ldb &= ldb - 1; - bwd_callback_msix_irq(irq, &ndev->db_cb[i]); - } - } else { - u16 ldb = readw(ndev->reg_ofs.ldb); - - dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %x\n", irq, ldb); - - if (ldb & SNB_DB_HW_LINK) { - xeon_event_msix_irq(irq, dev); - ldb &= ~SNB_DB_HW_LINK; - } - - while (ldb) { - i = __ffs(ldb); - ldb &= ldb - 1; - xeon_callback_msix_irq(irq, &ndev->db_cb[i]); - } - } - - return IRQ_HANDLED; -} - -static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries) -{ - struct pci_dev *pdev = ndev->pdev; - struct msix_entry *msix; - int rc, i; - - if (msix_entries < ndev->limits.msix_cnt) - return -ENOSPC; - - rc = pci_enable_msix_exact(pdev, ndev->msix_entries, msix_entries); - if (rc < 0) - return rc; - - for (i = 0; i < msix_entries; i++) { - msix = &ndev->msix_entries[i]; - WARN_ON(!msix->vector); - - if (i == msix_entries - 1) { - rc = request_irq(msix->vector, - xeon_event_msix_irq, 0, - "ntb-event-msix", ndev); - if (rc) - goto err; - } else { - rc = request_irq(msix->vector, - xeon_callback_msix_irq, 0, - "ntb-callback-msix", - &ndev->db_cb[i]); - if (rc) - goto err; - } - } - - ndev->num_msix = msix_entries; - ndev->max_cbs = msix_entries - 1; - - return 0; - -err: - while (--i >= 0) { - /* Code never reaches here for entry nr 'ndev->num_msix - 1' */ - msix = &ndev->msix_entries[i]; - free_irq(msix->vector, &ndev->db_cb[i]); - } - - pci_disable_msix(pdev); - ndev->num_msix = 0; - - return rc; -} - -static int ntb_setup_bwd_msix(struct ntb_device *ndev, int msix_entries) -{ - struct pci_dev *pdev = ndev->pdev; - struct msix_entry *msix; - int rc, i; - - msix_entries = pci_enable_msix_range(pdev, ndev->msix_entries, - 1, msix_entries); - if (msix_entries < 0) - return msix_entries; - - for (i = 0; i < msix_entries; i++) { - msix = &ndev->msix_entries[i]; - WARN_ON(!msix->vector); - - rc = request_irq(msix->vector, bwd_callback_msix_irq, 0, - "ntb-callback-msix", &ndev->db_cb[i]); - if (rc) - goto err; - } - - ndev->num_msix = msix_entries; - ndev->max_cbs = msix_entries; - - return 0; - -err: - while (--i >= 0) - free_irq(msix->vector, &ndev->db_cb[i]); - - pci_disable_msix(pdev); - ndev->num_msix = 0; - - return rc; -} - -static int ntb_setup_msix(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int msix_entries; - int rc, i; - - msix_entries = pci_msix_vec_count(pdev); - if (msix_entries < 0) { - rc = msix_entries; - goto err; - } else if (msix_entries > ndev->limits.msix_cnt) { - rc = -EINVAL; - goto err; - } - - ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries, - GFP_KERNEL); - if (!ndev->msix_entries) { - rc = -ENOMEM; - goto err; - } - - for (i = 0; i < msix_entries; i++) - ndev->msix_entries[i].entry = i; - - if (is_ntb_atom(ndev)) - rc = ntb_setup_bwd_msix(ndev, msix_entries); - else - rc = ntb_setup_snb_msix(ndev, msix_entries); - if (rc) - goto err1; - - return 0; - -err1: - kfree(ndev->msix_entries); -err: - dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n"); - return rc; -} - -static int ntb_setup_msi(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int rc; - - rc = pci_enable_msi(pdev); - if (rc) - return rc; - - rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev); - if (rc) { - pci_disable_msi(pdev); - dev_err(&pdev->dev, "Error allocating MSI interrupt\n"); - return rc; - } - - return 0; -} - -static int ntb_setup_intx(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int rc; - - /* Verify intx is enabled */ - pci_intx(pdev, 1); - - rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx", - ndev); - if (rc) - return rc; - - return 0; -} - -static int ntb_setup_interrupts(struct ntb_device *ndev) -{ - int rc; - - /* On BWD, disable all interrupts. On SNB, disable all but Link - * Interrupt. The rest will be unmasked as callbacks are registered. - */ - if (is_ntb_atom(ndev)) - writeq(~0, ndev->reg_ofs.ldb_mask); - else { - u16 var = 1 << SNB_LINK_DB; - writew(~var, ndev->reg_ofs.ldb_mask); - } - - rc = ntb_setup_msix(ndev); - if (!rc) - goto done; - - ndev->bits_per_vector = 1; - ndev->max_cbs = ndev->limits.max_db_bits; - - rc = ntb_setup_msi(ndev); - if (!rc) - goto done; - - rc = ntb_setup_intx(ndev); - if (rc) { - dev_err(&ndev->pdev->dev, "no usable interrupts\n"); - return rc; - } - -done: - return 0; -} - -static void ntb_free_interrupts(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - - /* mask interrupts */ - if (is_ntb_atom(ndev)) - writeq(~0, ndev->reg_ofs.ldb_mask); - else - writew(~0, ndev->reg_ofs.ldb_mask); - - if (ndev->num_msix) { - struct msix_entry *msix; - u32 i; - - for (i = 0; i < ndev->num_msix; i++) { - msix = &ndev->msix_entries[i]; - if (is_ntb_xeon(ndev) && i == ndev->num_msix - 1) - free_irq(msix->vector, ndev); - else - free_irq(msix->vector, &ndev->db_cb[i]); - } - pci_disable_msix(pdev); - kfree(ndev->msix_entries); - } else { - free_irq(pdev->irq, ndev); - - if (pci_dev_msi_enabled(pdev)) - pci_disable_msi(pdev); - } -} - -static int ntb_create_callbacks(struct ntb_device *ndev) -{ - int i; - - /* Chicken-egg issue. We won't know how many callbacks are necessary - * until we see how many MSI-X vectors we get, but these pointers need - * to be passed into the MSI-X register function. So, we allocate the - * max, knowing that they might not all be used, to work around this. - */ - ndev->db_cb = kcalloc(ndev->limits.max_db_bits, - sizeof(struct ntb_db_cb), - GFP_KERNEL); - if (!ndev->db_cb) - return -ENOMEM; - - for (i = 0; i < ndev->limits.max_db_bits; i++) { - ndev->db_cb[i].db_num = i; - ndev->db_cb[i].ndev = ndev; - } - - return 0; -} - -static void ntb_free_callbacks(struct ntb_device *ndev) -{ - int i; - - for (i = 0; i < ndev->limits.max_db_bits; i++) - ntb_unregister_db_callback(ndev, i); - - kfree(ndev->db_cb); -} - -static ssize_t ntb_debugfs_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *offp) -{ - struct ntb_device *ndev; - char *buf; - ssize_t ret, offset, out_count; - - out_count = 500; - - buf = kmalloc(out_count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ndev = filp->private_data; - offset = 0; - offset += snprintf(buf + offset, out_count - offset, - "NTB Device Information:\n"); - offset += snprintf(buf + offset, out_count - offset, - "Connection Type - \t\t%s\n", - ndev->conn_type == NTB_CONN_TRANSPARENT ? - "Transparent" : (ndev->conn_type == NTB_CONN_B2B) ? - "Back to back" : "Root Port"); - offset += snprintf(buf + offset, out_count - offset, - "Device Type - \t\t\t%s\n", - ndev->dev_type == NTB_DEV_USD ? - "DSD/USP" : "USD/DSP"); - offset += snprintf(buf + offset, out_count - offset, - "Max Number of Callbacks - \t%u\n", - ntb_max_cbs(ndev)); - offset += snprintf(buf + offset, out_count - offset, - "Link Status - \t\t\t%s\n", - ntb_hw_link_status(ndev) ? "Up" : "Down"); - if (ntb_hw_link_status(ndev)) { - offset += snprintf(buf + offset, out_count - offset, - "Link Speed - \t\t\tPCI-E Gen %u\n", - ndev->link_speed); - offset += snprintf(buf + offset, out_count - offset, - "Link Width - \t\t\tx%u\n", - ndev->link_width); - } - - if (is_ntb_xeon(ndev)) { - u32 status32; - u16 status16; - int rc; - - offset += snprintf(buf + offset, out_count - offset, - "\nNTB Device Statistics:\n"); - offset += snprintf(buf + offset, out_count - offset, - "Upstream Memory Miss - \t%u\n", - readw(ndev->reg_base + - SNB_USMEMMISS_OFFSET)); - - offset += snprintf(buf + offset, out_count - offset, - "\nNTB Hardware Errors:\n"); - - rc = pci_read_config_word(ndev->pdev, SNB_DEVSTS_OFFSET, - &status16); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "DEVSTS - \t%#06x\n", status16); - - rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET, - &status16); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "LNKSTS - \t%#06x\n", status16); - - rc = pci_read_config_dword(ndev->pdev, SNB_UNCERRSTS_OFFSET, - &status32); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "UNCERRSTS - \t%#010x\n", status32); - - rc = pci_read_config_dword(ndev->pdev, SNB_CORERRSTS_OFFSET, - &status32); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "CORERRSTS - \t%#010x\n", status32); - } - - if (offset > out_count) - offset = out_count; - - ret = simple_read_from_buffer(ubuf, count, offp, buf, offset); - kfree(buf); - return ret; -} - -static const struct file_operations ntb_debugfs_info = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ntb_debugfs_read, +static struct pci_driver intel_ntb_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = intel_ntb_pci_tbl, + .probe = intel_ntb_pci_probe, + .remove = intel_ntb_pci_remove, }; -static void ntb_setup_debugfs(struct ntb_device *ndev) +static int __init intel_ntb_pci_driver_init(void) { - if (!debugfs_initialized()) - return; - - if (!debugfs_dir) + if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->pdev), - debugfs_dir); - if (ndev->debugfs_dir) - ndev->debugfs_info = debugfs_create_file("info", S_IRUSR, - ndev->debugfs_dir, - ndev, - &ntb_debugfs_info); + return pci_register_driver(&intel_ntb_pci_driver); } +module_init(intel_ntb_pci_driver_init); -static void ntb_free_debugfs(struct ntb_device *ndev) +static void __exit intel_ntb_pci_driver_exit(void) { - debugfs_remove_recursive(ndev->debugfs_dir); + pci_unregister_driver(&intel_ntb_pci_driver); - if (debugfs_dir && simple_empty(debugfs_dir)) { - debugfs_remove_recursive(debugfs_dir); - debugfs_dir = NULL; - } + debugfs_remove_recursive(debugfs_dir); } +module_exit(intel_ntb_pci_driver_exit); -static void ntb_hw_link_up(struct ntb_device *ndev) -{ - if (ndev->conn_type == NTB_CONN_TRANSPARENT) - ntb_link_event(ndev, NTB_LINK_UP); - else { - u32 ntb_cntl; - - /* Let's bring the NTB link up */ - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - ntb_cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); - ntb_cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; - ntb_cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; - if (ndev->split_bar) - ntb_cntl |= NTB_CNTL_P2S_BAR5_SNOOP | - NTB_CNTL_S2P_BAR5_SNOOP; - - writel(ntb_cntl, ndev->reg_ofs.lnk_cntl); - } -} - -static void ntb_hw_link_down(struct ntb_device *ndev) -{ - u32 ntb_cntl; - - if (ndev->conn_type == NTB_CONN_TRANSPARENT) { - ntb_link_event(ndev, NTB_LINK_DOWN); - return; - } - - /* Bring NTB link down */ - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - ntb_cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); - ntb_cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); - if (ndev->split_bar) - ntb_cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | - NTB_CNTL_S2P_BAR5_SNOOP); - ntb_cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; - writel(ntb_cntl, ndev->reg_ofs.lnk_cntl); -} - -static void ntb_max_mw_detect(struct ntb_device *ndev) -{ - if (ndev->split_bar) - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - else - ndev->limits.max_mw = SNB_MAX_MW; -} - -static int ntb_xeon_detect(struct ntb_device *ndev) -{ - int rc, bars_mask; - u32 bars; - u8 ppd; - - ndev->hw_type = SNB_HW; - - rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &ppd); - if (rc) - return -EIO; - - if (ppd & SNB_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_USD; - else - ndev->dev_type = NTB_DEV_DSD; - - ndev->split_bar = (ppd & SNB_PPD_SPLIT_BAR) ? 1 : 0; - - switch (ppd & SNB_PPD_CONN_TYPE) { - case NTB_CONN_B2B: - dev_info(&ndev->pdev->dev, "Conn Type = B2B\n"); - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - dev_info(&ndev->pdev->dev, "Conn Type = RP\n"); - ndev->conn_type = NTB_CONN_RP; - break; - case NTB_CONN_TRANSPARENT: - dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n"); - ndev->conn_type = NTB_CONN_TRANSPARENT; - /* - * This mode is default to USD/DSP. HW does not report - * properly in transparent mode as it has no knowledge of - * NTB. We will just force correct here. - */ - ndev->dev_type = NTB_DEV_USD; - - /* - * This is a way for transparent BAR to figure out if we - * are doing split BAR or not. There is no way for the hw - * on the transparent side to know and set the PPD. - */ - bars_mask = pci_select_bars(ndev->pdev, IORESOURCE_MEM); - bars = hweight32(bars_mask); - if (bars == (HSX_SPLITBAR_MAX_MW + 1)) - ndev->split_bar = 1; - - break; - default: - dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", ppd); - return -ENODEV; - } - - ntb_max_mw_detect(ndev); - - return 0; -} - -static int ntb_atom_detect(struct ntb_device *ndev) -{ - int rc; - u32 ppd; - - ndev->hw_type = BWD_HW; - ndev->limits.max_mw = BWD_MAX_MW; - - rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd); - if (rc) - return rc; - - switch ((ppd & BWD_PPD_CONN_TYPE) >> 8) { - case NTB_CONN_B2B: - dev_info(&ndev->pdev->dev, "Conn Type = B2B\n"); - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - default: - dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n"); - return -EINVAL; - } - - if (ppd & BWD_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_DSD; - else - ndev->dev_type = NTB_DEV_USD; - - return 0; -} - -static int ntb_device_detect(struct ntb_device *ndev) -{ - int rc; - - if (is_ntb_xeon(ndev)) - rc = ntb_xeon_detect(ndev); - else if (is_ntb_atom(ndev)) - rc = ntb_atom_detect(ndev); - else - rc = -ENODEV; - - dev_info(&ndev->pdev->dev, "Device Type = %s\n", - ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP"); - - return 0; -} - -static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct ntb_device *ndev; - int rc, i; - - ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL); - if (!ndev) - return -ENOMEM; - - ndev->pdev = pdev; - - ntb_set_errata_flags(ndev); - - ndev->link_status = NTB_LINK_DOWN; - pci_set_drvdata(pdev, ndev); - ntb_setup_debugfs(ndev); - - rc = pci_enable_device(pdev); - if (rc) - goto err; - - pci_set_master(ndev->pdev); - - rc = ntb_device_detect(ndev); - if (rc) - goto err; - - ndev->mw = kcalloc(ndev->limits.max_mw, sizeof(struct ntb_mw), - GFP_KERNEL); - if (!ndev->mw) { - rc = -ENOMEM; - goto err1; - } - - if (ndev->split_bar) - rc = pci_request_selected_regions(pdev, NTB_SPLITBAR_MASK, - KBUILD_MODNAME); - else - rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, - KBUILD_MODNAME); - - if (rc) - goto err2; - - ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO); - if (!ndev->reg_base) { - dev_warn(&pdev->dev, "Cannot remap BAR 0\n"); - rc = -EIO; - goto err3; - } - - for (i = 0; i < ndev->limits.max_mw; i++) { - ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i)); - - /* - * with the errata we need to steal last of the memory - * windows for workarounds and they point to MMIO registers. - */ - if ((ndev->wa_flags & WA_SNB_ERR) && - (i == (ndev->limits.max_mw - 1))) { - ndev->mw[i].vbase = - ioremap_nocache(pci_resource_start(pdev, - MW_TO_BAR(i)), - ndev->mw[i].bar_sz); - } else { - ndev->mw[i].vbase = - ioremap_wc(pci_resource_start(pdev, - MW_TO_BAR(i)), - ndev->mw[i].bar_sz); - } - - dev_info(&pdev->dev, "MW %d size %llu\n", i, - (unsigned long long) ndev->mw[i].bar_sz); - if (!ndev->mw[i].vbase) { - dev_warn(&pdev->dev, "Cannot remap BAR %d\n", - MW_TO_BAR(i)); - rc = -EIO; - goto err4; - } - } - - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - goto err4; - - dev_warn(&pdev->dev, "Cannot DMA highmem\n"); - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - goto err4; - - dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); - } - - rc = ntb_device_setup(ndev); - if (rc) - goto err4; - - rc = ntb_create_callbacks(ndev); - if (rc) - goto err5; - - rc = ntb_setup_interrupts(ndev); - if (rc) - goto err6; - - /* The scratchpad registers keep the values between rmmod/insmod, - * blast them now - */ - for (i = 0; i < ndev->limits.max_spads; i++) { - ntb_write_local_spad(ndev, i, 0); - ntb_write_remote_spad(ndev, i, 0); - } - - rc = ntb_transport_init(pdev); - if (rc) - goto err7; - - ntb_hw_link_up(ndev); - - return 0; - -err7: - ntb_free_interrupts(ndev); -err6: - ntb_free_callbacks(ndev); -err5: - ntb_device_free(ndev); -err4: - for (i--; i >= 0; i--) - iounmap(ndev->mw[i].vbase); - iounmap(ndev->reg_base); -err3: - if (ndev->split_bar) - pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK); - else - pci_release_selected_regions(pdev, NTB_BAR_MASK); -err2: - kfree(ndev->mw); -err1: - pci_disable_device(pdev); -err: - ntb_free_debugfs(ndev); - kfree(ndev); - - dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME); - return rc; -} - -static void ntb_pci_remove(struct pci_dev *pdev) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - int i; - - ntb_hw_link_down(ndev); - - ntb_transport_free(ndev->ntb_transport); - - ntb_free_interrupts(ndev); - ntb_free_callbacks(ndev); - ntb_device_free(ndev); - - /* need to reset max_mw limits so we can unmap properly */ - if (ndev->hw_type == SNB_HW) - ntb_max_mw_detect(ndev); - - for (i = 0; i < ndev->limits.max_mw; i++) - iounmap(ndev->mw[i].vbase); - - kfree(ndev->mw); - iounmap(ndev->reg_base); - if (ndev->split_bar) - pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK); - else - pci_release_selected_regions(pdev, NTB_BAR_MASK); - pci_disable_device(pdev); - ntb_free_debugfs(ndev); - kfree(ndev); -} - -static struct pci_driver ntb_pci_driver = { - .name = KBUILD_MODNAME, - .id_table = ntb_pci_tbl, - .probe = ntb_pci_probe, - .remove = ntb_pci_remove, -}; - -module_pci_driver(ntb_pci_driver); diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index 935610454f70..fec689dc95cf 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -45,341 +47,296 @@ * Contact Information: * Jon Mason */ -#include -#define NTB_LINK_STATUS_ACTIVE 0x2000 -#define NTB_LINK_SPEED_MASK 0x000f -#define NTB_LINK_WIDTH_MASK 0x03f0 +#ifndef NTB_HW_INTEL_H +#define NTB_HW_INTEL_H -#define SNB_MSIX_CNT 4 -#define SNB_MAX_B2B_SPADS 16 -#define SNB_MAX_COMPAT_SPADS 16 -/* Reserve the uppermost bit for link interrupt */ -#define SNB_MAX_DB_BITS 15 -#define SNB_LINK_DB 15 -#define SNB_DB_BITS_PER_VEC 5 -#define HSX_SPLITBAR_MAX_MW 3 -#define SNB_MAX_MW 2 -#define SNB_ERRATA_MAX_MW 1 +#include +#include -#define SNB_DB_HW_LINK 0x8000 +#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 +#define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 +#define PCI_DEVICE_ID_INTEL_NTB_SS_JSF 0x3727 +#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB 0x3C0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_SNB 0x3C0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_SNB 0x3C0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_IVT 0x0E0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_IVT 0x0E0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_IVT 0x0E0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX 0x2F0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E -#define SNB_UNCERRSTS_OFFSET 0x014C -#define SNB_CORERRSTS_OFFSET 0x0158 -#define SNB_LINK_STATUS_OFFSET 0x01A2 -#define SNB_PCICMD_OFFSET 0x0504 -#define SNB_DEVCTRL_OFFSET 0x0598 -#define SNB_DEVSTS_OFFSET 0x059A -#define SNB_SLINK_STATUS_OFFSET 0x05A2 +/* SNB hardware (and JSF, IVT, HSX) */ -#define SNB_PBAR2LMT_OFFSET 0x0000 -#define SNB_PBAR4LMT_OFFSET 0x0008 -#define SNB_PBAR5LMT_OFFSET 0x000C -#define SNB_PBAR2XLAT_OFFSET 0x0010 -#define SNB_PBAR4XLAT_OFFSET 0x0018 -#define SNB_PBAR5XLAT_OFFSET 0x001C -#define SNB_SBAR2LMT_OFFSET 0x0020 -#define SNB_SBAR4LMT_OFFSET 0x0028 -#define SNB_SBAR5LMT_OFFSET 0x002C -#define SNB_SBAR2XLAT_OFFSET 0x0030 -#define SNB_SBAR4XLAT_OFFSET 0x0038 -#define SNB_SBAR5XLAT_OFFSET 0x003C -#define SNB_SBAR0BASE_OFFSET 0x0040 -#define SNB_SBAR2BASE_OFFSET 0x0048 -#define SNB_SBAR4BASE_OFFSET 0x0050 -#define SNB_SBAR5BASE_OFFSET 0x0054 -#define SNB_NTBCNTL_OFFSET 0x0058 -#define SNB_SBDF_OFFSET 0x005C -#define SNB_PDOORBELL_OFFSET 0x0060 -#define SNB_PDBMSK_OFFSET 0x0062 -#define SNB_SDOORBELL_OFFSET 0x0064 -#define SNB_SDBMSK_OFFSET 0x0066 -#define SNB_USMEMMISS_OFFSET 0x0070 -#define SNB_SPAD_OFFSET 0x0080 -#define SNB_SPADSEMA4_OFFSET 0x00c0 -#define SNB_WCCNTRL_OFFSET 0x00e0 -#define SNB_B2B_SPAD_OFFSET 0x0100 -#define SNB_B2B_DOORBELL_OFFSET 0x0140 -#define SNB_B2B_XLAT_OFFSETL 0x0144 -#define SNB_B2B_XLAT_OFFSETU 0x0148 +#define SNB_PBAR23LMT_OFFSET 0x0000 +#define SNB_PBAR45LMT_OFFSET 0x0008 +#define SNB_PBAR4LMT_OFFSET 0x0008 +#define SNB_PBAR5LMT_OFFSET 0x000c +#define SNB_PBAR23XLAT_OFFSET 0x0010 +#define SNB_PBAR45XLAT_OFFSET 0x0018 +#define SNB_PBAR4XLAT_OFFSET 0x0018 +#define SNB_PBAR5XLAT_OFFSET 0x001c +#define SNB_SBAR23LMT_OFFSET 0x0020 +#define SNB_SBAR45LMT_OFFSET 0x0028 +#define SNB_SBAR4LMT_OFFSET 0x0028 +#define SNB_SBAR5LMT_OFFSET 0x002c +#define SNB_SBAR23XLAT_OFFSET 0x0030 +#define SNB_SBAR45XLAT_OFFSET 0x0038 +#define SNB_SBAR4XLAT_OFFSET 0x0038 +#define SNB_SBAR5XLAT_OFFSET 0x003c +#define SNB_SBAR0BASE_OFFSET 0x0040 +#define SNB_SBAR23BASE_OFFSET 0x0048 +#define SNB_SBAR45BASE_OFFSET 0x0050 +#define SNB_SBAR4BASE_OFFSET 0x0050 +#define SNB_SBAR5BASE_OFFSET 0x0054 +#define SNB_SBDF_OFFSET 0x005c +#define SNB_NTBCNTL_OFFSET 0x0058 +#define SNB_PDOORBELL_OFFSET 0x0060 +#define SNB_PDBMSK_OFFSET 0x0062 +#define SNB_SDOORBELL_OFFSET 0x0064 +#define SNB_SDBMSK_OFFSET 0x0066 +#define SNB_USMEMMISS_OFFSET 0x0070 +#define SNB_SPAD_OFFSET 0x0080 +#define SNB_PBAR23SZ_OFFSET 0x00d0 +#define SNB_PBAR45SZ_OFFSET 0x00d1 +#define SNB_PBAR4SZ_OFFSET 0x00d1 +#define SNB_SBAR23SZ_OFFSET 0x00d2 +#define SNB_SBAR45SZ_OFFSET 0x00d3 +#define SNB_SBAR4SZ_OFFSET 0x00d3 +#define SNB_PPD_OFFSET 0x00d4 +#define SNB_PBAR5SZ_OFFSET 0x00d5 +#define SNB_SBAR5SZ_OFFSET 0x00d6 +#define SNB_WCCNTRL_OFFSET 0x00e0 +#define SNB_UNCERRSTS_OFFSET 0x014c +#define SNB_CORERRSTS_OFFSET 0x0158 +#define SNB_LINK_STATUS_OFFSET 0x01a2 +#define SNB_SPCICMD_OFFSET 0x0504 +#define SNB_DEVCTRL_OFFSET 0x0598 +#define SNB_DEVSTS_OFFSET 0x059a +#define SNB_SLINK_STATUS_OFFSET 0x05a2 +#define SNB_B2B_SPAD_OFFSET 0x0100 +#define SNB_B2B_DOORBELL_OFFSET 0x0140 +#define SNB_B2B_XLAT_OFFSETL 0x0144 +#define SNB_B2B_XLAT_OFFSETU 0x0148 +#define SNB_PPD_CONN_MASK 0x03 +#define SNB_PPD_CONN_TRANSPARENT 0x00 +#define SNB_PPD_CONN_B2B 0x01 +#define SNB_PPD_CONN_RP 0x02 +#define SNB_PPD_DEV_MASK 0x10 +#define SNB_PPD_DEV_USD 0x00 +#define SNB_PPD_DEV_DSD 0x10 +#define SNB_PPD_SPLIT_BAR_MASK 0x40 -/* - * The addresses are setup so the 32bit BARs can function. Thus - * the addresses are all in 32bit space - */ -#define SNB_MBAR01_USD_ADDR 0x000000002100000CULL -#define SNB_MBAR23_USD_ADDR 0x000000004100000CULL -#define SNB_MBAR4_USD_ADDR 0x000000008100000CULL -#define SNB_MBAR5_USD_ADDR 0x00000000A100000CULL -#define SNB_MBAR01_DSD_ADDR 0x000000002000000CULL -#define SNB_MBAR23_DSD_ADDR 0x000000004000000CULL -#define SNB_MBAR4_DSD_ADDR 0x000000008000000CULL -#define SNB_MBAR5_DSD_ADDR 0x00000000A000000CULL +#define SNB_PPD_TOPO_MASK (SNB_PPD_CONN_MASK | SNB_PPD_DEV_MASK) +#define SNB_PPD_TOPO_PRI_USD (SNB_PPD_CONN_RP | SNB_PPD_DEV_USD) +#define SNB_PPD_TOPO_PRI_DSD (SNB_PPD_CONN_RP | SNB_PPD_DEV_DSD) +#define SNB_PPD_TOPO_SEC_USD (SNB_PPD_CONN_TRANSPARENT | SNB_PPD_DEV_USD) +#define SNB_PPD_TOPO_SEC_DSD (SNB_PPD_CONN_TRANSPARENT | SNB_PPD_DEV_DSD) +#define SNB_PPD_TOPO_B2B_USD (SNB_PPD_CONN_B2B | SNB_PPD_DEV_USD) +#define SNB_PPD_TOPO_B2B_DSD (SNB_PPD_CONN_B2B | SNB_PPD_DEV_DSD) -#define BWD_MSIX_CNT 34 -#define BWD_MAX_SPADS 16 -#define BWD_MAX_DB_BITS 34 -#define BWD_DB_BITS_PER_VEC 1 -#define BWD_MAX_MW 2 +#define SNB_MW_COUNT 2 +#define HSX_SPLIT_BAR_MW_COUNT 3 +#define SNB_DB_COUNT 15 +#define SNB_DB_LINK 15 +#define SNB_DB_LINK_BIT BIT_ULL(SNB_DB_LINK) +#define SNB_DB_MSIX_VECTOR_COUNT 4 +#define SNB_DB_MSIX_VECTOR_SHIFT 5 +#define SNB_DB_TOTAL_SHIFT 16 +#define SNB_SPAD_COUNT 16 -#define BWD_PCICMD_OFFSET 0xb004 -#define BWD_MBAR23_OFFSET 0xb018 -#define BWD_MBAR45_OFFSET 0xb020 -#define BWD_DEVCTRL_OFFSET 0xb048 -#define BWD_LINK_STATUS_OFFSET 0xb052 -#define BWD_ERRCORSTS_OFFSET 0xb110 +/* BWD hardware */ -#define BWD_SBAR2XLAT_OFFSET 0x0008 -#define BWD_SBAR4XLAT_OFFSET 0x0010 -#define BWD_PDOORBELL_OFFSET 0x0020 -#define BWD_PDBMSK_OFFSET 0x0028 -#define BWD_NTBCNTL_OFFSET 0x0060 -#define BWD_EBDF_OFFSET 0x0064 -#define BWD_SPAD_OFFSET 0x0080 -#define BWD_SPADSEMA_OFFSET 0x00c0 -#define BWD_STKYSPAD_OFFSET 0x00c4 -#define BWD_PBAR2XLAT_OFFSET 0x8008 -#define BWD_PBAR4XLAT_OFFSET 0x8010 -#define BWD_B2B_DOORBELL_OFFSET 0x8020 -#define BWD_B2B_SPAD_OFFSET 0x8080 -#define BWD_B2B_SPADSEMA_OFFSET 0x80c0 -#define BWD_B2B_STKYSPAD_OFFSET 0x80c4 - -#define BWD_MODPHY_PCSREG4 0x1c004 -#define BWD_MODPHY_PCSREG6 0x1c006 - -#define BWD_IP_BASE 0xC000 -#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) -#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) +#define BWD_SBAR2XLAT_OFFSET 0x0008 +#define BWD_PDOORBELL_OFFSET 0x0020 +#define BWD_PDBMSK_OFFSET 0x0028 +#define BWD_NTBCNTL_OFFSET 0x0060 +#define BWD_SPAD_OFFSET 0x0080 +#define BWD_PPD_OFFSET 0x00d4 +#define BWD_PBAR2XLAT_OFFSET 0x8008 +#define BWD_B2B_DOORBELL_OFFSET 0x8020 +#define BWD_B2B_SPAD_OFFSET 0x8080 +#define BWD_SPCICMD_OFFSET 0xb004 +#define BWD_LINK_STATUS_OFFSET 0xb052 +#define BWD_ERRCORSTS_OFFSET 0xb110 +#define BWD_IP_BASE 0xc000 +#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) +#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) #define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040) #define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324) +#define BWD_MODPHY_PCSREG4 0x1c004 +#define BWD_MODPHY_PCSREG6 0x1c006 -#define BWD_DESKEWSTS_DBERR (1 << 15) -#define BWD_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) -#define BWD_LTSSMSTATEJMP_FORCEDETECT (1 << 2) -#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF +#define BWD_PPD_INIT_LINK 0x0008 +#define BWD_PPD_CONN_MASK 0x0300 +#define BWD_PPD_CONN_TRANSPARENT 0x0000 +#define BWD_PPD_CONN_B2B 0x0100 +#define BWD_PPD_CONN_RP 0x0200 +#define BWD_PPD_DEV_MASK 0x1000 +#define BWD_PPD_DEV_USD 0x0000 +#define BWD_PPD_DEV_DSD 0x1000 +#define BWD_PPD_TOPO_MASK (BWD_PPD_CONN_MASK | BWD_PPD_DEV_MASK) +#define BWD_PPD_TOPO_PRI_USD (BWD_PPD_CONN_TRANSPARENT | BWD_PPD_DEV_USD) +#define BWD_PPD_TOPO_PRI_DSD (BWD_PPD_CONN_TRANSPARENT | BWD_PPD_DEV_DSD) +#define BWD_PPD_TOPO_SEC_USD (BWD_PPD_CONN_RP | BWD_PPD_DEV_USD) +#define BWD_PPD_TOPO_SEC_DSD (BWD_PPD_CONN_RP | BWD_PPD_DEV_DSD) +#define BWD_PPD_TOPO_B2B_USD (BWD_PPD_CONN_B2B | BWD_PPD_DEV_USD) +#define BWD_PPD_TOPO_B2B_DSD (BWD_PPD_CONN_B2B | BWD_PPD_DEV_DSD) -#define NTB_CNTL_CFG_LOCK (1 << 0) -#define NTB_CNTL_LINK_DISABLE (1 << 1) -#define NTB_CNTL_S2P_BAR23_SNOOP (1 << 2) -#define NTB_CNTL_P2S_BAR23_SNOOP (1 << 4) -#define NTB_CNTL_S2P_BAR4_SNOOP (1 << 6) -#define NTB_CNTL_P2S_BAR4_SNOOP (1 << 8) -#define NTB_CNTL_S2P_BAR5_SNOOP (1 << 12) -#define NTB_CNTL_P2S_BAR5_SNOOP (1 << 14) -#define BWD_CNTL_LINK_DOWN (1 << 16) +#define BWD_MW_COUNT 2 +#define BWD_DB_COUNT 34 +#define BWD_DB_VALID_MASK (BIT_ULL(BWD_DB_COUNT) - 1) +#define BWD_DB_MSIX_VECTOR_COUNT 34 +#define BWD_DB_MSIX_VECTOR_SHIFT 1 +#define BWD_DB_TOTAL_SHIFT 34 +#define BWD_SPAD_COUNT 16 -#define NTB_PPD_OFFSET 0x00D4 -#define SNB_PPD_CONN_TYPE 0x0003 -#define SNB_PPD_DEV_TYPE 0x0010 -#define SNB_PPD_SPLIT_BAR (1 << 6) -#define BWD_PPD_INIT_LINK 0x0008 -#define BWD_PPD_CONN_TYPE 0x0300 -#define BWD_PPD_DEV_TYPE 0x1000 -#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 -#define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 -#define PCI_DEVICE_ID_INTEL_NTB_SS_JSF 0x3727 -#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB 0x3C0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_SNB 0x3C0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_SNB 0x3C0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_IVT 0x0E0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_IVT 0x0E0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_IVT 0x0E0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX 0x2F0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E +#define BWD_NTB_CTL_DOWN_BIT BIT(16) +#define BWD_NTB_CTL_ACTIVE(x) !(x & BWD_NTB_CTL_DOWN_BIT) + +#define BWD_DESKEWSTS_DBERR BIT(15) +#define BWD_LTSSMERRSTS0_UNEXPECTEDEI BIT(20) +#define BWD_LTSSMSTATEJMP_FORCEDETECT BIT(2) +#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF + +#define BWD_LINK_HB_TIMEOUT msecs_to_jiffies(1000) +#define BWD_LINK_RECOVERY_TIME msecs_to_jiffies(500) + +/* Ntb control and link status */ + +#define NTB_CTL_CFG_LOCK BIT(0) +#define NTB_CTL_DISABLE BIT(1) +#define NTB_CTL_S2P_BAR2_SNOOP BIT(2) +#define NTB_CTL_P2S_BAR2_SNOOP BIT(4) +#define NTB_CTL_S2P_BAR4_SNOOP BIT(6) +#define NTB_CTL_P2S_BAR4_SNOOP BIT(8) +#define NTB_CTL_S2P_BAR5_SNOOP BIT(12) +#define NTB_CTL_P2S_BAR5_SNOOP BIT(14) + +#define NTB_LNK_STA_ACTIVE_BIT 0x2000 +#define NTB_LNK_STA_SPEED_MASK 0x000f +#define NTB_LNK_STA_WIDTH_MASK 0x03f0 +#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LNK_STA_ACTIVE_BIT)) +#define NTB_LNK_STA_SPEED(x) ((x) & NTB_LNK_STA_SPEED_MASK) +#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 4) + +/* Use the following addresses for translation between b2b ntb devices in case + * the hardware default values are not reliable. */ +#define SNB_B2B_BAR0_USD_ADDR 0x1000000000000000ull +#define SNB_B2B_BAR2_USD_ADDR64 0x2000000000000000ull +#define SNB_B2B_BAR4_USD_ADDR64 0x4000000000000000ull +#define SNB_B2B_BAR4_USD_ADDR32 0x20000000u +#define SNB_B2B_BAR5_USD_ADDR32 0x40000000u +#define SNB_B2B_BAR0_DSD_ADDR 0x9000000000000000ull +#define SNB_B2B_BAR2_DSD_ADDR64 0xa000000000000000ull +#define SNB_B2B_BAR4_DSD_ADDR64 0xc000000000000000ull +#define SNB_B2B_BAR4_DSD_ADDR32 0xa0000000u +#define SNB_B2B_BAR5_DSD_ADDR32 0xc0000000u + +/* The peer ntb secondary config space is 32KB fixed size */ +#define SNB_B2B_MIN_SIZE 0x8000 + +/* flags to indicate hardware errata */ +#define NTB_HWERR_SDOORBELL_LOCKUP BIT_ULL(0) +#define NTB_HWERR_SB01BASE_LOCKUP BIT_ULL(1) +#define NTB_HWERR_B2BDOORBELL_BIT14 BIT_ULL(2) + +/* flags to indicate unsafe api */ +#define NTB_UNSAFE_DB BIT_ULL(0) +#define NTB_UNSAFE_SPAD BIT_ULL(1) + +struct intel_ntb_dev; + +struct intel_ntb_reg { + int (*poll_link)(struct intel_ntb_dev *ndev); + int (*link_is_up)(struct intel_ntb_dev *ndev); + u64 (*db_ioread)(void __iomem *mmio); + void (*db_iowrite)(u64 db_bits, void __iomem *mmio); + unsigned long ntb_ctl; + resource_size_t db_size; + int mw_bar[]; +}; + +struct intel_ntb_alt_reg { + unsigned long db_bell; + unsigned long db_mask; + unsigned long spad; +}; + +struct intel_ntb_xlat_reg { + unsigned long bar0_base; + unsigned long bar2_xlat; + unsigned long bar2_limit; +}; + +struct intel_b2b_addr { + phys_addr_t bar0_addr; + phys_addr_t bar2_addr64; + phys_addr_t bar4_addr64; + phys_addr_t bar4_addr32; + phys_addr_t bar5_addr32; +}; + +struct intel_ntb_vec { + struct intel_ntb_dev *ndev; + int num; +}; + +struct intel_ntb_dev { + struct ntb_dev ntb; + + /* offset of peer bar0 in b2b bar */ + unsigned long b2b_off; + /* mw idx used to access peer bar0 */ + unsigned int b2b_idx; + + /* BAR45 is split into BAR4 and BAR5 */ + bool bar4_split; + + u32 ntb_ctl; + u32 lnk_sta; + + unsigned char mw_count; + unsigned char spad_count; + unsigned char db_count; + unsigned char db_vec_count; + unsigned char db_vec_shift; + + u64 db_valid_mask; + u64 db_link_mask; + u64 db_mask; + + /* synchronize rmw access of db_mask and hw reg */ + spinlock_t db_mask_lock; + + struct msix_entry *msix; + struct intel_ntb_vec *vec; + + const struct intel_ntb_reg *reg; + const struct intel_ntb_alt_reg *self_reg; + const struct intel_ntb_alt_reg *peer_reg; + const struct intel_ntb_xlat_reg *xlat_reg; + void __iomem *self_mmio; + void __iomem *peer_mmio; + phys_addr_t peer_addr; + + unsigned long last_ts; + struct delayed_work hb_timer; + + unsigned long hwerr_flags; + unsigned long unsafe_flags; + unsigned long unsafe_flags_ignore; + + struct dentry *debugfs_dir; + struct dentry *debugfs_info; +}; + +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) +#define ndev_name(ndev) pci_name(ndev_pdev(ndev)) +#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev) +#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb) +#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work) -#ifndef readq -static inline u64 readq(void __iomem *addr) -{ - return readl(addr) | (((u64) readl(addr + 4)) << 32LL); -} #endif - -#ifndef writeq -static inline void writeq(u64 val, void __iomem *addr) -{ - writel(val & 0xffffffff, addr); - writel(val >> 32, addr + 4); -} -#endif - -#define NTB_BAR_MMIO 0 -#define NTB_BAR_23 2 -#define NTB_BAR_4 4 -#define NTB_BAR_5 5 - -#define NTB_BAR_MASK ((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\ - (1 << NTB_BAR_4)) -#define NTB_SPLITBAR_MASK ((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\ - (1 << NTB_BAR_4) | (1 << NTB_BAR_5)) - -#define NTB_HB_TIMEOUT msecs_to_jiffies(1000) - -enum ntb_hw_event { - NTB_EVENT_SW_EVENT0 = 0, - NTB_EVENT_SW_EVENT1, - NTB_EVENT_SW_EVENT2, - NTB_EVENT_HW_ERROR, - NTB_EVENT_HW_LINK_UP, - NTB_EVENT_HW_LINK_DOWN, -}; - -struct ntb_mw { - dma_addr_t phys_addr; - void __iomem *vbase; - resource_size_t bar_sz; -}; - -struct ntb_db_cb { - int (*callback)(void *data, int db_num); - unsigned int db_num; - void *data; - struct ntb_device *ndev; - struct tasklet_struct irq_work; -}; - -#define WA_SNB_ERR 0x00000001 - -struct ntb_device { - struct pci_dev *pdev; - struct msix_entry *msix_entries; - void __iomem *reg_base; - struct ntb_mw *mw; - struct { - unsigned char max_mw; - unsigned char max_spads; - unsigned char max_db_bits; - unsigned char msix_cnt; - } limits; - struct { - void __iomem *ldb; - void __iomem *ldb_mask; - void __iomem *rdb; - void __iomem *bar2_xlat; - void __iomem *bar4_xlat; - void __iomem *bar5_xlat; - void __iomem *spad_write; - void __iomem *spad_read; - void __iomem *lnk_cntl; - void __iomem *lnk_stat; - void __iomem *spci_cmd; - } reg_ofs; - struct ntb_transport *ntb_transport; - void (*event_cb)(void *handle, enum ntb_hw_event event); - - struct ntb_db_cb *db_cb; - unsigned char hw_type; - unsigned char conn_type; - unsigned char dev_type; - unsigned char num_msix; - unsigned char bits_per_vector; - unsigned char max_cbs; - unsigned char link_width; - unsigned char link_speed; - unsigned char link_status; - unsigned char split_bar; - - struct delayed_work hb_timer; - unsigned long last_ts; - - struct delayed_work lr_timer; - - struct dentry *debugfs_dir; - struct dentry *debugfs_info; - - unsigned int wa_flags; -}; - -/** - * ntb_max_cbs() - return the max callbacks - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the maximum number of callbacks - * - * RETURNS: the maximum number of callbacks - */ -static inline unsigned char ntb_max_cbs(struct ntb_device *ndev) -{ - return ndev->max_cbs; -} - -/** - * ntb_max_mw() - return the max number of memory windows - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the maximum number of memory windows - * - * RETURNS: the maximum number of memory windows - */ -static inline unsigned char ntb_max_mw(struct ntb_device *ndev) -{ - return ndev->limits.max_mw; -} - -/** - * ntb_hw_link_status() - return the hardware link status - * @ndev: pointer to ntb_device instance - * - * Returns true if the hardware is connected to the remote system - * - * RETURNS: true or false based on the hardware link state - */ -static inline bool ntb_hw_link_status(struct ntb_device *ndev) -{ - return ndev->link_status == NTB_LINK_UP; -} - -/** - * ntb_query_pdev() - return the pci_dev pointer - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the pci_dev pointer for the NTB hardware device - * - * RETURNS: a pointer to the ntb pci_dev - */ -static inline struct pci_dev *ntb_query_pdev(struct ntb_device *ndev) -{ - return ndev->pdev; -} - -/** - * ntb_query_debugfs() - return the debugfs pointer - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the debugfs directory pointer for the NTB - * hardware device - * - * RETURNS: a pointer to the debugfs directory - */ -static inline struct dentry *ntb_query_debugfs(struct ntb_device *ndev) -{ - return ndev->debugfs_dir; -} - -struct ntb_device *ntb_register_transport(struct pci_dev *pdev, - void *transport); -void ntb_unregister_transport(struct ntb_device *ndev); -void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr); -int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx, - void *data, int (*db_cb_func)(void *data, - int db_num)); -void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx); -int ntb_register_event_callback(struct ntb_device *ndev, - void (*event_cb_func)(void *handle, - enum ntb_hw_event event)); -void ntb_unregister_event_callback(struct ntb_device *ndev); -int ntb_get_max_spads(struct ntb_device *ndev); -int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); -int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); -int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); -int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); -resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); -void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); -u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); -void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int idx); -void *ntb_find_transport(struct pci_dev *pdev); - -int ntb_transport_init(struct pci_dev *pdev); -void ntb_transport_free(void *transport); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index c5f26cda9f97..9faf1c6029af 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason @@ -56,9 +58,22 @@ #include #include #include -#include "hw/intel/ntb_hw_intel.h" +#include "linux/ntb.h" +#include "linux/ntb_transport.h" -#define NTB_TRANSPORT_VERSION 3 +#define NTB_TRANSPORT_VERSION 4 +#define NTB_TRANSPORT_VER "4" +#define NTB_TRANSPORT_NAME "ntb_transport" +#define NTB_TRANSPORT_DESC "Software Queue-Pair Transport over NTB" + +MODULE_DESCRIPTION(NTB_TRANSPORT_DESC); +MODULE_VERSION(NTB_TRANSPORT_VER); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); static unsigned int transport_mtu = 0x401E; module_param(transport_mtu, uint, 0644); @@ -72,10 +87,12 @@ static unsigned int copy_bytes = 1024; module_param(copy_bytes, uint, 0644); MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); +static struct dentry *nt_debugfs_dir; + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; - /* pointers to data to be transfered */ + /* pointers to data to be transferred */ void *cb_data; void *buf; unsigned int len; @@ -94,14 +111,16 @@ struct ntb_rx_info { }; struct ntb_transport_qp { - struct ntb_transport *transport; - struct ntb_device *ndev; + struct ntb_transport_ctx *transport; + struct ntb_dev *ndev; void *cb_data; struct dma_chan *dma_chan; bool client_ready; - bool qp_link; + bool link_is_up; + u8 qp_num; /* Only 64 QP's are allowed. 0-63 */ + u64 qp_bit; struct ntb_rx_info __iomem *rx_info; struct ntb_rx_info *remote_rx_info; @@ -127,6 +146,7 @@ struct ntb_transport_qp { unsigned int rx_max_entry; unsigned int rx_max_frame; dma_cookie_t last_cookie; + struct tasklet_struct rxc_db_work; void (*event_handler)(void *data, int status); struct delayed_work link_work; @@ -153,33 +173,44 @@ struct ntb_transport_qp { }; struct ntb_transport_mw { - size_t size; + phys_addr_t phys_addr; + resource_size_t phys_size; + resource_size_t xlat_align; + resource_size_t xlat_align_size; + void __iomem *vbase; + size_t xlat_size; + size_t buff_size; void *virt_addr; dma_addr_t dma_addr; }; struct ntb_transport_client_dev { struct list_head entry; + struct ntb_transport_ctx *nt; struct device dev; }; -struct ntb_transport { +struct ntb_transport_ctx { struct list_head entry; struct list_head client_devs; - struct ntb_device *ndev; - struct ntb_transport_mw *mw; - struct ntb_transport_qp *qps; - unsigned int max_qps; - unsigned long qp_bitmap; - bool transport_link; + struct ntb_dev *ndev; + + struct ntb_transport_mw *mw_vec; + struct ntb_transport_qp *qp_vec; + unsigned int mw_count; + unsigned int qp_count; + u64 qp_bitmap; + u64 qp_bitmap_free; + + bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; }; enum { - DESC_DONE_FLAG = 1 << 0, - LINK_DOWN_FLAG = 1 << 1, + DESC_DONE_FLAG = BIT(0), + LINK_DOWN_FLAG = BIT(1), }; struct ntb_payload_header { @@ -200,68 +231,69 @@ enum { MAX_SPAD, }; -#define QP_TO_MW(ndev, qp) ((qp) % ntb_max_mw(ndev)) +#define dev_client_dev(__dev) \ + container_of((__dev), struct ntb_transport_client_dev, dev) + +#define drv_client(__drv) \ + container_of((__drv), struct ntb_transport_client, driver) + +#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 -static int ntb_match_bus(struct device *dev, struct device_driver *drv) +static void ntb_transport_rxc_db(unsigned long data); +static const struct ntb_ctx_ops ntb_transport_ops; +static struct ntb_client ntb_transport_client; + +static int ntb_transport_bus_match(struct device *dev, + struct device_driver *drv) { return !strncmp(dev_name(dev), drv->name, strlen(drv->name)); } -static int ntb_client_probe(struct device *dev) +static int ntb_transport_bus_probe(struct device *dev) { - const struct ntb_client *drv = container_of(dev->driver, - struct ntb_client, driver); - struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev); + const struct ntb_transport_client *client; int rc = -EINVAL; get_device(dev); - if (drv && drv->probe) - rc = drv->probe(pdev); + + client = drv_client(dev->driver); + rc = client->probe(dev); if (rc) put_device(dev); return rc; } -static int ntb_client_remove(struct device *dev) +static int ntb_transport_bus_remove(struct device *dev) { - const struct ntb_client *drv = container_of(dev->driver, - struct ntb_client, driver); - struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev); + const struct ntb_transport_client *client; - if (drv && drv->remove) - drv->remove(pdev); + client = drv_client(dev->driver); + client->remove(dev); put_device(dev); return 0; } -static struct bus_type ntb_bus_type = { - .name = "ntb_bus", - .match = ntb_match_bus, - .probe = ntb_client_probe, - .remove = ntb_client_remove, +static struct bus_type ntb_transport_bus = { + .name = "ntb_transport", + .match = ntb_transport_bus_match, + .probe = ntb_transport_bus_probe, + .remove = ntb_transport_bus_remove, }; static LIST_HEAD(ntb_transport_list); -static int ntb_bus_init(struct ntb_transport *nt) +static int ntb_bus_init(struct ntb_transport_ctx *nt) { - if (list_empty(&ntb_transport_list)) { - int rc = bus_register(&ntb_bus_type); - if (rc) - return rc; - } - list_add(&nt->entry, &ntb_transport_list); - return 0; } -static void ntb_bus_remove(struct ntb_transport *nt) +static void ntb_bus_remove(struct ntb_transport_ctx *nt) { struct ntb_transport_client_dev *client_dev, *cd; @@ -273,29 +305,26 @@ static void ntb_bus_remove(struct ntb_transport *nt) } list_del(&nt->entry); - - if (list_empty(&ntb_transport_list)) - bus_unregister(&ntb_bus_type); } -static void ntb_client_release(struct device *dev) +static void ntb_transport_client_release(struct device *dev) { struct ntb_transport_client_dev *client_dev; - client_dev = container_of(dev, struct ntb_transport_client_dev, dev); + client_dev = dev_client_dev(dev); kfree(client_dev); } /** - * ntb_unregister_client_dev - Unregister NTB client device + * ntb_transport_unregister_client_dev - Unregister NTB client device * @device_name: Name of NTB client device * * Unregister an NTB client device with the NTB transport layer */ -void ntb_unregister_client_dev(char *device_name) +void ntb_transport_unregister_client_dev(char *device_name) { struct ntb_transport_client_dev *client, *cd; - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; list_for_each_entry(nt, &ntb_transport_list, entry) list_for_each_entry_safe(client, cd, &nt->client_devs, entry) @@ -305,18 +334,18 @@ void ntb_unregister_client_dev(char *device_name) device_unregister(&client->dev); } } -EXPORT_SYMBOL_GPL(ntb_unregister_client_dev); +EXPORT_SYMBOL_GPL(ntb_transport_unregister_client_dev); /** - * ntb_register_client_dev - Register NTB client device + * ntb_transport_register_client_dev - Register NTB client device * @device_name: Name of NTB client device * * Register an NTB client device with the NTB transport layer */ -int ntb_register_client_dev(char *device_name) +int ntb_transport_register_client_dev(char *device_name) { struct ntb_transport_client_dev *client_dev; - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; int rc, i = 0; if (list_empty(&ntb_transport_list)) @@ -325,7 +354,7 @@ int ntb_register_client_dev(char *device_name) list_for_each_entry(nt, &ntb_transport_list, entry) { struct device *dev; - client_dev = kzalloc(sizeof(struct ntb_transport_client_dev), + client_dev = kzalloc(sizeof(*client_dev), GFP_KERNEL); if (!client_dev) { rc = -ENOMEM; @@ -336,9 +365,9 @@ int ntb_register_client_dev(char *device_name) /* setup and register client devices */ dev_set_name(dev, "%s%d", device_name, i); - dev->bus = &ntb_bus_type; - dev->release = ntb_client_release; - dev->parent = &ntb_query_pdev(nt->ndev)->dev; + dev->bus = &ntb_transport_bus; + dev->release = ntb_transport_client_release; + dev->parent = &nt->ndev->dev; rc = device_register(dev); if (rc) { @@ -353,11 +382,11 @@ int ntb_register_client_dev(char *device_name) return 0; err: - ntb_unregister_client_dev(device_name); + ntb_transport_unregister_client_dev(device_name); return rc; } -EXPORT_SYMBOL_GPL(ntb_register_client_dev); +EXPORT_SYMBOL_GPL(ntb_transport_register_client_dev); /** * ntb_transport_register_client - Register NTB client driver @@ -367,9 +396,9 @@ EXPORT_SYMBOL_GPL(ntb_register_client_dev); * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -int ntb_transport_register_client(struct ntb_client *drv) +int ntb_transport_register_client(struct ntb_transport_client *drv) { - drv->driver.bus = &ntb_bus_type; + drv->driver.bus = &ntb_transport_bus; if (list_empty(&ntb_transport_list)) return -ENODEV; @@ -386,7 +415,7 @@ EXPORT_SYMBOL_GPL(ntb_transport_register_client); * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -void ntb_transport_unregister_client(struct ntb_client *drv) +void ntb_transport_unregister_client(struct ntb_transport_client *drv) { driver_unregister(&drv->driver); } @@ -452,8 +481,8 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, "tx_max_entry - \t%u\n", qp->tx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "\nQP Link %s\n", (qp->qp_link == NTB_LINK_UP) ? - "Up" : "Down"); + "\nQP Link %s\n", + qp->link_is_up ? "Up" : "Down"); if (out_offset > out_count) out_offset = out_count; @@ -497,26 +526,31 @@ out: return entry; } -static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, - unsigned int qp_num) +static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, + unsigned int qp_num) { - struct ntb_transport_qp *qp = &nt->qps[qp_num]; + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + struct ntb_transport_mw *mw; unsigned int rx_size, num_qps_mw; - u8 mw_num, mw_max; + unsigned int mw_num, mw_count, qp_count; unsigned int i; - mw_max = ntb_max_mw(nt->ndev); - mw_num = QP_TO_MW(nt->ndev, qp_num); + mw_count = nt->mw_count; + qp_count = nt->qp_count; - WARN_ON(nt->mw[mw_num].virt_addr == NULL); + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; - if (nt->max_qps % mw_max && mw_num + 1 < nt->max_qps / mw_max) - num_qps_mw = nt->max_qps / mw_max + 1; + if (!mw->virt_addr) + return -ENOMEM; + + if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + num_qps_mw = qp_count / mw_count + 1; else - num_qps_mw = nt->max_qps / mw_max; + num_qps_mw = qp_count / mw_count; - rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; - qp->rx_buff = nt->mw[mw_num].virt_addr + qp_num / mw_max * rx_size; + rx_size = (unsigned int)mw->xlat_size / num_qps_mw; + qp->rx_buff = mw->virt_addr + rx_size * qp_num / mw_count; rx_size -= sizeof(struct ntb_rx_info); qp->remote_rx_info = qp->rx_buff + rx_size; @@ -530,49 +564,63 @@ static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, /* setup the hdr offsets with 0's */ for (i = 0; i < qp->rx_max_entry; i++) { - void *offset = qp->rx_buff + qp->rx_max_frame * (i + 1) - - sizeof(struct ntb_payload_header); + void *offset = (qp->rx_buff + qp->rx_max_frame * (i + 1) - + sizeof(struct ntb_payload_header)); memset(offset, 0, sizeof(struct ntb_payload_header)); } qp->rx_pkts = 0; qp->tx_pkts = 0; qp->tx_index = 0; + + return 0; } -static void ntb_free_mw(struct ntb_transport *nt, int num_mw) +static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { - struct ntb_transport_mw *mw = &nt->mw[num_mw]; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + struct pci_dev *pdev = nt->ndev->pdev; if (!mw->virt_addr) return; - dma_free_coherent(&pdev->dev, mw->size, mw->virt_addr, mw->dma_addr); + ntb_mw_clear_trans(nt->ndev, num_mw); + dma_free_coherent(&pdev->dev, mw->buff_size, + mw->virt_addr, mw->dma_addr); + mw->xlat_size = 0; + mw->buff_size = 0; mw->virt_addr = NULL; } -static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size) +static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, + unsigned int size) { - struct ntb_transport_mw *mw = &nt->mw[num_mw]; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + struct pci_dev *pdev = nt->ndev->pdev; + unsigned int xlat_size, buff_size; + int rc; + + xlat_size = round_up(size, mw->xlat_align_size); + buff_size = round_up(size, mw->xlat_align); /* No need to re-setup */ - if (mw->size == ALIGN(size, 4096)) + if (mw->xlat_size == xlat_size) return 0; - if (mw->size != 0) + if (mw->buff_size) ntb_free_mw(nt, num_mw); - /* Alloc memory for receiving data. Must be 4k aligned */ - mw->size = ALIGN(size, 4096); + /* Alloc memory for receiving data. Must be aligned */ + mw->xlat_size = xlat_size; + mw->buff_size = buff_size; - mw->virt_addr = dma_alloc_coherent(&pdev->dev, mw->size, &mw->dma_addr, - GFP_KERNEL); + mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, + &mw->dma_addr, GFP_KERNEL); if (!mw->virt_addr) { - mw->size = 0; - dev_err(&pdev->dev, "Unable to allocate MW buffer of size %d\n", - (int) mw->size); + mw->xlat_size = 0; + mw->buff_size = 0; + dev_err(&pdev->dev, "Unable to alloc MW buff of size %d\n", + buff_size); return -ENOMEM; } @@ -582,34 +630,39 @@ static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size) * is a requirement of the hardware. It is recommended to setup CMA * for BAR sizes equal or greater than 4MB. */ - if (!IS_ALIGNED(mw->dma_addr, mw->size)) { - dev_err(&pdev->dev, "DMA memory %pad not aligned to BAR size\n", + if (!IS_ALIGNED(mw->dma_addr, mw->xlat_align)) { + dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", &mw->dma_addr); ntb_free_mw(nt, num_mw); return -ENOMEM; } /* Notify HW the memory location of the receive buffer */ - ntb_set_mw_addr(nt->ndev, num_mw, mw->dma_addr); + rc = ntb_mw_set_trans(nt->ndev, num_mw, mw->dma_addr, mw->xlat_size); + if (rc) { + dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw); + ntb_free_mw(nt, num_mw); + return -EIO; + } return 0; } static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) { - struct ntb_transport *nt = qp->transport; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_ctx *nt = qp->transport; + struct pci_dev *pdev = nt->ndev->pdev; - if (qp->qp_link == NTB_LINK_DOWN) { + if (qp->link_is_up) { cancel_delayed_work_sync(&qp->link_work); return; } - if (qp->event_handler) - qp->event_handler(qp->cb_data, NTB_LINK_DOWN); - dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); - qp->qp_link = NTB_LINK_DOWN; + qp->link_is_up = false; + + if (qp->event_handler) + qp->event_handler(qp->cb_data, qp->link_is_up); } static void ntb_qp_link_cleanup_work(struct work_struct *work) @@ -617,11 +670,11 @@ static void ntb_qp_link_cleanup_work(struct work_struct *work) struct ntb_transport_qp *qp = container_of(work, struct ntb_transport_qp, link_cleanup); - struct ntb_transport *nt = qp->transport; + struct ntb_transport_ctx *nt = qp->transport; ntb_qp_link_cleanup(qp); - if (nt->transport_link == NTB_LINK_UP) + if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } @@ -631,180 +684,132 @@ static void ntb_qp_link_down(struct ntb_transport_qp *qp) schedule_work(&qp->link_cleanup); } -static void ntb_transport_link_cleanup(struct ntb_transport *nt) +static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) { + struct ntb_transport_qp *qp; + u64 qp_bitmap_alloc; int i; - /* Pass along the info to any clients */ - for (i = 0; i < nt->max_qps; i++) - if (!test_bit(i, &nt->qp_bitmap)) - ntb_qp_link_cleanup(&nt->qps[i]); + qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; - if (nt->transport_link == NTB_LINK_DOWN) + /* Pass along the info to any clients */ + for (i = 0; i < nt->qp_count; i++) + if (qp_bitmap_alloc & BIT_ULL(i)) { + qp = &nt->qp_vec[i]; + ntb_qp_link_cleanup(qp); + cancel_work_sync(&qp->link_cleanup); + cancel_delayed_work_sync(&qp->link_work); + } + + if (!nt->link_is_up) cancel_delayed_work_sync(&nt->link_work); - else - nt->transport_link = NTB_LINK_DOWN; /* The scratchpad registers keep the values if the remote side * goes down, blast them now to give them a sane value the next * time they are accessed */ for (i = 0; i < MAX_SPAD; i++) - ntb_write_local_spad(nt->ndev, i, 0); + ntb_spad_write(nt->ndev, i, 0); } static void ntb_transport_link_cleanup_work(struct work_struct *work) { - struct ntb_transport *nt = container_of(work, struct ntb_transport, - link_cleanup); + struct ntb_transport_ctx *nt = + container_of(work, struct ntb_transport_ctx, link_cleanup); ntb_transport_link_cleanup(nt); } -static void ntb_transport_event_callback(void *data, enum ntb_hw_event event) +static void ntb_transport_event_callback(void *data) { - struct ntb_transport *nt = data; + struct ntb_transport_ctx *nt = data; - switch (event) { - case NTB_EVENT_HW_LINK_UP: + if (ntb_link_is_up(nt->ndev, NULL, NULL) == 1) schedule_delayed_work(&nt->link_work, 0); - break; - case NTB_EVENT_HW_LINK_DOWN: + else schedule_work(&nt->link_cleanup); - break; - default: - BUG(); - } } static void ntb_transport_link_work(struct work_struct *work) { - struct ntb_transport *nt = container_of(work, struct ntb_transport, - link_work.work); - struct ntb_device *ndev = nt->ndev; - struct pci_dev *pdev = ntb_query_pdev(ndev); + struct ntb_transport_ctx *nt = + container_of(work, struct ntb_transport_ctx, link_work.work); + struct ntb_dev *ndev = nt->ndev; + struct pci_dev *pdev = ndev->pdev; + resource_size_t size; u32 val; - int rc, i; + int rc, i, spad; /* send the local info, in the opposite order of the way we read it */ - for (i = 0; i < ntb_max_mw(ndev); i++) { - rc = ntb_write_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), - ntb_get_mw_size(ndev, i) >> 32); - if (rc) { - dev_err(&pdev->dev, "Error writing %u to remote spad %d\n", - (u32)(ntb_get_mw_size(ndev, i) >> 32), - MW0_SZ_HIGH + (i * 2)); - goto out; - } + for (i = 0; i < nt->mw_count; i++) { + size = nt->mw_vec[i].phys_size; - rc = ntb_write_remote_spad(ndev, MW0_SZ_LOW + (i * 2), - (u32) ntb_get_mw_size(ndev, i)); - if (rc) { - dev_err(&pdev->dev, "Error writing %u to remote spad %d\n", - (u32) ntb_get_mw_size(ndev, i), - MW0_SZ_LOW + (i * 2)); - goto out; - } + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + + spad = MW0_SZ_HIGH + (i * 2); + ntb_peer_spad_write(ndev, spad, (u32)(size >> 32)); + + spad = MW0_SZ_LOW + (i * 2); + ntb_peer_spad_write(ndev, spad, (u32)size); } - rc = ntb_write_remote_spad(ndev, NUM_MWS, ntb_max_mw(ndev)); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - ntb_max_mw(ndev), NUM_MWS); - goto out; - } + ntb_peer_spad_write(ndev, NUM_MWS, nt->mw_count); - rc = ntb_write_remote_spad(ndev, NUM_QPS, nt->max_qps); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - nt->max_qps, NUM_QPS); - goto out; - } + ntb_peer_spad_write(ndev, NUM_QPS, nt->qp_count); - rc = ntb_write_remote_spad(ndev, VERSION, NTB_TRANSPORT_VERSION); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - NTB_TRANSPORT_VERSION, VERSION); - goto out; - } + ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION); /* Query the remote side for its info */ - rc = ntb_read_remote_spad(ndev, VERSION, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", VERSION); - goto out; - } - + val = ntb_peer_spad_read(ndev, VERSION); + dev_dbg(&pdev->dev, "Remote version = %d\n", val); if (val != NTB_TRANSPORT_VERSION) goto out; - dev_dbg(&pdev->dev, "Remote version = %d\n", val); - rc = ntb_read_remote_spad(ndev, NUM_QPS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_QPS); - goto out; - } - - if (val != nt->max_qps) - goto out; + val = ntb_peer_spad_read(ndev, NUM_QPS); dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val); - - rc = ntb_read_remote_spad(ndev, NUM_MWS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_MWS); + if (val != nt->qp_count) goto out; - } - if (val != ntb_max_mw(ndev)) - goto out; + val = ntb_peer_spad_read(ndev, NUM_MWS); dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val); + if (val != nt->mw_count) + goto out; - for (i = 0; i < ntb_max_mw(ndev); i++) { + for (i = 0; i < nt->mw_count; i++) { u64 val64; - rc = ntb_read_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", - MW0_SZ_HIGH + (i * 2)); - goto out1; - } - - val64 = (u64) val << 32; - - rc = ntb_read_remote_spad(ndev, MW0_SZ_LOW + (i * 2), &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", - MW0_SZ_LOW + (i * 2)); - goto out1; - } + val = ntb_peer_spad_read(ndev, MW0_SZ_HIGH + (i * 2)); + val64 = (u64)val << 32; + val = ntb_peer_spad_read(ndev, MW0_SZ_LOW + (i * 2)); val64 |= val; - dev_dbg(&pdev->dev, "Remote MW%d size = %llu\n", i, val64); + dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64); rc = ntb_set_mw(nt, i, val64); if (rc) goto out1; } - nt->transport_link = NTB_LINK_UP; + nt->link_is_up = true; - for (i = 0; i < nt->max_qps; i++) { - struct ntb_transport_qp *qp = &nt->qps[i]; + for (i = 0; i < nt->qp_count; i++) { + struct ntb_transport_qp *qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); - if (qp->client_ready == NTB_LINK_UP) + if (qp->client_ready) schedule_delayed_work(&qp->link_work, 0); } return; out1: - for (i = 0; i < ntb_max_mw(ndev); i++) + for (i = 0; i < nt->mw_count; i++) ntb_free_mw(nt, i); out: - if (ntb_hw_link_status(ndev)) + if (ntb_link_is_up(ndev, NULL, NULL) == 1) schedule_delayed_work(&nt->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } @@ -814,73 +819,73 @@ static void ntb_qp_link_work(struct work_struct *work) struct ntb_transport_qp *qp = container_of(work, struct ntb_transport_qp, link_work.work); - struct pci_dev *pdev = ntb_query_pdev(qp->ndev); - struct ntb_transport *nt = qp->transport; - int rc, val; + struct pci_dev *pdev = qp->ndev->pdev; + struct ntb_transport_ctx *nt = qp->transport; + int val; - WARN_ON(nt->transport_link != NTB_LINK_UP); + WARN_ON(!nt->link_is_up); - rc = ntb_read_local_spad(nt->ndev, QP_LINKS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS); - return; - } + val = ntb_spad_read(nt->ndev, QP_LINKS); - rc = ntb_write_remote_spad(nt->ndev, QP_LINKS, val | 1 << qp->qp_num); - if (rc) - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - val | 1 << qp->qp_num, QP_LINKS); + ntb_peer_spad_write(nt->ndev, QP_LINKS, val | BIT(qp->qp_num)); /* query remote spad for qp ready bits */ - rc = ntb_read_remote_spad(nt->ndev, QP_LINKS, &val); - if (rc) - dev_err(&pdev->dev, "Error reading remote spad %d\n", QP_LINKS); - + ntb_peer_spad_read(nt->ndev, QP_LINKS); dev_dbg(&pdev->dev, "Remote QP link status = %x\n", val); /* See if the remote side is up */ - if (1 << qp->qp_num & val) { - qp->qp_link = NTB_LINK_UP; - + if (val & BIT(qp->qp_num)) { dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num); + qp->link_is_up = true; + if (qp->event_handler) - qp->event_handler(qp->cb_data, NTB_LINK_UP); - } else if (nt->transport_link == NTB_LINK_UP) + qp->event_handler(qp->cb_data, qp->link_is_up); + } else if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } -static int ntb_transport_init_queue(struct ntb_transport *nt, +static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num) { struct ntb_transport_qp *qp; + struct ntb_transport_mw *mw; + phys_addr_t mw_base; + resource_size_t mw_size; unsigned int num_qps_mw, tx_size; - u8 mw_num, mw_max; + unsigned int mw_num, mw_count, qp_count; u64 qp_offset; - mw_max = ntb_max_mw(nt->ndev); - mw_num = QP_TO_MW(nt->ndev, qp_num); + mw_count = nt->mw_count; + qp_count = nt->qp_count; - qp = &nt->qps[qp_num]; + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; + + qp = &nt->qp_vec[qp_num]; qp->qp_num = qp_num; qp->transport = nt; qp->ndev = nt->ndev; - qp->qp_link = NTB_LINK_DOWN; - qp->client_ready = NTB_LINK_DOWN; + qp->link_is_up = false; + qp->client_ready = false; qp->event_handler = NULL; - if (nt->max_qps % mw_max && mw_num + 1 < nt->max_qps / mw_max) - num_qps_mw = nt->max_qps / mw_max + 1; + if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + num_qps_mw = qp_count / mw_count + 1; else - num_qps_mw = nt->max_qps / mw_max; + num_qps_mw = qp_count / mw_count; - tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; - qp_offset = qp_num / mw_max * tx_size; - qp->tx_mw = ntb_get_mw_vbase(nt->ndev, mw_num) + qp_offset; + mw_base = nt->mw_vec[mw_num].phys_addr; + mw_size = nt->mw_vec[mw_num].phys_size; + + tx_size = (unsigned int)mw_size / num_qps_mw; + qp_offset = tx_size * qp_num / mw_count; + + qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; if (!qp->tx_mw) return -EINVAL; - qp->tx_mw_phys = ntb_get_mw_base(qp->ndev, mw_num) + qp_offset; + qp->tx_mw_phys = mw_base + qp_offset; if (!qp->tx_mw_phys) return -EINVAL; @@ -891,16 +896,19 @@ static int ntb_transport_init_queue(struct ntb_transport *nt, qp->tx_max_frame = min(transport_mtu, tx_size / 2); qp->tx_max_entry = tx_size / qp->tx_max_frame; - if (ntb_query_debugfs(nt->ndev)) { + if (nt_debugfs_dir) { char debugfs_name[4]; snprintf(debugfs_name, 4, "qp%d", qp_num); qp->debugfs_dir = debugfs_create_dir(debugfs_name, - ntb_query_debugfs(nt->ndev)); + nt_debugfs_dir); qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, qp->debugfs_dir, qp, &ntb_qp_debugfs_stats); + } else { + qp->debugfs_dir = NULL; + qp->debugfs_stats = NULL; } INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work); @@ -914,46 +922,84 @@ static int ntb_transport_init_queue(struct ntb_transport *nt, INIT_LIST_HEAD(&qp->rx_free_q); INIT_LIST_HEAD(&qp->tx_free_q); + tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db, + (unsigned long)qp); + return 0; } -int ntb_transport_init(struct pci_dev *pdev) +static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) { - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; + struct ntb_transport_mw *mw; + unsigned int mw_count, qp_count; + u64 qp_bitmap; int rc, i; - nt = kzalloc(sizeof(struct ntb_transport), GFP_KERNEL); + if (ntb_db_is_unsafe(ndev)) + dev_dbg(&ndev->dev, + "doorbell is unsafe, proceed anyway...\n"); + if (ntb_spad_is_unsafe(ndev)) + dev_dbg(&ndev->dev, + "scratchpad is unsafe, proceed anyway...\n"); + + nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) return -ENOMEM; - nt->ndev = ntb_register_transport(pdev, nt); - if (!nt->ndev) { - rc = -EIO; + nt->ndev = ndev; + + mw_count = ntb_mw_count(ndev); + + nt->mw_count = mw_count; + + nt->mw_vec = kcalloc(mw_count, sizeof(*nt->mw_vec), GFP_KERNEL); + if (!nt->mw_vec) { + rc = -ENOMEM; goto err; } - nt->mw = kcalloc(ntb_max_mw(nt->ndev), sizeof(struct ntb_transport_mw), - GFP_KERNEL); - if (!nt->mw) { - rc = -ENOMEM; - goto err1; + for (i = 0; i < mw_count; i++) { + mw = &nt->mw_vec[i]; + + rc = ntb_mw_get_range(ndev, i, &mw->phys_addr, &mw->phys_size, + &mw->xlat_align, &mw->xlat_align_size); + if (rc) + goto err1; + + mw->vbase = ioremap(mw->phys_addr, mw->phys_size); + if (!mw->vbase) { + rc = -ENOMEM; + goto err1; + } + + mw->buff_size = 0; + mw->xlat_size = 0; + mw->virt_addr = NULL; + mw->dma_addr = 0; } - if (max_num_clients) - nt->max_qps = min(ntb_max_cbs(nt->ndev), max_num_clients); - else - nt->max_qps = min(ntb_max_cbs(nt->ndev), ntb_max_mw(nt->ndev)); + qp_bitmap = ntb_db_valid_mask(ndev); - nt->qps = kcalloc(nt->max_qps, sizeof(struct ntb_transport_qp), - GFP_KERNEL); - if (!nt->qps) { + qp_count = ilog2(qp_bitmap); + if (max_num_clients && max_num_clients < qp_count) + qp_count = max_num_clients; + else if (mw_count < qp_count) + qp_count = mw_count; + + qp_bitmap &= BIT_ULL(qp_count) - 1; + + nt->qp_count = qp_count; + nt->qp_bitmap = qp_bitmap; + nt->qp_bitmap_free = qp_bitmap; + + nt->qp_vec = kcalloc(qp_count, sizeof(*nt->qp_vec), GFP_KERNEL); + if (!nt->qp_vec) { rc = -ENOMEM; goto err2; } - nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; - - for (i = 0; i < nt->max_qps; i++) { + for (i = 0; i < qp_count; i++) { rc = ntb_transport_init_queue(nt, i); if (rc) goto err3; @@ -962,8 +1008,7 @@ int ntb_transport_init(struct pci_dev *pdev) INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work); - rc = ntb_register_event_callback(nt->ndev, - ntb_transport_event_callback); + rc = ntb_set_ctx(ndev, nt, &ntb_transport_ops); if (rc) goto err3; @@ -972,51 +1017,61 @@ int ntb_transport_init(struct pci_dev *pdev) if (rc) goto err4; - if (ntb_hw_link_status(nt->ndev)) - schedule_delayed_work(&nt->link_work, 0); + nt->link_is_up = false; + ntb_link_enable(ndev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ndev); return 0; err4: - ntb_unregister_event_callback(nt->ndev); + ntb_clear_ctx(ndev); err3: - kfree(nt->qps); + kfree(nt->qp_vec); err2: - kfree(nt->mw); + kfree(nt->mw_vec); err1: - ntb_unregister_transport(nt->ndev); + while (i--) { + mw = &nt->mw_vec[i]; + iounmap(mw->vbase); + } err: kfree(nt); return rc; } -void ntb_transport_free(void *transport) +static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev) { - struct ntb_transport *nt = transport; - struct ntb_device *ndev = nt->ndev; + struct ntb_transport_ctx *nt = ndev->ctx; + struct ntb_transport_qp *qp; + u64 qp_bitmap_alloc; int i; ntb_transport_link_cleanup(nt); + cancel_work_sync(&nt->link_cleanup); + cancel_delayed_work_sync(&nt->link_work); + + qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; /* verify that all the qp's are freed */ - for (i = 0; i < nt->max_qps; i++) { - if (!test_bit(i, &nt->qp_bitmap)) - ntb_transport_free_queue(&nt->qps[i]); - debugfs_remove_recursive(nt->qps[i].debugfs_dir); + for (i = 0; i < nt->qp_count; i++) { + qp = &nt->qp_vec[i]; + if (qp_bitmap_alloc & BIT_ULL(i)) + ntb_transport_free_queue(qp); + debugfs_remove_recursive(qp->debugfs_dir); } + ntb_link_disable(ndev); + ntb_clear_ctx(ndev); + ntb_bus_remove(nt); - cancel_delayed_work_sync(&nt->link_work); - - ntb_unregister_event_callback(ndev); - - for (i = 0; i < ntb_max_mw(ndev); i++) + for (i = nt->mw_count; i--; ) { ntb_free_mw(nt, i); + iounmap(nt->mw_vec[i].vbase); + } - kfree(nt->qps); - kfree(nt->mw); - ntb_unregister_transport(ndev); + kfree(nt->qp_vec); + kfree(nt->mw_vec); kfree(nt); } @@ -1028,15 +1083,13 @@ static void ntb_rx_copy_callback(void *data) unsigned int len = entry->len; struct ntb_payload_header *hdr = entry->rx_hdr; - /* Ensure that the data is fully copied out before clearing the flag */ - wmb(); hdr->flags = 0; iowrite32(entry->index, &qp->rx_info->entry); ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); - if (qp->rx_handler && qp->client_ready == NTB_LINK_UP) + if (qp->rx_handler && qp->client_ready) qp->rx_handler(qp, qp->cb_data, cb_data, len); } @@ -1047,6 +1100,9 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) memcpy(buf, offset, len); + /* Ensure that the data is fully copied out before clearing the flag */ + wmb(); + ntb_rx_copy_callback(entry); } @@ -1071,8 +1127,8 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, goto err_wait; device = chan->device; - pay_off = (size_t) offset & ~PAGE_MASK; - buff_off = (size_t) buf & ~PAGE_MASK; + pay_off = (size_t)offset & ~PAGE_MASK; + buff_off = (size_t)buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) goto err_wait; @@ -1138,86 +1194,104 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) struct ntb_payload_header *hdr; struct ntb_queue_entry *entry; void *offset; + int rc; offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header); - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); - if (!entry) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "no buffer - HDR ver %u, len %d, flags %x\n", - hdr->ver, hdr->len, hdr->flags); - qp->rx_err_no_buf++; - return -ENOMEM; - } + dev_dbg(&qp->ndev->pdev->dev, "qp %d: RX ver %u len %d flags %x\n", + qp->qp_num, hdr->ver, hdr->len, hdr->flags); if (!(hdr->flags & DESC_DONE_FLAG)) { - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, - &qp->rx_pend_q); + dev_dbg(&qp->ndev->pdev->dev, "done flag not set\n"); qp->rx_ring_empty++; return -EAGAIN; } - if (hdr->ver != (u32) qp->rx_pkts) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "qp %d: version mismatch, expected %llu - got %u\n", - qp->qp_num, qp->rx_pkts, hdr->ver); - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, - &qp->rx_pend_q); + if (hdr->flags & LINK_DOWN_FLAG) { + dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n"); + ntb_qp_link_down(qp); + hdr->flags = 0; + iowrite32(qp->rx_index, &qp->rx_info->entry); + return 0; + } + + if (hdr->ver != (u32)qp->rx_pkts) { + dev_dbg(&qp->ndev->pdev->dev, + "version mismatch, expected %llu - got %u\n", + qp->rx_pkts, hdr->ver); qp->rx_err_ver++; return -EIO; } - if (hdr->flags & LINK_DOWN_FLAG) { - ntb_qp_link_down(qp); + entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + if (!entry) { + dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n"); + qp->rx_err_no_buf++; + rc = -ENOMEM; goto err; } - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "rx offset %u, ver %u - %d payload received, buf size %d\n", + if (hdr->len > entry->len) { + dev_dbg(&qp->ndev->pdev->dev, + "receive buffer overflow! Wanted %d got %d\n", + hdr->len, entry->len); + qp->rx_err_oflow++; + + rc = -EIO; + goto err; + } + + dev_dbg(&qp->ndev->pdev->dev, + "RX OK index %u ver %u size %d into buf size %d\n", qp->rx_index, hdr->ver, hdr->len, entry->len); qp->rx_bytes += hdr->len; qp->rx_pkts++; - if (hdr->len > entry->len) { - qp->rx_err_oflow++; - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "RX overflow! Wanted %d got %d\n", - hdr->len, entry->len); - - goto err; - } - entry->index = qp->rx_index; entry->rx_hdr = hdr; ntb_async_rx(entry, offset, hdr->len); -out: qp->rx_index++; qp->rx_index %= qp->rx_max_entry; return 0; err: - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q); - /* Ensure that the data is fully copied out before clearing the flag */ - wmb(); + /* FIXME: if this syncrhonous update of the rx_index gets ahead of + * asyncrhonous ntb_rx_copy_callback of previous entry, there are three + * scenarios: + * + * 1) The peer might miss this update, but observe the update + * from the memcpy completion callback. In this case, the buffer will + * not be freed on the peer to be reused for a different packet. The + * successful rx of a later packet would clear the condition, but the + * condition could persist if several rx fail in a row. + * + * 2) The peer may observe this update before the asyncrhonous copy of + * prior packets is completed. The peer may overwrite the buffers of + * the prior packets before they are copied. + * + * 3) Both: the peer may observe the update, and then observe the index + * decrement by the asynchronous completion callback. Who knows what + * badness that will cause. + */ hdr->flags = 0; iowrite32(qp->rx_index, &qp->rx_info->entry); - goto out; + return rc; } -static int ntb_transport_rxc_db(void *data, int db_num) +static void ntb_transport_rxc_db(unsigned long data) { - struct ntb_transport_qp *qp = data; + struct ntb_transport_qp *qp = (void *)data; int rc, i; - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%s: doorbell %d received\n", - __func__, db_num); + dev_dbg(&qp->ndev->pdev->dev, "%s: doorbell %d received\n", + __func__, qp->qp_num); /* Limit the number of packets processed in a single interrupt to * provide fairness to others @@ -1231,7 +1305,21 @@ static int ntb_transport_rxc_db(void *data, int db_num) if (qp->dma_chan) dma_async_issue_pending(qp->dma_chan); - return i; + if (i == qp->rx_max_entry) { + /* there is more work to do */ + tasklet_schedule(&qp->rxc_db_work); + } else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) { + /* the doorbell bit is set: clear it */ + ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num)); + /* ntb_db_read ensures ntb_db_clear write is committed */ + ntb_db_read(qp->ndev); + + /* an interrupt may have arrived between finishing + * ntb_process_rxc and clearing the doorbell bit: + * there might be some more work to do. + */ + tasklet_schedule(&qp->rxc_db_work); + } } static void ntb_tx_copy_callback(void *data) @@ -1240,11 +1328,9 @@ static void ntb_tx_copy_callback(void *data) struct ntb_transport_qp *qp = entry->qp; struct ntb_payload_header __iomem *hdr = entry->tx_hdr; - /* Ensure that the data is fully copied out before setting the flags */ - wmb(); iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); - ntb_ring_doorbell(qp->ndev, qp->qp_num); + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); /* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these @@ -1265,6 +1351,9 @@ static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) { memcpy_toio(offset, entry->buf, entry->len); + /* Ensure that the data is fully copied out before setting the flags */ + wmb(); + ntb_tx_copy_callback(entry); } @@ -1288,7 +1377,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, entry->tx_hdr = hdr; iowrite32(entry->len, &hdr->len); - iowrite32((u32) qp->tx_pkts, &hdr->ver); + iowrite32((u32)qp->tx_pkts, &hdr->ver); if (!chan) goto err; @@ -1298,8 +1387,8 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, device = chan->device; dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index; - buff_off = (size_t) buf & ~PAGE_MASK; - dest_off = (size_t) dest & ~PAGE_MASK; + buff_off = (size_t)buf & ~PAGE_MASK; + dest_off = (size_t)dest & ~PAGE_MASK; if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) goto err; @@ -1347,9 +1436,6 @@ err: static int ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - tx %u, entry len %d flags %x buff %p\n", - qp->tx_pkts, qp->tx_index, entry->len, entry->flags, - entry->buf); if (qp->tx_index == qp->remote_rx_info->entry) { qp->tx_ring_full++; return -EAGAIN; @@ -1376,14 +1462,14 @@ static int ntb_process_tx(struct ntb_transport_qp *qp, static void ntb_send_link_down(struct ntb_transport_qp *qp) { - struct pci_dev *pdev = ntb_query_pdev(qp->ndev); + struct pci_dev *pdev = qp->ndev->pdev; struct ntb_queue_entry *entry; int i, rc; - if (qp->qp_link == NTB_LINK_DOWN) + if (!qp->link_is_up) return; - qp->qp_link = NTB_LINK_DOWN; + qp->link_is_up = false; dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { @@ -1422,18 +1508,21 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) * RETURNS: pointer to newly created ntb_queue, NULL on error. */ struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, +ntb_transport_create_queue(void *data, struct device *client_dev, const struct ntb_queue_handlers *handlers) { + struct ntb_dev *ndev; + struct pci_dev *pdev; + struct ntb_transport_ctx *nt; struct ntb_queue_entry *entry; struct ntb_transport_qp *qp; - struct ntb_transport *nt; + u64 qp_bit; unsigned int free_queue; - int rc, i; + int i; - nt = ntb_find_transport(pdev); - if (!nt) - goto err; + ndev = dev_ntb(client_dev->parent); + pdev = ndev->pdev; + nt = ndev->ctx; free_queue = ffs(nt->qp_bitmap); if (!free_queue) @@ -1442,9 +1531,11 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, /* decrement free_queue to make it zero based */ free_queue--; - clear_bit(free_queue, &nt->qp_bitmap); + qp = &nt->qp_vec[free_queue]; + qp_bit = BIT_ULL(qp->qp_num); + + nt->qp_bitmap_free &= ~qp_bit; - qp = &nt->qps[free_queue]; qp->cb_data = data; qp->rx_handler = handlers->rx_handler; qp->tx_handler = handlers->tx_handler; @@ -1458,7 +1549,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) goto err1; @@ -1468,7 +1559,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) goto err2; @@ -1477,10 +1568,8 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, &qp->tx_free_q); } - rc = ntb_register_db_callback(qp->ndev, free_queue, qp, - ntb_transport_rxc_db); - if (rc) - goto err2; + ntb_db_clear(qp->ndev, qp_bit); + ntb_db_clear_mask(qp->ndev, qp_bit); dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num); @@ -1494,7 +1583,7 @@ err1: kfree(entry); if (qp->dma_chan) dmaengine_put(); - set_bit(free_queue, &nt->qp_bitmap); + nt->qp_bitmap_free |= qp_bit; err: return NULL; } @@ -1508,13 +1597,15 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue); */ void ntb_transport_free_queue(struct ntb_transport_qp *qp) { + struct ntb_transport_ctx *nt = qp->transport; struct pci_dev *pdev; struct ntb_queue_entry *entry; + u64 qp_bit; if (!qp) return; - pdev = ntb_query_pdev(qp->ndev); + pdev = qp->ndev->pdev; if (qp->dma_chan) { struct dma_chan *chan = qp->dma_chan; @@ -1531,10 +1622,18 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) dmaengine_put(); } - ntb_unregister_db_callback(qp->ndev, qp->qp_num); + qp_bit = BIT_ULL(qp->qp_num); + + ntb_db_set_mask(qp->ndev, qp_bit); + tasklet_disable(&qp->rxc_db_work); cancel_delayed_work_sync(&qp->link_work); + qp->cb_data = NULL; + qp->rx_handler = NULL; + qp->tx_handler = NULL; + qp->event_handler = NULL; + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) kfree(entry); @@ -1546,7 +1645,7 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); - set_bit(qp->qp_num, &qp->transport->qp_bitmap); + nt->qp_bitmap_free |= qp_bit; dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num); } @@ -1567,7 +1666,7 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len) struct ntb_queue_entry *entry; void *buf; - if (!qp || qp->client_ready == NTB_LINK_UP) + if (!qp || qp->client_ready) return NULL; entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); @@ -1636,7 +1735,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, struct ntb_queue_entry *entry; int rc; - if (!qp || qp->qp_link != NTB_LINK_UP || !len) + if (!qp || !qp->link_is_up || !len) return -EINVAL; entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); @@ -1670,9 +1769,9 @@ void ntb_transport_link_up(struct ntb_transport_qp *qp) if (!qp) return; - qp->client_ready = NTB_LINK_UP; + qp->client_ready = true; - if (qp->transport->transport_link == NTB_LINK_UP) + if (qp->transport->link_is_up) schedule_delayed_work(&qp->link_work, 0); } EXPORT_SYMBOL_GPL(ntb_transport_link_up); @@ -1688,27 +1787,20 @@ EXPORT_SYMBOL_GPL(ntb_transport_link_up); void ntb_transport_link_down(struct ntb_transport_qp *qp) { struct pci_dev *pdev; - int rc, val; + int val; if (!qp) return; - pdev = ntb_query_pdev(qp->ndev); - qp->client_ready = NTB_LINK_DOWN; + pdev = qp->ndev->pdev; + qp->client_ready = false; - rc = ntb_read_local_spad(qp->ndev, QP_LINKS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS); - return; - } + val = ntb_spad_read(qp->ndev, QP_LINKS); - rc = ntb_write_remote_spad(qp->ndev, QP_LINKS, - val & ~(1 << qp->qp_num)); - if (rc) - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - val & ~(1 << qp->qp_num), QP_LINKS); + ntb_peer_spad_write(qp->ndev, QP_LINKS, + val & ~BIT(qp->qp_num)); - if (qp->qp_link == NTB_LINK_UP) + if (qp->link_is_up) ntb_send_link_down(qp); else cancel_delayed_work_sync(&qp->link_work); @@ -1728,7 +1820,7 @@ bool ntb_transport_link_query(struct ntb_transport_qp *qp) if (!qp) return false; - return qp->qp_link == NTB_LINK_UP; + return qp->link_is_up; } EXPORT_SYMBOL_GPL(ntb_transport_link_query); @@ -1774,3 +1866,69 @@ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) return max; } EXPORT_SYMBOL_GPL(ntb_transport_max_size); + +static void ntb_transport_doorbell_callback(void *data, int vector) +{ + struct ntb_transport_ctx *nt = data; + struct ntb_transport_qp *qp; + u64 db_bits; + unsigned int qp_num; + + db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & + ntb_db_vector_mask(nt->ndev, vector)); + + while (db_bits) { + qp_num = __ffs(db_bits); + qp = &nt->qp_vec[qp_num]; + + tasklet_schedule(&qp->rxc_db_work); + + db_bits &= ~BIT_ULL(qp_num); + } +} + +static const struct ntb_ctx_ops ntb_transport_ops = { + .link_event = ntb_transport_event_callback, + .db_event = ntb_transport_doorbell_callback, +}; + +static struct ntb_client ntb_transport_client = { + .ops = { + .probe = ntb_transport_probe, + .remove = ntb_transport_free, + }, +}; + +static int __init ntb_transport_init(void) +{ + int rc; + + if (debugfs_initialized()) + nt_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = bus_register(&ntb_transport_bus); + if (rc) + goto err_bus; + + rc = ntb_register_client(&ntb_transport_client); + if (rc) + goto err_client; + + return 0; + +err_client: + bus_unregister(&ntb_transport_bus); +err_bus: + debugfs_remove_recursive(nt_debugfs_dir); + return rc; +} +module_init(ntb_transport_init); + +static void __exit ntb_transport_exit(void) +{ + debugfs_remove_recursive(nt_debugfs_dir); + + ntb_unregister_client(&ntb_transport_client); + bus_unregister(&ntb_transport_bus); +} +module_exit(ntb_transport_exit); diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index f78b64cc09d5..2862861366a5 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason @@ -48,21 +50,16 @@ struct ntb_transport_qp; -struct ntb_client { +struct ntb_transport_client { struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); + int (*probe)(struct device *client_dev); + void (*remove)(struct device *client_dev); }; -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -int ntb_transport_register_client(struct ntb_client *drvr); -void ntb_transport_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); +int ntb_transport_register_client(struct ntb_transport_client *drvr); +void ntb_transport_unregister_client(struct ntb_transport_client *drvr); +int ntb_transport_register_client_dev(char *device_name); +void ntb_transport_unregister_client_dev(char *device_name); struct ntb_queue_handlers { void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, @@ -75,7 +72,7 @@ struct ntb_queue_handlers { unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, +ntb_transport_create_queue(void *data, struct device *client_dev, const struct ntb_queue_handlers *handlers); void ntb_transport_free_queue(struct ntb_transport_qp *qp); int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, From 0f69a7dff94d3e82ee51685f6a8fea7860b023b8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 2 Jun 2015 03:45:07 -0400 Subject: [PATCH 594/839] NTB: Read peer info from local SPAD in transport The transport was writing and then reading the peer scratch pad, essentially reading what it just wrote instead of exchanging any information with the peer. The transport expects the peer values to be the same as the local values, so this issue was not obvious. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 9faf1c6029af..d9538e911df6 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -761,17 +761,17 @@ static void ntb_transport_link_work(struct work_struct *work) ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION); /* Query the remote side for its info */ - val = ntb_peer_spad_read(ndev, VERSION); + val = ntb_spad_read(ndev, VERSION); dev_dbg(&pdev->dev, "Remote version = %d\n", val); if (val != NTB_TRANSPORT_VERSION) goto out; - val = ntb_peer_spad_read(ndev, NUM_QPS); + val = ntb_spad_read(ndev, NUM_QPS); dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val); if (val != nt->qp_count) goto out; - val = ntb_peer_spad_read(ndev, NUM_MWS); + val = ntb_spad_read(ndev, NUM_MWS); dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val); if (val != nt->mw_count) goto out; @@ -779,10 +779,10 @@ static void ntb_transport_link_work(struct work_struct *work) for (i = 0; i < nt->mw_count; i++) { u64 val64; - val = ntb_peer_spad_read(ndev, MW0_SZ_HIGH + (i * 2)); + val = ntb_spad_read(ndev, MW0_SZ_HIGH + (i * 2)); val64 = (u64)val << 32; - val = ntb_peer_spad_read(ndev, MW0_SZ_LOW + (i * 2)); + val = ntb_spad_read(ndev, MW0_SZ_LOW + (i * 2)); val64 |= val; dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64); From 5ae0beb6bcb781ba0439618a4883fdb243916f46 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 19 May 2015 16:59:34 -0400 Subject: [PATCH 595/839] NTB: Enable link for Intel root port mode in probe Link training should be enabled in the driver probe for root port mode. We should not have to wait for transport to be loaded for this to happen. Otherwise the ntb device will not show up on the transparent bridge side of the link. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 686091756ba4..3810ad11cfdf 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1317,6 +1317,9 @@ static int snb_poll_link(struct intel_ntb_dev *ndev) static int snb_link_is_up(struct intel_ntb_dev *ndev) { + if (ndev->ntb.topo == NTB_TOPO_SEC) + return 1; + return NTB_LNK_STA_ACTIVE(ndev->lnk_sta); } @@ -1613,6 +1616,7 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, static int snb_init_ntb(struct intel_ntb_dev *ndev) { int rc; + u32 ntb_ctl; if (ndev->bar4_split) ndev->mw_count = HSX_SPLIT_BAR_MW_COUNT; @@ -1629,6 +1633,12 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) dev_err(ndev_dev(ndev), "NTB Primary config disabled\n"); return -EINVAL; } + + /* enable link to allow secondary side device to appear */ + ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_ctl &= ~NTB_CTL_DISABLE; + iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); + /* use half the spads for the peer */ ndev->spad_count >>= 1; ndev->self_reg = &snb_pri_reg; From dd5d4d8edb6b40f75e5c8f0db65a4562bffc2e00 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 8 May 2015 12:24:40 -0400 Subject: [PATCH 596/839] NTB: Check the device ID to set errata flags Set errata flags for the specific device IDs to which they apply, instead of the whole Xeon hardware class. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 49 ++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 3810ad11cfdf..499222285439 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1718,29 +1718,68 @@ static int snb_init_dev(struct intel_ntb_dev *ndev) u8 ppd; int rc, mem; + pdev = ndev_pdev(ndev); + + switch (pdev->device) { /* There is a Xeon hardware errata related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO Space, * which may hang the system. To workaround this use the second memory * window to access the interrupt and scratch pad registers on the * remote system. */ - ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; + break; + } + switch (pdev->device) { /* There is a hardware errata related to accessing any register in * SB01BASE in the presence of bidirectional traffic crossing the NTB. */ - ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; + break; + } + switch (pdev->device) { /* HW Errata on bit 14 of b2bdoorbell register. Writes will not be * mirrored to the remote system. Shrink the number of bits by one, * since bit 14 is the last bit. */ - ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; + break; + } ndev->reg = &snb_reg; - pdev = ndev_pdev(ndev); - rc = pci_read_config_byte(pdev, SNB_PPD_OFFSET, &ppd); if (rc) return -EIO; From e22e0b9dcf78ee7a72561a6770c9ea554eb65096 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 12 May 2015 06:55:44 -0400 Subject: [PATCH 597/839] NTB: Differentiate transport link down messages The same message "qp %d: Link Down\n" was printed at two locations in ntb_transport. Change the messages so they are distinct. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index d9538e911df6..8d9b59f7fa07 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -658,7 +658,7 @@ static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) return; } - dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); + dev_info(&pdev->dev, "qp %d: Link Cleanup\n", qp->qp_num); qp->link_is_up = false; if (qp->event_handler) @@ -1470,7 +1470,7 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) return; qp->link_is_up = false; - dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); + dev_info(&pdev->dev, "qp %d: Send Link Down\n", qp->qp_num); for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); From c0900b33d1c8c24246eaedb9553f078024c332fc Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 12 May 2015 06:24:27 -0400 Subject: [PATCH 598/839] NTB: Do not advance transport RX on link down On link down, don't advance RX index to the next entry. The next entry should never be valid after receiving the link down flag. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 8d9b59f7fa07..0f86b60a0c0e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1212,8 +1212,7 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n"); ntb_qp_link_down(qp); hdr->flags = 0; - iowrite32(qp->rx_index, &qp->rx_info->entry); - return 0; + return -EAGAIN; } if (hdr->ver != (u32)qp->rx_pkts) { From 2849b5d70641db76f9b674d00c6d204892c51515 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 12 May 2015 08:09:15 -0400 Subject: [PATCH 599/839] NTB: Reset transport QP link stats on down Reset the link stats when the link goes down. In particular, the TX and RX index and count must be reset, or else the TX side will be sending packets to the RX side where the RX side is not expecting them. Reset all the stats, to be consistent. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 0f86b60a0c0e..2efd1eae9a7c 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -648,18 +648,37 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, return 0; } +static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) +{ + qp->link_is_up = false; + + qp->tx_index = 0; + qp->rx_index = 0; + qp->rx_bytes = 0; + qp->rx_pkts = 0; + qp->rx_ring_empty = 0; + qp->rx_err_no_buf = 0; + qp->rx_err_oflow = 0; + qp->rx_err_ver = 0; + qp->rx_memcpy = 0; + qp->rx_async = 0; + qp->tx_bytes = 0; + qp->tx_pkts = 0; + qp->tx_ring_full = 0; + qp->tx_err_no_buf = 0; + qp->tx_memcpy = 0; + qp->tx_async = 0; +} + static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) { struct ntb_transport_ctx *nt = qp->transport; struct pci_dev *pdev = nt->ndev->pdev; - if (qp->link_is_up) { - cancel_delayed_work_sync(&qp->link_work); - return; - } - dev_info(&pdev->dev, "qp %d: Link Cleanup\n", qp->qp_num); - qp->link_is_up = false; + + cancel_delayed_work_sync(&qp->link_work); + ntb_qp_link_down_reset(qp); if (qp->event_handler) qp->event_handler(qp->cb_data, qp->link_is_up); @@ -866,9 +885,9 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->qp_num = qp_num; qp->transport = nt; qp->ndev = nt->ndev; - qp->link_is_up = false; qp->client_ready = false; qp->event_handler = NULL; + ntb_qp_link_down_reset(qp); if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) num_qps_mw = qp_count / mw_count + 1; @@ -1468,7 +1487,6 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) if (!qp->link_is_up) return; - qp->link_is_up = false; dev_info(&pdev->dev, "qp %d: Send Link Down\n", qp->qp_num); for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { @@ -1490,6 +1508,8 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) if (rc) dev_err(&pdev->dev, "ntb: QP%d unable to send linkdown msg\n", qp->qp_num); + + ntb_qp_link_down_reset(qp); } /** From 42fefc86a680f975e2816cd86c4ba50069766ef6 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 11 May 2015 05:45:30 -0400 Subject: [PATCH 600/839] NTB: Add parameters for Intel SNB B2B addresses Add module parameters for the addresses to be used in B2B topology. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- Documentation/ntb.txt | 10 ++++ drivers/ntb/hw/intel/ntb_hw_intel.c | 77 ++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 19 deletions(-) diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt index 725ba1e6c127..00a3f92832a5 100644 --- a/Documentation/ntb.txt +++ b/Documentation/ntb.txt @@ -56,3 +56,13 @@ Module Parameters: * b2b\_mw\_share - If the peer ntb is to be accessed via a memory window, and if the memory window is large enough, still allow the client to use the second half of the memory window for address translation to the peer. +* snb\_b2b\_usd\_bar2\_addr64 - If using B2B topology on Xeon hardware, use this + 64 bit address on the bus between the NTB devices for the window at + BAR2, on the upstream side of the link. +* snb\_b2b\_usd\_bar4\_addr64 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_usd\_bar4\_addr32 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_usd\_bar5\_addr32 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_dsd\_bar2\_addr64 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_dsd\_bar4\_addr64 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_dsd\_bar4\_addr32 - See *snb\_b2b\_bar2\_addr64*. +* snb\_b2b\_dsd\_bar5\_addr32 - See *snb\_b2b\_bar2\_addr64*. diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 499222285439..c5ad7cb1706d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -72,6 +72,25 @@ MODULE_AUTHOR("Intel Corporation"); #define bar0_off(base, bar) ((base) + ((bar) << 2)) #define bar2_off(base, bar) bar0_off(base, (bar) - 2) +static const struct intel_ntb_reg bwd_reg; +static const struct intel_ntb_alt_reg bwd_pri_reg; +static const struct intel_ntb_alt_reg bwd_sec_reg; +static const struct intel_ntb_alt_reg bwd_b2b_reg; +static const struct intel_ntb_xlat_reg bwd_pri_xlat; +static const struct intel_ntb_xlat_reg bwd_sec_xlat; +static const struct intel_ntb_reg snb_reg; +static const struct intel_ntb_alt_reg snb_pri_reg; +static const struct intel_ntb_alt_reg snb_sec_reg; +static const struct intel_ntb_alt_reg snb_b2b_reg; +static const struct intel_ntb_xlat_reg snb_pri_xlat; +static const struct intel_ntb_xlat_reg snb_sec_xlat; +static struct intel_b2b_addr snb_b2b_usd_addr; +static struct intel_b2b_addr snb_b2b_dsd_addr; +static const struct ntb_dev_ops intel_ntb_ops; + +static const struct file_operations intel_ntb_debugfs_info; +static struct dentry *debugfs_dir; + static int b2b_mw_idx = -1; module_param(b2b_mw_idx, int, 0644); MODULE_PARM_DESC(b2b_mw_idx, "Use this mw idx to access the peer ntb. A " @@ -86,25 +105,45 @@ MODULE_PARM_DESC(b2b_mw_share, "If the b2b mw is large enough, configure the " "the mw, so the second half can still be used as a mw. Both " "sides MUST set the same value here!"); -static const struct intel_ntb_reg bwd_reg; -static const struct intel_ntb_alt_reg bwd_pri_reg; -static const struct intel_ntb_alt_reg bwd_sec_reg; -static const struct intel_ntb_alt_reg bwd_b2b_reg; -static const struct intel_ntb_xlat_reg bwd_pri_xlat; -static const struct intel_ntb_xlat_reg bwd_sec_xlat; -static const struct intel_ntb_reg snb_reg; -static const struct intel_ntb_alt_reg snb_pri_reg; -static const struct intel_ntb_alt_reg snb_sec_reg; -static const struct intel_ntb_alt_reg snb_b2b_reg; -static const struct intel_ntb_xlat_reg snb_pri_xlat; -static const struct intel_ntb_xlat_reg snb_sec_xlat; -static const struct intel_b2b_addr snb_b2b_usd_addr; -static const struct intel_b2b_addr snb_b2b_dsd_addr; +module_param_named(snb_b2b_usd_bar2_addr64, + snb_b2b_usd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, + "SNB B2B USD BAR 2 64-bit address"); -static const struct ntb_dev_ops intel_ntb_ops; +module_param_named(snb_b2b_usd_bar4_addr64, + snb_b2b_usd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, + "SNB B2B USD BAR 4 64-bit address"); -static const struct file_operations intel_ntb_debugfs_info; -static struct dentry *debugfs_dir; +module_param_named(snb_b2b_usd_bar4_addr32, + snb_b2b_usd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, + "SNB B2B USD split-BAR 4 32-bit address"); + +module_param_named(snb_b2b_usd_bar5_addr32, + snb_b2b_usd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, + "SNB B2B USD split-BAR 5 32-bit address"); + +module_param_named(snb_b2b_dsd_bar2_addr64, + snb_b2b_dsd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, + "SNB B2B DSD BAR 2 64-bit address"); + +module_param_named(snb_b2b_dsd_bar4_addr64, + snb_b2b_dsd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, + "SNB B2B DSD BAR 4 64-bit address"); + +module_param_named(snb_b2b_dsd_bar4_addr32, + snb_b2b_dsd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, + "SNB B2B DSD split-BAR 4 32-bit address"); + +module_param_named(snb_b2b_dsd_bar5_addr32, + snb_b2b_dsd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, + "SNB B2B DSD split-BAR 5 32-bit address"); #ifndef ioread64 #ifdef readq @@ -2073,14 +2112,14 @@ static const struct intel_ntb_xlat_reg snb_sec_xlat = { .bar2_xlat = SNB_SBAR23XLAT_OFFSET, }; -static const struct intel_b2b_addr snb_b2b_usd_addr = { +static struct intel_b2b_addr snb_b2b_usd_addr = { .bar2_addr64 = SNB_B2B_BAR2_USD_ADDR64, .bar4_addr64 = SNB_B2B_BAR4_USD_ADDR64, .bar4_addr32 = SNB_B2B_BAR4_USD_ADDR32, .bar5_addr32 = SNB_B2B_BAR5_USD_ADDR32, }; -static const struct intel_b2b_addr snb_b2b_dsd_addr = { +static struct intel_b2b_addr snb_b2b_dsd_addr = { .bar2_addr64 = SNB_B2B_BAR2_DSD_ADDR64, .bar4_addr64 = SNB_B2B_BAR4_DSD_ADDR64, .bar4_addr32 = SNB_B2B_BAR4_DSD_ADDR32, From 963de4739fb4f8fa8d5cd87969109a7d3282ed13 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Apr 2015 11:12:41 -0400 Subject: [PATCH 601/839] NTB: Add ping pong test client This is a simple ping pong driver that exercises the scratch pads and doorbells of the ntb hardware. This driver may be used to test that your ntb hardware and drivers are functioning at a basic level. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- Documentation/ntb.txt | 27 ++++ drivers/ntb/Kconfig | 2 + drivers/ntb/Makefile | 2 +- drivers/ntb/test/Kconfig | 8 + drivers/ntb/test/Makefile | 1 + drivers/ntb/test/ntb_pingpong.c | 250 ++++++++++++++++++++++++++++++++ 6 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 drivers/ntb/test/Kconfig create mode 100644 drivers/ntb/test/Makefile create mode 100644 drivers/ntb/test/ntb_pingpong.c diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt index 00a3f92832a5..e5d1591478ca 100644 --- a/Documentation/ntb.txt +++ b/Documentation/ntb.txt @@ -37,6 +37,33 @@ Transport queue pair. Network data is copied between socket buffers and the Transport queue pair buffer. The Transport client may be used for other things besides Netdev, however no other applications have yet been written. +### NTB Ping Pong Test Client (ntb\_pingpong) + +The Ping Pong test client serves as a demonstration to exercise the doorbell +and scratchpad registers of NTB hardware, and as an example simple NTB client. +Ping Pong enables the link when started, waits for the NTB link to come up, and +then proceeds to read and write the doorbell scratchpad registers of the NTB. +The peers interrupt each other using a bit mask of doorbell bits, which is +shifted by one in each round, to test the behavior of multiple doorbell bits +and interrupt vectors. The Ping Pong driver also reads the first local +scratchpad, and writes the value plus one to the first peer scratchpad, each +round before writing the peer doorbell register. + +Module Parameters: + +* unsafe - Some hardware has known issues with scratchpad and doorbell + registers. By default, Ping Pong will not attempt to exercise such + hardware. You may override this behavior at your own risk by setting + unsafe=1. +* delay\_ms - Specify the delay between receiving a doorbell + interrupt event and setting the peer doorbell register for the next + round. +* init\_db - Specify the doorbell bits to start new series of rounds. A new + series begins once all the doorbell bits have been shifted out of + range. +* dyndbg - It is suggested to specify dyndbg=+p when loading this module, and + then to observe debugging output on the console. + ## NTB Hardware Drivers NTB hardware drivers should register devices with the NTB core driver. After diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index 53b042429673..95944e52fa36 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -14,6 +14,8 @@ if NTB source "drivers/ntb/hw/Kconfig" +source "drivers/ntb/test/Kconfig" + config NTB_TRANSPORT tristate "NTB Transport Client" help diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index b9fa663ecfec..1921dec1949d 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,2 +1,2 @@ -obj-$(CONFIG_NTB) += ntb.o hw/ +obj-$(CONFIG_NTB) += ntb.o hw/ test/ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig new file mode 100644 index 000000000000..72d255d220c8 --- /dev/null +++ b/drivers/ntb/test/Kconfig @@ -0,0 +1,8 @@ +config NTB_PINGPONG + tristate "NTB Ping Pong Test Client" + help + This is a simple ping pong driver that exercises the scratchpads and + doorbells of the ntb hardware. This driver may be used to test that + your ntb hardware and drivers are functioning at a basic level. + + If unsure, say N. diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile new file mode 100644 index 000000000000..b32309307771 --- /dev/null +++ b/drivers/ntb/test/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c new file mode 100644 index 000000000000..fe1600566981 --- /dev/null +++ b/drivers/ntb/test/ntb_pingpong.c @@ -0,0 +1,250 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Pingpong Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +/* Note: load this module with option 'dyndbg=+p' */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "ntb_pingpong" +#define DRIVER_DESCRIPTION "PCIe NTB Simple Pingpong Client" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "24 March 2015" +#define DRIVER_AUTHOR "Allen Hubbe " + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static unsigned int unsafe; +module_param(unsafe, uint, 0644); +MODULE_PARM_DESC(unsafe, "Run even though ntb operations may be unsafe"); + +static unsigned int delay_ms = 1000; +module_param(delay_ms, uint, 0644); +MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); + +static unsigned long db_init = 0x7; +module_param(db_init, ulong, 0644); +MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer"); + +struct pp_ctx { + struct ntb_dev *ntb; + u64 db_bits; + /* synchronize access to db_bits by ping and pong */ + spinlock_t db_lock; + struct timer_list db_timer; + unsigned long db_delay; +}; + +static void pp_ping(unsigned long ctx) +{ + struct pp_ctx *pp = (void *)ctx; + unsigned long irqflags; + u64 db_bits, db_mask; + u32 spad_rd, spad_wr; + + spin_lock_irqsave(&pp->db_lock, irqflags); + { + db_mask = ntb_db_valid_mask(pp->ntb); + db_bits = ntb_db_read(pp->ntb); + + if (db_bits) { + dev_dbg(&pp->ntb->dev, + "Masked pongs %#llx\n", + db_bits); + ntb_db_clear(pp->ntb, db_bits); + } + + db_bits = ((pp->db_bits | db_bits) << 1) & db_mask; + + if (!db_bits) + db_bits = db_init; + + spad_rd = ntb_spad_read(pp->ntb, 0); + spad_wr = spad_rd + 1; + + dev_dbg(&pp->ntb->dev, + "Ping bits %#llx read %#x write %#x\n", + db_bits, spad_rd, spad_wr); + + ntb_peer_spad_write(pp->ntb, 0, spad_wr); + ntb_peer_db_set(pp->ntb, db_bits); + ntb_db_clear_mask(pp->ntb, db_mask); + + pp->db_bits = 0; + } + spin_unlock_irqrestore(&pp->db_lock, irqflags); +} + +static void pp_link_event(void *ctx) +{ + struct pp_ctx *pp = ctx; + + if (ntb_link_is_up(pp->ntb, NULL, NULL) == 1) { + dev_dbg(&pp->ntb->dev, "link is up\n"); + pp_ping((unsigned long)pp); + } else { + dev_dbg(&pp->ntb->dev, "link is down\n"); + del_timer(&pp->db_timer); + } +} + +static void pp_db_event(void *ctx, int vec) +{ + struct pp_ctx *pp = ctx; + u64 db_bits, db_mask; + unsigned long irqflags; + + spin_lock_irqsave(&pp->db_lock, irqflags); + { + db_mask = ntb_db_vector_mask(pp->ntb, vec); + db_bits = db_mask & ntb_db_read(pp->ntb); + ntb_db_set_mask(pp->ntb, db_mask); + ntb_db_clear(pp->ntb, db_bits); + + pp->db_bits |= db_bits; + + mod_timer(&pp->db_timer, jiffies + pp->db_delay); + + dev_dbg(&pp->ntb->dev, + "Pong vec %d bits %#llx\n", + vec, db_bits); + } + spin_unlock_irqrestore(&pp->db_lock, irqflags); +} + +static const struct ntb_ctx_ops pp_ops = { + .link_event = pp_link_event, + .db_event = pp_db_event, +}; + +static int pp_probe(struct ntb_client *client, + struct ntb_dev *ntb) +{ + struct pp_ctx *pp; + int rc; + + if (ntb_db_is_unsafe(ntb)) { + dev_dbg(&ntb->dev, "doorbell is unsafe\n"); + if (!unsafe) { + rc = -EINVAL; + goto err_pp; + } + } + + if (ntb_spad_is_unsafe(ntb)) { + dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); + if (!unsafe) { + rc = -EINVAL; + goto err_pp; + } + } + + pp = kmalloc(sizeof(*pp), GFP_KERNEL); + if (!pp) { + rc = -ENOMEM; + goto err_pp; + } + + pp->ntb = ntb; + pp->db_bits = 0; + spin_lock_init(&pp->db_lock); + setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); + pp->db_delay = msecs_to_jiffies(delay_ms); + + rc = ntb_set_ctx(ntb, pp, &pp_ops); + if (rc) + goto err_ctx; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ntb); + + return 0; + +err_ctx: + kfree(pp); +err_pp: + return rc; +} + +static void pp_remove(struct ntb_client *client, + struct ntb_dev *ntb) +{ + struct pp_ctx *pp = ntb->ctx; + + ntb_clear_ctx(ntb); + del_timer_sync(&pp->db_timer); + ntb_link_disable(ntb); + + kfree(pp); +} + +static struct ntb_client pp_client = { + .ops = { + .probe = pp_probe, + .remove = pp_remove, + }, +}; +module_ntb_client(pp_client); From 578b881ba9c4b253482903bf8fae438318f5629b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 21 May 2015 02:51:39 -0400 Subject: [PATCH 602/839] NTB: Add tool test client This is a simple debugging driver that enables the doorbell and scratch pad registers to be read and written from the debugfs. This tool enables more complicated debugging to be scripted from user space. This driver may be used to test that your ntb hardware and drivers are functioning at a basic level. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- Documentation/ntb.txt | 32 +++ drivers/ntb/test/Kconfig | 11 + drivers/ntb/test/Makefile | 1 + drivers/ntb/test/ntb_tool.c | 556 ++++++++++++++++++++++++++++++++++++ 4 files changed, 600 insertions(+) create mode 100644 drivers/ntb/test/ntb_tool.c diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt index e5d1591478ca..b48249a7b607 100644 --- a/Documentation/ntb.txt +++ b/Documentation/ntb.txt @@ -64,6 +64,38 @@ Module Parameters: * dyndbg - It is suggested to specify dyndbg=+p when loading this module, and then to observe debugging output on the console. +### NTB Tool Test Client (ntb\_tool) + +The Tool test client serves for debugging, primarily, ntb hardware and drivers. +The Tool provides access through debugfs for reading, setting, and clearing the +NTB doorbell, and reading and writing scratchpads. + +The Tool does not currently have any module parameters. + +Debugfs Files: + +* *debugfs*/ntb\_tool/*hw*/ - A directory in debugfs will be created for each + NTB device probed by the tool. This directory is shortened to *hw* + below. +* *hw*/db - This file is used to read, set, and clear the local doorbell. Not + all operations may be supported by all hardware. To read the doorbell, + read the file. To set the doorbell, write `s` followed by the bits to + set (eg: `echo 's 0x0101' > db`). To clear the doorbell, write `c` + followed by the bits to clear. +* *hw*/mask - This file is used to read, set, and clear the local doorbell mask. + See *db* for details. +* *hw*/peer\_db - This file is used to read, set, and clear the peer doorbell. + See *db* for details. +* *hw*/peer\_mask - This file is used to read, set, and clear the peer doorbell + mask. See *db* for details. +* *hw*/spad - This file is used to read and write local scratchpads. To read + the values of all scratchpads, read the file. To write values, write a + series of pairs of scratchpad number and value + (eg: `echo '4 0x123 7 0xabc' > spad` + # to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively). +* *hw*/peer\_spad - This file is used to read and write peer scratchpads. See + *spad* for details. + ## NTB Hardware Drivers NTB hardware drivers should register devices with the NTB core driver. After diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig index 72d255d220c8..01852f98a843 100644 --- a/drivers/ntb/test/Kconfig +++ b/drivers/ntb/test/Kconfig @@ -6,3 +6,14 @@ config NTB_PINGPONG your ntb hardware and drivers are functioning at a basic level. If unsure, say N. + +config NTB_TOOL + tristate "NTB Debugging Tool Test Client" + help + This is a simple debugging driver that enables the doorbell and + scratchpad registers to be read and written from the debugfs. This + enables more complicated debugging to be scripted from user space. + This driver may be used to test that your ntb hardware and drivers are + functioning at a basic level. + + If unsure, say N. diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile index b32309307771..0ea32a324b6c 100644 --- a/drivers/ntb/test/Makefile +++ b/drivers/ntb/test/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o +obj-$(CONFIG_NTB_TOOL) += ntb_tool.o diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c new file mode 100644 index 000000000000..6f5dc6ca673d --- /dev/null +++ b/drivers/ntb/test/ntb_tool.c @@ -0,0 +1,556 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Debugging Tool Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +/* + * How to use this tool, by example. + * + * Assuming $DBG_DIR is something like: + * '/sys/kernel/debug/ntb_tool/0000:00:03.0' + * + * Eg: check if clearing the doorbell mask generates an interrupt. + * + * # Set the doorbell mask + * root@self# echo 's 1' > $DBG_DIR/mask + * + * # Ring the doorbell from the peer + * root@peer# echo 's 1' > $DBG_DIR/peer_db + * + * # Clear the doorbell mask + * root@self# echo 'c 1' > $DBG_DIR/mask + * + * Observe debugging output in dmesg or your console. You should see a + * doorbell event triggered by clearing the mask. If not, this may indicate an + * issue with the hardware that needs to be worked around in the driver. + * + * Eg: read and write scratchpad registers + * + * root@peer# echo '0 0x01010101 1 0x7f7f7f7f' > $DBG_DIR/peer_spad + * + * root@self# cat $DBG_DIR/spad + * + * Observe that spad 0 and 1 have the values set by the peer. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "ntb_tool" +#define DRIVER_DESCRIPTION "PCIe NTB Debugging Tool" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "22 April 2015" +#define DRIVER_AUTHOR "Allen Hubbe " + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static struct dentry *tool_dbgfs; + +struct tool_ctx { + struct ntb_dev *ntb; + struct dentry *dbgfs; +}; + +#define SPAD_FNAME_SIZE 0x10 +#define INT_PTR(x) ((void *)(unsigned long)x) +#define PTR_INT(x) ((int)(unsigned long)x) + +#define TOOL_FOPS_RDWR(__name, __read, __write) \ + const struct file_operations __name = { \ + .owner = THIS_MODULE, \ + .open = simple_open, \ + .read = __read, \ + .write = __write, \ + } + +static void tool_link_event(void *ctx) +{ + struct tool_ctx *tc = ctx; + enum ntb_speed speed; + enum ntb_width width; + int up; + + up = ntb_link_is_up(tc->ntb, &speed, &width); + + dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", + up ? "up" : "down", speed, width); +} + +static void tool_db_event(void *ctx, int vec) +{ + struct tool_ctx *tc = ctx; + u64 db_bits, db_mask; + + db_mask = ntb_db_vector_mask(tc->ntb, vec); + db_bits = ntb_db_read(tc->ntb); + + dev_dbg(&tc->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", + vec, db_mask, db_bits); +} + +static const struct ntb_ctx_ops tool_ops = { + .link_event = tool_link_event, + .db_event = tool_db_event, +}; + +static ssize_t tool_dbfn_read(struct tool_ctx *tc, char __user *ubuf, + size_t size, loff_t *offp, + u64 (*db_read_fn)(struct ntb_dev *)) +{ + size_t buf_size; + char *buf; + ssize_t pos, rc; + + if (!db_read_fn) + return -EINVAL; + + buf_size = min_t(size_t, size, 0x20); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = scnprintf(buf, buf_size, "%#llx\n", + db_read_fn(tc->ntb)); + + rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); + + kfree(buf); + + return rc; +} + +static ssize_t tool_dbfn_write(struct tool_ctx *tc, + const char __user *ubuf, + size_t size, loff_t *offp, + int (*db_set_fn)(struct ntb_dev *, u64), + int (*db_clear_fn)(struct ntb_dev *, u64)) +{ + u64 db_bits; + char *buf, cmd; + ssize_t rc; + int n; + + buf = kmalloc(size + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = simple_write_to_buffer(buf, size, offp, ubuf, size); + if (rc < 0) { + kfree(buf); + return rc; + } + + buf[size] = 0; + + n = sscanf(buf, "%c %lli", &cmd, &db_bits); + + kfree(buf); + + if (n != 2) { + rc = -EINVAL; + } else if (cmd == 's') { + if (!db_set_fn) + rc = -EINVAL; + else + rc = db_set_fn(tc->ntb, db_bits); + } else if (cmd == 'c') { + if (!db_clear_fn) + rc = -EINVAL; + else + rc = db_clear_fn(tc->ntb, db_bits); + } else { + rc = -EINVAL; + } + + return rc ? : size; +} + +static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, + size_t size, loff_t *offp, + u32 (*spad_read_fn)(struct ntb_dev *, int)) +{ + size_t buf_size; + char *buf; + ssize_t pos, rc; + int i, spad_count; + + if (!spad_read_fn) + return -EINVAL; + + buf_size = min_t(size_t, size, 0x100); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = 0; + + spad_count = ntb_spad_count(tc->ntb); + for (i = 0; i < spad_count; ++i) { + pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", + i, spad_read_fn(tc->ntb, i)); + } + + rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); + + kfree(buf); + + return rc; +} + +static ssize_t tool_spadfn_write(struct tool_ctx *tc, + const char __user *ubuf, + size_t size, loff_t *offp, + int (*spad_write_fn)(struct ntb_dev *, + int, u32)) +{ + int spad_idx; + u32 spad_val; + char *buf; + int pos, n; + ssize_t rc; + + if (!spad_write_fn) { + dev_dbg(&tc->ntb->dev, "no spad write fn\n"); + return -EINVAL; + } + + buf = kmalloc(size + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = simple_write_to_buffer(buf, size, offp, ubuf, size); + if (rc < 0) { + kfree(buf); + return rc; + } + + buf[size] = 0; + + n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + while (n == 2) { + rc = spad_write_fn(tc->ntb, spad_idx, spad_val); + if (rc) + break; + + n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + } + + if (n < 0) + rc = n; + + kfree(buf); + + return rc ? : size; +} + +static ssize_t tool_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->db_read); +} + +static ssize_t tool_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->db_set, + tc->ntb->ops->db_clear); +} + +static TOOL_FOPS_RDWR(tool_db_fops, + tool_db_read, + tool_db_write); + +static ssize_t tool_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->db_read_mask); +} + +static ssize_t tool_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->db_set_mask, + tc->ntb->ops->db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_mask_fops, + tool_mask_read, + tool_mask_write); + +static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_read); +} + +static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_set, + tc->ntb->ops->peer_db_clear); +} + +static TOOL_FOPS_RDWR(tool_peer_db_fops, + tool_peer_db_read, + tool_peer_db_write); + +static ssize_t tool_peer_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_read_mask); +} + +static ssize_t tool_peer_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_set_mask, + tc->ntb->ops->peer_db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_peer_mask_fops, + tool_peer_mask_read, + tool_peer_mask_write); + +static ssize_t tool_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_read(tc, ubuf, size, offp, + tc->ntb->ops->spad_read); +} + +static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_write(tc, ubuf, size, offp, + tc->ntb->ops->spad_write); +} + +static TOOL_FOPS_RDWR(tool_spad_fops, + tool_spad_read, + tool_spad_write); + +static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_spad_read); +} + +static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_spad_write); +} + +static TOOL_FOPS_RDWR(tool_peer_spad_fops, + tool_peer_spad_read, + tool_peer_spad_write); + +static void tool_setup_dbgfs(struct tool_ctx *tc) +{ + /* This modules is useless without dbgfs... */ + if (!tool_dbgfs) { + tc->dbgfs = NULL; + return; + } + + tc->dbgfs = debugfs_create_dir(dev_name(&tc->ntb->dev), + tool_dbgfs); + if (!tc->dbgfs) + return; + + debugfs_create_file("db", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_db_fops); + + debugfs_create_file("mask", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_mask_fops); + + debugfs_create_file("peer_db", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_db_fops); + + debugfs_create_file("peer_mask", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_mask_fops); + + debugfs_create_file("spad", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_spad_fops); + + debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_spad_fops); +} + +static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) +{ + struct tool_ctx *tc; + int rc; + + if (ntb_db_is_unsafe(ntb)) + dev_dbg(&ntb->dev, "doorbell is unsafe\n"); + + if (ntb_spad_is_unsafe(ntb)) + dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); + + tc = kmalloc(sizeof(*tc), GFP_KERNEL); + if (!tc) { + rc = -ENOMEM; + goto err_tc; + } + + tc->ntb = ntb; + + tool_setup_dbgfs(tc); + + rc = ntb_set_ctx(ntb, tc, &tool_ops); + if (rc) + goto err_ctx; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ntb); + + return 0; + +err_ctx: + debugfs_remove_recursive(tc->dbgfs); + kfree(tc); +err_tc: + return rc; +} + +static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) +{ + struct tool_ctx *tc = ntb->ctx; + + ntb_clear_ctx(ntb); + ntb_link_disable(ntb); + + debugfs_remove_recursive(tc->dbgfs); + kfree(tc); +} + +static struct ntb_client tool_client = { + .ops = { + .probe = tool_probe, + .remove = tool_remove, + }, +}; + +static int __init tool_init(void) +{ + int rc; + + if (debugfs_initialized()) + tool_dbgfs = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&tool_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(tool_dbgfs); + return rc; +} +module_init(tool_init); + +static void __exit tool_exit(void) +{ + ntb_unregister_client(&tool_client); + debugfs_remove_recursive(tool_dbgfs); +} +module_exit(tool_exit); From 2876228941ac5dcab12854aa5a3462b3f2274b09 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 11 May 2015 10:08:26 -0400 Subject: [PATCH 603/839] NTB: Rate limit ntb_qp_link_work When the ntb transport is connecting and waiting for the peer, the debug console receives lots of debug level messages about the remote qp link status being down. Rate limit those messages. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 2efd1eae9a7c..8ce0bf67ac20 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -850,7 +850,7 @@ static void ntb_qp_link_work(struct work_struct *work) /* query remote spad for qp ready bits */ ntb_peer_spad_read(nt->ndev, QP_LINKS); - dev_dbg(&pdev->dev, "Remote QP link status = %x\n", val); + dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val); /* See if the remote side is up */ if (val & BIT(qp->qp_num)) { From 1199aa61264a74717bc747e7031673242bad5119 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 18 May 2015 06:20:47 -0400 Subject: [PATCH 604/839] NTB: Use NUMA memory and DMA chan in transport Allocate memory and request the DMA channel for the same NUMA node as the NTB device. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 46 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 8ce0bf67ac20..dc14ec81c43e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -346,6 +346,7 @@ int ntb_transport_register_client_dev(char *device_name) { struct ntb_transport_client_dev *client_dev; struct ntb_transport_ctx *nt; + int node; int rc, i = 0; if (list_empty(&ntb_transport_list)) @@ -354,8 +355,10 @@ int ntb_transport_register_client_dev(char *device_name) list_for_each_entry(nt, &ntb_transport_list, entry) { struct device *dev; - client_dev = kzalloc(sizeof(*client_dev), - GFP_KERNEL); + node = dev_to_node(&nt->ndev->dev); + + client_dev = kzalloc_node(sizeof(*client_dev), + GFP_KERNEL, node); if (!client_dev) { rc = -ENOMEM; goto err; @@ -953,6 +956,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) struct ntb_transport_mw *mw; unsigned int mw_count, qp_count; u64 qp_bitmap; + int node; int rc, i; if (ntb_db_is_unsafe(ndev)) @@ -962,7 +966,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) dev_dbg(&ndev->dev, "scratchpad is unsafe, proceed anyway...\n"); - nt = kzalloc(sizeof(*nt), GFP_KERNEL); + node = dev_to_node(&ndev->dev); + + nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node); if (!nt) return -ENOMEM; @@ -972,7 +978,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->mw_count = mw_count; - nt->mw_vec = kcalloc(mw_count, sizeof(*nt->mw_vec), GFP_KERNEL); + nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), + GFP_KERNEL, node); if (!nt->mw_vec) { rc = -ENOMEM; goto err; @@ -1012,7 +1019,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->qp_bitmap = qp_bitmap; nt->qp_bitmap_free = qp_bitmap; - nt->qp_vec = kcalloc(qp_count, sizeof(*nt->qp_vec), GFP_KERNEL); + nt->qp_vec = kzalloc_node(qp_count * sizeof(*nt->qp_vec), + GFP_KERNEL, node); if (!nt->qp_vec) { rc = -ENOMEM; goto err2; @@ -1512,6 +1520,11 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) ntb_qp_link_down_reset(qp); } +static bool ntb_dma_filter_fn(struct dma_chan *chan, void *node) +{ + return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; +} + /** * ntb_transport_create_queue - Create a new NTB transport layer queue * @rx_handler: receive callback function @@ -1537,12 +1550,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev, struct ntb_transport_qp *qp; u64 qp_bit; unsigned int free_queue; + dma_cap_mask_t dma_mask; + int node; int i; ndev = dev_ntb(client_dev->parent); pdev = ndev->pdev; nt = ndev->ctx; + node = dev_to_node(&ndev->dev); + free_queue = ffs(nt->qp_bitmap); if (!free_queue) goto err; @@ -1560,15 +1577,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev, qp->tx_handler = handlers->tx_handler; qp->event_handler = handlers->event_handler; - dmaengine_get(); - qp->dma_chan = dma_find_channel(DMA_MEMCPY); - if (!qp->dma_chan) { - dmaengine_put(); + dma_cap_zero(dma_mask); + dma_cap_set(DMA_MEMCPY, dma_mask); + + qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->dma_chan) dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n"); - } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err1; @@ -1578,7 +1596,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1601,7 +1619,7 @@ err1: while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->dma_chan) - dmaengine_put(); + dma_release_channel(qp->dma_chan); nt->qp_bitmap_free |= qp_bit; err: return NULL; @@ -1638,7 +1656,7 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) */ dma_sync_wait(chan, qp->last_cookie); dmaengine_terminate_all(chan); - dmaengine_put(); + dma_release_channel(chan); } qp_bit = BIT_ULL(qp->qp_num); From 0e041fb5369975c183d22ffeb156ea0dae760088 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 19 May 2015 12:04:52 -0400 Subject: [PATCH 605/839] NTB: Use NUMA memory in Intel driver Allocate memory for the NUMA node of the NTB device. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index c5ad7cb1706d..236b7d33b5af 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -413,10 +413,12 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev, int msix_shift, int total_shift) { struct pci_dev *pdev; - int rc, i, msix_count; + int rc, i, msix_count, node; pdev = ndev_pdev(ndev); + node = dev_to_node(&pdev->dev); + /* Mask all doorbell interrupts */ ndev->db_mask = ndev->db_valid_mask; ndev->reg->db_iowrite(ndev->db_mask, @@ -425,11 +427,13 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev, /* Try to set up msix irq */ - ndev->vec = kcalloc(msix_max, sizeof(*ndev->vec), GFP_KERNEL); + ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec), + GFP_KERNEL, node); if (!ndev->vec) goto err_msix_vec_alloc; - ndev->msix = kcalloc(msix_max, sizeof(*ndev->msix), GFP_KERNEL); + ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix), + GFP_KERNEL, node); if (!ndev->msix) goto err_msix_alloc; @@ -1955,10 +1959,12 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_ntb_dev *ndev; - int rc; + int rc, node; + + node = dev_to_node(&pdev->dev); if (pdev_is_bwd(pdev)) { - ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); + ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); if (!ndev) { rc = -ENOMEM; goto err_ndev; @@ -1975,7 +1981,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, goto err_init_dev; } else if (pdev_is_snb(pdev)) { - ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); + ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); if (!ndev) { rc = -ENOMEM; goto err_ndev; From 06917f753547e6bba8a5d17f79971d1c071a70dd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 19 May 2015 16:45:46 -0400 Subject: [PATCH 606/839] NTB: Improve performance with write combining Changing the memory window BAR mappings to write combining significantly boosts the performance. We will also use memcpy that uses non-temporal store, which showed performance improvement when doing non-cached memcpys. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index dc14ec81c43e..7a765d3230d8 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "linux/ntb.h" #include "linux/ntb_transport.h" @@ -993,7 +994,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) if (rc) goto err1; - mw->vbase = ioremap(mw->phys_addr, mw->phys_size); + mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size); if (!mw->vbase) { rc = -ENOMEM; goto err1; @@ -1375,7 +1376,15 @@ static void ntb_tx_copy_callback(void *data) static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) { +#ifdef ARCH_HAS_NOCACHE_UACCESS + /* + * Using non-temporal mov to improve performance on non-cached + * writes, even though we aren't actually copying from user space. + */ + __copy_from_user_inatomic_nocache(offset, entry->buf, entry->len); +#else memcpy_toio(offset, entry->buf, entry->len); +#endif /* Ensure that the data is fully copied out before setting the flags */ wmb(); From a41ef053f700618f5f55a1dd658908a71163400b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 19 May 2015 16:52:04 -0400 Subject: [PATCH 607/839] NTB: Default to CPU memcpy for performance Disable DMA usage by default, since the CPU provides much better performance with write combining. Provide a module parameter to enable DMA usage when offloading the memcpy is preferred. Signed-off-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 7a765d3230d8..e07b056af3be 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -88,6 +88,10 @@ static unsigned int copy_bytes = 1024; module_param(copy_bytes, uint, 0644); MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); +static bool use_dma; +module_param(use_dma, bool, 0644); +MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); + static struct dentry *nt_debugfs_dir; struct ntb_queue_entry { @@ -1589,10 +1593,15 @@ ntb_transport_create_queue(void *data, struct device *client_dev, dma_cap_zero(dma_mask); dma_cap_set(DMA_MEMCPY, dma_mask); - qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, - (void *)(unsigned long)node); - if (!qp->dma_chan) - dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n"); + if (use_dma) { + qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->dma_chan) + dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); + } else { + qp->dma_chan = NULL; + } + dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); From 2f887b9a44015a8146d52e40bef9e2b7bc6cd275 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 20 May 2015 12:55:47 -0400 Subject: [PATCH 608/839] NTB: Rename Intel code names to platform names Instead of using the platform code names, use the correct platform names to identify the respective Intel NTB hardware. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- Documentation/ntb.txt | 20 +- drivers/ntb/hw/intel/ntb_hw_intel.c | 606 ++++++++++++++-------------- drivers/ntb/hw/intel/ntb_hw_intel.h | 270 ++++++------- 3 files changed, 448 insertions(+), 448 deletions(-) diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt index b48249a7b607..1d9bbabb6c79 100644 --- a/Documentation/ntb.txt +++ b/Documentation/ntb.txt @@ -115,13 +115,13 @@ Module Parameters: * b2b\_mw\_share - If the peer ntb is to be accessed via a memory window, and if the memory window is large enough, still allow the client to use the second half of the memory window for address translation to the peer. -* snb\_b2b\_usd\_bar2\_addr64 - If using B2B topology on Xeon hardware, use this - 64 bit address on the bus between the NTB devices for the window at - BAR2, on the upstream side of the link. -* snb\_b2b\_usd\_bar4\_addr64 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_usd\_bar4\_addr32 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_usd\_bar5\_addr32 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_dsd\_bar2\_addr64 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_dsd\_bar4\_addr64 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_dsd\_bar4\_addr32 - See *snb\_b2b\_bar2\_addr64*. -* snb\_b2b\_dsd\_bar5\_addr32 - See *snb\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar2\_addr64 - If using B2B topology on Xeon hardware, use + this 64 bit address on the bus between the NTB devices for the window + at BAR2, on the upstream side of the link. +* xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 236b7d33b5af..eeaa1e8f8bd8 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -72,20 +72,20 @@ MODULE_AUTHOR("Intel Corporation"); #define bar0_off(base, bar) ((base) + ((bar) << 2)) #define bar2_off(base, bar) bar0_off(base, (bar) - 2) -static const struct intel_ntb_reg bwd_reg; -static const struct intel_ntb_alt_reg bwd_pri_reg; -static const struct intel_ntb_alt_reg bwd_sec_reg; -static const struct intel_ntb_alt_reg bwd_b2b_reg; -static const struct intel_ntb_xlat_reg bwd_pri_xlat; -static const struct intel_ntb_xlat_reg bwd_sec_xlat; -static const struct intel_ntb_reg snb_reg; -static const struct intel_ntb_alt_reg snb_pri_reg; -static const struct intel_ntb_alt_reg snb_sec_reg; -static const struct intel_ntb_alt_reg snb_b2b_reg; -static const struct intel_ntb_xlat_reg snb_pri_xlat; -static const struct intel_ntb_xlat_reg snb_sec_xlat; -static struct intel_b2b_addr snb_b2b_usd_addr; -static struct intel_b2b_addr snb_b2b_dsd_addr; +static const struct intel_ntb_reg atom_reg; +static const struct intel_ntb_alt_reg atom_pri_reg; +static const struct intel_ntb_alt_reg atom_sec_reg; +static const struct intel_ntb_alt_reg atom_b2b_reg; +static const struct intel_ntb_xlat_reg atom_pri_xlat; +static const struct intel_ntb_xlat_reg atom_sec_xlat; +static const struct intel_ntb_reg xeon_reg; +static const struct intel_ntb_alt_reg xeon_pri_reg; +static const struct intel_ntb_alt_reg xeon_sec_reg; +static const struct intel_ntb_alt_reg xeon_b2b_reg; +static const struct intel_ntb_xlat_reg xeon_pri_xlat; +static const struct intel_ntb_xlat_reg xeon_sec_xlat; +static struct intel_b2b_addr xeon_b2b_usd_addr; +static struct intel_b2b_addr xeon_b2b_dsd_addr; static const struct ntb_dev_ops intel_ntb_ops; static const struct file_operations intel_ntb_debugfs_info; @@ -105,45 +105,45 @@ MODULE_PARM_DESC(b2b_mw_share, "If the b2b mw is large enough, configure the " "the mw, so the second half can still be used as a mw. Both " "sides MUST set the same value here!"); -module_param_named(snb_b2b_usd_bar2_addr64, - snb_b2b_usd_addr.bar2_addr64, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, - "SNB B2B USD BAR 2 64-bit address"); +module_param_named(xeon_b2b_usd_bar2_addr64, + xeon_b2b_usd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD BAR 2 64-bit address"); -module_param_named(snb_b2b_usd_bar4_addr64, - snb_b2b_usd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, - "SNB B2B USD BAR 4 64-bit address"); +module_param_named(xeon_b2b_usd_bar4_addr64, + xeon_b2b_usd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD BAR 4 64-bit address"); -module_param_named(snb_b2b_usd_bar4_addr32, - snb_b2b_usd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, - "SNB B2B USD split-BAR 4 32-bit address"); +module_param_named(xeon_b2b_usd_bar4_addr32, + xeon_b2b_usd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD split-BAR 4 32-bit address"); -module_param_named(snb_b2b_usd_bar5_addr32, - snb_b2b_usd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_usd_bar2_addr64, - "SNB B2B USD split-BAR 5 32-bit address"); +module_param_named(xeon_b2b_usd_bar5_addr32, + xeon_b2b_usd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD split-BAR 5 32-bit address"); -module_param_named(snb_b2b_dsd_bar2_addr64, - snb_b2b_dsd_addr.bar2_addr64, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, - "SNB B2B DSD BAR 2 64-bit address"); +module_param_named(xeon_b2b_dsd_bar2_addr64, + xeon_b2b_dsd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD BAR 2 64-bit address"); -module_param_named(snb_b2b_dsd_bar4_addr64, - snb_b2b_dsd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, - "SNB B2B DSD BAR 4 64-bit address"); +module_param_named(xeon_b2b_dsd_bar4_addr64, + xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD BAR 4 64-bit address"); -module_param_named(snb_b2b_dsd_bar4_addr32, - snb_b2b_dsd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, - "SNB B2B DSD split-BAR 4 32-bit address"); +module_param_named(xeon_b2b_dsd_bar4_addr32, + xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD split-BAR 4 32-bit address"); -module_param_named(snb_b2b_dsd_bar5_addr32, - snb_b2b_dsd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(snb_b2b_dsd_bar2_addr64, - "SNB B2B DSD split-BAR 5 32-bit address"); +module_param_named(xeon_b2b_dsd_bar5_addr32, + xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD split-BAR 5 32-bit address"); #ifndef ioread64 #ifdef readq @@ -174,7 +174,7 @@ static inline void _iowrite64(u64 val, void __iomem *mmio) #endif #endif -static inline int pdev_is_bwd(struct pci_dev *pdev) +static inline int pdev_is_atom(struct pci_dev *pdev) { switch (pdev->device) { case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: @@ -183,7 +183,7 @@ static inline int pdev_is_bwd(struct pci_dev *pdev) return 0; } -static inline int pdev_is_snb(struct pci_dev *pdev) +static inline int pdev_is_xeon(struct pci_dev *pdev) { switch (pdev->device) { case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: @@ -636,70 +636,70 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "LMT45 -\t\t\t%#018llx\n", u.v64); - if (pdev_is_snb(ndev->ntb.pdev)) { + if (pdev_is_xeon(ndev->ntb.pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { off += scnprintf(buf + off, buf_size - off, "\nNTB Outgoing B2B XLAT:\n"); - u.v64 = ioread64(mmio + SNB_PBAR23XLAT_OFFSET); + u.v64 = ioread64(mmio + XEON_PBAR23XLAT_OFFSET); off += scnprintf(buf + off, buf_size - off, "B2B XLAT23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + SNB_PBAR45XLAT_OFFSET); + u.v64 = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); off += scnprintf(buf + off, buf_size - off, "B2B XLAT45 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + SNB_PBAR23LMT_OFFSET); + u.v64 = ioread64(mmio + XEON_PBAR23LMT_OFFSET); off += scnprintf(buf + off, buf_size - off, "B2B LMT23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + SNB_PBAR45LMT_OFFSET); + u.v64 = ioread64(mmio + XEON_PBAR45LMT_OFFSET); off += scnprintf(buf + off, buf_size - off, "B2B LMT45 -\t\t%#018llx\n", u.v64); off += scnprintf(buf + off, buf_size - off, "\nNTB Secondary BAR:\n"); - u.v64 = ioread64(mmio + SNB_SBAR0BASE_OFFSET); + u.v64 = ioread64(mmio + XEON_SBAR0BASE_OFFSET); off += scnprintf(buf + off, buf_size - off, "SBAR01 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + SNB_SBAR23BASE_OFFSET); + u.v64 = ioread64(mmio + XEON_SBAR23BASE_OFFSET); off += scnprintf(buf + off, buf_size - off, "SBAR23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + SNB_SBAR45BASE_OFFSET); + u.v64 = ioread64(mmio + XEON_SBAR45BASE_OFFSET); off += scnprintf(buf + off, buf_size - off, "SBAR45 -\t\t%#018llx\n", u.v64); } off += scnprintf(buf + off, buf_size - off, - "\nSNB NTB Statistics:\n"); + "\nXEON NTB Statistics:\n"); - u.v16 = ioread16(mmio + SNB_USMEMMISS_OFFSET); + u.v16 = ioread16(mmio + XEON_USMEMMISS_OFFSET); off += scnprintf(buf + off, buf_size - off, "Upstream Memory Miss -\t%u\n", u.v16); off += scnprintf(buf + off, buf_size - off, - "\nSNB NTB Hardware Errors:\n"); + "\nXEON NTB Hardware Errors:\n"); if (!pci_read_config_word(ndev->ntb.pdev, - SNB_DEVSTS_OFFSET, &u.v16)) + XEON_DEVSTS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "DEVSTS -\t\t%#06x\n", u.v16); if (!pci_read_config_word(ndev->ntb.pdev, - SNB_LINK_STATUS_OFFSET, &u.v16)) + XEON_LINK_STATUS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "LNKSTS -\t\t%#06x\n", u.v16); if (!pci_read_config_dword(ndev->ntb.pdev, - SNB_UNCERRSTS_OFFSET, &u.v32)) + XEON_UNCERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "UNCERRSTS -\t\t%#06x\n", u.v32); if (!pci_read_config_dword(ndev->ntb.pdev, - SNB_CORERRSTS_OFFSET, &u.v32)) + XEON_CORERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "CORERRSTS -\t\t%#06x\n", u.v32); } @@ -1091,67 +1091,67 @@ static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, ndev->peer_reg->spad); } -/* BWD */ +/* ATOM */ -static u64 bwd_db_ioread(void __iomem *mmio) +static u64 atom_db_ioread(void __iomem *mmio) { return ioread64(mmio); } -static void bwd_db_iowrite(u64 bits, void __iomem *mmio) +static void atom_db_iowrite(u64 bits, void __iomem *mmio) { iowrite64(bits, mmio); } -static int bwd_poll_link(struct intel_ntb_dev *ndev) +static int atom_poll_link(struct intel_ntb_dev *ndev) { u32 ntb_ctl; - ntb_ctl = ioread32(ndev->self_mmio + BWD_NTBCNTL_OFFSET); + ntb_ctl = ioread32(ndev->self_mmio + ATOM_NTBCNTL_OFFSET); if (ntb_ctl == ndev->ntb_ctl) return 0; ndev->ntb_ctl = ntb_ctl; - ndev->lnk_sta = ioread32(ndev->self_mmio + BWD_LINK_STATUS_OFFSET); + ndev->lnk_sta = ioread32(ndev->self_mmio + ATOM_LINK_STATUS_OFFSET); return 1; } -static int bwd_link_is_up(struct intel_ntb_dev *ndev) +static int atom_link_is_up(struct intel_ntb_dev *ndev) { - return BWD_NTB_CTL_ACTIVE(ndev->ntb_ctl); + return ATOM_NTB_CTL_ACTIVE(ndev->ntb_ctl); } -static int bwd_link_is_err(struct intel_ntb_dev *ndev) +static int atom_link_is_err(struct intel_ntb_dev *ndev) { - if (ioread32(ndev->self_mmio + BWD_LTSSMSTATEJMP_OFFSET) - & BWD_LTSSMSTATEJMP_FORCEDETECT) + if (ioread32(ndev->self_mmio + ATOM_LTSSMSTATEJMP_OFFSET) + & ATOM_LTSSMSTATEJMP_FORCEDETECT) return 1; - if (ioread32(ndev->self_mmio + BWD_IBSTERRRCRVSTS0_OFFSET) - & BWD_IBIST_ERR_OFLOW) + if (ioread32(ndev->self_mmio + ATOM_IBSTERRRCRVSTS0_OFFSET) + & ATOM_IBIST_ERR_OFLOW) return 1; return 0; } -static inline enum ntb_topo bwd_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +static inline enum ntb_topo atom_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) { - switch (ppd & BWD_PPD_TOPO_MASK) { - case BWD_PPD_TOPO_B2B_USD: + switch (ppd & ATOM_PPD_TOPO_MASK) { + case ATOM_PPD_TOPO_B2B_USD: dev_dbg(ndev_dev(ndev), "PPD %d B2B USD\n", ppd); return NTB_TOPO_B2B_USD; - case BWD_PPD_TOPO_B2B_DSD: + case ATOM_PPD_TOPO_B2B_DSD: dev_dbg(ndev_dev(ndev), "PPD %d B2B DSD\n", ppd); return NTB_TOPO_B2B_DSD; - case BWD_PPD_TOPO_PRI_USD: - case BWD_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ - case BWD_PPD_TOPO_SEC_USD: - case BWD_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + case ATOM_PPD_TOPO_PRI_USD: + case ATOM_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + case ATOM_PPD_TOPO_SEC_USD: + case ATOM_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ dev_dbg(ndev_dev(ndev), "PPD %d non B2B disabled\n", ppd); return NTB_TOPO_NONE; } @@ -1160,28 +1160,28 @@ static inline enum ntb_topo bwd_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) return NTB_TOPO_NONE; } -static void bwd_link_hb(struct work_struct *work) +static void atom_link_hb(struct work_struct *work) { struct intel_ntb_dev *ndev = hb_ndev(work); unsigned long poll_ts; void __iomem *mmio; u32 status32; - poll_ts = ndev->last_ts + BWD_LINK_HB_TIMEOUT; + poll_ts = ndev->last_ts + ATOM_LINK_HB_TIMEOUT; /* Delay polling the link status if an interrupt was received, * unless the cached link status says the link is down. */ - if (time_after(poll_ts, jiffies) && bwd_link_is_up(ndev)) { + if (time_after(poll_ts, jiffies) && atom_link_is_up(ndev)) { schedule_delayed_work(&ndev->hb_timer, poll_ts - jiffies); return; } - if (bwd_poll_link(ndev)) + if (atom_poll_link(ndev)) ntb_link_event(&ndev->ntb); - if (bwd_link_is_up(ndev) || !bwd_link_is_err(ndev)) { - schedule_delayed_work(&ndev->hb_timer, BWD_LINK_HB_TIMEOUT); + if (atom_link_is_up(ndev) || !atom_link_is_err(ndev)) { + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); return; } @@ -1190,91 +1190,91 @@ static void bwd_link_hb(struct work_struct *work) mmio = ndev->self_mmio; /* Driver resets the NTB ModPhy lanes - magic! */ - iowrite8(0xe0, mmio + BWD_MODPHY_PCSREG6); - iowrite8(0x40, mmio + BWD_MODPHY_PCSREG4); - iowrite8(0x60, mmio + BWD_MODPHY_PCSREG4); - iowrite8(0x60, mmio + BWD_MODPHY_PCSREG6); + iowrite8(0xe0, mmio + ATOM_MODPHY_PCSREG6); + iowrite8(0x40, mmio + ATOM_MODPHY_PCSREG4); + iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG4); + iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG6); /* Driver waits 100ms to allow the NTB ModPhy to settle */ msleep(100); /* Clear AER Errors, write to clear */ - status32 = ioread32(mmio + BWD_ERRCORSTS_OFFSET); + status32 = ioread32(mmio + ATOM_ERRCORSTS_OFFSET); dev_dbg(ndev_dev(ndev), "ERRCORSTS = %x\n", status32); status32 &= PCI_ERR_COR_REP_ROLL; - iowrite32(status32, mmio + BWD_ERRCORSTS_OFFSET); + iowrite32(status32, mmio + ATOM_ERRCORSTS_OFFSET); /* Clear unexpected electrical idle event in LTSSM, write to clear */ - status32 = ioread32(mmio + BWD_LTSSMERRSTS0_OFFSET); + status32 = ioread32(mmio + ATOM_LTSSMERRSTS0_OFFSET); dev_dbg(ndev_dev(ndev), "LTSSMERRSTS0 = %x\n", status32); - status32 |= BWD_LTSSMERRSTS0_UNEXPECTEDEI; - iowrite32(status32, mmio + BWD_LTSSMERRSTS0_OFFSET); + status32 |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; + iowrite32(status32, mmio + ATOM_LTSSMERRSTS0_OFFSET); /* Clear DeSkew Buffer error, write to clear */ - status32 = ioread32(mmio + BWD_DESKEWSTS_OFFSET); + status32 = ioread32(mmio + ATOM_DESKEWSTS_OFFSET); dev_dbg(ndev_dev(ndev), "DESKEWSTS = %x\n", status32); - status32 |= BWD_DESKEWSTS_DBERR; - iowrite32(status32, mmio + BWD_DESKEWSTS_OFFSET); + status32 |= ATOM_DESKEWSTS_DBERR; + iowrite32(status32, mmio + ATOM_DESKEWSTS_OFFSET); - status32 = ioread32(mmio + BWD_IBSTERRRCRVSTS0_OFFSET); + status32 = ioread32(mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); dev_dbg(ndev_dev(ndev), "IBSTERRRCRVSTS0 = %x\n", status32); - status32 &= BWD_IBIST_ERR_OFLOW; - iowrite32(status32, mmio + BWD_IBSTERRRCRVSTS0_OFFSET); + status32 &= ATOM_IBIST_ERR_OFLOW; + iowrite32(status32, mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); /* Releases the NTB state machine to allow the link to retrain */ - status32 = ioread32(mmio + BWD_LTSSMSTATEJMP_OFFSET); + status32 = ioread32(mmio + ATOM_LTSSMSTATEJMP_OFFSET); dev_dbg(ndev_dev(ndev), "LTSSMSTATEJMP = %x\n", status32); - status32 &= ~BWD_LTSSMSTATEJMP_FORCEDETECT; - iowrite32(status32, mmio + BWD_LTSSMSTATEJMP_OFFSET); + status32 &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; + iowrite32(status32, mmio + ATOM_LTSSMSTATEJMP_OFFSET); /* There is a potential race between the 2 NTB devices recovering at the * same time. If the times are the same, the link will not recover and * the driver will be stuck in this loop forever. Add a random interval * to the recovery time to prevent this race. */ - schedule_delayed_work(&ndev->hb_timer, BWD_LINK_RECOVERY_TIME - + prandom_u32() % BWD_LINK_RECOVERY_TIME); + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_RECOVERY_TIME + + prandom_u32() % ATOM_LINK_RECOVERY_TIME); } -static int bwd_init_isr(struct intel_ntb_dev *ndev) +static int atom_init_isr(struct intel_ntb_dev *ndev) { int rc; - rc = ndev_init_isr(ndev, 1, BWD_DB_MSIX_VECTOR_COUNT, - BWD_DB_MSIX_VECTOR_SHIFT, BWD_DB_TOTAL_SHIFT); + rc = ndev_init_isr(ndev, 1, ATOM_DB_MSIX_VECTOR_COUNT, + ATOM_DB_MSIX_VECTOR_SHIFT, ATOM_DB_TOTAL_SHIFT); if (rc) return rc; - /* BWD doesn't have link status interrupt, poll on that platform */ + /* ATOM doesn't have link status interrupt, poll on that platform */ ndev->last_ts = jiffies; - INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_hb); - schedule_delayed_work(&ndev->hb_timer, BWD_LINK_HB_TIMEOUT); + INIT_DELAYED_WORK(&ndev->hb_timer, atom_link_hb); + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); return 0; } -static void bwd_deinit_isr(struct intel_ntb_dev *ndev) +static void atom_deinit_isr(struct intel_ntb_dev *ndev) { cancel_delayed_work_sync(&ndev->hb_timer); ndev_deinit_isr(ndev); } -static int bwd_init_ntb(struct intel_ntb_dev *ndev) +static int atom_init_ntb(struct intel_ntb_dev *ndev) { - ndev->mw_count = BWD_MW_COUNT; - ndev->spad_count = BWD_SPAD_COUNT; - ndev->db_count = BWD_DB_COUNT; + ndev->mw_count = ATOM_MW_COUNT; + ndev->spad_count = ATOM_SPAD_COUNT; + ndev->db_count = ATOM_DB_COUNT; switch (ndev->ntb.topo) { case NTB_TOPO_B2B_USD: case NTB_TOPO_B2B_DSD: - ndev->self_reg = &bwd_pri_reg; - ndev->peer_reg = &bwd_b2b_reg; - ndev->xlat_reg = &bwd_sec_xlat; + ndev->self_reg = &atom_pri_reg; + ndev->peer_reg = &atom_b2b_reg; + ndev->xlat_reg = &atom_sec_xlat; /* Enable Bus Master and Memory Space on the secondary side */ iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, - ndev->self_mmio + BWD_SPCICMD_OFFSET); + ndev->self_mmio + ATOM_SPCICMD_OFFSET); break; @@ -1287,31 +1287,31 @@ static int bwd_init_ntb(struct intel_ntb_dev *ndev) return 0; } -static int bwd_init_dev(struct intel_ntb_dev *ndev) +static int atom_init_dev(struct intel_ntb_dev *ndev) { u32 ppd; int rc; - rc = pci_read_config_dword(ndev->ntb.pdev, BWD_PPD_OFFSET, &ppd); + rc = pci_read_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, &ppd); if (rc) return -EIO; - ndev->ntb.topo = bwd_ppd_topo(ndev, ppd); + ndev->ntb.topo = atom_ppd_topo(ndev, ppd); if (ndev->ntb.topo == NTB_TOPO_NONE) return -EINVAL; - rc = bwd_init_ntb(ndev); + rc = atom_init_ntb(ndev); if (rc) return rc; - rc = bwd_init_isr(ndev); + rc = atom_init_isr(ndev); if (rc) return rc; if (ndev->ntb.topo != NTB_TOPO_SEC) { /* Initiate PCI-E link training */ - rc = pci_write_config_dword(ndev->ntb.pdev, BWD_PPD_OFFSET, - ppd | BWD_PPD_INIT_LINK); + rc = pci_write_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, + ppd | ATOM_PPD_INIT_LINK); if (rc) return rc; } @@ -1319,24 +1319,24 @@ static int bwd_init_dev(struct intel_ntb_dev *ndev) return 0; } -static void bwd_deinit_dev(struct intel_ntb_dev *ndev) +static void atom_deinit_dev(struct intel_ntb_dev *ndev) { - bwd_deinit_isr(ndev); + atom_deinit_isr(ndev); } -/* SNB */ +/* XEON */ -static u64 snb_db_ioread(void __iomem *mmio) +static u64 xeon_db_ioread(void __iomem *mmio) { return (u64)ioread16(mmio); } -static void snb_db_iowrite(u64 bits, void __iomem *mmio) +static void xeon_db_iowrite(u64 bits, void __iomem *mmio) { iowrite16((u16)bits, mmio); } -static int snb_poll_link(struct intel_ntb_dev *ndev) +static int xeon_poll_link(struct intel_ntb_dev *ndev) { u16 reg_val; int rc; @@ -1346,7 +1346,7 @@ static int snb_poll_link(struct intel_ntb_dev *ndev) ndev->self_reg->db_bell); rc = pci_read_config_word(ndev->ntb.pdev, - SNB_LINK_STATUS_OFFSET, ®_val); + XEON_LINK_STATUS_OFFSET, ®_val); if (rc) return 0; @@ -1358,7 +1358,7 @@ static int snb_poll_link(struct intel_ntb_dev *ndev) return 1; } -static int snb_link_is_up(struct intel_ntb_dev *ndev) +static int xeon_link_is_up(struct intel_ntb_dev *ndev) { if (ndev->ntb.topo == NTB_TOPO_SEC) return 1; @@ -1366,52 +1366,52 @@ static int snb_link_is_up(struct intel_ntb_dev *ndev) return NTB_LNK_STA_ACTIVE(ndev->lnk_sta); } -static inline enum ntb_topo snb_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd) +static inline enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd) { - switch (ppd & SNB_PPD_TOPO_MASK) { - case SNB_PPD_TOPO_B2B_USD: + switch (ppd & XEON_PPD_TOPO_MASK) { + case XEON_PPD_TOPO_B2B_USD: return NTB_TOPO_B2B_USD; - case SNB_PPD_TOPO_B2B_DSD: + case XEON_PPD_TOPO_B2B_DSD: return NTB_TOPO_B2B_DSD; - case SNB_PPD_TOPO_PRI_USD: - case SNB_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + case XEON_PPD_TOPO_PRI_USD: + case XEON_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ return NTB_TOPO_PRI; - case SNB_PPD_TOPO_SEC_USD: - case SNB_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + case XEON_PPD_TOPO_SEC_USD: + case XEON_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ return NTB_TOPO_SEC; } return NTB_TOPO_NONE; } -static inline int snb_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd) +static inline int xeon_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd) { - if (ppd & SNB_PPD_SPLIT_BAR_MASK) { + if (ppd & XEON_PPD_SPLIT_BAR_MASK) { dev_dbg(ndev_dev(ndev), "PPD %d split bar\n", ppd); return 1; } return 0; } -static int snb_init_isr(struct intel_ntb_dev *ndev) +static int xeon_init_isr(struct intel_ntb_dev *ndev) { - return ndev_init_isr(ndev, SNB_DB_MSIX_VECTOR_COUNT, - SNB_DB_MSIX_VECTOR_COUNT, - SNB_DB_MSIX_VECTOR_SHIFT, - SNB_DB_TOTAL_SHIFT); + return ndev_init_isr(ndev, XEON_DB_MSIX_VECTOR_COUNT, + XEON_DB_MSIX_VECTOR_COUNT, + XEON_DB_MSIX_VECTOR_SHIFT, + XEON_DB_TOTAL_SHIFT); } -static void snb_deinit_isr(struct intel_ntb_dev *ndev) +static void xeon_deinit_isr(struct intel_ntb_dev *ndev) { ndev_deinit_isr(ndev); } -static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, - const struct intel_b2b_addr *addr, - const struct intel_b2b_addr *peer_addr) +static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, + const struct intel_b2b_addr *addr, + const struct intel_b2b_addr *peer_addr) { struct pci_dev *pdev; void __iomem *mmio; @@ -1438,11 +1438,11 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size); - if (b2b_mw_share && SNB_B2B_MIN_SIZE <= bar_size >> 1) { + if (b2b_mw_share && XEON_B2B_MIN_SIZE <= bar_size >> 1) { dev_dbg(ndev_dev(ndev), "b2b using first half of bar\n"); ndev->b2b_off = bar_size >> 1; - } else if (SNB_B2B_MIN_SIZE <= bar_size) { + } else if (XEON_B2B_MIN_SIZE <= bar_size) { dev_dbg(ndev_dev(ndev), "b2b using whole bar\n"); ndev->b2b_off = 0; @@ -1460,7 +1460,7 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, * Note: code for each specific bar size register, because the register * offsets are not in a consistent order (bar5sz comes after ppd, odd). */ - pci_read_config_byte(pdev, SNB_PBAR23SZ_OFFSET, &bar_sz); + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "PBAR23SZ %#x\n", bar_sz); if (b2b_bar == 2) { if (ndev->b2b_off) @@ -1468,12 +1468,12 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, else bar_sz = 0; } - pci_write_config_byte(pdev, SNB_SBAR23SZ_OFFSET, bar_sz); - pci_read_config_byte(pdev, SNB_SBAR23SZ_OFFSET, &bar_sz); + pci_write_config_byte(pdev, XEON_SBAR23SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "SBAR23SZ %#x\n", bar_sz); if (!ndev->bar4_split) { - pci_read_config_byte(pdev, SNB_PBAR45SZ_OFFSET, &bar_sz); + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "PBAR45SZ %#x\n", bar_sz); if (b2b_bar == 4) { if (ndev->b2b_off) @@ -1481,11 +1481,11 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, else bar_sz = 0; } - pci_write_config_byte(pdev, SNB_SBAR45SZ_OFFSET, bar_sz); - pci_read_config_byte(pdev, SNB_SBAR45SZ_OFFSET, &bar_sz); + pci_write_config_byte(pdev, XEON_SBAR45SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "SBAR45SZ %#x\n", bar_sz); } else { - pci_read_config_byte(pdev, SNB_PBAR4SZ_OFFSET, &bar_sz); + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "PBAR4SZ %#x\n", bar_sz); if (b2b_bar == 4) { if (ndev->b2b_off) @@ -1493,11 +1493,11 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, else bar_sz = 0; } - pci_write_config_byte(pdev, SNB_SBAR4SZ_OFFSET, bar_sz); - pci_read_config_byte(pdev, SNB_SBAR4SZ_OFFSET, &bar_sz); + pci_write_config_byte(pdev, XEON_SBAR4SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "SBAR4SZ %#x\n", bar_sz); - pci_read_config_byte(pdev, SNB_PBAR5SZ_OFFSET, &bar_sz); + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "PBAR5SZ %#x\n", bar_sz); if (b2b_bar == 5) { if (ndev->b2b_off) @@ -1505,8 +1505,8 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, else bar_sz = 0; } - pci_write_config_byte(pdev, SNB_SBAR5SZ_OFFSET, bar_sz); - pci_read_config_byte(pdev, SNB_SBAR5SZ_OFFSET, &bar_sz); + pci_write_config_byte(pdev, XEON_SBAR5SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &bar_sz); dev_dbg(ndev_dev(ndev), "SBAR5SZ %#x\n", bar_sz); } @@ -1525,7 +1525,7 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, return -EIO; dev_dbg(ndev_dev(ndev), "SBAR01 %#018llx\n", bar_addr); - iowrite64(bar_addr, mmio + SNB_SBAR0BASE_OFFSET); + iowrite64(bar_addr, mmio + XEON_SBAR0BASE_OFFSET); /* Other SBAR are normally hit by the PBAR xlat, except for b2b bar. * The b2b bar is either disabled above, or configured half-size, and @@ -1533,96 +1533,96 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, */ bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); - iowrite64(bar_addr, mmio + SNB_SBAR23BASE_OFFSET); - bar_addr = ioread64(mmio + SNB_SBAR23BASE_OFFSET); + iowrite64(bar_addr, mmio + XEON_SBAR23BASE_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR23BASE_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR23 %#018llx\n", bar_addr); if (!ndev->bar4_split) { bar_addr = addr->bar4_addr64 + (b2b_bar == 4 ? ndev->b2b_off : 0); - iowrite64(bar_addr, mmio + SNB_SBAR45BASE_OFFSET); - bar_addr = ioread64(mmio + SNB_SBAR45BASE_OFFSET); + iowrite64(bar_addr, mmio + XEON_SBAR45BASE_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR45BASE_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR45 %#018llx\n", bar_addr); } else { bar_addr = addr->bar4_addr32 + (b2b_bar == 4 ? ndev->b2b_off : 0); - iowrite32(bar_addr, mmio + SNB_SBAR4BASE_OFFSET); - bar_addr = ioread32(mmio + SNB_SBAR4BASE_OFFSET); + iowrite32(bar_addr, mmio + XEON_SBAR4BASE_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR4BASE_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR4 %#010llx\n", bar_addr); bar_addr = addr->bar5_addr32 + (b2b_bar == 5 ? ndev->b2b_off : 0); - iowrite32(bar_addr, mmio + SNB_SBAR5BASE_OFFSET); - bar_addr = ioread32(mmio + SNB_SBAR5BASE_OFFSET); + iowrite32(bar_addr, mmio + XEON_SBAR5BASE_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR5BASE_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR5 %#010llx\n", bar_addr); } /* setup incoming bar limits == base addrs (zero length windows) */ bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); - iowrite64(bar_addr, mmio + SNB_SBAR23LMT_OFFSET); - bar_addr = ioread64(mmio + SNB_SBAR23LMT_OFFSET); + iowrite64(bar_addr, mmio + XEON_SBAR23LMT_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR23LMT_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR23LMT %#018llx\n", bar_addr); if (!ndev->bar4_split) { bar_addr = addr->bar4_addr64 + (b2b_bar == 4 ? ndev->b2b_off : 0); - iowrite64(bar_addr, mmio + SNB_SBAR45LMT_OFFSET); - bar_addr = ioread64(mmio + SNB_SBAR45LMT_OFFSET); + iowrite64(bar_addr, mmio + XEON_SBAR45LMT_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR45LMT_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR45LMT %#018llx\n", bar_addr); } else { bar_addr = addr->bar4_addr32 + (b2b_bar == 4 ? ndev->b2b_off : 0); - iowrite32(bar_addr, mmio + SNB_SBAR4LMT_OFFSET); - bar_addr = ioread32(mmio + SNB_SBAR4LMT_OFFSET); + iowrite32(bar_addr, mmio + XEON_SBAR4LMT_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR4LMT_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR4LMT %#010llx\n", bar_addr); bar_addr = addr->bar5_addr32 + (b2b_bar == 5 ? ndev->b2b_off : 0); - iowrite32(bar_addr, mmio + SNB_SBAR5LMT_OFFSET); - bar_addr = ioread32(mmio + SNB_SBAR5LMT_OFFSET); + iowrite32(bar_addr, mmio + XEON_SBAR5LMT_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR5LMT_OFFSET); dev_dbg(ndev_dev(ndev), "SBAR5LMT %#05llx\n", bar_addr); } /* zero incoming translation addrs */ - iowrite64(0, mmio + SNB_SBAR23XLAT_OFFSET); + iowrite64(0, mmio + XEON_SBAR23XLAT_OFFSET); if (!ndev->bar4_split) { - iowrite64(0, mmio + SNB_SBAR45XLAT_OFFSET); + iowrite64(0, mmio + XEON_SBAR45XLAT_OFFSET); } else { - iowrite32(0, mmio + SNB_SBAR4XLAT_OFFSET); - iowrite32(0, mmio + SNB_SBAR5XLAT_OFFSET); + iowrite32(0, mmio + XEON_SBAR4XLAT_OFFSET); + iowrite32(0, mmio + XEON_SBAR5XLAT_OFFSET); } /* zero outgoing translation limits (whole bar size windows) */ - iowrite64(0, mmio + SNB_PBAR23LMT_OFFSET); + iowrite64(0, mmio + XEON_PBAR23LMT_OFFSET); if (!ndev->bar4_split) { - iowrite64(0, mmio + SNB_PBAR45LMT_OFFSET); + iowrite64(0, mmio + XEON_PBAR45LMT_OFFSET); } else { - iowrite32(0, mmio + SNB_PBAR4LMT_OFFSET); - iowrite32(0, mmio + SNB_PBAR5LMT_OFFSET); + iowrite32(0, mmio + XEON_PBAR4LMT_OFFSET); + iowrite32(0, mmio + XEON_PBAR5LMT_OFFSET); } /* set outgoing translation offsets */ bar_addr = peer_addr->bar2_addr64; - iowrite64(bar_addr, mmio + SNB_PBAR23XLAT_OFFSET); - bar_addr = ioread64(mmio + SNB_PBAR23XLAT_OFFSET); + iowrite64(bar_addr, mmio + XEON_PBAR23XLAT_OFFSET); + bar_addr = ioread64(mmio + XEON_PBAR23XLAT_OFFSET); dev_dbg(ndev_dev(ndev), "PBAR23XLAT %#018llx\n", bar_addr); if (!ndev->bar4_split) { bar_addr = peer_addr->bar4_addr64; - iowrite64(bar_addr, mmio + SNB_PBAR45XLAT_OFFSET); - bar_addr = ioread64(mmio + SNB_PBAR45XLAT_OFFSET); + iowrite64(bar_addr, mmio + XEON_PBAR45XLAT_OFFSET); + bar_addr = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); dev_dbg(ndev_dev(ndev), "PBAR45XLAT %#018llx\n", bar_addr); } else { bar_addr = peer_addr->bar4_addr32; - iowrite32(bar_addr, mmio + SNB_PBAR4XLAT_OFFSET); - bar_addr = ioread32(mmio + SNB_PBAR4XLAT_OFFSET); + iowrite32(bar_addr, mmio + XEON_PBAR4XLAT_OFFSET); + bar_addr = ioread32(mmio + XEON_PBAR4XLAT_OFFSET); dev_dbg(ndev_dev(ndev), "PBAR4XLAT %#010llx\n", bar_addr); bar_addr = peer_addr->bar5_addr32; - iowrite32(bar_addr, mmio + SNB_PBAR5XLAT_OFFSET); - bar_addr = ioread32(mmio + SNB_PBAR5XLAT_OFFSET); + iowrite32(bar_addr, mmio + XEON_PBAR5XLAT_OFFSET); + bar_addr = ioread32(mmio + XEON_PBAR5XLAT_OFFSET); dev_dbg(ndev_dev(ndev), "PBAR5XLAT %#010llx\n", bar_addr); } @@ -1642,13 +1642,13 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, /* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */ dev_dbg(ndev_dev(ndev), "B2BXLAT %#018llx\n", bar_addr); - iowrite32(bar_addr, mmio + SNB_B2B_XLAT_OFFSETL); - iowrite32(bar_addr >> 32, mmio + SNB_B2B_XLAT_OFFSETU); + iowrite32(bar_addr, mmio + XEON_B2B_XLAT_OFFSETL); + iowrite32(bar_addr >> 32, mmio + XEON_B2B_XLAT_OFFSETU); if (b2b_bar) { /* map peer ntb mmio config space registers */ ndev->peer_mmio = pci_iomap(pdev, b2b_bar, - SNB_B2B_MIN_SIZE); + XEON_B2B_MIN_SIZE); if (!ndev->peer_mmio) return -EIO; } @@ -1656,7 +1656,7 @@ static int snb_setup_b2b_mw(struct intel_ntb_dev *ndev, return 0; } -static int snb_init_ntb(struct intel_ntb_dev *ndev) +static int xeon_init_ntb(struct intel_ntb_dev *ndev) { int rc; u32 ntb_ctl; @@ -1664,11 +1664,11 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) if (ndev->bar4_split) ndev->mw_count = HSX_SPLIT_BAR_MW_COUNT; else - ndev->mw_count = SNB_MW_COUNT; + ndev->mw_count = XEON_MW_COUNT; - ndev->spad_count = SNB_SPAD_COUNT; - ndev->db_count = SNB_DB_COUNT; - ndev->db_link_mask = SNB_DB_LINK_BIT; + ndev->spad_count = XEON_SPAD_COUNT; + ndev->db_count = XEON_DB_COUNT; + ndev->db_link_mask = XEON_DB_LINK_BIT; switch (ndev->ntb.topo) { case NTB_TOPO_PRI: @@ -1684,9 +1684,9 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) /* use half the spads for the peer */ ndev->spad_count >>= 1; - ndev->self_reg = &snb_pri_reg; - ndev->peer_reg = &snb_sec_reg; - ndev->xlat_reg = &snb_sec_xlat; + ndev->self_reg = &xeon_pri_reg; + ndev->peer_reg = &xeon_sec_reg; + ndev->xlat_reg = &xeon_sec_xlat; break; case NTB_TOPO_SEC: @@ -1696,19 +1696,19 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) } /* use half the spads for the peer */ ndev->spad_count >>= 1; - ndev->self_reg = &snb_sec_reg; - ndev->peer_reg = &snb_pri_reg; - ndev->xlat_reg = &snb_pri_xlat; + ndev->self_reg = &xeon_sec_reg; + ndev->peer_reg = &xeon_pri_reg; + ndev->xlat_reg = &xeon_pri_xlat; break; case NTB_TOPO_B2B_USD: case NTB_TOPO_B2B_DSD: - ndev->self_reg = &snb_pri_reg; - ndev->peer_reg = &snb_b2b_reg; - ndev->xlat_reg = &snb_sec_xlat; + ndev->self_reg = &xeon_pri_reg; + ndev->peer_reg = &xeon_b2b_reg; + ndev->xlat_reg = &xeon_sec_xlat; if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { - ndev->peer_reg = &snb_pri_reg; + ndev->peer_reg = &xeon_pri_reg; if (b2b_mw_idx < 0) ndev->b2b_idx = b2b_mw_idx + ndev->mw_count; @@ -1725,20 +1725,20 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) } if (ndev->ntb.topo == NTB_TOPO_B2B_USD) { - rc = snb_setup_b2b_mw(ndev, - &snb_b2b_dsd_addr, - &snb_b2b_usd_addr); + rc = xeon_setup_b2b_mw(ndev, + &xeon_b2b_dsd_addr, + &xeon_b2b_usd_addr); } else { - rc = snb_setup_b2b_mw(ndev, - &snb_b2b_usd_addr, - &snb_b2b_dsd_addr); + rc = xeon_setup_b2b_mw(ndev, + &xeon_b2b_usd_addr, + &xeon_b2b_dsd_addr); } if (rc) return rc; /* Enable Bus Master and Memory Space on the secondary side */ iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, - ndev->self_mmio + SNB_SPCICMD_OFFSET); + ndev->self_mmio + XEON_SPCICMD_OFFSET); break; @@ -1755,7 +1755,7 @@ static int snb_init_ntb(struct intel_ntb_dev *ndev) return 0; } -static int snb_init_dev(struct intel_ntb_dev *ndev) +static int xeon_init_dev(struct intel_ntb_dev *ndev) { struct pci_dev *pdev; u8 ppd; @@ -1821,20 +1821,20 @@ static int snb_init_dev(struct intel_ntb_dev *ndev) break; } - ndev->reg = &snb_reg; + ndev->reg = &xeon_reg; - rc = pci_read_config_byte(pdev, SNB_PPD_OFFSET, &ppd); + rc = pci_read_config_byte(pdev, XEON_PPD_OFFSET, &ppd); if (rc) return -EIO; - ndev->ntb.topo = snb_ppd_topo(ndev, ppd); + ndev->ntb.topo = xeon_ppd_topo(ndev, ppd); dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd, ntb_topo_string(ndev->ntb.topo)); if (ndev->ntb.topo == NTB_TOPO_NONE) return -EINVAL; if (ndev->ntb.topo != NTB_TOPO_SEC) { - ndev->bar4_split = snb_ppd_bar4_split(ndev, ppd); + ndev->bar4_split = xeon_ppd_bar4_split(ndev, ppd); dev_dbg(ndev_dev(ndev), "ppd %#x bar4_split %d\n", ppd, ndev->bar4_split); } else { @@ -1849,16 +1849,16 @@ static int snb_init_dev(struct intel_ntb_dev *ndev) mem, ndev->bar4_split); } - rc = snb_init_ntb(ndev); + rc = xeon_init_ntb(ndev); if (rc) return rc; - return snb_init_isr(ndev); + return xeon_init_isr(ndev); } -static void snb_deinit_dev(struct intel_ntb_dev *ndev) +static void xeon_deinit_dev(struct intel_ntb_dev *ndev) { - snb_deinit_isr(ndev); + xeon_deinit_isr(ndev); } static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) @@ -1963,7 +1963,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, node = dev_to_node(&pdev->dev); - if (pdev_is_bwd(pdev)) { + if (pdev_is_atom(pdev)) { ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); if (!ndev) { rc = -ENOMEM; @@ -1976,11 +1976,11 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, if (rc) goto err_init_pci; - rc = bwd_init_dev(ndev); + rc = atom_init_dev(ndev); if (rc) goto err_init_dev; - } else if (pdev_is_snb(pdev)) { + } else if (pdev_is_xeon(pdev)) { ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); if (!ndev) { rc = -ENOMEM; @@ -1993,7 +1993,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, if (rc) goto err_init_pci; - rc = snb_init_dev(ndev); + rc = xeon_init_dev(ndev); if (rc) goto err_init_dev; @@ -2016,10 +2016,10 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, err_register: ndev_deinit_debugfs(ndev); - if (pdev_is_bwd(pdev)) - bwd_deinit_dev(ndev); - else if (pdev_is_snb(pdev)) - snb_deinit_dev(ndev); + if (pdev_is_atom(pdev)) + atom_deinit_dev(ndev); + else if (pdev_is_xeon(pdev)) + xeon_deinit_dev(ndev); err_init_dev: intel_ntb_deinit_pci(ndev); err_init_pci: @@ -2034,70 +2034,70 @@ static void intel_ntb_pci_remove(struct pci_dev *pdev) ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); - if (pdev_is_bwd(pdev)) - bwd_deinit_dev(ndev); - else if (pdev_is_snb(pdev)) - snb_deinit_dev(ndev); + if (pdev_is_atom(pdev)) + atom_deinit_dev(ndev); + else if (pdev_is_xeon(pdev)) + xeon_deinit_dev(ndev); intel_ntb_deinit_pci(ndev); kfree(ndev); } -static const struct intel_ntb_reg bwd_reg = { - .poll_link = bwd_poll_link, - .link_is_up = bwd_link_is_up, - .db_ioread = bwd_db_ioread, - .db_iowrite = bwd_db_iowrite, +static const struct intel_ntb_reg atom_reg = { + .poll_link = atom_poll_link, + .link_is_up = atom_link_is_up, + .db_ioread = atom_db_ioread, + .db_iowrite = atom_db_iowrite, .db_size = sizeof(u64), - .ntb_ctl = BWD_NTBCNTL_OFFSET, + .ntb_ctl = ATOM_NTBCNTL_OFFSET, .mw_bar = {2, 4}, }; -static const struct intel_ntb_alt_reg bwd_pri_reg = { - .db_bell = BWD_PDOORBELL_OFFSET, - .db_mask = BWD_PDBMSK_OFFSET, - .spad = BWD_SPAD_OFFSET, +static const struct intel_ntb_alt_reg atom_pri_reg = { + .db_bell = ATOM_PDOORBELL_OFFSET, + .db_mask = ATOM_PDBMSK_OFFSET, + .spad = ATOM_SPAD_OFFSET, }; -static const struct intel_ntb_alt_reg bwd_b2b_reg = { - .db_bell = BWD_B2B_DOORBELL_OFFSET, - .spad = BWD_B2B_SPAD_OFFSET, +static const struct intel_ntb_alt_reg atom_b2b_reg = { + .db_bell = ATOM_B2B_DOORBELL_OFFSET, + .spad = ATOM_B2B_SPAD_OFFSET, }; -static const struct intel_ntb_xlat_reg bwd_sec_xlat = { - /* FIXME : .bar0_base = BWD_SBAR0BASE_OFFSET, */ - /* FIXME : .bar2_limit = BWD_SBAR2LMT_OFFSET, */ - .bar2_xlat = BWD_SBAR2XLAT_OFFSET, +static const struct intel_ntb_xlat_reg atom_sec_xlat = { + /* FIXME : .bar0_base = ATOM_SBAR0BASE_OFFSET, */ + /* FIXME : .bar2_limit = ATOM_SBAR2LMT_OFFSET, */ + .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, }; -static const struct intel_ntb_reg snb_reg = { - .poll_link = snb_poll_link, - .link_is_up = snb_link_is_up, - .db_ioread = snb_db_ioread, - .db_iowrite = snb_db_iowrite, +static const struct intel_ntb_reg xeon_reg = { + .poll_link = xeon_poll_link, + .link_is_up = xeon_link_is_up, + .db_ioread = xeon_db_ioread, + .db_iowrite = xeon_db_iowrite, .db_size = sizeof(u32), - .ntb_ctl = SNB_NTBCNTL_OFFSET, + .ntb_ctl = XEON_NTBCNTL_OFFSET, .mw_bar = {2, 4, 5}, }; -static const struct intel_ntb_alt_reg snb_pri_reg = { - .db_bell = SNB_PDOORBELL_OFFSET, - .db_mask = SNB_PDBMSK_OFFSET, - .spad = SNB_SPAD_OFFSET, +static const struct intel_ntb_alt_reg xeon_pri_reg = { + .db_bell = XEON_PDOORBELL_OFFSET, + .db_mask = XEON_PDBMSK_OFFSET, + .spad = XEON_SPAD_OFFSET, }; -static const struct intel_ntb_alt_reg snb_sec_reg = { - .db_bell = SNB_SDOORBELL_OFFSET, - .db_mask = SNB_SDBMSK_OFFSET, +static const struct intel_ntb_alt_reg xeon_sec_reg = { + .db_bell = XEON_SDOORBELL_OFFSET, + .db_mask = XEON_SDBMSK_OFFSET, /* second half of the scratchpads */ - .spad = SNB_SPAD_OFFSET + (SNB_SPAD_COUNT << 1), + .spad = XEON_SPAD_OFFSET + (XEON_SPAD_COUNT << 1), }; -static const struct intel_ntb_alt_reg snb_b2b_reg = { - .db_bell = SNB_B2B_DOORBELL_OFFSET, - .spad = SNB_B2B_SPAD_OFFSET, +static const struct intel_ntb_alt_reg xeon_b2b_reg = { + .db_bell = XEON_B2B_DOORBELL_OFFSET, + .spad = XEON_B2B_SPAD_OFFSET, }; -static const struct intel_ntb_xlat_reg snb_pri_xlat = { +static const struct intel_ntb_xlat_reg xeon_pri_xlat = { /* Note: no primary .bar0_base visible to the secondary side. * * The secondary side cannot get the base address stored in primary @@ -2108,28 +2108,28 @@ static const struct intel_ntb_xlat_reg snb_pri_xlat = { * window by setting the limit equal to base, nor can it limit the size * of the memory window by setting the limit to base + size. */ - .bar2_limit = SNB_PBAR23LMT_OFFSET, - .bar2_xlat = SNB_PBAR23XLAT_OFFSET, + .bar2_limit = XEON_PBAR23LMT_OFFSET, + .bar2_xlat = XEON_PBAR23XLAT_OFFSET, }; -static const struct intel_ntb_xlat_reg snb_sec_xlat = { - .bar0_base = SNB_SBAR0BASE_OFFSET, - .bar2_limit = SNB_SBAR23LMT_OFFSET, - .bar2_xlat = SNB_SBAR23XLAT_OFFSET, +static const struct intel_ntb_xlat_reg xeon_sec_xlat = { + .bar0_base = XEON_SBAR0BASE_OFFSET, + .bar2_limit = XEON_SBAR23LMT_OFFSET, + .bar2_xlat = XEON_SBAR23XLAT_OFFSET, }; -static struct intel_b2b_addr snb_b2b_usd_addr = { - .bar2_addr64 = SNB_B2B_BAR2_USD_ADDR64, - .bar4_addr64 = SNB_B2B_BAR4_USD_ADDR64, - .bar4_addr32 = SNB_B2B_BAR4_USD_ADDR32, - .bar5_addr32 = SNB_B2B_BAR5_USD_ADDR32, +static struct intel_b2b_addr xeon_b2b_usd_addr = { + .bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64, + .bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64, + .bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32, + .bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32, }; -static struct intel_b2b_addr snb_b2b_dsd_addr = { - .bar2_addr64 = SNB_B2B_BAR2_DSD_ADDR64, - .bar4_addr64 = SNB_B2B_BAR4_DSD_ADDR64, - .bar4_addr32 = SNB_B2B_BAR4_DSD_ADDR32, - .bar5_addr32 = SNB_B2B_BAR5_DSD_ADDR32, +static struct intel_b2b_addr xeon_b2b_dsd_addr = { + .bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64, + .bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64, + .bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32, + .bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32, }; /* operations for primary side of local ntb */ diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index fec689dc95cf..7ddaf387b679 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -68,141 +68,141 @@ #define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F #define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E -/* SNB hardware (and JSF, IVT, HSX) */ +/* Intel Xeon hardware */ -#define SNB_PBAR23LMT_OFFSET 0x0000 -#define SNB_PBAR45LMT_OFFSET 0x0008 -#define SNB_PBAR4LMT_OFFSET 0x0008 -#define SNB_PBAR5LMT_OFFSET 0x000c -#define SNB_PBAR23XLAT_OFFSET 0x0010 -#define SNB_PBAR45XLAT_OFFSET 0x0018 -#define SNB_PBAR4XLAT_OFFSET 0x0018 -#define SNB_PBAR5XLAT_OFFSET 0x001c -#define SNB_SBAR23LMT_OFFSET 0x0020 -#define SNB_SBAR45LMT_OFFSET 0x0028 -#define SNB_SBAR4LMT_OFFSET 0x0028 -#define SNB_SBAR5LMT_OFFSET 0x002c -#define SNB_SBAR23XLAT_OFFSET 0x0030 -#define SNB_SBAR45XLAT_OFFSET 0x0038 -#define SNB_SBAR4XLAT_OFFSET 0x0038 -#define SNB_SBAR5XLAT_OFFSET 0x003c -#define SNB_SBAR0BASE_OFFSET 0x0040 -#define SNB_SBAR23BASE_OFFSET 0x0048 -#define SNB_SBAR45BASE_OFFSET 0x0050 -#define SNB_SBAR4BASE_OFFSET 0x0050 -#define SNB_SBAR5BASE_OFFSET 0x0054 -#define SNB_SBDF_OFFSET 0x005c -#define SNB_NTBCNTL_OFFSET 0x0058 -#define SNB_PDOORBELL_OFFSET 0x0060 -#define SNB_PDBMSK_OFFSET 0x0062 -#define SNB_SDOORBELL_OFFSET 0x0064 -#define SNB_SDBMSK_OFFSET 0x0066 -#define SNB_USMEMMISS_OFFSET 0x0070 -#define SNB_SPAD_OFFSET 0x0080 -#define SNB_PBAR23SZ_OFFSET 0x00d0 -#define SNB_PBAR45SZ_OFFSET 0x00d1 -#define SNB_PBAR4SZ_OFFSET 0x00d1 -#define SNB_SBAR23SZ_OFFSET 0x00d2 -#define SNB_SBAR45SZ_OFFSET 0x00d3 -#define SNB_SBAR4SZ_OFFSET 0x00d3 -#define SNB_PPD_OFFSET 0x00d4 -#define SNB_PBAR5SZ_OFFSET 0x00d5 -#define SNB_SBAR5SZ_OFFSET 0x00d6 -#define SNB_WCCNTRL_OFFSET 0x00e0 -#define SNB_UNCERRSTS_OFFSET 0x014c -#define SNB_CORERRSTS_OFFSET 0x0158 -#define SNB_LINK_STATUS_OFFSET 0x01a2 -#define SNB_SPCICMD_OFFSET 0x0504 -#define SNB_DEVCTRL_OFFSET 0x0598 -#define SNB_DEVSTS_OFFSET 0x059a -#define SNB_SLINK_STATUS_OFFSET 0x05a2 -#define SNB_B2B_SPAD_OFFSET 0x0100 -#define SNB_B2B_DOORBELL_OFFSET 0x0140 -#define SNB_B2B_XLAT_OFFSETL 0x0144 -#define SNB_B2B_XLAT_OFFSETU 0x0148 -#define SNB_PPD_CONN_MASK 0x03 -#define SNB_PPD_CONN_TRANSPARENT 0x00 -#define SNB_PPD_CONN_B2B 0x01 -#define SNB_PPD_CONN_RP 0x02 -#define SNB_PPD_DEV_MASK 0x10 -#define SNB_PPD_DEV_USD 0x00 -#define SNB_PPD_DEV_DSD 0x10 -#define SNB_PPD_SPLIT_BAR_MASK 0x40 +#define XEON_PBAR23LMT_OFFSET 0x0000 +#define XEON_PBAR45LMT_OFFSET 0x0008 +#define XEON_PBAR4LMT_OFFSET 0x0008 +#define XEON_PBAR5LMT_OFFSET 0x000c +#define XEON_PBAR23XLAT_OFFSET 0x0010 +#define XEON_PBAR45XLAT_OFFSET 0x0018 +#define XEON_PBAR4XLAT_OFFSET 0x0018 +#define XEON_PBAR5XLAT_OFFSET 0x001c +#define XEON_SBAR23LMT_OFFSET 0x0020 +#define XEON_SBAR45LMT_OFFSET 0x0028 +#define XEON_SBAR4LMT_OFFSET 0x0028 +#define XEON_SBAR5LMT_OFFSET 0x002c +#define XEON_SBAR23XLAT_OFFSET 0x0030 +#define XEON_SBAR45XLAT_OFFSET 0x0038 +#define XEON_SBAR4XLAT_OFFSET 0x0038 +#define XEON_SBAR5XLAT_OFFSET 0x003c +#define XEON_SBAR0BASE_OFFSET 0x0040 +#define XEON_SBAR23BASE_OFFSET 0x0048 +#define XEON_SBAR45BASE_OFFSET 0x0050 +#define XEON_SBAR4BASE_OFFSET 0x0050 +#define XEON_SBAR5BASE_OFFSET 0x0054 +#define XEON_SBDF_OFFSET 0x005c +#define XEON_NTBCNTL_OFFSET 0x0058 +#define XEON_PDOORBELL_OFFSET 0x0060 +#define XEON_PDBMSK_OFFSET 0x0062 +#define XEON_SDOORBELL_OFFSET 0x0064 +#define XEON_SDBMSK_OFFSET 0x0066 +#define XEON_USMEMMISS_OFFSET 0x0070 +#define XEON_SPAD_OFFSET 0x0080 +#define XEON_PBAR23SZ_OFFSET 0x00d0 +#define XEON_PBAR45SZ_OFFSET 0x00d1 +#define XEON_PBAR4SZ_OFFSET 0x00d1 +#define XEON_SBAR23SZ_OFFSET 0x00d2 +#define XEON_SBAR45SZ_OFFSET 0x00d3 +#define XEON_SBAR4SZ_OFFSET 0x00d3 +#define XEON_PPD_OFFSET 0x00d4 +#define XEON_PBAR5SZ_OFFSET 0x00d5 +#define XEON_SBAR5SZ_OFFSET 0x00d6 +#define XEON_WCCNTRL_OFFSET 0x00e0 +#define XEON_UNCERRSTS_OFFSET 0x014c +#define XEON_CORERRSTS_OFFSET 0x0158 +#define XEON_LINK_STATUS_OFFSET 0x01a2 +#define XEON_SPCICMD_OFFSET 0x0504 +#define XEON_DEVCTRL_OFFSET 0x0598 +#define XEON_DEVSTS_OFFSET 0x059a +#define XEON_SLINK_STATUS_OFFSET 0x05a2 +#define XEON_B2B_SPAD_OFFSET 0x0100 +#define XEON_B2B_DOORBELL_OFFSET 0x0140 +#define XEON_B2B_XLAT_OFFSETL 0x0144 +#define XEON_B2B_XLAT_OFFSETU 0x0148 +#define XEON_PPD_CONN_MASK 0x03 +#define XEON_PPD_CONN_TRANSPARENT 0x00 +#define XEON_PPD_CONN_B2B 0x01 +#define XEON_PPD_CONN_RP 0x02 +#define XEON_PPD_DEV_MASK 0x10 +#define XEON_PPD_DEV_USD 0x00 +#define XEON_PPD_DEV_DSD 0x10 +#define XEON_PPD_SPLIT_BAR_MASK 0x40 -#define SNB_PPD_TOPO_MASK (SNB_PPD_CONN_MASK | SNB_PPD_DEV_MASK) -#define SNB_PPD_TOPO_PRI_USD (SNB_PPD_CONN_RP | SNB_PPD_DEV_USD) -#define SNB_PPD_TOPO_PRI_DSD (SNB_PPD_CONN_RP | SNB_PPD_DEV_DSD) -#define SNB_PPD_TOPO_SEC_USD (SNB_PPD_CONN_TRANSPARENT | SNB_PPD_DEV_USD) -#define SNB_PPD_TOPO_SEC_DSD (SNB_PPD_CONN_TRANSPARENT | SNB_PPD_DEV_DSD) -#define SNB_PPD_TOPO_B2B_USD (SNB_PPD_CONN_B2B | SNB_PPD_DEV_USD) -#define SNB_PPD_TOPO_B2B_DSD (SNB_PPD_CONN_B2B | SNB_PPD_DEV_DSD) +#define XEON_PPD_TOPO_MASK (XEON_PPD_CONN_MASK | XEON_PPD_DEV_MASK) +#define XEON_PPD_TOPO_PRI_USD (XEON_PPD_CONN_RP | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_PRI_DSD (XEON_PPD_CONN_RP | XEON_PPD_DEV_DSD) +#define XEON_PPD_TOPO_SEC_USD (XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_SEC_DSD (XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_DSD) +#define XEON_PPD_TOPO_B2B_USD (XEON_PPD_CONN_B2B | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_B2B_DSD (XEON_PPD_CONN_B2B | XEON_PPD_DEV_DSD) -#define SNB_MW_COUNT 2 +#define XEON_MW_COUNT 2 #define HSX_SPLIT_BAR_MW_COUNT 3 -#define SNB_DB_COUNT 15 -#define SNB_DB_LINK 15 -#define SNB_DB_LINK_BIT BIT_ULL(SNB_DB_LINK) -#define SNB_DB_MSIX_VECTOR_COUNT 4 -#define SNB_DB_MSIX_VECTOR_SHIFT 5 -#define SNB_DB_TOTAL_SHIFT 16 -#define SNB_SPAD_COUNT 16 +#define XEON_DB_COUNT 15 +#define XEON_DB_LINK 15 +#define XEON_DB_LINK_BIT BIT_ULL(XEON_DB_LINK) +#define XEON_DB_MSIX_VECTOR_COUNT 4 +#define XEON_DB_MSIX_VECTOR_SHIFT 5 +#define XEON_DB_TOTAL_SHIFT 16 +#define XEON_SPAD_COUNT 16 -/* BWD hardware */ +/* Intel Atom hardware */ -#define BWD_SBAR2XLAT_OFFSET 0x0008 -#define BWD_PDOORBELL_OFFSET 0x0020 -#define BWD_PDBMSK_OFFSET 0x0028 -#define BWD_NTBCNTL_OFFSET 0x0060 -#define BWD_SPAD_OFFSET 0x0080 -#define BWD_PPD_OFFSET 0x00d4 -#define BWD_PBAR2XLAT_OFFSET 0x8008 -#define BWD_B2B_DOORBELL_OFFSET 0x8020 -#define BWD_B2B_SPAD_OFFSET 0x8080 -#define BWD_SPCICMD_OFFSET 0xb004 -#define BWD_LINK_STATUS_OFFSET 0xb052 -#define BWD_ERRCORSTS_OFFSET 0xb110 -#define BWD_IP_BASE 0xc000 -#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) -#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) -#define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040) -#define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324) -#define BWD_MODPHY_PCSREG4 0x1c004 -#define BWD_MODPHY_PCSREG6 0x1c006 +#define ATOM_SBAR2XLAT_OFFSET 0x0008 +#define ATOM_PDOORBELL_OFFSET 0x0020 +#define ATOM_PDBMSK_OFFSET 0x0028 +#define ATOM_NTBCNTL_OFFSET 0x0060 +#define ATOM_SPAD_OFFSET 0x0080 +#define ATOM_PPD_OFFSET 0x00d4 +#define ATOM_PBAR2XLAT_OFFSET 0x8008 +#define ATOM_B2B_DOORBELL_OFFSET 0x8020 +#define ATOM_B2B_SPAD_OFFSET 0x8080 +#define ATOM_SPCICMD_OFFSET 0xb004 +#define ATOM_LINK_STATUS_OFFSET 0xb052 +#define ATOM_ERRCORSTS_OFFSET 0xb110 +#define ATOM_IP_BASE 0xc000 +#define ATOM_DESKEWSTS_OFFSET (ATOM_IP_BASE + 0x3024) +#define ATOM_LTSSMERRSTS0_OFFSET (ATOM_IP_BASE + 0x3180) +#define ATOM_LTSSMSTATEJMP_OFFSET (ATOM_IP_BASE + 0x3040) +#define ATOM_IBSTERRRCRVSTS0_OFFSET (ATOM_IP_BASE + 0x3324) +#define ATOM_MODPHY_PCSREG4 0x1c004 +#define ATOM_MODPHY_PCSREG6 0x1c006 -#define BWD_PPD_INIT_LINK 0x0008 -#define BWD_PPD_CONN_MASK 0x0300 -#define BWD_PPD_CONN_TRANSPARENT 0x0000 -#define BWD_PPD_CONN_B2B 0x0100 -#define BWD_PPD_CONN_RP 0x0200 -#define BWD_PPD_DEV_MASK 0x1000 -#define BWD_PPD_DEV_USD 0x0000 -#define BWD_PPD_DEV_DSD 0x1000 -#define BWD_PPD_TOPO_MASK (BWD_PPD_CONN_MASK | BWD_PPD_DEV_MASK) -#define BWD_PPD_TOPO_PRI_USD (BWD_PPD_CONN_TRANSPARENT | BWD_PPD_DEV_USD) -#define BWD_PPD_TOPO_PRI_DSD (BWD_PPD_CONN_TRANSPARENT | BWD_PPD_DEV_DSD) -#define BWD_PPD_TOPO_SEC_USD (BWD_PPD_CONN_RP | BWD_PPD_DEV_USD) -#define BWD_PPD_TOPO_SEC_DSD (BWD_PPD_CONN_RP | BWD_PPD_DEV_DSD) -#define BWD_PPD_TOPO_B2B_USD (BWD_PPD_CONN_B2B | BWD_PPD_DEV_USD) -#define BWD_PPD_TOPO_B2B_DSD (BWD_PPD_CONN_B2B | BWD_PPD_DEV_DSD) +#define ATOM_PPD_INIT_LINK 0x0008 +#define ATOM_PPD_CONN_MASK 0x0300 +#define ATOM_PPD_CONN_TRANSPARENT 0x0000 +#define ATOM_PPD_CONN_B2B 0x0100 +#define ATOM_PPD_CONN_RP 0x0200 +#define ATOM_PPD_DEV_MASK 0x1000 +#define ATOM_PPD_DEV_USD 0x0000 +#define ATOM_PPD_DEV_DSD 0x1000 +#define ATOM_PPD_TOPO_MASK (ATOM_PPD_CONN_MASK | ATOM_PPD_DEV_MASK) +#define ATOM_PPD_TOPO_PRI_USD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_PRI_DSD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_DSD) +#define ATOM_PPD_TOPO_SEC_USD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_SEC_DSD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_DSD) +#define ATOM_PPD_TOPO_B2B_USD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_B2B_DSD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_DSD) -#define BWD_MW_COUNT 2 -#define BWD_DB_COUNT 34 -#define BWD_DB_VALID_MASK (BIT_ULL(BWD_DB_COUNT) - 1) -#define BWD_DB_MSIX_VECTOR_COUNT 34 -#define BWD_DB_MSIX_VECTOR_SHIFT 1 -#define BWD_DB_TOTAL_SHIFT 34 -#define BWD_SPAD_COUNT 16 +#define ATOM_MW_COUNT 2 +#define ATOM_DB_COUNT 34 +#define ATOM_DB_VALID_MASK (BIT_ULL(ATOM_DB_COUNT) - 1) +#define ATOM_DB_MSIX_VECTOR_COUNT 34 +#define ATOM_DB_MSIX_VECTOR_SHIFT 1 +#define ATOM_DB_TOTAL_SHIFT 34 +#define ATOM_SPAD_COUNT 16 -#define BWD_NTB_CTL_DOWN_BIT BIT(16) -#define BWD_NTB_CTL_ACTIVE(x) !(x & BWD_NTB_CTL_DOWN_BIT) +#define ATOM_NTB_CTL_DOWN_BIT BIT(16) +#define ATOM_NTB_CTL_ACTIVE(x) !(x & ATOM_NTB_CTL_DOWN_BIT) -#define BWD_DESKEWSTS_DBERR BIT(15) -#define BWD_LTSSMERRSTS0_UNEXPECTEDEI BIT(20) -#define BWD_LTSSMSTATEJMP_FORCEDETECT BIT(2) -#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF +#define ATOM_DESKEWSTS_DBERR BIT(15) +#define ATOM_LTSSMERRSTS0_UNEXPECTEDEI BIT(20) +#define ATOM_LTSSMSTATEJMP_FORCEDETECT BIT(2) +#define ATOM_IBIST_ERR_OFLOW 0x7FFF7FFF -#define BWD_LINK_HB_TIMEOUT msecs_to_jiffies(1000) -#define BWD_LINK_RECOVERY_TIME msecs_to_jiffies(500) +#define ATOM_LINK_HB_TIMEOUT msecs_to_jiffies(1000) +#define ATOM_LINK_RECOVERY_TIME msecs_to_jiffies(500) /* Ntb control and link status */ @@ -224,19 +224,19 @@ /* Use the following addresses for translation between b2b ntb devices in case * the hardware default values are not reliable. */ -#define SNB_B2B_BAR0_USD_ADDR 0x1000000000000000ull -#define SNB_B2B_BAR2_USD_ADDR64 0x2000000000000000ull -#define SNB_B2B_BAR4_USD_ADDR64 0x4000000000000000ull -#define SNB_B2B_BAR4_USD_ADDR32 0x20000000u -#define SNB_B2B_BAR5_USD_ADDR32 0x40000000u -#define SNB_B2B_BAR0_DSD_ADDR 0x9000000000000000ull -#define SNB_B2B_BAR2_DSD_ADDR64 0xa000000000000000ull -#define SNB_B2B_BAR4_DSD_ADDR64 0xc000000000000000ull -#define SNB_B2B_BAR4_DSD_ADDR32 0xa0000000u -#define SNB_B2B_BAR5_DSD_ADDR32 0xc0000000u +#define XEON_B2B_BAR0_USD_ADDR 0x1000000000000000ull +#define XEON_B2B_BAR2_USD_ADDR64 0x2000000000000000ull +#define XEON_B2B_BAR4_USD_ADDR64 0x4000000000000000ull +#define XEON_B2B_BAR4_USD_ADDR32 0x20000000u +#define XEON_B2B_BAR5_USD_ADDR32 0x40000000u +#define XEON_B2B_BAR0_DSD_ADDR 0x9000000000000000ull +#define XEON_B2B_BAR2_DSD_ADDR64 0xa000000000000000ull +#define XEON_B2B_BAR4_DSD_ADDR64 0xc000000000000000ull +#define XEON_B2B_BAR4_DSD_ADDR32 0xa0000000u +#define XEON_B2B_BAR5_DSD_ADDR32 0xc0000000u /* The peer ntb secondary config space is 32KB fixed size */ -#define SNB_B2B_MIN_SIZE 0x8000 +#define XEON_B2B_MIN_SIZE 0x8000 /* flags to indicate hardware errata */ #define NTB_HWERR_SDOORBELL_LOCKUP BIT_ULL(0) From 9891417de8a816de60faea8a2ff7a04c4b3cf3a1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 3 Jun 2015 11:29:38 -0400 Subject: [PATCH 609/839] NTB: Increase transport MTU to 64k from 16k Benchmarking showed a significant performance increase with the MTU size to 64k instead of 16k. Change the driver default to 64k. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index e07b056af3be..1551493197b4 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -76,7 +76,7 @@ static unsigned long max_mw_size; module_param(max_mw_size, ulong, 0644); MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); -static unsigned int transport_mtu = 0x401E; +static unsigned int transport_mtu = 0x10000; module_param(transport_mtu, uint, 0644); MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets"); From 7eb387813db6730a5793cd53077b4039d948d17f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 15 Jun 2015 08:21:33 -0400 Subject: [PATCH 610/839] NTB: Print driver name and version in module init Printouts driver name and version to indicate what is being loaded. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 4 ++++ drivers/ntb/ntb_transport.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index eeaa1e8f8bd8..70924e5c0c72 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -2012,6 +2012,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, if (rc) goto err_register; + dev_info(&pdev->dev, "NTB device registered.\n"); + return 0; err_register: @@ -2192,6 +2194,8 @@ static struct pci_driver intel_ntb_pci_driver = { static int __init intel_ntb_pci_driver_init(void) { + pr_info("%s %s\n", NTB_DESC, NTB_VER); + if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 1551493197b4..efe3ad4122f2 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1958,6 +1958,8 @@ static int __init ntb_transport_init(void) { int rc; + pr_info("%s, version %s\n", NTB_TRANSPORT_DESC, NTB_TRANSPORT_VER); + if (debugfs_initialized()) nt_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); From fd839bf88485878ec602dbb3146fd44769eb3784 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 15 Jun 2015 08:22:30 -0400 Subject: [PATCH 611/839] NTB: Change WARN_ON_ONCE to pr_warn_once on unsafe The unsafe doorbell and scratchpad access should display reason when WARN is called. Otherwise we get a stack dump without any explanation. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 70924e5c0c72..ba4dc0d7e1f3 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -246,7 +246,8 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev, phys_addr_t *db_addr, resource_size_t *db_size, phys_addr_t reg_addr, unsigned long reg) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); if (db_addr) { *db_addr = reg_addr + reg; @@ -264,7 +265,8 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev, static inline u64 ndev_db_read(struct intel_ntb_dev *ndev, void __iomem *mmio) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); return ndev->reg->db_ioread(mmio); } @@ -272,7 +274,8 @@ static inline u64 ndev_db_read(struct intel_ntb_dev *ndev, static inline int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits, void __iomem *mmio) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); if (db_bits & ~ndev->db_valid_mask) return -EINVAL; @@ -287,7 +290,8 @@ static inline int ndev_db_set_mask(struct intel_ntb_dev *ndev, u64 db_bits, { unsigned long irqflags; - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); if (db_bits & ~ndev->db_valid_mask) return -EINVAL; @@ -307,7 +311,8 @@ static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits, { unsigned long irqflags; - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_DB)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); if (db_bits & ~ndev->db_valid_mask) return -EINVAL; @@ -336,7 +341,8 @@ static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx, phys_addr_t *spad_addr, phys_addr_t reg_addr, unsigned long reg) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); if (idx < 0 || idx >= ndev->spad_count) return -EINVAL; @@ -352,7 +358,8 @@ static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx, static inline u32 ndev_spad_read(struct intel_ntb_dev *ndev, int idx, void __iomem *mmio) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); if (idx < 0 || idx >= ndev->spad_count) return 0; @@ -363,7 +370,8 @@ static inline u32 ndev_spad_read(struct intel_ntb_dev *ndev, int idx, static inline int ndev_spad_write(struct intel_ntb_dev *ndev, int idx, u32 val, void __iomem *mmio) { - WARN_ON_ONCE(ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)); + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); if (idx < 0 || idx >= ndev->spad_count) return -EINVAL; From bf44fe4671f97ee17d178e08c2d3b346188d062b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 18 Jun 2015 05:17:30 -0400 Subject: [PATCH 612/839] NTB: Add split BAR output for debugfs stats When split BAR is enabled, the driver needs to dump out the split BAR registers rather than the original 64bit BAR registers. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 83 +++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index ba4dc0d7e1f3..87751cfd6f4f 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -632,17 +632,36 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "XLAT23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); - off += scnprintf(buf + off, buf_size - off, - "XLAT45 -\t\t%#018llx\n", u.v64); + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); + off += scnprintf(buf + off, buf_size - off, + "XLAT4 -\t\t\t%#06x\n", u.v32); + + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 5)); + off += scnprintf(buf + off, buf_size - off, + "XLAT5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); + off += scnprintf(buf + off, buf_size - off, + "XLAT45 -\t\t%#018llx\n", u.v64); + } u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 2)); off += scnprintf(buf + off, buf_size - off, "LMT23 -\t\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); - off += scnprintf(buf + off, buf_size - off, - "LMT45 -\t\t\t%#018llx\n", u.v64); + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); + off += scnprintf(buf + off, buf_size - off, + "LMT4 -\t\t\t%#06x\n", u.v32); + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 5)); + off += scnprintf(buf + off, buf_size - off, + "LMT5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); + off += scnprintf(buf + off, buf_size - off, + "LMT45 -\t\t\t%#018llx\n", u.v64); + } if (pdev_is_xeon(ndev->ntb.pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { @@ -653,17 +672,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "B2B XLAT23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); - off += scnprintf(buf + off, buf_size - off, - "B2B XLAT45 -\t\t%#018llx\n", u.v64); + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_PBAR4XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT4 -\t\t%#06x\n", + u.v32); + u.v32 = ioread32(mmio + XEON_PBAR5XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT5 -\t\t%#06x\n", + u.v32); + } else { + u.v64 = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT45 -\t\t%#018llx\n", + u.v64); + } u.v64 = ioread64(mmio + XEON_PBAR23LMT_OFFSET); off += scnprintf(buf + off, buf_size - off, "B2B LMT23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + XEON_PBAR45LMT_OFFSET); - off += scnprintf(buf + off, buf_size - off, - "B2B LMT45 -\t\t%#018llx\n", u.v64); + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_PBAR4LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT4 -\t\t%#06x\n", + u.v32); + u.v32 = ioread32(mmio + XEON_PBAR5LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT5 -\t\t%#06x\n", + u.v32); + } else { + u.v64 = ioread64(mmio + XEON_PBAR45LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT45 -\t\t%#018llx\n", + u.v64); + } off += scnprintf(buf + off, buf_size - off, "\nNTB Secondary BAR:\n"); @@ -676,9 +719,19 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "SBAR23 -\t\t%#018llx\n", u.v64); - u.v64 = ioread64(mmio + XEON_SBAR45BASE_OFFSET); - off += scnprintf(buf + off, buf_size - off, - "SBAR45 -\t\t%#018llx\n", u.v64); + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_SBAR4BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR4 -\t\t\t%#06x\n", u.v32); + u.v32 = ioread32(mmio + XEON_SBAR5BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + XEON_SBAR45BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR45 -\t\t%#018llx\n", + u.v64); + } } off += scnprintf(buf + off, buf_size - off, From 44f4c054cae646a9296da05cdfe7d6e786f73d46 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Jul 2015 10:40:38 -0400 Subject: [PATCH 613/839] dax: Add block size note to documentation For block devices which are small enough, mkfs will default to creating a filesystem with block sizes smaller than page size. Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- Documentation/filesystems/dax.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index baf41118660d..7af2851d667c 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -18,8 +18,10 @@ Usage ----- If you have a block device which supports DAX, you can make a filesystem -on it as usual. When mounting it, use the -o dax option manually -or add 'dax' to the options in /etc/fstab. +on it as usual. The DAX code currently only supports files with a block +size equal to your kernel's PAGE_SIZE, so you may need to specify a block +size when creating the filesystem. When mounting it, use the "-o dax" +option on the command line or add 'dax' to the options in /etc/fstab. Implementation Tips for Block Driver Writers From 872eb127e3a6cddcfca1410bb808d9b9bc773dc1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Jul 2015 10:40:39 -0400 Subject: [PATCH 614/839] dax: Use copy_from_iter_nocache When userspace does a write, there's no need for the written data to pollute the CPU cache. This matches the original XIP code. Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 6f65f00e58ec..159f796afa85 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -155,7 +155,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, } if (iov_iter_rw(iter) == WRITE) - len = copy_from_iter(addr, max - pos, iter); + len = copy_from_iter_nocache(addr, max - pos, iter); else if (!hole) len = copy_to_iter(addr, max - pos, iter); else From bbab37ddc20bae4709bca8745c128c4f46fe63c5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Jul 2015 10:40:42 -0400 Subject: [PATCH 615/839] block: Add support for DAX reads/writes to block devices If a block device supports the ->direct_access methods, bypass the normal DIO path and use DAX to go straight to memcpy() instead of allocating a DIO and a BIO. Includes support for the DIO_SKIP_DIO_COUNT flag in DAX, as is done in do_blockdev_direct_IO(). Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- fs/block_dev.c | 4 ++++ fs/dax.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index c7e4163ede87..5dde6dff4940 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -151,6 +151,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + if (IS_DAX(inode)) + return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, + NULL, DIO_SKIP_DIO_COUNT); return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, blkdev_get_block, NULL, NULL, DIO_SKIP_DIO_COUNT); @@ -1173,6 +1176,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; if (!partno) { ret = -ENXIO; bdev->bd_part = disk_get_part(disk, partno); diff --git a/fs/dax.c b/fs/dax.c index 159f796afa85..37a0c4826c1a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, } /* Protects against truncate */ - inode_dio_begin(inode); + if (!(flags & DIO_SKIP_DIO_COUNT)) + inode_dio_begin(inode); retval = dax_io(inode, iter, pos, end, get_block, &bh); @@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, if ((retval > 0) && end_io) end_io(iocb, pos, retval, bh.b_private); - inode_dio_end(inode); + if (!(flags & DIO_SKIP_DIO_COUNT)) + inode_dio_end(inode); out: return retval; } From 43c3dd08da890e458f670b4fc0630513fb405620 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Jul 2015 10:40:43 -0400 Subject: [PATCH 616/839] dax: bdev_direct_access() may sleep The brd driver is the only in-tree driver that may sleep currently. After some discussion on linux-fsdevel, we decided that any driver may choose to sleep in its ->direct_access method. To ensure that all callers of bdev_direct_access() are prepared for this, add a call to might_sleep(). Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- fs/block_dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 5dde6dff4940..12b22ddb22ef 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -445,6 +445,12 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector, long avail; const struct block_device_operations *ops = bdev->bd_disk->fops; + /* + * The device driver is allowed to sleep, in order to make the + * memory directly accessible. + */ + might_sleep(); + if (size < 0) return size; if (!ops->direct_access) From a84b69cb6e0a41e86bc593904faa6def3b957343 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jul 2015 16:04:19 -0400 Subject: [PATCH 617/839] 9p: forgetting to cancel request on interrupted zero-copy RPC If we'd already sent a request and decide to abort it, we *must* issue TFLUSH properly and not just blindly reuse the tag, or we'll get seriously screwed when response eventually arrives and we confuse it for response to later request that had reused the same tag. Cc: stable@vger.kernel.org # v3.2 and later Signed-off-by: Al Viro --- net/9p/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 6f4c4c88db84..28f36e4556f9 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (err < 0) { if (err == -EIO) c->status = Disconnected; - goto reterr; + if (err != -ERESTARTSYS) + goto reterr; } if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); From 67e808fbb0404a12d9b9830a44bbb48d447d8bc9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jul 2015 16:11:05 -0400 Subject: [PATCH 618/839] p9_client_write(): avoid double p9_free_req() Braino in "9p: switch p9_client_write() to passing it struct iov_iter *"; if response is impossible to parse and we discard the request, get the out of the loop right there. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- net/9p/client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/9p/client.c b/net/9p/client.c index 28f36e4556f9..81925b923318 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1648,6 +1648,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (*err) { trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); + break; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); From 0f1db7dee200127da4c07928189748918c312031 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jul 2015 16:17:39 -0400 Subject: [PATCH 619/839] 9p: cope with bogus responses from server in p9_client_{read,write} if server claims to have written/read more than we'd told it to, warn and cap the claimed byte count to avoid advancing more than we are ready to. --- net/9p/client.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 81925b923318..498454b3c06c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1583,6 +1583,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) p9_free_req(clnt, req); break; } + if (rsize < count) { + pr_err("bogus RREAD count (%d > %d)\n", count, rsize); + count = rsize; + } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); if (!count) { @@ -1650,6 +1654,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) p9_free_req(clnt, req); break; } + if (rsize < count) { + pr_err("bogus RWRITE count (%d > %d)\n", count, rsize); + count = rsize; + } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); From 9b284cbdb5de3b8871014f8290d1b540e5181c21 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jul 2015 19:11:33 -0700 Subject: [PATCH 620/839] bluetooth: fix list handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 835a6a2f8603 ("Bluetooth: Stop sabotaging list poisoning") thought that the code was sabotaging the list poisoning when NULL'ing out the list pointers and removed it. But what was going on was that the bluetooth code was using NULL pointers for the list as a way to mark it empty, and that commit just broke it (and replaced the test with NULL with a "list_empty()" test on a uninitialized list instead, breaking things even further). So fix it all up to use the regular and real list_empty() handling (which does not use NULL, but a pointer to itself), also making sure to initialize the list properly (the previous NULL case was initialized implicitly by the session being allocated with kzalloc()) This is a combination of patches by Marcel Holtmann and Tedd Ho-Jeong An. [ I would normally expect to get this through the bt tree, but I'm going to release -rc1, so I'm just committing this directly - Linus ] Reported-and-tested-by: Jörg Otte Cc: Alexey Dobriyan Original-by: Tedd Ho-Jeong An Original-by: Marcel Holtmann : Signed-off-by: Linus Torvalds --- net/bluetooth/hidp/core.c | 1 + net/bluetooth/l2cap_core.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd6b4ad..f1a117f8cad2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, session->conn = l2cap_conn_get(conn); session->user.probe = hidp_session_probe; session->user.remove = hidp_session_remove; + INIT_LIST_HEAD(&session->user.list); session->ctrl_sock = ctrl_sock; session->intr_sock = intr_sock; skb_queue_head_init(&session->ctrl_transmit); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 51594fb7b9e7..45fffa413642 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1634,7 +1634,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) if (list_empty(&user->list)) goto out_unlock; - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); out_unlock: @@ -1648,7 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn) while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); } } From d90a45b1e005da3351de795d2ba6d52dc5a28837 Mon Sep 17 00:00:00 2001 From: Jens Kuske Date: Fri, 15 May 2015 18:38:51 +0200 Subject: [PATCH 621/839] Documentation: sunxi: Update Allwinner SoC documentation There are some new Allwinner SoCs available, namely A33, A83T and H3. Update the documentation to mention those and the related documents. Signed-off-by: Jens Kuske Signed-off-by: Maxime Ripard --- Documentation/arm/sunxi/README | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README index 1fe2d7fd4108..5e38e1582f95 100644 --- a/Documentation/arm/sunxi/README +++ b/Documentation/arm/sunxi/README @@ -36,7 +36,7 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf - - Allwinner A23 + - Allwinner A23 (sun8i) + Datasheet http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf + User Manual @@ -55,7 +55,23 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf + - Allwinner A33 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf + + User Manual + http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf + + - Allwinner H3 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf + * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs - Allwinner A80 + Datasheet http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf + + * Octa ARM Cortex-A7 based SoCs + - Allwinner A83T + + Not Supported + + Datasheet + http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf From 14a882df14a5ae859b245bc708ce3fce47a91594 Mon Sep 17 00:00:00 2001 From: Jens Kuske Date: Fri, 15 May 2015 18:38:55 +0200 Subject: [PATCH 622/839] ARM: sunxi: Introduce Allwinner H3 support The Allwinner H3 is a quad-core Cortex-A7-based SoC. It is very similar to other sun8i family SoCs like the A23. Signed-off-by: Jens Kuske Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.txt | 1 + arch/arm/mach-sunxi/Kconfig | 2 +- arch/arm/mach-sunxi/sunxi.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt index 42941fdefb11..3cb4b946ff2b 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.txt +++ b/Documentation/devicetree/bindings/arm/sunxi.txt @@ -9,4 +9,5 @@ using one of the following compatible strings: allwinner,sun6i-a31 allwinner,sun7i-a20 allwinner,sun8i-a23 + allwinner,sun8i-h3 allwinner,sun9i-a80 diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index 81502b90dd91..4efe2d43a126 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -35,7 +35,7 @@ config MACH_SUN7I select SUN5I_HSTIMER config MACH_SUN8I - bool "Allwinner A23 (sun8i) SoCs support" + bool "Allwinner sun8i Family SoCs support" default ARCH_SUNXI select ARM_GIC select MFD_SUN6I_PRCM diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 1bc811a74a9f..82709020c57c 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -67,10 +67,12 @@ MACHINE_END static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-a23", + "allwinner,sun8i-h3", NULL, }; -DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i (A23) Family") +DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family") + .init_time = sun6i_timer_init, .dt_compat = sun8i_board_dt_compat, .init_late = sunxi_dt_cpufreq_init, MACHINE_END From 159870d2413c92622790e9cecbce95099bed539a Mon Sep 17 00:00:00 2001 From: Vishnu Patekar Date: Sat, 30 May 2015 16:55:01 +0200 Subject: [PATCH 623/839] ARM: sunxi: Add Machine support for A33 Add machine support for the Allwinner A33 quad core cortex-a7 based SoC, which is similar to the A23 SoC. Signed-off-by: Vishnu Patekar Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard Tested-by: Chen-Yu Tsai --- Documentation/devicetree/bindings/arm/sunxi.txt | 1 + arch/arm/mach-sunxi/sunxi.c | 1 + drivers/clk/sunxi/clk-sunxi.c | 1 + 3 files changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt index 3cb4b946ff2b..67da20539540 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.txt +++ b/Documentation/devicetree/bindings/arm/sunxi.txt @@ -9,5 +9,6 @@ using one of the following compatible strings: allwinner,sun6i-a31 allwinner,sun7i-a20 allwinner,sun8i-a23 + allwinner,sun8i-a33 allwinner,sun8i-h3 allwinner,sun9i-a80 diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 82709020c57c..65bab2876343 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -67,6 +67,7 @@ MACHINE_END static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-a23", + "allwinner,sun8i-a33", "allwinner,sun8i-h3", NULL, }; diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 7e1e2bd189b6..6d25e4e9dfec 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -1389,6 +1389,7 @@ static void __init sun6i_init_clocks(struct device_node *node) CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks); CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks); CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks); +CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks); static void __init sun9i_init_clocks(struct device_node *node) { From 07d1f344159764bcd4e228e4a65e804dfe59dfe9 Mon Sep 17 00:00:00 2001 From: Timo Sigurdsson Date: Mon, 6 Apr 2015 22:57:22 +0200 Subject: [PATCH 624/839] ARM: Remove deprecated symbol from defconfig files Commit b2b3a8b934e6 ("power/reset: Remove sun6i reboot driver") removed the sun6i reboot driver. But sunxi_defconfig and multi_v7_defconfig still contain the symbol CONFIG_POWER_RESET_SUN6I that was deprecated by that commit, so remove it. Signed-off-by: Timo Sigurdsson Signed-off-by: Maxime Ripard --- arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/sunxi_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ab86655c1f4b..22df0021d265 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -319,7 +319,6 @@ CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_KEYSTONE=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_POWER_RESET_RMOBILE=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 8ecba00dcd83..2c7af0a57cb1 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -77,7 +77,6 @@ CONFIG_SPI_SUN6I=y CONFIG_GPIO_SYSFS=y CONFIG_POWER_SUPPLY=y CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_WATCHDOG=y From 5738563bf6f2f1631dc71a35293b135f90969a35 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 20 Apr 2015 21:48:40 +0200 Subject: [PATCH 625/839] ARM: sunxi: Enable simplefb in the defconfig Now that we have simplefb support, we can enable it in our defconfig. Also enable the framebuffer console, so that we are sure that we actually get something displayed in any case. And while we're at it, enable the module support. Signed-off-by: Maxime Ripard --- arch/arm/configs/sunxi_defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 2c7af0a57cb1..7ebc346bf9fa 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -2,6 +2,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_PERF_EVENTS=y +CONFIG_MODULES=y CONFIG_ARCH_SUNXI=y CONFIG_SMP=y CONFIG_NR_CPUS=8 @@ -86,6 +87,10 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_GPIO=y +CONFIG_FB=y +CONFIG_FB_SIMPLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y From 7444a072c387a93ebee7066e8aee776954ab0e41 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sun, 5 Jul 2015 12:33:44 -0400 Subject: [PATCH 626/839] ext4: replace open coded nofail allocation in ext4_free_blocks() ext4_free_blocks is looping around the allocation request and mimics __GFP_NOFAIL behavior without any allocation fallback strategy. Let's remove the open coded loop and replace it with __GFP_NOFAIL. Without the flag the allocator has no way to find out never-fail requirement and cannot help in any way. Signed-off-by: Michal Hocko Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/mballoc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1c535fa67640..2299d629eeb1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4815,18 +4815,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; From d770e558e21961ad6cfdf0ff7df0eb5d7d4f0754 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Jul 2015 11:01:52 -0700 Subject: [PATCH 627/839] Linux 4.2-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 26ac0281bc74..13270c0a9336 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 -PATCHLEVEL = 1 +PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* From 728abda6a6654ee7f4e903dc921c6307065e1644 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:12:42 -0300 Subject: [PATCH 628/839] tools: Adopt {READ,WRITE_ONCE} from the kernel We need it to build rbtree.c after this cset: commit d72da4a4d973 Author: Peter Zijlstra Date: Wed May 27 11:09:36 2015 +0930 rbtree: Make lockless searches non-fatal Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qlnzhezv5ddwst0w9fydju0y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index f0e72674c52d..9098083869c8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -41,4 +41,62 @@ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + #endif /* _TOOLS_LINUX_COMPILER_H */ From 4407f967441aa1adfc11f739e8e9ec0f38fa839f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:17:04 -0300 Subject: [PATCH 629/839] perf tools: Copy rbtree.h from the kernel We were using the include/linux/rbtree.h directly from the kernel, which broke the build as soon as it started using rcupdate.h, to avoid dragging the rcu header files into tools/, for which there is no use so far, grab a copy of rbtree.h. This is the minimal fix, later patches will copy as well lib/rbtree.c and move rbtree.h into tools/include/, etc. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-dfmuj0j63w4by7vhlh4hhn74@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 - tools/perf/util/include/linux/rbtree.h | 92 +++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fe50a1b34aa0..6504b727f450 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -51,7 +51,6 @@ include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/linux/perf_event.h -include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index f06d89f0b867..112582253dd0 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1,7 +1,95 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + #ifndef __TOOLS_LINUX_PERF_RBTREE_H #define __TOOLS_LINUX_PERF_RBTREE_H -#include -#include "../../../../include/linux/rbtree.h" + +#include +#include + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + /* * Handy for checking that we are not deleting an entry that is From 3f735377bfd6567d80815a6242c147211963680a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:48:21 -0300 Subject: [PATCH 630/839] tools: Copy lib/rbtree.c to tools/lib/ So that we can remove kernel specific stuff we've been stubbing out via a tools/include/linux/export.h that gets removed in this patch and to avoid breakages in the future like the one fixed recently where rcupdate.h started being used in rbtree.h. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rxuzfsozpb8hv1emwpx06rm6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/export.h | 10 - tools/lib/rbtree.c | 548 +++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 +- tools/perf/util/Build | 2 +- 4 files changed, 550 insertions(+), 12 deletions(-) delete mode 100644 tools/include/linux/export.h create mode 100644 tools/lib/rbtree.c diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h deleted file mode 100644 index d07e586b9ba0..000000000000 --- a/tools/include/linux/export.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TOOLS_LINUX_EXPORT_H_ -#define _TOOLS_LINUX_EXPORT_H_ - -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) - -#endif diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000000..17c2b596f043 --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. + */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 6504b727f450..ff667e36ca52 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c @@ -55,7 +56,6 @@ include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/hweight.c -lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 586a59d46022..601d11440596 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -139,7 +139,7 @@ $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE +$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) From 03da23a34a039a74f550646e68d562655306e331 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:54:01 -0300 Subject: [PATCH 631/839] tools: Move rbtree.h from tools/perf/ The previous step, copying the contents minus the rcupdate.h parts, was done as a minimal fix, now do the move from tools/perf/. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-52fllxtsgmtke66pmv98mcma@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/{perf/util => }/include/linux/rbtree.h | 0 tools/perf/MANIFEST | 1 + 2 files changed, 1 insertion(+) rename tools/{perf/util => }/include/linux/rbtree.h (100%) diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/include/linux/rbtree.h similarity index 100% rename from tools/perf/util/include/linux/rbtree.h rename to tools/include/linux/rbtree.h diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index ff667e36ca52..9df268167b8c 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -45,6 +45,7 @@ tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h +tools/include/linux/rbtree.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h From 307bc971959aaa2df44032e7f6b0bda1f7e26890 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:59:05 -0300 Subject: [PATCH 632/839] tools: Copy rbtree_augmented.h from the kernel To complete the transitioning to not to share the same files with the kernel, also moving it from tools/perf/include/linux/ to tools/include/linux to make the whoke rbtree kit to other tools/ living codebases. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5bxyehixafckqm6ez25alnfo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree_augmented.h | 245 ++++++++++++++++++ tools/perf/MANIFEST | 2 +- .../util/include/linux/rbtree_augmented.h | 2 - 3 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/rbtree_augmented.h delete mode 100644 tools/perf/util/include/linux/rbtree_augmented.h diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h new file mode 100644 index 000000000000..43be941db695 --- /dev/null +++ b/tools/include/linux/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + tools/linux/include/linux/rbtree_augmented.h + + Copied from: + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H +#define _TOOLS_LINUX_RBTREE_AUGMENTED_H + +#include +#include + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 9df268167b8c..09dc0aabb515 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -46,6 +46,7 @@ tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h tools/include/linux/rbtree.h +tools/include/linux/rbtree_augmented.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h @@ -65,7 +66,6 @@ arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h -include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h include/uapi/linux/const.h include/uapi/linux/swab.h diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h deleted file mode 100644 index 9d6fcdf1788b..000000000000 --- a/tools/perf/util/include/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#include -#include "../../../../include/linux/rbtree_augmented.h" From d8ea782b56d9d2c46a47b3231cfd16ecfb538c60 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 29 Jun 2015 10:47:55 +0530 Subject: [PATCH 633/839] powerpc/powernv: Fix vma page prot flags in opal-prd driver opal-prd driver will mmap() firmware code/data area as private mapping to prd user space daemon. Write to this page will trigger COW faults. The new COW pages are normal kernel RAM pages accounted by the kernel and are not special. vma->vm_page_prot value will be used at page fault time for the new COW pages, while pgprot_t value passed in remap_pfn_range() is used for the initial page table entry. Hence: * Do not add _PAGE_SPECIAL in vma, but only for remap_pfn_range() * Also remap_pfn_range() will add the _PAGE_SPECIAL flag using pte_mkspecial() call, hence no need to specify in the driver This fix resolves the page accounting warning shown below: BUG: Bad rss-counter state mm:c0000007d34ac600 idx:1 val:19 The above warning is triggered since _PAGE_SPECIAL was incorrectly being set for the normal kernel COW pages. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 46cb3feb0a13..4ece8e40dd54 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file) static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; + pgprot_t page_prot; int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", @@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) if (!opal_prd_range_is_valid(addr, size)) return -EINVAL; - vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, - vma->vm_pgoff, - size, vma->vm_page_prot)) - | _PAGE_SPECIAL); + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot); + page_prot); return rc; } From 5aac644a9944bea93b4f05ced1883a902a2535f6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 3 Jun 2015 10:39:46 +0300 Subject: [PATCH 634/839] x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate() If it takes longer than 12us to read the PIT counter lsb/msb, then the error margin will never fall below 500ppm within 50ms, and Fast TSC calibration will always fail. This patch detects when that will happen and fails fast. Note the failure message is not printed in that case because: 1. it will always happen on that class of hardware 2. the absence of the message is more informative than its presence Signed-off-by: Adrian Hunter Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Len Brown Cc: Andi Kleen Link: http://lkml.kernel.org/r/556EB717.9070607@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..7437b41f6a47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void) if (!pit_expect_msb(0xff-i, &delta, &d2)) break; + delta -= tsc; + + /* + * Extrapolate the error and fail fast if the error will + * never be below 500 ppm. + */ + if (i == 1 && + d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) + return 0; + /* * Iterate until the error is less than 500 ppm */ - delta -= tsc; if (d1+d2 >= delta >> 11) continue; From acb33cc541d7a5495b16a133702d4c401ea4e294 Mon Sep 17 00:00:00 2001 From: "Vutla, Lokesh" Date: Thu, 2 Jul 2015 18:33:28 +0530 Subject: [PATCH 635/839] crypto: omap-des - Fix unmapping of dma channels dma_unmap_sg() is being called twice after completing the task. Looks like this is a copy paste error when creating des driver. With this the following warn appears during boot: [ 4.210457] ------------[ cut here ]------------ [ 4.215114] WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1080 check_unmap+0x710/0x9a0() [ 4.222899] omap-des 480a5000.des: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000ab2ce000] [size=8 bytes] [ 4.236785] Modules linked in: [ 4.239860] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.39-02999-g1bc045a-dirty #182 [ 4.247918] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 4.255710] [] (show_stack) from [] (dump_stack+0x84/0xb8) [ 4.262977] [] (dump_stack) from [] (warn_slowpath_common+0x68/0x8c) [ 4.271107] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 4.279854] [] (warn_slowpath_fmt) from [] (check_unmap+0x710/0x9a0) [ 4.287991] [] (check_unmap) from [] (debug_dma_unmap_sg+0x90/0x19c) [ 4.296128] [] (debug_dma_unmap_sg) from [] (omap_des_done_task+0x1cc/0x3e4) [ 4.304963] [] (omap_des_done_task) from [] (tasklet_action+0x84/0x124) [ 4.313370] [] (tasklet_action) from [] (__do_softirq+0xf0/0x20c) [ 4.321235] [] (__do_softirq) from [] (irq_exit+0x98/0xec) [ 4.328500] [] (irq_exit) from [] (handle_IRQ+0x50/0xb0) [ 4.335589] [] (handle_IRQ) from [] (gic_handle_irq+0x28/0x5c) Removing the duplicate call to dma_unmap_sg(). Cc: stable@vger.kernel.org Reported-by: Tomi Valkeinen Signed-off-by: Lokesh Vutla Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 46307098f8ba..0a70e46d5416 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } From 0d7b6b1182ef6f72be592688c8a22025a5b7b483 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 2 Jul 2015 14:29:58 +0300 Subject: [PATCH 636/839] drm/i915/chv: fix HW readout of the port PLL fractional divider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville noticed that the PLL HW readout code parsed the fractional divider value as if the fractional divider was always enabled. This may result in a port clock state check mismatch if the preceeding modeset disabled the fractional divider, but left a non-zero divider value in the register. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1b61f9810387..8aa803848f35 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7887,7 +7887,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, int pipe = pipe_config->cpu_transcoder; enum dpio_channel port = vlv_pipe_to_channel(pipe); intel_clock_t clock; - u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2; + u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; int refclk = 100000; mutex_lock(&dev_priv->sb_lock); @@ -7895,10 +7895,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); + pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); mutex_unlock(&dev_priv->sb_lock); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; - clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff); + clock.m2 = (pll_dw0 & 0xff) << 22; + if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) + clock.m2 |= pll_dw2 & 0x3fffff; clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; From 14f21189df33bc972455d6a0ed875aa68718d7fc Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Mon, 29 Jun 2015 16:05:11 +0530 Subject: [PATCH 637/839] cxl/vphb.c: Use phb pointer after NULL check static Anlaysis detected below error:- (error) Possible null pointer dereference: phb So, Use phb after NULL check. Signed-off-by: Maninder Singh Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/vphb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index b1d1983a84a5..2eba002b580b 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -112,9 +112,10 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, unsigned long addr; phb = pci_bus_to_host(bus); - afu = (struct cxl_afu *)phb->private_data; if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + afu = (struct cxl_afu *)phb->private_data; + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; if (offset >= (unsigned long)phb->cfg_data) From 27ea2c420cad57386c25668cb9a9f0f082635586 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 15 Jun 2015 13:25:19 +1000 Subject: [PATCH 638/839] powerpc: Set the correct kernel taint on machine check errors. This means the 'M' flag will work properly when the kernel prints a backtrace. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6530f1b8874d..37de90f8a845 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; From 8c00d5c9d3e7452658dda55024485d52919ebfdd Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 2 Jul 2015 15:55:21 +1000 Subject: [PATCH 639/839] cxl: Test the correct mmio space before unmapping Before freeing p2n, test p2n, not p1n. Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index c68ef5806dbe..32ad09705949 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -539,7 +539,7 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p1n_mmio) + if (afu->p2n_mmio) iounmap(afu->p2n_mmio); if (afu->p1n_mmio) iounmap(afu->p1n_mmio); From eab861a7a52208868637f47a92caa926ddadd9c7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 2 Jul 2015 14:56:20 +1000 Subject: [PATCH 640/839] powerpc: Add plain English description for alignment exception oopses If we take an alignment exception which we cannot fix, the oops currently prints: Unable to handle kernel paging request for unknown fault Lets print something more useful: Unable to handle kernel paging request for unaligned access at address 0xc0000000f77bba8f Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6d535973b200..a67c6d781c52 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); From aaf6fd5c75eb4aa734ec2062deab371633c3655f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 6 Jul 2015 09:40:34 +1000 Subject: [PATCH 641/839] powerpc/ppc4xx_hsta_msi: Include ppc-pci.h to fix reference to hose_list An earlier commit referenced 'hose_list' in sysdev/ppc4xx_hsta_msi.c. hose_list is defined in ppc-pci.h, which was not included in that file. Include it, fixing the build for the akebono defconfig used by the kbuild test robot. Fixes: f2c800aaceb6 ("powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops") Reported-by: kbuild test robot Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 2bc33674ebfc..87f9623ca805 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -18,6 +18,7 @@ #include #include #include +#include struct ppc4xx_hsta_msi { struct device *dev; From a8956a7b7232da5f4ce4a305c72a54cc5e4a8307 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 3 Jul 2015 17:39:12 +1000 Subject: [PATCH 642/839] powerpc/powernv: Fix opal-elog interrupt handler The conversion of opal events to a proper irqchip means that handlers are called until the relevant opal event has been cleared by processing it. Events that queue work should therefore use a threaded handler to mask the event until processing is complete. Signed-off-by: Alistair Popple Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 4949ef0d9400..37f959bf392e 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} -static DECLARE_WORK(elog_work, elog_work_fn); - -static irqreturn_t elog_event(int irq, void *data) -{ - schedule_work(&elog_work); return IRQ_HANDLED; } @@ -304,8 +298,8 @@ int __init opal_elog_init(void) return irq; } - rc = request_irq(irq, elog_event, - IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, rc); From 57ffc5ca679f499f4704fd9b6a372916f59930ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2015 12:32:49 +0200 Subject: [PATCH 643/839] perf: Fix AUX buffer refcounting Its currently possible to drop the last refcount to the aux buffer from NMI context, which results in the expected fireworks. The refcounting needs a bigger overhaul, but to cure the immediate problem, delay the freeing by using an irq_work. Reviewed-and-tested-by: Alexander Shishkin Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150618103249.GK19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 -------- kernel/events/internal.h | 10 ++++++++++ kernel/events/ring_buffer.c | 27 +++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e965cfae4207..d3dae3419b99 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4358,14 +4358,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c7a40d..2bbad9c1274c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 96472824a752..b2be01b1aa9d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -557,7 +569,18 @@ static void __rb_free_aux(struct ring_buffer *rb) void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC From ebf2d2689de551d90965090bb991fc640a0c0d41 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 22 Jun 2015 21:38:43 +0200 Subject: [PATCH 644/839] perf/x86: Fix copy_from_user_nmi() return if range is not ok Commit 0a196848ca36 ("perf: Fix arch_perf_out_copy_user default"), changes copy_from_user_nmi() to return the number of remaining bytes so that it behave like copy_from_user(). Unfortunately, when the range is outside of the process memory, the return value is still the number of byte copied, eg. 0, instead of the remaining bytes. As all users of copy_from_user_nmi() were modified as part of commit 0a196848ca36, the function should be fixed to return the total number of bytes if range is not correct. Signed-off-by: Yann Droneaud Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435001923-30986-1-git-send-email-ydroneaud@opteya.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index ddf9ecb53cc3..e342586db6e4 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -20,7 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) unsigned long ret; if (__range_not_ok(from, n, TASK_SIZE)) - return 0; + return n; /* * Even though this function is typically called from NMI/IRQ context From 5c250adb51ca318b67ac3a230a565dbe2ea9f0ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 Jun 2015 16:18:38 +0200 Subject: [PATCH 645/839] Revert "ARM: dts: am335x-boneblack: disable RTC-only sleep" This reverts commit 3d76be5b933e2a66d85a2f7444e68e99e8a48ad4. The latest revision of Beaglebone Black does not support RTC-only mode. To avoid potential hardware damage, RTC-only mode was disabled by default by commit 7a6cb0abe1aa ("ARM: dts: am335x-boneblack: disable RTC-only sleep to avoid hardware damage"). Unfortunately, an incorrect fix had already been applied, which instead of just disabling RTC-only mode, prevents the Beaglebone from powering down at all. Revert this patch to fix the power-off regression. Signed-off-by: Johan Hovold Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-boneblack.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 901739fcb85a..5c42d259fa68 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,3 +80,7 @@ status = "okay"; }; }; + +&rtc { + system-power-controller; +}; From fff75ee150104fdc29d86a75a5511482a981c27b Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 6 May 2015 12:25:33 -0500 Subject: [PATCH 646/839] ARM: dts: am4372: Add emif node Add node for TI AM4372 EMIF. Without this we get a warning with the recent commit fabbe6df (ARM: OMAP: AM43xx hwmod: Add data for am43xx emif hwmod). Signed-off-by: Dave Gerlach Tested-by: Felipe Balbi Acked-by: Felipe Balbi [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- .../devicetree/bindings/memory-controllers/ti/emif.txt | 1 + arch/arm/boot/dts/am4372.dtsi | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt index 938f8e1ba205..0db60470ebb6 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt @@ -8,6 +8,7 @@ of the EMIF IP and memory parts attached to it. Required properties: - compatible : Should be of the form "ti,emif-" where is the IP revision of the specific EMIF instance. + For am437x should be ti,emif-am4372. - phy-type : indicating the DDR phy type. Following are the allowed values diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index c80a3e233792..9521a3827021 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -132,6 +132,12 @@ }; }; + emif: emif@4c000000 { + compatible = "ti,emif-am4372"; + reg = <0x4c000000 0x1000000>; + ti,hwmods = "emif"; + }; + edma: edma@49000000 { compatible = "ti,edma3"; ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2"; From 9ab402aed38b95d9ce453108622be0fc6f167568 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Jun 2015 17:52:43 +0300 Subject: [PATCH 647/839] ARM: dts: am57xx-beagle-x15: Provide supply for usb2_phy2 Without this USB2 breaks if USB1 is disabled or USB1 initializes after USB2 e.g. due to deferred probing. Fixes: 5a0f93c6576a ("ARM: dts: Add am57xx-beagle-x15") Signed-off-by: Roger Quadros Cc: stable@vger.kernel.org (v3.19+) Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-beagle-x15.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index a42cc377a862..a63bf78191ea 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -605,6 +605,10 @@ phy-supply = <&ldousb_reg>; }; +&usb2_phy2 { + phy-supply = <&ldousb_reg>; +}; + &usb1 { dr_mode = "host"; pinctrl-names = "default"; From 22a5dc10e3f8fb8370748ea19dc4e3e1620d8296 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 30 Jun 2015 15:04:54 +0300 Subject: [PATCH 648/839] ARM: dts: am4372.dtsi: disable rfbi When DSS nodes were added to am4372.dtsi, the rfbi node was not marked as disabled. This should have been done, as the rule of thumb is to disable all DSS nodes that are not used, and especially rfbi, as we don't have a driver for rfbi. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am4372.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 9521a3827021..ade28c790f4b 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -947,6 +947,7 @@ ti,hwmods = "dss_rfbi"; clocks = <&disp_clk>; clock-names = "fck"; + status = "disabled"; }; }; From d0f77d4d04b222a817925d33ba3589b190bfa863 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:33 +0300 Subject: [PATCH 649/839] x86/init: Clear 'init_level4_pgt' earlier Currently x86_64_start_kernel() has two KASAN related function calls. The first call maps shadow to early_level4_pgt, the second maps shadow to init_level4_pgt. If we move clear_page(init_level4_pgt) earlier, we could hide KASAN low level detail from generic x86_64 initialization code. The next patch will do it. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-2-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e98..65c8985e3eb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -166,6 +166,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,7 +179,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; From 5d5aa3cfca5cf74cd928daf3674642e6004328d1 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 2 Jul 2015 12:09:34 +0300 Subject: [PATCH 650/839] x86/kasan: Fix KASAN shadow region page tables Currently KASAN shadow region page tables created without respect of physical offset (phys_base). This causes kernel halt when phys_base is not zero. So let's initialize KASAN shadow region page tables in kasan_early_init() using __pa_nodebug() which considers phys_base. This patch also separates x86_64_start_kernel() from KASAN low level details by moving kasan_map_early_shadow(init_level4_pgt) into kasan_early_init(). Remove the comment before clear_bss() which stopped bringing much profit to the code readability. Otherwise describing all the new order dependencies would be too verbose. Signed-off-by: Alexander Popov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-3-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kasan.h | 8 ++------ arch/x86/kernel/head64.c | 7 ++----- arch/x86/kernel/head_64.S | 29 ----------------------------- arch/x86/mm/kasan_init_64.c | 36 ++++++++++++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8..74a2a8dc9908 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 65c8985e3eb2..f129a9af6357 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,13 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); clear_page(init_level4_pgt); + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -182,8 +181,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e5c27f729a38..1d40ca8a73f2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,38 +516,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f..0e4a05fa34d7 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,7 +11,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +48,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -166,6 +178,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; From 241d2c54c62fa0939fc9a9512b48ac3434e90a89 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:35 +0300 Subject: [PATCH 651/839] x86/kasan: Flush TLBs after switching CR3 load_cr3() doesn't cause tlb_flush if PGE enabled. This may cause tons of false positive reports spamming the kernel to death. To fix this __flush_tlb_all() should be called explicitly after CR3 changed. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-4-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0e4a05fa34d7..5d26642e4e8a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -208,6 +208,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -234,5 +235,6 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; } From d4f86beacc21d538dc41e1fc75a22e084f547edf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:36 +0300 Subject: [PATCH 652/839] x86/kasan: Fix boot crash on AMD processors While populating zero shadow wrong bits in upper level page tables used. __PAGE_KERNEL_RO that was used for pgd/pud/pmd has _PAGE_BIT_GLOBAL set. Global bit is present only in the lowest level of the page translation hierarchy (ptes), and it should be zero in upper levels. This bug seems doesn't cause any troubles on Intel cpus, while on AMDs it cause kernel crash on boot. Use _KERNPG_TABLE bits for pgds/puds/pmds to fix this. Reported-by: Borislav Petkov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-5-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 5d26642e4e8a..9a54dbe98064 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -85,7 +85,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -111,7 +111,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -136,7 +136,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } From 8515522949951d81fe2d06c0a3292f171f2b8ec4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:37 +0300 Subject: [PATCH 653/839] x86/kasan: Add message about KASAN being initialized Print informational message to tell user that kernel runs with KASAN enabled. Add a "kasan: " prefix to all messages in kasan_init_64.c. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-6-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9a54dbe98064..e1840f3db5b5 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) "kasan: " fmt #include #include #include @@ -237,4 +238,6 @@ void __init kasan_init(void) load_cr3(init_level4_pgt); __flush_tlb_all(); init_task.kasan_depth = 0; + + pr_info("Kernel address sanitizer initialized\n"); } From d6f2d75a7ae06ffd793bb504c4f0d1665548cffc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:38 +0300 Subject: [PATCH 654/839] x86/kasan: Move KASAN_SHADOW_OFFSET to the arch Kconfig KASAN_SHADOW_OFFSET is purely arch specific setting, so it should be in arch's Kconfig file. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Paul Bolle Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-7-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 +++++ lib/Kconfig.kasan | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55bced17dc95..0b2929f49531 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,6 +254,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 777eda7d1ab4..39f24d6721e5 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -18,10 +18,6 @@ config KASAN For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. -config KASAN_SHADOW_OFFSET - hex - default 0xdffffc0000000000 if X86_64 - choice prompt "Instrumentation type" depends on KASAN From 1db875631f8d5bbf497f67e47f254eece888d51d Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:18 +0800 Subject: [PATCH 655/839] x86/espfix: Add 'cpu' parameter to init_espfix_ap() Add a CPU index parameter to init_espfix_ap(), so that the parameter could be propagated to the function for espfix page allocation. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cde3fcf1b3211f3f03feb1a995bce3fee850f0fc.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/espfix.h | 2 +- arch/x86/kernel/espfix_64.c | 7 +++---- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 99efebb2f69d..ca3ce9ab9385 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); -extern void init_espfix_ap(void); +extern void init_espfix_ap(int cpu); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index f5d0730e7b08..1fb0e0003c94 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -131,12 +131,12 @@ void __init init_espfix_bsp(void) init_espfix_random(); /* The rest is the same as for any other processor */ - init_espfix_ap(); + init_espfix_ap(0); } -void init_espfix_ap(void) +void init_espfix_ap(int cpu) { - unsigned int cpu, page; + unsigned int page; unsigned long addr; pud_t pud, *pud_p; pmd_t pmd, *pmd_p; @@ -149,7 +149,6 @@ void init_espfix_ap(void) if (likely(this_cpu_read(espfix_stack))) return; /* Already initialized */ - cpu = smp_processor_id(); addr = espfix_base_addr(cpu); page = cpu/ESPFIX_STACKS_PER_PAGE; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8add66b22f33..6f5abac6c28b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -242,7 +242,7 @@ static void notrace start_secondary(void *unused) * Enable the espfix hack for this CPU */ #ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(); + init_espfix_ap(smp_processor_id()); #endif /* From 20d5e4a9cd453991e2590a4c25230a99b42dee0c Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:19 +0800 Subject: [PATCH 656/839] x86/espfix: Init espfix on the boot CPU side As we alloc pages with GFP_KERNEL in init_espfix_ap() which is called before we enable local irqs, so the lockdep sub-system would (correctly) trigger a warning about the potentially blocking API. So we allocate them on the boot CPU side when the secondary CPU is brought up by the boot CPU, and hand them over to the secondary CPU. And we use alloc_pages_node() with the secondary CPU's node, to make sure the espfix stack is NUMA-local to the CPU that is going to use it. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/espfix_64.c | 21 +++++++++++++-------- arch/x86/kernel/smpboot.c | 14 +++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 1fb0e0003c94..ce95676abd60 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -141,12 +141,12 @@ void init_espfix_ap(int cpu) pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; - int n; + int n, node; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ - if (likely(this_cpu_read(espfix_stack))) + if (likely(per_cpu(espfix_stack, cpu))) return; /* Already initialized */ addr = espfix_base_addr(cpu); @@ -164,12 +164,15 @@ void init_espfix_ap(int cpu) if (stack_page) goto unlock_done; + node = cpu_to_node(cpu); ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) @@ -179,7 +182,9 @@ void init_espfix_ap(int cpu) pmd_p = pmd_offset(&pud, addr); pmd = *pmd_p; if (!pmd_present(pmd)) { - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) @@ -187,7 +192,7 @@ void init_espfix_ap(int cpu) } pte_p = pte_offset_kernel(&pmd, addr); - stack_page = (void *)__get_free_page(GFP_KERNEL); + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); @@ -198,7 +203,7 @@ void init_espfix_ap(int cpu) unlock_done: mutex_unlock(&espfix_init_mutex); done: - this_cpu_write(espfix_stack, addr); - this_cpu_write(espfix_waddr, (unsigned long)stack_page - + (addr & ~PAGE_MASK)); + per_cpu(espfix_stack, cpu) = addr; + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page + + (addr & ~PAGE_MASK); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f5abac6c28b..0bd8c1d507b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -238,13 +238,6 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(smp_processor_id()); -#endif - /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding @@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(cpu); +#endif + /* So we see what's up */ announce_cpu(cpu, apicid); From 827a82ff399523a954253dfea401af748640f0f4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Jul 2015 10:14:34 -0400 Subject: [PATCH 657/839] x86/earlyprintk: Allow early_printk() to use console style parameters like '115200n8' When I enable early_printk on a kernel, I cut and paste the console= input and add to earlyprintk parameter. But I notice recently that ktest has not been detecting triple faults. The way it detects it, is by seeing the kernel banner "Linux version .." with a different kernel version pop up. Then I noticed that early printk was no longer working on my console, which was why ktest was not seeing it. I bisected it down and it was added to 4.0 with this commit: ea9e9d802902 ("Specify PCI based UART for earlyprintk") because it converted the simple_strtoul() that converts the baud number into a kstrtoul(). The problem with this is, I had as my baud rate, 115200n8 (acceptable for console=ttyS0), but because of the "n8", the kstrtoul() doesn't parse the baud rate and returns an error, which sets the baud rate to the default 9600. This explains the garbage on my screen. Now, earlyprintk= kernel parameter does not say it accepts that format. Thus, one answer would simply be me changing my kernel parameters to remove the "n8" since it isn't parsed anyway. But I wonder if other people run into this, and it seems strange that the two consoles for serial accepts different input. I could also extend this to have earlyprintk do something with that "n8" or whatever it has and have it match the console parsing (which, BTW, still uses simple_strtoul(), as I guess it has to). This patch just makes my old kernel parameter parsing work like it use to. Although, simple_strtoull() is considered obsolete, it is the only standard string parsing function that parses a number that is attached to text. Ironically, commit ea9e9d802902 also added several calls to simple_strtoul()! Signed-off-by: Steven Rostedt Cc: Alan Cox Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Brian Gerst Cc: David Cohen Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stuart R. Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150706101434.5f6a351b@gandalf.local.home [ Cleaned it up a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 89427d8d4fc5..eec40f595ab9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -175,7 +175,9 @@ static __init void early_serial_init(char *s) } if (*s) { - if (kstrtoul(s, 0, &baud) < 0 || baud == 0) + baud = simple_strtoull(s, &e, 0); + + if (baud == 0 || s == e) baud = DEFAULT_BAUD; } From 4b59246d9aeaac71f5f7a29459cd2abe32c527e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 6 Jul 2015 17:08:09 +0200 Subject: [PATCH 658/839] arm64: remove another unnecessary libfdt include path Patch 63a4aea55670 ("of: clean-up unnecessary libfdt include paths") removed all explicit libfdt include paths, since those are no longer necessary after the latest dtc upgrade. However, this one snuck in during the same merge window. Remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 9d84feb41a16..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,5 +4,3 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o - -CFLAGS_mmu.o := -I$(srctree)/scripts/dtc/libfdt/ From 3446af31b721d839c69f8fc70b8a434df6176a64 Mon Sep 17 00:00:00 2001 From: Suneel Garapati Date: Mon, 29 Jun 2015 07:02:08 +0200 Subject: [PATCH 659/839] arm64: defconfig: Add Ceva ahci to the defconfig The Ceva ahci controller is available on the Xilinx Zynq UltraScale+ MPSoC. Signed-off-by: Suneel Garapati Signed-off-by: Michal Simek [catalin.marinas@arm.com: removed unnecessary defconfig changes] Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f38c94f1d898..4e17e7ede33d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -83,6 +83,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_CEVA=y CONFIG_AHCI_XGENE=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y From 6d451367bfa16fc103604bacd258f534c65d1540 Mon Sep 17 00:00:00 2001 From: Hai Li Date: Thu, 25 Jun 2015 18:35:33 -0400 Subject: [PATCH 660/839] clk: qcom: Use parent rate when set rate to pixel RCG clock Since the parent rate has been recalculated, pixel RCG clock should rely on it to find the correct M/N values during set_rate, instead of calling __clk_round_rate() to its parent again. Signed-off-by: Hai Li Tested-by: Archit Taneja Fixes: 99cbd064b059 ("clk: qcom: Support display RCG clocks") [sboyd@codeaurora.org: Silenced unused parent variable warning] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rcg2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7..92936f0912d2 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, From c14bada8f71eef63d4465aa8e3a479104a06e7c7 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:21 +0200 Subject: [PATCH 661/839] drivers: clk: st: Remove unused code Remove this duplicated code due to a bad copy / paste. Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index e94197f04b0b..099ed6050d25 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -1082,10 +1082,6 @@ static const struct of_device_id quadfs_of_match[] = { .compatible = "st,stih407-quadfs660-D", .data = &st_fs660c32_D_407 }, - { - .compatible = "st,stih407-quadfs660-D", - .data = (void *)&st_fs660c32_D_407 - }, {} }; From c4d339c69f91c20def1366c1db8af6ac7a3013a9 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:22 +0200 Subject: [PATCH 662/839] drivers: clk: st: Fix FSYN channel values This patch fixes the value for disabling the FSYN channel clock. The 'is_enabled' returned value is also fixed. Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 099ed6050d25..95851c464478 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -489,7 +489,7 @@ static int quadfs_pll_is_enabled(struct clk_hw *hw) struct st_clk_quadfs_pll *pll = to_quadfs_pll(hw); u32 npda = CLKGEN_READ(pll, npda); - return !!npda; + return pll->data->powerup_polarity ? !npda : !!npda; } static int clk_fs660c32_vco_get_rate(unsigned long input, struct stm_fs *fs, @@ -774,7 +774,7 @@ static void quadfs_fsynth_disable(struct clk_hw *hw) if (fs->lock) spin_lock_irqsave(fs->lock, flags); - CLKGEN_WRITE(fs, nsb[fs->chan], !fs->data->standby_polarity); + CLKGEN_WRITE(fs, nsb[fs->chan], fs->data->standby_polarity); if (fs->lock) spin_unlock_irqrestore(fs->lock, flags); From 0f4f2afd4402883a51ad27a1d9e046643bb1e3cb Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Tue, 23 Jun 2015 16:09:23 +0200 Subject: [PATCH 663/839] drivers: clk: st: Fix flexgen lock init While proving lock, the following warning happens and it is fixed after initializing lock in the setup function INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.27-02861-g39df285-dirty #33 [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (__lock_acquire+0x900/0xb14) [] (__lock_acquire+0x900/0xb14) from [] (lock_acquire+0x68/0x7c) [] (lock_acquire+0x68/0x7c) from [] (_raw_spin_lock_irqsave+0x48/0x5c) [] (_raw_spin_lock_irqsave+0x48/0x5c) from [] (clk_gate_endisable+0x28/0x88) [] (clk_gate_endisable+0x28/0x88) from [] (clk_gate_enable+0xc/0x14) [] (clk_gate_enable+0xc/0x14) from [] (flexgen_enable+0x28/0x40) [] (flexgen_enable+0x28/0x40) from [] (__clk_enable+0x5c/0x9c) [] (__clk_enable+0x5c/0x9c) from [] (clk_enable+0x18/0x2c) [] (clk_enable+0x18/0x2c) from [] (st_lpc_of_register+0xc0/0x248) [] (st_lpc_of_register+0xc0/0x248) from [] (clocksource_of_init+0x34/0x58) [] (clocksource_of_init+0x34/0x58) from [] (sti_timer_init+0x10/0x18) [] (sti_timer_init+0x10/0x18) from [] (time_init+0x20/0x30) [] (time_init+0x20/0x30) from [] (start_kernel+0x20c/0x2e8) [] (start_kernel+0x20c/0x2e8) from [<40008074>] (0x40008074) Signed-off-by: Giuseppe Cavallaro Signed-off-by: Gabriel Fernandez Fixes: b116517055b7 ("clk: st: STiH407: Support for Flexgen Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index 657ca14ba709..be06d2a79dce 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -303,6 +303,8 @@ static void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; From 18fee4538fe534c53fa95fe9eaa7f96586814e0a Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 23 Jun 2015 16:09:24 +0200 Subject: [PATCH 664/839] drivers: clk: st: Add CLK_GET_RATE_NOCACHE flag to clocks Add the CLK_GET_RATE_NOCACHE flag to all the clocks with recalc ops, so that they reflect Hw rate after CPS wake-up when a clk_get_rate() is called Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 +- drivers/clk/st/clkgen-fsyn.c | 2 +- drivers/clk/st/clkgen-mux.c | 8 +++++--- drivers/clk/st/clkgen-pll.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index be06d2a79dce..8dd8cce27361 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -190,7 +190,7 @@ static struct clk *clk_register_flexgen(const char *name, init.name = name; init.ops = &flexgen_ops; - init.flags = CLK_IS_BASIC | flexgen_flags; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE | flexgen_flags; init.parent_names = parent_names; init.num_parents = num_parents; diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 95851c464478..99c98c15d4a4 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -635,7 +635,7 @@ static struct clk * __init st_clk_register_quadfs_pll( init.name = name; init.ops = quadfs->pll_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index 4fbe6e099587..bd30f8d4e902 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -237,7 +237,7 @@ static struct clk *clk_register_genamux(const char *name, init.name = name; init.ops = &clkgena_divmux_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = parent_names; init.num_parents = num_parents; @@ -513,7 +513,8 @@ static void __init st_of_clkgena_prediv_setup(struct device_node *np) 0, &clk_name)) return; - clk = clk_register_divider_table(NULL, clk_name, parent_name, 0, + clk = clk_register_divider_table(NULL, clk_name, parent_name, + CLK_GET_RATE_NOCACHE, reg + data->offset, data->shift, 1, 0, data->table, NULL); if (IS_ERR(clk)) @@ -786,7 +787,8 @@ static void __init st_of_clkgen_vcc_setup(struct device_node *np) &mux->hw, &clk_mux_ops, &div->hw, &clk_divider_ops, &gate->hw, &clk_gate_ops, - data->clk_flags); + data->clk_flags | + CLK_GET_RATE_NOCACHE); if (IS_ERR(clk)) { kfree(gate); kfree(div); diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 106532207213..72d1c27eaffa 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -406,7 +406,7 @@ static struct clk * __init clkgen_pll_register(const char *parent_name, init.name = clk_name; init.ops = pll_data->ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; From 3be6d8ce639d92e60d144fb99dd74a53fe3799bb Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:25 +0200 Subject: [PATCH 665/839] drivers: clk: st: Fix mux bit-setting for Cortex A9 clocks This patch fixes the mux bit-setting for ClockgenA9. Signed-off-by: Gabriel Fernandez Fixes: 13e6f2da1ddf ("clk: st: STiH407: Support for A9 MUX Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index bd30f8d4e902..717c4a91a17b 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -583,7 +583,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; From 0294112ee3135fbd15eaa70015af8283642dd970 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: [PATCH 666/839] ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 12 ++- drivers/acpi/resource.c | 162 ---------------------------------------- drivers/pnp/system.c | 35 +++------ include/linux/acpi.h | 10 --- 4 files changed, 18 insertions(+), 201 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c262e4acd68d..3b8963f21b36 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,10 +175,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -207,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1862,7 +1869,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 @@ -622,164 +621,3 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab8..49c1720df59a 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas */ -#include #include #include #include @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..fedfd1bea3bf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -309,9 +309,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -507,13 +504,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From d3e13ff3c1aa2403d9a5f371baac088daeb8f56d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 00:31:47 +0200 Subject: [PATCH 667/839] ACPI / LPSS: Fix up acpi_lpss_create_device() Fix a return value (which should be a negative error code) and a memory leak (the list allocated by acpi_dev_get_resources() needs to be freed on ioremap() errors too) in acpi_lpss_create_device() introduced by commit 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()'. Fixes: 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()' Reported-by: Dan Carpenter Cc: 4.0+ # 4.0+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 569ee090343f..46b58abb08c5 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) From 7b2a4635b84b4dbb07c93201a8c0aea82ed65e4f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 30 Jun 2015 10:58:44 +0800 Subject: [PATCH 668/839] clk: mediatek: mt8173: Fix enabling of critical clocks On the MT8173 the clocks are provided by different units. To enable the critical clocks we must be sure that all parent clocks are already registered, otherwise the parents of the critical clocks end up being unused and get disabled later. To find a place where all parents are registered we try each time after we've registered some clocks if all known providers are present now and only then we enable the critical clocks Signed-off-by: Sascha Hauer Signed-off-by: James Liao [sboyd@codeaurora.org: Marked function and data __init] Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8173.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 4b9e04cdf7e8..8b6523d15fb8 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -700,6 +700,22 @@ static const struct mtk_composite peri_clks[] __initconst = { MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1), }; +static struct clk_onecell_data *mt8173_top_clk_data __initdata; +static struct clk_onecell_data *mt8173_pll_clk_data __initdata; + +static void __init mtk_clk_enable_critical(void) +{ + if (!mt8173_top_clk_data || !mt8173_pll_clk_data) + return; + + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA7PLL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_MEM_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_DDRPHYCFG_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_CCI400_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_RTC_SEL]); +} + static void __init mtk_topckgen_init(struct device_node *node) { struct clk_onecell_data *clk_data; @@ -712,19 +728,19 @@ static void __init mtk_topckgen_init(struct device_node *node) return; } - clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); + mt8173_top_clk_data = clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8173_clk_lock, clk_data); - clk_prepare_enable(clk_data->clks[CLK_TOP_CCI400_SEL]); - r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); if (r) pr_err("%s(): could not register clock provider: %d\n", __func__, r); + + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_topckgen, "mediatek,mt8173-topckgen", mtk_topckgen_init); @@ -818,13 +834,13 @@ static void __init mtk_apmixedsys_init(struct device_node *node) { struct clk_onecell_data *clk_data; - clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); + mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); if (!clk_data) return; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - clk_prepare_enable(clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_apmixedsys, "mediatek,mt8173-apmixedsys", mtk_apmixedsys_init); From 93af5e93544328285a6f65f7d47bbea8979b28fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 11:14:14 +0200 Subject: [PATCH 669/839] PM / Domains: Avoid infinite loops in attach/detach code If pm_genpd_{add,remove}_device() keeps on failing with -EAGAIN, we end up with an infinite loop in genpd_dev_pm_{at,de}tach(). This may happen due to a genpd.prepared_count imbalance. This is a bug elsewhere, but it will result in a system lock up, possibly during reboot of an otherwise functioning system. To avoid this, put a limit on the maximum number of loop iterations, using an exponential back-off mechanism. If the limit is reached, the operation will just fail. An error message is already printed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cdd547bd67df..0ee43c1056e0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,6 +6,7 @@ * This file is released under the GPLv2. */ +#include #include #include #include @@ -19,6 +20,8 @@ #include #include +#define GENPD_RETRY_MAX_MS 250 /* Approximate */ + #define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ ({ \ type (*__routine)(struct device *__d); \ @@ -2131,6 +2134,7 @@ EXPORT_SYMBOL_GPL(of_genpd_get_from_provider); static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + unsigned int i; int ret = 0; pd = pm_genpd_lookup_dev(dev); @@ -2139,10 +2143,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_remove_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } @@ -2183,6 +2189,7 @@ int genpd_dev_pm_attach(struct device *dev) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; + unsigned int i; int ret; if (!dev->of_node) @@ -2218,10 +2225,12 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_add_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } From b51f9b103f58db2c5c0a20d6cee26c8bc255d3ae Mon Sep 17 00:00:00 2001 From: Uwe Geuder Date: Mon, 29 Jun 2015 23:35:05 +0300 Subject: [PATCH 670/839] PM / hibernate: clarify resume documentation it was not the whole truth that kernel mode cannot be used with swap on LVM Signed-off-by: Uwe Geuder Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/swsusp.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index f732a8321e8a..8cc17ca71813 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -410,8 +410,17 @@ Documentation/usb/persist.txt. Q: Can I suspend-to-disk using a swap partition under LVM? -A: No. You can suspend successfully, but you'll not be able to -resume. uswsusp should be able to work with LVM. See suspend.sf.net. +A: Yes and No. You can suspend successfully, but the kernel will not be able +to resume on its own. You need an initramfs that can recognize the resume +situation, activate the logical volume containing the swap volume (but not +touch any filesystems!), and eventually call + +echo -n "$major:$minor" > /sys/power/resume + +where $major and $minor are the respective major and minor device numbers of +the swap volume. + +uswsusp works with LVM, too. See http://suspend.sourceforge.net/ Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that From 26095a01d359827eeccec5459c28ddd976183179 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:20 +0200 Subject: [PATCH 671/839] ACPI / scan: Add support for ACPI _CLS device matching Device drivers typically use ACPI _HIDs/_CIDs listed in struct device_driver acpi_match_table to match devices. However, for generic drivers, we do not want to list _HID for all supported devices. Also, certain classes of devices do not have _CID (e.g. SATA, USB). Instead, we can leverage ACPI _CLS, which specifies PCI-defined class code (i.e. base-class, subclass and programming interface). This patch adds support for matching ACPI devices using the _CLS method. To support loadable module, current design uses _HID or _CID to match device's modalias. With the new way of matching with _CLS this would requires modification to the current ACPI modalias key to include _CLS. This patch appends PCI-defined class-code to the existing ACPI modalias as following. acpi::::..::: E.g: # cat /sys/devices/platform/AMDI0600:00/modalias acpi:AMDI0600:010601: where bb is th base-class code, ss is te sub-class code, and pp is the programming interface code Since there would not be _HID/_CID in the ACPI matching table of the driver, this patch adds a field to acpi_device_id to specify the matching _CLS. static const struct acpi_device_id ahci_acpi_match[] = { { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; In this case, the corresponded entry in modules.alias file would be: alias acpi*:010601:* ahci_platform Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 32 +++++++++++++++++++++++++++++-- include/linux/acpi.h | 14 ++++++++++++++ include/linux/mod_devicetable.h | 2 ++ scripts/mod/devicetable-offsets.c | 2 ++ scripts/mod/file2alias.c | 32 +++++++++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2649a068671d..ec256352f423 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1019,6 +1019,29 @@ static bool acpi_of_match_device(struct acpi_device *adev, return false; } +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + static const struct acpi_device_id *__acpi_match_device( struct acpi_device *device, const struct acpi_device_id *ids, @@ -1036,9 +1059,12 @@ static const struct acpi_device_id *__acpi_match_device( list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0]; id++) - if (!strcmp((char *) id->id, hwid->id)) + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the @@ -2101,6 +2127,8 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, if (info->valid & ACPI_VALID_UID) pnp->unique_id = kstrdup(info->unique_id.string, GFP_KERNEL); + if (info->valid & ACPI_VALID_CLS) + acpi_add_id(pnp, info->class_code.string); kfree(info); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..bdfdb9f65c23 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -446,6 +459,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index eff7de1fc82e..e70fcd12eeeb 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -63,6 +63,8 @@ int main(void) DEVID(acpi_device_id); DEVID_FIELD(acpi_device_id, id); + DEVID_FIELD(acpi_device_id, cls); + DEVID_FIELD(acpi_device_id, cls_msk); DEVID(pnp_device_id); DEVID_FIELD(pnp_device_id, id); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 84c86f3cd6cd..5f2088209132 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -523,12 +523,40 @@ static int do_serio_entry(const char *filename, } ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); -/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */ +/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or + * "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if) + * + * NOTE: Each driver should use one of the following : _HID, _CIDs + * or _CLS. Also, bb, ss, and pp can be substituted with ?? + * as don't care byte. + */ static int do_acpi_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, acpi_device_id, id); - sprintf(alias, "acpi*:%s:*", *id); + DEF_FIELD_ADDR(symval, acpi_device_id, cls); + DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk); + + if (id && strlen((const char *)*id)) + sprintf(alias, "acpi*:%s:*", *id); + else if (cls) { + int i, byte_shift, cnt = 0; + unsigned int msk; + + sprintf(&alias[cnt], "acpi*:"); + cnt = 6; + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3-i); + msk = (*cls_msk >> byte_shift) & 0xFF; + if (msk) + sprintf(&alias[cnt], "%02x", + (*cls >> byte_shift) & 0xFF); + else + sprintf(&alias[cnt], "??"); + cnt += 2; + } + sprintf(&alias[cnt], ":*"); + } return 1; } ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); From 2051e9248688e4c36e4de2e93b88fa0d74e86261 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:21 +0200 Subject: [PATCH 672/839] ata: ahci_platform: Add ACPI _CLS matching This patch adds ACPI supports for AHCI platform driver, which uses _CLS method to match the device. The following is an example of ASL structure in DSDT for a SATA controller, which contains _CLS package to be matched by the ahci_platform driver: Device (AHC0) // AHCI Controller { Name(_HID, "AMDI0600") Name (_CCA, 1) Name (_CLS, Package (3) { 0x01, // Base Class: Mass Storage 0x06, // Sub-Class: serial ATA 0x01, // Interface: AHCI }) Name (_CRS, ResourceTemplate () { Memory32Fixed (ReadWrite, 0xE0300000, 0x00010000) Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusive,,,) { 387 } }) } Also, since ATA driver should not require PCI support for ATA_ACPI, this patch removes dependency in the driver/ata/Kconfig. Acked-by: Tejun Heo Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/ata/Kconfig | 2 +- drivers/ata/ahci_platform.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 6d17a3b65ef7..15e40ee62a94 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -48,7 +48,7 @@ config ATA_VERBOSE_ERROR config ATA_ACPI bool "ATA ACPI Support" - depends on ACPI && PCI + depends on ACPI default y help This option adds support for ATA-related ACPI objects. diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 614c78f510f0..1befb114c384 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "ahci.h" #define DRV_NAME "ahci" @@ -79,12 +81,19 @@ static const struct of_device_id ahci_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_of_match); +static const struct acpi_device_id ahci_acpi_match[] = { + { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); + static struct platform_driver ahci_driver = { .probe = ahci_probe, .remove = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_of_match, + .acpi_match_table = ahci_acpi_match, .pm = &ahci_pm_ops, }, }; From b32aadc1a8ed84afbe924cd2ced31cd6a2e67074 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Tue, 7 Jul 2015 01:39:23 +0530 Subject: [PATCH 673/839] powerpc/powernv: Fix race in updating core_idle_state core_idle_state is maintained for each core. It uses 0-7 bits to track whether a thread in the core has entered fastsleep or winkle. 8th bit is used as a lock bit. The lock bit is set in these 2 scenarios- - The thread is first in subcore to wakeup from sleep/winkle. - If its the last thread in the core about to enter sleep/winkle While the lock bit is set, if any other thread in the core wakes up, it loops until the lock bit is cleared before proceeding in the wakeup path. This helps prevent race conditions w.r.t fastsleep workaround and prevents threads from switching to process context before core/subcore resources are restored. But, in the path to sleep/winkle entry, we currently don't check for lock-bit. This exposes us to following race when running with subcore on- First thread in the subcorea Another thread in the same waking up core entering sleep/winkle lwarx r15,0,r14 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 [Code to restore subcore state] lwarx r15,0,r14 [clear thread bit] stwcx. r15,0,r14 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS stw r15,0(r14) Here, after the thread entering sleep clears its thread bit in core_idle_state, the value is overwritten by the thread waking up. In such cases when the core enters fastsleep, code mistakes an idle thread as running. Because of this, the first thread waking up from fastsleep which is supposed to resync timebase skips it. So we can end up having a core with stale timebase value. This patch fixes the above race by looping on the lock bit even while entering the idle states. Signed-off-by: Shreyas B. Prabhu Fixes: 7b54e9f213f76 'powernv/powerpc: Add winkle support for offline cpus' Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce4..112ccf497562 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT From 747d34e7313034768aac83d679db43cedc5ab778 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 27 Jun 2015 23:11:44 +0200 Subject: [PATCH 674/839] clocksource/imx: Define clocksource for mx27 The rework of the imx clocksource driver missed to add an entry for imx27 which results in a boot failure on those machines. Add the proper CLOCKSOURCE_OF_DECLARE() entry for imx27 and map it to the imx21 init. Fixes: bef11c881ba5 'ARM: imx: initialize gpt device type for DT boot' Signed-off-by: Philippe Reynes Cc: linux-arm-kernel@lists.infradead.org Cc: daniel.lezcano@linaro.org Cc: fabio.estevam@freescale.com Cc: shawnguo@kernel.org Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/1435439504-406-1-git-send-email-tremyfr@gmail.com Signed-off-by: Thomas Gleixner --- drivers/clocksource/timer-imx-gpt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 879c78423546..2d59038dec43 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -529,6 +529,7 @@ static void __init imx6dl_timer_init_dt(struct device_node *np) CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); From 7c4a976cd55972b68c75a978f171b6db5df4ce66 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Jul 2015 10:14:35 +0200 Subject: [PATCH 675/839] clockevents: Allow set-state callbacks to be optional Its mandatory for the drivers to provide set_state_{oneshot|periodic}() (only if related modes are supported) and set_state_shutdown() callbacks today, if they are implementing the new set-state interface. But this leads to unnecessary noop callbacks for drivers which don't want to implement them. Over that, it will lead to a full function call for nothing really useful. Lets make all set-state callbacks optional. Suggested-by: Daniel Lezcano Signed-off-by: Viresh Kumar Signed-off-by: Daniel Lezcano Link: http://lkml.kernel.org/r/1436256875-15562-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 08ccc3da3ca0..50eb107f1198 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev, /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: - return dev->set_state_shutdown(dev); + if (dev->set_state_shutdown) + return dev->set_state_shutdown(dev); + return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_state_periodic(dev); + if (dev->set_state_periodic) + return dev->set_state_periodic(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_state_oneshot(dev); + if (dev->set_state_oneshot) + return dev->set_state_oneshot(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ @@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New state-specific callbacks */ - if (!dev->set_state_shutdown) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_state_periodic) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_state_oneshot) - return -EINVAL; - return 0; } From 3f8dc44d88d3e86178eb9322c779c599f3745b21 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 7 Jul 2015 11:01:17 +1000 Subject: [PATCH 676/839] cxl: Fix refcounting in kernel API Currently the kernel API AFU dev refcounting is done on context start and stop. This patch moves this refcounting to context init and release, bringing it inline with how the userspace API does it. Without this we've seen the refcounting on the AFU get out of whack between the user and kernel API usage. This causes the AFU structures to be freed when they are actually still in use. This fixes some kref warnings we've been seeing and spurious ErrIVTE IRQs. Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 0c77240ae2fc..729e0851167d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -23,6 +23,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) afu = cxl_pci_to_afu(dev); + get_device(&afu->dev); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) return ctx; @@ -31,6 +32,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) rc = cxl_context_init(ctx, afu, false, NULL); if (rc) { kfree(ctx); + put_device(&afu->dev); return ERR_PTR(-ENOMEM); } cxl_assign_psn_space(ctx); @@ -60,6 +62,8 @@ int cxl_release_context(struct cxl_context *ctx) if (ctx->status != CLOSED) return -EBUSY; + put_device(&ctx->afu->dev); + cxl_context_free(ctx); return 0; @@ -159,7 +163,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, } ctx->status = STARTED; - get_device(&ctx->afu->dev); out: mutex_unlock(&ctx->status_mutex); return rc; @@ -175,12 +178,7 @@ EXPORT_SYMBOL_GPL(cxl_process_element); /* Stop a context. Returns 0 on success, otherwise -Errno */ int cxl_stop_context(struct cxl_context *ctx) { - int rc; - - rc = __detach_context(ctx); - if (!rc) - put_device(&ctx->afu->dev); - return rc; + return __detach_context(ctx); } EXPORT_SYMBOL_GPL(cxl_stop_context); From 5a3f75e3f02836518ce49536e9c460ca8e1fa290 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:32 +0000 Subject: [PATCH 677/839] x86/irq: Plug irq vector hotplug race Jin debugged a nasty cpu hotplug race which results in leaking a irq vector on the newly hotplugged cpu. cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs setup_vector_irq arch_teardown_msi_irq __setup_vector_irq native_teardown_msi_irq lock(vector_lock) destroy_irq install vectors unlock(vector_lock) lock(vector_lock) ---> __clear_irq_vector unlock(vector_lock) lock(vector_lock) set_cpu_online unlock(vector_lock) This leaves the irq vector(s) which are torn down on CPU M stale in the vector array of CPU N, because CPU M does not see CPU N online yet. There is a similar issue with concurrent newly setup interrupts. The alloc/free protection of irq descriptors does not prevent the above race, because it merily prevents interrupt descriptors from going away or changing concurrently. Prevent this by moving the call to setup_vector_irq() into the vector_lock held region which protects set_cpu_online(): cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs lock(vector_lock) arch_teardown_msi_irq setup_vector_irq() __setup_vector_irq native_teardown_msi_irq install vectors destroy_irq set_cpu_online unlock(vector_lock) lock(vector_lock) __clear_irq_vector unlock(vector_lock) So cpu M either sees the cpu N online before clearing the vector or cpu N installs the vectors after cpu M has cleared it. Reported-by: xiao jin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.141898931@linutronix.de --- arch/x86/kernel/apic/vector.c | 10 ++-------- arch/x86/kernel/smpboot.c | 13 +++++-------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 28eba2d38b15..f813261d9740 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu) int irq, vector; struct apic_chip_data *data; - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { data = apic_chip_data(irq_get_irq_data(irq)); @@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } - raw_spin_unlock(&vector_lock); } /* - * Setup the vector to irq mappings. + * Setup the vector to irq mappings. Must be called with vector_lock held. */ void setup_vector_irq(int cpu) { int irq; + lockdep_assert_held(&vector_lock); /* * On most of the platforms, legacy PIC delivers the interrupts on the * boot cpu. But there are certain platforms where PIC interrupts are diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0bd8c1d507b3..d3010aa79daf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -170,11 +170,6 @@ static void smp_callin(void) */ apic_ap_setup(); - /* - * Need to setup vector mappings before we enable interrupts. - */ - setup_vector_irq(smp_processor_id()); - /* * Save our processor parameters. Note: this information * is needed for clock calibration. @@ -239,11 +234,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * We need to hold vector_lock so there the set of online cpus - * does not change while we are assigning vectors to cpus. Holding - * this lock ensures we don't half assign or remove an irq from a cpu. + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with + * vector_lock held to prevent a concurrent setup/teardown + * from seeing a half valid vector space. */ lock_vector_lock(); + setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); From cbb24dc761d95fe39a7a122bb1b298e9604cae15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:33 +0000 Subject: [PATCH 678/839] x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable() It's unsafe to examine fields in the irq descriptor w/o holding the descriptor lock. Add proper locking. While at it add a comment why the vector check can run lock less Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.236544164@linutronix.de --- arch/x86/kernel/irq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 88b366487b0e..85ca76e6241c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void) if (!desc) continue; + /* + * Protect against concurrent action removal, + * affinity changes etc. + */ + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { + raw_spin_unlock(&desc->lock); continue; + } + raw_spin_unlock(&desc->lock); /* * A single irq may be mapped to multiple * cpu's vector_irq[] (for example IOAPIC cluster @@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void) * vector. If the vector is marked in the used vectors * bitmap or an irq is assigned to it, we don't count * it as available. + * + * As this is an inaccurate snapshot anyway, we can do + * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < first_system_vector; vector++) { From 09cf92b784fae6109450c5d64f9908066d605249 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:35 +0000 Subject: [PATCH 679/839] x86/irq: Retrieve irq data after locking irq_desc irq_data is protected by irq_desc->lock, so retrieving the irq chip from irq_data outside the lock is racy vs. an concurrent update. Move it into the lock held region. While at it add a comment why the vector walk does not require vector_lock. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.331320612@linutronix.de --- arch/x86/kernel/irq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 85ca76e6241c..c7dfe1be784e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -497,6 +497,11 @@ void fixup_irqs(void) */ mdelay(1); + /* + * We can walk the vector array of this cpu without holding + * vector_lock because the cpu is already marked !online, so + * nothing else will touch it. + */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; @@ -508,9 +513,9 @@ void fixup_irqs(void) irq = __this_cpu_read(vector_irq[vector]); desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); - raw_spin_lock(&desc->lock); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); From 6d3dab7d84177f836b14961b4d252d0959d66768 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 13:08:39 +0200 Subject: [PATCH 680/839] PM / wakeirq: Avoid setting power.wakeirq too hastily If dev_pm_attach_wake_irq() fails, the device's power.wakeirq field should not be set to point to the struct wake_irq passed to that function, as that object will be freed going forward. For this reason, make dev_pm_attach_wake_irq() first call device_wakeup_attach_irq() and only set the device's power.wakeirq field if that's successful. That requires device_wakeup_attach_irq() to be called under the device's power.lock lock, but since dev_pm_attach_wake_irq() is the only caller of it, the requisite changes are easy to make. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Reported-by: Felipe Balbi Tested-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 12 +++++------- drivers/base/power/wakeup.c | 31 ++++++++++--------------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 7470004ca810..eb6e67451dec 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -45,14 +45,12 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, return -EEXIST; } - dev->power.wakeirq = wirq; - spin_unlock_irqrestore(&dev->power.lock, flags); - err = device_wakeup_attach_irq(dev, wirq); - if (err) - return err; + if (!err) + dev->power.wakeirq = wirq; - return 0; + spin_unlock_irqrestore(&dev->power.lock, flags); + return err; } /** @@ -105,10 +103,10 @@ void dev_pm_clear_wake_irq(struct device *dev) return; spin_lock_irqsave(&dev->power.lock, flags); + device_wakeup_detach_irq(dev); dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - device_wakeup_detach_irq(dev); if (wirq->dedicated_irq) free_irq(wirq->irq, wirq); kfree(wirq); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7332ebc9cab0..15d27d782dc1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -247,32 +247,25 @@ EXPORT_SYMBOL_GPL(device_wakeup_enable); * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. + * + * Call under the device's power.lock lock. */ int device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; - int ret = 0; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; if (!ws) { dev_err(dev, "forgot to call call device_init_wakeup?\n"); - ret = -EINVAL; - goto unlock; + return -EINVAL; } - if (ws->wakeirq) { - ret = -EEXIST; - goto unlock; - } + if (ws->wakeirq) + return -EEXIST; ws->wakeirq = wakeirq; - -unlock: - spin_unlock_irq(&dev->power.lock); - - return ret; + return 0; } /** @@ -280,20 +273,16 @@ unlock: * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. + * + * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; - if (!ws) - goto unlock; - - ws->wakeirq = NULL; - -unlock: - spin_unlock_irq(&dev->power.lock); + if (ws) + ws->wakeirq = NULL; } /** From b6cfb277378ef831c0fa84bcff5049307294adc6 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:47 -0600 Subject: [PATCH 681/839] ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro The BAD_MADT_ENTRY() macro is designed to work for all of the subtables of the MADT. In the ACPI 5.1 version of the spec, the struct for the GICC subtable (struct acpi_madt_generic_interrupt) is 76 bytes long; in ACPI 6.0, the struct is 80 bytes long. But, there is only one definition in ACPICA for this struct -- and that is the 6.0 version. Hence, when BAD_MADT_ENTRY() compares the struct size to the length in the GICC subtable, it fails if 5.1 structs are in use, and there are systems in the wild that have them. This patch adds the BAD_MADT_GICC_ENTRY() that checks the GICC subtable only, accounting for the difference in specification versions that are possible. The BAD_MADT_ENTRY() will continue to work as is for all other MADT subtables. This code is being added to an arm64 header file since that is currently the only architecture using the GICC subtable of the MADT. As a GIC is specific to ARM, it is also unlikely the subtable will be used elsewhere. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" [catalin.marinas@arm.com: extra brackets around macro arguments] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 39248d3adf5d..406485ed110a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -19,6 +19,14 @@ #include #include +/* Macros for consistency checks of the GICC subtable of MADT */ +#define ACPI_MADT_GICC_LENGTH \ + (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ + (entry)->header.length != ACPI_MADT_GICC_LENGTH) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI /* ACPI table mapping after acpi_gbl_permanent_mmap is set */ From 99e3e3ae332b6ca91d5c444ea7849f367f5e5a76 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:48 -0600 Subject: [PATCH 682/839] ACPI / ARM64 : use the new BAD_MADT_GICC_ENTRY macro For those parts of the arm64 ACPI code that need to check GICC subtables in the MADT, use the new BAD_MADT_GICC_ENTRY macro instead of the previous BAD_MADT_ENTRY. The new macro takes into account differences in the size of the GICC subtable that the old macro did not; this caused failures even though the subtable entries are valid. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Reviewed-by: Hanjun Guo Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- drivers/irqchip/irq-gic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 695801a54ca5..50fb4696654e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -438,7 +438,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, struct acpi_madt_generic_interrupt *processor; processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; acpi_table_print_madt_entry(header); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8d7e1c8b6d56..4dd88264dff5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1055,7 +1055,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; /* From ef37566cf8d607844cee5a01597d828c8fd1a501 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2015 17:15:39 +0100 Subject: [PATCH 683/839] arm64: Keep the ARM64 Kconfig selects sorted Move EDAC_SUPPORT to the right place. Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f6edb14b7e4..318175f62c24 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,9 +23,9 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select EDAC_SUPPORT select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS + select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP From 8eb231261fdd20768db23863d00ef277de4b0543 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:11:00 +0200 Subject: [PATCH 684/839] tick/broadcast: Prevent hrtimer recursion The hrtimer based broadcast vehicle can cause a hrtimer recursion which went unnoticed until we changed the hrtimer expiry code to keep track of the currently running timer. local_timer_interrupt() local_handler() hrtimer_interrupt() expire_hrtimers() broadcast_hrtimer() send_ipis() local_handler() hrtimer_interrupt() .... Solution is simple: Prevent the local handler call from the broadcast code when the broadcast 'device' is hrtimer based. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d39f32cdd1b5..a76204089f78 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -265,8 +265,22 @@ static bool tick_do_broadcast(struct cpumask *mask) * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; + cpumask_clear_cpu(cpu, mask); - local = true; + /* + * We only run the local handler, if the broadcast + * device is not hrtimer based. Otherwise we run into + * a hrtimer recursion. + * + * local timer_interrupt() + * local_handler() + * expire_hrtimers() + * bc_handler() + * local_handler() + * expire_hrtimers() + */ + local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { From e0454311903d3fd0f12a86c9e65d7b271c2bb05d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:07:27 +0200 Subject: [PATCH 685/839] tick/broadcast: Sanity check the shutdown of the local clock_event The broadcast code shuts down the local clock event unconditionally even if no broadcast device is installed or if the broadcast device is hrtimer based. Add proper sanity checks. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index a76204089f78..9877d0b0aefc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret; + int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt - * is delivered by the broadcast device. + * is delivered by the broadcast device, if + * the broadcast device exists and is not + * hrtimer based. */ - ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: - /* Nothing to do */ - ret = 0; break; } } @@ -373,8 +374,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { - if (tick_broadcast_device.mode == - TICKDEV_MODE_PERIODIC) + /* + * Only shutdown the cpu local device, if: + * + * - the broadcast device exists + * - the broadcast device is not a hrtimer based one + * - the broadcast device is in periodic mode to + * avoid a hickup during switch to oneshot mode + */ + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && + tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; From f32dd117051185da6e923b35491a44d7debeeea5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:29:38 +0200 Subject: [PATCH 686/839] tick/broadcast: Make idle check independent from mode and config Currently the broadcast busy check, which prevents the idle code from going into deep idle, works only in one shot mode. If NOHZ and HIGHRES are off (config or command line) there is no sanity check at all, so under certain conditions cpus are allowed to go into deep idle, where the local timer stops, and are not woken up again because there is no broadcast timer installed or a hrtimer based broadcast device is not evaluated. Move tick_broadcast_oneshot_control() into the common code and provide proper subfunctions for the various config combinations. The common check in tick_broadcast_oneshot_control() is for the C3STOP misfeature flag of the local clock event device. If its not set, idle can proceed. If set, further checks are necessary. Provide checks for the trivial cases: - If broadcast is disabled in the config, then return busy - If oneshot mode (NOHZ/HIGHES) is disabled in the config, return busy if the broadcast device is hrtimer based. - If oneshot mode is enabled in the config call the original tick_broadcast_oneshot_control() function. That function needs extra checks which will be implemented in seperate patches. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- include/linux/tick.h | 4 ---- kernel/time/tick-broadcast.c | 26 +++++++++++--------------- kernel/time/tick-common.c | 21 +++++++++++++++++++++ kernel/time/tick-sched.h | 10 ++++++++++ 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..6916dcb61857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,11 +67,7 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); -#else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } -#endif static inline void tick_broadcast_enable(void) { diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 9877d0b0aefc..ef77b16ad5df 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -685,18 +685,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -/** - * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode - * @state: The target state (enter/exit) - * - * The system enters/leaves a state, where affected devices might stop - * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. - */ -int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; @@ -717,9 +706,6 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; - raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); @@ -961,6 +947,16 @@ bool tick_broadcast_oneshot_available(void) return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } +#else +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return -EBUSY; + + return 0; +} #endif void __init tick_broadcast_init(void) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 76446cb5dfe1..55e13efff1ab 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -343,6 +343,27 @@ out_bc: tick_install_broadcast_device(newdev); } +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. + */ +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + + if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) + return 0; + + return __tick_broadcast_oneshot_control(state); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 42fdf4958bcc..a4a8d4e9baa1 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu); static inline void tick_cancel_sched_timer(int cpu) { } #endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int +__tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return -EBUSY; +} +#endif + #endif From b78f3f3c898c824bf56ab55cfa59fc72be49c349 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:34:32 +0200 Subject: [PATCH 687/839] tick/broadcast: Prevent deep idle if no broadcast device available Add a check for a installed broadcast device to the oneshot control function and return busy if not. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ef77b16ad5df..fad3f789beec 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -692,6 +692,13 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) int cpu, ret = 0; ktime_t now; + /* + * If there is no broadcast device, tell the caller not to go + * into deep idle. + */ + if (!tick_broadcast_device.evtdev) + return -EBUSY; + /* * Periodic mode does not care about the enter/exit of power * states From e3ac79e087ffe8a1f953ed44a74acf7616cb0b25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:38:11 +0200 Subject: [PATCH 688/839] tick/broadcast: Move the check for periodic mode inside state handling We need to check more than the periodic mode for proper operation in all runtime combinations. To avoid code duplication move the check into the enter state handling. No functional change. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index fad3f789beec..83aa92e87a85 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -688,7 +688,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; - struct tick_device *td; int cpu, ret = 0; ktime_t now; @@ -699,25 +698,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!tick_broadcast_device.evtdev) return -EBUSY; - /* - * Periodic mode does not care about the enter/exit of power - * states - */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return 0; - - /* - * We are called with preemtion disabled from the depth of the - * idle code, so we can't be moved away. - */ - td = this_cpu_ptr(&tick_cpu_device); - dev = td->evtdev; + dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the broadcast device is in periodic mode, we + * return. + */ + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + goto out; + if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); From d33257264b0267a8fd20f6717abbb484c9e21130 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 17:45:22 +0200 Subject: [PATCH 689/839] tick/broadcast: Return busy if periodic mode and hrtimer broadcast If the system is in periodic mode and the broadcast device is hrtimer based, return busy as we have no proper handling for this. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 83aa92e87a85..da7b40fdf6d0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -709,8 +709,12 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * If the broadcast device is in periodic mode, we * return. */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { + /* If it is a hrtimer based broadcast, return busy */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) + ret = -EBUSY; goto out; + } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); From 0cc5281aa592d0020868f6ccaed359b4ad7b2684 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:45:15 +0200 Subject: [PATCH 690/839] tick/broadcast: Return busy when IPI is pending Tell the idle code not to go deep if the broadcast IPI is about to arrive. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da7b40fdf6d0..70b47bc928b9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -725,11 +725,15 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be - * woken by the IPI right away. + * woken by the IPI right away; we return + * busy, so the CPU does not try to go deep + * idle. */ - if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && - dev->next_event.tv64 < bc->next_event.tv64) + if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { + ret = -EBUSY; + } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + } } /* * If the current CPU owns the hrtimer broadcast From d5113e13a550bc9c2b53cc9944b8a06453c4a0a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:43:04 +0200 Subject: [PATCH 691/839] tick/broadcast: Check for hrtimer broadcast active early If the current cpu is the one which has the hrtimer based broadcast queued then we better return busy immediately instead of going through loops and hoops to figure that out. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 70b47bc928b9..c8d731ac9563 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -705,6 +705,17 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we do not add + * the CPU to the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + goto out; + /* * If the broadcast device is in periodic mode, we * return. @@ -718,7 +729,10 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); + + /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); + /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and @@ -733,18 +747,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) ret = -EBUSY; } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + /* + * In case of hrtimer broadcasts the + * programming might have moved the + * timer to this cpu. If yes, remove + * us from the broadcast mask and + * return busy. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) { + cpumask_clear_cpu(cpu, + tick_broadcast_oneshot_mask); + } } } - /* - * If the current CPU owns the hrtimer broadcast - * mechanism, it cannot go deep idle and we remove the - * CPU from the broadcast mask. We don't have to go - * through the EXIT path as the local timer is not - * shutdown. - */ - ret = broadcast_needs_cpu(bc, cpu); - if (ret) - cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); From c4288334818c81c946acb23d2319881f58c3d497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 20:53:17 +0000 Subject: [PATCH 692/839] tick/broadcast: Handle spurious interrupts gracefully Andriy reported that on a virtual machine the warning about negative expiry time in the clock events programming code triggered: hpet: hpet0 irq 40 for MSI hpet: hpet1 irq 41 for MSI Switching to clocksource hpet WARNING: at kernel/time/clockevents.c:239 [] clockevents_program_event+0xdb/0xf0 [] tick_handle_periodic_broadcast+0x41/0x50 [] timer_interrupt+0x15/0x20 When the second hpet is installed as a per cpu timer the broadcast event is not longer required and stopped, which sets the next_evt of the broadcast device to KTIME_MAX. If after that a spurious interrupt happens on the broadcast device, then the current code blindly handles it and tries to reprogram the broadcast device afterwards, which adds the period to next_evt. KTIME_MAX + period results in a negative expiry value causing the WARN_ON in the clockevents code to trigger. Add a proper check for the state of the broadcast device into the interrupt handler and return if the interrupt is spurious. [ Folded in pointer fix from Sudeep ] Reported-by: Andriy Gapon Signed-off-by: Thomas Gleixner Cc: Sudeep Holla Cc: Peter Zijlstra Cc: Preeti U Murthy Link: http://lkml.kernel.org/r/20150705205221.802094647@linutronix.de --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c8d731ac9563..ee3cf942d6eb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -316,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + + /* Handle spurious interrupts gracefully */ + if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { + raw_spin_unlock(&tick_broadcast_lock); + return; + } + bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { From 539c4b88146cc320aee18b08ebb43ab57d523dcc Mon Sep 17 00:00:00 2001 From: duson Date: Tue, 7 Jul 2015 10:25:39 -0700 Subject: [PATCH 693/839] Input: elan_i2c - change the hover event from MT to ST The firmware only reports hover condition while the very first contact is approaching the surface; the hover is not reported for the subsequent contacts. Therefore we should not be using ABS_MT_DISTANCE to report hover but rather its single-touch counterpart ABS_DISTANCE. Signed-off-by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 62641f2adaf7..5b5f403d8ce6 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -771,7 +771,7 @@ static const struct attribute_group *elan_sysfs_groups[] = { */ static void elan_report_contact(struct elan_tp_data *data, int contact_num, bool contact_valid, - bool hover_event, u8 *finger_data) + u8 *finger_data) { struct input_dev *input = data->input; unsigned int pos_x, pos_y; @@ -815,9 +815,7 @@ static void elan_report_contact(struct elan_tp_data *data, input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_POSITION_X, pos_x); input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); - input_report_abs(input, ABS_MT_DISTANCE, hover_event); - input_report_abs(input, ABS_MT_PRESSURE, - hover_event ? 0 : scaled_pressure); + input_report_abs(input, ABS_MT_PRESSURE, scaled_pressure); input_report_abs(input, ABS_TOOL_WIDTH, mk_x); input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); @@ -839,14 +837,14 @@ static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) hover_event = hover_info & 0x40; for (i = 0; i < ETP_MAX_FINGERS; i++) { contact_valid = tp_info & (1U << (3 + i)); - elan_report_contact(data, i, contact_valid, hover_event, - finger_data); + elan_report_contact(data, i, contact_valid, finger_data); if (contact_valid) finger_data += ETP_FINGER_DATA_LEN; } input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_report_abs(input, ABS_DISTANCE, hover_event != 0); input_mt_report_pointer_emulation(input, true); input_sync(input); } @@ -922,6 +920,7 @@ static int elan_setup_input_device(struct elan_tp_data *data) input_abs_set_res(input, ABS_Y, data->y_res); input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + input_set_abs_params(input, ABS_DISTANCE, 0, 1, 0, 0); /* And MT parameters */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); @@ -934,7 +933,6 @@ static int elan_setup_input_device(struct elan_tp_data *data) ETP_FINGER_WIDTH * max_width, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, ETP_FINGER_WIDTH * min_width, 0, 0); - input_set_abs_params(input, ABS_MT_DISTANCE, 0, 1, 0, 0); data->input = input; From 71eeedcf51544831ae356a773814401143ed32d4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:49:03 +0200 Subject: [PATCH 694/839] MIPS: Lemote 2F: Fix build caused by recent mass rename. CC arch/mips/loongson64/lemote-2f/clock.o /home/ralf/src/linux/linux-mips/arch/mips/loongson64/lemote-2f/clock.c:18:40: fatal error: asm/mach-loongson/loongson.h: No such file or directory #include ^ compilation terminated. Signed-off-by: Ralf Baechle --- arch/mips/loongson64/lemote-2f/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 462e34d46b4a..4c5b94af3643 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -15,7 +15,7 @@ #include #include -#include +#include static LIST_HEAD(clock_list); static DEFINE_SPINLOCK(clock_lock); From 0bb383a2d8f51e32ecc156f3f84067420ffe6d20 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:56:04 +0200 Subject: [PATCH 695/839] MIPS, CPUFREQ: Fix spelling of Institute. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-loongson64/mmzone.h | 2 +- arch/mips/loongson64/common/bonito-irq.c | 2 +- arch/mips/loongson64/common/cmdline.c | 2 +- arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c | 2 +- arch/mips/loongson64/common/env.c | 2 +- arch/mips/loongson64/common/irq.c | 2 +- arch/mips/loongson64/common/setup.c | 2 +- arch/mips/loongson64/fuloong-2e/irq.c | 2 +- arch/mips/loongson64/lemote-2f/clock.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 2 +- drivers/cpufreq/loongson2_cpufreq.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 37c08a27b4f0..c9f7e231e66b 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c index cc0e4fd548e6..4e116d23bab3 100644 --- a/arch/mips/loongson64/common/bonito-irq.c +++ b/arch/mips/loongson64/common/bonito-irq.c @@ -3,7 +3,7 @@ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index 72fed003a536..01fbed137028 100644 --- a/arch/mips/loongson64/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c index 12c75db23420..875037063a80 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c @@ -1,7 +1,7 @@ /* * CS5536 General timer functions * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 22f04ca2ff3e..f6c44dd332e2 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c index 687003b19b45..d36d969a4a87 100644 --- a/arch/mips/loongson64/common/irq.c +++ b/arch/mips/loongson64/common/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c index d477dd6bb326..2dc5122f0e09 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson64/common/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c index ef5ec8f3de5f..892963f860b7 100644 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ b/arch/mips/loongson64/fuloong-2e/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 4c5b94af3643..a78fb657068c 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 12d14ed48778..6f9e010cec4d 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index fc897babab55..e362860c2b50 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -3,7 +3,7 @@ * * The 2E revision of loongson processor not support this feature. * - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public From 37b64a42067a04a22468c4e52c12af00d72e462b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 21:56:34 +0200 Subject: [PATCH 696/839] tick/broadcast: Unbreak CONFIG_GENERIC_CLOCKEVENTS=n build Making tick_broadcast_oneshot_control() independent from CONFIG_GENERIC_CLOCKEVENTS_BROADCAST broke the build for CONFIG_GENERIC_CLOCKEVENTS=n because the function is not defined there. Provide a proper stub inline. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/tick.h b/include/linux/tick.h index 6916dcb61857..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,7 +67,14 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} +#endif static inline void tick_broadcast_enable(void) { From 4f273959b850569253299987eee611927f048de7 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 8 Jul 2015 00:22:03 +0300 Subject: [PATCH 697/839] mei: nfc: fix deadlock on shutdown/suspend path In function mei_nfc_host_exit mei_cl_remove_device cannot be called under the device mutex as device removing flow invokes the device driver remove handler that calls in turn to mei_cl_disable_device which naturally acquires the device mutex. Also remove mei_cl_bus_remove_devices which has the same issue, but is never executed as currently the only device on the mei client bus is NFC and a new device cannot be easily added till the bus revamp is completed. This fixes regression caused by commit be9b720a0ccb ("mei_phy: move all nfc logic from mei driver to nfc") Prior to this change the nfc driver remove handler called to no-op disable function while actual nfc device was disabled directly from the mei driver. Reported-by: Linus Torvalds Acked-by: Greg Kroah-Hartman Cc: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Linus Torvalds --- drivers/misc/mei/bus.c | 16 ---------------- drivers/misc/mei/init.c | 2 -- drivers/misc/mei/nfc.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 357b6ae4d207..458aa5a09c52 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -552,22 +552,6 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) schedule_work(&device->event_work); } -void mei_cl_bus_remove_devices(struct mei_device *dev) -{ - struct mei_cl *cl, *next; - - mutex_lock(&dev->device_lock); - list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { - if (cl->device) - mei_cl_remove_device(cl->device); - - list_del(&cl->device_link); - mei_cl_unlink(cl); - kfree(cl); - } - mutex_unlock(&dev->device_lock); -} - int __init mei_cl_bus_init(void) { return bus_register(&mei_cl_bus_type); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 94514b2c7a50..00c3865ca3b1 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -333,8 +333,6 @@ void mei_stop(struct mei_device *dev) mei_nfc_host_exit(dev); - mei_cl_bus_remove_devices(dev); - mutex_lock(&dev->device_lock); mei_wd_stop(dev); diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c index b983c4ecad38..290ef3037437 100644 --- a/drivers/misc/mei/nfc.c +++ b/drivers/misc/mei/nfc.c @@ -402,11 +402,12 @@ void mei_nfc_host_exit(struct mei_device *dev) cldev->priv_data = NULL; - mutex_lock(&dev->device_lock); /* Need to remove the device here * since mei_nfc_free will unlink the clients */ mei_cl_remove_device(cldev); + + mutex_lock(&dev->device_lock); mei_nfc_free(ndev); mutex_unlock(&dev->device_lock); } From 56551da9255f20ffd3a9711728a1a3ad4b7100af Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 7 Jul 2015 09:40:50 +0200 Subject: [PATCH 698/839] drivers: clk: st: Incorrect register offset used for lock_status Incorrect register offset used for sthi407 clockgenC Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Fixes: 51306d56ba81 ("clk: st: STiH407: Support for clockgenC0") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 99c98c15d4a4..d9eb2e1d8471 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, From 7928eb0370d1133d0d8cd2f5ddfca19c309079d5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 8 Jul 2015 04:49:10 +0200 Subject: [PATCH 699/839] MIPS: O32: Do not handle require 32 bytes from the stack to be readable. Commit 46e12c07b3b9603c60fc1d421ff18618241cb081 (MIPS: O32 / 32-bit: Always copy 4 stack arguments.) change the O32 syscall handler to always load four arguments from the userspace stack even for syscalls that require fewer or no arguments to be copied. This removes a large table from kernel space and need to maintain it. It appeared that it was ok the implementation chosen requires 16 bytes of readable stack space above the user stack pointer. Turned out a few threading implementations munmap the user stack before the thread exits resulting in errors due to the unreadable stack. We now treat any failed load as a if the loaded value was zero and let the actual syscall deal with the situation. Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall32-o32.S | 37 +++++++++++++++++++++++++--------- arch/mips/kernel/scall64-o32.S | 33 ++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 6e8de80bb446..4cc13508d967 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -73,10 +73,11 @@ NESTED(handle_sys, PT_SIZE, sp) .set noreorder .set nomacro -1: user_lw(t5, 16(t0)) # argument #5 from usp -4: user_lw(t6, 20(t0)) # argument #6 from usp -3: user_lw(t7, 24(t0)) # argument #7 from usp -2: user_lw(t8, 28(t0)) # argument #8 from usp +load_a4: user_lw(t5, 16(t0)) # argument #5 from usp +load_a5: user_lw(t6, 20(t0)) # argument #6 from usp +load_a6: user_lw(t7, 24(t0)) # argument #7 from usp +load_a7: user_lw(t8, 28(t0)) # argument #8 from usp +loads_done: sw t5, 16(sp) # argument #5 to ksp sw t6, 20(sp) # argument #6 to ksp @@ -85,10 +86,10 @@ NESTED(handle_sys, PT_SIZE, sp) .set pop .section __ex_table,"a" - PTR 1b,bad_stack - PTR 2b,bad_stack - PTR 3b,bad_stack - PTR 4b,bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -153,8 +154,8 @@ syscall_trace_entry: /* ------------------------------------------------------------------------ */ /* - * The stackpointer for a call with more than 4 arguments is bad. - * We probably should handle this case a bit more drastic. + * Our open-coded access area sanity test for the stack pointer + * failed. We probably should handle this case a bit more drastic. */ bad_stack: li v0, EFAULT @@ -163,6 +164,22 @@ bad_stack: sw t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li t5, 0 + b load_a5 + +bad_stack_a5: + li t6, 0 + b load_a6 + +bad_stack_a6: + li t7, 0 + b load_a7 + +bad_stack_a7: + li t8, 0 + b loads_done + /* * The system call does not exist in this kernel */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d07b210fbeff..66d618bb2fa2 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -69,16 +69,17 @@ NESTED(handle_sys, PT_SIZE, sp) daddu t1, t0, 32 bltz t1, bad_stack -1: lw a4, 16(t0) # argument #5 from usp -2: lw a5, 20(t0) # argument #6 from usp -3: lw a6, 24(t0) # argument #7 from usp -4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls) +load_a4: lw a4, 16(t0) # argument #5 from usp +load_a5: lw a5, 20(t0) # argument #6 from usp +load_a6: lw a6, 24(t0) # argument #7 from usp +load_a7: lw a7, 28(t0) # argument #8 from usp +loads_done: .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - PTR 3b, bad_stack - PTR 4b, bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -167,6 +168,22 @@ bad_stack: sd t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li a4, 0 + b load_a5 + +bad_stack_a5: + li a5, 0 + b load_a6 + +bad_stack_a6: + li a6, 0 + b load_a7 + +bad_stack_a7: + li a7, 0 + b loads_done + not_o32_scall: /* * This is not an o32 compatibility syscall, pass it on From 5caaf5346892d1e7f0b8b7223062644f8538483f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:46 +1000 Subject: [PATCH 700/839] cxl: Fail mmap if requested mapping is larger than assigned problem state area This patch makes the mmap call fail outright if the requested region is larger than the problem state area assigned to the context so the error is reported immediately rather than waiting for an attempt to access an address out of bounds. Although we never expect users to map more than the assigned problem state area and are not aware of anyone doing this (other than for testing), this does have the potential to break users if someone has used a larger range regardless. I'm submitting it for consideration, but if this change is not considered acceptable the previous patch is sufficient to prevent access out of bounds without breaking anyone. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 2a4c80ac322a..6236d4a1f7ad 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -145,8 +145,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = { */ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) { + u64 start = vma->vm_pgoff << PAGE_SHIFT; u64 len = vma->vm_end - vma->vm_start; - len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + } if (ctx->afu->current_mode != CXL_MODE_DEDICATED) { /* make sure there is a valid per process space for this AFU */ From 10a5894f2dedd8a26b3132497445b314c0d952c4 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:45 +1000 Subject: [PATCH 701/839] cxl: Fix off by one error allowing subsequent mmap page to be accessed It was discovered that if a process mmaped their problem state area they were able to access one page more than expected, potentially allowing them to access the problem state area of an unrelated process. This was due to a simple off by one error in the mmap fault handler introduced in 0712dc7e73e59d79bcead5d5520acf4e9e917e87 ("cxl: Fix issues when unmapping contexts"), which is fixed in this patch. Cc: stable@vger.kernel.org Fixes: 0712dc7e73e5 ("cxl: Fix issues when unmapping contexts") Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 6236d4a1f7ad..1287148629c0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } From 442053e57a4fc58b719b6ceab60f29ef9cf4404c Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Jul 2015 12:21:10 -0400 Subject: [PATCH 702/839] powerpc/perf/24x7: Fix lockdep warning The sysfs attributes for the 24x7 counters are dynamically allocated. Initialize the attributes using sysfs_attr_init() to fix following warning which occurs when CONFIG_DEBUG_LOCK_VMALLOC=y. [ 0.346249] audit: initializing netlink subsys (disabled) [ 0.346284] audit: type=2000 audit(1436295254.340:1): initialized [ 0.346489] BUG: key c0000000efe90198 not in .data! [ 0.346491] DEBUG_LOCKS_WARN_ON(1) [ 0.346502] ------------[ cut here ]------------ [ 0.346504] WARNING: at ../kernel/locking/lockdep.c:3002 [ 0.346506] Modules linked in: Reported-by: Gustavo Luiz Duarte Signed-off-by: Sukadev Bhattiprolu Tested-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20631d1..df956295c2a7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(&attr->attr.attr); + attr->var = str; attr->attr.attr.name = name; attr->attr.attr.mode = 0444; From 9958084a5275ca2e8f55c5b18729307f2f0cb53b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Jul 2015 19:16:23 +1000 Subject: [PATCH 703/839] powerpc: Update MAINTAINERS to point at shared tree Now that we have a shared powerpc tree on kernel.org, point folks at that as the primary place to look for powerpc stuff. Signed-off-by: Michael Ellerman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..b29f8ebc10b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6161,7 +6161,7 @@ M: Michael Ellerman W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git S: Supported F: Documentation/powerpc/ F: arch/powerpc/ From 030f4e968741d65aea9cd5f7814d1164967801ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2015 17:30:25 +0800 Subject: [PATCH 704/839] crypto: nx - Fix reentrancy bugs This patch fixes a host of reentrancy bugs in the nx driver. The following algorithms are affected: * CCM * GCM * CTR * XCBC * SHA256 * SHA512 The crypto API allows a single transform to be used by multiple threads simultaneously. For example, IPsec will use a single tfm to process packets for a given SA. As packets may arrive on multiple CPUs that tfm must be reentrant. The nx driver does try to deal with this by using a spin lock. Unfortunately only the basic AES/CBC/ECB algorithms do this in the correct way. The symptom of these bugs may range from the generation of incorrect output to memory corruption. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-aes-ccm.c | 6 ++- drivers/crypto/nx/nx-aes-ctr.c | 7 ++-- drivers/crypto/nx/nx-aes-gcm.c | 17 ++++---- drivers/crypto/nx/nx-aes-xcbc.c | 70 +++++++++++++++++++++------------ drivers/crypto/nx/nx-sha256.c | 43 +++++++++++--------- drivers/crypto/nx/nx-sha512.c | 44 ++++++++++++--------- drivers/crypto/nx/nx.c | 3 ++ drivers/crypto/nx/nx.h | 14 +++++-- 8 files changed, 125 insertions(+), 79 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f80813a06f..e4311ce0cd78 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ out: static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4d54dd..dd7e9f3f5b6b 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 08ac6d48688c..92c993f08213 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -317,6 +317,7 @@ out: static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -326,7 +327,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -424,8 +425,8 @@ out: static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -434,8 +435,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -445,7 +446,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -457,7 +459,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faffab4a3..c2f7d4befb55 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ out: return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; + + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_AES); - memset(sctx, 0, sizeof *sctx); - NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); + return 0; +} - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - if (len != AES_BLOCK_SIZE) - return -EINVAL; - - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 4e91bdb83c59..08f8d5cd6334 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,34 +29,28 @@ #include "nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; + + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_SHA); - memset(sctx, 0, sizeof *sctx); - nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA256_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); - if (len != SHA256_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -78,6 +72,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; @@ -108,6 +103,16 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -282,7 +287,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index e6a58d2ee628..aff0fe58eac0 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,34 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; + + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_SHA); - memset(sctx, 0, sizeof *sctx); - nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA512_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); - if (len != SHA512_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -77,6 +72,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; @@ -107,6 +103,16 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -288,7 +294,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index f6198f29a4a8..436971343ff7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -713,12 +713,15 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct nx_ccm_rctx)); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm) { + crypto_aead_set_reqsize(tfm, sizeof(struct nx_gcm_rctx)); return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index de3ea8738146..cdff03a42ae7 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { From 91c269a0d3eadd63f7112411ee812fbc170dc488 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Jul 2015 20:55:35 +0200 Subject: [PATCH 705/839] MAINTAINER: add bridge netfilter So scripts/get_maintainer.pl shows the Netfilter mailing lists. Reported-by: Julien Grall Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 993d4cfd5aa0..8183b4659a4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6996,6 +6996,7 @@ F: include/uapi/linux/netfilter/ F: net/*/netfilter.c F: net/*/netfilter/ F: net/netfilter/ +F: net/bridge/br_netfilter*.c NETLABEL M: Paul Moore From 86e8971800381c3a8d8d9327f83b1f97ccb04a4f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 7 Jul 2015 15:55:21 +0100 Subject: [PATCH 706/839] netfilter: bridge: Use __in6_dev_get rather than in6_dev_get in br_validate_ipv6 The commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd "netfilter: bridge: forward IPv6 fragmented packets" introduced a new function br_validate_ipv6 which take a reference on the inet6 device. Although, the reference is not released at the end. This will result to the impossibility to destroy any netdevice using ipv6 and bridge. It's possible to directly retrieve the inet6 device without taking a reference as all netfilter hooks are protected by rcu_read_lock via nf_hook_slow. Spotted while trying to destroy a Xen guest on the upstream Linux: "unregister_netdevice: waiting for vif1.0 to become free. Usage count = 1" Signed-off-by: Julien Grall Cc: Bernhard Thaler Cc: Pablo Neira Ayuso Cc: fw@strlen.de Cc: ian.campbell@citrix.com Cc: wei.liu2@citrix.com Cc: Bob Liu Acked-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6d12d2675c80..13b7d1e3d185 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb) { const struct ipv6hdr *hdr; struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); From 69711ca19b06d1b33d8f21213b540b5d1c638dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hinderer?= Date: Wed, 8 Jul 2015 00:02:01 +0200 Subject: [PATCH 707/839] x86/kconfig: Fix typo in the CONFIG_CMDLINE_BOOL help text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sébastien Hinderer Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Samuel Thibault Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0b2929f49531..3dbb7e7909ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2020,7 +2020,7 @@ config CMDLINE_BOOL To compile command line arguments into the kernel, set this option to 'Y', then fill in the - the boot arguments in CONFIG_CMDLINE. + boot arguments in CONFIG_CMDLINE. Systems with fully functional boot loaders (i.e. non-embedded) should leave this option set to 'N'. From 2c3d99845eb7b1e9f3d2863099bc5a25c8461d3f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Jul 2015 15:15:01 +0100 Subject: [PATCH 708/839] drm/i915: Restore all GGTT VMAs on resume When rotated and partial views were added no one spotted the resume path which assumes only one GGTT VMA per object and hence is now skipping rebind of alternative views. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9daa2883ac18..dcc6a88c560e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2546,6 +2546,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct i915_vma *vma; + bool flush; i915_check_and_clear_faults(dev); @@ -2555,17 +2557,24 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total, true); + /* Cache flush objects bound into GGTT and rebind them. */ + vm = &dev_priv->gtt.base; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, - &dev_priv->gtt.base); - if (!vma) - continue; + flush = false; + list_for_each_entry(vma, &obj->vma_list, vma_link) { + if (vma->vm != vm) + continue; - i915_gem_clflush_object(obj, obj->pin_display); - WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE)); + WARN_ON(i915_vma_bind(vma, obj->cache_level, + PIN_UPDATE)); + + flush = true; + } + + if (flush) + i915_gem_clflush_object(obj, obj->pin_display); } - if (INTEL_INFO(dev)->gen >= 8) { if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) chv_setup_private_ppat(dev_priv); From a899418167264c7bac574b1a0f1b2c26c5b0995a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:30 +0000 Subject: [PATCH 709/839] hotplug: Prevent alloc/free of irq descriptors during cpu up/down When a cpu goes up some architectures (e.g. x86) have to walk the irq space to set up the vector space for the cpu. While this needs extra protection at the architecture level we can avoid a few race conditions by preventing the concurrent allocation/free of irq descriptors and the associated data. When a cpu goes down it moves the interrupts which are targeted to this cpu away by reassigning the affinities. While this happens interrupts can be allocated and freed, which opens a can of race conditions in the code which reassignes the affinities because interrupt descriptors might be freed underneath. Example: CPU1 CPU2 cpu_up/down irq_desc = irq_to_desc(irq); remove_from_radix_tree(desc); raw_spin_lock(&desc->lock); free(desc); We could protect the irq descriptors with RCU, but that would require a full tree change of all accesses to interrupt descriptors. But fortunately these kind of race conditions are rather limited to a few things like cpu hotplug. The normal setup/teardown is very well serialized. So the simpler and obvious solution is: Prevent allocation and freeing of interrupt descriptors accross cpu hotplug. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.063519515@linutronix.de --- include/linux/irqdesc.h | 7 ++++++- kernel/cpu.c | 22 +++++++++++++++++++++- kernel/irq/internals.h | 4 ---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..6a374544d495 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "smpboot.h" @@ -391,14 +392,20 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smpboot_park_threads(cpu); + /* + * Prevent irq alloc/free while the dying cpu reorganizes the + * interrupt affinities. + */ + irq_lock_sparse(); + /* * So now all preempt/rcu users must observe !cpu_active(). */ - err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + irq_unlock_sparse(); goto out_release; } BUG_ON(cpu_online(cpu)); @@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ per_cpu(cpu_dead_idle, cpu) = false; + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -517,8 +527,18 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; } + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); + + irq_unlock_sparse(); + if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee828c41..61008b8433ab 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } -extern void irq_lock_sparse(void); -extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); -static inline void irq_lock_sparse(void) { } -static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); From 63fef06ada94172d7b4295fc205441bafd8f8fb3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:46 +0200 Subject: [PATCH 710/839] drm/i915: Check crtc->active in intel_crtc_disable_planes This was lost in commit ce22dba92de22e951dee2ff89937a39754d2dd91 Author: Maarten Lankhorst Date: Tue Apr 21 17:12:56 2015 +0300 drm/i915: Move toggling planes out of crtc enable/disable. and we still need that crtc->active check since the overall modeset flow doesn't yet take dpms state into account properly. Fixes WARNING backtraces on at least bdw/hsw due to the ips disabling code being upset about being run on a switched-off pipe. We don't need a corresponding change on the enable side since with the old setCrtc semantics we always force-enable the pipe after a modeset. And the dpms function intel_crtc_control already checks for ->active. Reported-by: Linus Torvalds Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Ander Conselvan de Oliveira Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8aa803848f35..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4854,6 +4854,9 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) struct intel_plane *intel_plane; int pipe = intel_crtc->pipe; + if (!intel_crtc->active) + return; + intel_crtc_wait_for_pending_flips(crtc); intel_pre_disable_primary(crtc); From dec4f799d0a4c9edae20512fa60b0a36f3299ca2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:47 +0200 Subject: [PATCH 711/839] drm/i915: Use crtc_state->active in primary check_plane func Since commit 8c7b5ccb729870e606321b3703e2c2e698c49a95 Author: Ander Conselvan de Oliveira Date: Tue Apr 21 17:13:19 2015 +0300 drm/i915: Use atomic helpers for computing changed flags we compute the plane state for a modeset before actually committing any changes, which means crtc->active won't be correct yet. Looking at future work in the modeset conversion targetting 4.3 the only places where crtc_state->active isn't accurate is when disabling other CRTCs than the one the modeset is for (when stealing connectors). Which isn't the case here. And that's also confirmed by an audit, we do unconditionally update crtc_state->active for the current pipe. We also don't need to update any other plane check functions since we only ever add the primary state to the modeset update right now. Cc: Ander Conselvan de Oliveira Cc: Maarten Lankhorst Cc: Jani Nikula Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 647b1404c441..ba9321998a41 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (intel_crtc->active) { + if (crtc_state->base.active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); From 45820c294fe1b1a9df495d57f40585ef2d069a39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2015 09:33:38 -0700 Subject: [PATCH 712/839] Fix broken audit tests for exec arg len The "fix" in commit 0b08c5e5944 ("audit: Fix check of return value of strnlen_user()") didn't fix anything, it broke things. As reported by Steven Rostedt: "Yes, strnlen_user() returns 0 on fault, but if you look at what len is set to, than you would notice that on fault len would be -1" because we just subtracted one from the return value. So testing against 0 doesn't test for a fault condition, it tests against a perfectly valid empty string. Also fix up the usual braindamage wrt using WARN_ON() inside a conditional - make it part of the conditional and remove the explicit unlikely() (which is already part of the WARN_ON*() logic, exactly so that you don't have to write unreadable code. Reported-and-tested-by: Steven Rostedt Cc: Jan Kara Cc: Paul Moore Signed-off-by: Linus Torvalds --- kernel/auditsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 09c65640cad6..e85bdfd15fed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { - WARN_ON(1); + if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } From 07f18f0bb8d8d65badd8b4988b40d329fc0cc6dc Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:06 +0200 Subject: [PATCH 713/839] drm/radeon: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to resolve issues with missed vblanks and impossible values inside delivered kms pageflip completion events showed that radeon's irq handling sometimes doesn't handle valid irqs, but silently skips them. This was observed for vblank interrupts. Although those irqs have corresponding events queued in the gpu's irq ring at time of interrupt, and therefore the corresponding handling code gets triggered by these events, the handling code sometimes silently skipped processing the irq. The reason for those skips is that the handling code double-checks for each irq event if the corresponding irq status bits in the irq status registers are set. Sometimes those bits are not set at time of check for valid irqs, maybe due to some hardware race on some setups? The problem only seems to happen on some machine + card combos sometimes, e.g., never happened during my testing of different PC cards of the DCE-2/3/4 generation a year ago, but happens consistently now on two different Apple Mac cards (RV730, DCE-3, Apple iMac and Evergreen JUNIPER, DCE-4 in a Apple MacPro). It also doesn't happen at each interrupt but only occassionally every couple of hundred or thousand vblank interrupts. This results in XOrg warning messages like "[ 7084.472] (WW) RADEON(0): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 420120 < target_msc 420121" as well as skipped frames and problems for applications that use kms pageflip events or vblank events, e.g., users of DRI2 and DRI3/Present, Waylands Weston compositor, etc. See also https://bugs.freedesktop.org/show_bug.cgi?id=85203 After some talking to Alex and Michel, we decided to fix this by turning the double-check for asserted irq status bits into a warning. Whenever a irq event is queued in the IH ring, always execute the corresponding interrupt handler. Still check the irq status bits, but only to log a DRM_DEBUG message on a mismatch. This fixed the problems reliably on both previously failing cards, RV-730 dual-head tested on both crtcs (pipes D1 and D2) and a triple-output Juniper HD-5770 card tested on all three available crtcs (D1/D2/D3). The r600 and evergreen irq handling is therefore tested, but the cik an si handling is only compile tested due to lack of hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher CC: # v3.16+ Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 336 ++++++++++++++----------- drivers/gpu/drm/radeon/evergreen.c | 392 ++++++++++++++++------------- drivers/gpu/drm/radeon/r600.c | 155 +++++++----- drivers/gpu/drm/radeon/si.c | 336 ++++++++++++++----------- 4 files changed, 688 insertions(+), 531 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 4ecf5caa8c6d..248953d2fdb7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7964,23 +7964,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7990,23 +7994,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8016,23 +8024,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8042,23 +8054,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8068,23 +8084,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8094,23 +8114,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8130,88 +8154,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3a6d483a2c36..0acde1949c18 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4924,7 +4924,7 @@ restart_ih: return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4942,23 +4942,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4968,23 +4972,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4994,23 +5002,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5020,23 +5032,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5046,23 +5062,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5072,23 +5092,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5108,88 +5132,100 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5199,46 +5235,52 @@ restart_ih: case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 35dafd77a639..4ea5b10ff5f4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4086,23 +4086,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4112,23 +4116,27 @@ restart_ih: case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4148,46 +4156,53 @@ restart_ih: case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4197,18 +4212,22 @@ restart_ih: case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 26388b5dd6ed..07037e32dea3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); From bd833144a23dead304744dc748f5d72d7e92d315 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:07 +0200 Subject: [PATCH 714/839] drm/amdgpu: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a translation of the patch ... "drm/radeon: Handle irqs only based on irq ring, not irq status regs." ... for the vblank irq handling, to fix the same problem described in that patch on the new driver. Only compile tested due to lack of suitable hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 22 ++++++++++++++-------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5cde635978f9..6e77964f1b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3403,19 +3403,25 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v10_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v10_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 95efd98b202d..7f7abb0e0be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3402,19 +3402,25 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v11_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v11_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index aaca8d663f2c..08387dfd98a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3237,19 +3237,25 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); From 828202a382a06eda74202d4888d01a1078c68922 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:10 +0200 Subject: [PATCH 715/839] drm/radeon: use RCU query for GEM_BUSY syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions. Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b953..7199e19eb8ba 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, true); + + r = reservation_object_test_signaled_rcu(robj->tbo.resv, true); + if (r == 0) + r = -EBUSY; + else + r = 0; + + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(rdev, r); return r; } From 54e03986133468e02cb01b76215e4d53a9cf6380 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:11 +0200 Subject: [PATCH 716/839] drm/radeon: fix HDP flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was regressed by commit 39e7f6f8, although I don't know of any actual issues caused by it. The storage domain is read without TTM locking now, but the lock never helped to prevent any races. Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 7199e19eb8ba..013ec7106e55 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -476,6 +476,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); From 5e3c4f907043a7fae1b48e86536dc7b9efa07e29 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:12 +0200 Subject: [PATCH 717/839] drm/radeon: default to 2048 MB GART size on SI+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer ASICs have more VRAM on average and allocating more GART as well can have advantages. Also see commit edcd26e8. Ideally, we should scale GART size based on actual VRAM size, but that requires significant restructuring of initialization. v2: extract small helper, apply to error paths Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2593b1168bd6..1a532a9bc3a3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1079,6 +1079,22 @@ static bool radeon_check_pot_argument(int arg) return (arg & (arg - 1)) == 0; } +/** + * Determine a sensible default GART size according to ASIC family. + * + * @family ASIC family name + */ +static int radeon_gart_size_auto(enum radeon_family family) +{ + /* default to a larger gart size on newer asics */ + if (family >= CHIP_TAHITI) + return 2048; + else if (family >= CHIP_RV770) + return 1024; + else + return 512; +} + /** * radeon_check_arguments - validate module params * @@ -1097,27 +1113,17 @@ static void radeon_check_arguments(struct radeon_device *rdev) } if (radeon_gart_size == -1) { - /* default to a larger gart size on newer asics */ - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20; From 866a92040392d844d151aa6a2ba15be2e6e5df4f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Jul 2015 11:54:28 +0300 Subject: [PATCH 718/839] drm/radeon: fix underflow in r600_cp_dispatch_texture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "if (pass_size > buf->total)" can underflow so I have changed the type of size and pass_size to unsigned to avoid this problem. Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 09e3f39925fa..98f9adaccc3d 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -2483,7 +2483,7 @@ int r600_cp_dispatch_texture(struct drm_device *dev, struct drm_buf *buf; u32 *buffer; const u8 __user *data; - int size, pass_size; + unsigned int size, pass_size; u64 src_offset, dst_offset; if (!radeon_check_offset(dev_priv, tex->offset)) { From 1b42804d27b1c2623309950e9b203b11f4c67f4f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 7 Jul 2015 18:00:49 +0100 Subject: [PATCH 719/839] arm64: entry: handle debug exceptions in el*_inv Currently we enable debug exceptions before reading ESR_EL1 in both el0_inv and el1_inv. If a debug exception is taken before we read ESR_EL1, the value will have been corrupted. As el*_inv is typically fatal, an intervening debug exception results in misleading debug information being logged to the console, but is not otherwise harmful. As with the other entry paths, we can use the ESR_EL1 value stashed earlier in the exception entry (in x25 for el0_sync{,_compat}, and x1 for el1_sync), giving us better error reporting in this case. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7691a378668..f860bfda454a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -352,8 +352,8 @@ el1_inv: // TODO: add support for undefined instructions in kernel mode enable_dbg mov x0, sp + mov x2, x1 mov x1, #BAD_SYNC - mrs x2, esr_el1 b bad_mode ENDPROC(el1_sync) @@ -553,7 +553,7 @@ el0_inv: ct_user_exit mov x0, sp mov x1, #BAD_SYNC - mrs x2, esr_el1 + mov x2, x25 bl bad_mode b ret_to_user ENDPROC(el0_sync) From 32f675bbc9be14a40d972820e190ac56341ce198 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jul 2015 15:57:19 +0200 Subject: [PATCH 720/839] net_sched: gen_estimator: extend pps limit rate estimators are limited to 4 Mpps, which was fine years ago, but too small with current hardware generation. Lets use 2^5 scaling instead of 2^10 to get 128 Mpps new limit. On 64bit arch, use an "unsigned long" for temp storage and remove limit. (We do not expect 32bit arches to be able to reach this point) Tested: tc -s -d filter sh dev eth0 parent ffff: filter protocol ip pref 1 u32 filter protocol ip pref 1 u32 fh 800: ht divisor 1 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 match 07000000/ff000000 at 12 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 166 sec Action statistics: Sent 39734251496 bytes 863788076 pkt (dropped 863788117, overlimits 0 requeues 0) rate 4067Mbit 11053596pps backlog 0b 0p requeues 0 Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9dfb88a933e7..92d886f4adcb 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ NOTES. - * avbps is scaled by 2^5, avpps is scaled by 2^10. + * avbps and avpps are scaled by 2^5. * both values are reported as 32 bit unsigned values. bps can overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor @@ -85,10 +85,10 @@ struct gen_estimator struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; + u32 last_packets; + unsigned long avpps; u64 last_bytes; u64 avbps; - u32 last_packets; - u32 avpps; struct rcu_head e_rcu; struct rb_node node; struct gnet_stats_basic_cpu __percpu *cpu_bstats; @@ -118,8 +118,8 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { struct gnet_stats_basic_packed b = {0}; + unsigned long rate; u64 brate; - u32 rate; spin_lock(e->stats_lock); read_lock(&est_lock); @@ -133,10 +133,11 @@ static void est_timer(unsigned long arg) e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; - rate = (b.packets - e->last_packets)<<(12 - idx); + rate = b.packets - e->last_packets; + rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps+0x1FF)>>10; + e->rate_est->pps = (e->avpps + 0xF) >> 5; skip: read_unlock(&est_lock); spin_unlock(e->stats_lock); From aa43c5ff737dac5dcfccd12d23abeefbdf0579c7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 4 Jul 2015 11:22:30 +0200 Subject: [PATCH 721/839] NET: hamradio: Fix IP over bpq encapsulation. Since 1d5da757da860a6916adbf68b09e868062b4b3b8 (ax25: Stop using magic neighbour cache operations.) any attempt to transmit IP packets over a bpqether device will result in a message like "Dead loop on virtual device bpq0, fix it urgently!" Fix suggested by Eric W. Biederman . Signed-off-by: Ralf Baechle Cc: # 4.1 Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 7856b6ccf5c5..d95a50ae996d 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -482,6 +482,7 @@ static void bpq_setup(struct net_device *dev) memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); dev->flags = 0; + dev->features = NETIF_F_LLTX; /* Allow recursion */ #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) dev->header_ops = &ax25_header_ops; From 3d8cc14152a4b661580761049c7b9711efa60d82 Mon Sep 17 00:00:00 2001 From: Y Vo Date: Fri, 26 Jun 2015 15:01:47 +0700 Subject: [PATCH 722/839] arm64: dts: Add poweroff button device node for APM X-Gene platform This patch adds poweroff button device node to support poweroff feature on APM X-Gene Mustang platform. Signed-off-by: Y Vo Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/apm/apm-mustang.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 83578e766b94..4c55833d8a41 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -23,6 +23,16 @@ device_type = "memory"; reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ }; + + gpio-keys { + compatible = "gpio-keys"; + button@1 { + label = "POWER"; + linux,code = <116>; + linux,input-type = <0x1>; + interrupts = <0x0 0x2d 0x1>; + }; + }; }; &pcie0clk { From cfa520056407acaa29f3aa7aee10c929b371c7cf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 5 Jul 2015 12:16:27 -0500 Subject: [PATCH 723/839] net: phy: add dependency on HAS_IOMEM to MDIO_BUS_MUX_MMIOREG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On UML builds, mdio-mux-mmioreg.c fails to compile: drivers/net/phy/mdio-mux-mmioreg.c:50:3: error: implicit declaration of function ‘ioremap’ [-Werror=implicit-function-declaration] drivers/net/phy/mdio-mux-mmioreg.c:63:3: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] This is due to CONFIG_OF now being user selectable. Add a dependency on HAS_IOMEM to fix this. Signed-off-by: Rob Herring Cc: Florian Fainelli Cc: David S. Miller Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index cf18940f4e84..cb86d7a01542 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -191,7 +191,7 @@ config MDIO_BUS_MUX_GPIO config MDIO_BUS_MUX_MMIOREG tristate "Support for MMIO device-controlled MDIO bus multiplexers" - depends on OF_MDIO + depends on OF_MDIO && HAS_IOMEM select MDIO_BUS_MUX help This module provides a driver for MDIO bus multiplexers that From efc5120b8259243fa945d0028450c0a7a5a4b9ef Mon Sep 17 00:00:00 2001 From: Tirumalesh Chalamarla Date: Fri, 26 Jun 2015 12:12:23 -0700 Subject: [PATCH 724/839] GICv3: Add ITS entry to THUNDER dts The PCIe host controller uses MSIs provided by GICv3 ITS. Enable it on Thunder SoCs by adding an entry to DT. Signed-off-by: Tirumalesh Chalamarla Acked-by: Marc Zyngier Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index d8c0bdc51882..9cb7cf94284a 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -376,10 +376,19 @@ gic0: interrupt-controller@8010,00000000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; + #address-cells = <2>; + #size-cells = <2>; + ranges; interrupt-controller; reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */ <0x8010 0x80000000 0x0 0x600000>; /* GICR */ interrupts = <1 9 0xf04>; + + its: gic-its@8010,00020000 { + compatible = "arm,gic-v3-its"; + msi-controller; + reg = <0x8010 0x20000 0x0 0x200000>; + }; }; uaa0: serial@87e0,24000000 { From 758556bdc1c8a8dffea0ea9f9df891878cc2468c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Jul 2015 06:48:06 +0930 Subject: [PATCH 725/839] module: Fix load_module() error path The load_module() error path frees a module but forgot to take it out of the mod_tree, leaving a dangling entry in the tree, causing havoc. Cc: Mathieu Desnoyers Reported-by: Arthur Marsh Tested-by: Arthur Marsh Fixes: 93c2e105f6bc ("module: Optimize __module_address() using a latched RB-tree") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/module.c b/kernel/module.c index 3e0e19763d24..4d2b82e610e2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3557,6 +3557,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_sched(); From 4d44f2a0266cdcc1226c7d94431ab1d57d0f6d53 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 1 Jul 2015 13:36:01 +0100 Subject: [PATCH 726/839] arm: dts: vexpress: describe all PMUs in TC2 dts The dts for the CoreTile Express A15x2 A7x3 (TC2) only describes the PMUs of the Cortex-A15 CPUs, and not the Cortex-A7 CPUs. Now that we have a mechanism for describing disparate PMUs and their interrupts in device tree, this patch makes use of these to describe the PMUs for all CPUs in the system. For consistency, the existing A15 PMU interrupt-affinity property is reflowed across two lines. Signed-off-by: Mark Rutland Acked-by: Will Deacon Acked-by: Sudeep Holla Cc: Liviu Dudau Cc: Lorenzo Pieralisi Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 107395c32d82..038e30e4332f 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -187,11 +187,22 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; - interrupt-affinity = <&cpu0>, <&cpu1>; + interrupt-affinity = <&cpu0>, + <&cpu1>; + }; + + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + interrupt-affinity = <&cpu2>, + <&cpu3>, + <&cpu4>; }; oscclk6a: oscclk6a { From 3adf7aaa76e720b52092e4bf8e14d8d231583af6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 1 Jul 2015 13:36:02 +0100 Subject: [PATCH 727/839] arm: dts: vexpress: add missing CCI PMU device node to TC2 The CCI device node was added to vexpress CA15_A7(i.e. TC2) much before the CCI PMU support and binding was added. This patch adds the missing PMU node so that CCI PMUs can be used on TC2. Cc: Liviu Dudau Cc: Lorenzo Pieralisi Acked-by: Punit Agrawal Signed-off-by: Sudeep Holla Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 038e30e4332f..17f63f7dfd9e 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -150,6 +150,16 @@ interface-type = "ace"; reg = <0x5000 0x1000>; }; + + pmu@9000 { + compatible = "arm,cci-400-pmu,r0"; + reg = <0x9000 0x5000>; + interrupts = <0 105 4>, + <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>; + }; }; memory-controller@7ffd0000 { From 9ccd608070b6d4fd1a89fd76a84184fe401fb6a8 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 1 Jul 2015 13:36:03 +0100 Subject: [PATCH 728/839] arm64: dts: add device tree for ARM SMM-A53x2 on LogicTile Express 20MG Add a DTS file for the MP2 Cortex-A53 Soft Macrocell Model implemented on a LogicTile Express 20MG (V2F-1XV7) daughterboard. This is based on the version that's currently available from the ARM DTS repository [1]. [1] git://linux-arm.org/arm-dts.git Signed-off-by: Kristina Martsenko Acked-by: Sudeep Holla Signed-off-by: Kevin Hilman --- MAINTAINERS | 1 + arch/arm64/boot/dts/arm/Makefile | 1 + .../boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts | 191 ++++++++++++++++++ 3 files changed, 193 insertions(+) create mode 100644 arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..3729a4a1d560 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ M: Lorenzo Pieralisi L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/vexpress* +F: arch/arm64/boot/dts/arm/vexpress* F: arch/arm/mach-vexpress/ F: */*/vexpress* F: */*/*/vexpress* diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile index c5c98b91514e..bb3c07209676 100644 --- a/arch/arm64/boot/dts/arm/Makefile +++ b/arch/arm64/boot/dts/arm/Makefile @@ -1,6 +1,7 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2f-1xv7-ca53x2.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts new file mode 100644 index 000000000000..5b1d0181023b --- /dev/null +++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts @@ -0,0 +1,191 @@ +/* + * ARM Ltd. Versatile Express + * + * LogicTile Express 20MG + * V2F-1XV7 + * + * Cortex-A53 (2 cores) Soft Macrocell Model + * + * HBI-0247C + */ + +/dts-v1/; + +#include + +/ { + model = "V2F-1XV7 Cortex-A53x2 SMM"; + arm,hbi = <0x247>; + arm,vexpress,site = <0xf>; + compatible = "arm,vexpress,v2f-1xv7,ca53x2", "arm,vexpress,v2f-1xv7", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { + stdout-path = "serial0:38400n8"; + }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + i2c0 = &v2m_i2c_dvi; + i2c1 = &v2m_i2c_pcie; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 0>; + next-level-cache = <&L2_0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 1>; + next-level-cache = <&L2_0>; + }; + + L2_0: l2-cache0 { + compatible = "cache"; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; /* 2GB @ 2GB */ + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,gic-400"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x2000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = ; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = , + , + , + ; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = , + ; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + smbclk: osc@4 { + /* SMC clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 4>; + freq-range = <40000000 40000000>; + #clock-cells = <0>; + clock-output-names = "smclk"; + }; + + volt@0 { + /* VIO to expansion board above */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "VIO_UP"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + volt@1 { + /* 12V from power connector J6 */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 1>; + regulator-name = "12"; + regulator-always-on; + }; + + temp@0 { + /* FPGA temperature */ + compatible = "arm,vexpress-temp"; + arm,vexpress-sysreg,func = <4 0>; + label = "FPGA"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, + <0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, + <0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + + /include/ "../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi" + }; +}; From f7e2965db17dd3b60f05fad88e7afc79ea75b48f Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Mon, 6 Jul 2015 05:53:35 -0700 Subject: [PATCH 729/839] bridge: mdb: start delete timer for temp static entries Start the delete timer when adding temp static entries so they can expire. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70b3000..3bfc675cf0d1 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_htable *mdb; + unsigned long now = jiffies; int err; mdb = mlock_dereference(br->mdb, br); @@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + if (state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; From 142b942a75cb10ede1b42bf85368d41449ab4e3b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 6 Jul 2015 15:51:20 +0200 Subject: [PATCH 730/839] rhashtable: fix for resize events during table walk If rhashtable_walk_next detects a resize operation in progress, it jumps to the new table and continues walking that one. But it misses to drop the reference to it's current item, leading it to continue traversing the new table's bucket in which the current item is sorted into, and after reaching that bucket's end continues traversing the new table's second bucket instead of the first one, thereby potentially missing items. This fixes the rhashtable runtime test for me. Bug probably introduced by Herbert Xu's patch eddee5ba ("rhashtable: Fix walker behaviour during rehash") although not explicitly tested. Fixes: eddee5ba ("rhashtable: Fix walker behaviour during rehash") Signed-off-by: Phil Sutter Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a60a6d335a91..cc0c69710dcf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -610,6 +610,8 @@ next: iter->skip = 0; } + iter->p = NULL; + /* Ensure we see any new tables. */ smp_rmb(); @@ -620,8 +622,6 @@ next: return ERR_PTR(-EAGAIN); } - iter->p = NULL; - return NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); From d339727c2b1a10f25e6636670ab6e1841170e328 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 17:13:26 +0200 Subject: [PATCH 731/839] net: graceful exit from netif_alloc_netdev_queues() User space can crash kernel with ip link add ifb10 numtxqueues 100000 type ifb We must replace a BUG_ON() by proper test and return -EINVAL for crazy values. Fixes: 60877a32bce00 ("net: allow large number of tx queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6778a9999d52..0ad626214332 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { From 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 6 Jul 2015 17:25:10 +0200 Subject: [PATCH 732/839] Revert "dev: set iflink to 0 for virtual interfaces" This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c. The side effect of this commit is to add a '@NONE' after each virtual interface name with a 'ip link'. It may break existing scripts. Reported-by: Olivier Hartkopp Signed-off-by: Nicolas Dichtel Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/dev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0ad626214332..1e57efda055b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); From 673c2c34f684e9d4328459e426ab54d51a5865c5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 8 Jul 2015 17:07:41 -0400 Subject: [PATCH 733/839] modpost: work correctly with tile coldtext sections The tilegx and tilepro compilers use .coldtext for their unlikely executed text section name, so an __attribute__((cold)) function will (when compiled with higher optimization levels) land in the .coldtext section. Modify modpost to add .coldtext to the set of OTHER_TEXT_SECTIONS so we don't get warnings about referencing such a section in an __ex_table block, and then also modify arch/tile/lib/memcpy_user_64.c so that it uses plain ".coldtext" instead of ".coldtext.memcpy". The latter naming is a relic of an earlier use of -ffunction-sections, which we no longer use by default. Signed-off-by: Chris Metcalf Acked-by: Rusty Russell --- arch/tile/lib/memcpy_user_64.c | 4 ++-- scripts/mod/modpost.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 88c7016492c4..97bbb6060b25 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -28,7 +28,7 @@ #define _ST(p, inst, v) \ ({ \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ @@ -41,7 +41,7 @@ ({ \ unsigned long __v; \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 91ee1b2e0f9a..12d3db3bd46b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -886,7 +886,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ ".kprobes.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*" + ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".coldtext" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" From cfbfd86bfde15020bccde377e11586ee5c8b701d Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Mon, 6 Jul 2015 11:57:37 -0500 Subject: [PATCH 734/839] amd-xgbe: Fix DMA API debug warning When running a kernel configured with CONFIG_DMA_API_DEBUG=y a warning is issued: DMA-API: device driver tries to sync DMA memory it has not allocated This warning is the result of mapping the full range of the Rx buffer pages allocated and then performing a dma_sync_single_for_cpu against a calculated DMA address. The proper thing to do is to use the dma_sync_single_range_for_cpu with a base DMA address and an offset. Reported-by: Kim Phillips Signed-off-by: Tom Lendacky Tested-by: Kim Phillips Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 3 ++- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 11 +++++++---- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 17 +++++++++++------ drivers/net/ethernet/amd/xgbe/xgbe.h | 3 ++- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 661cdaa7ea96..b3bc87fe3764 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -303,7 +303,8 @@ static void xgbe_set_buffer_data(struct xgbe_buffer_data *bd, get_page(pa->pages); bd->pa = *pa; - bd->dma = pa->pages_dma + pa->pages_offset; + bd->dma_base = pa->pages_dma; + bd->dma_off = pa->pages_offset; bd->dma_len = len; pa->pages_offset += len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 506e832c9e9a..a4473d8ff4fa 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1110,6 +1110,7 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, unsigned int rx_usecs = pdata->rx_usecs; unsigned int rx_frames = pdata->rx_frames; unsigned int inte; + dma_addr_t hdr_dma, buf_dma; if (!rx_usecs && !rx_frames) { /* No coalescing, interrupt for every descriptor */ @@ -1129,10 +1130,12 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, * Set buffer 2 (hi) address to buffer dma address (hi) and * set control bits OWN and INTE */ - rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx.hdr.dma)); - rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx.hdr.dma)); - rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx.buf.dma)); - rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx.buf.dma)); + hdr_dma = rdata->rx.hdr.dma_base + rdata->rx.hdr.dma_off; + buf_dma = rdata->rx.buf.dma_base + rdata->rx.buf.dma_off; + rdesc->desc0 = cpu_to_le32(lower_32_bits(hdr_dma)); + rdesc->desc1 = cpu_to_le32(upper_32_bits(hdr_dma)); + rdesc->desc2 = cpu_to_le32(lower_32_bits(buf_dma)); + rdesc->desc3 = cpu_to_le32(upper_32_bits(buf_dma)); XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, inte); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 1e9c28d19ef8..aae9d5ecd182 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1765,8 +1765,9 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, /* Start with the header buffer which may contain just the header * or the header plus data */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.hdr.dma, - rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base, + rdata->rx.hdr.dma_off, + rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; @@ -1778,8 +1779,11 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, len -= copy_len; if (len) { /* Add the remaining data as a frag */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.buf.dma, - rdata->rx.buf.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, + rdata->rx.buf.dma_len, + DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, @@ -1945,8 +1949,9 @@ read_again: if (!skb) error = 1; } else if (rdesc_len) { - dma_sync_single_for_cpu(pdata->dev, - rdata->rx.buf.dma, + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, rdata->rx.buf.dma_len, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 63d72a140053..717ce21b6077 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -337,7 +337,8 @@ struct xgbe_buffer_data { struct xgbe_page_alloc pa; struct xgbe_page_alloc pa_unmap; - dma_addr_t dma; + dma_addr_t dma_base; + unsigned long dma_off; unsigned int dma_len; }; From 6c3e921b18edca290099adfddde8a50236bf2d80 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 6 Jul 2015 20:34:55 +0200 Subject: [PATCH 735/839] net: fec: Ensure clocks are enabled while using mdio bus When a switch is attached to the mdio bus, the mdio bus can be used while the interface is not open. If the IPG clock is not enabled, MDIO reads/writes will simply time out. Add support for runtime PM to control this clock. Enable/disable this clock using runtime PM, with open()/close() and mdio read()/write() function triggering runtime PM operations. Since PM is optional, the IPG clock is enabled at probe and is no longer modified by fec_enet_clk_enable(), thus if PM is not enabled in the kernel, it is guaranteed the clock is running when MDIO operations are performed. Signed-off-by: Andrew Lunn Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 88 +++++++++++++++++++---- 1 file changed, 75 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1f89c59b4353..42e20e5385ac 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 +#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1767,7 +1769,13 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1783,18 +1791,30 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - /* return value */ - return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1811,10 +1831,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; } - return 0; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1826,9 +1849,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1852,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1874,8 +1893,6 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2847,10 +2864,14 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(&fep->pdev->dev); + if (IS_ERR_VALUE(ret)) + return ret; + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - return ret; + goto clk_enable; /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. @@ -2881,6 +2902,9 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); +clk_enable: + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2903,6 +2927,9 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); + fec_enet_free_buffers(ndev); return 0; @@ -3388,6 +3415,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3434,6 +3465,8 @@ fec_probe(struct platform_device *pdev) netif_carrier_off(ndev); fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); ret = register_netdev(ndev); if (ret) @@ -3447,6 +3480,12 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); + + pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; failed_register: @@ -3457,6 +3496,8 @@ failed_init: if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3568,7 +3609,28 @@ failed_clk: return ret; } -static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); +static int __maybe_unused fec_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int __maybe_unused fec_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + return clk_prepare_enable(fep->clk_ipg); +} + +static const struct dev_pm_ops fec_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) +}; static struct platform_driver fec_driver = { .driver = { From 4f7d2cdfdde71ffe962399b7020c674050329423 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jul 2015 00:07:52 +0200 Subject: [PATCH 736/839] rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes anymore with respect to their policy, that is, ifla_vfinfo_policy[]. Before, they were part of ifla_policy[], but they have been nested since placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, which is another nested attribute for the actual VF attributes such as IFLA_VF_MAC, IFLA_VF_VLAN, etc. Despite the policy being split out from ifla_policy[] in this commit, it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() testing for struct nlattr, but it doesn't know about the data context and their requirements. Fix, on top of Jason's initial work, does 1) parsing of the attributes with the right policy, and 2) using the resulting parsed attribute table from 1) instead of the nla_for_each_nested() loop (just like we used to do when still part of ifla_policy[]). Reference: http://thread.gmane.org/gmane.linux.network/368913 Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") Reported-by: Jason Gunthorpe Cc: Chris Wright Cc: Sucheta Chakraborty Cc: Greg Rose Cc: Jeff Kirsher Cc: Rony Efraim Cc: Vlad Zolotarov Cc: Nicolas Dichtel Cc: Thomas Graf Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Acked-by: Vlad Zolotarov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 187 ++++++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 91 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 01ced4a889e0..9e433d58d265 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; From fc24f2b2094366da8786f59f2606307e934cea17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 7 Jul 2015 08:34:13 +0300 Subject: [PATCH 737/839] ip_tunnel: fix ipv4 pmtu check to honor inner ip header df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frag needed should be sent only if the inner header asked to not fragment. Currently fragmentation is broken if the tunnel has df set, but df was not asked in the original packet. The tunnel's df needs to be still checked to update internally the pmtu cache. Commit 23a3647bc4f93bac broke it, and this commit fixes the ipv4 df check back to the way it was. Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba4ba65..626d9e56a6bd 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } From b5a983f3141239b5b0b4a4e66efa31f8a26354b8 Mon Sep 17 00:00:00 2001 From: Mazhar Rana Date: Tue, 7 Jul 2015 15:04:50 +0530 Subject: [PATCH 738/839] bonding: "primary_reselect" with "failure" is not working properly When "primary_reselect" is set to "failure", primary interface should not become active until current active slave is down. But if we set first member of bond device as a "primary" interface and "primary_reselect" is set to "failure" then whenever primary interface's link get back(up) it become active slave even if current active slave is still up. With this patch, "bond_find_best_slave" will not traverse members if primary interface is not candidate for failover/reselection and current active slave is still up. Signed-off-by: Mazhar Rana Signed-off-by: Jay Vosburgh Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 51 ++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 19eb990d398c..317a49480475 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -689,40 +689,57 @@ out: } -static bool bond_should_change_active(struct bonding *bond) +static struct slave *bond_choose_primary_or_current(struct bonding *bond) { struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); - if (!prim || !curr || curr->link != BOND_LINK_UP) - return true; + if (!prim || prim->link != BOND_LINK_UP) { + if (!curr || curr->link != BOND_LINK_UP) + return NULL; + return curr; + } + if (bond->force_primary) { bond->force_primary = false; - return true; + return prim; + } + + if (!curr || curr->link != BOND_LINK_UP) + return prim; + + /* At this point, prim and curr are both up */ + switch (bond->params.primary_reselect) { + case BOND_PRI_RESELECT_ALWAYS: + return prim; + case BOND_PRI_RESELECT_BETTER: + if (prim->speed < curr->speed) + return curr; + if (prim->speed == curr->speed && prim->duplex <= curr->duplex) + return curr; + return prim; + case BOND_PRI_RESELECT_FAILURE: + return curr; + default: + netdev_err(bond->dev, "impossible primary_reselect %d\n", + bond->params.primary_reselect); + return curr; } - if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && - (prim->speed < curr->speed || - (prim->speed == curr->speed && prim->duplex <= curr->duplex))) - return false; - if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) - return false; - return true; } /** - * find_best_interface - select the best available slave to be the active one + * bond_find_best_slave - select the best available slave to be the active one * @bond: our bonding struct */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *slave, *bestslave = NULL, *primary; + struct slave *slave, *bestslave = NULL; struct list_head *iter; int mintime = bond->params.updelay; - primary = rtnl_dereference(bond->primary_slave); - if (primary && primary->link == BOND_LINK_UP && - bond_should_change_active(bond)) - return primary; + slave = bond_choose_primary_or_current(bond); + if (slave) + return slave; bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) From 9e9f665a18008999e749bd41394efcbf5c37a726 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:00 +0100 Subject: [PATCH 739/839] sfc: refactor code in efx_ef10_set_mac_address() Re-organize the structure of error handling to avoid having to duplicate the netif_err() around the ifdefs. The only change to the behaviour of the error-handling is that the PF's data structure to record VF details should only be updated if the original command succeeded. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 45 +++++++++++++++------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 847643455468..9740cd02f81d 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3829,38 +3829,27 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx_net_open(efx->net_dev); netif_device_attach(efx->net_dev); -#if !defined(CONFIG_SFC_SRIOV) - if (rc == -EPERM) - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); -#else - if (rc == -EPERM) { +#ifdef CONFIG_SFC_SRIOV + if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - /* Switch to PF and change MAC address on vport */ - if (efx->pci_dev->is_virtfn && pci_dev_pf) { - struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); + if (rc == -EPERM) { + struct efx_nic *efx_pf; - if (!efx_ef10_sriov_set_vf_mac(efx_pf, + /* Switch to PF and change MAC address on vport */ + efx_pf = pci_get_drvdata(pci_dev_pf); + + rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, - efx->net_dev->dev_addr)) - return 0; - } - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); - } else if (efx->pci_dev->is_virtfn) { - /* Successfully changed by VF (with MAC spoofing), so update the - * parent PF if possible. - */ - struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - - if (pci_dev_pf) { + efx->net_dev->dev_addr); + } else if (!rc) { struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; unsigned int i; + /* MAC address successfully changed by VF (with MAC + * spoofing) so update the parent PF if possible. + */ for (i = 0; i < efx_pf->vf_count; ++i) { struct ef10_vf *vf = nic_data->vf + i; @@ -3871,8 +3860,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) } } } - } + } else #endif + if (rc == -EPERM) { + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable" + " mac-spoofing on this interface\n"); + } + return rc; } From 7a186f4703de6f4b7feb5d09735b096145b8918c Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:19 +0100 Subject: [PATCH 740/839] sfc: add legacy method for changing a PF's MAC address Some versions of MCFW do not support the MC_CMD_VADAPTOR_SET_MAC command, and ENOSYS will be returned. If the PF created its own vport, the function's datapath must be stopped and the vport can be reconfigured to reflect the new MAC address. If the MCFW created the vport for the PF (which is the case when the nic_data->vport_mac is blank), nothing further needs to be done as the vport is not under the control of the PF. This only applies to PFs because the MCFW in question does not support VFs. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 120 ++++++++++++++++++++++++++ drivers/net/ethernet/sfc/ef10_sriov.c | 42 --------- drivers/net/ethernet/sfc/ef10_sriov.h | 6 ++ 3 files changed, 126 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9740cd02f81d..e0cb3617f549 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -101,6 +101,11 @@ static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) return resource_size(&efx->pci_dev->resource[bar]); } +static bool efx_ef10_is_vf(struct efx_nic *efx) +{ + return efx->type->is_vf; +} + static int efx_ef10_get_pf_index(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); @@ -677,6 +682,48 @@ static int efx_ef10_probe_pf(struct efx_nic *efx) return efx_ef10_probe(efx); } +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + #ifdef CONFIG_SFC_SRIOV static int efx_ef10_probe_vf(struct efx_nic *efx) { @@ -3804,6 +3851,72 @@ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) WARN_ON(remove_failed); } +static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 mac_old[ETH_ALEN]; + int rc, rc2; + + /* Only reconfigure a PF-created vport */ + if (is_zero_ether_addr(nic_data->vport_mac)) + return 0; + + efx_device_detach_sync(efx); + efx_net_stop(efx->net_dev); + down_write(&efx->filter_sem); + efx_ef10_filter_table_remove(efx); + up_write(&efx->filter_sem); + + rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id); + if (rc) + goto restore_filters; + + ether_addr_copy(mac_old, nic_data->vport_mac); + rc = efx_ef10_vport_del_mac(efx, nic_data->vport_id, + nic_data->vport_mac); + if (rc) + goto restore_vadaptor; + + rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, + efx->net_dev->dev_addr); + if (!rc) { + ether_addr_copy(nic_data->vport_mac, efx->net_dev->dev_addr); + } else { + rc2 = efx_ef10_vport_add_mac(efx, nic_data->vport_id, mac_old); + if (rc2) { + /* Failed to add original MAC, so clear vport_mac */ + eth_zero_addr(nic_data->vport_mac); + goto reset_nic; + } + } + +restore_vadaptor: + rc2 = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + if (rc2) + goto reset_nic; +restore_filters: + down_write(&efx->filter_sem); + rc2 = efx_ef10_filter_table_probe(efx); + up_write(&efx->filter_sem); + if (rc2) + goto reset_nic; + + rc2 = efx_net_open(efx->net_dev); + if (rc2) + goto reset_nic; + + netif_device_attach(efx->net_dev); + + return rc; + +reset_nic: + netif_err(efx, drv, efx->net_dev, + "Failed to restore when changing MAC address - scheduling reset\n"); + efx_schedule_reset(efx, RESET_TYPE_DATAPATH); + + return rc ? rc : rc2; +} + static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); @@ -3866,6 +3979,13 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) netif_err(efx, drv, efx->net_dev, "Cannot change MAC address; use sfboot to enable" " mac-spoofing on this interface\n"); + } else if (rc == -ENOSYS && !efx_ef10_is_vf(efx)) { + /* If the active MCFW does not support MC_CMD_VADAPTOR_SET_MAC + * fall-back to the method of changing the MAC address on the + * vport. This only applies to PFs because such versions of + * MCFW do not support VFs. + */ + rc = efx_ef10_vport_set_mac_address(efx); } return rc; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 6c9b6e45509a..7485f71b4e2f 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -29,30 +29,6 @@ static int efx_ef10_evb_port_assign(struct efx_nic *efx, unsigned int port_id, NULL, 0, NULL); } -static int efx_ef10_vport_add_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - -static int efx_ef10_vport_del_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - static int efx_ef10_vswitch_alloc(struct efx_nic *efx, unsigned int port_id, unsigned int vswitch_type) { @@ -136,24 +112,6 @@ static int efx_ef10_vport_free(struct efx_nic *efx, unsigned int port_id) NULL, 0, NULL); } -static int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - -static int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h index db4ef537c610..6d25b92cb45e 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.h +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -65,5 +65,11 @@ int efx_ef10_vswitching_restore_pf(struct efx_nic *efx); int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id); +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id); #endif /* EF10_SRIOV_H */ From 535a61777f44eebcb71f2a6866f95b17ee0f13c7 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:33 +0100 Subject: [PATCH 741/839] sfc: suppress handled MCDI failures when changing the MAC address Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e0cb3617f549..605cc8948594 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3933,8 +3933,8 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx->net_dev->dev_addr); MCDI_SET_DWORD(inbuf, VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID, nic_data->vport_id); - rc = efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, - sizeof(inbuf), NULL, 0, NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, + sizeof(inbuf), NULL, 0, NULL); efx_ef10_filter_table_probe(efx); up_write(&efx->filter_sem); @@ -3986,6 +3986,9 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); + } else { + efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, + sizeof(inbuf), NULL, 0, rc); } return rc; From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: [PATCH 742/839] net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed534ef2..3a7567f690f3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); From 1973db0df7c3bd69de2a1041d3364567287771d9 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 7 Jul 2015 18:30:39 +0530 Subject: [PATCH 743/839] drivers: net: cpsw: fix crash while accessing second slave ethernet interface When cpsw's number of slave is set to 1 in device tree and while accessing second slave ndev and priv in cpsw_tx_interrupt(), there is a kernel crash. This is due to cpsw_get_slave_priv() not verifying number of slaves while retriving netdev priv and returns a invalid memory region. Fixing the issue by introducing number of slave check in cpsw_get_slave_priv() and cpsw_get_slave_ndev(). [ 15.879589] Unable to handle kernel paging request at virtual address 0f0e142c [ 15.888540] pgd = ed374000 [ 15.891359] [0f0e142c] *pgd=00000000 [ 15.895105] Internal error: Oops: 5 [#1] SMP ARM [ 15.899936] Modules linked in: [ 15.903139] CPU: 0 PID: 593 Comm: udhcpc Tainted: G W 4.1.0-12205-gfda8b18-dirty #10 [ 15.912386] Hardware name: Generic AM43 (Flattened Device Tree) [ 15.918557] task: ed2a2e00 ti: ed3fe000 task.ti: ed3fe000 [ 15.924187] PC is at cpsw_tx_interrupt+0x30/0x44 [ 15.929008] LR is at _raw_spin_unlock_irqrestore+0x40/0x44 [ 15.934726] pc : [] lr : [] psr: 20000193 [ 15.934726] sp : ed3ffc08 ip : ed2a2e40 fp : 00000000 [ 15.946685] r10: c0969ce8 r9 : c0969cfc r8 : 00000000 [ 15.952129] r7 : 000000c6 r6 : ee54ab00 r5 : ee169c64 r4 : ee534e00 [ 15.958932] r3 : 0f0e0d0c r2 : 00000000 r1 : ed3ffbc0 r0 : 00000001 [ 15.965735] Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 15.973261] Control: 10c5387d Table: ad374059 DAC: 00000015 [ 15.979246] Process udhcpc (pid: 593, stack limit = 0xed3fe218) [ 15.985414] Stack: (0xed3ffc08 to 0xed400000) [ 15.989954] fc00: ee54ab00 c009928c c0a9e648 60000193 000032e4 ee169c00 [ 15.998478] fc20: ee169c64 ee169c00 ee169c64 ee54ab00 00000001 00000001 ee67e268 ee008800 [ 16.006995] fc40: ee534800 c009946c ee169c00 ee169c64 c08bd660 c009c370 c009c2a4 000000c6 [ 16.015513] fc60: c08b75c4 c08b0854 00000000 c0098b3c 000000c6 c0098c50 ed3ffcb0 0000003a [ 16.024033] fc80: ed3ffcb0 fa24010c c08b7800 fa240100 ee7e9880 c00094c4 c05ef4e8 60000013 [ 16.032556] fca0: ffffffff ed3ffce4 ee7e9880 c05ef964 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.041080] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.049603] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff 00000001 00000001 [ 16.058121] fd00: ee536bf8 c0487a04 00000000 00000000 ee534800 00000000 00000156 c048c990 [ 16.066645] fd20: 00000000 00000000 c0969f40 00000000 00000000 c05000e8 00000001 00000000 [ 16.075167] fd40: 00000000 c051eefc 00000000 ee67e268 00000000 00000000 ee51b800 ed3ffd9c [ 16.083690] fd60: 00000000 ee67e200 ee51b800 ee7e9880 ee67e268 00000000 00000000 ee67e200 [ 16.092211] fd80: ee51b800 ee7e9880 ee67e268 ee534800 ee67e200 c051eedc ee67e268 00000010 [ 16.100727] fda0: 00000000 00000000 ee7e9880 ee534800 00000000 ee67e268 ee51b800 c05006fc [ 16.109247] fdc0: ee67e268 00000001 c0500488 00000156 ee7e9880 00000000 ed3fe000 fffffff4 [ 16.117771] fde0: ed3fff1c ee7e9880 ee534800 00000148 00000000 ed1f8340 00000000 00000000 [ 16.126289] fe00: 00000000 c05a9054 00000000 00000000 00000156 c0ab62a8 00000010 ed3e7000 [ 16.134812] fe20: 00000000 00000008 edcfb700 ed3fff1c c0fb5f94 ed2a2e00 c0fb5f64 000005d8 [ 16.143336] fe40: c0a9b3b8 00000000 ed3e7070 00000000 00000000 00000000 00009f40 00000000 [ 16.151858] fe60: 00000000 00020022 00110008 00000000 00000000 43004400 00000000 ffffffff [ 16.160374] fe80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 16.168898] fea0: edcfb700 bee5f380 00000014 00000000 ed3fe000 00000000 00004400 c04e2b64 [ 16.177415] fec0: 00000002 c04e3b00 ed3ffeec 00000001 0000011a 00000000 00000000 bee5f394 [ 16.185937] fee0: 00000148 ed3fff10 00000014 00000001 00000000 00000000 ed3ffee4 00000000 [ 16.194459] ff00: 00000000 00000000 00000000 c04e3664 00080011 00000002 06000000 ffffffff [ 16.202980] ff20: 0000ffff ffffffff 0000ffff c008dd54 ee5a6f08 ee636e80 c096972d c0089c14 [ 16.211499] ff40: 00000000 60000013 ee5a6f40 60000013 00000000 ee5a6f40 00000002 00000006 [ 16.220023] ff60: 00000000 edcfb700 00000001 ed2a2e00 c000f60c 00000001 0000011a c008ea34 [ 16.228540] ff80: 00000006 00000000 bee5f380 00000014 bee5f380 00000014 bee5f380 00000122 [ 16.237059] ffa0: c000f7c4 c000f5e0 bee5f380 00000014 00000006 bee5f394 00000148 00000000 [ 16.245581] ffc0: bee5f380 00000014 bee5f380 00000122 fffffd6e 00004300 00004800 00004400 [ 16.254104] ffe0: bee5f378 bee5f36c 000307ec b6f39044 40000010 00000006 ed36fa40 00000000 [ 16.262642] [] (cpsw_tx_interrupt) from [] (handle_irq_event_percpu+0x64/0x204) [ 16.272076] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x40/0x64) [ 16.281330] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xcc/0x1a8) [ 16.290220] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [ 16.299197] [] (generic_handle_irq) from [] (__handle_domain_irq+0x64/0xdc) [ 16.308273] [] (__handle_domain_irq) from [] (gic_handle_irq+0x20/0x60) [ 16.316987] [] (gic_handle_irq) from [] (__irq_svc+0x44/0x5c) [ 16.324779] Exception stack(0xed3ffcb0 to 0xed3ffcf8) [ 16.330044] fca0: 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.338567] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.347090] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff [ 16.353987] [] (__irq_svc) from [] (_raw_spin_unlock_irqrestore+0x34/0x44) [ 16.362973] [] (_raw_spin_unlock_irqrestore) from [] (cpdma_check_free_tx_desc+0x60/0x6c) [ 16.373311] [] (cpdma_check_free_tx_desc) from [] (cpsw_ndo_start_xmit+0xb4/0x1ac) [ 16.383017] [] (cpsw_ndo_start_xmit) from [] (dev_hard_start_xmit+0x2a4/0x4c0) [ 16.392364] [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0xf4/0x210) [ 16.401246] [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x2ac/0x7bc) [ 16.409960] [] (__dev_queue_xmit) from [] (packet_sendmsg+0xc68/0xeb4) [ 16.418585] [] (packet_sendmsg) from [] (sock_sendmsg+0x14/0x24) [ 16.426663] [] (sock_sendmsg) from [] (SyS_sendto+0xb4/0xe0) [ 16.434377] [] (SyS_sendto) from [] (ret_fast_syscall+0x0/0x54) [ 16.442360] Code: e5943118 e593303c e3530000 0a000002 (e5930720) [ 16.448716] ---[ end trace a68159f094d85ba6 ]--- [ 16.453526] Kernel panic - not syncing: Fatal exception in interrupt [ 16.460149] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Mugunthan V N Cc: # v3.8+ Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 462820514fae..e778703880d3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -509,9 +509,11 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { (func)(slave++, ##arg); \ } while (0) #define cpsw_get_slave_ndev(priv, __slave_no__) \ - (priv->slaves[__slave_no__].ndev) + ((__slave_no__ < priv->data.slaves) ? \ + priv->slaves[__slave_no__].ndev : NULL) #define cpsw_get_slave_priv(priv, __slave_no__) \ - ((priv->slaves[__slave_no__].ndev) ? \ + (((__slave_no__ < priv->data.slaves) && \ + (priv->slaves[__slave_no__].ndev)) ? \ netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ #define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ From f1158b74e54f2e2462ba5e2f45a118246d9d5b43 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 7 Jul 2015 15:55:56 +0200 Subject: [PATCH 744/839] bridge: mdb: zero out the local br_ip variable before use Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") there's a check in br_ip_equal() for a matching vlan id, but the mdb functions were not modified to use (or at least zero it) so when an entry was added it would have a garbage vlan id (from the local br_ip variable in __br_mdb_add/del) and this would prevent it from being matched and also deleted. So zero out the whole local ip var to protect ourselves from future changes and also to fix the current bug, since there's no vlan id support in the mdb uapi - use always vlan id 0. Example before patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent RTNETLINK answers: Invalid argument After patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb Signed-off-by: Nikolay Aleksandrov Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3bfc675cf0d1..60868c212679 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -374,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -420,6 +421,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) From 671b53eec2edcbfac3e53d02cf3d0c6d9ecc07de Mon Sep 17 00:00:00 2001 From: Shradha Shah Date: Wed, 8 Jul 2015 10:12:45 +0100 Subject: [PATCH 745/839] sfc: Ensure down_write(&filter_sem) and up_write() are matched before calling efx_net_open() This patch avoids the double up_write to filter_sem if efx_net_open() fails. Resolves: 2d432f20d27c1813a2746008e16dd6ce12a14dc1 Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10_sriov.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7485f71b4e2f..3c17f274e802 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -598,21 +598,21 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, vf->vlan, &vf->vport_id); if (rc) - goto reset_nic; + goto reset_nic_up_write; restore_mac: if (!is_zero_ether_addr(vf->mac)) { rc2 = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); if (rc2) { eth_zero_addr(vf->mac); - goto reset_nic; + goto reset_nic_up_write; } } restore_evb_port: rc2 = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); if (rc2) - goto reset_nic; + goto reset_nic_up_write; else vf->vport_assigned = 1; @@ -620,14 +620,16 @@ restore_vadaptor: if (vf->efx) { rc2 = efx_ef10_vadaptor_alloc(vf->efx, EVB_PORT_ID_ASSIGNED); if (rc2) - goto reset_nic; + goto reset_nic_up_write; } restore_filters: if (vf->efx) { rc2 = vf->efx->type->filter_table_probe(vf->efx); if (rc2) - goto reset_nic; + goto reset_nic_up_write; + + up_write(&vf->efx->filter_sem); up_write(&vf->efx->filter_sem); @@ -639,9 +641,12 @@ restore_filters: } return rc; +reset_nic_up_write: + if (vf->efx) + up_write(&vf->efx->filter_sem); + reset_nic: if (vf->efx) { - up_write(&vf->efx->filter_sem); netif_err(efx, drv, efx->net_dev, "Failed to restore VF - scheduling reset.\n"); efx_schedule_reset(vf->efx, RESET_TYPE_DATAPATH); From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 7 Jul 2015 13:56:57 -0400 Subject: [PATCH 746/839] ipv4: add support for linkdown sysctl to netconf This kernel patch exports the value of the new ignore_routes_with_linkdown via netconf. v2: changes to notify userspace via netlink when sysctl values change and proposed for 'net' since this could be considered a bugfix Signed-off-by: Andy Gospodarek Suggested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 669a1f0b1d97..23cbd34e4ac7 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -15,6 +15,7 @@ enum { NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, NETCONFA_PROXY_NEIGH, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7498716e8f54..e813196c91c7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); return size; } @@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } + if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + ifindex, cnf); + } } return ret; From 0769636cb5b95665ebadcd1a41c46f331f5a397d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 7 Jul 2015 14:02:18 -0400 Subject: [PATCH 747/839] vmxnet3: prevent receive getting out of sequence on napi poll vmxnet3's current napi path is built to count every rx descriptor we recieve, and use that as a count of the napi budget. That means its possible to return from a napi poll halfway through recieving a fragmented packet accross multiple dma descriptors. If that happens, the next napi poll will start with the descriptor ring in an improper state (e.g. the first descriptor we look at may have the end-of-packet bit set), which will cause a BUG halt in the driver. Fix the issue by only counting whole received packets in the napi poll and returning that value, rather than the descriptor count. Tested by the reporter and myself, successfully Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "David S. Miller" Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index da11bb5e9c7f..46f4caddccbe 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1216,7 +1216,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, static const u32 rxprod_reg[2] = { VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 }; - u32 num_rxd = 0; + u32 num_pkts = 0; bool skip_page_frags = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; @@ -1235,13 +1235,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; struct vmxnet3_cmd_ring *ring = NULL; - if (num_rxd >= quota) { + if (num_pkts >= quota) { /* we may stop even before we see the EOP desc of * the current pkt */ break; } - num_rxd++; BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); idx = rcd->rxdIdx; ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; @@ -1413,6 +1412,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + num_pkts++; } rcd_done: @@ -1443,7 +1443,7 @@ rcd_done: &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); } - return num_rxd; + return num_pkts; } From 4eed4d8ff984abcb983ada5b3dbf56fce35f1068 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 7 Jul 2015 20:48:55 +0200 Subject: [PATCH 748/839] 3c59x: Fix shared IRQ handling As its first order of business, boomerang_interrupt() checks whether the device really has any pending interrupts. If it does not, it does nothing and returns, but it still returns IRQ_HANDLED. This is wrong: interrupt was not handled, IRQ handlers of other devices sharing this IRQ line need to be called. vortex_interrupt() has it right: it returns IRQ_NONE in this case via IRQ_RETVAL(0). Do the same in boomerang_interrupt(). Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 41095ebad97f..2d1ce3c5d0dd 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2382,6 +2382,7 @@ boomerang_interrupt(int irq, void *dev_id) void __iomem *ioaddr; int status; int work_done = max_interrupt_work; + int handled = 0; ioaddr = vp->ioaddr; @@ -2400,6 +2401,7 @@ boomerang_interrupt(int irq, void *dev_id) if ((status & IntLatch) == 0) goto handler_exit; /* No interrupt: shared IRQs can cause this */ + handled = 1; if (status == 0xffff) { /* h/w no longer present (hotplug)? */ if (vortex_debug > 1) @@ -2501,7 +2503,7 @@ boomerang_interrupt(int irq, void *dev_id) handler_exit: vp->handling_irq = 0; spin_unlock(&vp->lock); - return IRQ_HANDLED; + return IRQ_RETVAL(handled); } static int vortex_rx(struct net_device *dev) From d065c3c17dae95832badf6329512dd057c905890 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 7 Jul 2015 13:54:12 -0700 Subject: [PATCH 749/839] drivers/net/usb: add device id for NVIDIA Tegra USB 3.0 Ethernet This device is sold as 'NVIDIA Tegra USB 3.0 Ethernet'. Chipset is RTL8153 and works with r8152. Signed-off-by: Zheng Liu Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4545e78840b0..35a2bffe848a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -523,6 +523,7 @@ static const struct driver_info wwan_info = { #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef +#define NVIDIA_VENDOR_ID 0x0955 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -710,6 +711,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aafa1a1898e4..7f6419ebb5e1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -494,6 +494,7 @@ enum rtl8152_flags { #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_NVIDIA 0x0955 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -4117,6 +4118,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; From c936835c1ec6f871f32c9b87a7708700320075b3 Mon Sep 17 00:00:00 2001 From: Peter Dunning Date: Wed, 8 Jul 2015 10:05:10 +0100 Subject: [PATCH 750/839] sfc: Report TX completions to BQL after all TX events in interrupt The limit for BQL is updated each time we call netdev_tx_completed_queue. Without this patch the BQL limit was updated for every TX event we see. The issue was that this only updated the limit to handle the data we complete in two events as the first event wouldn't show that enough traffic had been processed between them. This was OK when interrupt moderation was off but not when it was on as more data had to be completed in a single interrupt. The patch changes this so that we do report the completion to BQL only when all the TX events in the interrupt have been processed. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 14 ++++++++++++++ drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/tx.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 804b9ad553d3..03bc03b67f08 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -245,11 +245,17 @@ static int efx_check_disabled(struct efx_nic *efx) */ static int efx_process_channel(struct efx_channel *channel, int budget) { + struct efx_tx_queue *tx_queue; int spent; if (unlikely(!channel->enabled)) return 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + spent = efx_nic_process_eventq(channel, budget); if (spent && efx_channel_has_rx_queue(channel)) { struct efx_rx_queue *rx_queue = @@ -259,6 +265,14 @@ static int efx_process_channel(struct efx_channel *channel, int budget) efx_fast_push_rx_descriptors(rx_queue, true); } + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, tx_queue->bytes_compl); + } + } + return spent; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index d72f522bf9c3..47d1e3a96522 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -241,6 +241,8 @@ struct efx_tx_queue { unsigned int read_count ____cacheline_aligned_in_smp; unsigned int old_write_count; unsigned int merge_events; + unsigned int bytes_compl; + unsigned int pkts_compl; /* Members used only on the xmit path */ unsigned int insert_count ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index aaf2987512b5..1833a0146571 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -617,7 +617,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; if (pkts_compl > 1) ++tx_queue->merge_events; From d5de1987853a7778bb048a9e52b3486eb9aceb17 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:16:49 +0200 Subject: [PATCH 751/839] macvtap: Destroy minor_idr on module_exit Destroy minor_idr on module_exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6a64197f5bce..b4b61910f616 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1294,6 +1294,7 @@ static void macvtap_exit(void) class_unregister(macvtap_class); cdev_del(&macvtap_cdev); unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + idr_destroy(&minor_idr); } module_exit(macvtap_exit); From 19ee835cdb0b5a8eb11a68f25a51b8039d564488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Jun 2015 14:01:19 +0100 Subject: [PATCH 752/839] drm/i915: Declare the swizzling unknown for L-shaped configurations The old style of memory interleaving swizzled upto the end of the first even bank of memory, and then used the remainder as unswizzled on the unpaired bank - i.e. swizzling is not constant for all memory. This causes problems when we try to migrate memory and so the kernel prevents migration at all when we detect L-shaped inconsistent swizzling. However, this issue also extends to userspace who try to manually detile into memory as the swizzling for an individual page is unknown (it depends on its physical address only known to the kernel), userspace cannot correctly swizzle objects. v2: Mark the global swizzling as unknown rather than adjust the value reported to userspace. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91105 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_tiling.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 633bd1fcab69..d61e74a08f82 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,8 +183,18 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { + /* Since the swizzling may vary within an + * object, we have no idea what the swizzling + * is for any page in particular. Thus we + * cannot migrate tiled pages using the GPU, + * nor can we tell userspace what the exact + * swizzling is for any object. + */ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } } if (dcc == 0xffffffff) { From 52613921b31d8573a212a4b0854b390a18d9849c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 29 Jun 2015 20:28:35 +0300 Subject: [PATCH 753/839] Revert "drm/i915: Allocate context objects from stolen" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stolen gets trashed during hibernation, so storing contexts there is not a very good idea. On my IVB machines this leads to a totally dead GPU on resume. A reboot is required to resurrect it. So let's not store contexts where they will get trampled. This reverts commit 149c86e74fe44dcbac5e9f8d145c5fbc5dc21261. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8867818b1401..d65cbe6afb92 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,9 +157,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) struct drm_i915_gem_object *obj; int ret; - obj = i915_gem_object_create_stolen(dev, size); - if (obj == NULL) - obj = i915_gem_alloc_object(dev, size); + obj = i915_gem_alloc_object(dev, size); if (obj == NULL) return ERR_PTR(-ENOMEM); From 0ec62aaee919991d9e0e278f70776d560d95fc29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: [PATCH 754/839] cris: Replace do_posix_clock_monotonic_gettime() ktime_get_ts() is the proper interface today. Signed-off-by: Thomas Gleixner Cc: Jesper Nilsson --- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 4dda9bd6b8fb..e989cee77414 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1464,7 +1464,7 @@ static inline void handle_rx_packet(struct sync_port *port) if (port->write_ts_idx == NBR_IN_DESCR) port->write_ts_idx = 0; idx = port->write_ts_idx++; - do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + ktime_get_ts(&port->timestamp[idx]); port->in_buffer_len += port->inbufchunk; } spin_unlock_irqrestore(&port->lock, flags); From 1f6823faa8c563431a94e614d2b63ce16bb6f658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: [PATCH 755/839] time: Get rid of do_posix_clock_monotonic_gettime All users gone. Remove it before we get another one. Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* From a6335fa11e08fc386740557b2a4c1b7ff34bc499 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 2 Jul 2015 17:16:01 +0200 Subject: [PATCH 756/839] MIPS: bootmem: Don't use memory holes for page bitmap Commit f9a7febd leads to a fact that mapstart and therefore a page bitmap for bootmem allocator immediately follows initrd_end. This doesn't always work well on Octeon, where there are holes in PFN ranges (refer to 5b3b1688 and 4MB-aligned PFN allocation). Depending on the inird location it could happen, that mapstart would be in an area not allocated by plat_mem_setup() in arch/mips/cavium-octeon/setup.c, but in the alignment hole between initrd and the next PFN area. Later on this memory will be unconditionally made available to buddy allocator at the end of free_all_bootmem_core() (mm/bootmem.c). All of this results in Linux using the memory not designated for Linux in Octeon's plat_mem_setup(), which in turn means corruption of the memory used by another OS/baremetal code on the same SoC. It doesn't look to me as a problem of Octeon platform code, but rather as an inability of f9a7febd to deal correctly with the fragmented memory-mappings. Proposed fix moves the check for initrd address to the same calculation-loop in bootmem_init() (arch/mips/kernel/setup.c), which also accounts for kernel code location. This should result in mapstart located starting from the first PFN area after kernel code AND initrd. Signed-off-by: Alexander Sverdlin Cc: linux-mips@linux-mips.org Cc: David Daney Cc: Zubair Lutfullah Kakakhel Cc: Huacai Chen Cc: Andreas Herrmann Cc: Joe Perches Cc: Steven J. Hill Cc: Yusuf Khan Cc: Michael Kreuzer Cc: Aaro Koskinen Patchwork: https://patchwork.linux-mips.org/patch/10594/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index be73c491182b..008b3378653a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -337,6 +337,11 @@ static void __init bootmem_init(void) min_low_pfn = start; if (end <= reserved_end) continue; +#ifdef CONFIG_BLK_DEV_INITRD + /* mapstart should be after initrd_end */ + if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) + continue; +#endif if (start >= mapstart) continue; mapstart = max(reserved_end, start); @@ -366,14 +371,6 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } -#ifdef CONFIG_BLK_DEV_INITRD - /* - * mapstart should be after initrd_end - */ - if (initrd_end) - mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); -#endif - /* * Initialize the boot-time allocator with low memory only. */ From 761b4493bbb7b614387794f39e3969716e9e0215 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:29:20 +0100 Subject: [PATCH 757/839] MIPS: kernel: traps: Fix broken indentation Fix broken indentation caused by the SMTC removal commit b633648c5ad3cfbda0b3daea50d2135d44899259 ("MIPS: MT: Remove SMTC support") Signed-off-by: Markos Chandras Fixes: b633648c5ad3c ("MIPS: MT: Remove SMTC support") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10581/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2a7b38ed23f0..e207a43b5f8f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2130,10 +2130,10 @@ void per_cpu_trap_init(bool is_boot_cpu) BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - /* Boot CPU's cache setup in setup_arch(). */ - if (!is_boot_cpu) - cpu_cache_init(); - tlb_init(); + /* Boot CPU's cache setup in setup_arch(). */ + if (!is_boot_cpu) + cpu_cache_init(); + tlb_init(); TLBMISS_HANDLER_SETUP(); } From e9d92d223381f1f3be5d87322b576721d3b93612 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:00 +0100 Subject: [PATCH 758/839] MIPS: Fix branch emulation for BLTC and BGEC instructions Commits f1b44067c19258b7614e3cd09dfe8d8e12ff5895 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") and commit a8ff66f52d3f17b5ae793955270675c197f73d6c ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") added support for emulating various branch compact instructions. However, it missed the case for those which use the old BLEZL and BGTZL opcodes leading to random crashes when the R6 emulator is disabled. We fix this by ensuring that the 'rt' field is not zero which is always true for these branch compact instructions. Fixes: f1b44067c192 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") Fixes: a8ff66f52d3f ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: Markos Chandras Patchwork: https://patchwork.linux-mips.org/patch/10582/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/branch.c | 4 ++-- arch/mips/math-emu/cp1emu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index c0c5e5972256..d8f9b357b222 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -600,7 +600,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case blezl_op: /* not really i_format */ - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case blez_op: /* @@ -635,7 +635,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case bgtz_op: /* diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219..00c241ae04ce 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -551,7 +551,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case blezl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case blez_op: @@ -588,7 +588,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case bgtz_op: /* From 143fefc8f315cd10e046e6860913c421c3385cb1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:01 +0100 Subject: [PATCH 759/839] MIPS: Fix erroneous JR emulation for MIPS R6 Commit 5f9f41c474befb4ebbc40b27f65bb7d649241581 ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") added support for emulating the JR instruction on MIPS R6 cores but that introduced a bug which could be triggered when hitting a JALR opcode because the code used the wrong field in the 'r_format' struct to determine the instruction opcode. This lead to crashes because an emulated JALR instruction was treated as a JR one when the R6 emulator was turned off. Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10583/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/cp1emu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 00c241ae04ce..712f17a2ecf2 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; From fd5ed3066bb2f47814fe53cdc56d11a678551ae1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:28 +0100 Subject: [PATCH 760/839] MIPS: kernel: smp-cps: Fix 64-bit compatibility errors due to pointer casting Commit 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") added hotplug support in the SMP/CPS implementation but it introduced a few build problems on 64-bit kernels due to pointer being casted to and from 'int' C types. We fix this problem by using 'unsigned long' instead which should match the size of the pointers in 32/64-bit kernels. Finally, we fix the comment since the CM base address is loaded to v1($3) instead of v0. Fixes the following build problems: arch/mips/kernel/smp-cps.c: In function 'wait_for_sibling_halt': arch/mips/kernel/smp-cps.c:366:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] [...] arch/mips/kernel/smp-cps.c: In function 'cps_cpu_die': arch/mips/kernel/smp-cps.c:427:13: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] cc1: all warnings being treated as errors Fixes: 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10586/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-cps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 4251d390b5b6..c88937745b4e 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -133,7 +133,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) /* * Patch the start of mips_cps_core_entry to provide: * - * v0 = CM base address + * v1 = CM base address * s0 = kseg0 CCA */ entry_code = (u32 *)&mips_cps_core_entry; @@ -369,7 +369,7 @@ void play_dead(void) static void wait_for_sibling_halt(void *ptr_cpu) { - unsigned cpu = (unsigned)ptr_cpu; + unsigned cpu = (unsigned long)ptr_cpu; unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]); unsigned halted; unsigned long flags; @@ -430,7 +430,7 @@ static void cps_cpu_die(unsigned int cpu) */ err = smp_call_function_single(cpu_death_sibling, wait_for_sibling_halt, - (void *)cpu, 1); + (void *)(unsigned long)cpu, 1); if (err) panic("Failed to call remote sibling CPU\n"); } From 81a02e34ded906357deac7003fbb0d36b6cc503f Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:29 +0100 Subject: [PATCH 761/839] MIPS: kernel: cps-vec: Replace 'la' macro with PTR_LA The PTR_LA macro will pick the correct "la" or "dla" macro to load an address to a register. This gets rids of the following warnings (and others) when building a 64-bit CPS kernel: arch/mips/kernel/cps-vec.S:63: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:159: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:220: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:240: Warning: la used to load 64-bit address [...] Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10587/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 55b759a0019e..a4b2d81f45dd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -60,7 +60,7 @@ LEAF(mips_cps_core_entry) nop /* This is an NMI */ - la k0, nmi_handler + PTR_LA k0, nmi_handler jr k0 nop @@ -156,7 +156,7 @@ dcache_done: ehb /* Jump to kseg0 */ - la t0, 1f + PTR_LA t0, 1f jr t0 nop @@ -217,7 +217,7 @@ LEAF(excep_intex) .org 0x480 LEAF(excep_ejtag) - la k0, ejtag_debug_handler + PTR_LA k0, ejtag_debug_handler jr k0 nop END(excep_ejtag) @@ -237,7 +237,7 @@ LEAF(mips_cps_core_init) /* ...and for the moment only 1 VPE */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop @@ -298,14 +298,14 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ - la t0, mips_cm_base + PTR_LA t0, mips_cm_base lw t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ lw t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 - la t1, mips_cps_core_bootcfg + PTR_LA t1, mips_cps_core_bootcfg lw t1, 0(t1) addu t0, t0, t1 @@ -351,7 +351,7 @@ LEAF(mips_cps_boot_vpes) 1: /* Enter VPE configuration state */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop 1: mfc0 t1, CP0_MVPCONTROL @@ -445,7 +445,7 @@ LEAF(mips_cps_boot_vpes) /* This VPE should be offline, halt the TC */ li t0, TCHALT_H mtc0 t0, CP0_TCHALT - la t0, 1f + PTR_LA t0, 1f 1: jr.hb t0 nop @@ -466,10 +466,10 @@ LEAF(mips_cps_boot_vpes) .set noat lw $1, TI_CPU(gp) sll $1, $1, LONGLOG - la \dest, __per_cpu_offset + PTR_LA \dest, __per_cpu_offset addu $1, $1, \dest lw $1, 0($1) - la \dest, cps_cpu_state + PTR_LA \dest, cps_cpu_state addu \dest, \dest, $1 .set pop .endm From 977e043d5ea1270ce985e4c165724ff91dc3c3e2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:30 +0100 Subject: [PATCH 762/839] MIPS: kernel: cps-vec: Replace mips32r2 ISA level with mips64r2 mips32r2 is a subset of mips64r2, so we replace mips32r2 with mips64r2 in preparation for 64-bit CPS support. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10588/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index a4b2d81f45dd..bbbd88e994f0 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -229,7 +229,7 @@ LEAF(mips_cps_core_init) nop .set push - .set mips32r2 + .set mips64r2 .set mt /* Only allow 1 TC per VPE to execute... */ @@ -346,7 +346,7 @@ LEAF(mips_cps_boot_vpes) nop .set push - .set mips32r2 + .set mips64r2 .set mt 1: /* Enter VPE configuration state */ From 0586ac75cd0746a4d5c43372dabcea8739ae0176 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:31 +0100 Subject: [PATCH 763/839] MIPS: kernel: cps-vec: Use ta0-ta3 pseudo-registers for 64-bit The cps-vec code assumes O32 ABI and uses t4-t7 in quite a few places. This breaks the build on 64-bit. As a result of which, use the pseudo-registers ta0-ta3 to make the code compatible with 64-bit. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10589/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index bbbd88e994f0..21f714a81ebd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -250,25 +250,25 @@ LEAF(mips_cps_core_init) mfc0 t0, CP0_MVPCONF0 srl t0, t0, MVPCONF0_PVPE_SHIFT andi t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT) - addiu t7, t0, 1 + addiu ta3, t0, 1 /* If there's only 1, we're done */ beqz t0, 2f nop /* Loop through each VPE within this core */ - li t5, 1 + li ta1, 1 1: /* Operate on the appropriate TC */ - mtc0 t5, CP0_VPECONTROL + mtc0 ta1, CP0_VPECONTROL ehb /* Bind TC to VPE (1:1 TC:VPE mapping) */ - mttc0 t5, CP0_TCBIND + mttc0 ta1, CP0_TCBIND /* Set exclusive TC, non-active, master */ li t0, VPECONF0_MVP - sll t1, t5, VPECONF0_XTC_SHIFT + sll t1, ta1, VPECONF0_XTC_SHIFT or t0, t0, t1 mttc0 t0, CP0_VPECONF0 @@ -280,8 +280,8 @@ LEAF(mips_cps_core_init) mttc0 t0, CP0_TCHALT /* Next VPE */ - addiu t5, t5, 1 - slt t0, t5, t7 + addiu ta1, ta1, 1 + slt t0, ta1, ta3 bnez t0, 1b nop @@ -310,7 +310,7 @@ LEAF(mips_cps_boot_vpes) addu t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ - has_mt t6, 1f + has_mt ta2, 1f li t9, 0 /* Find the number of VPEs present in the core */ @@ -334,13 +334,13 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw t7, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, t7 + lw ta3, COREBOOTCFG_VPECONFIG(t0) + addu v0, v0, ta3 #ifdef CONFIG_MIPS_MT /* If the core doesn't support MT then return */ - bnez t6, 1f + bnez ta2, 1f nop jr ra nop @@ -360,12 +360,12 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw t6, COREBOOTCFG_VPEMASK(t0) - move t8, t6 - li t5, 0 + lw ta2, COREBOOTCFG_VPEMASK(t0) + move t8, ta2 + li ta1, 0 /* Check whether the VPE should be running. If not, skip it */ -1: andi t0, t6, 1 +1: andi t0, ta2, 1 beqz t0, 2f nop @@ -373,7 +373,7 @@ LEAF(mips_cps_boot_vpes) mfc0 t0, CP0_VPECONTROL ori t0, t0, VPECONTROL_TARGTC xori t0, t0, VPECONTROL_TARGTC - or t0, t0, t5 + or t0, t0, ta1 mtc0 t0, CP0_VPECONTROL ehb @@ -384,8 +384,8 @@ LEAF(mips_cps_boot_vpes) /* Calculate a pointer to the VPEs struct vpe_boot_config */ li t0, VPEBOOTCFG_SIZE - mul t0, t0, t5 - addu t0, t0, t7 + mul t0, t0, ta1 + addu t0, t0, ta3 /* Set the TC restart PC */ lw t1, VPEBOOTCFG_PC(t0) @@ -423,9 +423,9 @@ LEAF(mips_cps_boot_vpes) mttc0 t0, CP0_VPECONF0 /* Next VPE */ -2: srl t6, t6, 1 - addiu t5, t5, 1 - bnez t6, 1b +2: srl ta2, ta2, 1 + addiu ta1, ta1, 1 + bnez ta2, 1b nop /* Leave VPE configuration state */ From 717f14255a52ad445d6f0eca7d0f22f59d6ba1f8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:32 +0100 Subject: [PATCH 764/839] MIPS: kernel: cps-vec: Replace KSEG0 with CKSEG0 In preparation for 64-bit CPS support, we replace KSEG0 with CKSEG0 so 64-bit kernels can be supported. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10590/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 21f714a81ebd..2f95568e0da5 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -107,7 +107,7 @@ not_nmi: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 add a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) add a0, a0, t0 @@ -134,7 +134,7 @@ icache_done: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 addu a1, a0, t1 subu a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) From b677bc03d757c7d749527cccdd2afcf34ebeeb07 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:33 +0100 Subject: [PATCH 765/839] MIPS: cps-vec: Use macros for various arithmetics and memory operations Replace lw/sw and various arithmetic instructions with macros so the code can work on 64-bit kernels as well. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10591/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 2f95568e0da5..1b6ca634e646 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -108,9 +108,9 @@ not_nmi: mul t1, t1, t2 li a0, CKSEG0 - add a1, a0, t1 + PTR_ADD a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) - add a0, a0, t0 + PTR_ADD a0, a0, t0 bne a0, a1, 1b nop icache_done: @@ -135,11 +135,11 @@ icache_done: mul t1, t1, t2 li a0, CKSEG0 - addu a1, a0, t1 - subu a1, a1, t0 + PTR_ADDU a1, a0, t1 + PTR_SUBU a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) bne a0, a1, 1b - add a0, a0, t0 + PTR_ADD a0, a0, t0 dcache_done: /* Set Kseg0 CCA to that in s0 */ @@ -152,7 +152,7 @@ dcache_done: /* Enter the coherent domain */ li t0, 0xff - sw t0, GCR_CL_COHERENCE_OFS(v1) + PTR_S t0, GCR_CL_COHERENCE_OFS(v1) ehb /* Jump to kseg0 */ @@ -178,9 +178,9 @@ dcache_done: nop /* Off we go! */ - lw t1, VPEBOOTCFG_PC(v0) - lw gp, VPEBOOTCFG_GP(v0) - lw sp, VPEBOOTCFG_SP(v0) + PTR_L t1, VPEBOOTCFG_PC(v0) + PTR_L gp, VPEBOOTCFG_GP(v0) + PTR_L sp, VPEBOOTCFG_SP(v0) jr t1 nop END(mips_cps_core_entry) @@ -299,15 +299,15 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ PTR_LA t0, mips_cm_base - lw t0, 0(t0) + PTR_L t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ - lw t0, GCR_CL_ID_OFS(t0) + PTR_L t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 PTR_LA t1, mips_cps_core_bootcfg - lw t1, 0(t1) - addu t0, t0, t1 + PTR_L t1, 0(t1) + PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ has_mt ta2, 1f @@ -334,8 +334,8 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw ta3, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, ta3 + PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) + PTR_ADDU v0, v0, ta3 #ifdef CONFIG_MIPS_MT @@ -360,7 +360,7 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw ta2, COREBOOTCFG_VPEMASK(t0) + PTR_L ta2, COREBOOTCFG_VPEMASK(t0) move t8, ta2 li ta1, 0 From 1c885357da2d3cf62132e611c0beaf4cdf607dd9 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:31:14 +0100 Subject: [PATCH 766/839] Revert "MIPS: Kconfig: Disable SMP/CPS for 64-bit" This reverts commit 6ca716f2e5571d25a3899c6c5c91ff72ea6d6f5e. SMP/CPS is now supported on 64bit cores. Cc: # 4.1 Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10592/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2a14585c90d2..aab7e46cadd5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2231,7 +2231,7 @@ config MIPS_CMP config MIPS_CPS bool "MIPS Coherent Processing System support" - depends on SYS_SUPPORTS_MIPS_CPS && !64BIT + depends on SYS_SUPPORTS_MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU From a0f67441b06525a1e5fd713ba0d75af4e5d6b198 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 9 Jul 2015 14:41:53 +0530 Subject: [PATCH 767/839] drm/amdkfd: validate pdd where it acquired first Currently pdd is validate after dereferencing it, which is not correct, Thus validate pdd before its first use. Signed-off-by: Maninder Singh Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8a1f999daa24..9be007081b72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -420,6 +420,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + if (pdd->reset_wavefronts) { dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; @@ -431,8 +437,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) * We don't call amd_iommu_unbind_pasid() here * because the IOMMU called us. */ - if (pdd) - pdd->bound = false; + pdd->bound = false; mutex_unlock(&p->mutex); } From cd404af0c930104462aa91344f07d002cf8248ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 7 Jul 2015 16:27:28 +0900 Subject: [PATCH 768/839] drm/radeon: Clean up reference counting and pinning of the cursor BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a GEM reference for and pin the new cursor BO, unpin and drop the GEM reference for the old cursor BO in radeon_crtc_cursor_set2, and use radeon_crtc->cursor_addr in radeon_set_cursor. This fixes radeon_cursor_reset accidentally incrementing the cursor BO pin count, and cleans up the code a little. Cc: stable@vger.kernel.org Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 84 ++++++++++++-------------- drivers/gpu/drm/radeon/radeon_mode.h | 1 - 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97..fa661744a1f5 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,51 +228,32 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) +static void radeon_set_cursor(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); + upper_32_bits(radeon_crtc->cursor_addr)); WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); } WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; } int radeon_crtc_cursor_set2(struct drm_crtc *crtc, @@ -283,7 +265,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +289,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +324,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +337,7 @@ unpin: radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +355,6 @@ unpin: void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +362,8 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 6de5459316b5..07909d817381 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -343,7 +343,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; From f3cbb17bcf676a2fc6aedebe9fbebd59e550c51a Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 7 Jul 2015 16:27:29 +0900 Subject: [PATCH 769/839] drm/radeon: unpin cursor BOs on suspend and pin them again on resume (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17. [Michel Dänzer: Add pinning BOs on resume] v2: [Alex Deucher: merge cursor unpin into fb unpin loop] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Cc: stable@vger.kernel.org Reviewed-by: Christian König (v1) Signed-off-by: Grigori Goronzy Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 1a532a9bc3a3..d8319dae8358 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1578,11 +1578,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1645,6 +1655,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1684,6 +1695,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? + 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); From 8991668ab4e26f985a8485719bce5d6d0623a644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 7 Jul 2015 16:27:30 +0900 Subject: [PATCH 770/839] drm/radeon: Fold radeon_set_cursor() into radeon_show_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 49 ++++++++++---------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index fa661744a1f5..afaf346bd50e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -91,15 +91,34 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(radeon_crtc->cursor_addr)); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { + if (rdev->family >= CHIP_RV770) { + if (radeon_crtc->crtc_id) + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + else + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + } + + WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { + /* offset is from DISP(2)_BASE_ADDRESS */ + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); + switch (radeon_crtc->crtc_id) { case 0: WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); @@ -228,34 +247,6 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static void radeon_set_cursor(struct drm_crtc *crtc) -{ - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_device *rdev = crtc->dev->dev_private; - - if (ASIC_IS_DCE4(rdev)) { - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(radeon_crtc->cursor_addr)); - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else if (ASIC_IS_AVIVO(rdev)) { - if (rdev->family >= CHIP_RV770) { - if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - } - WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else { - /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, - radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); - } -} - int radeon_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, @@ -324,7 +315,6 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -362,7 +352,6 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); From d57c0edfe00d3274b50f91ce3076ed0e82d28782 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 14:08:12 -0400 Subject: [PATCH 771/839] Revert "Revert "drm/radeon: dont switch vt on suspend"" This reverts commit ac9134906b3f5c2b45dc80dab0fee792bd516d52. We've fixed the underlying problem with cursors, so re-enable this. --- drivers/gpu/drm/radeon/radeon_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 634793ea8418..aeb676708e60 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,6 +257,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; + info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { From eb99070b4ad105f8a516d99e95226180924eddc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 09:56:13 +0200 Subject: [PATCH 772/839] drm/radeon: allways add the VM clear duplicate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allways add the VM clear duplicate of the BO_VA, no matter what the old status was. Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ec10533a49b8..031096599f84 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -493,29 +493,27 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } if (bo_va->it.start || bo_va->it.last) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { - /* add a clone of the bo_va to clear the old address */ - struct radeon_bo_va *tmp; - spin_unlock(&vm->status_lock); - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (!tmp) { - mutex_unlock(&vm->mutex); - r = -ENOMEM; - goto error_unreserve; - } - tmp->it.start = bo_va->it.start; - tmp->it.last = bo_va->it.last; - tmp->vm = vm; - tmp->bo = radeon_bo_ref(bo_va->bo); - spin_lock(&vm->status_lock); - list_add(&tmp->vm_status, &vm->freed); + /* add a clone of the bo_va to clear the old address */ + struct radeon_bo_va *tmp; + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + r = -ENOMEM; + goto error_unreserve; } - spin_unlock(&vm->status_lock); + tmp->it.start = bo_va->it.start; + tmp->it.last = bo_va->it.last; + tmp->vm = vm; + tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); bo_va->it.start = 0; bo_va->it.last = 0; + + spin_lock(&vm->status_lock); + list_del_init(&bo_va->vm_status); + list_add(&tmp->vm_status, &vm->freed); + spin_unlock(&vm->status_lock); } if (soffset || eoffset) { From dbedff05d163f8d1b4a97b91c57b939830fc235f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 09:56:14 +0200 Subject: [PATCH 773/839] drm/radeon: check if BO_VA is set before adding it to the invalidation list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we try to clear BO_VAs without an address. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91141 Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 031096599f84..48d97c040f49 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -507,22 +507,21 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); + spin_lock(&vm->status_lock); bo_va->it.start = 0; bo_va->it.last = 0; - - spin_lock(&vm->status_lock); list_del_init(&bo_va->vm_status); list_add(&tmp->vm_status, &vm->freed); spin_unlock(&vm->status_lock); } if (soffset || eoffset) { + spin_lock(&vm->status_lock); bo_va->it.start = soffset; bo_va->it.last = eoffset - 1; - interval_tree_insert(&bo_va->it, &vm->va); - spin_lock(&vm->status_lock); list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + interval_tree_insert(&bo_va->it, &vm->va); } bo_va->flags = flags; @@ -1156,7 +1155,8 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, list_for_each_entry(bo_va, &bo->va, bo_list) { spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) + if (list_empty(&bo_va->vm_status) && + (bo_va->it.start || bo_va->it.last)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); spin_unlock(&bo_va->vm->status_lock); } From 0f11770417d849558c727ed20e1cd07444e15a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 16:58:48 +0200 Subject: [PATCH 774/839] drm/amdgpu: fix timeout calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 975edb1000a2..ae43b58c9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -352,7 +352,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) if (((int64_t)timeout_ns) < 0) return MAX_SCHEDULE_TIMEOUT; - timeout = ktime_sub_ns(ktime_get(), timeout_ns); + timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get()); if (ktime_to_ns(timeout) < 0) return 0; From 355c822847fa70fa1df6a7451b7ecf76116efcd2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 12:58:20 -0400 Subject: [PATCH 775/839] drm/radeon: disable vce init on cayman (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cayman does not have vce. There were a few places in the shared cayman/TV code where we were trying to do vce stuff. v2: remove -ENOENT check Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8e5aeeb058a5..158872eb78e4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2162,18 +2162,20 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } - ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + if (rdev->family == CHIP_ARUBA) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - if (!r) - r = vce_v1_0_init(rdev); - else if (r != -ENOENT) - DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + if (!r) + r = vce_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + } r = radeon_ib_pool_init(rdev); if (r) { @@ -2396,7 +2398,8 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); - radeon_vce_fini(rdev); + if (rdev->family == CHIP_ARUBA) + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: [PATCH 776/839] libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ net/ceph/ceph_common.c | 16 ++++++++++------ net/ceph/messenger.c | 10 +++++++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cb7db320dd27..f30329f72641 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,8 +17,6 @@ #include #include #include -#include -#include #include @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -608,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -621,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1679f47280e2..5c1f98ea6741 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -479,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -2944,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ From c44bd69c0c8cfadf0239437635b2933efb1f6c4c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Jul 2015 13:57:52 +0300 Subject: [PATCH 777/839] libceph: treat sockaddr_storage with uninitialized family as blank addr_is_blank() should return true if family is neither AF_INET nor AF_INET6. This is what its counterpart entity_addr_t::is_blank_ip() is doing and it is the right thing to do: in process_banner() we check if our address is blank and if it is "learn" it from our peer. As it is, we never learn our address and always send out a blank one. This goes way back to ceph.git commit dd732cbfc1c9 ("use sockaddr_storage; and some ipv6 support groundwork") from 2009. While at at, do not open-code ipv6_addr_any() and use INADDR_ANY constant instead of 0. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c1f98ea6741..e3be1d22a247 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) From 398ecff5a562b7b69f77582f98a4b04d0de1f066 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:46:14 -0400 Subject: [PATCH 778/839] MAINTAINERS: update ceph entries - The Ceph common code is used by both fs/ceph and drivers/block/rbd. Add a separate maintainers entry. - Add Ilya as libceph maintainer and cephfs submaintainer. - Attribute Documentation/ABI/testing/sysfs-bus-rbd to rbd. - ceph-devel@vger.kernel.org should be L, not M in rbd entry. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..58cbc2118e46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2562,19 +2562,29 @@ F: arch/powerpc/include/uapi/asm/spu*.h F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ -CEPH DISTRIBUTED FILE SYSTEM CLIENT +CEPH COMMON CODE (LIBCEPH) +M: Ilya Dryomov M: "Yan, Zheng" M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported -F: Documentation/filesystems/ceph.txt -F: fs/ceph/ F: net/ceph/ F: include/linux/ceph/ F: include/linux/crush/ +CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) +M: "Yan, Zheng" +M: Sage Weil +M: Ilya Dryomov +L: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +S: Supported +F: Documentation/filesystems/ceph.txt +F: fs/ceph/ + CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org S: Orphan @@ -8366,10 +8376,11 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov M: Sage Weil M: Alex Elder -M: ceph-devel@vger.kernel.org +L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported +F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h From 6e67b7ae2157bdeb6b56381e530fc74bb68b6149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:47:37 -0400 Subject: [PATCH 779/839] MAINTAINERS: add secondary tree for ceph modules The Ceph kernel code is primarily developed in the github tree, and only pushed to the korg tree before going to Linus. If Sage is unavailable and another maintainer needs to push something upstream, pull requests may originate from the github tree instead of Sage's korg tree. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58cbc2118e46..0d70760e8135 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2569,6 +2569,7 @@ M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: net/ceph/ F: include/linux/ceph/ @@ -2581,6 +2582,7 @@ M: Ilya Dryomov L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph/ @@ -8379,6 +8381,7 @@ M: Alex Elder L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c From 6b7339f4c31ad69c8e9c0b2859276e22cf72176d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 6 Jul 2015 23:18:37 +0300 Subject: [PATCH 780/839] mm: avoid setting up anonymous pages into file mapping Reading page fault handler code I've noticed that under right circumstances kernel would map anonymous pages into file mappings: if the VMA doesn't have vm_ops->fault() and the VMA wasn't fully populated on ->mmap(), kernel would handle page fault to not populated pte with do_anonymous_page(). Let's change page fault handler to use do_anonymous_page() only on anonymous VMA (->vm_ops == NULL) and make sure that the VMA is not shared. For file mappings without vm_ops->fault() or shred VMA without vm_ops, page fault on pte_none() entry would lead to SIGBUS. Signed-off-by: Kirill A. Shutemov Acked-by: Oleg Nesterov Cc: Andrew Morton Cc: Willy Tarreau Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- mm/memory.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index a84fbb772034..388dcf9aa283 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2670,6 +2670,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? */ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3099,6 +3103,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3244,13 +3251,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); From 6f957724b94cb19f5c1c97efd01dd4df8ced323c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2015 11:20:01 -0700 Subject: [PATCH 781/839] Fix firmware loader uevent buffer NULL pointer dereference The firmware class uevent function accessed the "fw_priv->buf" buffer without the proper locking and testing for NULL. This is an old bug (looks like it goes back to 2012 and commit 1244691c73b2: "firmware loader: introduce firmware_buf"), but for some reason it's triggering only now in 4.2-rc1. Shuah Khan is trying to bisect what it is that causes this to trigger more easily, but in the meantime let's just fix the bug since others are hitting it too (at least Ingo reports having seen it as well). Reported-and-tested-by: Shuah Khan Acked-by: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 9c4288362a8e..894bda114224 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -563,10 +563,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -577,6 +575,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, From 3324603524925c7727207027d1c15e597412d15e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Jul 2015 14:20:36 -0400 Subject: [PATCH 782/839] selinux: don't waste ebitmap space when importing NetLabel categories At present we don't create efficient ebitmaps when importing NetLabel category bitmaps. This can present a problem when comparing ebitmaps since ebitmap_cmp() is very strict about these things and considers these wasteful ebitmaps not equal when compared to their more efficient counterparts, even if their values are the same. This isn't likely to cause problems on 64-bit systems due to a bit of luck on how NetLabel/CIPSO works and the default ebitmap size, but it can be a problem on 32-bit systems. This patch fixes this problem by being a bit more intelligent when importing NetLabel category bitmaps by skipping over empty sections which should result in a nice, efficient ebitmap. Cc: stable@vger.kernel.org # 3.17 Signed-off-by: Paul Moore --- security/selinux/ss/ebitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec17..57644b1dc42e 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; From 9abea2d64ce93b6909de7f83a7f681f572369708 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Jul 2015 18:05:15 +0200 Subject: [PATCH 783/839] ioctl_compat: handle FITRIM The FITRIM ioctl has the same arguments on 32-bit and 64-bit architectures, so we can add it to the list of compatible ioctls and drop it from compat_ioctl method of various filesystems. Signed-off-by: Mikulas Patocka Cc: Al Viro Cc: Ted Ts'o Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 1 + fs/ecryptfs/file.c | 1 - fs/ext4/ioctl.c | 1 - fs/jfs/ioctl.c | 3 --- fs/nilfs2/ioctl.c | 1 - fs/ocfs2/ioctl.c | 1 - 6 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b8e2f091f5b..48851f6ea6ec 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ) /* 'X' - originally XFS but some now in the VFS */ COMPATIBLE_IOCTL(FIFREEZE) COMPATIBLE_IOCTL(FITHAW) +COMPATIBLE_IOCTL(FITRIM) COMPATIBLE_IOCTL(KDGETKEYCODE) COMPATIBLE_IOCTL(KDSETKEYCODE) COMPATIBLE_IOCTL(KDGKBTYPE) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 72afcc629d7b..feef8a9c4de7 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; switch (cmd) { - case FITRIM: case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: case FS_IOC32_GETVERSION: diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cb8451246b30..1346cfa355d0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case EXT4_IOC_MOVE_EXT: - case FITRIM: case EXT4_IOC_RESIZE_FS: case EXT4_IOC_PRECACHE_EXTENTS: case EXT4_IOC_SET_ENCRYPTION_POLICY: diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 93a1232894f6..8db8b7d61e40 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; - case FITRIM: - cmd = FITRIM; - break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9a20e513d7eb..aba43811d6ef 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SYNC: case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: - case FITRIM: break; default: return -ENOIOCTLCMD; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 53e6c40ed4c6..3cb097ccce60 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: - case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) From fc0a1f035cf15f704f4092da294963c57e94c1c0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 14:12:10 +0200 Subject: [PATCH 784/839] i2c: I2C_MT65XX should depend on HAS_DMA If NO_DMA=y: ERROR: "dma_unmap_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_mapping_error" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_map_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! Add a dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 35ac23768ce9..577d58d1f1a1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -633,6 +633,7 @@ config I2C_MPC config I2C_MT65XX tristate "MediaTek I2C adapter" depends on ARCH_MEDIATEK || COMPILE_TEST + depends on HAS_DMA help This selects the MediaTek(R) Integrated Inter Circuit bus driver for MT65xx and MT81xx. From 724948106ed236fc528c720ae12c79af7e2aea4e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:08:46 +0800 Subject: [PATCH 785/839] i2c: xgene-slimpro: Fix missing mbox_free_channel call in probe error path Free requested mailbox channel before return error. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index dcca7076231e..1c9cb65ac4cf 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -419,6 +419,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) rc = i2c_add_adapter(adapter); if (rc) { dev_err(&pdev->dev, "Adapter registeration failed\n"); + mbox_free_channel(ctx->mbox_chan); return rc; } From eb8173e3d7b94664ecd3acf34942c9c9d6f6cb73 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:41:58 +0800 Subject: [PATCH 786/839] i2c: jz4780: Fix return value if probe fails Current code returns 0 if fails to read clock-frequency DT property, fix it. Also add checking return value of clk_prepare_enable and propagate return value of devm_request_irq. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 19b2d689a5ef..f325663c27c5 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -764,12 +764,15 @@ static int jz4780_i2c_probe(struct platform_device *pdev) if (IS_ERR(i2c->clk)) return PTR_ERR(i2c->clk); - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; - if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", - &clk_freq)) { + ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency", + &clk_freq); + if (ret) { dev_err(&pdev->dev, "clock-frequency not specified in DT"); - return clk_freq; + goto err; } i2c->speed = clk_freq / 1000; @@ -790,10 +793,8 @@ static int jz4780_i2c_probe(struct platform_device *pdev) i2c->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, i2c->irq, jz4780_i2c_irq, 0, dev_name(&pdev->dev), i2c); - if (ret) { - ret = -ENODEV; + if (ret) goto err; - } ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { From 4f001fd30145a6a8f72f9544c982cfd3dcb7c6df Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Sat, 24 Jan 2015 09:16:29 +0200 Subject: [PATCH 787/839] i2c: Mark instantiated device nodes with OF_POPULATE Mark (and unmark) device nodes with the POPULATE flag as appropriate. This is required to avoid multi probing when using I2C and device overlays containing a mux. This patch is also more careful with the release of the adapter device which caused a deadlock with muxes, and does not break the build on !OF since the node flag accessors are not defined then. Signed-off-by: Pantelis Antoniou Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 069a41f116dd..e6d4935161e4 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1012,6 +1012,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { + if (client->dev.of_node) + of_node_clear_flag(client->dev.of_node, OF_POPULATED); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -1320,8 +1322,11 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) + for_each_available_child_of_node(adap->dev.of_node, node) { + if (of_node_test_and_set_flag(node, OF_POPULATED)) + continue; of_i2c_register_device(adap, node); + } } static int of_dev_node_match(struct device *dev, void *data) @@ -1853,6 +1858,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (adap == NULL) return NOTIFY_OK; /* not for us */ + if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { + put_device(&adap->dev); + return NOTIFY_OK; + } + client = of_i2c_register_device(adap, rd->dn); put_device(&adap->dev); @@ -1863,6 +1873,10 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } break; case OF_RECONFIG_CHANGE_REMOVE: + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ client = of_find_i2c_device_by_node(rd->dn); if (client == NULL) From a27b5b97d6fe91f55058ad8ac28a8768700201ab Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 28 Jun 2015 15:16:57 +0200 Subject: [PATCH 788/839] hpfs: add fstrim support This patch adds support for fstrim to the HPFS filesystem. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/alloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 4 ++ fs/hpfs/super.c | 27 ++++++++++++++ 5 files changed, 128 insertions(+) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index f005046e1591..d6a4b55d2ab0 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a a->btree.first_free = cpu_to_le16(8); return a; } + +static unsigned find_run(__le32 *bmp, unsigned *idx) +{ + unsigned len; + while (tstbits(bmp, *idx, 1)) { + (*idx)++; + if (unlikely(*idx >= 0x4000)) + return 0; + } + len = 1; + while (!tstbits(bmp, *idx + len, 1)) + len++; + return len; +} + +static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result) +{ + int err; + secno end; + if (fatal_signal_pending(current)) + return -EINTR; + end = start + len; + if (start < limit_start) + start = limit_start; + if (end > limit_end) + end = limit_end; + if (start >= end) + return 0; + if (end - start < minlen) + return 0; + err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0); + if (err) + return err; + *result += end - start; + return 0; +} + +int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result) +{ + int err = 0; + struct hpfs_sb_info *sbi = hpfs_sb(s); + unsigned idx, len, start_bmp, end_bmp; + __le32 *bmp; + struct quad_buffer_head qbh; + + *result = 0; + if (!end || end > sbi->sb_fs_size) + end = sbi->sb_fs_size; + if (start >= sbi->sb_fs_size) + return 0; + if (minlen > 0x4000) + return 0; + if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_1; + } + if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { + err = -EIO; + goto unlock_1; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_1: + hpfs_unlock(s); + } + start_bmp = start >> 14; + end_bmp = (end + 0x3fff) >> 14; + while (start_bmp < end_bmp && !err) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_2; + } + if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) { + err = -EIO; + goto unlock_2; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_2: + hpfs_unlock(s); + start_bmp++; + } + return err; +} diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2a8e07425de0..dc540bfcee1d 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops = .iterate = hpfs_readdir, .release = hpfs_dir_release, .fsync = hpfs_file_fsync, + .unlocked_ioctl = hpfs_ioctl, }; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 6d8cfe9b52d6..7ca28d604bf7 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops = .release = hpfs_file_release, .fsync = hpfs_file_fsync, .splice_read = generic_file_splice_read, + .unlocked_ioctl = hpfs_ioctl, }; const struct inode_operations hpfs_file_iops = diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index bb04b58d1d69..c4867b5116dd 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "hpfs.h" @@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno); struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); +int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *); /* anode.c */ @@ -318,6 +321,7 @@ __printf(2, 3) void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_get_free_dnodes(struct super_block *); +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); /* * local time (HPFS) to GMT (Unix) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b..037e3e597ff4 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -196,6 +196,33 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } + +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + switch (cmd) { + case FITRIM: { + struct fstrim_range range; + secno n_trimmed; + int r; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) + return -EFAULT; + r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed); + if (r) + return r; + range.len = (u64)n_trimmed << 9; + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) + return -EFAULT; + return 0; + } + default: { + return -ENOIOCTLCMD; + } + } +} + + static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) From d7b04097c250e6322ba73d3b03337074afeeb314 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 23 Apr 2015 17:28:45 +0800 Subject: [PATCH 789/839] hpfs: Remove unessary cast Avoid a pointless kmem_cache_alloc() return value cast in fs/hpfs/super.c::hpfs_alloc_inode() Signed-off-by: Firo Yang Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 037e3e597ff4..0642516d36c8 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -228,7 +228,7 @@ static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; From ce657611baf902f14ae559ce4e0787ead6712067 Mon Sep 17 00:00:00 2001 From: Sanidhya Kashyap Date: Sat, 21 Mar 2015 12:57:50 -0400 Subject: [PATCH 790/839] hpfs: kstrdup() out of memory handling There is a possibility of nothing being allocated to the new_opts in case of memory pressure, therefore return ENOMEM for such case. Signed-off-by: Sanidhya Kashyap Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 0642516d36c8..cde044d41f69 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -451,11 +451,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; From a28e4b2b18ccb90df402da3f21e1a83c9d4f8ec1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Mar 2015 20:47:10 -0700 Subject: [PATCH 791/839] hpfs: hpfs_error: Remove static buffer, use vsprintf extension %pV instead Removing unnecessary static buffers is good. Use the vsprintf %pV extension instead. Signed-off-by: Joe Perches Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v2.6.36+ Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index cde044d41f69..68a9bed05628 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); From adb3505086def6d3e37013ec6cf8313cc719cbb6 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 8 Jul 2015 10:49:30 +0530 Subject: [PATCH 792/839] net: systemport: Use eth_hw_addr_random Use eth_hw_addr_random() instead of calling random_ether_addr(). Here, this change is setting addr_assign_type to NET_ADDR_RANDOM. The Coccinelle semantic patch that performs this transformation is as follows: @@ identifier a,b; @@ -random_ether_addr(a->b); +eth_hw_addr_random(a); Signed-off-by: Vaishali Thakkar Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 909ad7a0d480..4566cdf0bc39 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1793,7 +1793,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) macaddr = of_get_mac_address(dn); if (!macaddr || !is_valid_ether_addr(macaddr)) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } else { ether_addr_copy(dev->dev_addr, macaddr); } From 5a0266af10246c84a6c712f365788800c7e23fd4 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 8 Jul 2015 14:35:22 +0530 Subject: [PATCH 793/839] drivers: net: cpsw: fix disabling of tx interrupt in rx isr In commit 'c03abd84634d ("net: ethernet: cpsw: don't requests IRQs we don't use")', common isr is split into tx and rx, but in rx isr tx interrupt is also disabledi in cpsw_disable_irq(). So tx interrupts are not handled during rx interrupts and rx napi completion and results in poor tx performance by 40Mbps. Fixing by disabling only rx interrupt in rx isr. Cc: Felipe Balbi Cc: # v4.0+ Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e778703880d3..f335bf119ab5 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -138,19 +138,6 @@ do { \ #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) -#define cpsw_enable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - enable_irq(priv->irqs_table[i]); \ - } while (0) -#define cpsw_disable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - disable_irq_nosync(priv->irqs_table[i]); \ - } while (0) - #define cpsw_slave_index(priv) \ ((priv->data.dual_emac) ? priv->emac_port : \ priv->data.active_slave) @@ -783,7 +770,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) cpsw_intr_disable(priv); if (priv->irq_enabled == true) { - cpsw_disable_irq(priv); + disable_irq_nosync(priv->irqs_table[0]); priv->irq_enabled = false; } @@ -819,7 +806,7 @@ static int cpsw_poll(struct napi_struct *napi, int budget) prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(priv); + enable_irq(priv->irqs_table[0]); } } @@ -1335,7 +1322,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (prim_cpsw->irq_enabled == false) { if ((priv == prim_cpsw) || !netif_running(prim_cpsw->ndev)) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(prim_cpsw); + enable_irq(prim_cpsw->irqs_table[0]); } } From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: [PATCH 794/839] cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/usb/cdc_ncm.c | 61 ++++++++++++++++++++++++++++---- drivers/net/usb/huawei_cdc_ncm.c | 7 ++-- include/linux/usb/cdc_ncm.h | 7 +++- 4 files changed, 67 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47a825c..efc18e05af0a 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; - ret = cdc_ncm_bind_common(dev, intf, data_altsetting); + ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0); if (ret) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8fbb0ee..1991e4a24657 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -684,10 +684,12 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) ctx->tx_curr_skb = NULL; } + kfree(ctx->delayed_ndp16); + kfree(ctx); } -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; @@ -855,6 +857,17 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* Device-specific flags */ + ctx->drvflags = drvflags; + + /* Allocate the delayed NDP if needed. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); + if (!ctx->delayed_ndp16) + goto error2; + dev_info(&intf->dev, "NDP will be placed at end of frame for this device."); + } + /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; @@ -954,8 +967,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* The NCM data altsetting is fixed */ - ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); + /* The NCM data altsetting is fixed, so we hard-coded it. + * Additionally, generic NCM devices are assumed to accept arbitrarily + * placed NDP. + */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0); /* * We should get an event when network connection is "connected" or @@ -986,6 +1002,14 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); + /* If NDP should be moved to the end of the NCM package, we can't follow the + * NTH16 header as we would normally do. NDP isn't written to the SKB yet, and + * the wNdpIndex field in the header is actually not consistent with reality. It will be later. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + if (ctx->delayed_ndp16->dwSignature == sign) + return ctx->delayed_ndp16; + /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); @@ -995,7 +1019,8 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } /* align new NDP */ - cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) @@ -1008,7 +1033,11 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + else + ndp16 = ctx->delayed_ndp16; + ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; @@ -1023,6 +1052,15 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) struct sk_buff *skb_out; u16 n = 0, index, ndplen; u8 ready2send = 0; + u32 delayed_ndp_size; + + /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated + * accordingly. Otherwise, we should check here. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + delayed_ndp_size = ctx->max_ndp_size; + else + delayed_ndp_size = 0; /* if there is a remaining skb, it gets priority */ if (skb != NULL) { @@ -1077,7 +1115,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); /* check if we had enough room left for both NDP and frame */ - if (!ndp16 || skb_out->len + skb->len > ctx->tx_max) { + if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -1150,6 +1188,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } + /* If requested, put NDP at end of frame. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; + cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max); + nth16->wNdpIndex = cpu_to_le16(skb_out->len); + memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size); + + /* Zero out delayed NDP - signature checking will naturally fail. */ + ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size); + } + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 735f7dadb9a0..2680a65cd5e4 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -73,11 +73,14 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; struct huawei_cdc_ncm_state *drvstate = (void *)&usbnet_dev->data; + int drvflags = 0; /* altsetting should always be 1 for NCM devices - so we hard-coded - * it here + * it here. Some huawei devices will need the NDP part of the NCM package to + * be at the end of the frame. */ - ret = cdc_ncm_bind_common(usbnet_dev, intf, 1); + drvflags |= CDC_NCM_FLAG_NDP_TO_END; + ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c5..1f6526c76ee8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); From fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:11 +0200 Subject: [PATCH 795/839] net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() pktgen_thread_worker() is obviously racy, kthread_stop() can come between the kthread_should_stop() check and set_current_state(). Signed-off-by: Oleg Nesterov Reported-by: Jan Stancek Reported-by: Marcelo Leitner Signed-off-by: David S. Miller --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 05badbb58865..41489e3c69af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3572,8 +3572,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); From 1fbe4b46caca5b01b070af93d513031ffbcc480c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:13 +0200 Subject: [PATCH 796/839] net: pktgen: kill the "Wait for kthread_stop" code in pktgen_thread_worker() pktgen_thread_worker() doesn't need to wait for kthread_stop(), it can simply exit. Just pktgen_create_thread() and pg_net_exit() should do get_task_struct()/put_task_struct(). kthread_stop(dead_thread) is fine. Signed-off-by: Oleg Nesterov Signed-off-by: David S. Miller --- net/core/pktgen.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 41489e3c69af..1ebdf1c0d118 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3571,15 +3571,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3771,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3893,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net) t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); kthread_stop(t->tsk); + put_task_struct(t->tsk); kfree(t); } From 2c069a118fe1d80c47dca84e1561045fc7f3cc9e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 10 Jul 2015 09:04:25 +1000 Subject: [PATCH 797/839] cxl: Check if afu is not null in cxl_slbia The pointer to an AFU in the adapter's list of AFUs can be null if we're in the process of removing AFUs. The afu_list_lock doesn't guard against this. Say we have 2 slices, and we're in the process of removing cxl. - We remove the AFUs in order (see cxl_remove). In cxl_remove_afu for AFU 0, we take the lock, set adapter->afu[0] = NULL, and release the lock. - Then we get an slbia. In cxl_slbia we take the lock, and set afu = adapter->afu[0], which is NULL. - Therefore our attempt to check afu->enabled will blow up. Therefore, check if afu is a null pointer before dereferencing it. Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 833348e2c9cb..4a164ab8b35a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) From fcc028c106e5750aa66dc43595b3f29f88801ff0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 9 Jul 2015 22:21:20 +0900 Subject: [PATCH 798/839] net: axienet: Fix devm_ioremap_resource return value check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Value returned by devm_ioremap_resource() was checked for non-NULL but devm_ioremap_resource() returns IOMEM_ERR_PTR, not NULL. In case of error this could lead to dereference of ERR_PTR. Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 46aa27df8853 ("net: axienet: Use devm_* calls") Reviewed-by: Sören Brinkmann Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 4208dd7ef101..d95f9aae95e7 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1530,9 +1530,9 @@ static int axienet_probe(struct platform_device *pdev) /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); - if (!lp->regs) { + if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->regs); goto free_netdev; } @@ -1599,9 +1599,9 @@ static int axienet_probe(struct platform_device *pdev) goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); - if (!lp->dma_regs) { + if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->dma_regs); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); From a7d35f9d73e9ffa74a02304b817e579eec632f67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jul 2015 18:56:07 +0200 Subject: [PATCH 799/839] bridge: fix potential crash in __netdev_pick_tx() Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") fixed an issue in normal forward path, caused by sender_cpu & napi_id skb fields being an union. Bridge is another point where skb can be forwarded, so we need the same cure. Bug triggers if packet was received on a NIC using skb_mark_napi_id() Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Eric Dumazet Reported-by: Bob Liu Tested-by: Bob Liu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b5d2cc..0ff6e1bbca91 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } From cccf34e9411c41b0cbfb41980fe55fc8e7c98fd2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 10 Jul 2015 09:29:10 +0100 Subject: [PATCH 800/839] MIPS: c-r4k: Fix cache flushing for MT cores MT_SMP is not the only SMP option for MT cores. The MT_SMP option allows more than one VPE per core to appear as a secondary CPU in the system. Because of how CM works, it propagates the address-based cache ops to the secondary cores but not the index-based ones. Because of that, the code does not use IPIs to flush the L1 caches on secondary cores because the CM would have done that already. However, the CM functionality is independent of the type of SMP kernel so even in non-MT kernels, IPIs are not necessary. As a result of which, we change the conditional to depend on the CM presence. Moreover, since VPEs on the same core share the same L1 caches, there is no need to send an IPI on all of them so we calculate a suitable cpumask with only one VPE per core. Signed-off-by: Markos Chandras Cc: # 3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10654/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/smp.h | 1 + arch/mips/kernel/smp.c | 44 ++++++++++++++++++++++++++++++++++++- arch/mips/mm/c-r4k.c | 14 +++++++++--- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea0..16f1ea9ab191 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9dda..d0744cc77ea7 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. */ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 7f660dc67596..a5974ddba44b 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include /* for run_uncached() */ #include #include +#include /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed to the other cores. The best we can probably do is + * to restrict that call when a CM is not present because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. + */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } From 6249ecbbb75cd635025cc681fcf51fb8659edbab Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:15 +0100 Subject: [PATCH 801/839] MIPS: Malta: Make GIC FDC IRQ workaround Malta specific Wider testing reveals that the Fast Debug Channel (FDC) interrupt is routed through the GIC just fine on Pistachio SoC, even though it contains interAptiv cores. Clearly the FDC interrupt routing problems previously observed on interAptiv and proAptiv cores are specific to the Malta FPGA bitstreams. Move the workaround for interAptiv and proAptiv out of gic_get_c0_fdc_int() in the GIC irqchip driver into Malta's get_c0_fdc_int() platform callback, to allow the Pistachio SoC to use the FDC interrupt. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Cc: James Hartley Patchwork: http://patchwork.linux-mips.org/patch/9748/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-time.c | 20 +++++++++++++------- drivers/irqchip/irq-mips-gic.c | 10 ---------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..5625b190edc0 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -119,18 +119,24 @@ void read_persistent_clock(struct timespec *ts) int get_c0_fdc_int(void) { - int mips_cpu_fdc_irq; + /* + * Some cores claim the FDC is routable through the GIC, but it doesn't + * actually seem to be connected for those Malta bitstreams. + */ + switch (current_cpu_type()) { + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + return -1; + }; if (cpu_has_veic) - mips_cpu_fdc_irq = -1; + return -1; else if (gic_present) - mips_cpu_fdc_irq = gic_get_c0_fdc_int(); + return gic_get_c0_fdc_int(); else if (cp0_fdc_irq >= 0) - mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq; + return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; else - mips_cpu_fdc_irq = -1; - - return mips_cpu_fdc_irq; + return -1; } int get_c0_perfcount_int(void) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 4400edd1a6c7..b7d54d428b5e 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -257,16 +257,6 @@ int gic_get_c0_fdc_int(void) return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; } - /* - * Some cores claim the FDC is routable but it doesn't actually seem to - * be connected. - */ - switch (current_cpu_type()) { - case CPU_INTERAPTIV: - case CPU_PROAPTIV: - return -1; - } - return irq_create_mapping(gic_irq_domain, GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC)); } From 6b5e741e9a834a8cf2d5b895319045ab17ad37fe Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:16 +0100 Subject: [PATCH 802/839] MIPS: Pistachio: Support CDMM & Fast Debug Channel Implement the mips_cdmm_phys_base() platform callback to provide a default Common Device Memory Map (CDMM) physical base address for the Pistachio SoC. This allows the CDMM in each VPE to be configured and probed for devices, such as the Fast Debug Channel (FDC). The physical address chosen is just below the default CPC address, which appears to also be unallocated. The FDC IRQ is also usable on Pistachio, and is routed through the GIC, so implement the get_c0_fdc_int() platform callback using gic_get_c0_fdc_int(), so the FDC driver doesn't have to fall back to polling. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: James Hartley Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Patchwork: http://patchwork.linux-mips.org/patch/9749/ Signed-off-by: Ralf Baechle --- arch/mips/pistachio/init.c | 8 +++++++- arch/mips/pistachio/time.c | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index d2dc836523a3..8bd8ebb20a72 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -63,13 +63,19 @@ void __init plat_mem_setup(void) plat_setup_iocoherency(); } -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 phys_addr_t mips_cpc_default_phys_base(void) { return DEFAULT_CPC_BASE_ADDR; } +phys_addr_t mips_cdmm_phys_base(void) +{ + return DEFAULT_CDMM_BASE_ADDR; +} + static void __init mips_nmi_setup(void) { void *base; diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..7c73fcb92a10 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -27,6 +27,11 @@ int get_c0_perfcount_int(void) return gic_get_c0_perfcount_int(); } +int get_c0_fdc_int(void) +{ + return gic_get_c0_fdc_int(); +} + void __init plat_time_init(void) { struct device_node *np; From 1e18ac7aeaec357048172695b1fbb461205b166f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 9 Jul 2015 10:40:41 +0100 Subject: [PATCH 803/839] MIPS: c-r4k: Extend way_string array The L2 cache in the I6400 core has 16 ways, so extend the way_string array to take such caches into account. [ralf@linux-mips.org: Other already supported CPUs are free to support more than 8 ways of cache as well.] Signed-off-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10640/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a5974ddba44b..fbea4432f3f2 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -945,7 +945,9 @@ static void b5k_instruction_hazard(void) } static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", }; static void probe_pcache(void) From 51d53674c3fff11e9231e66ca8616c65deadfb6c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Jul 2015 18:02:51 +0200 Subject: [PATCH 804/839] MIPS: O32: Use compat_sys_getsockopt. We were using the native syscall and that results in subtle breakage. This is the same issue as fixed in 077d0e65618f27b2199d622e12ada6d8f3dbd862 (MIPS: N32: Use compat getsockopt syscall) but that commit did fix it only for N32. Signed-off-by: Ralf Baechle Link: https://bugzilla.kernel.org/show_bug.cgi?id=100291 --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 66d618bb2fa2..f543ff4feef9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -400,7 +400,7 @@ EXPORT(sys32_call_table) PTR sys_connect /* 4170 */ PTR sys_getpeername PTR sys_getsockname - PTR sys_getsockopt + PTR compat_sys_getsockopt PTR sys_listen PTR compat_sys_recv /* 4175 */ PTR compat_sys_recvfrom From ad2daa85bd50b7ff5c851b80a0b813bdc8d14f8e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Jul 2015 15:46:32 +0100 Subject: [PATCH 805/839] arm64: entry32: remove pointless register assignment We currently set x27 in compat_sys_sigreturn_wrapper and compat_sys_rt_sigreturn_wrapper, similarly to what we do with r8/why on 32-bit ARM, in an attempt to prevent sigreturns from being restarted. However, on arm64 we have always used pt_regs::syscallno for syscall restarting (for both native and compat tasks), and x27 is never inspected again before being overwritten in kernel_exit. This patch removes the pointless register assignments. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry32.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index bd9bfaa9269b..f332d5d1f6b4 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -32,13 +32,11 @@ ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) From dbf3c370862d73fcd2c74ca55e254bb02143238d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 10 Jul 2015 10:11:07 -0700 Subject: [PATCH 806/839] Revert "Input: synaptics - allocate 3 slots to keep stability in image sensors" This reverts commit 63c4fda3c0bb841b1aad1298fc7fe94058fc79f8 as it causes issues with detecting 3-finger taps. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100481 Cc: stable@vger.kernel.org Acked-by: Benjamin Tissoires --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabee..3a32caf06bf1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); From b864bc17f1c326783f2388057e15d3e153125ab9 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:10 -0600 Subject: [PATCH 807/839] pmem: add maintainer for include/linux/pmem.h The file include/linux/pmem.h was recently created to hold the PMEM API, and is logically part of the PMEM driver. Add an entry for this file to MAINTAINERS. Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e6b09150aad..a8306b25c13c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6134,6 +6134,7 @@ L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem.c +F: include/linux/pmem.h LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras From b1b2e6235a44174151fa3bb22657f74972635618 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:11 -0600 Subject: [PATCH 808/839] tools/testing/nvdimm: mock ioremap_wt In the 4.2-rc1 merge the default_memremap_pmem() implementation switched from ioremap_nocache() to ioremap_wt(). Add it to the list of mocked routines to restore the ability to run the unit tests. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e9b64520ec1..8e020601c773 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,3 +1,4 @@ +ldflags-y += --wrap=ioremap_wt ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c85a6f6ba559..9f21b150396b 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -77,6 +77,12 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_nocache); +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; From f7ec83684af020c961d7fab801f8e3ef7ce5de33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:12 -0600 Subject: [PATCH 809/839] tools/testing/nvdimm: fix return code for unimplemented commands The implementation for the new "DIMM Flags" DSM relies on the -ENOTTY return code to indicate that the flags are unimplimented and to fall back to a safe default. As is the -ENXIO error code erroneoously indicates to fail enabling a BLK region. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4b69b8368de0..092d4724fe16 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -155,7 +155,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, int i, rc; if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENXIO; + return -ENOTTY; /* lookup label space for the given dimm */ for (i = 0; i < ARRAY_SIZE(handle); i++) From 9d27a87ec9e1317d368b1e5e3f4808078baa8c4c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 14:07:03 -0400 Subject: [PATCH 810/839] tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to nfit_test In preparation for fixing the BLK path to properly use "directed pcommit" enable the unit test infrastructure to emit mock "flush" tables. Writes to these flush addresses trigger a memory controller to flush its internal buffers to persistent media, similar to the x86 "pcommit" instruction. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 2 ++ tools/testing/nvdimm/test/iomap.c | 21 +++++++++++++ tools/testing/nvdimm/test/nfit.c | 50 +++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e020601c773..f56914c7929b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,4 +1,6 @@ ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 9f21b150396b..64bfaa50831c 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -65,6 +65,21 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, return fallback_fn(offset, size); } +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_cache); @@ -83,6 +98,12 @@ void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_wt); +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 092d4724fe16..d0bdae40ccc9 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -128,6 +128,8 @@ struct nfit_test { int num_pm; void **dimm; dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; void **label; dma_addr_t *label_dma; void **spa_set; @@ -331,7 +333,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR - + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -356,6 +359,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->label[i]) return -ENOMEM; sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; } for (i = 0; i < NUM_DCR; i++) { @@ -408,6 +415,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; unsigned int offset; nfit_test_init_header(nfit_buf, size); @@ -831,6 +839,39 @@ static void nfit_test0_setup(struct nfit_test *t) bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -933,6 +974,10 @@ static int nfit_test_probe(struct platform_device *pdev) GFP_KERNEL); nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), GFP_KERNEL); nfit_test->label_dma = devm_kcalloc(dev, num, @@ -943,7 +988,8 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(dma_addr_t), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr - && nfit_test->dcr_dma) + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) /* pass */; else return -ENOMEM; From c2ad29540cb913bd9e526fae77c35c7fb45f24a3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:13 -0600 Subject: [PATCH 811/839] nfit: update block I/O path to use PMEM API Update the nfit block I/O path to use the new PMEM API and to adhere to the read/write flows outlined in the "NVDIMM Block Window Driver Writer's Guide": http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf This includes adding support for targeted NVDIMM flushes called "flush hints" in the ACPI 6.0 specification: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf For performance and media durability the mapping for a BLK aperture is moved to a write-combining mapping which is consistent with memcpy_to_pmem() and wmb_blk(). Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 97 ++++++++++++++++++++++++++++++++++++++++----- drivers/acpi/nfit.h | 15 ++++++- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index a20b7c883ca0..11aa513f2fda 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "nfit.h" @@ -305,6 +306,23 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_flush_address *flush) +{ + struct device *dev = acpi_desc->dev; + struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), + GFP_KERNEL); + + if (!nfit_flush) + return false; + INIT_LIST_HEAD(&nfit_flush->list); + nfit_flush->flush = flush; + list_add_tail(&nfit_flush->list, &acpi_desc->flushes); + dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + flush->device_handle, flush->hint_count); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, const void *end) { @@ -338,7 +356,8 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - dev_dbg(dev, "%s: flush\n", __func__); + if (!add_flush(acpi_desc, table)) + return err; break; case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); @@ -389,6 +408,7 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; + struct nfit_flush *nfit_flush; struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; @@ -442,6 +462,14 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_bdw = nfit_idt->idt; break; } + + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { + if (nfit_flush->flush->device_handle != + nfit_memdev->memdev->device_handle) + continue; + nfit_mem->nfit_flush = nfit_flush; + break; + } break; } @@ -978,6 +1006,24 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) return mmio->base_offset + line_offset + table_offset + sub_line_offset; } +static void wmb_blk(struct nfit_blk *nfit_blk) +{ + + if (nfit_blk->nvdimm_flush) { + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + writeq(1, nfit_blk->nvdimm_flush); + wmb(); + } else + wmb_pmem(); +} + static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; @@ -1012,6 +1058,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->base + offset); + wmb_blk(nfit_blk); /* FIXME: conditionally perform read-back if mandated by firmware */ } @@ -1026,7 +1073,6 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + lane * mmio->size; - /* TODO: non-temporal access, flush hints, cache management etc... */ write_blk_ctl(nfit_blk, lane, dpa, len, rw); while (len) { unsigned int c; @@ -1045,13 +1091,19 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy(mmio->aperture + offset, iobuf + copied, c); + memcpy_to_pmem(mmio->aperture + offset, + iobuf + copied, c); else - memcpy(iobuf + copied, mmio->aperture + offset, c); + memcpy_from_pmem(iobuf + copied, + mmio->aperture + offset, c); copied += c; len -= c; } + + if (rw) + wmb_blk(nfit_blk); + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; } @@ -1124,7 +1176,7 @@ static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, } static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { resource_size_t start = spa->address; resource_size_t n = spa->length; @@ -1152,8 +1204,15 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - /* TODO: cacheability based on the spa type */ - spa_map->iomem = ioremap_nocache(start, n); + if (type == SPA_MAP_APERTURE) { + /* + * TODO: memremap_pmem() support, but that requires cache + * flushing when the aperture is moved. + */ + spa_map->iomem = ioremap_wc(start, n); + } else + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) goto err_map; @@ -1171,6 +1230,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges * @nvdimm_bus: NFIT-bus that provided the spa table entry * @nfit_spa: spa table to map + * @type: aperture or control region * * In the case where block-data-window apertures and * dimm-control-regions are interleaved they will end up sharing a @@ -1180,12 +1240,12 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * unbound. */ static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { void __iomem *iomem; mutex_lock(&acpi_desc->spa_map_mutex); - iomem = __nfit_spa_map(acpi_desc, spa); + iomem = __nfit_spa_map(acpi_desc, spa, type); mutex_unlock(&acpi_desc->spa_map_mutex); return iomem; @@ -1212,6 +1272,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_flush *nfit_flush; struct nfit_blk_mmio *mmio; struct nfit_blk *nfit_blk; struct nfit_mem *nfit_mem; @@ -1237,7 +1298,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + SPA_MAP_APERTURE); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); @@ -1259,7 +1321,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + SPA_MAP_CONTROL); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); @@ -1277,6 +1340,17 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + nfit_flush = nfit_mem->nfit_flush; + if (nfit_flush && nfit_flush->flush->hint_count != 0) { + nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, + nfit_flush->flush->hint_address[0], 8); + if (!nfit_blk->nvdimm_flush) + return -ENOMEM; + } + + if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + if (mmio->line_size == 0) return 0; @@ -1459,6 +1533,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) INIT_LIST_HEAD(&acpi_desc->dcrs); INIT_LIST_HEAD(&acpi_desc->bdws); INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); INIT_LIST_HEAD(&acpi_desc->memdevs); INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 81f2e8c5a79c..815cb561cdba 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -60,6 +60,11 @@ struct nfit_idt { struct list_head list; }; +struct nfit_flush { + struct acpi_nfit_flush_address *flush; + struct list_head list; +}; + struct nfit_memdev { struct acpi_nfit_memory_map *memdev; struct list_head list; @@ -77,6 +82,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; @@ -88,6 +94,7 @@ struct acpi_nfit_desc { struct mutex spa_map_mutex; struct list_head spa_maps; struct list_head memdevs; + struct list_head flushes; struct list_head dimms; struct list_head spas; struct list_head dcrs; @@ -109,7 +116,7 @@ struct nfit_blk { struct nfit_blk_mmio { union { void __iomem *base; - void *aperture; + void __pmem *aperture; }; u64 size; u64 base_offset; @@ -123,6 +130,12 @@ struct nfit_blk { u64 bdw_offset; /* post interleave offset */ u64 stat_offset; u64 cmd_offset; + void __iomem *nvdimm_flush; +}; + +enum spa_map_type { + SPA_MAP_CONTROL, + SPA_MAP_APERTURE, }; struct nfit_spa_mapping { From f0f2c072cf530d5b8890be5051cc8b36b0c54cce Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:14 -0600 Subject: [PATCH 812/839] nfit: add support for NVDIMM "latch" flag Add support in the NFIT BLK I/O path for the "latch" flag defined in the "Get Block NVDIMM Flags" _DSM function: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf This flag requires the driver to read back the command register after it is written in the block I/O path. This ensures that the hardware has fully processed the new command and moved the aperture appropriately. Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 33 ++++++++++++++++++++++++++++++++- drivers/acpi/nfit.h | 5 +++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 11aa513f2fda..628a42c41ab1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1059,7 +1059,9 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->base + offset); wmb_blk(nfit_blk); - /* FIXME: conditionally perform read-back if mandated by firmware */ + + if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + readq(mmio->base + offset); } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1266,6 +1268,28 @@ static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, return 0; } +static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, struct nfit_blk *nfit_blk) +{ + struct nd_cmd_dimm_flags flags; + int rc; + + memset(&flags, 0, sizeof(flags)); + rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, + sizeof(flags)); + + if (rc >= 0 && flags.status == 0) + nfit_blk->dimm_flags = flags.flags; + else if (rc == -ENOTTY) { + /* fall back to a conservative default */ + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + rc = 0; + } else + rc = -ENXIO; + + return rc; +} + static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct device *dev) { @@ -1340,6 +1364,13 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); + if (rc < 0) { + dev_dbg(dev, "%s: %s failed get DIMM flags\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + nfit_flush = nfit_mem->nfit_flush; if (nfit_flush && nfit_flush->flush->hint_count != 0) { nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 815cb561cdba..79b6d83875c1 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,10 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum { + ND_BLK_DCR_LATCH = 2, +}; + struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; @@ -131,6 +135,7 @@ struct nfit_blk { u64 stat_offset; u64 cmd_offset; void __iomem *nvdimm_flush; + u32 dimm_flags; }; enum spa_map_type { From cb908ed3495496b9973a2b9ed1a60f43933fdf01 Mon Sep 17 00:00:00 2001 From: Alex Ivanov Date: Mon, 15 Jun 2015 08:50:45 +0300 Subject: [PATCH 813/839] stifb: Implement hardware accelerated copyarea This patch adds hardware assisted scrolling. The code is based upon the following investigation: https://parisc.wiki.kernel.org/index.php/NGLE#Blitter A simple 'time ls -la /usr/bin' test shows 1.6x speed increase over soft copy and 2.3x increase over FBINFO_READS_FAST (prefer soft copy over screen redraw) on Artist framebuffer. Signed-off-by: Alex Ivanov Signed-off-by: Helge Deller --- drivers/video/fbdev/stifb.c | 40 +++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 86621fabbb8b..735355b0e023 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -121,6 +121,7 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_3 0x0004a0 #define REG_4 0x000600 #define REG_6 0x000800 +#define REG_7 0x000804 #define REG_8 0x000820 #define REG_9 0x000a04 #define REG_10 0x018000 @@ -135,6 +136,8 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_21 0x200218 #define REG_22 0x0005a0 #define REG_23 0x0005c0 +#define REG_24 0x000808 +#define REG_25 0x000b00 #define REG_26 0x200118 #define REG_27 0x200308 #define REG_32 0x21003c @@ -429,6 +432,9 @@ ARTIST_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) #define SET_LENXY_START_RECFILL(fb, lenxy) \ WRITE_WORD(lenxy, fb, REG_9) +#define SETUP_COPYAREA(fb) \ + WRITE_BYTE(0, fb, REG_16b1) + static void HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) { @@ -1004,6 +1010,36 @@ stifb_blank(int blank_mode, struct fb_info *info) return 0; } +static void +stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + SETUP_COPYAREA(fb); + + SETUP_HW(fb); + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb, + IBOvals(RopSrc, MaskAddrOffset(0), + BitmapExtent08, StaticReg(1), + DataDynamic, MaskOtc, BGx(0), FGx(0))); + + WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24); + WRITE_WORD(((area->width << 16) | area->height), fb, REG_7); + WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1069,7 +1105,7 @@ static struct fb_ops stifb_ops = { .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, .fb_fillrect = cfb_fillrect, - .fb_copyarea = cfb_copyarea, + .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1258,7 +1294,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->fbops = &stifb_ops; info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len); info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT; + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ From 01ab60570427caa24b9debc369e452e86cd9beb4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 1 Jul 2015 17:18:37 -0400 Subject: [PATCH 814/839] parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgtable.h | 55 ++++++---- arch/parisc/include/asm/tlbflush.h | 53 +++++----- arch/parisc/kernel/cache.c | 105 ++++++++++++------- arch/parisc/kernel/entry.S | 163 ++++++++++++++--------------- arch/parisc/kernel/traps.c | 4 - 5 files changed, 212 insertions(+), 168 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec..f93c4a4e6580 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include #include -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. @@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa0..e84b96478193 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. */ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. */ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b5..cda6dbbe9842 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too... */ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long flags; + unsigned long flags, size; - /* Note: purge_tlb_entries can be called at startup with - no context. */ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) -{ - unsigned long npages; - - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. */ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b..c5ef4081b01d 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 +1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 6548fd1d2e62..b99b39f1da02 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); From 892e8cac99a71f6254f84fc662068d912e1943bf Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 09:40:59 -0400 Subject: [PATCH 815/839] selinux: fix mprotect PROT_EXEC regression caused by mm change commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip security check and lockdep conflict with XFS") caused a regression for SELinux by disabling any SELinux checking of mprotect PROT_EXEC on shared anonymous mappings. However, even before that regression, the checking on such mprotect PROT_EXEC calls was inconsistent with the checking on a mmap PROT_EXEC call for a shared anonymous mapping. On a mmap, the security hook is passed a NULL file and knows it is dealing with an anonymous mapping and therefore applies an execmem check and no file checks. On a mprotect, the security hook is passed a vma with a non-NULL vm_file (as this was set from the internally-created shmem file during mmap) and therefore applies the file-based execute check and no execmem check. Since the aforementioned commit now marks the shmem zero inode with the S_PRIVATE flag, the file checks are disabled and we have no checking at all on mprotect PROT_EXEC. Add a test to the mprotect hook logic for such private inodes, and apply an execmem check in that case. This makes the mmap and mprotect checking consistent for shared anonymous mappings, as well as for /dev/zero and ashmem. Cc: # 4.1.x Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 312537d48050..692e3cc8ce23 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3271,7 +3271,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. From e9e4dd3267d0c5234c5c0f47440456b10875dec9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:09 +0300 Subject: [PATCH 816/839] net: do not process device backlog during unregistration commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") fixes a problem where processed packet comes from device with destroyed inetdev (dev->ip_ptr). This is not expected because inetdev_destroy is called in NETDEV_UNREGISTER phase and packets should not be processed after dev_close_many() and synchronize_net(). Above fix is still required because inetdev_destroy can be called for other reasons. But it shows the real problem: backlog can keep packets for long time and they do not hold reference to device. Such packets are then delivered to upper levels at the same time when device is unregistered. Calling flush_backlog after NETDEV_UNREGISTER_FINAL still accounts all packets from backlog but before that some packets continue to be delivered to upper levels long after the synchronize_net call which is supposed to wait the last ones. Also, as Eric pointed out, processed packets, mostly from other devices, can continue to add new packets to backlog. Fix the problem by moving flush_backlog early, after the device driver is stopped and before the synchronize_net() call. Then use netif_running check to make sure we do not add more packets to backlog. We have to do it in enqueue_to_backlog context when the local IRQ is disabled. As result, after the flush_backlog and synchronize_net sequence all packets should be accounted. Thanks to Eric W. Biederman for the test script and his valuable feedback! Reported-by: Vittorio Gambaletta Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1e57efda055b..6dd126a69aa0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3448,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3469,6 +3471,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -6135,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6770,8 +6774,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ From 2c17d27c36dcce2b6bf689f41a46b9e909877c21 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:10 +0300 Subject: [PATCH 817/839] net: call rcu_read_lock early in process_backlog Incoming packet should be either in backlog queue or in RCU read-side section. Otherwise, the final sequence of flush_backlog() and synchronize_net() may miss packets that can run without device reference: CPU 1 CPU 2 skb->dev: no reference process_backlog:__skb_dequeue process_backlog:local_irq_enable on_each_cpu for flush_backlog => IPI(hardirq): flush_backlog - packet not found in backlog CPU delayed ... synchronize_net - no ongoing RCU read-side sections netdev_run_todo, rcu_barrier: no ongoing callbacks __netif_receive_skb_core:rcu_read_lock - too late free dev process packet for freed dev Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6dd126a69aa0..a8e4dd430285 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3774,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3785,7 +3783,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3815,10 +3813,10 @@ skip_taps: if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) - goto unlock; + goto out; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -3836,7 +3834,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3848,7 +3846,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3902,8 +3900,7 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3934,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4501,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { From 51ed7f3e7d33824820837ad784801973f147c51a Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Thu, 9 Jul 2015 04:12:45 -0700 Subject: [PATCH 818/839] bridge: mdb: allow the user to delete mdb entry if there's a querier Until now when a querier was present static entries couldn't be deleted. Fix this and allow the user to manipulate the mdb with or without a querier. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 60868c212679..c11cf2611db0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -423,19 +423,12 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_other_query.timer)) - return -EBUSY; - + if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - } else { - if (timer_pending(&br->ip6_other_query.timer)) - return -EBUSY; - + else ip.u.ip6 = entry->addr.u.ip6; #endif - } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); From 8220ea23243178c16b87fc3ccadf071647b64e04 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 10 Jul 2015 11:39:57 +0200 Subject: [PATCH 819/839] net: inet_diag: always export IPV6_V6ONLY sockopt for listening sockets Reconsidering my commit 20462155 "net: inet_diag: export IPV6_V6ONLY sockopt", I am not happy with the limitations it causes for socket analysing code in userspace. Exporting the value only if it is set makes it hard for userspace to decide whether the option is not set or the kernel does not support exporting the option at all. >From an auditor's perspective, the interesting question for listening AF_INET6 sockets is: "Does it NOT have IPV6_V6ONLY set?" Because it is the unexpected case. This patch allows to answer this question reliably. Signed-off-by: Phil Sutter Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9bc26677058e..c3b1f3a0f4cf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet6_sk(sk)->tclass) < 0) goto errout; - if (ipv6_only_sock(sk) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif From 145c37084e3e6584b95f82361922965cc3af41bf Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 10 Jul 2015 21:20:28 +0900 Subject: [PATCH 820/839] Doc: z8530book: Fix typo in API-z8530-sync-txdma-open.html This patch fix a spelling typo found in API-z8530-sync-txdma-open.html. It is because this file was generated from comment in source, I have to fix comment in source. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- drivers/net/wan/z85230.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index feacc3b994b7..2f0bd6955f33 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -1044,7 +1044,7 @@ EXPORT_SYMBOL(z8530_sync_dma_close); * @dev: The network device to attach * @c: The Z8530 channel to configure in sync DMA mode. * - * Set up a Z85x30 device for synchronous DMA tranmission. One + * Set up a Z85x30 device for synchronous DMA transmission. One * ISA DMA channel must be available for this to work. The receive * side is run in PIO mode, but then it has the bigger FIFO. */ From c4d029f2d43b39de7b9299e8b58102a442ba86f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 11 Jul 2015 14:26:34 +0200 Subject: [PATCH 821/839] tick/broadcast: Prevent NULL pointer dereference Dan reported that the recent changes to the broadcast code introduced a potential NULL dereference. Add the proper check. Fixes: e0454311903d "tick/broadcast: Sanity check the shutdown of the local clock_event" Reported-by: Dan Carpenter Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ee3cf942d6eb..52b9e199b5ac 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -409,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) break; } - if (cpumask_empty(tick_broadcast_mask)) { - if (!bc_stopped) - clockevents_shutdown(bc); - } else if (bc_stopped) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - tick_broadcast_start_periodic(bc); - else - tick_broadcast_setup_oneshot(bc); + if (bc) { + if (cpumask_empty(tick_broadcast_mask)) { + if (!bc_stopped) + clockevents_shutdown(bc); + } else if (bc_stopped) { + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); + } } raw_spin_unlock(&tick_broadcast_lock); } From 22401ff17f993bda9acd14ac39213e6f9bb985d0 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 11 Jul 2015 17:30:01 +0200 Subject: [PATCH 822/839] cdc_ncm: update specs URL Update referenced specs link to reflect actual file version and location. Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1991e4a24657..db40175b1a0b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -6,7 +6,7 @@ * Original author: Hans Petter Selasky * * USB Host Driver for Network Control Model (NCM) - * http://www.usb.org/developers/devclass_docs/NCM10.zip + * http://www.usb.org/developers/docs/devclass_docs/NCM10_012011.zip * * The NCM encoding, decoding and initialization logic * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h From c590032f9ab8d03aab235a7601db22a267c2d958 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Fri, 10 Jul 2015 16:20:00 -0700 Subject: [PATCH 823/839] net: bcmgenet: fix accounting of packet drops vs errors bcmgenet driver needs to separate packet drops from packet errors. When the driver has to drop a *good* packet, due to lack of buffers or replacement skbs, increment only dev->stats.[rx|tx]_dropped. When the driver encounters a bad Rx packet or Tx error, increment only dev->stats.[rx|tx]_errors + relevant detailed error counter. Signed-off-by: Petri Gynther Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b43b2cb9b830..64c1e9db6b0b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1230,7 +1230,6 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, new_skb = skb_realloc_headroom(skb, sizeof(*status)); dev_kfree_skb(skb); if (!new_skb) { - dev->stats.tx_errors++; dev->stats.tx_dropped++; return NULL; } @@ -1465,7 +1464,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!skb)) { dev->stats.rx_dropped++; - dev->stats.rx_errors++; goto next; } @@ -1493,7 +1491,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) { netif_err(priv, rx_status, dev, "dropping fragmented packet!\n"); - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; @@ -1515,7 +1512,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dev->stats.rx_frame_errors++; if (dma_flag & DMA_RX_LG) dev->stats.rx_length_errors++; - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; From 2ee94014d9bd3868b1c0d17405f96d63bec83f28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 10 Jul 2015 19:48:58 -0400 Subject: [PATCH 824/839] net: switchdev: don't abort unsupported operations There is no need to abort attribute setting or object addition, if the prepare phase returned operation not supported. Thus, abort these two transactions only if the error is not -EOPNOTSUPP. Signed-off-by: Vivien Didelot Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a054025..9f2add3cba26 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * released. */ - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); + if (err != -EOPNOTSUPP) { + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + } return err; } @@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * released. */ - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); + if (err != -EOPNOTSUPP) { + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + } return err; } From 8f5063e97f393d49611151d3cf7dcbeb41397f12 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:10 -0700 Subject: [PATCH 825/839] net: dsa: Test array index before use port_index is used an index into an array, and this information comes from Device Tree, make sure that port_index is not equal to the array size before using it. Move the check against port_index earlier in the loop. Fixes: 5e95329b701c: ("dsa: add device tree bindings to register DSA switches") Reported-by: Dan Carpenter Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 392e29a0227d..52beeb8829dc 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev) continue; port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; port_name = of_get_property(port, "label", NULL); if (!port_name) @@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - if (port_index == DSA_MAX_PORTS) - break; } } From c8cf89f73f3d9ecbdea479778f0ac714be79be33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:11 -0700 Subject: [PATCH 826/839] net: dsa: Fix off-by-one in switch address parsing cd->sw_addr is used as a MDIO bus address, which cannot exceed PHY_MAX_ADDR (32), our check was off-by-one. Fixes: 5e95329b701c ("dsa: add device tree bindings to register DSA switches") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 52beeb8829dc..b445d492c115 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev) continue; cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr > PHY_MAX_ADDR) + if (cd->sw_addr >= PHY_MAX_ADDR) continue; if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) From 0a73d0a204a4a04a1e110539c5a524ae51f91d6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:34:29 -0400 Subject: [PATCH 827/839] 9p: don't leave a half-initialized inode sitting around Cc: stable@vger.kernel.org # all branches Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 3 +-- fs/9p/vfs_inode_dotl.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 510040b04c96..b1dc51888048 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 09e4433717b8..e8aa57dc8d6d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } From 9391dd00d13c853ab4f2a85435288ae2202e0e43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:39:45 -0400 Subject: [PATCH 828/839] fix a braino in ovl_d_select_inode() when opening a directory we want the overlayfs inode, not one from the topmost layer. Reported-By: Andrey Jr. Melnikov Tested-By: Andrey Jr. Melnikov Signed-off-by: Al Viro --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f140e3dbfb7b..d9da5a4e9382 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -343,6 +343,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) struct path realpath; enum ovl_path_type type; + if (d_is_dir(dentry)) + return d_backing_inode(dentry); + type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); From 75a6f82a0d10ef8f13cd8fe7212911a0252ab99e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jul 2015 02:42:38 +0100 Subject: [PATCH 829/839] freeing unlinked file indefinitely delayed Normally opening a file, unlinking it and then closing will have the inode freed upon close() (provided that it's not otherwise busy and has no remaining links, of course). However, there's one case where that does *not* happen. Namely, if you open it by fhandle with cold dcache, then unlink() and close(). In normal case you get d_delete() in unlink(2) notice that dentry is busy and unhash it; on the final dput() it will be forcibly evicted from dcache, triggering iput() and inode removal. In this case, though, we end up with *two* dentries - disconnected (created by open-by-fhandle) and regular one (used by unlink()). The latter will have its reference to inode dropped just fine, but the former will not - it's considered hashed (it is on the ->s_anon list), so it will stay around until the memory pressure will finally do it in. As the result, we have the final iput() delayed indefinitely. It's trivial to reproduce - void flush_dcache(void) { system("mount -o remount,rw /"); } static char buf[20 * 1024 * 1024]; main() { int fd; union { struct file_handle f; char buf[MAX_HANDLE_SZ]; } x; int m; x.f.handle_bytes = sizeof(x); chdir("/root"); mkdir("foo", 0700); fd = open("foo/bar", O_CREAT | O_RDWR, 0600); close(fd); name_to_handle_at(AT_FDCWD, "foo/bar", &x.f, &m, 0); flush_dcache(); fd = open_by_handle_at(AT_FDCWD, &x.f, O_RDWR); unlink("foo/bar"); write(fd, buf, sizeof(buf)); system("df ."); /* 20Mb eaten */ close(fd); system("df ."); /* should've freed those 20Mb */ flush_dcache(); system("df ."); /* should be the same as #2 */ } will spit out something like Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 283282 21692 93% / - inode gets freed only when dentry is finally evicted (here we trigger than by remount; normally it would've happened in response to memory pressure hell knows when). Cc: stable@vger.kernel.org # v2.6.38+; earlier ones need s/kill_it/unhash_it/ Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7a3f3e5f9cea..5c8ea15e73a5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +776,9 @@ repeat: if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; From 5e63e6baa159fa8c787cf783dbf3d77fbea97331 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:32:46 +0300 Subject: [PATCH 830/839] can: rcar_can: fix IRQ check rcar_can_probe() regards 0 as a wrong IRQ #, despite platform_get_irq() that it calls returns negative error code in that case. This leads to the following being printed to the console when attempting to open the device: error requesting interrupt fffffffa because rcar_can_open() calls request_irq() with a negative IRQ #, and that function naturally fails with -EINVAL. Check for the negative error codes instead and propagate them upstream instead of just returning -ENODEV. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7deb80dcbe8c..93017c09cfc3 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -758,8 +758,9 @@ static int rcar_can_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); + err = irq; goto fail; } From c1a4c87b06fa564d6e2760a12d4e5a09badc684b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:33:53 +0300 Subject: [PATCH 831/839] can: rcar_can: print signed IRQ # Printing IRQ # using "%x" and "%u" unsigned formats isn't quite correct as 'ndev->irq' is of type *int*, so the "%d" format needs to be used instead. While fixing this, beautify the dev_info() message in rcar_can_probe() a bit. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 93017c09cfc3..2f9ebad4ff56 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -526,7 +526,7 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); @@ -824,7 +824,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", priv->regs, ndev->irq); return 0; From 3255f68c132a8ed784b9fe1804ef4642a1d16b9a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:34:55 +0300 Subject: [PATCH 832/839] can: rcar_can: fix typo in error message Fix typo in the first error message printed by rcar_can_open(). Based on the original patch by Vladimir Barinov. Fixes: 862e2b6af941 ("can: rcar_can: support all input clocks") Reported-by: Vladimir Barinov Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 2f9ebad4ff56..310a0cd20679 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -508,7 +508,8 @@ static int rcar_can_open(struct net_device *ndev) err = clk_prepare_enable(priv->clk); if (err) { - netdev_err(ndev, "failed to enable periperal clock, error %d\n", + netdev_err(ndev, + "failed to enable peripheral clock, error %d\n", err); goto out; } From ae185f19662565f5320e49247faab8752a977642 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:50:35 +0300 Subject: [PATCH 833/839] can: rcar_can: print request_irq() error code Also print the error code when the request_irq() call fails in rcar_can_open(), rewording the error message... Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 310a0cd20679..5e81afffc073 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -527,7 +527,8 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); + netdev_err(ndev, "request_irq(%d) failed, error %d\n", + ndev->irq, err); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); From 585bc2ac4cc0e2398f5b128018247f51fa3e0e12 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:51:34 +0300 Subject: [PATCH 834/839] can: rcar_can: unify error messages All the error messages in the driver but the ones from devm_clk_get() failures use similar format. Make those two messages consitent with others. Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 5e81afffc073..7bd54191f962 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -785,7 +785,8 @@ static int rcar_can_probe(struct platform_device *pdev) priv->clk = devm_clk_get(&pdev->dev, "clkp1"); if (IS_ERR(priv->clk)) { err = PTR_ERR(priv->clk); - dev_err(&pdev->dev, "cannot get peripheral clock: %d\n", err); + dev_err(&pdev->dev, "cannot get peripheral clock, error %d\n", + err); goto fail_clk; } @@ -797,7 +798,7 @@ static int rcar_can_probe(struct platform_device *pdev) priv->can_clk = devm_clk_get(&pdev->dev, clock_names[clock_select]); if (IS_ERR(priv->can_clk)) { err = PTR_ERR(priv->can_clk); - dev_err(&pdev->dev, "cannot get CAN clock: %d\n", err); + dev_err(&pdev->dev, "cannot get CAN clock, error %d\n", err); goto fail_clk; } From 033365191136c97f88c81b7bd0011414db28bb4e Mon Sep 17 00:00:00 2001 From: "J.D. Schroeder" Date: Wed, 8 Jul 2015 14:38:12 +0300 Subject: [PATCH 835/839] can: c_can: Fix default pinmux glitch at init The previous change 3973c526ae9c (net: can: c_can: Disable pins when CAN interface is down) causes a slight glitch on the pinctrl settings when used. Since commit ab78029 (drivers/pinctrl: grab default handles from device core), the device core will automatically set the default pins. This causes the pins to be momentarily set to the default and then to the sleep state in register_c_can_dev(). By adding an optional "enable" state, boards can set the default pin state to be disabled and avoid the glitch when the switch from default to sleep first occurs. If the "enable" state is not available c_can_pinctrl_select_state() falls back to using the "default" pinctrl state. [Roger Q] - Forward port to v4.2 and use pinctrl_get_select(). Signed-off-by: J.D. Schroeder Signed-off-by: Roger Quadros Reviewed-by: Grygorii Strashko Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 041525d2595c..5d214d135332 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,6 +592,7 @@ static int c_can_start(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); int err; + struct pinctrl *p; /* basic c_can configuration */ err = c_can_chip_config(dev); @@ -604,8 +605,13 @@ static int c_can_start(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; - /* activate pins */ - pinctrl_pm_select_default_state(dev->dev.parent); + /* Attempt to use "active" if available else use "default" */ + p = pinctrl_get_select(priv->device, "active"); + if (!IS_ERR(p)) + pinctrl_put(p); + else + pinctrl_pm_select_default_state(priv->device); + return 0; } From 2acb5c301edf39ab6d066687ce70da1166e4de9e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 7 Jul 2015 17:27:57 +0300 Subject: [PATCH 836/839] ARM: dts: dra7x-evm: Prevent glitch on DCAN1 pinmux Driver core sets "default" pinmux on on probe and CAN driver sets "sleep" pinmux during register. This causes a small window where the CAN pins are in "default" state with the DCAN module being disabled. Change the "default" state to be like sleep so this glitch is avoided. Add a new "active" state that is used by the driver when CAN is actually active. Signed-off-by: Roger Quadros Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- arch/arm/boot/dts/dra7-evm.dts | 5 +++-- arch/arm/boot/dts/dra72-evm.dts | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa465904f6cc..096f68be99e2 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index 4e1b60581782..803738414086 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -587,9 +587,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: [PATCH 837/839] can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 7 ++----- drivers/net/can/slcan.c | 2 +- drivers/net/can/vcan.c | 3 --- include/linux/can/skb.h | 2 ++ net/can/af_can.c | 12 +++++++----- net/can/bcm.c | 2 ++ net/can/raw.c | 7 ++++--- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e9b1810d319f..aede704605c6 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,9 +440,6 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -578,7 +575,6 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -589,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -607,7 +604,6 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -618,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index f64f5290d6f8..a23a7af8eb9a 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,7 +207,6 @@ static void slc_bump(struct slcan *sl) if (!skb) return; - __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -215,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 0ce868de855d..674f367087c5 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,9 +78,6 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx_ni(skb); } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457a..51bb6532785c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62a7318..166d436196c1 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453585be..a1ba6875c2a2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748cbb4e..2e67b1423cd3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) From 01e2d0627a9a6edb24c37db45db5ecb31e9de808 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:00:20 -0700 Subject: [PATCH 838/839] Revert "drm/i915: Use crtc_state->active in primary check_plane func" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit dec4f799d0a4c9edae20512fa60b0a36f3299ca2. Jörg Otte reports a NULL pointder dereference due to this commit, as 'crtc_state' very much can be NULL: crtc_state = state->base.state ? intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL; So the change to test 'crtc_state->base.active' cannot possibly be correct as-is. There may be some other minimal fix (like just checking crtc_state for NULL), but I'm just reverting it now for the rc2 release, and people like Daniel Vetter who actually know this code will figure out what the right solution is in the longer term. Reported-and-bisected-by: Jörg Otte Cc: Ander Conselvan de Oliveira Cc: Jani Nikula Cc: Daniel Vetter CC: Maarten Lankhorst Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ba9321998a41..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (crtc_state->base.active) { + if (intel_crtc->active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); From bc0195aad0daa2ad5b0d76cce22b167bc3435590 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:10:30 -0700 Subject: [PATCH 839/839] Linux 4.2-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13270c0a9336..257ef5892ab7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma sheep # *DOCUMENTATION*